Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ported DBoW2 cluster initialization #25

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 84 additions & 1 deletion src/vocabulary_creator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,21 @@ inline int omp_get_max_threads(){return 1;}
// Stand-in carrying the name of OpenMP's omp_get_thread_num(): always reports
// thread index 0. Presumably compiled only when OpenMP is unavailable (the
// opening preprocessor guard is not visible here -- confirm).
inline int omp_get_thread_num()
{
    return 0;
}
#endif
#include <iostream>
#include <numeric>
using namespace std;
namespace fbow{

/**
* Returns a pseudo-random number in the range [min..max], drawn with rand().
*
* The rand()/RAND_MAX ratio is evaluated in double precision. Dividing in T
* would be an integer division for integral T, which yields 0 for every draw
* except rand() == RAND_MAX and therefore (almost) always returned `min`.
*
* @param min lower bound of the range
* @param max upper bound of the range
* @return random T number in [min..max]; for integral T the scaled value is
*         truncated toward zero when converted back to T
*/
template <class T>
static T RandomValue(T min, T max) {
    // Fraction in [0, 1]; rand() returns a value in [0, RAND_MAX].
    const double unit = static_cast<double>(rand()) / static_cast<double>(RAND_MAX);
    const double lo = static_cast<double>(min);
    const double hi = static_cast<double>(max);
    return static_cast<T>(unit * (hi - lo) + lo);
}

void VocabularyCreator::create(fbow::Vocabulary &Voc, const cv::Mat &features, const std::string &desc_name, Params params)
{
std::vector<cv::Mat> vfeatures(1);
Expand Down Expand Up @@ -108,7 +120,7 @@ void VocabularyCreator::createLevel( int parent, int curL,bool recursive){
}

//initialize clusters
auto centers=getInitialClusterCenters(findices );
auto centers= initialClusterCentersKmpp(findices );
center_features.resize(centers.size());
for(size_t i=0;i<centers.size();i++)
center_features[i]=_features[centers[i]];
Expand Down Expand Up @@ -188,6 +200,77 @@ std::vector<uint32_t> VocabularyCreator::getInitialClusterCenters(const std::ve
return centers;
}

/**
* Picks up to _params.k initial cluster centers among `findices`, following the
* k-means++ seeding scheme:
*   1. Pick the first center among the given features with rand().
*   2. For every feature keep, in m_Dist, the distance to the nearest center
*      chosen so far.
*   3. Draw the next center with probability proportional to m_Dist.
*   4. Repeat 2 and 3 until k centers exist or no feature has a positive distance.
*
* NOTE(review): textbook k-means++ weights candidates by D(x)^2, while this code
* uses the value returned by dist_func directly -- confirm whether dist_func
* already returns a squared distance.
*
* @param findices indices of the candidate features
* @return indices (taken from findices) of the selected centers; empty when
*         findices is empty, and possibly fewer than k entries when the summed
*         distances stop being positive
*/
std::vector<uint32_t> VocabularyCreator::initialClusterCentersKmpp(const std::vector<uint32_t> &findices)
{
    std::vector<uint32_t> centers;
    // Without candidates there is nothing to pick; this also avoids the
    // modulo-by-zero in the first draw below.
    if (findices.empty())
        return centers;
    centers.reserve(_params.k);

    // 1. first center
    centers.push_back(findices[rand() % findices.size()]);

    // 2. initial distances: every feature against the only center so far
    auto last_center_feat = _features[centers.back()];
    for (auto fi : findices) {
        auto &feature = _features(fi);
        feature.m_Dist = dist_func(last_center_feat, _features[fi]);
    }

    while (static_cast<int>(centers.size()) < _params.k)
    {
        // Refresh the nearest-center distance using the most recently added
        // center. Features whose stored distance is already 0 are skipped.
        last_center_feat = _features[centers.back()];
        for (auto fi : findices) {
            auto &feature = _features(fi);
            if (feature.m_Dist > 0.0f)
                feature.m_Dist = std::min(feature.m_Dist, dist_func(last_center_feat, _features[fi]));
        }

        // Accumulate in double: a float accumulator would round the running
        // sum on every step while the cumulative scan below runs in double.
        const double dist_sum = std::accumulate(
            findices.begin(), findices.end(), 0.0,
            [&](double acc, const uint32_t fid) { return acc + _features(fid).m_Dist; });

        // No positive distance left: no further center can be drawn.
        if (!(dist_sum > 0))
            break;

        // 3. draw a threshold in (0, dist_sum]; a zero threshold is rejected
        double cut_d;
        do
        {
            cut_d = RandomValue<double>(0, dist_sum);
        } while (cut_d == 0.0);

        // Walk the cumulative distances until the threshold is reached.
        double d_up_now = 0;
        auto dit = findices.begin();
        for (; dit != findices.end(); ++dit)
        {
            d_up_now += _features(*dit).m_Dist;
            if (d_up_now >= cut_d) break;
        }

        // The scan can finish without reaching the threshold (floating-point
        // rounding); fall back to the last candidate in that case.
        if (dit == findices.end())
            --dit;

        centers.push_back(*dit);
    } // while (centers.size() < _params.k)

    return centers;
}

std::size_t VocabularyCreator::vhash(const std::vector<std::vector<uint32_t> > & v_vec) {
std::size_t seed = 0;

Expand Down
2 changes: 1 addition & 1 deletion src/vocabulary_creator.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class FBOW_API VocabularyCreator
void createLevel(const std::vector<uint32_t> &findices, int parent=0, int curL=0);
void createLevel(int parent=0, int curL=0, bool recursive=true);
std::vector<uint32_t> getInitialClusterCenters(const std::vector<uint32_t> &findices);
// Chooses initial cluster centers among the features listed in findices using
// k-means++ style seeding (defined in vocabulary_creator.cpp); returns the
// indices of the chosen features.
std::vector<uint32_t> initialClusterCentersKmpp(const std::vector<uint32_t> &findices);

std::size_t vhash(const std::vector<std::vector<uint32_t> >& v_vec) ;

Expand Down Expand Up @@ -148,7 +149,6 @@ class FBOW_API VocabularyCreator
struct Node{
Node(){}
Node(uint32_t Id,uint32_t Parent,const cv::Mat &Feature, uint32_t Feat_idx=std::numeric_limits<uint32_t>::max() ):id(Id),parent(Parent),feature(Feature),feat_idx(Feat_idx){

}

uint32_t id=std::numeric_limits<uint32_t>::max();//id of this node in the tree
Expand Down