Files
secondo/Algebras/ImageSimilarity/fast_kmeans/annulus_kmeans.cpp

182 lines
5.2 KiB
C++
Raw Permalink Normal View History

2026-01-23 17:03:45 +08:00
/*
----
This file is NOT part of SECONDO.
Authors: Greg Hamerly and Jonathan Drake
Feedback: hamerly@cs.baylor.edu
See: http://cs.baylor.edu/~hamerly/software/kmeans.php
Copyright 2014
----
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//[TOC] [\tableofcontents]
[1] Implementation of the annulus algorithm
1 Implementation of the annulus algorithm
*/
/* Authors: Greg Hamerly and Jonathan Drake
* Feedback: hamerly@cs.baylor.edu
* See: http://cs.baylor.edu/~hamerly/software/kmeans.php
* Copyright 2014
*/
#include "annulus_kmeans.h"
#include "general_functions.h"
#include <cmath>
#include <numeric>
#include <algorithm>
void AnnulusKmeans::free() {
HamerlyKmeans::free();
delete [] guard;
delete [] xNorm;
delete [] cOrder;
guard = NULL;
xNorm = NULL;
cOrder = NULL;
}
int AnnulusKmeans::runThread(int threadId, int maxIterations) {
int iterations = 0;
int startNdx = start(threadId);
int endNdx = end(threadId);
while ((iterations < maxIterations) && ! converged) {
++iterations;
update_s(threadId);
synchronizeAllThreads();
if (threadId == 0) {
// compute the inter-center distances,
//keeping only the closest distances
sort_means_by_norm();
}
synchronizeAllThreads();
// loop over all records
for (int i = startNdx; i < endNdx; ++i) {
unsigned short closest = assignment[i];
// if upper[i] is less than the greater of these two, then we can
// ignore record i
double upper_comparison_bound = std::max(s[closest], lower[i]);
// first check: if u(x) <= s(c(x)) or u(x) <= lower(x), then ignore
// x, because its closest center must still be closest
if (upper[i] <= upper_comparison_bound) {
continue;
}
// otherwise, compute the real distance between this record and its
// closest center, and update upper
double u2 = pointCenterDist2(i, closest);
upper[i] = sqrt(u2);
// if (u(x) <= s(c(x))) or (u(x) <= lower(x)), then ignore x
if (upper[i] <= upper_comparison_bound) {
continue;
}
double l2 = pointCenterDist2(i, guard[i]);
lower[i] = sqrt(l2);
double beta = std::max(lower[i], upper[i]);
std::pair<double, int>* begin =
std::lower_bound(cOrder, cOrder + k,
std::make_pair(xNorm[i] - beta, k));
std::pair<double, int>* end =
std::lower_bound(begin, cOrder + k,
std::make_pair(xNorm[i] + beta, k));
for (std::pair<double, int>* jp = begin; jp != end; ++jp) {
if (jp->second == closest) continue;
double dist2 = pointCenterDist2(i, jp->second);
if (dist2 <= u2) {
if (dist2 == u2) {
if (jp->second < closest) closest = jp->second;
} else {
l2 = u2;
u2 = dist2;
guard[i] = closest;
closest = jp->second;
}
} else if (dist2 < l2) {
// we must reduce the lower bound on the distance to the
// *second* closest center to x[i]
l2 = dist2;
guard[i] = jp->second;
}
}
// we have been dealing in squared distances; need to convert
lower[i] = sqrt(l2);
// if the assignment for i has changed, then adjust the counts and
// locations of each center's accumulated mass
if (assignment[i] != closest) {
upper[i] = sqrt(u2);
changeAssignment(i, closest, threadId);
}
}
verifyAssignment(iterations, startNdx, endNdx);
// ELKAN 4, 5, AND 6
// calculate the new center locations
synchronizeAllThreads();
if (threadId == 0) {
int furthestMovingCenter = move_centers();
converged = (0.0 == centerMovement[furthestMovingCenter]);
}
synchronizeAllThreads();
if (! converged) {
update_bounds(startNdx, endNdx);
}
synchronizeAllThreads();
}
return iterations;
}
void AnnulusKmeans::initialize(Dataset const *aX, unsigned short aK,
unsigned short *initialAssignment, int aNumThreads) {
HamerlyKmeans::initialize(aX, aK, initialAssignment, aNumThreads);
guard = new unsigned short[n];
xNorm = new double[n];
cOrder = new std::pair<double, int>[k];
for (int i = 0; i < k; ++i) {
cOrder[i].first = 0.0;
cOrder[i].second = i;
}
std::fill(guard, guard + n, 1);
for (int i = 0; i < n; ++i) {
xNorm[i] = sqrt(pointPointInnerProduct(i, i));
}
}
void AnnulusKmeans::sort_means_by_norm() {
// sort the centers by their norms
for (int c1 = 0; c1 < k; ++c1) {
cOrder[c1].first = sqrt(centerCenterInnerProduct(c1, c1));
cOrder[c1].second = c1;
}
std::sort(cOrder, cOrder + k);
}