Files
2026-01-23 17:03:45 +08:00

241 lines
8.2 KiB
C++

/*
----
This file is NOT part of SECONDO.
Authors: Greg Hamerly and Jonathan Drake
Feedback: hamerly@cs.baylor.edu
See: http://cs.baylor.edu/~hamerly/software/kmeans.php
Copyright 2014
----
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//[TOC] [\tableofcontents]
[1] Implementation of the Drake kmeans algorithm
1 Implementation of the Drake kmeans algorithm
*/
/* Authors: Greg Hamerly and Jonathan Drake
* Feedback: hamerly@cs.baylor.edu
* See: http://cs.baylor.edu/~hamerly/software/kmeans.php
* Copyright 2014
*/
#include "drake_kmeans.h"
#include "general_functions.h"
#include <cmath>
#include <algorithm>
#include <cassert>
DrakeKmeans::DrakeKmeans(int aB) : closestOtherCenters(NULL) {
numLowerBounds = aB;
}
void DrakeKmeans::free() {
for (int i = 0; i < n; ++i) {
delete [] closestOtherCenters[i];
}
TriangleInequalityBaseKmeans::free();
delete [] closestOtherCenters;
closestOtherCenters = NULL;
}
int DrakeKmeans::runThread(int threadId, int maxIterations) {
int iterations = 0;
int startNdx = start(threadId);
int endNdx = end(threadId);
int numLowerBoundsRemaining = numLowerBounds;
std::pair<double, int> *centerOrder = new std::pair<double, int>[k];
while ((iterations < maxIterations) && ! converged) {
++iterations;
// Reset the catcher statistic
int maxCatcher = 0;
// Find nearest center for each point
for (int i = startNdx; i < endNdx; ++i) {
// The index of the lower bound that caught us (hopefully)
int catcher;
// Check bounds, widening the check outward as necessary
// to determine if we must recalculate all the bounds.
bool mustRecalculate = true;
for (catcher = 0; mustRecalculate && (catcher
< numLowerBoundsRemaining); ++catcher) {
// Check if the upper bound is within this lower bound
// Used to be <=, as that's theoretically equivalent,
// but I'm trying to match naive EXACTLY, i.e.
//stable_sort, etc.
if (upper[i] < lower[i * numLowerBounds + catcher]) {
// We've been caught by this lower bound, so we
// don't need to recalculate everything!
mustRecalculate = false;
// If this is the first lower bound,
//then the assigned
// center cannot possibly have changed,
//which is great
if (catcher != 0) {
// Otherwise, we only have to
//reorder the (hopefully
// few) centers within the lower
//bound that caught us.
reorder_near_centers(i, catcher, centerOrder,
threadId);
}
}
}
// Keep track of the catches
if ((maxCatcher < catcher) && (catcher
< numLowerBoundsRemaining)) {
maxCatcher = catcher;
}
// If none of the bounds held, then recalculate everything.
if (mustRecalculate) {
// Sort the centers by increasing distance
find_near_centers(i, numLowerBoundsRemaining,
centerOrder, threadId);
//find_near_centers_general(i, k);
}
}
verifyAssignment(iterations, startNdx, endNdx);
synchronizeAllThreads();
// Adjust the centers based on their new point memberships
if (threadId == 0) {
int furthestMovingCenter = move_centers();
// If nothing happened when we tried to move centers,
//we've converged!
converged = (0.0 == centerMovement[furthestMovingCenter]);
}
// Otherwise, release tension in the bounds caused by centers' movement
synchronizeAllThreads();
update_bounds(startNdx, endNdx, numLowerBoundsRemaining);
// Adjust the number of lower bounds being used
if ((10 < iterations) && ((k >> 3) <= maxCatcher)) {
numLowerBoundsRemaining = std::max(maxCatcher, 1);
}
}
delete [] centerOrder;
return iterations;
}
void DrakeKmeans::initialize(Dataset const *aX, unsigned short aK,
unsigned short *initialAssignment, int aNumThreads) {
TriangleInequalityBaseKmeans::initialize(aX, aK, initialAssignment,
aNumThreads);
assert(0 < numLowerBounds);
assert(numLowerBounds < k);
closestOtherCenters = new unsigned short*[n];
for (int i = 0; i < n; ++i) {
closestOtherCenters[i] = new unsigned short[numLowerBounds];
for (int j = 0; j < numLowerBounds; ++j) {
closestOtherCenters[i][j] = j + 1;
}
}
}
void DrakeKmeans::update_bounds(int startNdx, int endNdx,
int numLowerBoundsRemaining) {
int furthestMovingCenter = (int)(std::max_element(centerMovement,
centerMovement + k) - centerMovement);
// Update the upper and lower bounds for each point
for (int i = startNdx; i < endNdx; ++i) {
// Widen the upper bound based on the closest center's movement
upper[i] += centerMovement[assignment[i]];
// Update all but the outermost lower bound
for (int j = 0; j < numLowerBoundsRemaining - 1; ++j) {
// Shrink the lower bound by the distance its center has moved
lower[i * numLowerBounds + j]
-= centerMovement[closestOtherCenters[i][j]];
}
// Shrink the outermost lower bound by maximum distance moved by any
// center; of course this is not as tight as possible, but it's cheap!
lower[i * numLowerBounds + numLowerBoundsRemaining - 1]
-= centerMovement[furthestMovingCenter];
// TODO try the tighter version again. no idea why it didn't work.
// Force lower bounds to stay in order by collapsing
// the circles from the outside inward
for (int j = numLowerBoundsRemaining - 2; j >= 0; --j) {
if (lower[i * numLowerBounds + j + 1] < lower[i
* numLowerBounds + j]) {
lower[i * numLowerBounds + j] = lower[i
* numLowerBounds + j + 1];
}
}
}
}
void DrakeKmeans::find_near_centers(int i, int numLowerBoundsRemaining,
std::pair<double, int> *order, int threadId) {
// Sort all centers by increasing distance from this point
for (int j = 0; j < k; ++j) {
// Record the squared distances
order[j].first = pointCenterDist2(i, j);
order[j].second = j;
}
std::partial_sort(order, order + numLowerBoundsRemaining + 1,
order + k);
// Reassign the center (incremental)
if (assignment[i] != order[0].second) {
changeAssignment(i, order[0].second, threadId);
}
// Record the indices of the near centers,
// and update their lower bounds
upper[i] = sqrt(order[0].first);
for (int j = 0; j < numLowerBoundsRemaining; ++j) {
closestOtherCenters[i][j] = order[j + 1].second;
lower[i * numLowerBounds + j] = sqrt(order[j + 1].first);
}
}
void DrakeKmeans::reorder_near_centers(int i, int catcher,
std::pair<double, int> *order, int threadId) {
// Sort the centers we caught, by increasing distance from this point
order[0].first = pointCenterDist2(i, assignment[i]);
order[0].second = assignment[i];
for (int j = 0; j < catcher; ++j) {
// Record the squared distances
order[j + 1].second = closestOtherCenters[i][j];
order[j + 1].first = pointCenterDist2(i, order[j + 1].second);
}
std::partial_sort(order, order + catcher + 1, order + catcher + 1);
// Reassign the center (incremental)
if (assignment[i] != order[0].second) {
changeAssignment(i, order[0].second, threadId);
}
// Record the indices of the caught centers,
// and update their lower bounds
upper[i] = sqrt(order[0].first);
for (int j = 0; j < catcher; ++j) {
closestOtherCenters[i][j] = order[j + 1].second;
lower[i * numLowerBounds + j] = sqrt(order[j + 1].first);
}
}