161 lines
4.5 KiB
C++
161 lines
4.5 KiB
C++
|
|
/*
|
||
|
|
----
|
||
|
|
This file is NOT part of SECONDO.
|
||
|
|
Authors: Greg Hamerly and Jonathan Drake
|
||
|
|
Feedback: hamerly@cs.baylor.edu
|
||
|
|
See: http://cs.baylor.edu/~hamerly/software/kmeans.php
|
||
|
|
Copyright 2014
|
||
|
|
----
|
||
|
|
|
||
|
|
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
|
||
|
|
//[TOC] [\tableofcontents]
|
||
|
|
|
||
|
|
[1] Implementation of the heap kmeans algorithm
|
||
|
|
|
||
|
|
|
||
|
|
1 Implementation of the heap kmeans algorithm
|
||
|
|
|
||
|
|
*/
|
||
|
|
/* Authors: Greg Hamerly and Jonathan Drake
|
||
|
|
* Feedback: hamerly@cs.baylor.edu
|
||
|
|
* See: http://cs.baylor.edu/~hamerly/software/kmeans.php
|
||
|
|
* Copyright 2014
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include "heap_kmeans.h"
|
||
|
|
#include "general_functions.h"
|
||
|
|
#include <cmath>
|
||
|
|
#include <algorithm>
|
||
|
|
|
||
|
|
void HeapKmeans::free() {
|
||
|
|
for (int t = 0; t < numThreads; ++t) {
|
||
|
|
delete [] heaps[t];
|
||
|
|
}
|
||
|
|
TriangleInequalityBaseKmeans::free();
|
||
|
|
delete [] heaps;
|
||
|
|
delete [] heapBounds;
|
||
|
|
heaps = NULL;
|
||
|
|
heapBounds = NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
int HeapKmeans::runThread(int threadId, int maxIterations) {
|
||
|
|
int iterations = 0;
|
||
|
|
|
||
|
|
std::greater<std::pair<double, int> > heapComp;
|
||
|
|
|
||
|
|
while ((iterations < maxIterations) && ! converged) {
|
||
|
|
++iterations;
|
||
|
|
|
||
|
|
for (int h = 0; h < k; ++h) {
|
||
|
|
Heap &heap = heaps[threadId][h];
|
||
|
|
while (heap.size() > 0) {
|
||
|
|
if (heapBounds[h] <= heap[0].first)
|
||
|
|
break;
|
||
|
|
|
||
|
|
int i = heap[0].second;
|
||
|
|
|
||
|
|
std::pop_heap(heap.begin(), heap.end(), heapComp);
|
||
|
|
heap.pop_back();
|
||
|
|
|
||
|
|
unsigned short closest = assignment[i];
|
||
|
|
unsigned short nextClosest = 0;
|
||
|
|
|
||
|
|
double u2 = pointCenterDist2(i, closest);
|
||
|
|
double l2 = std::numeric_limits<double>::max();
|
||
|
|
|
||
|
|
for (unsigned short j = 0; j < k; ++j) {
|
||
|
|
if (j == closest) continue;
|
||
|
|
|
||
|
|
double dist2 = pointCenterDist2(i, j);
|
||
|
|
if (dist2 < u2) {
|
||
|
|
l2 = u2;
|
||
|
|
u2 = dist2;
|
||
|
|
nextClosest = closest;
|
||
|
|
closest = j;
|
||
|
|
} else if (dist2 < l2) {
|
||
|
|
l2 = dist2;
|
||
|
|
nextClosest = j;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
const double bound = sqrt(l2) - sqrt(u2);
|
||
|
|
|
||
|
|
// Break ties consistently with Lloyd (also prevents
|
||
|
|
//infinite cycle)
|
||
|
|
if ((bound == 0.0) && (nextClosest < closest)) {
|
||
|
|
closest = nextClosest;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (closest != assignment[i]) {
|
||
|
|
changeAssignment(i, closest, threadId);
|
||
|
|
}
|
||
|
|
|
||
|
|
Heap &newHeap = heaps[threadId][closest];
|
||
|
|
newHeap.push_back(std::make_pair(heapBounds[closest]
|
||
|
|
+ bound, i));
|
||
|
|
std::push_heap(newHeap.begin(), newHeap.end(),
|
||
|
|
heapComp);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
verifyAssignment(iterations, start(threadId), end(threadId));
|
||
|
|
|
||
|
|
synchronizeAllThreads();
|
||
|
|
if (threadId == 0) {
|
||
|
|
int furthestMoving = move_centers();
|
||
|
|
converged = (0.0 == centerMovement[furthestMoving]);
|
||
|
|
update_bounds();
|
||
|
|
}
|
||
|
|
|
||
|
|
synchronizeAllThreads();
|
||
|
|
}
|
||
|
|
|
||
|
|
return iterations;
|
||
|
|
}
|
||
|
|
|
||
|
|
void HeapKmeans::initialize(Dataset const *aX, unsigned short aK,
|
||
|
|
unsigned short *initialAssignment, int aNumThreads) {
|
||
|
|
TriangleInequalityBaseKmeans::initialize(aX, aK, initialAssignment,
|
||
|
|
aNumThreads);
|
||
|
|
|
||
|
|
heaps = new Heap*[numThreads];
|
||
|
|
heapBounds = new double[k];
|
||
|
|
|
||
|
|
for (int t = 0; t < numThreads; ++t) {
|
||
|
|
heaps[t] = new Heap[k];
|
||
|
|
int startNdx = start(t);
|
||
|
|
int endNdx = end(t);
|
||
|
|
heaps[t][0].resize(endNdx - startNdx, std::make_pair(-1.0, 0));
|
||
|
|
for (int i = 0; i < endNdx - startNdx; ++i) {
|
||
|
|
heaps[t][0][i].second = i + startNdx;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
std::fill(heapBounds, heapBounds + k, 0.0);
|
||
|
|
}
|
||
|
|
|
||
|
|
void HeapKmeans::update_bounds() {
|
||
|
|
int furthestMovingCenter = 0;
|
||
|
|
double longest = centerMovement[furthestMovingCenter];
|
||
|
|
double secondLongest = 0.0;
|
||
|
|
for (int j = 0; j < k; ++j) {
|
||
|
|
if (longest < centerMovement[j]) {
|
||
|
|
secondLongest = longest;
|
||
|
|
longest = centerMovement[j];
|
||
|
|
furthestMovingCenter = j;
|
||
|
|
} else if (secondLongest < centerMovement[j]) {
|
||
|
|
secondLongest = centerMovement[j];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for (int j = 0; j < k; ++j) {
|
||
|
|
heapBounds[j] += centerMovement[j];
|
||
|
|
if (j == furthestMovingCenter) {
|
||
|
|
heapBounds[j] += secondLongest;
|
||
|
|
} else {
|
||
|
|
heapBounds[j] += longest;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|