Files
secondo/Algebras/ImageSimilarity/original_space_kmeans.cpp
2026-01-23 17:03:45 +08:00

138 lines
4.0 KiB
C++

/*
----
This file is NOT part of SECONDO.
Authors: Greg Hamerly and Jonathan Drake
Feedback: hamerly@cs.baylor.edu
See: http://cs.baylor.edu/~hamerly/software/kmeans.php
Copyright 2014
----
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//[TOC] [\tableofcontents]
[1] Implementation of the original space kemans algorithm
1 Implementation of the original space kmeans algorithm
*/
/* Authors: Greg Hamerly and Jonathan Drake
* Feedback: hamerly@cs.baylor.edu
* See: http://cs.baylor.edu/~hamerly/software/kmeans.php
* Copyright 2014
*/
#include "original_space_kmeans.h"
#include "general_functions.h"
#include <cmath>
#include <cassert>
#include <numeric>
OriginalSpaceKmeans::OriginalSpaceKmeans() : centers(NULL),
sumNewCenters(NULL) { }
void OriginalSpaceKmeans::free() {
for (int t = 0; t < numThreads; ++t) {
delete sumNewCenters[t];
}
Kmeans::free();
delete centers;
delete [] sumNewCenters;
centers = NULL;
sumNewCenters = NULL;
}
/* This method moves the newCenters to their new locations, based on the
* sufficient statistics in sumNewCenters. It also computes the centerMovement
* and the center that moved the furthest.
*
* Parameters: none
*
* Return value: index of the furthest-moving centers
*/
int OriginalSpaceKmeans::move_centers() {
int furthestMovingCenter = 0;
for (int j = 0; j < k; ++j) {
centerMovement[j] = 0.0;
int totalClusterSize = 0;
for (int t = 0; t < numThreads; ++t) {
totalClusterSize += clusterSize[t][j];
}
if (totalClusterSize > 0) {
for (int dim = 0; dim < d; ++dim) {
double z = 0.0;
for (int t = 0; t < numThreads; ++t) {
z += (*sumNewCenters[t])(j,dim);
}
z /= totalClusterSize;
centerMovement[j] += (z - (*centers)(j, dim))
* (z - (*centers)(j, dim));
(*centers)(j, dim) = z;
}
}
centerMovement[j] = sqrt(centerMovement[j]);
if (centerMovement[furthestMovingCenter] < centerMovement[j]) {
furthestMovingCenter = j;
}
}
#ifdef COUNT_DISTANCES
numDistances += k;
#endif
return furthestMovingCenter;
}
void OriginalSpaceKmeans::initialize(Dataset const *aX,
unsigned short aK, unsigned short *initialAssignment,
int aNumThreads) {
Kmeans::initialize(aX, aK, initialAssignment, aNumThreads);
centers = new Dataset(k, d);
sumNewCenters = new Dataset *[numThreads];
centers->fill(0.0);
for (int t = 0; t < numThreads; ++t) {
sumNewCenters[t] = new Dataset(k, d, false);
sumNewCenters[t]->fill(0.0);
for (int i = start(t); i < end(t); ++i) {
addVectors(sumNewCenters[t]->data + assignment[i] * d,
x->data + i * d, d);
}
}
// put the centers at their initial locations, based on clusterSize and
// sumNewCenters
move_centers();
}
void OriginalSpaceKmeans::changeAssignment(int xIndex,
int closestCluster, int threadId) {
unsigned short oldAssignment = assignment[xIndex];
Kmeans::changeAssignment(xIndex, closestCluster, threadId);
double *xp = x->data + xIndex * d;
subVectors(sumNewCenters[threadId]->data + oldAssignment * d, xp, d);
addVectors(sumNewCenters[threadId]->data + closestCluster * d, xp, d);
}
double OriginalSpaceKmeans::pointPointInnerProduct(int x1, int x2) const
{
return std::inner_product(x->data + x1 * d, x->data + (x1 + 1) * d,
x->data + x2 * d, 0.0);
}
double OriginalSpaceKmeans::pointCenterInnerProduct(int xndx,
unsigned short cndx) const {
return std::inner_product(x->data + xndx * d, x->data + (xndx + 1)
* d, centers->data + cndx * d, 0.0);
}
double OriginalSpaceKmeans::centerCenterInnerProduct(unsigned short c1,
unsigned short c2) const {
return std::inner_product(centers->data + c1 * d, centers->data +
(c1 + 1) * d, centers->data + c2 * d, 0.0);
}