Files
secondo/Algebras/ImageSimilarity/dataset.h
2026-01-23 17:03:45 +08:00

111 lines
3.8 KiB
C++

/*
----
This file is NOT part of SECONDO.
Authors: Greg Hamerly and Jonathan Drake
Feedback: hamerly@cs.baylor.edu
See: http://cs.baylor.edu/~hamerly/software/kmeans.php
Copyright 2014
The Annulus K-means algorithm is based on Hamerly's algorithm, but also
sorts the centers by their norms (distances from the origin). Doing this
allows searching the centers using the norm of the point to exclude
centers that cannot be close.
----
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//[TOC] [\tableofcontents]
[1] Declarations for the Dataset class
1 Declarations for Dataset class
*/
#ifndef DATASET_H
#define DATASET_H
/* Authors: Greg Hamerly and Jonathan Drake
* Feedback: hamerly@cs.baylor.edu
* See: http://cs.baylor.edu/~hamerly/software/kmeans.php
* Copyright 2014
*
* A Dataset class represents a collection of multidimensional records, as is
* typical in metric machine learning. Every record has the same number of
* dimensions (values), and every value must be numeric. Undefined values are
* not allowed.
*
* This particular implementation keeps all the data in a 1-dimensional array,
* and also optionally keeps extra storage for the sum of the squared values of
* each record. However, the Dataset class does NOT automatically populate or
* update the sumDataSquared values.
*/
#include <cstddef>
class Dataset {
    public:
        // Default constructor -- builds a completely empty dataset: zero
        // records, zero dimensions and no allocated storage.
        Dataset() : n(0), d(0), nd(0), data(NULL), sumDataSquared(NULL) {}

        // Sizing constructor -- allocates storage for aN records of aD values
        // each, and (only when keepSDS is true) an additional array of aN
        // entries for the per-record sums of squares. The allocated values
        // are NOT initialized.
        //
        // Both pointers start out NULL and the arrays are allocated in the
        // constructor body rather than in the initializer list: if the second
        // allocation throws, the destructor of a partially constructed object
        // is never run, so the first array has to be released here by hand to
        // avoid leaking it.
        Dataset(int aN, int aD, bool keepSDS = false)
            : n(aN), d(aD), nd(aN * aD), data(NULL), sumDataSquared(NULL) {
            data = new double[nd];
            if (keepSDS) {
                try {
                    sumDataSquared = new double[n];
                } catch (...) {
                    delete [] data;
                    data = NULL;
                    throw;
                }
            }
        }

        // Copy constructor -- makes a deep copy of everything in x
        // (defined out of line).
        Dataset(Dataset const &x);

        // Destructor -- resets the size fields and clears both pointers
        // before releasing the arrays they referred to. delete [] on a NULL
        // pointer is a no-op, so an empty dataset is handled as well.
        ~Dataset() {
            n = d = nd = 0;
            double *dp = data, *sdsp = sumDataSquared;
            data = sumDataSquared = NULL;
            delete [] dp;
            delete [] sdsp;
        }

        // operator= is the standard deep-copy assignment operator, which
        // returns a const reference to *this (defined out of line).
        Dataset const &operator=(Dataset const &x);

        // Allows modification of the value at record ndx, dimension dim.
        double &operator()(int ndx, int dim);

        // Allows const access to the value at record ndx, dimension dim.
        const double &operator()(int ndx, int dim) const;

        // Fills the entire dataset with value. Does NOT update
        // sumDataSquared.
        void fill(double value);

        // Prints the dataset to standard output (cout), using formatting to
        // keep the data in matrix format.
        void print() const;

        // n represents the number of records
        // d represents the dimension
        // nd is a shortcut for the value n * d
        int n, d, nd;

        // data is an array of length n*d that stores all of the records in
        // record-major (row-major) order. Thus data[0]...data[d-1] are the
        // values associated with the first record.
        double *data;

        // sumDataSquared is an (optional) sum of squared values for every
        // record. Thus,
        //   sumDataSquared[0] = data[0]^2 + data[1]^2 + ... + data[d-1]^2
        //   sumDataSquared[1] = data[d]^2 + data[d+1]^2 + ... + data[2*d-1]^2
        // and so on. Note that this is the *intended* use of the
        // sumDataSquared field, but that the Dataset class does NOT
        // automatically populate or update the values in sumDataSquared.
        // The pointer is NULL when the dataset was built without this
        // storage.
        double *sumDataSquared;
};
#endif