2459 lines
57 KiB
C++
2459 lines
57 KiB
C++
|
|
/*
|
||
|
|
----
|
||
|
|
This file is part of SECONDO.
|
||
|
|
|
||
|
|
Copyright (C) 2004, University in Hagen, Department of Computer Science,
|
||
|
|
Database Systems for New Applications.
|
||
|
|
|
||
|
|
SECONDO is free software; you can redistribute it and/or modify
|
||
|
|
it under the terms of the GNU General Public License as published by
|
||
|
|
the Free Software Foundation; either version 2 of the License, or
|
||
|
|
(at your option) any later version.
|
||
|
|
|
||
|
|
SECONDO is distributed in the hope that it will be useful,
|
||
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
|
GNU General Public License for more details.
|
||
|
|
|
||
|
|
You should have received a copy of the GNU General Public License
|
||
|
|
along with SECONDO; if not, write to the Free Software
|
||
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
----
|
||
|
|
|
||
|
|
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
|
||
|
|
//paragraph [10] Footnote: [{\footnote{] [}}]
|
||
|
|
//[TOC] [\tableofcontents]
|
||
|
|
//[_] [\_]
|
||
|
|
|
||
|
|
[1] Implementation of the Cluster Algebra
|
||
|
|
|
||
|
|
June, 2006.
|
||
|
|
Basic functionality, one operator with default values and one
|
||
|
|
with maximal distance and minimal number of points as values.
|
||
|
|
Only the type 'points' has been implemented so far.
|
||
|
|
|
||
|
|
[TOC]
|
||
|
|
|
||
|
|
1 Overview
|
||
|
|
|
||
|
|
This implementation file essentially contains the implementation of the
|
||
|
|
classes ~ClusterAlgebra~ and ~DBscan~, the latter of which contains the actual
|
||
|
|
cluster algorithm.
|
||
|
|
|
||
|
|
2 Defines and Includes
|
||
|
|
|
||
|
|
Eps is used by the cluster algorithm as the maximal distance that the
|
||
|
|
minimum points (MinPts) may be apart. If there are further points
|
||
|
|
in the Eps-range to one of the points in the cluster, this point
|
||
|
|
(and further points from this on) belong to the same cluster.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include "Algebra.h"
|
||
|
|
#include "NestedList.h"
|
||
|
|
#include "QueryProcessor.h"
|
||
|
|
#include "StandardTypes.h"
|
||
|
|
#include "Algebras/Spatial/SpatialAlgebra.h"
|
||
|
|
#include "LogMsg.h"
|
||
|
|
|
||
|
|
#include "MMRTree.h"
|
||
|
|
|
||
|
|
#include <iostream>
|
||
|
|
#include <string>
|
||
|
|
#include <vector>
|
||
|
|
#include <algorithm>
|
||
|
|
|
||
|
|
extern NestedList* nl;
|
||
|
|
extern QueryProcessor* qp;
|
||
|
|
|
||
|
|
using namespace std;
|
||
|
|
|
||
|
|
namespace clusteralg{
|
||
|
|
|
||
|
|
#define MINIMUMPTS_DEF 4 // default min points - MinPts
|
||
|
|
#define EPS_DEF 400 // default max distance - Eps
|
||
|
|
|
||
|
|
class DBscan;
|
||
|
|
|
||
|
|
class DBscan
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
DBscan();
|
||
|
|
DBscan(Word*, Word&, int, Word&, Supplier, double**);
|
||
|
|
|
||
|
|
int Parameter_Standard(double**,int);
|
||
|
|
int Parameter_UserDefined(double**, int, int, int);//MinPts(int), Eps(int)
|
||
|
|
void CopyToResult(Word*, Word&, int, Word&, Supplier, double**);
|
||
|
|
|
||
|
|
private:
|
||
|
|
int MinPts;//minimum number of points to be a cluster
|
||
|
|
int Eps;//max distance for MinPts and further points in cluster
|
||
|
|
int FindClusters(double**, int); // main method
|
||
|
|
bool ExpandCluster(double**, int,int);
|
||
|
|
void Search(double**, int,int, int*);
|
||
|
|
};
|
||
|
|
/*
|
||
|
|
3.1 Type mapping function ~PointsTypeMapA~.
|
||
|
|
|
||
|
|
Used for the ~cluster\_a~ operator with one argument (points object).
|
||
|
|
|
||
|
|
Type mapping for ~cluster\_a~ is
|
||
|
|
|
||
|
|
---- points [->] points
|
||
|
|
|
||
|
|
----
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Type mapping for cluster_a: accepts exactly one argument of type points
// and yields points; every other argument list is a type error.
static ListExpr
PointsTypeMapA( ListExpr args )
{
  const bool isSinglePoints =
         ( nl->ListLength(args) == 1 )
      && nl->IsEqual(nl->First(args), Points::BasicType());

  if ( !isSinglePoints )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(Points::BasicType());
}
|
||
|
|
/*
|
||
|
|
3.2 Type mapping function ~PointsTypeMapB~.
|
||
|
|
|
||
|
|
Used for the ~cluster\_b~ operator with three arguments (points object, MinPts, Eps).
|
||
|
|
|
||
|
|
Type mapping for ~cluster\_b~ is
|
||
|
|
|
||
|
|
---- points[MinPts, Eps] [->] points
|
||
|
|
|
||
|
|
----
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Type mapping for cluster_b: points x int (MinPts) x int (Eps) -> points.
// Any other argument list is a type error.
static ListExpr
PointsTypeMapB( ListExpr args)
{
  if ( nl->ListLength(args) != 3 )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }

  const ListExpr pointsArg = nl->First(args);   // points
  const ListExpr minPtsArg = nl->Second(args);  // MinPts --> int
  const ListExpr epsArg    = nl->Third(args);   // Eps --> int

  if ( !nl->IsEqual(pointsArg, Points::BasicType()) )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  if ( !nl->IsEqual(minPtsArg, CcInt::BasicType()) )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  if ( !nl->IsEqual(epsArg, CcInt::BasicType()) )
  {
    return nl->SymbolAtom(Symbol::TYPEERROR());
  }
  return nl->SymbolAtom(Points::BasicType());
}
|
||
|
|
|
||
|
|
|
||
|
|
// Type mapping: points x int x real -> stream(points).
// Every mismatch reports the same error message and yields a type error.
static ListExpr cluster_c_TM(ListExpr args){
  const bool signatureOk =
         (nl->ListLength(args)==3)
      && nl->IsEqual(nl->First(args),Points::BasicType())
      && nl->IsEqual(nl->Second(args),CcInt::BasicType())
      && nl->IsEqual(nl->Third(args),CcReal::BasicType());

  if(!signatureOk){
    ErrorReporter::ReportError("points x int x real expected");
    return nl->TypeError();
  }

  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
                         nl->SymbolAtom(Points::BasicType()));
}
|
||
|
|
|
||
|
|
|
||
|
|
// Type mapping: points x real -> stream(points).
static ListExpr cluster_d_TM(ListExpr args){
  const bool signatureOk =
         (nl->ListLength(args)==2)
      && nl->IsEqual(nl->First(args),Points::BasicType())
      && nl->IsEqual(nl->Second(args),CcReal::BasicType());

  if(!signatureOk){
    ErrorReporter::ReportError("points x real expected");
    return nl->TypeError();
  }

  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
                         nl->SymbolAtom(Points::BasicType()));
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
5.1 Value mapping function for operator ~cluster\_a~.
|
||
|
|
|
||
|
|
Predefined values for Eps (distance) and MinPts (minimum number of points)
|
||
|
|
for the cluster algorithm are used.
|
||
|
|
First an array with four columns is set up, a pointer array is being
|
||
|
|
used in all the DBscan-class functions for access to this array.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Value mapping for cluster_a: clusters the points of args[0] with the
// default parameters (DBscan::Parameter_Standard) and lets
// DBscan::CopyToResult build the result.
//
// Differences to the previous version:
//  - the work arrays are std::vector instances, so the row pointer array is
//    no longer leaked and an allocation failure raises std::bad_alloc instead
//    of calling exit(0) (which reported success to the calling process);
//  - the empty-input test is done before anything is allocated and before
//    StartBulkLoad, so the argument is not left in bulk load state;
//  - the result storage is fetched before the result is marked undefined.
int cluster_aFun (Word* args, Word& result, int message, Word& local,
                  Supplier s)
{
  Points* ps = static_cast<Points*>(args[0].addr);

  if(ps->IsEmpty()) {
    result = qp->ResultStorage(s);
    static_cast<Points*>(result.addr)->SetDefined(false);
    // NOTE(review): 1 is kept from the original code; the non-empty path
    // returns 0 - confirm which value the query processor expects.
    return 1;
  }

  const int nrows = ps->Size();
  const int ncols = 4;  // cpoints: 0:x 1:y 2:border point 3:core point

  // DATA array setup: one contiguous block plus one pointer per row.
  // The zero fill presets NOISE (0) and CORECHECK (0) in columns 2 and 3.
  vector<double> rcpoints(static_cast<size_t>(nrows) * ncols, 0.0);
  vector<double*> cpoints(nrows);
  for (int row = 0; row < nrows; row++) {
    cpoints[row] = &rcpoints[0] + (row * ncols);
  }

  // copy x/y from input into cluster array 'cpoints'
  ps->StartBulkLoad();  // relax ordering
  for (int row = 0; row < nrows; row++) {
    Point p;
    ps->Get(row, p);
    cpoints[row][0] = p.GetX();
    cpoints[row][1] = p.GetY();
  }
  ps->EndBulkLoad(true, false);

  // The default setup function calls the actual cluster algorithm.
  DBscan cluster;
  int a = cluster.Parameter_Standard(&cpoints[0], nrows);

  // debugging
  if ( RTFlag::isActive("ClusterText:Trace") ) {
    cmsg.file() << "Cluster:"
                   " "
                   "cluster_aFun Ergebnis: " << a << endl;
    cmsg.send();
  }

  // Copy the result from the internal array 'cpoints' back into the
  // result 'points' memory location.
  cluster.CopyToResult(args, result, message, local, s, &cpoints[0]);
  return 0;
}
|
||
|
|
/*
|
||
|
|
5.2 Value mapping function for operator ~cluster\_b~.
|
||
|
|
|
||
|
|
This function receives two additional arguments, MinPts and Eps, which are used
|
||
|
|
for the cluster algorithm.
|
||
|
|
|
||
|
|
The first part is identical to operator ~cluster\_a~.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Value mapping for cluster_b: clusters the points of args[0] with the user
// supplied parameters args[1] (MinPts, int) and args[2] (Eps, int) and lets
// DBscan::CopyToResult build the result.
//
// Differences to the previous version:
//  - the work arrays are std::vector instances, so the row pointer array is
//    no longer leaked and an allocation failure raises std::bad_alloc instead
//    of calling exit(0) (which reported success to the calling process);
//  - the empty-input test is done before anything is allocated and before
//    StartBulkLoad, so the argument is not left in bulk load state;
//  - the result storage is fetched before the result is marked undefined;
//  - undefined MinPts / Eps arguments yield an undefined result instead of
//    being read as if they were defined.
int cluster_bFun (Word* args, Word& result, int message, Word& local,
                  Supplier s)
{
  Points* ps = static_cast<Points*>(args[0].addr);
  CcInt* i1 = static_cast<CcInt*>(args[1].addr);  // 1: MinPts (int)
  CcInt* i2 = static_cast<CcInt*>(args[2].addr);  // 2: Eps (int)

  // debugging
  if ( RTFlag::isActive("ClusterText:B") ) {
    cmsg.file() << "Cluster: cluster_bFun: " << endl;
    cmsg.send();
  }

  if(ps->IsEmpty() || !i1->IsDefined() || !i2->IsDefined()) {
    result = qp->ResultStorage(s);
    static_cast<Points*>(result.addr)->SetDefined(false);
    // NOTE(review): 1 is kept from the original code; the non-empty path
    // returns 0 - confirm which value the query processor expects.
    return 1;
  }

  const int nrows = ps->Size();
  const int ncols = 4;  // cpoints: 0:x 1:y 2:border point 3:core point

  // DATA array setup: one contiguous block plus one pointer per row.
  // The zero fill presets NOISE (0) and CORECHECK (0) in columns 2 and 3.
  vector<double> rcpoints(static_cast<size_t>(nrows) * ncols, 0.0);
  vector<double*> cpoints(nrows);
  for (int row = 0; row < nrows; row++) {
    cpoints[row] = &rcpoints[0] + (row * ncols);
  }

  // copy x/y from input into cluster array 'cpoints'
  ps->StartBulkLoad();  // relax ordering
  for (int row = 0; row < nrows; row++) {
    Point p;
    ps->Get(row, p);
    cpoints[row][0] = p.GetX();
    cpoints[row][1] = p.GetY();
  }
  ps->EndBulkLoad(true, false);

  DBscan cluster;  // create DBscan object

  // The following part is different from cluster_a:
  // the user supplied parameters are used for the cluster algorithm.
  int cMinPts = i1->GetIntval();
  int cEps = i2->GetIntval();

  //debugging
  if ( RTFlag::isActive("ClusterText:Trace") ) {
    cmsg.file() << "Cluster: cMinPts: ---------" << cMinPts << endl;
    cmsg.file() << "Cluster: cEps -------------" << cEps << endl;
    cmsg.send();
  }

  // find cluster-points with user-defined parameters
  // returns number of clusters found
  int a = cluster.Parameter_UserDefined(&cpoints[0], nrows, cMinPts, cEps);

  if ( RTFlag::isActive("ClusterText:Trace") ) {
    cmsg.file() << "Cluster: cluster_bFun Ergebnis: " << a << endl;
    cmsg.send();
  }

  cluster.CopyToResult(args, result, message, local, s, &cpoints[0]);
  return 0;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
5.3 Value Mapping function for cluster[_]c
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
class ClusterC_LocalInfo{
|
||
|
|
public:
|
||
|
|
/*
|
||
|
|
~Constructor~
|
||
|
|
|
||
|
|
Creates a new local info from the value coming from the value mapping.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
ClusterC_LocalInfo(Points* pts, CcInt* minPts, CcReal* eps){
|
||
|
|
if(!pts->IsDefined() || !minPts->IsDefined() || !eps->IsDefined()){
|
||
|
|
defined = false;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
this->pts = pts;
|
||
|
|
this->minPts = max(0,minPts->GetIntval());
|
||
|
|
this->eps = eps->GetRealval();
|
||
|
|
this->eps2 = this->eps*this->eps;
|
||
|
|
defined = true;
|
||
|
|
size = pts->Size();
|
||
|
|
no = new int[size];
|
||
|
|
env1 = new set<int>*[size];
|
||
|
|
pos = 0;
|
||
|
|
// set all points to be UNCLASSIFIED
|
||
|
|
// and clean all sets
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
no[i] = UNCLASSIFIED;
|
||
|
|
env1[i] = new set<int>();
|
||
|
|
}
|
||
|
|
computeEnv();
|
||
|
|
pos = 0;
|
||
|
|
clusterId = 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~Destructor~
|
||
|
|
|
||
|
|
*/
|
||
|
|
~ClusterC_LocalInfo(){
|
||
|
|
if(defined){
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
delete env1[i];
|
||
|
|
}
|
||
|
|
delete[] env1;
|
||
|
|
delete[] no;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~getNext~
|
||
|
|
|
||
|
|
Returns the next cluster as points value.
|
||
|
|
|
||
|
|
*/
|
||
|
|
Points* getNext(){
|
||
|
|
if(!defined){ // no next cluster available
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
// search the next unclassified point
|
||
|
|
while(pos<size){
|
||
|
|
if(no[pos] >= 0 ){ // point already classified
|
||
|
|
pos++;
|
||
|
|
} else if ( env1[pos]->size()<minPts){
|
||
|
|
no[pos] = -2; // mark as NOISE
|
||
|
|
pos++;
|
||
|
|
} else {
|
||
|
|
// create a new cluster
|
||
|
|
Points* result = expand(pos);
|
||
|
|
clusterId++;
|
||
|
|
pos++;
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
/*
|
||
|
|
~Members~
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
Points* pts; // source points value
|
||
|
|
unsigned int minPts; // minimum size for core points
|
||
|
|
double eps; // epsilon
|
||
|
|
double eps2; // epsilon * epsilon
|
||
|
|
bool defined;
|
||
|
|
int* no; // cluster number
|
||
|
|
set<int>** env1; // environments;
|
||
|
|
int size; // number of points
|
||
|
|
int pos; // current position
|
||
|
|
int clusterId; // current cluster id
|
||
|
|
|
||
|
|
static const int UNCLASSIFIED = -1;
|
||
|
|
static const int NOISE = -2;
|
||
|
|
|
||
|
|
/*
|
||
|
|
~computeEnv~
|
||
|
|
|
||
|
|
This function computes the epsilon environment for each point
|
||
|
|
contained in pts;
|
||
|
|
|
||
|
|
*/
|
||
|
|
void computeEnv(){
|
||
|
|
mmrtree::Rtree<2> tree(10,30);
|
||
|
|
Point p;
|
||
|
|
double min1[2];
|
||
|
|
double max1[2];
|
||
|
|
|
||
|
|
/* insert all contained points into an R- tree */
|
||
|
|
for(int i=0;i<pts->Size();i++){
|
||
|
|
pts->Get(i,p);
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
min1[0] = x - FACTOR;
|
||
|
|
min1[1] = y - FACTOR;
|
||
|
|
max1[0] = x + FACTOR;
|
||
|
|
max1[1] = y - FACTOR;
|
||
|
|
Rectangle<2> box(true,min1,max1);
|
||
|
|
tree.insert(box, i);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
/* compute environments using filter /refine */
|
||
|
|
for(int i=0;i<pts->Size();i++){
|
||
|
|
pts->Get(i,p);
|
||
|
|
set<long> cands;
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
min1[0] = x-eps;
|
||
|
|
min1[1] = y-eps;
|
||
|
|
max1[0] = x+eps;
|
||
|
|
max1[1] = y+eps;
|
||
|
|
Rectangle<2> searchbox(true, min1,max1);
|
||
|
|
tree.findAll(searchbox,cands);
|
||
|
|
set<long>::iterator it;
|
||
|
|
for(it = cands.begin(); it!=cands.end(); it++){
|
||
|
|
Point p2;
|
||
|
|
int cand = static_cast<int>(*it);
|
||
|
|
pts->Get(cand,p2);
|
||
|
|
if(qdist(p,p2)<eps2){
|
||
|
|
env1[i]->insert(cand);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
~qdist~
|
||
|
|
|
||
|
|
This function computes the square of the distance between two point value.
|
||
|
|
|
||
|
|
|
||
|
|
*/
|
||
|
|
double qdist(Point& p1, Point& p2){
|
||
|
|
double x1 = p1.GetX();
|
||
|
|
double x2 = p2.GetX();
|
||
|
|
double y1 = p1.GetY();
|
||
|
|
double y2 = p2.GetY();
|
||
|
|
double dx = x1-x2;
|
||
|
|
double dy = y1-y2;
|
||
|
|
return dx*dx + dy*dy;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
~expand~
|
||
|
|
|
||
|
|
This function implements the expand algorithm of dbscan.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
Points* expand(int pos){
|
||
|
|
Points* result = new Points(minPts);
|
||
|
|
result->StartBulkLoad();
|
||
|
|
|
||
|
|
set<int> seeds = *env1[pos];
|
||
|
|
no[pos] = clusterId;
|
||
|
|
Point p;
|
||
|
|
pts->Get(pos,p);
|
||
|
|
(*result) += (p);
|
||
|
|
seeds.erase(pos);
|
||
|
|
|
||
|
|
while(!seeds.empty()){
|
||
|
|
int cpos = *(seeds.begin());
|
||
|
|
if(no[cpos]<0){ // not classified by another cluster
|
||
|
|
no[cpos] = clusterId;
|
||
|
|
pts->Get(cpos,p);
|
||
|
|
(*result) += (p);
|
||
|
|
set<int>::iterator it;
|
||
|
|
for(it = env1[cpos]->begin();it!=env1[cpos]->end(); it++){
|
||
|
|
if(no[*it]<0){
|
||
|
|
if(env1[*it]->size()>=minPts){ // a core point
|
||
|
|
seeds.insert(*it);
|
||
|
|
} else { // border point
|
||
|
|
no[*it] = clusterId;
|
||
|
|
pts->Get(*it,p);
|
||
|
|
(*result) += p;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
seeds.erase(cpos);
|
||
|
|
}
|
||
|
|
result->EndBulkLoad();
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
// Stream value mapping of cluster_c.
//   OPEN    : builds the local info from points, MinPts and Eps
//   REQUEST : delivers the next cluster, CANCEL if there is none
//   CLOSE   : destroys the local info
int cluster_cFun (Word* args, Word& result, int message, Word& local,
                  Supplier s) {
  if(message == OPEN){
    Points* pts = static_cast<Points*>(args[0].addr);
    CcInt* minPts = static_cast<CcInt*>(args[1].addr);
    CcReal* eps = static_cast<CcReal*>(args[2].addr);
    local.setAddr(new ClusterC_LocalInfo(pts,minPts,eps));
    return 0;
  }

  if(message == REQUEST){
    ClusterC_LocalInfo* info = static_cast<ClusterC_LocalInfo*>(local.addr);
    if(!info){
      return CANCEL;
    }
    Points* nextCluster = info->getNext();
    result.setAddr(nextCluster);
    return nextCluster ? YIELD : CANCEL;
  }

  if(message == CLOSE){
    ClusterC_LocalInfo* info = static_cast<ClusterC_LocalInfo*>(local.addr);
    if(info){
      delete info;
      local.setAddr(0);
    }
    return 0;
  }

  return -1; // should never be reached
}
|
||
|
|
|
||
|
|
|
||
|
|
class ClusterG_LocalInfo{
|
||
|
|
public:
|
||
|
|
/*
|
||
|
|
~Constructor~
|
||
|
|
|
||
|
|
Creates a new local info from the value coming from the value mapping.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
ClusterG_LocalInfo(Points* pts, CcInt* minPts, CcReal* eps){
|
||
|
|
if(!pts->IsDefined() || !minPts->IsDefined() || !eps->IsDefined()){
|
||
|
|
defined = false;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
this->pts = pts;
|
||
|
|
this->minPts = max(0,minPts->GetIntval());
|
||
|
|
this->eps = eps->GetRealval();
|
||
|
|
this->eps2 = this->eps*this->eps;
|
||
|
|
defined = true;
|
||
|
|
size = pts->Size();
|
||
|
|
no = new int[size];
|
||
|
|
pos = 0;
|
||
|
|
// set all points to be UNCLASSIFIED
|
||
|
|
// and clean all sets
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
no[i] = UNCLASSIFIED;
|
||
|
|
}
|
||
|
|
createTree();
|
||
|
|
pos = 0;
|
||
|
|
clusterId = 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~Destructor~
|
||
|
|
|
||
|
|
*/
|
||
|
|
~ClusterG_LocalInfo(){
|
||
|
|
if(defined){
|
||
|
|
delete[] no;
|
||
|
|
delete tree;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~getNext~
|
||
|
|
|
||
|
|
Returns the next cluster as points value.
|
||
|
|
|
||
|
|
*/
|
||
|
|
Points* getNext(){
|
||
|
|
if(!defined){ // no next cluster available
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
// search the next unclassified point
|
||
|
|
while(pos<size){
|
||
|
|
if(no[pos] >= 0 ){ // point already classified
|
||
|
|
pos++;
|
||
|
|
} else {
|
||
|
|
set<int>* env = getEnv(pos);
|
||
|
|
unsigned int size = env->size();
|
||
|
|
delete env;
|
||
|
|
if ( size<minPts){
|
||
|
|
no[pos] = -2; // mark as NOISE
|
||
|
|
pos++;
|
||
|
|
} else {
|
||
|
|
// create a new cluster
|
||
|
|
Points* result = expand(pos);
|
||
|
|
clusterId++;
|
||
|
|
pos++;
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
/*
|
||
|
|
~Members~
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
Points* pts; // source points value
|
||
|
|
unsigned int minPts; // minimum size for core points
|
||
|
|
double eps; // epsilon
|
||
|
|
double eps2; // epsilon * epsilon
|
||
|
|
bool defined;
|
||
|
|
int* no; // cluster number
|
||
|
|
int size; // number of points
|
||
|
|
int pos; // current position
|
||
|
|
int clusterId; // current cluster id
|
||
|
|
mmrtree::Rtree<2>* tree;
|
||
|
|
|
||
|
|
static const int UNCLASSIFIED = -1;
|
||
|
|
static const int NOISE = -2;
|
||
|
|
|
||
|
|
void createTree(){
|
||
|
|
tree = new mmrtree::Rtree<2>(10,30);
|
||
|
|
Point p;
|
||
|
|
double min1[2];
|
||
|
|
double max1[2];
|
||
|
|
|
||
|
|
/* insert all contained points into an R- tree */
|
||
|
|
for(int i=0;i<pts->Size();i++){
|
||
|
|
pts->Get(i,p);
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
min1[0] = x - FACTOR;
|
||
|
|
min1[1] = y - FACTOR;
|
||
|
|
max1[0] = x + FACTOR;
|
||
|
|
max1[1] = y - FACTOR;
|
||
|
|
Rectangle<2> box(true,min1,max1);
|
||
|
|
tree->insert(box, i);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
set<int>* getEnv(int pos){
|
||
|
|
set<int>* res = new set<int>();
|
||
|
|
Point p;
|
||
|
|
pts->Get(pos,p);
|
||
|
|
set<long> cands;
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
double min1[2];
|
||
|
|
double max1[2];
|
||
|
|
min1[0] = x-eps;
|
||
|
|
min1[1] = y-eps;
|
||
|
|
max1[0] = x+eps;
|
||
|
|
max1[1] = y+eps;
|
||
|
|
Rectangle<2> searchbox(true, min1,max1);
|
||
|
|
tree->findAll(searchbox,cands);
|
||
|
|
set<long>::iterator it;
|
||
|
|
for(it = cands.begin(); it!=cands.end(); it++){
|
||
|
|
Point p2;
|
||
|
|
int cand = static_cast<int>(*it);
|
||
|
|
pts->Get(cand,p2);
|
||
|
|
if(qdist(p,p2)<eps2){
|
||
|
|
res->insert(cand);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
unsigned int getEnvSize(int pos){
|
||
|
|
int res = 0;
|
||
|
|
Point p;
|
||
|
|
double min1[2];
|
||
|
|
double max1[2];
|
||
|
|
pts->Get(pos,p);
|
||
|
|
set<long> cands;
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
min1[0] = x-eps;
|
||
|
|
min1[1] = y-eps;
|
||
|
|
max1[0] = x+eps;
|
||
|
|
max1[1] = y+eps;
|
||
|
|
Rectangle<2> searchbox(true, min1,max1);
|
||
|
|
tree->findAll(searchbox,cands);
|
||
|
|
set<long>::iterator it;
|
||
|
|
for(it = cands.begin(); it!=cands.end(); it++){
|
||
|
|
Point p2;
|
||
|
|
int cand = static_cast<int>(*it);
|
||
|
|
pts->Get(cand,p2);
|
||
|
|
if(qdist(p,p2)<eps2){
|
||
|
|
res++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~qdist~
|
||
|
|
|
||
|
|
This function computes the square of the distance between two point value.
|
||
|
|
|
||
|
|
|
||
|
|
*/
|
||
|
|
double qdist(Point& p1, Point& p2){
|
||
|
|
double x1 = p1.GetX();
|
||
|
|
double x2 = p2.GetX();
|
||
|
|
double y1 = p1.GetY();
|
||
|
|
double y2 = p2.GetY();
|
||
|
|
double dx = x1-x2;
|
||
|
|
double dy = y1-y2;
|
||
|
|
return dx*dx + dy*dy;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
~expand~
|
||
|
|
|
||
|
|
This function implements the expand algorithm of dbscan.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
Points* expand(int pos){
|
||
|
|
|
||
|
|
|
||
|
|
Points* result = new Points(minPts);
|
||
|
|
result->StartBulkLoad();
|
||
|
|
|
||
|
|
set<int>* seeds = getEnv(pos);
|
||
|
|
no[pos] = clusterId;
|
||
|
|
Point p;
|
||
|
|
pts->Get(pos,p);
|
||
|
|
(*result) += (p);
|
||
|
|
seeds->erase(pos);
|
||
|
|
while(!seeds->empty()){
|
||
|
|
int cpos = *(seeds->begin());
|
||
|
|
if(no[cpos]<0){ // not classified by another cluster
|
||
|
|
no[cpos] = clusterId;
|
||
|
|
pts->Get(cpos,p);
|
||
|
|
(*result) += (p);
|
||
|
|
tree->erase(p.BoundingBox(),cpos);
|
||
|
|
set<int>::iterator it;
|
||
|
|
set<int>* env = getEnv(cpos);
|
||
|
|
for(it = env->begin();it!=env->end(); it++){
|
||
|
|
if(no[*it]<0){
|
||
|
|
if(getEnvSize(*it)>=minPts){ // a core point
|
||
|
|
seeds->insert(*it);
|
||
|
|
} else { // border point
|
||
|
|
no[*it] = clusterId;
|
||
|
|
pts->Get(*it,p);
|
||
|
|
(*result) += p;
|
||
|
|
tree->erase(p.BoundingBox(),*it);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
delete env;
|
||
|
|
}
|
||
|
|
seeds->erase(cpos);
|
||
|
|
}
|
||
|
|
result->EndBulkLoad();
|
||
|
|
delete seeds;
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
};
|
||
|
|
|
||
|
|
// Stream value mapping based on ClusterG_LocalInfo.
//   OPEN    : builds the local info from points, MinPts and Eps
//   REQUEST : delivers the next cluster, CANCEL if there is none
//   CLOSE   : destroys the local info
int cluster_gFun (Word* args, Word& result, int message, Word& local,
                  Supplier s) {
  if(message == OPEN){
    Points* pts = static_cast<Points*>(args[0].addr);
    CcInt* minPts = static_cast<CcInt*>(args[1].addr);
    CcReal* eps = static_cast<CcReal*>(args[2].addr);
    local.setAddr(new ClusterG_LocalInfo(pts,minPts,eps));
    return 0;
  }

  if(message == REQUEST){
    ClusterG_LocalInfo* info = static_cast<ClusterG_LocalInfo*>(local.addr);
    if(!info){
      return CANCEL;
    }
    Points* nextCluster = info->getNext();
    result.setAddr(nextCluster);
    return nextCluster ? YIELD : CANCEL;
  }

  if(message == CLOSE){
    ClusterG_LocalInfo* info = static_cast<ClusterG_LocalInfo*>(local.addr);
    if(info){
      delete info;
      local.setAddr(0);
    }
    return 0;
  }

  return -1; // should never be reached
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
ClusterD ValueMapping
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// A directed, weighted edge (src -> dest) with a cost.
// Edges are ordered by cost first, then by src, then by dest.
class Edge{
public:
  int src;
  int dest;
  double cost;

  Edge(int src, int dest, double cost):
      src(src), dest(dest), cost(cost) {}

  Edge(const Edge& edge):
      src(edge.src), dest(edge.dest), cost(edge.cost) {}

  ~Edge(){}

  Edge& operator=(const Edge& other){
    cost = other.cost;
    dest = other.dest;
    src = other.src;
    return *this;
  }

  // Lexicographic comparison on (cost, src, dest).
  bool operator<(const Edge& e)const{
    if(cost < e.cost){
      return true;
    }
    if(e.cost < cost){
      return false;
    }
    if(src != e.src){
      return src < e.src;
    }
    return dest < e.dest;
  }

  // Mirror image of operator<.
  bool operator>(const Edge& e) const{
    return e < *this;
  }

  bool operator==(const Edge& e)const{
    if(cost != e.cost){
      return false;
    }
    if(src != e.src){
      return false;
    }
    return dest == e.dest;
  }

  // Writes the edge as "(src -> dest, cost)" and returns the stream.
  ostream& printTo(ostream& o)const{
    o << "(" << src << " -> " << dest << ", " << cost <<")";
    return o;
  }
};
|
||
|
|
|
||
|
|
|
||
|
|
ostream& operator<<(ostream& o, const Edge& e){
|
||
|
|
return e.printTo(o);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
// Writes a set of ints as "{a, b, c}"; an empty set is written as "{}".
ostream& operator<<(ostream& o, const set<int>& e){
  bool first = true;
  o << "{";
  for(set<int>::const_iterator pos = e.begin(); pos != e.end(); ++pos){
    if(!first){
      o << ", ";
    }
    first = false;
    o << *pos;
  }
  o << "}";
  return o;
}
|
||
|
|
|
||
|
|
// Reference counted set of ints. An instance starts with one reference
// and deletes itself when the last reference is given up.
struct intset{
  intset():member(),refs(1){}

  // Gives up one reference; destroys the object if it was the last one.
  void deleteIfAllowed(){
    refs--;
    if(refs<1){
      delete this;
    }
  }

  set<int> member;
  int refs;
};


// A cluster consisting of a center (cx, cy), a 'forbidden' flag and a set
// of member ids. Copies share the member set through the reference counter
// of intset, so copying a cluster never copies the set itself.
struct cCluster{
  cCluster(){
    cx = 0.0;
    cy = 0.0;
    member = new intset();
    forbidden = false;
  }

  cCluster(const cCluster& c){
    cx = c.cx;
    cy = c.cy;
    forbidden = c.forbidden;
    member = c.member;
    member->refs++;
  }

  // Shares the member set of c. The set held so far is released; before
  // this fix it was overwritten without giving up its reference (leak).
  cCluster& operator=(const cCluster& c){
    // take the new reference first, so that self assignment cannot
    // destroy the set which is still needed
    c.member->refs++;
    member->deleteIfAllowed();
    cx = c.cx;
    cy = c.cy;
    forbidden = c.forbidden;
    member = c.member;
    return *this;
  }

  ~cCluster(){
    member->deleteIfAllowed();
  }

  set<int>::iterator begin(){
    return member->member.begin();
  }

  set<int>::iterator end(){
    return member->member.end();
  }

  size_t size(){
    return member->member.size();
  }

  // The following functions change the shared set, i.e. the change is
  // visible in all copies of this cluster.
  void insert(int i){
    member->member.insert(i);
  }

  void erase(int i){
    member->member.erase(i);
  }

  void clear(){
    member->member.clear();
  }


  double cx;
  double cy;
  intset* member; // avoid copying of this set !!!!
  bool forbidden;
};
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
class ClusterD_LocalInfo{
|
||
|
|
public:
|
||
|
|
|
||
|
|
/*
|
||
|
|
~Constructor~
|
||
|
|
|
||
|
|
Creates a new localinfo for the cluster[_]d operator.
|
||
|
|
The complete clustering is done here.
|
||
|
|
|
||
|
|
*/
|
||
|
|
ClusterD_LocalInfo(Points* pts, CcReal* eps){
|
||
|
|
env = 0;
|
||
|
|
currentInitialCluster = 0;
|
||
|
|
currentInitialPos = 0;
|
||
|
|
pts->Copy();
|
||
|
|
if(pts->IsDefined() && eps->IsDefined()){
|
||
|
|
this->defined = true;
|
||
|
|
this->eps = eps->GetRealval();
|
||
|
|
this->eps2 = this->eps*this->eps;
|
||
|
|
this->pts = pts;
|
||
|
|
size = pts->Size();
|
||
|
|
icluster = new int[size];
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
icluster[i] = -1; // not assigned to an initial cluster
|
||
|
|
}
|
||
|
|
|
||
|
|
fcluster = 0;
|
||
|
|
computeEnv();
|
||
|
|
currentCNum = 0;
|
||
|
|
origPos = 0;
|
||
|
|
} else {
|
||
|
|
this->defined = 0;
|
||
|
|
this->pts = 0;
|
||
|
|
this->eps = 0.0;
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~Destructor~
|
||
|
|
|
||
|
|
|
||
|
|
Destroys this instance.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
~ClusterD_LocalInfo(){
|
||
|
|
pts->DeleteIfAllowed();
|
||
|
|
if(icluster){
|
||
|
|
delete[] icluster;
|
||
|
|
icluster=0;
|
||
|
|
}
|
||
|
|
if(fcluster){
|
||
|
|
delete[] fcluster;
|
||
|
|
fcluster=0;
|
||
|
|
}
|
||
|
|
if(env){
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
if(env[i] ){
|
||
|
|
delete env[i];
|
||
|
|
env[i] = 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
delete[] env;
|
||
|
|
env = 0;
|
||
|
|
}
|
||
|
|
if(currentInitialCluster){
|
||
|
|
delete currentInitialCluster;
|
||
|
|
}
|
||
|
|
if(origPos){
|
||
|
|
delete [] origPos;
|
||
|
|
origPos = 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
Returns the next cluster or 0 if no more clusters exist.
|
||
|
|
|
||
|
|
*/
|
||
|
|
Points* getNext(int i){
|
||
|
|
return getNextFinalCluster(i);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
bool defined;
|
||
|
|
Points* pts;
|
||
|
|
double eps;
|
||
|
|
double eps2; // the square of eps
|
||
|
|
int* icluster; // initial cluster
|
||
|
|
int* fcluster; // final cluster
|
||
|
|
int size;
|
||
|
|
|
||
|
|
set<int>** env;
|
||
|
|
|
||
|
|
int currentInitialPos;
|
||
|
|
int currentCNum;
|
||
|
|
set<int>* currentInitialCluster;
|
||
|
|
|
||
|
|
map<int, set<int> > currentFinalCluster;
|
||
|
|
map<int, set<int> >::iterator currentFinalPos;
|
||
|
|
|
||
|
|
int* origPos;
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
~computeEnv~
|
||
|
|
|
||
|
|
This function computes the epsilon environment for each point
|
||
|
|
contained in pts;
|
||
|
|
|
||
|
|
*/
|
||
|
|
void computeEnv(){
|
||
|
|
|
||
|
|
env = new set<int>*[size];
|
||
|
|
|
||
|
|
mmrtree::Rtree<2> tree(10,30);
|
||
|
|
Point p;
|
||
|
|
double min1[2];
|
||
|
|
double max1[2];
|
||
|
|
|
||
|
|
/* insert all contained points into an R- tree */
|
||
|
|
for(int i=0;i<pts->Size();i++){
|
||
|
|
pts->Get(i,p);
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
min1[0] = x - FACTOR;
|
||
|
|
min1[1] = y - FACTOR;
|
||
|
|
max1[0] = x + FACTOR;
|
||
|
|
max1[1] = y - FACTOR;
|
||
|
|
Rectangle<2> box(true,min1,max1);
|
||
|
|
tree.insert(box, i);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
/* compute environments using filter /refine */
|
||
|
|
for(int i=0;i<pts->Size();i++){
|
||
|
|
env[i] = new set<int>();
|
||
|
|
pts->Get(i,p);
|
||
|
|
set<long> cands;
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
min1[0] = x-eps;
|
||
|
|
min1[1] = y-eps;
|
||
|
|
max1[0] = x+eps;
|
||
|
|
max1[1] = y+eps;
|
||
|
|
Rectangle<2> searchbox(true, min1,max1);
|
||
|
|
tree.findAll(searchbox,cands);
|
||
|
|
set<long>::iterator it;
|
||
|
|
for(it = cands.begin(); it!=cands.end(); it++){
|
||
|
|
Point p2;
|
||
|
|
int cand = static_cast<int>(*it);
|
||
|
|
pts->Get(cand,p2);
|
||
|
|
if(qdist(p,p2)<eps2){
|
||
|
|
env[i]->insert(cand);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
/*
|
||
|
|
~qdist~
|
||
|
|
|
||
|
|
~qdist~ computes the square of the distance between two points
|
||
|
|
|
||
|
|
*/
|
||
|
|
double qdist(const Point& p1, const Point& p2){
|
||
|
|
double x1 = p1.GetX();
|
||
|
|
double x2 = p2.GetX();
|
||
|
|
double y1 = p1.GetY();
|
||
|
|
double y2 = p2.GetY();
|
||
|
|
double dx = x1-x2;
|
||
|
|
double dy = y1-y2;
|
||
|
|
return dx*dx + dy*dy;
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
// Squared Euclidean distance between (x1,y1) and (x2,y2).
double qdist(double x1, double y1, double x2, double y2){
  const double deltaX = x1-x2;
  const double deltaY = y1-y2;
  const double squared = deltaX*deltaX + deltaY*deltaY;
  return squared;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~getNextInitialCluster~
|
||
|
|
|
||
|
|
This function extracts the next initial cluster from the original points value.
|
||
|
|
If the points value is exhausted, null is returned. The caller of this
|
||
|
|
function has to delete the returned value.
|
||
|
|
|
||
|
|
*/
|
||
|
|
set<int>* getNextInitialCluster(){
|
||
|
|
|
||
|
|
while(currentInitialPos<size &&
|
||
|
|
icluster[currentInitialPos]>=0){
|
||
|
|
currentInitialPos++;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
if(currentInitialPos>=size){ // set exhausted
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
set<int>* res = new set<int>();
|
||
|
|
|
||
|
|
|
||
|
|
set<int> seed(*env[currentInitialPos]);
|
||
|
|
|
||
|
|
while(!seed.empty()){
|
||
|
|
int p = *(seed.begin());
|
||
|
|
if(icluster[p]<0){ // point is "free"
|
||
|
|
icluster[p] = 1; // mark as assigned
|
||
|
|
res->insert(p);
|
||
|
|
for(set<int>::iterator it=env[p]->begin(); it!= env[p]->end(); it++){
|
||
|
|
if(icluster[*it]<0){
|
||
|
|
seed.insert(*it);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
seed.erase(p);
|
||
|
|
}
|
||
|
|
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~computeFinalCluster0~
|
||
|
|
|
||
|
|
Divides the points value ps into a set of clusters. The
|
||
|
|
first number is set to cnum. Cnum is increased automatically.
|
||
|
|
|
||
|
|
*/
|
||
|
|
void computeFinalCluster0(){
|
||
|
|
|
||
|
|
currentFinalCluster.clear();
|
||
|
|
|
||
|
|
const int size = currentInitialCluster->size();
|
||
|
|
|
||
|
|
// store all edges into an vector and
|
||
|
|
// build a single cluster for each point
|
||
|
|
Point p_i;
|
||
|
|
Point p_j;
|
||
|
|
|
||
|
|
vector<Edge> edges;
|
||
|
|
|
||
|
|
int tmpfcluster[size];
|
||
|
|
if(origPos){
|
||
|
|
delete[] origPos;
|
||
|
|
}
|
||
|
|
|
||
|
|
origPos = new int[size];
|
||
|
|
|
||
|
|
// initialize origPos
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
origPos[i] = -1;
|
||
|
|
}
|
||
|
|
|
||
|
|
map<int,int> rev;
|
||
|
|
|
||
|
|
|
||
|
|
int pos = 0;
|
||
|
|
set<int>::iterator it1;
|
||
|
|
|
||
|
|
|
||
|
|
for(it1=currentInitialCluster->begin();
|
||
|
|
it1!=currentInitialCluster->end();
|
||
|
|
it1++){
|
||
|
|
rev[*it1] = pos;
|
||
|
|
pos++;
|
||
|
|
}
|
||
|
|
|
||
|
|
pos = 0;
|
||
|
|
for(it1=currentInitialCluster->begin();
|
||
|
|
it1!=currentInitialCluster->end();
|
||
|
|
it1++){
|
||
|
|
origPos[pos] = *it1;
|
||
|
|
tmpfcluster[pos] = pos; //each point builds its own cluster
|
||
|
|
set<int> s;
|
||
|
|
s.insert(pos);
|
||
|
|
currentFinalCluster[pos] = s;
|
||
|
|
pos++;
|
||
|
|
set<int>* e = env[*it1];
|
||
|
|
set<int>::iterator it2;
|
||
|
|
Point p1;
|
||
|
|
pts->Get(*it1,p1);
|
||
|
|
for(it2=e->begin();it2!=e->end();it2++){
|
||
|
|
Point p2;
|
||
|
|
pts->Get(*it2,p2);
|
||
|
|
double dist = qdist(p1,p2);
|
||
|
|
int src = rev[*it1];
|
||
|
|
int dest = rev[*it2];
|
||
|
|
if(src<dest){
|
||
|
|
edges.push_back(Edge(src,dest,dist));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// sort the vector of edges
|
||
|
|
sort(edges.begin(),edges.end());
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
vector<Edge>::iterator it;
|
||
|
|
// insert edges and connect clusters
|
||
|
|
for(it = edges.begin(); it!=edges.end();it++){
|
||
|
|
Edge e = *it;
|
||
|
|
int c1 = tmpfcluster[e.src];
|
||
|
|
int c2 = tmpfcluster[e.dest];
|
||
|
|
if(c1!=c2){ // otherwise the points are already in the same cluster
|
||
|
|
// compute the maximum distance between points of c1
|
||
|
|
// and points of c2
|
||
|
|
set<int>::iterator it1,it2;
|
||
|
|
set<int> s1 = currentFinalCluster[c1];
|
||
|
|
set<int> s2 = currentFinalCluster[c2];
|
||
|
|
double dist = 0.0;
|
||
|
|
for(it1=s1.begin(); it1!=s1.end() && dist <= eps2; it1++){
|
||
|
|
pts->Get(origPos[*it1],p_i);
|
||
|
|
for(it2=s2.begin();it2!=s2.end() && dist <= eps2; it2++){
|
||
|
|
pts->Get(origPos[*it2],p_j);
|
||
|
|
dist = max(dist,qdist(p_i,p_j));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if(dist <=eps2){ // build the union of the clusters
|
||
|
|
for(it2=s2.begin();it2!=s2.end();it2++){
|
||
|
|
s1.insert(*it2);
|
||
|
|
tmpfcluster[*it2] = c1;
|
||
|
|
}
|
||
|
|
currentFinalCluster.erase(c1);
|
||
|
|
currentFinalCluster.erase(c2);
|
||
|
|
currentFinalCluster[c1] = s1;
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
currentFinalPos = currentFinalCluster.begin();
|
||
|
|
|
||
|
|
|
||
|
|
// at this
|
||
|
|
|
||
|
|
|
||
|
|
currentFinalPos = currentFinalCluster.begin();
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
Another method for dividing a group into several subgroups.
|
||
|
|
|
||
|
|
*/
|
||
|
|
void insertPoint(vector<cCluster>& clusters, int pos){
|
||
|
|
|
||
|
|
Point p;
|
||
|
|
pts->Get(pos,p);
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
|
||
|
|
// first cluster
|
||
|
|
if(clusters.empty()){
|
||
|
|
cCluster cl;
|
||
|
|
cl.cx = x;
|
||
|
|
cl.cy = y;
|
||
|
|
cl.forbidden = false;
|
||
|
|
cl.insert(pos);
|
||
|
|
clusters.push_back(cl);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
int index = -1;
|
||
|
|
double bestDist=eps2+10;
|
||
|
|
|
||
|
|
for(unsigned int i=0;i<clusters.size();i++){
|
||
|
|
if(!clusters[i].forbidden){
|
||
|
|
double dist = qdist(x,y,clusters[i].cx,clusters[i].cy);
|
||
|
|
if(index < 0 ||
|
||
|
|
((dist < bestDist) ||
|
||
|
|
((dist == bestDist) &&
|
||
|
|
(clusters[index].size() > clusters[i].size())))){
|
||
|
|
index = i;
|
||
|
|
bestDist = dist;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if((index < 0) || (bestDist > eps2)){ // no cluster found, produce a new one
|
||
|
|
cCluster cl;
|
||
|
|
cl.cx = x;
|
||
|
|
cl.cy = y;
|
||
|
|
cl.forbidden = false;
|
||
|
|
cl.insert(pos);
|
||
|
|
clusters.push_back(cl);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// insert the point into the best cluster
|
||
|
|
clusters[index].cx = (clusters[index].cx *
|
||
|
|
clusters[index].size() + x ) /
|
||
|
|
(clusters[index].size() +1);
|
||
|
|
|
||
|
|
clusters[index].cy = (clusters[index].cy *
|
||
|
|
clusters[index].size() + y ) /
|
||
|
|
(clusters[index].size() +1 );
|
||
|
|
|
||
|
|
clusters[index].insert(pos);
|
||
|
|
|
||
|
|
// check whether some points are outside the cluster
|
||
|
|
Point pc(true,clusters[index].cx, clusters[index].cy);
|
||
|
|
|
||
|
|
set<int> removed;
|
||
|
|
set<int>::iterator it;
|
||
|
|
Point p2;
|
||
|
|
for(it = clusters[index].begin();
|
||
|
|
it != clusters[index].end();
|
||
|
|
it++){
|
||
|
|
pts->Get(*it,p2);
|
||
|
|
if(qdist(pc,p2) > eps2){
|
||
|
|
removed.insert(*it);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// remove 'bad' points from the cluster
|
||
|
|
double sx = 0.0;
|
||
|
|
double sy = 0.0;
|
||
|
|
for(it = removed.begin(); it!=removed.end();it++){
|
||
|
|
Point p3;
|
||
|
|
pts->Get(*it,p3);
|
||
|
|
sx += p3.GetX();
|
||
|
|
sy += p3.GetY();
|
||
|
|
clusters[index].erase(*it);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
// we avoid to correct the center again because of this correction
|
||
|
|
// further points may go out from the cluster
|
||
|
|
// thiy may lead to long running times
|
||
|
|
|
||
|
|
// correct the center
|
||
|
|
clusters[index].cx = ((clusters[index].cx *
|
||
|
|
(clusters[index].member.size() + removed.size())) -
|
||
|
|
sx) / clusters[index].member.size();
|
||
|
|
clusters[index].cy = ((clusters[index].cy *
|
||
|
|
(clusters[index].member.size() + removed.size())) -
|
||
|
|
sy) / clusters[index].member.size();
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
|
||
|
|
// inserts the points again
|
||
|
|
clusters[index].forbidden = true;
|
||
|
|
for(it = removed.begin(); it!=removed.end();it++){
|
||
|
|
insertPoint(clusters,*it);
|
||
|
|
}
|
||
|
|
clusters[index].forbidden = false;
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
void insertPointSimple(vector<cCluster>& clusters, int pos){
|
||
|
|
|
||
|
|
Point p;
|
||
|
|
pts->Get(pos,p);
|
||
|
|
double x = p.GetX();
|
||
|
|
double y = p.GetY();
|
||
|
|
|
||
|
|
// clusters has to be non-empty
|
||
|
|
|
||
|
|
int index = 0;
|
||
|
|
int size = clusters.size();
|
||
|
|
double bestDist = qdist(x,y,clusters[index].cx,clusters[index].cy);
|
||
|
|
|
||
|
|
for(int i=1;i<size;i++){
|
||
|
|
double dist = qdist(x,y,clusters[i].cx,clusters[i].cy);
|
||
|
|
if( (dist < bestDist) ||
|
||
|
|
((dist == bestDist) &&
|
||
|
|
(clusters[index].size() > clusters[i].size()))){
|
||
|
|
index = i;
|
||
|
|
bestDist = dist;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if(!(bestDist <= eps2)){
|
||
|
|
cout << "Error a point was not assigned to a cluster " << endl;
|
||
|
|
cout << "The position of the point was " << pos << endl;
|
||
|
|
cout << "The dist is " << bestDist << endl;
|
||
|
|
cout << "Allowed dist " << eps2 << endl;
|
||
|
|
cout << "Best cluster " << index << endl;
|
||
|
|
cout << "#cluster " << size << endl;
|
||
|
|
assert(false);
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
clusters[index].insert(pos);
|
||
|
|
}
|
||
|
|
|
||
|
|
void computeFinalCluster1(){
|
||
|
|
|
||
|
|
currentFinalCluster.clear();
|
||
|
|
|
||
|
|
vector<cCluster> currentCluster;
|
||
|
|
|
||
|
|
// insert the points
|
||
|
|
set<int>::iterator it1;
|
||
|
|
for( it1 = currentInitialCluster->begin();
|
||
|
|
it1 != currentInitialCluster->end();
|
||
|
|
it1++){
|
||
|
|
insertPoint(currentCluster,*it1);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
// by the movement of the center, some
|
||
|
|
// clusters may have unhandsome overlappings
|
||
|
|
// we will redistribute the points located in such
|
||
|
|
// overlappings
|
||
|
|
|
||
|
|
// redistribute the points
|
||
|
|
vector<cCluster>::iterator it3;
|
||
|
|
for(it3 = currentCluster.begin(); it3 != currentCluster.end(); it3++){
|
||
|
|
((*it3)).clear();
|
||
|
|
}
|
||
|
|
|
||
|
|
for( it1 = currentInitialCluster->begin();
|
||
|
|
it1 != currentInitialCluster->end();
|
||
|
|
it1++){
|
||
|
|
insertPointSimple(currentCluster,*it1);
|
||
|
|
}
|
||
|
|
|
||
|
|
// copy the result into currentFinalCluster
|
||
|
|
int i = 0;
|
||
|
|
vector<cCluster>::iterator it2;
|
||
|
|
for(it2 = currentCluster.begin();
|
||
|
|
it2 != currentCluster.end();
|
||
|
|
it2++){
|
||
|
|
currentFinalCluster[i++] = (*it2).member->member;
|
||
|
|
}
|
||
|
|
|
||
|
|
currentFinalPos = currentFinalCluster.begin();
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
Returns the next cluster
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
|
||
|
|
// Returns the next final cluster as a newly created Points value, or 0
// when no further cluster exists or the instance is undefined.
//   method : 0 uses computeFinalCluster0 (members are local positions
//            that are translated through origPos), 1 uses
//            computeFinalCluster1 (members are indexes into pts)
Points* getNextFinalCluster(int method=0){
  if(!defined) return 0;

  // no partition yet, or the current one is completely delivered:
  // partition the next initial cluster
  if(!currentInitialCluster ||
     currentFinalPos == currentFinalCluster.end()){
    delete currentInitialCluster; // deleting a null pointer is a no-op
    currentInitialCluster = getNextInitialCluster();
    if(!currentInitialCluster){
      return 0;
    }
    switch(method){
      case 0: computeFinalCluster0(); break;
      case 1: computeFinalCluster1(); break;
      default: assert(false);
    }
    // An empty partition must not be dereferenced below. It can only
    // arise from an empty initial cluster; the stream ends in that case.
    if(currentFinalPos == currentFinalCluster.end()){
      return 0;
    }
  }

  // bind by reference, the set need not be copied for reading
  const set<int>& cs = currentFinalPos->second;
  Points* res = new Points(cs.size());
  res->StartBulkLoad();
  Point p;
  for(set<int>::const_iterator it = cs.begin(); it!=cs.end(); ++it){
    switch(method){
      case 0 : pts->Get(origPos[*it],p); break;
      case 1 : pts->Get(*it,p); break;
      default : assert(false);
    }
    (*res) += p;
  }
  res->EndBulkLoad();
  ++currentFinalPos;

  return res;
}
|
||
|
|
|
||
|
|
|
||
|
|
};
|
||
|
|
|
||
|
|
template<int i>
|
||
|
|
int cluster_dFun (Word* args, Word& result, int message, Word& local,
|
||
|
|
Supplier s) {
|
||
|
|
switch(message){
|
||
|
|
case OPEN : {
|
||
|
|
Points* pts = static_cast<Points*>(args[0].addr);
|
||
|
|
CcReal* eps = static_cast<CcReal*>(args[1].addr);
|
||
|
|
local.setAddr(new ClusterD_LocalInfo(pts,eps));
|
||
|
|
return 0;
|
||
|
|
} case REQUEST : {
|
||
|
|
if(local.addr==0){
|
||
|
|
return CANCEL;
|
||
|
|
}
|
||
|
|
ClusterD_LocalInfo* linfo =
|
||
|
|
static_cast<ClusterD_LocalInfo*>(local.addr);
|
||
|
|
|
||
|
|
Points* hasNext = linfo->getNext(i);
|
||
|
|
result.setAddr(hasNext);
|
||
|
|
if(hasNext){
|
||
|
|
return YIELD;
|
||
|
|
} else {
|
||
|
|
return CANCEL;
|
||
|
|
}
|
||
|
|
} case CLOSE : {
|
||
|
|
if(local.addr!=0){
|
||
|
|
delete static_cast<ClusterD_LocalInfo*>(local.addr);
|
||
|
|
local.addr = 0;
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return -1; // should never be reached
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.3 Value Mapping for cluster[_]f
|
||
|
|
|
||
|
|
Cluster[_]f implements the same algorithm as cluster[_]e.
|
||
|
|
The difference is that this algorithm avoids the preprocessing step
|
||
|
|
to avoid large allocations of memory. Instead of that, an R-tree is
|
||
|
|
used to manage the centers.
|
||
|
|
|
||
|
|
1.3.1 LocalInfo
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
class ClusterF_LocalInfo{
|
||
|
|
|
||
|
|
public:
|
||
|
|
/*
|
||
|
|
~Constructor~
|
||
|
|
|
||
|
|
Here, the complete work is done.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Creates the local info for cluster_f; the complete clustering is
// computed here.
//   pts : points value to be partitioned
//   eps : maximum deviation; values below FACTOR are raised to FACTOR
// With an undefined argument the instance is flagged as undefined and
// getNext() never delivers a result.
ClusterF_LocalInfo(Points* pts, CcReal* eps){
  this->pts = static_cast<Points*>(pts->Copy());

  // well-defined state for every member, also in the undefined case
  defined = false;
  this->eps = 0.0;
  eps2 = 0.0;
  size = 0;
  pos = 0;
  no_cluster = 0;
  cluster = 0;

  if(pts->IsDefined() && eps->IsDefined()){
    defined = true;
    size = pts->Size();
    this->eps = max(FACTOR,eps->GetRealval());
    this->eps2 = this->eps * this->eps;
    cluster = new vector<cCluster>();
    computeCluster();
    no_cluster = cluster->size();
  }
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~Destructor~
|
||
|
|
|
||
|
|
Destroys this structure.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Releases the reference to the points value and the cluster vector.
~ClusterF_LocalInfo(){
  pts->DeleteIfAllowed();
  // cluster is 0 for undefined input; delete accepts a null pointer
  delete cluster;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~getNext~
|
||
|
|
|
||
|
|
This function returns the next cluster as a points value.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Returns the cluster at position pos as a newly created Points value
// and advances pos; returns 0 when the instance is undefined or all
// clusters have been delivered.
Points* getNext(){
  // no_cluster is only inspected for defined instances
  if(!defined || pos >= no_cluster){
    return 0;
  }

  cCluster& current = (*cluster)[pos];
  Points* res = new Points(current.size());
  res->StartBulkLoad();
  Point p;
  set<int>::iterator it = current.begin();
  while(it != current.end()){
    pts->Get(*it,p);
    (*res) += p;
    it++;
  }
  res->EndBulkLoad();
  pos++;
  return res;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
2 Private Part
|
||
|
|
|
||
|
|
|
||
|
|
*/
|
||
|
|
private:
|
||
|
|
/*
|
||
|
|
~data members~
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
Points* pts; // source points value
|
||
|
|
double eps; // maximum deviation
|
||
|
|
double eps2; // = eps * eps
|
||
|
|
bool defined; // true if the inputs are correct
|
||
|
|
int size; // = pts->Size()
|
||
|
|
int pos; // the current cluster
|
||
|
|
int no_cluster; // number of clusters
|
||
|
|
vector<cCluster >* cluster; // the clusters
|
||
|
|
|
||
|
|
/*
|
||
|
|
~qdist~
|
||
|
|
|
||
|
|
Returns the square of the Euclidean distance between the points defined
|
||
|
|
by (x1, y1) and (x2,y2).
|
||
|
|
|
||
|
|
*/
|
||
|
|
double qdist(const double x1,const double y1,
|
||
|
|
const double x2, const double y2) const{
|
||
|
|
double dx = x2-x1;
|
||
|
|
double dy = y2-y1;
|
||
|
|
return dx*dx + dy * dy;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~indexOfNearestCluster~
|
||
|
|
|
||
|
|
Computes the index of the cluster whose center is closest to p within
|
||
|
|
the cluster vector. If all clusters have a minimum distance larger than
|
||
|
|
eps, -1 will be returned. The r-tree is used as index and has to contain
|
||
|
|
all cluster centers.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Returns the index (within the cluster vector) of the non-forbidden
// cluster whose center is closest to p and not farther away than eps,
// or -1 when no such cluster exists. tree has to contain the centers of
// all clusters, stored under their vector index.
int indexOfNearestCluster(const mmrtree::Rtree<2>& tree, const Point& p) const{
  int res = -1;
  double bestDist = eps2 + 10.0;

  const double x = p.GetX();
  const double y = p.GetY();

  // search box: eps around p, enlarged by the FACTOR half width that is
  // used for the center boxes
  double min[2];
  double max[2];
  min[0] = x - eps - FACTOR;
  min[1] = y - eps - FACTOR;
  max[0] = x + eps + FACTOR;
  max[1] = y + eps + FACTOR;
  Rectangle<2> searchbox(true,min,max);

  set<long> cands;
  tree.findAll(searchbox,cands);

  for(set<long>::const_iterator it = cands.begin(); it != cands.end(); ++it){
    // bind by reference: a by-value cCluster would copy all members of
    // the cluster for every candidate
    const cCluster& c = cluster->at(*it);
    const double d = qdist(c.cx,c.cy,x,y);
    if(d <= eps2 && d < bestDist && !c.forbidden){
      bestDist = d;
      res = *it;
    }
  }
  return res;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~insertPointSimple~
|
||
|
|
|
||
|
|
This function assigns the point at position ~pos~ in the ~pts~ member
|
||
|
|
variable to the nearest cluster w.r.t. its center. The cluster itself remains unchanged, i.e.
|
||
|
|
the center is not moved.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Adds the point at index pos of pts to the cluster with the nearest
// center; neither the centers nor the tree are changed. A cluster
// within eps is expected to exist (asserted).
void insertPointSimple(const mmrtree::Rtree<2>& tree, const int pos){
  Point p;
  pts->Get(pos,p);
  const int nearest = indexOfNearestCluster(tree,p);
  assert(nearest >= 0);
  cCluster& target = (*cluster)[nearest];
  target.insert(pos);
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~insertPoint~
|
||
|
|
|
||
|
|
Inserts a point to the nearest cluster. If no appropriate cluster is found, a new one
|
||
|
|
is created. The center of the cluster is changed to be the center of all points
|
||
|
|
within the cluster including that one at position ~pos~. Thereby, some points of the
|
||
|
|
cluster may exceed the maximum distance to the cluster's center. Such points are
|
||
|
|
reinserted recursively but the source cluster is locked.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Inserts the point at index pos of pts into the nearest cluster, or
// creates a new cluster (and tree entry) when none is close enough.
// For an existing cluster the center becomes the running mean including
// the new point, the tree entry is moved accordingly, and the cluster is
// repaired with repairClusterAt().
void insertPoint(mmrtree::Rtree<2>& tree, const int pos){
  Point p;
  pts->Get(pos,p);
  const int index = indexOfNearestCluster(tree,p);
  const double x = p.GetX();
  const double y = p.GetY();
  double lo[2];
  double hi[2];

  if(index <0){ // no appropriate cluster found, build a new one
    cCluster c;
    c.cx = x;
    c.cy = y;
    c.insert(pos);
    c.forbidden = false;
    cluster->push_back(c);
    // register the center under the index of the new cluster
    lo[0] = x - FACTOR;
    lo[1] = y - FACTOR;
    hi[0] = x + FACTOR;
    hi[1] = y + FACTOR;
    Rectangle<2> box(true,lo,hi);
    tree.insert(box,cluster->size()-1);
    return;
  }

  // 'target' is only used before repairClusterAt(), which may enlarge
  // the cluster vector
  cCluster& target = (*cluster)[index];
  target.insert(pos);
  const double oldCx = target.cx;
  const double oldCy = target.cy;
  const int s = target.size();
  target.cx = ( (oldCx * (s - 1.0) + x) / s);
  target.cy = ( (oldCy * (s - 1.0) + y) / s);

  // remove the tree entry of the old center ...
  lo[0] = oldCx - FACTOR;
  lo[1] = oldCy - FACTOR;
  hi[0] = oldCx + FACTOR;
  hi[1] = oldCy + FACTOR;
  Rectangle<2> erasebox(true,lo,hi);
  tree.erase(erasebox, index);

  // ... and insert the new one
  lo[0] = target.cx - FACTOR;
  lo[1] = target.cy - FACTOR;
  hi[0] = target.cx + FACTOR;
  hi[1] = target.cy + FACTOR;
  Rectangle<2> newCenter(true,lo,hi);
  tree.insert(newCenter,index);

  repairClusterAt(index,tree);
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~repairClusterAt~
|
||
|
|
|
||
|
|
Removes all points exceeding the maximum allowed distance to the cluster's
|
||
|
|
center from the cluster at ~index~. Such points are reinserted.
|
||
|
|
|
||
|
|
*/
|
||
|
|
void repairClusterAt(const int index, mmrtree::Rtree<2>& tree){
|
||
|
|
(*cluster)[index].forbidden = true;
|
||
|
|
double cx = (*cluster)[index].cx;
|
||
|
|
double cy = (*cluster)[index].cy;
|
||
|
|
set<int> wrong;
|
||
|
|
set<int>::iterator it;
|
||
|
|
Point p;
|
||
|
|
for(it = (*cluster)[index].begin();
|
||
|
|
it!= (*cluster)[index].end(); it++){
|
||
|
|
pts->Get(*it,p);
|
||
|
|
double d = qdist(cx,cy, p.GetX(),p.GetY());
|
||
|
|
if(d>eps2){
|
||
|
|
wrong.insert(*it);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for( it=wrong.begin(); it!=wrong.end(); it++){
|
||
|
|
(*cluster)[index].erase(*it);
|
||
|
|
}
|
||
|
|
|
||
|
|
for( it=wrong.begin(); it!=wrong.end(); it++){
|
||
|
|
insertPoint(tree,*it);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
(*cluster)[index].forbidden = false;
|
||
|
|
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
~computeCluster~
|
||
|
|
|
||
|
|
This function divides a points value into a set of clusters.
|
||
|
|
|
||
|
|
|
||
|
|
*/
|
||
|
|
void computeCluster(){
|
||
|
|
mmrtree::Rtree<2> tree(3,6);
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
insertPoint(tree, i);
|
||
|
|
}
|
||
|
|
|
||
|
|
// redistribute the points
|
||
|
|
for(unsigned int i=0;i<cluster->size();i++){
|
||
|
|
(*cluster)[i].clear();
|
||
|
|
}
|
||
|
|
|
||
|
|
for(int i=0;i<size;i++){
|
||
|
|
insertPointSimple(tree, i);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.3.2 The actual Value Mapping
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Value mapping of cluster_f. OPEN creates a ClusterF_LocalInfo,
// REQUEST yields one cluster per call, CLOSE destroys the local info.
int cluster_fFun (Word* args, Word& result, int message, Word& local,
                  Supplier s) {

  if(message == OPEN){
    Points* pts = static_cast<Points*>(args[0].addr);
    CcReal* eps = static_cast<CcReal*>(args[1].addr);
    local.setAddr(new ClusterF_LocalInfo(pts,eps));
    return 0;
  }

  if(message == REQUEST){
    if(local.addr==0){
      return CANCEL;
    }
    ClusterF_LocalInfo* linfo =
                 static_cast<ClusterF_LocalInfo*>(local.addr);
    Points* hasNext = linfo->getNext();
    // the result address is set even when the stream is exhausted
    result.setAddr(hasNext);
    return hasNext ? YIELD : CANCEL;
  }

  if(message == CLOSE){
    if(local.addr!=0){
      delete static_cast<ClusterF_LocalInfo*>(local.addr);
      local.addr = 0;
    }
    return 0;
  }

  return -1; // should never be reached
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
6.1 Specification Strings for Operator cluster\_a
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Specification of cluster_a: a header list followed by the entries
// signature, syntax, meaning and example (one literal group per entry).
const string cluster_aSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( <text>points -> points</text--->"
  "<text>cluster_a ( _ )</text--->"
  "<text>Find cluster for points with standard cluster parameters. "
  "[ADVICE: Do not use.]</text--->"
  "<text>query cluster_a (Kneipen)</text--->"
  ") )";
|
||
|
|
/*
|
||
|
|
6.2 Specification Strings for Operator cluster\_b
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Specification of cluster_b: a header list followed by the entries
// signature, syntax, meaning and example (one literal group per entry).
const string cluster_bSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( <text>points -> points</text--->"
  "<text>_ cluster_b [_, _] </text--->"
  "<text>Find cluster for points with parameters MinPts (1) and Eps (2). "
  "[ADVICE: Do not use.]</text--->"
  "<text>query Kneipen cluster_b[5,200]</text--->"
  ") )";
|
||
|
|
/*
|
||
|
|
6.3 Specification string for Operator cluster\_c
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Specification of cluster_c: a header list followed by the entries
// signature, syntax, meaning and example.
const string cluster_cSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" ) "
  "( <text>points x int x real -> stream(points)</text--->"
  "<text> _ cluster_c [ minpts, epsilon ] </text--->"
  "<text>For a point set given as a points value, compute the clusters "
  "using the DBSCAN algorithm with parameters minPts (minimum number of "
  "points forming a cluster core) and epsilon (maximum distance between "
  "points in a cluster core). "
  "Returns a stream of points values (point sets) representing the "
  "clusters. </text--->"
  "<text>query Kneipen cluster_c[5,200.0] count</text--->"
  ") )";
|
||
|
|
|
||
|
|
// Specification of cluster_g: a header list followed by the entries
// signature, syntax, meaning and example. The "Meaning" entry was not
// terminated by </text---> before, which left the four entries unbalanced.
const string cluster_gSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" "
  "\"Example\" ) "
  "( <text>points x int x real -> stream(points)</text--->"
  "<text> _ cluster_g [ minpts, epsilon] </text--->"
  "<text>For a point set given as a points value, compute the clusters using "
  "the DBSCAN algorithm with parameters minPts (minimum number of points "
  "forming a cluster core) and epsilon (maximum distance between points in "
  "a cluster core). [Alternative implementation?]. "
  "Returns a stream of points values (point sets) representing the clusters. "
  "</text--->"
  "<text>query Kneipen cluster_g[5,200.0] count</text--->"
  ") )";
|
||
|
|
/*
|
||
|
|
6.4 Specification string for Operator cluster\_d
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Specification shared by cluster_d, cluster_e and cluster_f: a header
// list followed by the entries signature, syntax, meaning and example.
// Corrected: the "Meaning" entry is now terminated by </text--->, the
// parenthesis is balanced, and the example names cluster_d (it named
// cluster_b, which does not take a single real argument).
const string cluster_dSpec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\" "
  "\"Example\" ) "
  "( <text>points x real -> stream(points)</text--->"
  "<text> _ cluster_d [ maxdist ] </text--->"
  "<text>For a point set given as a points value, compute the clusters "
  "using a distance-based clustering algorithm with parameter maxdist "
  "(maximum distance between points within a cluster). Returns a stream of "
  "points values (point sets) representing the clusters. "
  "</text--->"
  "<text>query Kneipen cluster_d[200.0] count</text--->"
  ") )";
|
||
|
|
/*
|
||
|
|
7.1 Operator cluster\_a
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator instance cluster_a.
Operator cluster_a (
  "cluster_a",            // operator name
  cluster_aSpec,          // specification string
  cluster_aFun,           // value mapping
  Operator::SimpleSelect, // trivial selection function
  PointsTypeMapA          // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
7.2 Operator cluster\_b
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator instance cluster_b.
Operator cluster_b (
  "cluster_b",            // operator name
  cluster_bSpec,          // specification string
  cluster_bFun,           // value mapping
  Operator::SimpleSelect, // trivial selection function
  PointsTypeMapB          // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
7.3 Operator cluster[_]c
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator instance cluster_c.
Operator cluster_c (
  "cluster_c",            // operator name
  cluster_cSpec,          // specification string
  cluster_cFun,           // value mapping
  Operator::SimpleSelect, // trivial selection function
  cluster_c_TM            // type mapping
);
|
||
|
|
|
||
|
|
// Operator instance cluster_g; it uses the type mapping of cluster_c.
Operator cluster_g (
  "cluster_g",            // operator name
  cluster_gSpec,          // specification string
  cluster_gFun,           // value mapping
  Operator::SimpleSelect, // trivial selection function
  cluster_c_TM            // type mapping, equal to that of cluster_c
);
|
||
|
|
|
||
|
|
/*
|
||
|
|
7.4 Operator cluster[_]d
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator instance cluster_d: cluster_dFun instantiated with method 0.
Operator cluster_d (
  "cluster_d",            // operator name
  cluster_dSpec,          // specification string
  cluster_dFun<0>,        // value mapping
  Operator::SimpleSelect, // trivial selection function
  cluster_d_TM            // type mapping
);
|
||
|
|
|
||
|
|
// Operator instance cluster_e: cluster_dFun instantiated with method 1.
// It shares cluster_dSpec, so its help text shows the cluster_d syntax.
Operator cluster_e (
  "cluster_e",            // operator name
  cluster_dSpec,          // specification string (shared with cluster_d)
  cluster_dFun<1>,        // value mapping
  Operator::SimpleSelect, // trivial selection function
  cluster_d_TM            // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
// Operator instance cluster_f.
// It shares cluster_dSpec, so its help text shows the cluster_d syntax.
Operator cluster_f (
  "cluster_f",            // operator name
  cluster_dSpec,          // specification string (shared with cluster_d)
  cluster_fFun,           // value mapping
  Operator::SimpleSelect, // trivial selection function
  cluster_d_TM            // type mapping
);
|
||
|
|
|
||
|
|
/*
|
||
|
|
10.1 class DBscan (cluster algorithm)
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Default constructor - intentionally does nothing.
DBscan::DBscan()
{
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
10.2 Function FindClusters
|
||
|
|
|
||
|
|
This function is being called through the 'Parameter-' functions,
|
||
|
|
which set up Eps and MinPts.
|
||
|
|
|
||
|
|
It loops through each point and passes it on to the 'ExpandCluster' function
|
||
|
|
if the point has not been classified as a cluster member yet.
|
||
|
|
|
||
|
|
*/
|
||
|
|
int DBscan::FindClusters(double** cpoints, int cpoints_size){
|
||
|
|
int point; // counter
|
||
|
|
float percentage = 0.0;
|
||
|
|
int anzahl = 0;
|
||
|
|
|
||
|
|
|
||
|
|
// iterate all cpoints
|
||
|
|
for(point=0; point < cpoints_size; point++)
|
||
|
|
if (cpoints[point][2] == 0.0) // not yet classified as cluster member
|
||
|
|
if(!ExpandCluster(cpoints, cpoints_size, point))
|
||
|
|
if ( RTFlag::isActive("ClusterText:Trace") ) {
|
||
|
|
cmsg.file() << "Cluster: Problem with ExpandCluster " << endl;
|
||
|
|
cmsg.send();}
|
||
|
|
|
||
|
|
// calculate percentage of cluster-cpoints
|
||
|
|
for(point=0; point < cpoints_size; point++)
|
||
|
|
if(cpoints[point][2] > 0) {
|
||
|
|
percentage++;
|
||
|
|
anzahl++;
|
||
|
|
}
|
||
|
|
|
||
|
|
percentage = percentage/(float)point;
|
||
|
|
|
||
|
|
if ( RTFlag::isActive("ClusterText:Trace") ) {
|
||
|
|
cmsg.file() << "Cluster: Percentage: " << (percentage*100) << endl
|
||
|
|
<< " EPS: " << Eps << endl << " MinPts: " << MinPts << endl;
|
||
|
|
cmsg.send();}
|
||
|
|
|
||
|
|
return (int)(percentage*100); // return percentage of points in cluster
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
10.3 Function Parameter\_Standard
|
||
|
|
|
||
|
|
This function only sets MinPts and Eps to the \#DEFINE values and
|
||
|
|
calls the function 'FindClusters'.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
int DBscan::Parameter_Standard(double** cpoints,int cpoints_size) {
  // Runs the cluster search with the compile-time default parameters.
  // Returns whatever FindClusters returns for the given point array.
  MinPts = MINIMUMPTS_DEF;
  Eps = EPS_DEF;

  return FindClusters(cpoints, cpoints_size);
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
10.4 Function Parameter\_UserDefined
|
||
|
|
|
||
|
|
Similar to the function 'Parameter\_Standard', but sets MinPts and
|
||
|
|
Eps to the parameter values.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
int DBscan::Parameter_UserDefined(double** cpoints,int cpoints_size,
                                  int MinPts_user, int Eps_user){
  // Runs the cluster search with caller supplied parameters.
  //
  // MinPts_user : value stored in MinPts
  // Eps_user    : value stored in Eps
  // returns     : the result of FindClusters for the given point array
  MinPts = MinPts_user;
  Eps = Eps_user;

  return FindClusters(cpoints, cpoints_size);
}
|
||
|
|
/*
|
||
|
|
10.5 Function ExpandCluster
|
||
|
|
|
||
|
|
This function checks whether the passed point is a member of a cluster and - if so -
|
||
|
|
checks for further members. For this, the function 'Search' is being used.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
bool DBscan::ExpandCluster(double** cpoints,int cpoints_size,int point)
|
||
|
|
{
|
||
|
|
int* seeds;
|
||
|
|
int a = 0;
|
||
|
|
|
||
|
|
seeds = (int*) malloc((cpoints_size) * sizeof(int));
|
||
|
|
seeds[0]=0; // none yet
|
||
|
|
|
||
|
|
Search(cpoints, cpoints_size, point, seeds);
|
||
|
|
|
||
|
|
// seeds: seeds[0] = number of seeds,
|
||
|
|
// seeds[1...] = ('cpoints'-) numbers of Eps-Points
|
||
|
|
|
||
|
|
if(seeds[0] < MinPts) // no core point - seeds[0]
|
||
|
|
// contains number of points in Eps
|
||
|
|
{ cpoints[point][3] = -1.0; // no core point
|
||
|
|
free(seeds);
|
||
|
|
return true;}
|
||
|
|
|
||
|
|
else // core point
|
||
|
|
{ while(a < seeds[0])
|
||
|
|
{ a++;
|
||
|
|
point = seeds[a];
|
||
|
|
if (cpoints[point][3] < 1.0) // no core point
|
||
|
|
Search(cpoints, cpoints_size, point, seeds);
|
||
|
|
} // end while
|
||
|
|
|
||
|
|
for(a=1; a<seeds[0]+1; a++) //all seeds are member of cluster
|
||
|
|
cpoints[seeds[a]][2]=1.0;
|
||
|
|
|
||
|
|
free(seeds);
|
||
|
|
return true;
|
||
|
|
} // end if
|
||
|
|
}
|
||
|
|
/*
|
||
|
|
10.6 Function CopyToResult
|
||
|
|
|
||
|
|
This function copies the resulting cluster members back into the 'points'
|
||
|
|
value provided by the QueryProcessor.
|
||
|
|
|
||
|
|
*/
|
||
|
|
void DBscan::CopyToResult(Word* args, Word& result, int message, Word& local,
                          Supplier s, double** cpoints)
{
  // Writes every row of cpoints whose column 2 is > 0 (cluster member)
  // as a Point into the result object supplied by the query processor.
  // 'message' and 'local' are not used here.
  Points* input = (Points*)args[0].addr;

  // Query Processor provided Points instance for the result
  result = qp->ResultStorage( s );
  Points* output = (Points*)result.addr;

  // copy x/y from cluster array back into result (only cluster members)
  output->Clear();
  output->StartBulkLoad();
  int row = 0;
  while (row < input->Size()) {
    if (cpoints[row][2] > 0) { // cluster member
      Point p(true, cpoints[row][0], cpoints[row][1]);
      (*output) += p;
    }
    row++;
  }
  output->EndBulkLoad();

  // Releases only the outer array.
  // NOTE(review): how the rows were allocated is not visible here -
  // confirm against the caller that nothing is leaked.
  free(cpoints);
}
|
||
|
|
/*
|
||
|
|
10.7 Function Search
|
||
|
|
|
||
|
|
This function searches for all points in the 'Eps'-area of each given
|
||
|
|
point and returns these.
|
||
|
|
So far this function is implemented only as a SLOW pairwise (brute-force) search.
|
||
|
|
Alternative methods (R[*]-Tree, ...) should be implemented.
|
||
|
|
|
||
|
|
*/
|
||
|
|
void DBscan::Search(double** cpoints,int cpoints_size, int point, int* seeds){
|
||
|
|
// return EPS-environment of point in seeds
|
||
|
|
// ... could be implemented as an EFFICIENT r*-tree
|
||
|
|
|
||
|
|
int a;
|
||
|
|
//int b = seeds[0]+1;
|
||
|
|
int c;
|
||
|
|
int seedcounter = 0;
|
||
|
|
bool check = true;
|
||
|
|
double min1, min2, dist;
|
||
|
|
|
||
|
|
for(a=0; a < cpoints_size; a++){
|
||
|
|
min1 = (double)cpoints[point][0]-(double)cpoints[a][0];
|
||
|
|
min2 = (double)cpoints[point][1]-(double)cpoints[a][1];
|
||
|
|
dist = sqrt(pow(min1, 2.0) + pow(min2, 2.0));
|
||
|
|
|
||
|
|
if(dist <= (double)Eps && point != a)
|
||
|
|
{
|
||
|
|
check = true;
|
||
|
|
for(c=1; c < seeds[0]+1 && check == true; c++)
|
||
|
|
{
|
||
|
|
if(seeds[c] == a)
|
||
|
|
check = false;
|
||
|
|
// don't put the same point into seeds more than once
|
||
|
|
else
|
||
|
|
check = true;
|
||
|
|
} // end for
|
||
|
|
if(check) {
|
||
|
|
// add a (in Eps) ... seed not yet included
|
||
|
|
seeds[0]++;
|
||
|
|
seeds[seeds[0]] = a;
|
||
|
|
} // end if(check...
|
||
|
|
seedcounter++; // used for core-point classification
|
||
|
|
} // end if(dist ...
|
||
|
|
} // end for
|
||
|
|
if (seedcounter > Eps)
|
||
|
|
cpoints[point][3] = 1.0; // core-point classification
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
8.1 Creating the cluster algebra
|
||
|
|
|
||
|
|
*/
|
||
|
|
class ClusterAlgebra : public Algebra
|
||
|
|
{
|
||
|
|
public:
|
||
|
|
ClusterAlgebra() : Algebra()
|
||
|
|
{
|
||
|
|
AddOperator ( &cluster_a );
|
||
|
|
AddOperator ( &cluster_b );
|
||
|
|
AddOperator ( &cluster_c );
|
||
|
|
AddOperator ( &cluster_g );
|
||
|
|
AddOperator ( &cluster_d );
|
||
|
|
AddOperator ( &cluster_e );
|
||
|
|
AddOperator ( &cluster_f );
|
||
|
|
|
||
|
|
///// tracefile /////
|
||
|
|
// if ( RTFlag::isActive("ClusterText:Trace") ) {
|
||
|
|
// cmsg.file() << "Cluster: Constructor " << endl;
|
||
|
|
// cmsg.send();
|
||
|
|
// }
|
||
|
|
///// tracefile end /////
|
||
|
|
}
|
||
|
|
~ClusterAlgebra() {};
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
} // end of namespace clusteralg
|
||
|
|
|
||
|
|
/*
|
||
|
|
9.1 Initialization (Standard)
|
||
|
|
|
||
|
|
Each algebra module needs an initialization function. The algebra manager
|
||
|
|
has a reference to this function if this algebra is included in the list
|
||
|
|
of required algebras, thus forcing the linker to include this module.
|
||
|
|
|
||
|
|
The algebra manager invokes this function to get a reference to the instance
|
||
|
|
of the algebra class and to provide references to the global nested list
|
||
|
|
container (used to store constructor, type, operator and object information)
|
||
|
|
and to the query processor.
|
||
|
|
|
||
|
|
The function has a C interface to make it possible to load the algebra
|
||
|
|
dynamically at runtime.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
extern "C"
|
||
|
|
Algebra*
|
||
|
|
InitializeClusterAlgebra( NestedList* nlRef,
|
||
|
|
QueryProcessor* qpRef
|
||
|
|
)
|
||
|
|
{
|
||
|
|
nl = nlRef;
|
||
|
|
qp = qpRef;
|
||
|
|
|
||
|
|
///// tracefile ////
|
||
|
|
// if ( RTFlag::isActive("ClusterText:Trace") ) {
|
||
|
|
// cmsg.file() << "Cluster: InitializeClusterAlgebra "
|
||
|
|
// << endl; cmsg.send(); }
|
||
|
|
|
||
|
|
return (new clusteralg::ClusterAlgebra());
|
||
|
|
}
|