Files
secondo/Algebras/DBScan/DBScanAlgebra.cpp
2026-01-23 17:03:45 +08:00

1007 lines
29 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//[_] [\_]
//characters [1] verbatim: [$] [$]
//characters [2] formula: [$] [$]
//characters [3] capital: [\textsc{] [}]
//characters [4] teletype: [\texttt{] [}]
1 Source file "DBScanAlgebra.cpp"[4]
March-October 2014, Natalie Jaeckel
1.1 Overview
This file contains the implementation of the DBScanAlgebra.
1.2 Includes
*/
#include "SetOfObjectsM.h"
#include "SetOfObjectsR.h"
#include "DBScanGen.h"
#include "NestedList.h"
#include "QueryProcessor.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "Stream.h"
#include "StandardTypes.h"
#include "Algebras/Spatial/SpatialAlgebra.h"
#include "DistFunction.h"
#include <iostream>
#include <string>
#include <algorithm>
#include <utility>
extern NestedList* nl;
extern QueryProcessor* qp;
using namespace std;
namespace clusterdbscanalg
{
/*
1 Operator dbScanR
This operator realizes the db scan for rectangles
using an r-tree as index structure.
1.1 Type Mapping
*/
// Type mapping of ~dbscanR~.
//
// Expected arguments: a tuple stream and an argument list
//   (ClusterAttr, ClusterIdName, Eps, MinPts)
// where ClusterAttr names an attribute of the stream whose type is
// Rectangle<2>, Rectangle<3>, Rectangle<4> or Rectangle<8>, ClusterIdName is
// a valid, not yet used attribute name, Eps is a real and MinPts is an int.
//
// Result: (APPEND (idx) (stream (tuple (<input attributes>
//            (ClusterIdName int) (Visited bool) (IsCore bool)))))
// where idx is the zero-based position of ClusterAttr in the input tuple.
// On any violation a type error is reported and nl->TypeError() is returned.
ListExpr dbscanRTM( ListExpr args ) {
  if(nl->ListLength(args)!=2) {
    ErrorReporter::ReportError("two elements expected. "
                               "Stream and argument list");
    return nl->TypeError();
  }
  ListExpr stream = nl->First(args);
  if(!Stream<Tuple>::checkType(stream)) {
    return listutils::typeError("first argument is not stream(Tuple)");
  }
  ListExpr arguments = nl->Second(args);
  if(nl->ListLength(arguments)!=4) {
    ErrorReporter::ReportError("non conform list of cluster attribute, "
                   "attribute name as cluster ID, Eps and MinPts");
    return nl->TypeError();
  }
  if(!CcReal::checkType(nl->Third(arguments))) {
    return listutils::typeError("no numeric Eps");
  }
  if(!CcInt::checkType(nl->Fourth(arguments))) {
    return listutils::typeError("no numeric MinPts");
  }

  // The cluster attribute must be a symbol before SymbolValue may be used
  // on it (same check as in dbscanFTM).
  ListExpr clusterAttr = nl->First(arguments);
  if(nl->AtomType(clusterAttr)!=SymbolType) {
    return listutils::typeError("First arg of the parameter list "
                                + nl->ToString(clusterAttr) + " is not "
                                "a valid attribute name");
  }

  // Check the cluster attribute name, if it is in the tuple
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr attrType;
  string attrName = nl->SymbolValue(clusterAttr);
  int found = FindAttribute(attrList, attrName, attrType);
  if(found == 0) {
    ErrorReporter::ReportError("Attribute "
                   + attrName + " is no member of the tuple");
    return nl->TypeError();
  }
  if(   !Rectangle<2>::checkType(attrType)
     && !Rectangle<3>::checkType(attrType)
     && !Rectangle<4>::checkType(attrType)
     && !Rectangle<8>::checkType(attrType) ) {
    return listutils::typeError("Attribute " + attrName + " not of type "
                                + Rectangle<2>::BasicType() + ", "
                                + Rectangle<3>::BasicType() + ", "
                                + Rectangle<4>::BasicType() + " or "
                                + Rectangle<8>::BasicType() );
  }

  // Check the name of the new cluster ID attribute
  ListExpr typeList;
  ListExpr name = nl->Second(arguments);
  string errormsg;
  if(!listutils::isValidAttributeName(name, errormsg)) {
    return listutils::typeError(errormsg);
  }
  string namestr = nl->SymbolValue(name);
  if(FindAttribute(attrList, namestr, typeList) != 0) {
    ErrorReporter::ReportError("Attribute "+ namestr +
                               " already member of the tuple");
    return nl->TypeError();
  }

  // Visited and IsCore are appended unconditionally below, so neither the
  // input tuple nor the new cluster ID attribute may use these names;
  // otherwise the result tuple would contain duplicate attribute names.
  if(namestr == "Visited" || namestr == "IsCore") {
    return listutils::typeError("Attribute name " + namestr +
                                " is reserved for a result attribute");
  }
  if(FindAttribute(attrList, "Visited", typeList) != 0) {
    return listutils::typeError("Attribute Visited"
                                " already member of the tuple");
  }
  if(FindAttribute(attrList, "IsCore", typeList) != 0) {
    return listutils::typeError("Attribute IsCore"
                                " already member of the tuple");
  }

  // Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;
  attrList = nl->Rest(attrList);
  while(!(nl->IsEmpty(attrList))) {
    lastlistn = nl->Append(lastlistn, nl->First(attrList));
    attrList = nl->Rest(attrList);
  }
  // Append the three result attributes
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(name, nl->SymbolAtom(CcInt::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("Visited"),
                       nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("IsCore"),
                       nl->SymbolAtom(CcBool::BasicType())));

  // found is one-based; the zero-based index is appended to the arguments
  return nl->ThreeElemList(
           nl->SymbolAtom(Symbol::APPEND())
          ,nl->OneElemList(nl->IntAtom(found-1))
          ,nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM())
          ,nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType())
          ,newAttrList)));
}
/*
1.2 Value mapping
*/
// Value mapping of ~dbscanR~ for rectangles of dimension dim.
//
// args[0] is the tuple stream, args[1] the argument list (its sons 2 and 3
// are evaluated to obtain eps and MinPts) and args[2] the attribute index
// appended by the type mapping. The clustering object is kept in local.addr
// between OPEN and CLOSE.
template <int dim>
int dbscanRVM1(Word* args, Word& result, int message, Word& local, Supplier s)
{
  typedef clusterdbscanalg::RectDist<dim> Dist;
  typedef dbscan::SetOfObjectsR<dim, Dist> ObjectSet;
  typedef dbscan::DBScanGen<ObjectSet, Dist> dbscanclass;

  dbscanclass* state = static_cast<dbscanclass*>(local.addr);

  if(message == OPEN) {
    Word stream = args[0];

    // eps is the third element of the argument list
    Supplier epsSupplier = qp->GetSupplier(args[1].addr, 2);
    Word epsWord;
    qp->Request(epsSupplier, epsWord);
    CcReal* eps = static_cast<CcReal*>(epsWord.addr);

    // MinPts is the fourth element of the argument list
    Supplier minPtsSupplier = qp->GetSupplier(args[1].addr, 3);
    Word minPtsWord;
    qp->Request(minPtsSupplier, minPtsWord);
    CcInt* minPts = static_cast<CcInt*>(minPtsWord.addr);

    int cid = static_cast<CcInt*>(args[2].addr)->GetValue();
    ListExpr tt = nl->Second(GetTupleResultType(s));

    // discard state left over from a previous OPEN
    if(state) {
      delete state;
      local.addr = 0;
    }

    size_t maxMem = qp->GetMemorySize(s) * 1024;

    // undefined or negative parameters leave local.addr empty, so that
    // REQUEST answers with CANCEL
    const bool epsOk = eps->IsDefined() && !(eps->GetValue() < 0);
    if(!epsOk) {
      return 0;
    }
    const bool minPtsOk = minPts->IsDefined() && !(minPts->GetValue() < 0);
    if(!minPtsOk) {
      return 0;
    }

    Dist dist;
    local.addr = new dbscanclass(stream, tt,
                                 eps->GetValue(),
                                 minPts->GetValue(),
                                 maxMem,
                                 cid, dist);
    return 0;
  }

  if(message == REQUEST) {
    if(state) {
      result.addr = state->next();
    } else {
      result.addr = 0;
    }
    return result.addr ? YIELD : CANCEL;
  }

  if(message == CLOSE && state) {
    delete state;
    local.addr = 0;
  }
  return 0;
}
/*
1.3 Struct ~dbscanRInfo~
*/
// Operator description (name, signature, syntax, meaning and example)
// of ~dbscanR~.
struct dbscanRInfo : OperatorInfo
{
  dbscanRInfo() : OperatorInfo()
  {
    name = "dbscanR";
    signature = "stream(Tuple) x Id x Id x real x int -> stream(Tuple)";
    syntax = "_ dbscanR [_, _, _, _]";
    meaning = "Detects cluster from a given stream using MMR-Tree as index "
    "structure. The first parameter has to be a bbox, the second parameter is "
    "the name for the cluster ID attribute, the third parameter is eps and "
    "the fourth parameter is MinPts. A tuple stream will be returned but the "
    "tuple will have additional attributes as IsCore, Visited and ClusterID";
    example = "query Kneipen feed extend[B : bbox(.GeoData)] dbscanR "
    "[B, No, 1000.0, 5] consume";
  }
};
/*
1.4 Selection method ~dbscanRSel~
*/
// Selection function of ~dbscanR~: maps the dimension of the rectangle
// stored in the cluster attribute to an index into dbscanRVM
// (2 -> 0, 3 -> 1, 4 -> 2, 8 -> 3). Returns -1 for any other type.
int dbscanRSel(ListExpr args)
{
  ListExpr tupleAttrs = nl->Second(nl->Second(nl->First(args)));
  ListExpr clusterAttrType;
  const string clusterAttrName = nl->SymbolValue(nl->First(nl->Second(args)));
  int attrPos = FindAttribute(tupleAttrs, clusterAttrName, clusterAttrType);
  assert(attrPos > 0);

  if(Rectangle<2>::checkType(clusterAttrType)) {
    return 0;
  }
  if(Rectangle<3>::checkType(clusterAttrType)) {
    return 1;
  }
  if(Rectangle<4>::checkType(clusterAttrType)) {
    return 2;
  }
  if(Rectangle<8>::checkType(clusterAttrType)) {
    return 3;
  }
  return -1;
}
/*
1.5 Value Mapping Array
*/
// Value mappings of ~dbscanR~, one instantiation per supported rectangle
// dimension. The order has to match the indices returned by dbscanRSel.
ValueMapping dbscanRVM[] =
{
dbscanRVM1<2>, // index 0: Rectangle<2>
dbscanRVM1<3>, // index 1: Rectangle<3>
dbscanRVM1<4>, // index 2: Rectangle<4>
dbscanRVM1<8>  // index 3: Rectangle<8>
};
/*
2 Operator dbScanM
*/
/*
2.1 Type mapping
*/
// Type mapping of ~dbscanM~.
//
// Expected arguments: a tuple stream and an argument list
//   (ClusterAttr, ClusterIdName, Eps, MinPts [, Geoid])
// where ClusterAttr names an attribute of the stream of type CcInt, CcReal,
// Point, CcString, Picture, MPoint, CUPoint or CMPoint, ClusterIdName is a
// valid, not yet used attribute name, Eps is a real, MinPts is an int and
// the optional fifth element is a geoid.
//
// Result: (APPEND (idx) (stream (tuple (<input attributes>
//            (ClusterIdName int) (Visited bool) (IsCore bool)))))
// where idx is the zero-based position of ClusterAttr in the input tuple.
// On any violation a type error is reported and nl->TypeError() is returned.
ListExpr dbscanMTM(ListExpr args) {
  if (nl->ListLength(args) != 2) {
    ErrorReporter::ReportError("two elements expected: stream and arg list");
    return nl->TypeError();
  }
  ListExpr stream = nl->First(args);
  if (!Stream<Tuple>::checkType(stream)) {
    return listutils::typeError("first argument is not a tuple stream");
  }
  ListExpr arguments = nl->Second(args);
  if (nl->ListLength(arguments) != 4 && nl->ListLength(arguments) != 5) {
    ErrorReporter::ReportError("non conform list of cluster attribute, "
              "attribute name as cluster ID, Eps, MinPts (and Geoid)");
    return nl->TypeError();
  }
  if (!CcReal::checkType(nl->Third(arguments))) {
    return listutils::typeError("no numeric Eps");
  }
  if (!CcInt::checkType(nl->Fourth(arguments))) {
    return listutils::typeError("no numeric MinPts");
  }
  if (nl->HasLength(arguments, 5)) {
    if (!Geoid::checkType(nl->Fifth(arguments))) {
      return listutils::typeError("last argument is not a geoid");
    }
  }

  // The cluster attribute must be a symbol before SymbolValue may be used
  // on it (same check as in dbscanFTM).
  ListExpr clusterAttr = nl->First(arguments);
  if (nl->AtomType(clusterAttr) != SymbolType) {
    return listutils::typeError("First arg of the parameter list "
                                + nl->ToString(clusterAttr) + " is not "
                                "a valid attribute name");
  }

  // Check if the cluster attribute name exists in the tuple
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr attrType;
  string attrName = nl->SymbolValue(clusterAttr);
  int found = FindAttribute(attrList, attrName, attrType);
  if (found == 0) {
    ErrorReporter::ReportError("Attribute " + attrName + " does not exist");
    return nl->TypeError();
  }
  if(   !CcInt::checkType(attrType)
     && !CcReal::checkType(attrType)
     && !Point::checkType(attrType)
     && !CcString::checkType(attrType)
     && !Picture::checkType(attrType)
     && !temporalalgebra::MPoint::checkType(attrType)
     && !temporalalgebra::CUPoint::checkType(attrType)
     && !temporalalgebra::CMPoint::checkType(attrType) ) {
    return listutils::typeError("Attribute " + attrName + " not of type "
                                + CcInt::BasicType() + ", "
                                + CcReal::BasicType() + ", "
                                + Point::BasicType() + ", "
                                + CcString::BasicType() + ", "
                                + Picture::BasicType() + ", "
                                + temporalalgebra::MPoint::BasicType() + ", "
                                + temporalalgebra::CUPoint::BasicType() + ", "
                                + temporalalgebra::CMPoint::BasicType() );
  }

  // check clusterNo attribute name
  ListExpr typeList;
  ListExpr name = nl->Second(arguments);
  string errormsg;
  if (!listutils::isValidAttributeName(name, errormsg)) {
    return listutils::typeError(errormsg);
  }
  string namestr = nl->SymbolValue(name);
  if (FindAttribute(attrList, namestr, typeList) != 0) {
    ErrorReporter::ReportError("Attribute "+ namestr + " already exists");
    return nl->TypeError();
  }

  // Visited and IsCore are appended unconditionally below, so neither the
  // input tuple nor the new cluster ID attribute may use these names;
  // otherwise the result tuple would contain duplicate attribute names.
  if (namestr == "Visited" || namestr == "IsCore") {
    return listutils::typeError("Attribute name " + namestr +
                                " is reserved for a result attribute");
  }
  if (FindAttribute(attrList, "Visited", typeList) != 0) {
    return listutils::typeError("Attribute Visited already exists");
  }
  if (FindAttribute(attrList, "IsCore", typeList) != 0) {
    return listutils::typeError("Attribute IsCore already exists");
  }

  // Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;
  attrList = nl->Rest(attrList);
  while (!(nl->IsEmpty(attrList))) {
    lastlistn = nl->Append(lastlistn, nl->First(attrList));
    attrList = nl->Rest(attrList);
  }
  // Append the three result attributes
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(name, nl->SymbolAtom(CcInt::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("Visited"),
                       nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("IsCore"),
                       nl->SymbolAtom(CcBool::BasicType())));

  // found is one-based; the zero-based index is appended to the arguments
  return nl->ThreeElemList(
      nl->SymbolAtom(Symbol::APPEND()),
      nl->OneElemList(nl->IntAtom(found - 1)),
      nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
                      nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()),
                                      newAttrList)));
}
/*
1.2 Value mapping method ~dbscanM~
*/
// Value mapping of ~dbscanM~ for cluster attributes of type T compared by
// the distance functor DistComp.
//
// args[0] is the tuple stream, args[1] the argument list (its sons 2 and 3
// are evaluated to obtain eps and MinPts) and args[2] the attribute index
// appended by the type mapping. The clustering object is kept in local.addr
// between OPEN and CLOSE.
// NOTE(review): an optional geoid argument is not evaluated here; the
// distance functor is default-constructed - confirm this is intended.
template <class T, class DistComp>
int dbscanMVM1(Word* args, Word& result, int message, Word& local, Supplier s) {
  typedef dbscan::SetOfObjectsM<DistComp, T> ObjectSet;
  typedef dbscan::DBScanGen<ObjectSet, DistComp> dbscanclass;

  dbscanclass* state = static_cast<dbscanclass*>(local.addr);

  if (message == OPEN) {
    Word stream = args[0];

    // eps is the third element of the argument list
    Supplier epsSupplier = qp->GetSupplier(args[1].addr, 2);
    Word epsWord;
    qp->Request(epsSupplier, epsWord);
    CcReal* eps = static_cast<CcReal*>(epsWord.addr);

    // MinPts is the fourth element of the argument list
    Supplier minPtsSupplier = qp->GetSupplier(args[1].addr, 3);
    Word minPtsWord;
    qp->Request(minPtsSupplier, minPtsWord);
    CcInt* minPts = static_cast<CcInt*>(minPtsWord.addr);

    int cid = static_cast<CcInt*>(args[2].addr)->GetValue();
    ListExpr tt = nl->Second(GetTupleResultType(s));

    // discard state left over from a previous OPEN
    if (state) {
      delete state;
      local.addr = 0;
    }

    size_t maxMem = qp->GetMemorySize(s) * 1024;

    // undefined or negative parameters leave local.addr empty, so that
    // REQUEST answers with CANCEL
    const bool epsOk = eps->IsDefined() && !(eps->GetValue() < 0);
    if (!epsOk) {
      return 0;
    }
    const bool minPtsOk = minPts->IsDefined() && !(minPts->GetValue() < 0);
    if (!minPtsOk) {
      return 0;
    }

    DistComp dist;
    local.addr = new dbscanclass(stream, tt, eps->GetValue(),
                                 minPts->GetValue(), maxMem, cid, dist);
    return 0;
  }

  if (message == REQUEST) {
    if (state) {
      result.addr = state->next();
    } else {
      result.addr = 0;
    }
    return result.addr ? YIELD : CANCEL;
  }

  if (message == CLOSE && state) {
    delete state;
    local.addr = 0;
  }
  return 0;
}
/*
1.3 Struct ~dbscanMInfo~
*/
// Operator description (name, signature, syntax, meaning and example)
// of ~dbscanM~. The meaning lists all three attributes appended by the type
// mapping and mentions the optional geoid argument.
struct dbscanMInfo : OperatorInfo
{
  dbscanMInfo() : OperatorInfo()
  {
    name = "dbscanM";
    signature = "stream(Tuple) x IDENT x IDENT x real x int (x geoid) -> "
    "stream(Tuple)";
    syntax = "_ dbscanM [_, _, _, _]";
    meaning = "Detects cluster from a given stream using an MMM-Tree as index "
    "structure. The first parameter is the attribute to cluster, the second "
    "parameter is the name for the cluster ID attribute, the third parameter "
    "is eps and the fourth parameter is MinPts. A geoid may be passed as "
    "optional fifth parameter. A tuple stream will be returned "
    "but the tuple will have additional attributes as Visited, IsCore and "
    "the clusterID";
    example = "query Kneipen feed dbscanM[GeoData, CID, 1000.0, 5] consume";
  }
};
/*
1.4 Selection Function
*/
// Selection function of ~dbscanM~: chooses the entry of dbscanMVM matching
// the type of the cluster attribute. For Point, MPoint, CUPoint and CMPoint
// the index following the plain variant is selected when the argument list
// has five elements (i.e. a geoid was passed). Returns -1 for other types.
int dbscanMSel(ListExpr args) {
  ListExpr params = nl->Second(args);
  ListExpr tupleAttrs = nl->Second(nl->Second(nl->First(args)));
  ListExpr clusterAttrType;
  const string clusterAttrName = nl->SymbolValue(nl->First(params));
  int attrPos = FindAttribute(tupleAttrs, clusterAttrName, clusterAttrType);
  assert(attrPos > 0);

  // 1 if a geoid was passed, 0 otherwise
  const int geoidOffset = nl->HasLength(params, 5) ? 1 : 0;

  if (CcInt::checkType(clusterAttrType)) {
    return 0;
  }
  if (CcReal::checkType(clusterAttrType)) {
    return 1;
  }
  if (Point::checkType(clusterAttrType)) {
    return 2 + geoidOffset;
  }
  if (CcString::checkType(clusterAttrType)) {
    return 4;
  }
  if (Picture::checkType(clusterAttrType)) {
    return 5;
  }
  if (temporalalgebra::MPoint::checkType(clusterAttrType)) {
    return 6 + geoidOffset;
  }
  if (temporalalgebra::CUPoint::checkType(clusterAttrType)) {
    return 8 + geoidOffset;
  }
  if (temporalalgebra::CMPoint::checkType(clusterAttrType)) {
    return 10 + geoidOffset;
  }
  return -1;
}
/*
Value mapping array ~dbscanMVM~
*/
// Value mappings of ~dbscanM~. The order has to match the indices returned
// by dbscanMSel; for the point based types the <true> variant is the one
// selected when a geoid argument is given.
ValueMapping dbscanMVM[] =
{
dbscanMVM1<CcInt, IntDist>, // index 0
dbscanMVM1<CcReal, RealDist>, // index 1
dbscanMVM1<Point, PointDist<false> >, // index 2: without geoid
dbscanMVM1<Point, PointDist<true> >, // index 3: with geoid
dbscanMVM1<CcString, StringDist>, // index 4
dbscanMVM1<Picture, PictureDist>, // index 5
dbscanMVM1<temporalalgebra::MPoint, MPointDist<false> >, // index 6
dbscanMVM1<temporalalgebra::MPoint, MPointDist<true> >, // index 7
dbscanMVM1<temporalalgebra::CUPoint, CUPointDist<false> >, // index 8
dbscanMVM1<temporalalgebra::CUPoint, CUPointDist<true> >, // index 9
dbscanMVM1<temporalalgebra::CMPoint, CMPointDist<false> >, // index 10
dbscanMVM1<temporalalgebra::CMPoint, CMPointDist<true> > // index 11
};
/*
3 Operator ~dbscanF~
This operator works the same way as the ~dbscanM~ operator.
The difference is that this operator allows the user to define
their own distance function instead of using a predefined one.
While the ~dbscanM~ operator can only process a small set of attribute
data types, the ~dbscanF~ operator
is able to process arbitrary attribute data types.
3.1 Type Mapping
*/
// Type mapping of ~dbscanF~.
//
// Expected arguments (six elements):
//   stream(tuple) x ClusterAttr x ClusterIdName x real x int x (map T T R)
// where ClusterAttr names an attribute of the stream having type T,
// ClusterIdName is a valid, not yet used attribute name and R is int or
// real. The names Visited and IsCore must not occur in the input tuple
// because they are appended to the result.
//
// Result: (APPEND (idx) (stream (tuple (<input attributes>
//            (ClusterIdName int) (Visited bool) (IsCore bool)))))
// where idx is the zero-based position of ClusterAttr in the input tuple.
ListExpr dbscanFTM( ListExpr args ) {
  if(nl->ListLength(args)!=6) {
    return listutils::typeError("six arguments expected: stream, "
                                "cluster attribute, cluster ID name, "
                                "Eps, MinPts and distance function");
  }
  ListExpr stream = nl->First(args);
  if(!Stream<Tuple>::checkType(stream)) {
    return listutils::typeError("first argument is not stream(Tuple)");
  }
  // arguments = (ClusterAttr ClusterIdName Eps MinPts fun)
  ListExpr arguments = nl->Rest(args);
  if(!CcReal::checkType(nl->Third(arguments))) {
    return listutils::typeError("no numeric Eps");
  }
  if(!CcInt::checkType(nl->Fourth(arguments))) {
    return listutils::typeError("no numeric MinPts");
  }
  ListExpr fun = nl->Fifth(arguments);
  if(!listutils::isMap<2>(fun)) {
    return listutils::typeError("last argument is not a map "
                                "with 2 arguments");
  }
  if(   !nl->Equal(nl->Second(fun), nl->Third(fun))
     || (   !CcReal::checkType(nl->Fourth(fun))
         && !CcInt::checkType(nl->Fourth(fun)) )) {
    return listutils::typeError("fun is not of type: T x T -> {int, real} ");
  }

  // Check the cluster attribute name, if it is in the tuple
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr attrType;
  ListExpr clusterAttr = nl->First(arguments);
  if(nl->AtomType(clusterAttr)!=SymbolType) {
    return listutils::typeError("First arg of the parameter list "
                                + nl->ToString(clusterAttr) + " is not "
                                "a valid attribute name");
  }
  string attrName = nl->SymbolValue(clusterAttr);
  int found = FindAttribute(attrList, attrName, attrType);
  if(found == 0) {
    return listutils::typeError("Attribute "
                                + attrName + " is no member of the tuple");
  }
  if(!nl->Equal(attrType, nl->Second(fun))) {
    return listutils::typeError("Clustervalue type and function value type "
                                "different");
  }

  // Check the name of the new cluster ID attribute
  ListExpr typeList;
  ListExpr name = nl->Second(arguments);
  string errormsg;
  if(!listutils::isValidAttributeName(name, errormsg)) {
    return listutils::typeError(errormsg);
  }
  string namestr = nl->SymbolValue(name);
  if(FindAttribute(attrList, namestr, typeList) != 0) {
    return listutils::typeError("Attribute "+ namestr +
                                " already member of the tuple");
  }

  // Visited and IsCore are appended unconditionally below, so neither the
  // input tuple nor the new cluster ID attribute may use these names.
  if(namestr == "Visited" || namestr == "IsCore") {
    return listutils::typeError("Attribute name " + namestr +
                                " is reserved for a result attribute");
  }
  if(FindAttribute(attrList, "Visited", typeList) != 0) {
    return listutils::typeError("Attribute Visited"
                                " already member of the tuple");
  }
  if(FindAttribute(attrList, "IsCore", typeList) != 0) {
    return listutils::typeError("Attribute IsCore"
                                " already member of the tuple");
  }

  // Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;
  attrList = nl->Rest(attrList);
  while(!(nl->IsEmpty(attrList))) {
    lastlistn = nl->Append(lastlistn, nl->First(attrList));
    attrList = nl->Rest(attrList);
  }
  // Append the three result attributes
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(name, nl->SymbolAtom(CcInt::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("Visited"),
                       nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("IsCore"),
                       nl->SymbolAtom(CcBool::BasicType())));

  // found is one-based; the zero-based index is appended to the arguments
  return nl->ThreeElemList(
           nl->SymbolAtom(Symbol::APPEND())
          ,nl->OneElemList(nl->IntAtom(found-1))
          ,nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM())
          ,nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType())
          ,newAttrList)));
}
/*
3.1 Value Mapping
The template argument specifies the result type of the distance function and
may be CcInt or CcReal
*/
// Value mapping of ~dbscanF~; R is the result type of the user supplied
// distance function (CcInt or CcReal).
//
// args[0] is the tuple stream, args[3] eps, args[4] MinPts, args[5] the
// distance function and args[6] the attribute index appended by the type
// mapping. The clustering object is kept in local.addr between OPEN and
// CLOSE.
template <class R>
int dbscanFVM1(Word* args, Word& result,
               int message, Word& local, Supplier s) {
  typedef CustomDist<Attribute*, R> DistComp;
  typedef dbscan::SetOfObjectsM<DistComp, Attribute> ObjectSet;
  typedef dbscan::DBScanGen<ObjectSet, DistComp> dbscanclass;

  dbscanclass* state = static_cast<dbscanclass*>(local.addr);

  if(message == OPEN) {
    Word stream = args[0];
    // args[1] (attribute to cluster) and args[2] (name for the cluster ID)
    // are not needed at run time
    CcReal* eps = static_cast<CcReal*>(args[3].addr);
    CcInt* minPts = static_cast<CcInt*>(args[4].addr);
    int cid = static_cast<CcInt*>(args[6].addr)->GetValue();
    ListExpr tt = nl->Second(GetTupleResultType(s));

    // discard state left over from a previous OPEN
    if(state) {
      delete state;
      local.addr = 0;
    }

    size_t maxMem = qp->GetMemorySize(s) * 1024;

    // undefined or negative parameters leave local.addr empty, so that
    // REQUEST answers with CANCEL
    const bool epsOk = eps->IsDefined() && !(eps->GetValue() < 0);
    if(!epsOk) {
      return 0;
    }
    const bool minPtsOk = minPts->IsDefined() && !(minPts->GetValue() < 0);
    if(!minPtsOk) {
      return 0;
    }

    // hand the user defined function over to the distance functor
    DistComp dist;
    Supplier distFun = args[5].addr;
    dist.initialize(qp, distFun);

    local.addr = new dbscanclass(stream, tt,
                                 eps->GetValue(),
                                 minPts->GetValue(),
                                 maxMem,
                                 cid, dist);
    return 0;
  }

  if(message == REQUEST) {
    if(state) {
      result.addr = state->next();
    } else {
      result.addr = 0;
    }
    return result.addr ? YIELD : CANCEL;
  }

  if(message == CLOSE && state) {
    delete state;
    local.addr = 0;
  }
  return 0;
}
/*
1.3 Struct ~dbscanFInfo~
*/
// Operator description (name, signature, syntax, meaning and example)
// of ~dbscanF~. The pieces of the meaning text are joined with single
// blanks and all three appended attributes are listed.
struct dbscanFInfo : OperatorInfo
{
  dbscanFInfo() : OperatorInfo()
  {
    name = "dbscanF";
    signature = "stream(tuple) x ID x ID x real x int x fun -> stream(tuple)";
    syntax = "_ dbscanF [_, _, _, _, fun]";
    meaning = "Detects cluster from a given stream using an MMM-Tree as index "
    "structure. The first argument is a tuple stream containing the data. "
    "The second argument is the attribute to cluster, the third "
    "argument is the name for the cluster ID attribute, the fourth argument "
    "is eps and the fifth argument is MinPts. "
    "The last argument is a function mapping from a pair of cluster attributes "
    "to the distance between their values. "
    "A tuple stream will be returned "
    "that will have additional attributes as Visited, IsCore "
    "and clusterID. ";
    example = "query plz feed dbscanF[PLZ, CID, 1000.0, 5, "
    "fun(i1: int, i2: int) abs(i1 - i2)] consume";
  }
};
/*
1.4 Selection method
*/
// Selection function of ~dbscanF~: inspects the result type of the distance
// function (fourth element of the sixth argument) and returns 0 for int,
// 1 for real and -1 otherwise.
int dbscanFSel(ListExpr args)
{
  ListExpr distResultType = nl->Fourth(nl->Sixth(args));
  if(CcInt::checkType(distResultType)) {
    return 0;
  }
  return CcReal::checkType(distResultType) ? 1 : -1;
}
/*
1.5. Value mapping array
*/
// Value mappings of ~dbscanF~, indexed by the result of dbscanFSel.
ValueMapping dbscanFVM[] =
{
dbscanFVM1<CcInt>, // index 0: distance function returns int
dbscanFVM1<CcReal> // index 1: distance function returns real
};
/*
3 Operator ~dbscanTF~
This operator works similarly to the ~dbScanF~ operator.
The difference is that this operator allows the user to define
a distance function based on tuples, including the possibility to process
several attributes of a tuple.
3.1 Type Mapping
*/
// Type mapping of ~dbscanTF~.
//
// Expected arguments (five elements):
//   stream(T) x ClusterIdName x real x int x (map T T R)
// where T is the tuple type of the stream, ClusterIdName is a valid, not yet
// used attribute name and R is int or real. The names Visited and IsCore
// must not occur in the input tuple because they are appended to the result.
//
// Result: (stream (tuple (<input attributes>
//            (ClusterIdName int) (Visited bool) (IsCore bool))))
ListExpr dbscanTFTM(ListExpr args) {
  if (nl->ListLength(args) != 5) {
    return listutils::typeError("5 arguments expected");
  }
  ListExpr stream = nl->First(args);
  if (!Stream<Tuple>::checkType(stream)) {
    return listutils::typeError("first argument is not a tuple stream");
  }
  // arguments = (ClusterIdName Eps MinPts fun)
  ListExpr arguments = nl->Rest(args);
  if (!CcReal::checkType(nl->Second(arguments))) {
    return listutils::typeError("eps must have type real");
  }
  if (!CcInt::checkType(nl->Third(arguments))) {
    return listutils::typeError("MinPts must have type int");
  }
  ListExpr fun = nl->Fourth(arguments);
  if (!listutils::isMap<2>(fun)) {
    return listutils::typeError("function with two arguments required");
  }
  if (   !nl->Equal(nl->Second(fun), nl->Third(fun))
      || (   !CcReal::checkType(nl->Fourth(fun))
          && !CcInt::checkType(nl->Fourth(fun)))) {
    return listutils::typeError("fun is not of type: T x T -> {int, real}");
  }
  // The distance function is applied to tuples of the stream, hence its
  // argument type has to be exactly the tuple type of the stream.
  if (!nl->Equal(nl->Second(stream), nl->Second(fun))) {
    return listutils::typeError("function arguments must have the tuple "
                                "type of the input stream");
  }

  // Check the name of the new cluster ID attribute
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr typeList;
  ListExpr cidname = nl->First(arguments);
  string errormsg;
  if (!listutils::isValidAttributeName(cidname, errormsg)) {
    return listutils::typeError(errormsg);
  }
  string cidnamestr = nl->SymbolValue(cidname);
  if (FindAttribute(attrList, cidnamestr, typeList) != 0) {
    return listutils::typeError("Attribute "+ cidnamestr + " already exists");
  }

  // Visited and IsCore are appended unconditionally below, so neither the
  // input tuple nor the new cluster ID attribute may use these names.
  if (cidnamestr == "Visited" || cidnamestr == "IsCore") {
    return listutils::typeError("Attribute name " + cidnamestr +
                                " is reserved for a result attribute");
  }
  if (FindAttribute(attrList, "Visited", typeList) != 0) {
    return listutils::typeError("Attribute Visited already exists");
  }
  if (FindAttribute(attrList, "IsCore", typeList) != 0) {
    return listutils::typeError("Attribute IsCore already exists");
  }

  // Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;
  attrList = nl->Rest(attrList);
  while (!(nl->IsEmpty(attrList))) {
    lastlistn = nl->Append(lastlistn, nl->First(attrList));
    attrList = nl->Rest(attrList);
  }
  // Append the three result attributes
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(cidname, nl->SymbolAtom(CcInt::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("Visited"),
                       nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn,
       nl->TwoElemList(nl->SymbolAtom("IsCore"),
                       nl->SymbolAtom(CcBool::BasicType())));

  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
                         nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()),
                                         newAttrList));
}
/*
3.1 Value Mapping
The template argument specifies the result type of the distance function and
may be CcInt or CcReal
*/
// Value mapping of ~dbscanTF~; R is the result type of the user supplied
// tuple distance function (CcInt or CcReal).
//
// args[0] is the tuple stream, args[1] the new attribute name (ignored),
// args[2] eps, args[3] MinPts and args[4] the distance function. The
// clustering object is kept in local.addr between OPEN and CLOSE.
template <class R>
int dbscanTFVM(Word* args, Word& result, int message, Word& local, Supplier s) {
  typedef TupleDist<R> TupleComp;
  typedef dbscan::DBScanGen<dbscan::SetOfObjectsM<TupleComp, Tuple>,
                            TupleComp> dbscanclass;
  dbscanclass* li = (dbscanclass*) local.addr;
  switch (message) {
    case OPEN : {
      // Discard state of a previous OPEN before validating the parameters.
      // Otherwise an invalid eps/MinPts would leave the old clustering in
      // place and REQUEST would keep returning its tuples.
      if (li) {
        delete li;
        local.addr = 0;
      }
      Word stream = args[0];
      CcReal* eps = (CcReal*) args[2].addr;
      CcInt* minPts = (CcInt*) args[3].addr;
      // undefined or negative parameters leave local.addr empty, so that
      // REQUEST answers with CANCEL
      if (!eps->IsDefined() || !minPts->IsDefined()) {
        return 0;
      }
      if (eps->GetValue() < 0 || minPts->GetValue() < 0) {
        return 0;
      }
      ListExpr tt = (nl->Second(GetTupleResultType(s)));
      size_t maxMem = (qp->GetMemorySize(s) * 1024);
      // hand the user defined function over to the distance functor
      TupleComp dist;
      Supplier supplier2 = args[4].addr;
      dist.initialize(qp, supplier2);
      // -1: no single cluster attribute, whole tuples are compared
      local.addr = new dbscanclass(stream, tt, eps->GetValue(),
                                   minPts->GetValue(), maxMem, -1, dist);
      return 0;
    }
    case REQUEST: {
      result.addr = li ? li->next() : 0;
      return result.addr ? YIELD : CANCEL;
    }
    case CLOSE: {
      if (li) {
        delete li;
        local.addr = 0;
      }
    }
  }
  return 0;
}
/*
1.3 Struct ~dbscanTFInfo~
*/
// Operator description (name, signature, syntax, meaning and example)
// of ~dbscanTF~. The pieces of the meaning text are joined with single
// blanks and all three appended attributes are listed.
struct dbscanTFInfo : OperatorInfo {
  dbscanTFInfo() : OperatorInfo() {
    name = "dbscanTF";
    signature = "stream(tuple) x Id x real x int x fun -> stream(tuple)";
    syntax = "_ dbscanTF [_, _, _, fun]";
    meaning = "Detects clusters in a given stream using MMM-Tree as index "
    "structure. The first parameter is the name for the new attribute "
    "containing the cluster number. "
    "The second parameter is eps and the third parameter is MinPts. The last "
    "parameter is a function defining the distance between the incoming tuples."
    " A tuple "
    "stream will be returned but the tuple will have additional attributes as "
    "Visited, IsCore and clusterID. ";
    example = "query Kneipen feed dbscanTF[No, 500.0, 5, fun(t1: tuple((Name "
    "string) (Strasse string) (GeoData point)), t2: tuple((Name string) ("
    "Strasse string) (GeoData point))) distance(attr(t1, GeoData), attr(t2, "
    "GeoData))] sortby[No] groupby[No ; C : group count] count";
  }
};
/*
1.4 Selection method
*/
// Selection function of ~dbscanTF~: inspects the result type of the distance
// function (fourth element of the fifth argument) and returns 0 for int,
// 1 for real and -1 otherwise.
int dbscanTFSel(ListExpr args) {
  ListExpr distResultType = nl->Fourth(nl->Fifth(args));
  if (CcInt::checkType(distResultType)) {
    return 0;
  }
  return CcReal::checkType(distResultType) ? 1 : -1;
}
/*
1.5. Value mapping array
*/
// Value mappings of ~dbscanTF~, indexed by the result of dbscanTFSel
// (0: distance function returns int, 1: distance function returns real).
ValueMapping dbscanTFVMs[] = {dbscanTFVM<CcInt>, dbscanTFVM<CcReal>};
/*
Algebra class ~ClusterDBScanAlgebra~
*/
// Algebra class registering the operators dbscanR, dbscanM, dbscanF and
// dbscanTF. SetUsesMemory() is called on each of them; their value mappings
// query the memory assigned to the operator via qp->GetMemorySize.
class ClusterDBScanAlgebra : public Algebra
{
 public:
  ClusterDBScanAlgebra() : Algebra()
  {
    // rectangle based clustering
    AddOperator(dbscanRInfo(), dbscanRVM, dbscanRSel, dbscanRTM)
        ->SetUsesMemory();

    // clustering with predefined distance functions
    AddOperator(dbscanMInfo(), dbscanMVM, dbscanMSel, dbscanMTM)
        ->SetUsesMemory();

    // clustering with a user defined attribute distance function
    AddOperator(dbscanFInfo(), dbscanFVM, dbscanFSel, dbscanFTM)
        ->SetUsesMemory();

    // clustering with a user defined tuple distance function
    AddOperator(dbscanTFInfo(), dbscanTFVMs, dbscanTFSel, dbscanTFTM)
        ->SetUsesMemory();
  }

  ~ClusterDBScanAlgebra() {}
};
}
// Entry point of the algebra module: stores the given nested list and query
// processor instances in the global pointers nl and qp and returns a newly
// created ClusterDBScanAlgebra.
extern "C"
Algebra* InitializeDBScanAlgebra( NestedList* nlRef, QueryProcessor* qpRef)
{
  qp = qpRef;
  nl = nlRef;
  Algebra* algebra = new clusterdbscanalg::ClusterDBScanAlgebra();
  return algebra;
}