1178 lines
33 KiB
C++
1178 lines
33 KiB
C++
/*
|
|
----
|
|
This file is part of SECONDO.
|
|
|
|
Copyright (C) 2004, University in Hagen, Department of Computer Science,
|
|
Database Systems for New Applications.
|
|
|
|
SECONDO is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
SECONDO is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with SECONDO; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
----
|
|
|
|
//[_] [\_]
|
|
//characters [1] verbatim: [$] [$]
|
|
//characters [2] formula: [$] [$]
|
|
//characters [3] capital: [\textsc{] [}]
|
|
//characters [4] teletype: [\texttt{] [}]
|
|
|
|
1 Source file "OpticsAlgebra.cpp"[4]
|
|
|
|
March-October 2014, Marius Haug
|
|
|
|
1.1 Overview
|
|
|
|
This file contains the implementation of the OpticsAlgebra.
|
|
|
|
1.2 Includes
|
|
|
|
*/
|
|
#include "NestedList.h"
|
|
#include "QueryProcessor.h"
|
|
#include "Algebras/Relation-C++/RelationAlgebra.h"
|
|
#include "LogMsg.h"
|
|
#include "Stream.h"
|
|
#include "StandardTypes.h"
|
|
#include "Algebras/Spatial/SpatialAlgebra.h"
|
|
#include "Algebras/SymbolicTrajectory/Algorithms.h"
|
|
#include "DistFunction.h"
|
|
|
|
#include "OpticsGen.h"
|
|
#include "SetOfObjectsR.h"
|
|
#include "SetOfObjectsM.h"
|
|
|
|
#include "Symbols.h"
|
|
|
|
#include <limits.h>
|
|
#include <float.h>
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <algorithm>
|
|
|
|
extern NestedList* nl;
|
|
extern QueryProcessor* qp;
|
|
|
|
|
|
using namespace std;
|
|
|
|
namespace clusteropticsalg
|
|
{
|
|
|
|
/*
|
|
1 R-tree based variant
|
|
|
|
|
|
1.1 Type mapping method ~opticsRTM~
|
|
|
|
*/
|
|
/*
Type mapping of operator ~opticsR~.

Expects the argument list (stream(tuple(...)) (attrName real int)), where
~attrName~ must name an attribute of the incoming tuples whose type is a
rectangle of dimension 2, 3, 4 or 8.

On success the result is (APPEND (attrIndex) (stream(tuple(...)))): the
tuple type is the input tuple type extended by the attributes CoreDist
(real), ReachDist (real), Processed (bool) and Eps (real), and the zero-based
index of the cluster attribute is appended to the operator arguments.
On any mismatch a type error is returned.

*/
ListExpr opticsRTM( ListExpr args )
{
  if(nl->ListLength(args) != 2)
  {
    ErrorReporter::ReportError("two elements expected");
    return nl->TypeError();
  }

  ListExpr stream = nl->First(args);

  if(!Stream<Tuple>::checkType(stream))
  {
    return listutils::typeError("stream(Tuple) expected");
  }

  //Check the arguments
  ListExpr arguments = nl->Second(args);

  if(nl->ListLength(arguments) != 3)
  {
    ErrorReporter::ReportError("non conform list (three arguments expected)");
    return nl->TypeError();
  }

  if(!CcReal::checkType(nl->Second(arguments)))
  {
    return listutils::typeError("arg2 is not a real (Eps)");
  }

  if(!CcInt::checkType(nl->Third(arguments)))
  {
    return listutils::typeError("arg3 is not an int (MinPts)");
  }

  //The attribute name must be a symbol; SymbolValue must not be called
  //on any other kind of list element
  ListExpr attrNameList = nl->First(arguments);

  if(nl->AtomType(attrNameList) != SymbolType)
  {
    return listutils::typeError("arg1 is not an attribute name");
  }

  //Check the attribute name, is it in the tuple list
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr attrType;
  string attrName = nl->SymbolValue(attrNameList);
  int found = FindAttribute(attrList, attrName, attrType);

  if(found == 0)
  {
    ErrorReporter::ReportError("Attribute "
      + attrName + " is not a member of the tuple");
    return nl->TypeError();
  }

  if( !Rectangle<2>::checkType(attrType)
   && !Rectangle<3>::checkType(attrType)
   && !Rectangle<4>::checkType(attrType)
   && !Rectangle<8>::checkType(attrType) )
  {
    return listutils::typeError("Attribute " + attrName + " not of type "
      + Rectangle<2>::BasicType() + ", "
      + Rectangle<3>::BasicType() + ", "
      + Rectangle<4>::BasicType() + " or "
      + Rectangle<8>::BasicType() );
  }

  //Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;

  for(ListExpr rest = nl->Rest(attrList);
      !nl->IsEmpty(rest);
      rest = nl->Rest(rest))
  {
    lastlistn = nl->Append(lastlistn, nl->First(rest));
  }

  //Append the attributes produced by the operator
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("CoreDist")
    ,nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("ReachDist")
    ,nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("Processed")
    ,nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("Eps")
    ,nl->SymbolAtom(CcReal::BasicType())));

  //FindAttribute counts from 1, the value mapping expects a 0-based index
  return nl->ThreeElemList(nl->SymbolAtom(Symbol::APPEND())
    ,nl->OneElemList(nl->IntAtom(found-1))
    ,nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM())
      ,nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType())
        ,newAttrList)));
}
|
|
|
|
/*
|
|
1.2 Value Mapping for the R-tree variant
|
|
|
|
*/
|
|
template <int dim>
|
|
int opticsRVM1(Word* args, Word& result, int message, Word& local, Supplier s)
|
|
{
|
|
|
|
typedef OpticsGen<Rectangle<dim>,
|
|
SetOfObjectsR<RectDist<dim>, dim>,
|
|
RectDist<dim> > opticsclass;
|
|
|
|
opticsclass* info = (opticsclass*) local.addr;
|
|
|
|
switch (message)
|
|
{
|
|
case OPEN :
|
|
{
|
|
if(info){
|
|
delete info;
|
|
local.addr=0;
|
|
}
|
|
|
|
//set the result type of the tuple
|
|
ListExpr resultType = nl->Second(GetTupleResultType(s));
|
|
//set the given eps
|
|
Supplier son = qp->GetSupplier(args[1].addr, 1);
|
|
Word argument;
|
|
qp->Request(son, argument);
|
|
CcReal* Eps = ((CcReal*) argument.addr);
|
|
if(!Eps->IsDefined()){
|
|
return 0;
|
|
}
|
|
double eps = Eps->GetValue();
|
|
if(eps <= 0) {
|
|
return 0;
|
|
}
|
|
|
|
//set the given minPts
|
|
son = qp->GetSupplier(args[1].addr, 2);
|
|
qp->Request(son, argument);
|
|
CcInt* MinPts = ((CcInt*)argument.addr);
|
|
if(!MinPts->IsDefined()){
|
|
return 0;
|
|
}
|
|
int minPts = MinPts->GetValue();
|
|
if(minPts < 1){
|
|
return 0;
|
|
}
|
|
//set the index of the attribute in the tuple
|
|
int attrPos = static_cast<CcInt*>(args[2].addr)->GetIntval();
|
|
size_t maxMem = qp->GetMemorySize(s)*1024*1024;
|
|
double UNDEFINED = -1.0;
|
|
|
|
RectDist<dim> df;
|
|
local.addr = new opticsclass(
|
|
args[0], attrPos, eps, minPts,
|
|
UNDEFINED, resultType, maxMem, df);
|
|
return 0;
|
|
}
|
|
case REQUEST : {
|
|
result.addr = info?info->next():0;
|
|
return result.addr?YIELD:CANCEL;
|
|
}
|
|
case CLOSE : {
|
|
if(info){
|
|
delete info;
|
|
local.addr = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
1.3 Selection method for value mapping array ~opticsRRecSL~
|
|
|
|
*/
|
|
/*
Selection function of ~opticsR~: maps the rectangle dimension of the
cluster attribute (2, 3, 4, 8) to the index (0, 1, 2, 3) in the value
mapping array. Returns -1 for any other attribute type.

*/
int opticsRRecSL(ListExpr args)
{
  ListExpr tupleAttrs = nl->Second(nl->Second(nl->First(args)));
  string clusterAttr = nl->SymbolValue(nl->First(nl->Second(args)));
  ListExpr clusterType;
  int pos = FindAttribute(tupleAttrs, clusterAttr, clusterType);
  assert(pos > 0);

  if(Rectangle<2>::checkType(clusterType)) return 0;
  if(Rectangle<3>::checkType(clusterType)) return 1;
  if(Rectangle<4>::checkType(clusterType)) return 2;
  if(Rectangle<8>::checkType(clusterType)) return 3;

  return -1;
}
|
|
|
|
/*
|
|
1.4 ValueMapping array
|
|
|
|
*/
|
|
|
|
// Value mappings of opticsR; the position of each entry must match the
// index returned by opticsRRecSL.
ValueMapping opticsRVM[] =
{
  opticsRVM1<2>,   // 0
  opticsRVM1<3>,   // 1
  opticsRVM1<4>,   // 2
  opticsRVM1<8>    // 3
};
|
|
|
|
|
|
/*
|
|
1.5 Struct ~opticsInfoR~
|
|
|
|
*/
|
|
|
|
struct opticsInfoR : OperatorInfo
|
|
{
|
|
opticsInfoR() : OperatorInfo()
|
|
{
|
|
name = "opticsR";
|
|
signature = "stream(tuple) x IDENT x real x int -> stream(tuple)";
|
|
syntax = "_ opticsR[_, _, _]";
|
|
meaning = "This operator will order data to identify the cluster "
|
|
"structure. The operator uses the MMRTree index structure. The "
|
|
"first paramater has to be a stream of tuple, the second is the "
|
|
"attribute for clustering, the third is eps and the fourth is "
|
|
"MinPts. The return value is a stream of tuples."
|
|
"The supported type to cluster is the bbox.";
|
|
example = "query Kneipen feed extend[B : bbox(.GeoData)]"
|
|
"opticsR[B, 1000.0, 5] consume";
|
|
}
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
2 Variant using an M-tree
|
|
|
|
|
|
2.1 Type mapping method ~opticsMTM~
|
|
|
|
*/
|
|
/*
Type mapping of operator ~opticsM~.

Expects (stream(tuple(...)) (attrName real int [geoid])). ~attrName~ must
name an attribute of type int, real, point, string, picture, mlabel, mpoint,
cupoint or cmpoint.

On success the result is (APPEND (attrIndex) (stream(tuple(...)))): the
input tuple type extended by CoreDist (real), ReachDist (real), Processed
(bool) and Eps (real), with the zero-based index of the cluster attribute
appended to the operator arguments.

*/
ListExpr opticsMTM(ListExpr args) {
  if (nl->ListLength(args) != 2) {
    ErrorReporter::ReportError("two elements expected");
    return nl->TypeError();
  }
  ListExpr stream = nl->First(args);
  if (!Stream<Tuple>::checkType(stream)) {
    return listutils::typeError("stream(tuple) expected");
  }
  ListExpr arguments = nl->Second(args);
  int noArguments = nl->ListLength(arguments);
  if (noArguments != 3 && noArguments != 4) {
    ErrorReporter::ReportError("non conform list (3 or 4 arguments expected)");
    return nl->TypeError();
  }
  if (!CcReal::checkType(nl->Second(arguments))) {
    return listutils::typeError("arg2 is not a real (Eps)");
  }
  if (!CcInt::checkType(nl->Third(arguments))) {
    return listutils::typeError("arg3 is not an int (MinPts)");
  }
  if (noArguments == 4) {
    if (!Geoid::checkType(nl->Fourth(arguments))) {
      return listutils::typeError("arg4 is not a Geoid");
    }
  }
  // The attribute name must be a symbol; SymbolValue must not be called on
  // any other kind of list element
  ListExpr attrNameList = nl->First(arguments);
  if (nl->AtomType(attrNameList) != SymbolType) {
    return listutils::typeError("arg1 is not an attribute name");
  }
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr attrType;
  string attrName = nl->SymbolValue(attrNameList);
  int found = FindAttribute(attrList, attrName, attrType);
  if (found == 0) {
    ErrorReporter::ReportError("Attribute " + attrName + " does not exist");
    return nl->TypeError();
  }
  if (!CcInt::checkType(attrType) && !CcReal::checkType(attrType) &&
      !Point::checkType(attrType) && !CcString::checkType(attrType) &&
      !Picture::checkType(attrType) && !stj::MLabel::checkType(attrType) &&
      !temporalalgebra::MPoint::checkType(attrType) &&
      !temporalalgebra::CUPoint::checkType(attrType) &&
      !temporalalgebra::CMPoint::checkType(attrType)) {
    return listutils::typeError("Attribute " + attrName + " not of type "
      + CcInt::BasicType() + ", " + CcReal::BasicType() + ", "
      + Point::BasicType() + ", " + CcString::BasicType() + ", "
      + Picture::BasicType() + ", " + stj::MLabel::BasicType() + ", "
      + temporalalgebra::MPoint::BasicType() + ", "
      + temporalalgebra::CUPoint::BasicType() + ", "
      + temporalalgebra::CMPoint::BasicType());
  }
  //Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;
  for (ListExpr rest = nl->Rest(attrList);
       !nl->IsEmpty(rest);
       rest = nl->Rest(rest)) {
    lastlistn = nl->Append(lastlistn, nl->First(rest));
  }
  // Append the attributes produced by the operator
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("CoreDist"),
                                        nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("ReachDist"),
                                        nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("Processed"),
                                        nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("Eps"),
                                        nl->SymbolAtom(CcReal::BasicType())));
  // FindAttribute counts from 1, the value mapping expects a 0-based index
  return nl->ThreeElemList(nl->SymbolAtom(Symbol::APPEND()),
           nl->OneElemList(nl->IntAtom(found - 1)),
           nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
             nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), newAttrList)));
}
|
|
|
|
|
|
/*
|
|
2.2 Value Mapping Function for M-tree variant
|
|
|
|
*/
|
|
template <class T, class DistComp>
|
|
int opticsMVM1(Word* args, Word& result, int message, Word& local, Supplier s) {
|
|
typedef OpticsGen<T, SetOfObjectsM<DistComp, T>, DistComp> opticsclass;
|
|
opticsclass* info = (opticsclass*) local.addr;
|
|
switch (message) {
|
|
case OPEN : {
|
|
if(info){
|
|
delete info;
|
|
local.addr=0;
|
|
}
|
|
ListExpr resultType = nl->Second(GetTupleResultType(s));
|
|
Supplier son = qp->GetSupplier(args[1].addr, 1);
|
|
Word argument;
|
|
qp->Request(son, argument);
|
|
CcReal* Eps = ((CcReal*) argument.addr);
|
|
if (!Eps->IsDefined()) {
|
|
return 0;
|
|
}
|
|
double eps = Eps->GetValue();
|
|
if (eps <= 0) {
|
|
return 0;
|
|
}
|
|
son = qp->GetSupplier(args[1].addr, 2);
|
|
qp->Request(son, argument);
|
|
CcInt* MinPts = ((CcInt*)argument.addr);
|
|
if (!MinPts->IsDefined()) {
|
|
return 0;
|
|
}
|
|
int minPts = MinPts->GetValue();
|
|
if (minPts < 1) {
|
|
return 0;
|
|
}
|
|
int attrPos = static_cast<CcInt*>(args[2].addr)->GetIntval();
|
|
size_t maxMem = qp->GetMemorySize(s) * 1024 * 1024;
|
|
double UNDEFINED = -1.0;
|
|
DistComp df;
|
|
local.addr = new opticsclass(args[0], attrPos, eps, minPts,
|
|
UNDEFINED, resultType, maxMem, df);
|
|
return 0;
|
|
}
|
|
case REQUEST : {
|
|
result.addr = info ? info->next() : 0;
|
|
return result.addr ? YIELD : CANCEL;
|
|
}
|
|
case CLOSE : {
|
|
if (info) {
|
|
delete info;
|
|
local.addr = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
2.3 Selection method for value mapping array ~opticsMDisSL~
|
|
|
|
*/
|
|
/*
Selection function of ~opticsM~: returns the index into the value mapping
array for the type of the cluster attribute. For point, mpoint, cupoint and
cmpoint the index is raised by one if a fourth (geoid) argument is given.
Returns -1 for an unsupported attribute type.

*/
int opticsMDisSL(ListExpr args)
{
  ListExpr params = nl->Second(args);
  ListExpr tupleAttrs = nl->Second(nl->Second(nl->First(args)));
  string clusterAttr = nl->SymbolValue(nl->First(params));
  ListExpr clusterType;
  int pos = FindAttribute(tupleAttrs, clusterAttr, clusterType);
  assert(pos > 0);

  // 1 selects the geoid variant, which directly follows the plain variant
  int geoid = (nl->ListLength(params) == 4) ? 1 : 0;

  if (CcInt::checkType(clusterType))                    return 0;
  if (CcReal::checkType(clusterType))                   return 1;
  if (Point::checkType(clusterType))                    return 2 + geoid;
  if (CcString::checkType(clusterType))                 return 4;
  if (Picture::checkType(clusterType))                  return 5;
  if (stj::MLabel::checkType(clusterType))              return 6;
  if (temporalalgebra::MPoint::checkType(clusterType))  return 7 + geoid;
  if (temporalalgebra::CUPoint::checkType(clusterType)) return 9 + geoid;
  if (temporalalgebra::CMPoint::checkType(clusterType)) return 11 + geoid;

  return -1;
}
|
|
|
|
/*
|
|
2.4 Value mapping array ~opticsMVM[]~
|
|
|
|
*/
|
|
|
|
// Value mappings of opticsM; the position of each entry must match the
// index returned by opticsMDisSL.
ValueMapping opticsMVM[] = {
  opticsMVM1<CcInt, IntDist>,                                  //  0
  opticsMVM1<CcReal, RealDist>,                                //  1
  opticsMVM1<Point, PointDist<false> >,                        //  2
  opticsMVM1<Point, PointDist<true> >,                         //  3
  opticsMVM1<CcString, StringDist>,                            //  4
  opticsMVM1<Picture, PictureDist>,                            //  5
  opticsMVM1<stj::MLabel, MLabelDist>,                         //  6
  opticsMVM1<temporalalgebra::MPoint, MPointDist<false> >,     //  7
  opticsMVM1<temporalalgebra::MPoint, MPointDist<true> >,      //  8
  opticsMVM1<temporalalgebra::CUPoint, CUPointDist<false> >,   //  9
  opticsMVM1<temporalalgebra::CUPoint, CUPointDist<true> >,    // 10
  opticsMVM1<temporalalgebra::CMPoint, CMPointDist<false> >,   // 11
  opticsMVM1<temporalalgebra::CMPoint, CMPointDist<true> >,    // 12
};
|
|
|
|
|
|
/*
|
|
2.5 Struct ~opticsInfoM~
|
|
|
|
*/
|
|
struct opticsInfoM : OperatorInfo
|
|
{
|
|
opticsInfoM() : OperatorInfo()
|
|
{
|
|
name = "opticsM";
|
|
signature = "stream(tuple) x IDENT x real x int (x geoid) -> stream(Tuple)";
|
|
syntax = "_ opticsM [_, _, _]";
|
|
meaning = "This operator will order data to identify the cluster "
|
|
"structure. The operator uses the MMMTree index structure. The "
|
|
"first paramater has to be a stream of tuple, the second is the "
|
|
"attribute for clustering, the third is eps and the fourth is "
|
|
"MinPts. The return value is a stream of tuples."
|
|
"The supported types to cluster are point, picture, int, real "
|
|
"string, mpoint, cupoint, cmpoint.";
|
|
example = "query Kneipen feed opticsM[Name, 10.0, 5] consume";
|
|
}
|
|
};
|
|
|
|
/*
|
|
3 Variant using an M-tree and user-defined functions
|
|
|
|
3.1 Type mapping method ~opticsFTM~
|
|
|
|
*/
|
|
/*
Type mapping of operator ~opticsF~.

Expects (stream(tuple(...)) attrName real int (map T T R)) where ~attrName~
names an attribute of type T and R is int or real.

On success the result is (APPEND (attrIndex) (stream(tuple(...)))): the
input tuple type extended by CoreDist (real), ReachDist (real), Processed
(bool) and Eps (real), with the zero-based index of the cluster attribute
appended to the operator arguments.

*/
ListExpr opticsFTM( ListExpr args )
{
  if(nl->ListLength(args) != 5)
  {
    return listutils::typeError("5 elements expected ");
  }

  ListExpr stream = nl->First(args);

  if(!Stream<Tuple>::checkType(stream))
  {
    return listutils::typeError("stream(Tuple) expected");
  }

  //Check the arguments
  if(!CcReal::checkType(nl->Third(args)))
  {
    return listutils::typeError("arg3 is not a real (Eps)");
  }

  if(!CcInt::checkType(nl->Fourth(args)))
  {
    return listutils::typeError("arg4 is not an int (MinPts)");
  }

  ListExpr fun = nl->Fifth(args);

  if(!listutils::isMap<2>(fun))
  {
    return listutils::typeError("arg5 is not a map");
  }

  // fun = (map arg1 arg2 result)
  ListExpr funres = nl->Fourth(fun);
  if(!CcInt::checkType(funres) && !CcReal::checkType(funres))
  {
    return listutils::typeError("function result not of type int or real");
  }

  //The attribute name must be a symbol; SymbolValue must not be called
  //on any other kind of list element
  ListExpr attrNameList = nl->Second(args);

  if(nl->AtomType(attrNameList) != SymbolType)
  {
    return listutils::typeError("arg2 is not an attribute name");
  }

  //Check the attribute name, is it in the tuple list
  ListExpr attrType;
  ListExpr attrList = nl->Second(nl->Second(stream));
  string attrName = nl->SymbolValue(attrNameList);
  int found = FindAttribute(attrList, attrName, attrType);

  if(found == 0)
  {
    ErrorReporter::ReportError("Attribute "
      + attrName + " is not a member of the tuple");
    return nl->TypeError();
  }

  //Both function arguments must have the type of the cluster attribute
  if(!nl->Equal(attrType, nl->Second(fun))
  || !nl->Equal(attrType, nl->Third(fun)))
  {
    return listutils::typeError("Clustervalue type and function value type "
                                "different");
  }

  //Copy attrlist to newattrlist
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;

  for(ListExpr rest = nl->Rest(attrList);
      !nl->IsEmpty(rest);
      rest = nl->Rest(rest))
  {
    lastlistn = nl->Append(lastlistn, nl->First(rest));
  }

  //Append the attributes produced by the operator
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("CoreDist")
    ,nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("ReachDist")
    ,nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("Processed")
    ,nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn
    ,nl->TwoElemList(nl->SymbolAtom("Eps")
    ,nl->SymbolAtom(CcReal::BasicType())));

  //FindAttribute counts from 1, the value mapping expects a 0-based index
  return nl->ThreeElemList(nl->SymbolAtom(Symbol::APPEND())
    ,nl->OneElemList(nl->IntAtom(found-1))
    ,nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM())
      ,nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType())
        ,newAttrList)));
}
|
|
|
|
|
|
/*
|
|
3.2 Value Mapping Function
|
|
|
|
*/
|
|
|
|
template <class T, class DistComp>
|
|
int opticsMFVM1(Word* args, Word& result,
|
|
int message, Word& local, Supplier s)
|
|
{
|
|
|
|
typedef OpticsGen<T,
|
|
SetOfObjectsM<DistComp,T>,
|
|
DistComp> opticsclass;
|
|
|
|
opticsclass* info = (opticsclass*) local.addr;
|
|
|
|
switch (message)
|
|
{
|
|
case OPEN :
|
|
{
|
|
if(info){
|
|
delete info;
|
|
local.addr=0;
|
|
}
|
|
|
|
//set the result type of the tuple
|
|
ListExpr resultType = nl->Second(GetTupleResultType(s));
|
|
//set the given eps
|
|
CcReal* Eps = (CcReal*) args[2].addr;
|
|
if(!Eps->IsDefined()){
|
|
return 0;
|
|
}
|
|
double eps = Eps->GetValue();
|
|
if(eps <= 0) {
|
|
return 0;
|
|
}
|
|
|
|
//set the given minPts
|
|
CcInt* MinPts = (CcInt*) args[3].addr;
|
|
if(!MinPts->IsDefined()){
|
|
return 0;
|
|
}
|
|
int minPts = MinPts->GetValue();
|
|
if(minPts < 1){
|
|
return 0;
|
|
}
|
|
//set the index of the attribute in the tuple
|
|
int attrPos = static_cast<CcInt*>(args[5].addr)->GetIntval();
|
|
size_t maxMem = qp->GetMemorySize(s)*1024*1024;
|
|
double UNDEFINED = -1.0;
|
|
|
|
DistComp df(qp,args[4].addr);
|
|
local.addr = new opticsclass(
|
|
args[0], attrPos, eps, minPts,
|
|
UNDEFINED, resultType, maxMem, df);
|
|
return 0;
|
|
}
|
|
case REQUEST : {
|
|
result.addr = info?info->next():0;
|
|
return result.addr?YIELD:CANCEL;
|
|
}
|
|
case CLOSE : {
|
|
if(info){
|
|
delete info;
|
|
local.addr = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
3.3 Selection method for value mapping array ~opticsFDisSL~
|
|
|
|
*/
|
|
/*
Selection function of ~opticsF~: 0 if the distance function (fifth
argument) returns int, 1 if it returns real, -1 otherwise.

*/
int opticsFDisSL(ListExpr args)
{
  ListExpr distanceType = nl->Fourth(nl->Fifth(args));

  if(CcInt::checkType(distanceType))
  {
    return 0;
  }
  return CcReal::checkType(distanceType) ? 1 : -1;
}
|
|
|
|
/*
|
|
3.4 Value mapping array ~opticsFVM[]~
|
|
|
|
*/
|
|
|
|
// Value mappings of opticsF; the position of each entry must match the
// index returned by opticsFDisSL.
ValueMapping opticsFVM[] =
{
  opticsMFVM1<Attribute, CustomDist<Attribute*, CcInt> >,   // 0: int result
  opticsMFVM1<Attribute, CustomDist<Attribute*, CcReal> >   // 1: real result
};
|
|
|
|
|
|
/*
|
|
3.5 Struct ~opticsInfoF~
|
|
|
|
*/
|
|
|
|
struct opticsInfoF : OperatorInfo
|
|
{
|
|
opticsInfoF() : OperatorInfo()
|
|
{
|
|
name = "opticsF";
|
|
signature = "stream(tuple) x IDENT x real x int x fun -> stream(tuple)";
|
|
syntax = "_ opticsF [_, _, _, fun]";
|
|
meaning = "This operator will ordering data to identify the cluster "
|
|
"structure. The operator uses the MMMTree index structure. The "
|
|
"first paramater has to be a stream of tuple, the second is the "
|
|
"attribute for clustering, the third is eps, the fourth is "
|
|
"MinPts and the sixth is the distance function. The return "
|
|
"value is a stream of tuples."
|
|
"The supported types to cluster are point, picture, int, real "
|
|
"and string.";
|
|
example = "query plz feed opticsF[PLZ, 10.0, 5, fun(i1: int, i2: int)"
|
|
"i1 - i2] consume";
|
|
}
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
3 Variant using an M-tree and a user-defined function processing two tuples
|
|
|
|
3.1 Type mapping method ~opticsTFTM~
|
|
|
|
*/
|
|
/*
Type mapping of operator ~opticsTF~.

Expects (stream(tuple(...)) real int (map T T R)) where T is a tuple type
and R is int or real.

The result is (stream(tuple(...))) with the input tuple type extended by
CoreDist (real), ReachDist (real), Processed (bool) and Eps (real). No
argument is appended, because the distance function works on whole tuples.

*/
ListExpr opticsTFTM(ListExpr args) {
  if (nl->ListLength(args) != 4) {
    return listutils::typeError("four elements expected");
  }
  ListExpr stream = nl->First(args);
  if (!Stream<Tuple>::checkType(stream)) {
    return listutils::typeError("tuple stream expected");
  }
  if (!CcReal::checkType(nl->Second(args))) {
    return listutils::typeError("eps must have type real");
  }
  if (!CcInt::checkType(nl->Third(args))) {
    return listutils::typeError("MinPts must have type int");
  }
  ListExpr fun = nl->Fourth(args);
  if (!listutils::isMap<2>(fun)) {
    return listutils::typeError("function with two arguments required");
  }
  // fun = (map arg1 arg2 result)
  if (!nl->Equal(nl->Second(fun), nl->Third(fun)) ||
      (!CcInt::checkType(nl->Fourth(fun)) &&
       !CcReal::checkType(nl->Fourth(fun)))) {
    return listutils::typeError("fun is not of type: T x T -> {int, real}");
  }
  // the value mapping hands tuples to the function, so T must be a tuple
  if (!Tuple::checkType(nl->Second(fun))) {
    return listutils::typeError("fun must take two tuples as arguments");
  }
  //Copy attrlist to newattrlist
  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr newAttrList = nl->OneElemList(nl->First(attrList));
  ListExpr lastlistn = newAttrList;
  for (ListExpr rest = nl->Rest(attrList);
       !nl->IsEmpty(rest);
       rest = nl->Rest(rest)) {
    lastlistn = nl->Append(lastlistn, nl->First(rest));
  }
  // Append the attributes produced by the operator
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("CoreDist"),
                                        nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("ReachDist"),
                                        nl->SymbolAtom(CcReal::BasicType())));
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("Processed"),
                                        nl->SymbolAtom(CcBool::BasicType())));
  lastlistn = nl->Append(lastlistn, nl->TwoElemList(nl->SymbolAtom("Eps"),
                                        nl->SymbolAtom(CcReal::BasicType())));
  return nl->TwoElemList(nl->SymbolAtom(Symbol::STREAM()),
                         nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()),
                                         newAttrList));
}
|
|
|
|
/*
|
|
3.2 Value Mapping Function
|
|
|
|
*/
|
|
template <class T, class DistComp>
|
|
int opticsTFVM(Word* args, Word& result, int message, Word& local, Supplier s) {
|
|
typedef OpticsGen<T, SetOfObjectsM<DistComp,T>, DistComp> opticsclass;
|
|
opticsclass* info = (opticsclass*) local.addr;
|
|
switch (message) {
|
|
case OPEN : {
|
|
if (info) {
|
|
delete info;
|
|
local.addr = 0;
|
|
}
|
|
ListExpr resultType = nl->Second(GetTupleResultType(s));
|
|
CcReal* Eps = (CcReal*) args[1].addr;
|
|
CcInt* MinPts = (CcInt*) args[2].addr;
|
|
if (!Eps->IsDefined() || !MinPts->IsDefined()) {
|
|
return 0;
|
|
}
|
|
double eps = Eps->GetValue();
|
|
int minPts = MinPts->GetValue();
|
|
if(eps <= 0 || minPts <= 0) {
|
|
return 0;
|
|
}
|
|
size_t maxMem = qp->GetMemorySize(s) * 1024 * 1024;
|
|
double UNDEFINED = -1.0;
|
|
DistComp df(qp, args[3].addr);
|
|
local.addr = new opticsclass(args[0], -1, eps, minPts, UNDEFINED,
|
|
resultType, maxMem, df);
|
|
return 0;
|
|
}
|
|
case REQUEST : {
|
|
result.addr = info ? info->next() : 0;
|
|
return result.addr ? YIELD : CANCEL;
|
|
}
|
|
case CLOSE : {
|
|
if (info) {
|
|
delete info;
|
|
local.addr = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
3.3 Selection method for value mapping array ~opticsTFSelect~
|
|
|
|
*/
|
|
/*
Selection function of ~opticsTF~: 0 if the distance function (fourth
argument) returns int, 1 if it returns real, -1 otherwise.

*/
int opticsTFSelect(ListExpr args) {
  ListExpr distanceType = nl->Fourth(nl->Fourth(args));
  if (CcInt::checkType(distanceType)) {
    return 0;
  }
  return CcReal::checkType(distanceType) ? 1 : -1;
}
|
|
|
|
/*
|
|
3.4 Value mapping array ~opticsTFVMs[]~
|
|
|
|
*/
|
|
|
|
// Value mappings of opticsTF; the position of each entry must match the
// index returned by opticsTFSelect.
ValueMapping opticsTFVMs[] = {
  opticsTFVM<Tuple, TupleDist<CcInt> >,    // 0: int result
  opticsTFVM<Tuple, TupleDist<CcReal> >    // 1: real result
};
|
|
|
|
|
|
/*
|
|
3.5 Struct ~opticsInfoTF~
|
|
|
|
*/
|
|
|
|
struct opticsInfoTF : OperatorInfo
|
|
{
|
|
opticsInfoTF() : OperatorInfo()
|
|
{
|
|
name = "opticsTF";
|
|
signature = "stream(tuple) x real x int x fun -> stream(tuple)";
|
|
syntax = "_ opticsF [_, _, fun]";
|
|
meaning = "This operator will order the data to identify the cluster "
|
|
"structure. The operator uses the MMMTree index structure. The "
|
|
"first paramater has to be a stream of tuple, the second is eps,"
|
|
" the third is MinPts and the fourth is a distance function "
|
|
"taking two tuples. The operator returns a stream of tuples.";
|
|
example = "query Kneipen feed opticsTF[500.0, 5, fun(t1: tuple((Name: "
|
|
"string, Strasse: string, GeoData: point)), t2: tuple((Name: "
|
|
"string, Strasse: string, GeoData: point))) distance(attr(t1, "
|
|
"GeoData), attr(t2, GeoData))] count";
|
|
}
|
|
};
|
|
|
|
|
|
/*
|
|
4 Operator ~extractDbScan~
|
|
|
|
4.1 Type Mapping
|
|
|
|
*/
|
|
/*
Type mapping of operator ~extractDbScan~.

Expects (stream(tuple(...)) real). The tuples must contain the real
attributes CoreDist, ReachDist and Eps and must not contain an attribute
named Cid.

On success the result is (APPEND (coreDistIdx reachDistIdx epsIdx)
(stream(tuple(...)))): the input tuple type extended by Cid (int), with the
zero-based indexes of the three required attributes appended to the
operator arguments.

*/
ListExpr extractDbScanTM(ListExpr args){

  if(!nl->HasLength(args,2)){
    return listutils::typeError("two arguments expected");
  }
  ListExpr stream = nl->First(args);
  if(!Stream<Tuple>::checkType(stream)){
    return listutils::typeError("first argument must be a tuple stream");
  }
  if(!CcReal::checkType(nl->Second(args))){
    return listutils::typeError("second argument must be a real");
  }

  ListExpr attrList = nl->Second(nl->Second(stream));
  ListExpr type;

  int coreDistPos = listutils::findAttribute(attrList, "CoreDist", type);
  if(!coreDistPos){
    return listutils::typeError("Attribute CoreDist not member of the stream");
  }
  if(!CcReal::checkType(type)){
    return listutils::typeError("Attribute CoreDist not of type real");
  }

  int reachDistPos = listutils::findAttribute(attrList, "ReachDist", type);
  if(!reachDistPos){
    return listutils::typeError("Attribute ReachDist not "
                                "member of the stream");
  }
  if(!CcReal::checkType(type)){
    return listutils::typeError("Attribute ReachDist not of type real");
  }

  int EpsPos = listutils::findAttribute(attrList, "Eps", type);
  if(!EpsPos){
    return listutils::typeError("Attribute Eps not member of the stream");
  }
  if(!CcReal::checkType(type)){
    return listutils::typeError("Attribute Eps not of type real");
  }

  // the result attribute Cid must not clash with an existing attribute
  int cidPos = listutils::findAttribute(attrList,"Cid", type);
  if(cidPos){
    return listutils::typeError("Attribute Cid already "
                                "part of the attributes");
  }

  ListExpr newAttr = nl->OneElemList(
                       nl->TwoElemList( nl->SymbolAtom("Cid"),
                                        listutils::basicSymbol<CcInt>()));
  ListExpr newAttrList = listutils::concat(attrList, newAttr);

  // findAttribute counts from 1, the value mapping expects 0-based indexes
  ListExpr appendList = nl->ThreeElemList( nl->IntAtom(coreDistPos-1),
                                           nl->IntAtom(reachDistPos-1),
                                           nl->IntAtom(EpsPos-1));
  return nl->ThreeElemList(
           nl->SymbolAtom(Symbols::APPEND()),
           appendList,
           nl->TwoElemList( listutils::basicSymbol<Stream<Tuple> >(),
                            nl->TwoElemList(
                              listutils::basicSymbol<Tuple>(),
                              newAttrList)));
}
|
|
|
|
/*
|
|
4.2 LocalInfo
|
|
|
|
*/
|
|
|
|
class extractLocal{
|
|
|
|
public:
|
|
extractLocal(Word _stream, double _eps, int _coreDistPos,
|
|
int _reachDistPos, int _epsPos, ListExpr type):
|
|
stream(_stream), eps(_eps), coreDistPos(_coreDistPos),
|
|
reachDistPos(_reachDistPos), epsPos(_epsPos), id(0){
|
|
tt = new TupleType(type);
|
|
stream.open();
|
|
}
|
|
|
|
~extractLocal(){
|
|
stream.close();
|
|
tt->DeleteIfAllowed();
|
|
}
|
|
|
|
Tuple* next(){
|
|
Tuple* inTuple = stream.request();
|
|
if(!inTuple){
|
|
return 0;
|
|
}
|
|
Tuple* resTuple = new Tuple(tt);
|
|
int attrCnt = inTuple->GetNoAttributes();
|
|
// copy attributes to resTuple
|
|
for(int i=0;i<attrCnt; i++){
|
|
resTuple->CopyAttribute(i,inTuple,i);
|
|
}
|
|
inTuple->DeleteIfAllowed();
|
|
CcReal* Eps = (CcReal*) resTuple->GetAttribute(epsPos);
|
|
CcReal* ReachDist = (CcReal*) resTuple->GetAttribute(reachDistPos);
|
|
CcReal* CoreDist = (CcReal*) resTuple->GetAttribute(coreDistPos);
|
|
// optics never creates undefined attributes.
|
|
// undefined is simulated by a value < 0
|
|
if( !Eps->IsDefined() || !ReachDist->IsDefined()
|
|
|| !CoreDist->IsDefined()){
|
|
resTuple->PutAttribute(attrCnt, new CcInt(false,0));
|
|
return resTuple;
|
|
}
|
|
double oldEps = Eps->GetValue();
|
|
if(eps > oldEps){ // check condition
|
|
resTuple->PutAttribute(attrCnt, new CcInt(false,0));
|
|
return resTuple;
|
|
}
|
|
double reachDist = ReachDist->GetValue();
|
|
double coreDist = CoreDist->GetValue();
|
|
if((reachDist > eps) || (reachDist < 0)){
|
|
if((coreDist <= eps) && !(coreDist < 0)){
|
|
// start new cluster
|
|
id++;
|
|
resTuple->PutAttribute(attrCnt, new CcInt(true,id));
|
|
} else {
|
|
// mark as noise
|
|
resTuple->PutAttribute(attrCnt, new CcInt(true, -2));
|
|
}
|
|
} else {
|
|
resTuple->PutAttribute(attrCnt, new CcInt(true,id));
|
|
}
|
|
return resTuple;
|
|
}
|
|
|
|
|
|
private:
|
|
Stream<Tuple> stream;
|
|
double eps;
|
|
int coreDistPos;
|
|
int reachDistPos;
|
|
int epsPos;
|
|
TupleType* tt;
|
|
int id;
|
|
};
|
|
|
|
/*
|
|
4.3 Value Mapping
|
|
|
|
*/
|
|
|
|
/*
Value mapping of ~extractDbScan~.

OPEN reads eps (args[1]) and the attribute indexes appended by the type
mapping (args[2] to args[4]) and creates the local info. If eps is
undefined or not positive, no local info is created and every REQUEST
answers CANCEL. REQUEST delivers the next tuple, CLOSE releases the local
info. Any other message yields -1.

*/
int extractDbScanVM(Word* args, Word& result,
                    int message, Word& local, Supplier s) {
  extractLocal* li = (extractLocal*) local.addr;

  if(message == REQUEST){
    result.addr = li ? li->next() : 0;
    return result.addr ? YIELD : CANCEL;
  }
  if(message != OPEN && message != CLOSE){
    return -1;
  }
  // OPEN and CLOSE both start by discarding an existing local info
  if(li){
    delete li;
    local.addr = 0;
  }
  if(message == CLOSE){
    return 0;
  }

  // ---- OPEN ----
  int coreDistIdx  = ((CcInt*)args[2].addr)->GetValue();
  int reachDistIdx = ((CcInt*)args[3].addr)->GetValue();
  int epsIdx       = ((CcInt*)args[4].addr)->GetValue();
  ListExpr tupleType = nl->Second(GetTupleResultType(s));

  CcReal* epsArg = (CcReal*) args[1].addr;
  if(!epsArg->IsDefined()){
    return 0;
  }
  double eps = epsArg->GetValue();
  if(eps<=0){
    return 0;
  }
  local.addr = new extractLocal(args[0], eps, coreDistIdx,
                                reachDistIdx, epsIdx, tupleType);
  return 0;
}
|
|
|
|
/*
|
|
4.4. Specification
|
|
|
|
*/
|
|
// User documentation of extractDbScan: signature, syntax, meaning, example.
// The operator takes its real argument in brackets, as the example shows.
OperatorSpec extractDbScanSpec(
  "stream(tuple) x real -> stream(tuple)",
  "_ extractDbScan[_]",
  " Extract cluster from a stream processed via optics.",
  "query Kneipen feed extend[B : bbox(.GeoData)] "
  "opticsR[B, 2000.0, 10] extractDbScan[500.0] consume"
);
|
|
|
|
|
|
/*
|
|
4.5 Operator definition
|
|
|
|
*/
|
|
// Operator instance of extractDbScan; it has a single value mapping, so
// the simple selection function is used.
Operator extractDbScanOP(
  "extractDbScan",               // name
  extractDbScanSpec.getStr(),    // specification
  extractDbScanVM,               // value mapping
  Operator::SimpleSelect,        // selection function
  extractDbScanTM                // type mapping
);
|
|
|
|
|
|
|
|
|
|
/*
|
|
5 Algebra class ~ClusterOpticsAlgebra~
|
|
|
|
*/
|
|
class ClusterOpticsAlgebra : public Algebra
|
|
{
|
|
public:
|
|
ClusterOpticsAlgebra() : Algebra()
|
|
{
|
|
|
|
Operator* opr =
|
|
AddOperator(opticsInfoR(), opticsRVM, opticsRRecSL, opticsRTM);
|
|
opr->SetUsesMemory();
|
|
|
|
Operator* opm =
|
|
AddOperator(opticsInfoM(), opticsMVM, opticsMDisSL, opticsMTM);
|
|
opm->SetUsesMemory();
|
|
|
|
Operator* opf =
|
|
AddOperator(opticsInfoF(), opticsFVM, opticsFDisSL, opticsFTM);
|
|
opf->SetUsesMemory();
|
|
|
|
Operator* optf =
|
|
AddOperator(opticsInfoTF(), opticsTFVMs, opticsTFSelect, opticsTFTM);
|
|
optf->SetUsesMemory();
|
|
|
|
AddOperator(&extractDbScanOP);
|
|
|
|
}
|
|
|
|
~ClusterOpticsAlgebra() {};
|
|
};
|
|
}
|
|
|
|
extern "C"
|
|
Algebra* InitializeOpticsAlgebra( NestedList* nlRef, QueryProcessor* qpRef)
|
|
{
|
|
nl = nlRef;
|
|
qp = qpRef;
|
|
|
|
return (new clusteropticsalg::ClusterOpticsAlgebra());
|
|
}
|
|
|
|
|
|
|