2345 lines
72 KiB
C++
2345 lines
72 KiB
C++
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
----
|
||
|
|
This file is part of SECONDO.
|
||
|
|
|
||
|
|
Copyright (C) 2011, University in Hagen, Department of Computer Science,
|
||
|
|
Database Systems for New Applications.
|
||
|
|
|
||
|
|
SECONDO is free software{} you can redistribute it and/or modify
|
||
|
|
it under the terms of the GNU General Public License as published by
|
||
|
|
the Free Software Foundation{} either version 2 of the License, or
|
||
|
|
(at your option) any later version.
|
||
|
|
|
||
|
|
SECONDO is distributed in the hope that it will be useful,
|
||
|
|
but WITHOUT ANY WARRANTY{} without even the implied warranty of
|
||
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
|
GNU General Public License for more details.
|
||
|
|
|
||
|
|
You should have received a copy of the GNU General Public License
|
||
|
|
along with SECONDO{} if not, write to the Free Software
|
||
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
----
|
||
|
|
|
||
|
|
|
||
|
|
//[_] [\_]
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
|
||
|
|
#include <limits>
|
||
|
|
|
||
|
|
#include "MMRTreeAlgebra.h"
|
||
|
|
#include "MMRTree.h"
|
||
|
|
|
||
|
|
//#include "MMRStarTree.h"
|
||
|
|
|
||
|
|
#include "Algebras/Relation-C++/RelationAlgebra.h"
|
||
|
|
#include "Algebras/FText/FTextAlgebra.h"
|
||
|
|
#include "StringUtils.h"
|
||
|
|
|
||
|
|
#include "Progress.h"
|
||
|
|
#include "../CostEstimation/MMRTreeAlgebraCostEstimation.h"
|
||
|
|
|
||
|
|
#include "Algebras/Spatial/Point.h"
|
||
|
|
#include "MMMTree.h"
|
||
|
|
#include "Algebras/Temporal/TemporalAlgebra.h"
|
||
|
|
#include "Algebras/MovingRegion/MovingRegionAlgebra.h"
|
||
|
|
|
||
|
|
|
||
|
|
using namespace std;
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.1 Auxiliary Functions
|
||
|
|
|
||
|
|
~hMem~
|
||
|
|
|
||
|
|
This function formats a byte value as a human readable value.
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Formats a byte count as a short human readable string.
// The value is scaled by integer division (the fraction is dropped) to the
// largest unit it reaches and suffixed with "GB", "MB" or "kB"; values
// below 1024 are printed unscaled with the suffix "b".
std::string hMem(size_t mem){
  const size_t kB = static_cast<size_t>(1) << 10;
  const size_t MB = static_cast<size_t>(1) << 20;
  const size_t GB = static_cast<size_t>(1) << 30;

  std::stringstream ss;
  if(mem >= GB){
    ss << (mem / GB) << "GB";
  } else if(mem >= MB){
    ss << (mem / MB) << "MB";
  } else if(mem >= kB){
    // '>=' like the other units: exactly 1024 bytes is "1kB", not "1024b"
    ss << (mem / kB) << "kB";
  } else {
    ss << mem << "b";
  }
  return ss.str();
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
Selection function
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Classifies the type of a join attribute:
//   0 : type is in kind SPATIAL2D
//   1 : type is in kind SPATIAL3D
//   2 : mregion
//   3 : mpoint
// Returns -1 for every other type.
static int realJoinTypeCode(ListExpr type){
  if(listutils::isKind(type,Kind::SPATIAL2D())){
    return 0;
  }
  if(listutils::isKind(type, Kind::SPATIAL3D())){
    return 1;
  }
  if(temporalalgebra::MRegion::checkType(type)){
    return 2;
  }
  if(temporalalgebra::MPoint::checkType(type)){
    return 3;
  }
  return -1;
}

// Selection function of the realJoin operators.
// Looks up the attribute named by the third argument in the first stream
// and the attribute named by the fourth argument in the second stream,
// classifies both types with realJoinTypeCode and returns
// 4 * code1 + code2, i.e. an index into a 4 x 4 value mapping table.
// Returns -1 if one of the attribute types is not supported (only
// reachable when assertions are disabled).
int realJoinSelect(ListExpr args){

  ListExpr type;

  // attribute of the first stream
  ListExpr attrList1 = nl->Second(nl->Second(nl->First(args)));
  string name1 = nl->SymbolValue(nl->Third(args));
  int index = listutils::findAttribute(attrList1,name1,type);
  assert(index>0);
  const int num1 = realJoinTypeCode(type);
  assert(num1>=0);

  // attribute of the second stream
  ListExpr attrList2 = nl->Second(nl->Second(nl->Second(args)));
  string name2 = nl->SymbolValue(nl->Fourth(args));
  index = listutils::findAttribute(attrList2,name2,type);
  assert(index>0);
  const int num2 = realJoinTypeCode(type);
  assert(num2>=0);

  if(num1<0 || num2<0){
    // never combine an invalid code into a seemingly valid table index
    return -1;
  }
  return 4 * num1 + num2;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5 Operator ~realJoinMMRTree~
|
||
|
|
|
||
|
|
1.5.1 Type mapping
|
||
|
|
|
||
|
|
Signature is: stream(tuple(A)) x stream(tuple(B)) x a[_]i x b[_]j x
|
||
|
|
int x int [ x int ] -> stream(tuple(A o B))
|
||
|
|
|
||
|
|
Meaning is : stream1 x stream2 x attrname1 x attrname2 x min x max x maxMem
|
||
|
|
where
|
||
|
|
|
||
|
|
|
||
|
|
----
|
||
|
|
stream1 : first input stream
|
||
|
|
stream2 : second input stream
|
||
|
|
attrname1 : attribute name for an attribute of spatial type in stream1
|
||
|
|
attrname2 : attribute name for an attribute of spatial type in stream2
|
||
|
|
min : minimum number of entries within a node of the used rtree
|
||
|
|
max : maximum number of entries within a node of the used rtree
|
||
|
|
[maxMem] : maximum memory available for storing tuples
|
||
|
|
|
||
|
|
----
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Type mapping of the realJoin operators.
//
// Accepts 4, 6 or 7 arguments:
//   stream(tuple(A)) x stream(tuple(B)) x a_i x b_j [x int x int [x int]]
// Both attributes must be in kind SPATIAL2D or SPATIAL3D or be of type
// mregion or mpoint, and the concatenation of both attribute lists must
// be a valid attribute list again (no name conflicts).
//
// The result uses the APPEND mechanism. The appended list is chosen so that
// the values following the four user arguments are always
//   min, max, maxMem, index1, index2, rect1, rect2
// where missing user arguments are filled with the defaults 4, 8 and -1,
// the indexes are zero based attribute positions and rect1 / rect2 tell
// whether the respective attribute is of type rect or rect3.
ListExpr realJoinMMRTreeTM(ListExpr args){

  string err = "stream(tuple(A)) x stream(tuple(B)) x"
               " a_i x b_i x [int x int [x int]] expected";
  int len = nl->ListLength(args);
  if((len!=4) && (len!=6) && (len!=7)){
    return listutils::typeError(err + " (wrong number of arguments)");
  }

  if(!Stream<Tuple>::checkType(nl->First(args)) ||
     !Stream<Tuple>::checkType(nl->Second(args))){
    return listutils::typeError(err + " (the first two arguments have to "
                                      "be tuple streams)");
  }
  if(!listutils::isSymbol(nl->Third(args)) ||
     !listutils::isSymbol(nl->Fourth(args)) ) {
    return listutils::typeError(err + " ( the third and the fourth "
                          "argument have to represent an attribute name)");
  }

  if( (len>4) &&
      (!CcInt::checkType(nl->Fifth(args)) ||
       !CcInt::checkType(nl->Sixth(args)))){
    return listutils::typeError(err +
                                " ( 5th or 6th element not of type int)");
  }

  // the 7th element is the sixth element of the list without its head
  if( (len==7) && ! CcInt::checkType(nl->Sixth(nl->Rest((args))))){
    return listutils::typeError(err +
                                " ( the 7th arguemnts is not of type int)");
  }

  ListExpr attrList1 = nl->Second(nl->Second(nl->First(args)));
  ListExpr attrList2 = nl->Second(nl->Second(nl->Second(args)));
  string name1 = nl->SymbolValue(nl->Third(args));
  string name2 = nl->SymbolValue(nl->Fourth(args));

  ListExpr type1;
  ListExpr type2;

  int index1 = listutils::findAttribute(attrList1, name1, type1);
  if(index1 == 0){
    return listutils::typeError("Attribute " + name1 +
                                " not present in first stream");
  }

  int index2 = listutils::findAttribute(attrList2, name2, type2);
  if(index2 == 0){
    return listutils::typeError("Attribute " + name2 +
                                " not present in second stream");
  }

  // check for spatial attribute
  if(!listutils::isKind(type1,Kind::SPATIAL2D()) &&
     !listutils::isKind(type1,Kind::SPATIAL3D()) &&
     !temporalalgebra::MRegion::checkType(type1) &&
     !temporalalgebra::MPoint::checkType(type1) ){
    string t = " (type is " + nl->ToString(type1)+ ")";
    return listutils::typeError("Attribute " + name1 +
                                " is not in Kind Spatial2D or Spatial3D"
                                + t);
  }
  if(!listutils::isKind(type2,Kind::SPATIAL2D()) &&
     !listutils::isKind(type2,Kind::SPATIAL3D()) &&
     !temporalalgebra::MRegion::checkType(type2) &&
     !temporalalgebra::MPoint::checkType(type2) ){
    string t = " (type is " + nl->ToString(type2)+ ")";
    return listutils::typeError("Attribute " + name2 +
                                " is not in Kind Spatial2D or Spatial3D"
                                + t);
  }

  bool rect1 =    Rectangle<2>::checkType(type1)
               || Rectangle<3>::checkType(type1);
  bool rect2 =    Rectangle<2>::checkType(type2)
               || Rectangle<3>::checkType(type2);

  ListExpr attrList = listutils::concat(attrList1, attrList2);

  if(!listutils::isAttrList(attrList)){
    return listutils::typeError("name conflicts in tuple streams");
  }

  ListExpr resList = nl->TwoElemList(
                         nl->SymbolAtom(Stream<Tuple>::BasicType()),
                         nl->TwoElemList(
                             nl->SymbolAtom(Tuple::BasicType()),
                             attrList));
  ListExpr appendList;
  if(len==7){
    appendList = nl->FourElemList(nl->IntAtom(index1-1),
                                  nl->IntAtom(index2-1),
                                  nl->BoolAtom(rect1),
                                  nl->BoolAtom(rect2));
  } else if(len==6) {
    // maxMem omitted: -1 takes its place.
    // The rectangle flags are rect1 / rect2; the type lists themselves
    // must not be passed to BoolAtom.
    appendList = nl->FiveElemList(
                             nl->IntAtom(-1),
                             nl->IntAtom(index1-1),
                             nl->IntAtom(index2-1),
                             nl->BoolAtom(rect1),
                             nl->BoolAtom(rect2));
  } else { // len == 4
    // use (4,8) as default parameter of the rtree
    appendList = listutils::xElemList(7,
                             nl->IntAtom(4),
                             nl->IntAtom(8),
                             nl->IntAtom(-1),
                             nl->IntAtom(index1-1),
                             nl->IntAtom(index2-1),
                             nl->BoolAtom(rect1),
                             nl->BoolAtom(rect2));
  }

  return nl->ThreeElemList(
              nl->SymbolAtom(Symbols::APPEND()),
              appendList,
              resList);

}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.4.3 Specification
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator specification (signature, syntax, meaning, example) shared by
// the realJoinMMRTree operators. The example query uses balanced brackets
// in both bbox(...) calls.
const std::string realJoinMMRTreeSpec =
    "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" )"
    "( <text> stream(tuple(A)) x stream(tuple(B)) x a_i x b_j x int"
    " x int [ x int]-> stream(tuple(X o Y)) </text---> "
    "<text>streamA streamB realJoinMMRTree[attrnameA, attrnameB, min,"
    " max, maxMem]"
    " </text--->"
    "<text>Performes a spatial join on two streams. "
    "The attributes a_i and b_j must"
    " be of type rect or rect3 in A and B, respectively. "
    "The result is a stream of "
    "tuples with intersecting rectangles build as concatenation"
    " of the source tuples. min and max define the range for the number of"
    " entries within the nodes of the used rtree. maxMem is the maximum cache"
    " size of the tuple store for tuples coming from streamA."
    " If maxMem is omitted, "
    " The default value MaxMemPerOperator (see SecondoConfig.ini) is used."
    " </text--->"
    "<text>query strassen feed extend[B : bbox(.geoData)] strassen "
    "feed extend[B : bbox(.geoData)] {a} realJoinMMRTree[B,B_a,8,16, 1024]"
    " count "
    " </text--->"
    ") )";
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.4.4 Operator Instance
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Value mapping table of realJoinMMRTree.
// Entry number = 4 * (code of first attribute) + (code of second attribute)
// with the codes 0 = 2D spatial, 1 = 3D spatial, 2 = mregion, 3 = mpoint,
// which is the value computed by realJoinSelect.
ValueMapping realJoinMMRTreeVM[] = {

  // ---- first attribute: 2D spatial -------------------------------- 0..3
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             StandardSpatialAttribute<2>,
                             StandardSpatialAttribute<2>,
                             2, 2, 2 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             StandardSpatialAttribute<2>,
                             StandardSpatialAttribute<3>,
                             2, 3, 2 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             StandardSpatialAttribute<2>,
                             temporalalgebra::MRegion,
                             2, 3, 2 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             StandardSpatialAttribute<2>,
                             temporalalgebra::MPoint,
                             2, 3, 2 > >,

  // ---- first attribute: 3D spatial -------------------------------- 4..7
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             StandardSpatialAttribute<3>,
                             StandardSpatialAttribute<2>,
                             3, 2, 2 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             StandardSpatialAttribute<3>,
                             StandardSpatialAttribute<3>,
                             3, 3, 3 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             StandardSpatialAttribute<3>,
                             temporalalgebra::MRegion,
                             3, 3, 3 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             StandardSpatialAttribute<3>,
                             temporalalgebra::MPoint,
                             3, 3, 3 > >,

  // ---- first attribute: mregion ----------------------------------- 8..11
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             temporalalgebra::MRegion,
                             StandardSpatialAttribute<2>,
                             3, 2, 2 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             temporalalgebra::MRegion,
                             StandardSpatialAttribute<3>,
                             3, 3, 3 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             temporalalgebra::MRegion,
                             temporalalgebra::MRegion,
                             3, 3, 3 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             temporalalgebra::MRegion,
                             temporalalgebra::MPoint,
                             3, 3, 3 > >,

  // ---- first attribute: mpoint ------------------------------------ 12..15
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<2, TupleId>,
                             temporalalgebra::MPoint,
                             StandardSpatialAttribute<2>,
                             3, 2, 2 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             temporalalgebra::MPoint,
                             StandardSpatialAttribute<3>,
                             3, 3, 3 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             temporalalgebra::MPoint,
                             temporalalgebra::MRegion,
                             3, 3, 3 > >,
  joinRTreeVM<
      RealJoinTreeLocalInfo< mmrtree::RtreeT<3, TupleId>,
                             temporalalgebra::MPoint,
                             temporalalgebra::MPoint,
                             3, 3, 3 > >,

};
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// Operator instance: 16 value mappings, chosen by realJoinSelect.
Operator realJoinMMRTree( "realJoinMMRTree",    // name
                          realJoinMMRTreeSpec,  // specification
                          16,                   // number of value mappings
                          realJoinMMRTreeVM,    // value mapping table
                          realJoinSelect,       // selection function
                          realJoinMMRTreeTM );  // type mapping
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// Value mapping table of realJoinMMRTreeVec.
// Entry number = 4 * (code of first attribute) + (code of second attribute)
// with the codes 0 = 2D spatial, 1 = 3D spatial, 2 = mregion, 3 = mpoint,
// which is the value computed by realJoinSelect.
// The operator is registered with 16 value mappings and realJoinSelect
// returns 12..15 for an mpoint as first attribute, so the table needs all
// four rows; the mpoint row mirrors the one of realJoinMMRTreeVM.
ValueMapping realJoinMMRTreeVecVM[] = {

  // ---- first attribute: 2D spatial -------------------------------- 0..3
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              StandardSpatialAttribute<2>,
              StandardSpatialAttribute<2>,2,2,2> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              StandardSpatialAttribute<2>,
              StandardSpatialAttribute<3>,2,3,2> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              StandardSpatialAttribute<2>, temporalalgebra::MRegion,2,3,2> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              StandardSpatialAttribute<2>, temporalalgebra::MPoint,2,3,2> >,

  // ---- first attribute: 3D spatial -------------------------------- 4..7
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              StandardSpatialAttribute<3>,
              StandardSpatialAttribute<2>,3,2,2> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              StandardSpatialAttribute<3>,
              StandardSpatialAttribute<3>,3,3,3> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              StandardSpatialAttribute<3>, temporalalgebra::MRegion,3,3,3> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              StandardSpatialAttribute<3>, temporalalgebra::MPoint,3,3,3> >,

  // ---- first attribute: mregion ----------------------------------- 8..11
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              temporalalgebra::MRegion, StandardSpatialAttribute<2>,3,2,2> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              temporalalgebra::MRegion, StandardSpatialAttribute<3>,3,3,3> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              temporalalgebra::MRegion, temporalalgebra::MRegion,3,3,3> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              temporalalgebra::MRegion, temporalalgebra::MPoint,3,3,3> >,

  // ---- first attribute: mpoint ------------------------------------ 12..15
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<2, TupleId>,
              temporalalgebra::MPoint, StandardSpatialAttribute<2>,3,2,2> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              temporalalgebra::MPoint, StandardSpatialAttribute<3>,3,3,3> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              temporalalgebra::MPoint, temporalalgebra::MRegion,3,3,3> >,
  joinRTreeVM<RealJoinTreeVecLocalInfo<mmrtree::RtreeT<3, TupleId>,
              temporalalgebra::MPoint, temporalalgebra::MPoint,3,3,3> >,

};
|
||
|
|
|
||
|
|
|
||
|
|
// Operator instance; shares specification, selection function and type
// mapping with realJoinMMRTree and only differs in the value mapping table.
Operator realJoinMMRTreeVec( "realJoinMMRTreeVec",   // name
                             realJoinMMRTreeSpec,    // specification
                             16,                     // number of value mappings
                             realJoinMMRTreeVecVM,   // value mapping table
                             realJoinSelect,         // selection function
                             realJoinMMRTreeTM );    // type mapping
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5 insertMMRTree
|
||
|
|
|
||
|
|
This operator can be used to check the performance of building an rtree
|
||
|
|
in main memory.
|
||
|
|
|
||
|
|
1.5.1 Type Mapping
|
||
|
|
|
||
|
|
Signature is: stream(rectangle) x int x int
|
||
|
|
|
||
|
|
The last two arguments denote the range of number of entries within the
|
||
|
|
used rtree.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Type mapping of insertMMRTree: stream(rect) x int x int -> stream(rect).
// Every violation yields the same type error; on success the type of the
// input stream is returned unchanged.
ListExpr insertMMRTreeTM(ListExpr args){
  const string err = "stream(rectangle) x int x int expected";

  // evaluated left to right, the argument checks only run for a list of
  // length three
  const bool wellTyped =
         nl->HasLength(args,3)
      && Stream<Rectangle<2> >::checkType(nl->First(args))
      && CcInt::checkType(nl->Second(args))
      && CcInt::checkType(nl->Third(args));

  if(!wellTyped){
    return listutils::typeError(err);
  }
  return nl->First(args);
}
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.2 Value Mapping
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Value mapping of insertMMRTree (stream operator).
//   OPEN    : opens the input stream and creates a fresh r-tree in local;
//             no tree is created if min or max is undefined, min < 1 or
//             max < 2*min
//   REQUEST : forwards the next rectangle of the input stream and inserts
//             it into the tree; answers CANCEL if there is no tree or the
//             input stream is exhausted
//   CLOSE   : closes the input stream and destroys the tree
int insertMMRTreeVM( Word* args, Word& result, int message,
                     Word& local, Supplier s )
{
   // id handed to the tree together with each rectangle; being static it
   // keeps counting over all invocations of this function
   static long c = 0;

   typedef mmrtree::Rtree<2> tree_t;
   tree_t* tree = static_cast<tree_t*>(local.addr);

   if(message == OPEN){
      qp->Open(args[0].addr);
      if(tree){ // left over from an earlier run
         delete tree;
         local.addr = 0;
      }
      CcInt* minArg = static_cast<CcInt*>(args[1].addr);
      CcInt* maxArg = static_cast<CcInt*>(args[2].addr);
      const bool bothDefined = minArg->IsDefined() && maxArg->IsDefined();
      if(!bothDefined){
         return 0;
      }
      const int minEntries = minArg->GetValue();
      const int maxEntries = maxArg->GetValue();
      const bool validRange =    (minEntries >= 1)
                              && (maxEntries >= 2 * minEntries);
      if(!validRange){
         return 0;
      }
      local.setAddr(new tree_t(minEntries,maxEntries));
      return 0;
   }

   if(message == REQUEST){
      if(!tree){
         return CANCEL;
      }
      Word w;
      qp->Request(args[0].addr, w);
      if(!qp->Received(args[0].addr)){
         return CANCEL;
      }
      result = w;
      Rectangle<2>* r = (Rectangle<2>*) w.addr;
      tree->insert(*r,c++); // tree is known to exist at this point
      return YIELD;
   }

   if(message == CLOSE){
      qp->Close(args[0].addr);
      delete tree; // deleting a null pointer is a no-op
      local.setAddr(0);
      return 0;
   }

   assert(false);
   return -1;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.3 Specification
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Operator specification of insertMMRTree. Syntax and example spell the
// operator exactly as it is registered ("insertMMRTree").
const std::string insertMMRTreeSpec  =
    "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" )"
    "( <text> stream(rectangle) x int x int -> stream(rectangle) </text---> "
    "<text> _ insertMMRTree [min , max] </text--->"
    "<text>Inserts all the rectangles within the "
    "stream into a main memory based rtree. [min, max] is the allowed range"
    " for the numbers of entries within a node of the rtree. min must be"
    " greater than 0 and max must be at least two times min. "
    " </text--->"
    "<text>query strassen feed extend[B : bbox(.geoData)] "
    "projecttransformstream[B] insertMMRTree[4,8] count </text--->"
    ") )";
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.4 Operator instance
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Operator instance: a single value mapping, hence SimpleSelect.
Operator insertMMRTree ( "insertMMRTree",          // name
                         insertMMRTreeSpec,        // specification
                         insertMMRTreeVM,          // value mapping
                         Operator::SimpleSelect,   // selection function
                         insertMMRTreeTM );        // type mapping
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6 Operator ~statMMRTree~
|
||
|
|
|
||
|
|
Provides statistical information about an mmrtree built from a tuple stream.
|
||
|
|
|
||
|
|
|
||
|
|
1.6.1 Type Mapping
|
||
|
|
|
||
|
|
Signature is: stream(tuple(A)) x a[_]i x int x int
|
||
|
|
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Type mapping of statMMRTree:
//   stream(tuple(A)) x a_i x int x int -> text
// a_i must name an attribute of type rect or rect3. The zero based
// position of that attribute is appended to the arguments (APPEND).
ListExpr statMMRTreeTM(ListExpr args){
  string err = "stream(Tuple(A)) x a_i x int x int expected";
  if(!nl->HasLength(args,4)){
    return listutils::typeError(err);
  }
  if(!Stream<Tuple>::checkType(nl->First(args)) ||
     !listutils::isSymbol(nl->Second(args)) ||
     !CcInt::checkType(nl->Third(args)) ||
     !CcInt::checkType(nl->Fourth(args))){
    return listutils::typeError(err);
  }

  string name = nl->SymbolValue(nl->Second(args));
  ListExpr attrList = nl->Second(nl->Second(nl->First(args)));
  ListExpr type;
  int index = listutils::findAttribute(attrList, name, type);
  if(index==0){ // findAttribute reports positions starting with 1
    return listutils::typeError("Attribute " + name + " unknown in tuple");
  }
  if(!Rectangle<2>::checkType(type) && !Rectangle<3>::checkType(type)){
    // note the blank after "type": the type name follows directly
    return listutils::typeError("Attribute " + name + " not of type " +
                                Rectangle<2>::BasicType() + " or " +
                                Rectangle<3>::BasicType());
  }
  return nl->ThreeElemList(
                nl->SymbolAtom(Symbols::APPEND()),
                nl->OneElemList(nl->IntAtom(index-1)),
                nl->SymbolAtom(FText::BasicType()));
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.2 Value Mapping
|
||
|
|
|
||
|
|
*/
|
||
|
|
template <int dim>
|
||
|
|
int statMMRTreeVM( Word* args, Word& result, int message,
|
||
|
|
Word& local, Supplier s )
|
||
|
|
{
|
||
|
|
result = qp->ResultStorage(s);
|
||
|
|
FText* res = (FText*) result.addr;
|
||
|
|
CcInt* Min = (CcInt*) args[2].addr;
|
||
|
|
CcInt* Max = (CcInt*) args[3].addr;
|
||
|
|
if(!Min->IsDefined() || !Max->IsDefined()){
|
||
|
|
res->Set(true,"At least one of the range value is undefined");
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
int min = Min->GetValue();
|
||
|
|
int max = Max->GetValue();
|
||
|
|
if(min<1 || max < min*2){
|
||
|
|
res->Set(true, "Invalid range, min must be greater "
|
||
|
|
"than 0, and max >= 2*min");
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
size_t tuples = 0;
|
||
|
|
Stream<Tuple> stream(args[0]);
|
||
|
|
stream.open();
|
||
|
|
int index = ((CcInt*)args[4].addr)->GetValue();
|
||
|
|
mmrtree::RtreeT<dim,TupleId> tree(min,max);
|
||
|
|
Tuple* t = stream.request();
|
||
|
|
while(t){
|
||
|
|
Rectangle<dim>* r = (Rectangle<dim>*) t->GetAttribute(index);
|
||
|
|
tree.insert(*r, t->GetTupleId());
|
||
|
|
tuples += t->GetMemSize();
|
||
|
|
t->DeleteIfAllowed();
|
||
|
|
t = stream.request();
|
||
|
|
}
|
||
|
|
stream.close();
|
||
|
|
stringstream ss;
|
||
|
|
tree.printStats(ss);
|
||
|
|
ss << " Mem used by tuples = " << tuples
|
||
|
|
<< "(" << hMem(tuples) << ")" << endl;
|
||
|
|
res->Set(true, ss.str());
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.3 Specification
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator specification of statMMRTree. Syntax and example spell the
// operator exactly as it is registered ("statMMRTree").
const std::string statMMRTreeSpec  =
    "( ( \"Signature\" \"Syntax\" \"Meaning\" \"Example\" )"
    "( <text> stream(Tuple(A)) x a_i x int x int -> text </text---> "
    "<text>_ statMMRTree [ attrname, min , max] </text--->"
    "<text>Inserts all the rectangles within the "
    "stream (pointered by a_i) into a main memory based rtree ."
    " Statistical information about this tree is the result of this "
    "operator</text--->"
    "<text>query strassen feed extend[B : bbox(.geoData)] "
    " statMMRTree[B,4,8]  </text--->"
    ") )";
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.4 Operator instance
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Selection function of statMMRTree:
// 0 if the named attribute is a rect, 1 if it is a rect3, -1 otherwise.
int statMMRTreeSelect(ListExpr args){
   ListExpr type;
   const string attrName = nl->SymbolValue(nl->Second(args));
   int index = listutils::findAttribute(
                   nl->Second(nl->Second(nl->First(args))), attrName, type);
   assert(index >0);

   if(Rectangle<2>::checkType(type)){
      return 0;
   }
   return Rectangle<3>::checkType(type) ? 1 : -1;
}
|
||
|
|
|
||
|
|
// Value mappings in the order expected by statMMRTreeSelect:
// [0] rect (2 dimensions), [1] rect3 (3 dimensions)
ValueMapping statMMRTreevm[] = { statMMRTreeVM<2>, statMMRTreeVM<3> };
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// Operator instance: two value mappings, chosen by statMMRTreeSelect.
Operator statMMRTree ( "statMMRTree",       // name
                       statMMRTreeSpec,     // specification
                       2,                   // number of value mappings
                       statMMRTreevm,       // value mapping table
                       statMMRTreeSelect,   // selection function
                       statMMRTreeTM );     // type mapping
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.4 Operator joinMMRTreeIt
|
||
|
|
|
||
|
|
This operator does the same as the realJoinMMRTree operator. In contrast to
|
||
|
|
that operator, here an iterator is used to get all results instead of a
|
||
|
|
vector collecting all results within a single scan.
|
||
|
|
|
||
|
|
1.4.1 Type Mapping
|
||
|
|
|
||
|
|
Because the operator has the same functionality as the realJoinMMRTree
|
||
|
|
operator, we can reuse the type mapping of that operator.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
template<class Type1, class Type2, int dim1, int dim2, int minDim>
|
||
|
|
class joinMMRTreeItLocalInfo{
|
||
|
|
public:
|
||
|
|
joinMMRTreeItLocalInfo(Word& stream1, Word& stream2,
|
||
|
|
int min, int max,
|
||
|
|
int maxMem,
|
||
|
|
const int attrPos1, const int _attrPos2,
|
||
|
|
ListExpr resultType) :
|
||
|
|
s2(stream2),attrPos2(_attrPos2),
|
||
|
|
index(min,max), buffer(maxMem){
|
||
|
|
Stream<Tuple> s1(stream1);
|
||
|
|
s1.open();
|
||
|
|
Tuple* tuple = s1.request();
|
||
|
|
while(tuple!=0){
|
||
|
|
TupleId id = buffer.AppendTuple(tuple);
|
||
|
|
Rectangle<dim1> box = ((Type1*)
|
||
|
|
tuple->GetAttribute(attrPos1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index.insert(box,id);
|
||
|
|
//} else {
|
||
|
|
index.insert(box. template project<minDim>(), id);
|
||
|
|
//}
|
||
|
|
tuple->DeleteIfAllowed();
|
||
|
|
tuple = s1.request();
|
||
|
|
}
|
||
|
|
s1.close();
|
||
|
|
tt = new TupleType(resultType);
|
||
|
|
s2.open();
|
||
|
|
currentTuple = s2.request();
|
||
|
|
it = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
~joinMMRTreeItLocalInfo(){
|
||
|
|
tt->DeleteIfAllowed();
|
||
|
|
if(it){
|
||
|
|
delete it;
|
||
|
|
}
|
||
|
|
s2.close();
|
||
|
|
}
|
||
|
|
|
||
|
|
Tuple* next(){
|
||
|
|
while(currentTuple!=0){
|
||
|
|
if(it==0){
|
||
|
|
Rectangle<dim2> r = ((Type2*)
|
||
|
|
currentTuple->GetAttribute(attrPos2))->BoundingBox();
|
||
|
|
//if(dim2==minDim){
|
||
|
|
// it=index.find( r );
|
||
|
|
//} else {
|
||
|
|
it = index.find(r. template project<minDim>());
|
||
|
|
//}
|
||
|
|
}
|
||
|
|
TupleId const* id = it->next();
|
||
|
|
if(id){ // new result found
|
||
|
|
Tuple* res = new Tuple(tt);
|
||
|
|
Tuple* t1 = buffer.GetTuple(*id);
|
||
|
|
Concat(t1,currentTuple,res);
|
||
|
|
t1->DeleteIfAllowed();
|
||
|
|
return res;
|
||
|
|
} else { // iterator exhausted, try next currentTuple
|
||
|
|
delete it;
|
||
|
|
it = 0;
|
||
|
|
currentTuple->DeleteIfAllowed();
|
||
|
|
currentTuple = s2.request();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
Stream<Tuple> s2;
|
||
|
|
int attrPos2;
|
||
|
|
mmrtree::RtreeT<minDim, TupleId> index;
|
||
|
|
TupleStore buffer;
|
||
|
|
typename mmrtree::RtreeT<minDim, TupleId>::iterator* it;
|
||
|
|
Tuple* currentTuple;
|
||
|
|
TupleType* tt;
|
||
|
|
}; // class joinMMRTreeItLocalInfo
|
||
|
|
|
||
|
|
|
||
|
|
template<class Type1, class Type2, int dim1, int dim2, int minDim>
|
||
|
|
class joinMMRTreeItVecLocalInfo{
|
||
|
|
public:
|
||
|
|
joinMMRTreeItVecLocalInfo(Word& stream1, Word& stream2,
|
||
|
|
int min, int max,
|
||
|
|
int maxMem,
|
||
|
|
const int attrPos1, const int _attrPos2,
|
||
|
|
ListExpr resultType) :
|
||
|
|
s2(stream2),attrPos2(_attrPos2),
|
||
|
|
index(min,max), buffer(){
|
||
|
|
Stream<Tuple> s1(stream1);
|
||
|
|
s1.open();
|
||
|
|
Tuple* tuple = s1.request();
|
||
|
|
while(tuple!=0){
|
||
|
|
TupleId id = (TupleId) buffer.size();
|
||
|
|
buffer.push_back(tuple);
|
||
|
|
Rectangle<dim1> box = ((Type1*)
|
||
|
|
tuple->GetAttribute(attrPos1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index.insert(box,id);
|
||
|
|
//} else {
|
||
|
|
//assert(box.Proper());
|
||
|
|
index.insert(box.template project<minDim>(), id);
|
||
|
|
//}
|
||
|
|
tuple = s1.request();
|
||
|
|
}
|
||
|
|
s1.close();
|
||
|
|
tt = new TupleType(resultType);
|
||
|
|
s2.open();
|
||
|
|
currentTuple = s2.request();
|
||
|
|
it = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
~joinMMRTreeItVecLocalInfo(){
|
||
|
|
tt->DeleteIfAllowed();
|
||
|
|
if(it){
|
||
|
|
delete it;
|
||
|
|
}
|
||
|
|
s2.close();
|
||
|
|
for(size_t i=0;i<buffer.size();i++){
|
||
|
|
buffer[i]->DeleteIfAllowed();
|
||
|
|
}
|
||
|
|
buffer.clear();
|
||
|
|
}
|
||
|
|
|
||
|
|
Tuple* next(){
|
||
|
|
while(currentTuple!=0){
|
||
|
|
if(it==0){
|
||
|
|
Rectangle<dim2> r = ((Type2*)
|
||
|
|
currentTuple->GetAttribute(attrPos2))->BoundingBox();
|
||
|
|
//if(dim2==minDim){
|
||
|
|
// it = index.find(r);
|
||
|
|
//} else {
|
||
|
|
it = index.find(r. template project<minDim>());
|
||
|
|
//}
|
||
|
|
}
|
||
|
|
TupleId const* id = it->next();
|
||
|
|
if(id){ // new result found
|
||
|
|
Tuple* res = new Tuple(tt);
|
||
|
|
Tuple* t1 = buffer[*id];
|
||
|
|
Concat(t1,currentTuple,res);
|
||
|
|
return res;
|
||
|
|
} else { // iterator exhausted, try next currentTuple
|
||
|
|
delete it;
|
||
|
|
it = 0;
|
||
|
|
currentTuple->DeleteIfAllowed();
|
||
|
|
currentTuple = s2.request();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
Stream<Tuple> s2;
|
||
|
|
int attrPos2;
|
||
|
|
mmrtree::RtreeT<minDim, TupleId> index;
|
||
|
|
vector<Tuple*> buffer;
|
||
|
|
typename mmrtree::RtreeT<minDim, TupleId>::iterator* it;
|
||
|
|
Tuple* currentTuple;
|
||
|
|
TupleType* tt;
|
||
|
|
}; // class joinMMRTreeItVecLocalInfo
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
template <class LocalInfo>
|
||
|
|
int joinMMRTreeItVM( Word* args, Word& result, int message,
|
||
|
|
Word& local, Supplier s ) {
|
||
|
|
|
||
|
|
|
||
|
|
LocalInfo* li = (LocalInfo*) local.addr;
|
||
|
|
switch (message){
|
||
|
|
case OPEN : {
|
||
|
|
if(li){
|
||
|
|
delete li;
|
||
|
|
li = 0;
|
||
|
|
local.setAddr(0);
|
||
|
|
}
|
||
|
|
CcInt* Min = (CcInt*) args[4].addr;
|
||
|
|
CcInt* Max = (CcInt*) args[5].addr;
|
||
|
|
CcInt* MaxMem = (CcInt*) args[6].addr;
|
||
|
|
if(!Min->IsDefined() || !Max->IsDefined()){
|
||
|
|
return 0; // invalid option
|
||
|
|
}
|
||
|
|
int min = Min->GetIntval();
|
||
|
|
int max = Max->GetIntval();
|
||
|
|
if(min<2 || max < 2*min){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
size_t maxMem = (qp->GetMemorySize(s) * 1024); // in kB
|
||
|
|
if(MaxMem->IsDefined() && MaxMem->GetValue() > 0){
|
||
|
|
maxMem = MaxMem->GetValue();
|
||
|
|
}
|
||
|
|
local.setAddr(new LocalInfo(args[0], args[1], min,
|
||
|
|
max, maxMem,
|
||
|
|
((CcInt*)args[7].addr)->GetIntval(),
|
||
|
|
((CcInt*)args[8].addr)->GetIntval(),
|
||
|
|
nl->Second(GetTupleResultType(s)) ));
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
case REQUEST : {
|
||
|
|
if(!li){
|
||
|
|
return CANCEL;
|
||
|
|
}
|
||
|
|
result.addr = li->next();
|
||
|
|
return result.addr?YIELD:CANCEL;
|
||
|
|
|
||
|
|
}
|
||
|
|
case CLOSE :{
|
||
|
|
if(li){
|
||
|
|
delete li;
|
||
|
|
local.setAddr(0);
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
// Value mapping array of the operator joinMMRTreeIt.
// One entry per supported combination of the two join attribute types;
// the rows are grouped by the type of the first attribute. In every entry
// the last number is the smaller one of the two preceding dimensions.
// NOTE(review): the order presumably has to match the index returned by
// the selection function of the operator - confirm before reordering.
ValueMapping joinMMRTreeItvm[] = {
  // first attribute: 2D spatial
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<2>, StandardSpatialAttribute<2>,2,2,2> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<2>, StandardSpatialAttribute<3>,2,3,2> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<2>, temporalalgebra::MRegion,2,3,2> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<2>, temporalalgebra::MPoint,2,3,2> >,

  // first attribute: 3D spatial
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<3>, StandardSpatialAttribute<2>,3,2,2> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<3>, StandardSpatialAttribute<3>,3,3,3> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<3>, temporalalgebra::MRegion,3,3,3> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      StandardSpatialAttribute<3>, temporalalgebra::MPoint,3,3,3> >,

  // first attribute: mregion
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MRegion, StandardSpatialAttribute<2>,3,2,2> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MRegion, StandardSpatialAttribute<3>,3,3,3> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MRegion, temporalalgebra::MRegion,3,3,3> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MRegion, temporalalgebra::MPoint,3,3,3> >,

  // first attribute: mpoint
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MPoint, StandardSpatialAttribute<2>,3,2,2> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MPoint, StandardSpatialAttribute<3>,3,3,3> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MPoint, temporalalgebra::MRegion,3,3,3> >,
  joinMMRTreeItVM<joinMMRTreeItLocalInfo<
      temporalalgebra::MPoint, temporalalgebra::MPoint,3,3,3> >
};
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// Operator instance of joinMMRTreeIt.
Operator joinMMRTreeIt (
    "joinMMRTreeIt",      // operator name
    realJoinMMRTreeSpec,  // specification
    16,                   // number of entries in joinMMRTreeItvm
    joinMMRTreeItvm,      // value mappings
    realJoinSelect ,      // selection function
    realJoinMMRTreeTM     // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
// Value mapping array of the operator joinMMRTreeItVec.
// One entry per supported combination of the two join attribute types;
// the rows are grouped by the type of the first attribute. In every entry
// the last number is the smaller one of the two preceding dimensions.
// NOTE(review): the order presumably has to match the index returned by
// the selection function of the operator - confirm before reordering.
ValueMapping joinMMRTreeItVecvm[] = {
  // first attribute: 2D spatial
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<2>, StandardSpatialAttribute<2>, 2,2,2> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<2>, StandardSpatialAttribute<3>, 2,3,2> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<2>, temporalalgebra::MRegion, 2,3,2> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<2>, temporalalgebra::MPoint, 2,3,2> >,

  // first attribute: 3D spatial
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<3>, StandardSpatialAttribute<2>, 3,2,2> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<3>, StandardSpatialAttribute<3>, 3,3,3> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<3>, temporalalgebra::MRegion, 3,3,3> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      StandardSpatialAttribute<3>, temporalalgebra::MPoint, 3,3,3> >,

  // first attribute: mregion
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MRegion, StandardSpatialAttribute<2>, 3,2,2> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MRegion, StandardSpatialAttribute<3>, 3,3,3> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MRegion, temporalalgebra::MRegion, 3,3,3> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MRegion, temporalalgebra::MPoint, 3,3,3> >,

  // first attribute: mpoint
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MPoint, StandardSpatialAttribute<2>, 3,2,2> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MPoint, StandardSpatialAttribute<3>, 3,3,3> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MPoint, temporalalgebra::MRegion, 3,3,3> >,
  joinMMRTreeItVM< joinMMRTreeItVecLocalInfo<
      temporalalgebra::MPoint, temporalalgebra::MPoint, 3,3,3> >
};
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// Operator instance of joinMMRTreeItVec.
Operator joinMMRTreeItVec (
    "joinMMRTreeItVec",   // operator name
    realJoinMMRTreeSpec,  // specification
    16,                   // number of entries in joinMMRTreeItVecvm
    joinMMRTreeItVecvm,   // value mappings
    realJoinSelect ,      // selection function
    realJoinMMRTreeTM     // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5 itSpatialJoin
|
||
|
|
|
||
|
|
This version of the SpatialJoin operator uses only the memory
|
||
|
|
available for that operator. From the first stream, it collects
|
||
|
|
all Tuples into a vector and creates an r-tree from the geometries
|
||
|
|
until the stream or the memory is exhausted.
|
||
|
|
Then, it starts to search on the r-tree using the tuples from the
|
||
|
|
second stream. If the first stream was not exhausted, all tuples
|
||
|
|
of the second stream are collected within a tuple buffer.
|
||
|
|
While there are elements within the first stream, a partition of
|
||
|
|
it is used to create a new r-tree and the tuple buffer is scanned
|
||
|
|
for searching on the r-tree.
|
||
|
|
|
||
|
|
|
||
|
|
1.5.1 Type Mapping
|
||
|
|
|
||
|
|
The Type mapping is the same as for other join operations within this
|
||
|
|
algebra, so we can just use ~realJoinMMRTreeTM~ here.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.2 LocalInfo
|
||
|
|
|
||
|
|
*/
|
||
|
|
template<class Type1, class Type2, int dim1, int dim2, int minDim, bool reopen>
|
||
|
|
class ItSpatialJoinInfo{
|
||
|
|
public:
|
||
|
|
|
||
|
|
ItSpatialJoinInfo(Word& _s1, Word& _s2,
|
||
|
|
const int _min, const int _max,
|
||
|
|
size_t _maxMem, const int _a1,
|
||
|
|
const int _a2, ListExpr ttl,
|
||
|
|
ItSpatialJoinCostEstimation* _costEstimation
|
||
|
|
):
|
||
|
|
s1(_s1), s2(_s2), min(_min), max(_max),
|
||
|
|
a1(_a1), a2(_a2), costEstimation(_costEstimation)
|
||
|
|
{
|
||
|
|
|
||
|
|
s1.open();
|
||
|
|
s2.open();
|
||
|
|
Tuple* t1 = s1.request();
|
||
|
|
tt = new TupleType(ttl);
|
||
|
|
tuples2 = 0;
|
||
|
|
treeIt = 0;
|
||
|
|
bufferIt = 0;
|
||
|
|
currentTuple2 =0;
|
||
|
|
|
||
|
|
costEstimation -> setMinRtree(min);
|
||
|
|
costEstimation -> setMaxRtree(max);
|
||
|
|
|
||
|
|
// process the first Tuple
|
||
|
|
if(t1){
|
||
|
|
index = new mmrtree::RtreeT<minDim, TupleId>(min,max);
|
||
|
|
int sizePerTuple = sizeof(void*) + t1->GetMemSize();
|
||
|
|
costEstimation -> setSizeOfTupleSt1(sizePerTuple);
|
||
|
|
maxTuples = (_maxMem*1024) / sizePerTuple;
|
||
|
|
if(maxTuples <= 10){
|
||
|
|
maxTuples = 10; // force a minimum of ten tuples
|
||
|
|
} else {
|
||
|
|
size_t indexSize = index->guessSize(maxTuples, true);
|
||
|
|
maxTuples = ((_maxMem*1024)-indexSize) / sizePerTuple;
|
||
|
|
if(maxTuples <=10){
|
||
|
|
maxTuples = 10;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
TupleId id = (TupleId) tuples1.size();
|
||
|
|
tuples1.push_back(t1);
|
||
|
|
Rectangle<dim1> box = ((Type1*)
|
||
|
|
t1->GetAttribute(a1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index->insert(box,id);
|
||
|
|
//} else {
|
||
|
|
index->insert(box. template project<minDim>(), id);
|
||
|
|
//}
|
||
|
|
} else {
|
||
|
|
index = 0;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
costEstimation -> processedTupleInStream1();
|
||
|
|
t1 = s1.request();
|
||
|
|
int noTuples = 1;
|
||
|
|
while( (t1!=0) && (noTuples < maxTuples -1)){
|
||
|
|
TupleId id = (TupleId) tuples1.size();
|
||
|
|
tuples1.push_back(t1);
|
||
|
|
Rectangle<dim1> box = ((Type1*) t1->GetAttribute(a1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index->insert(box,id);
|
||
|
|
//} else {
|
||
|
|
index->insert(box.template project<minDim>(), id);
|
||
|
|
//}
|
||
|
|
costEstimation -> processedTupleInStream1();
|
||
|
|
t1 = s1.request();
|
||
|
|
noTuples++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// process the last tuple if present
|
||
|
|
if(t1){
|
||
|
|
finished = false;
|
||
|
|
TupleId id = (TupleId) tuples1.size();
|
||
|
|
tuples1.push_back(t1);
|
||
|
|
Rectangle<dim1> box = ((Type1*) t1->GetAttribute(a1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index->insert(box,id);
|
||
|
|
//} else {
|
||
|
|
index->insert(box.template project<minDim>(),id);
|
||
|
|
//}
|
||
|
|
|
||
|
|
} else {
|
||
|
|
costEstimation -> setStream1Exhausted(true);
|
||
|
|
finished = true; // stream 1 exhausted
|
||
|
|
}
|
||
|
|
scans = 1;
|
||
|
|
costEstimation -> readPartitionDone();
|
||
|
|
}
|
||
|
|
|
||
|
|
~ItSpatialJoinInfo(){
|
||
|
|
s1.close();
|
||
|
|
s2.close();
|
||
|
|
tt->DeleteIfAllowed();
|
||
|
|
for(unsigned int i=0;i< tuples1.size();i++){
|
||
|
|
if(tuples1[i]){
|
||
|
|
tuples1[i]->DeleteIfAllowed();
|
||
|
|
tuples1[i] = 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
tuples1.clear();
|
||
|
|
if(bufferIt){
|
||
|
|
delete bufferIt;
|
||
|
|
}
|
||
|
|
if(tuples2){
|
||
|
|
// tuples2->DeleteAndTruncate();
|
||
|
|
delete tuples2;
|
||
|
|
}
|
||
|
|
if(treeIt){
|
||
|
|
delete treeIt;
|
||
|
|
}
|
||
|
|
if(index){
|
||
|
|
delete index;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
Tuple* next(){
|
||
|
|
if(!index){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
while(true){ // use return for finishing this loop
|
||
|
|
if(!treeIt){ // try to create a new tree iterator
|
||
|
|
createNewTreeIt();
|
||
|
|
if(treeIt==0){
|
||
|
|
costEstimation -> setStream2Exhausted(true);
|
||
|
|
cout << "Join finished with " << scans
|
||
|
|
<< " scans for stream 2" << endl;
|
||
|
|
cout << "there are " << maxTuples
|
||
|
|
<< " maximum stored tuples within the index" << endl;
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
TupleId const* id = treeIt->next();
|
||
|
|
if(!id){ //treeIt exhausted
|
||
|
|
delete treeIt;
|
||
|
|
treeIt = 0;
|
||
|
|
currentTuple2->DeleteIfAllowed();
|
||
|
|
currentTuple2=0;
|
||
|
|
} else { // new result found
|
||
|
|
Tuple* res = new Tuple(tt);
|
||
|
|
Tuple* t1 = tuples1[*id];
|
||
|
|
Concat(t1,currentTuple2,res);
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
Stream<Tuple> s1;
|
||
|
|
Stream<Tuple> s2;
|
||
|
|
int min;
|
||
|
|
int max;
|
||
|
|
int a1;
|
||
|
|
int a2;
|
||
|
|
TupleType* tt;
|
||
|
|
int maxTuples;
|
||
|
|
vector<Tuple*> tuples1;
|
||
|
|
TupleFile* tuples2;
|
||
|
|
bool finished; // all tuples from stream1 read
|
||
|
|
typename mmrtree::RtreeT<minDim, TupleId>::iterator* treeIt;
|
||
|
|
Tuple* currentTuple2; // tuple from stream 2
|
||
|
|
TupleFileIterator* bufferIt;
|
||
|
|
mmrtree::RtreeT<minDim, TupleId>* index;
|
||
|
|
int scans;
|
||
|
|
ItSpatialJoinCostEstimation* costEstimation;
|
||
|
|
|
||
|
|
void createNewTreeIt(){
|
||
|
|
assert(!treeIt);
|
||
|
|
if(!currentTuple2){
|
||
|
|
getCurrentTuple();
|
||
|
|
}
|
||
|
|
if(!currentTuple2){
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
Rectangle<dim2> r = ((Type2*)
|
||
|
|
currentTuple2->GetAttribute(a2))->BoundingBox();
|
||
|
|
//if(dim2==minDim){
|
||
|
|
// treeIt = index->find(r);
|
||
|
|
//} else {
|
||
|
|
treeIt = index->find(r.template project<minDim>());
|
||
|
|
//}
|
||
|
|
}
|
||
|
|
|
||
|
|
void getCurrentTuple(){
|
||
|
|
if(currentTuple2){
|
||
|
|
currentTuple2->DeleteIfAllowed();
|
||
|
|
currentTuple2=0;
|
||
|
|
}
|
||
|
|
if(bufferIt){ // read from Buffer
|
||
|
|
currentTuple2 = bufferIt->GetNextTuple();
|
||
|
|
costEstimation -> incReadInIteration();
|
||
|
|
if(!currentTuple2){
|
||
|
|
if(finished){
|
||
|
|
return;
|
||
|
|
} else {
|
||
|
|
rebuildIndex();
|
||
|
|
resetBuffer();
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
return; // next current tuple found
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
currentTuple2 = s2.request();
|
||
|
|
costEstimation -> processedTupleInStream2();
|
||
|
|
costEstimation -> incReadInIteration();
|
||
|
|
if(!currentTuple2){
|
||
|
|
if(finished){
|
||
|
|
return;
|
||
|
|
} else {
|
||
|
|
rebuildIndex();
|
||
|
|
resetBuffer();
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
if(!finished){ //insert Tuple into buffer
|
||
|
|
if(!reopen){
|
||
|
|
if(!tuples2){
|
||
|
|
tuples2 = new TupleFile(currentTuple2->GetTupleType(),0);
|
||
|
|
}
|
||
|
|
//currentTuple2->PinAttributes();
|
||
|
|
//tuples2->AppendTupleNoLOBs(currentTuple2);
|
||
|
|
tuples2->Append(currentTuple2);
|
||
|
|
costEstimation -> incTuplesInTupleFile();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void rebuildIndex(){
|
||
|
|
if(treeIt){
|
||
|
|
delete treeIt;
|
||
|
|
treeIt=0;
|
||
|
|
}
|
||
|
|
if(index){
|
||
|
|
delete index;
|
||
|
|
}
|
||
|
|
for(unsigned int i=0;i<tuples1.size();i++){
|
||
|
|
tuples1[i]->DeleteIfAllowed();
|
||
|
|
tuples1[i]=0;
|
||
|
|
}
|
||
|
|
tuples1.clear();
|
||
|
|
index = new mmrtree::RtreeT<minDim, TupleId>(min,max);
|
||
|
|
Tuple* t1 = s1.request();
|
||
|
|
costEstimation -> processedTupleInStream1();
|
||
|
|
int noTuples = 0;
|
||
|
|
while( (t1!=0) && (noTuples < maxTuples -1)){
|
||
|
|
TupleId id = (TupleId) tuples1.size();
|
||
|
|
tuples1.push_back(t1);
|
||
|
|
Rectangle<dim1> box = ((Type1*)
|
||
|
|
t1->GetAttribute(a1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index->insert(box,id);
|
||
|
|
//} else {
|
||
|
|
index->insert(box.template project<minDim>(),id);
|
||
|
|
//}
|
||
|
|
t1 = s1.request();
|
||
|
|
costEstimation -> processedTupleInStream1();
|
||
|
|
noTuples++;
|
||
|
|
}
|
||
|
|
// process the last tuple if present
|
||
|
|
if(t1){
|
||
|
|
finished = false;
|
||
|
|
TupleId id = (TupleId) tuples1.size();
|
||
|
|
tuples1.push_back(t1);
|
||
|
|
Rectangle<dim1> box = ((Type1*)
|
||
|
|
t1->GetAttribute(a1))->BoundingBox();
|
||
|
|
//if(dim1==minDim){
|
||
|
|
// index->insert(box,id);
|
||
|
|
//} else {
|
||
|
|
index-> insert(box. template project<minDim>(), id);
|
||
|
|
//}
|
||
|
|
} else {
|
||
|
|
costEstimation -> setStream1Exhausted(true);
|
||
|
|
finished = true; // stream 1 exhausted
|
||
|
|
}
|
||
|
|
|
||
|
|
costEstimation -> readPartitionDone();
|
||
|
|
}
|
||
|
|
|
||
|
|
void resetBuffer(){
|
||
|
|
if(!reopen){
|
||
|
|
assert(tuples2);
|
||
|
|
if(bufferIt){
|
||
|
|
delete bufferIt;
|
||
|
|
}
|
||
|
|
costEstimation -> resetReadInIteration();
|
||
|
|
costEstimation -> setTupleFileWritten(true);
|
||
|
|
bufferIt = tuples2->MakeScan();
|
||
|
|
assert(currentTuple2==0);
|
||
|
|
currentTuple2 = bufferIt->GetNextTuple();
|
||
|
|
costEstimation -> incReadInIteration();
|
||
|
|
} else {
|
||
|
|
s2.close();
|
||
|
|
s2.open();
|
||
|
|
currentTuple2 = s2.request();
|
||
|
|
}
|
||
|
|
|
||
|
|
// Next iteration
|
||
|
|
scans++;
|
||
|
|
costEstimation -> setIteration(scans);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
};
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.3 Value Mapping
|
||
|
|
|
||
|
|
*/
|
||
|
|
template <class Type1, class Type2, int dim1, int dim2, int minDim,
|
||
|
|
bool reopen>
|
||
|
|
int itSpatialJoinVM( Word* args, Word& result, int message,
|
||
|
|
Word& local, Supplier s ) {
|
||
|
|
|
||
|
|
|
||
|
|
ItSpatialJoinInfo<Type1, Type2, dim1, dim2, minDim,reopen>* li =
|
||
|
|
(ItSpatialJoinInfo<Type1, Type2, dim1, dim2, minDim,reopen>*)
|
||
|
|
local.addr;
|
||
|
|
switch (message){
|
||
|
|
case OPEN : {
|
||
|
|
if(li){
|
||
|
|
delete li;
|
||
|
|
li = 0;
|
||
|
|
local.setAddr(0);
|
||
|
|
}
|
||
|
|
CcInt* Min = (CcInt*) args[4].addr;
|
||
|
|
CcInt* Max = (CcInt*) args[5].addr;
|
||
|
|
CcInt* MaxMem = (CcInt*) args[6].addr;
|
||
|
|
if(!Min->IsDefined() || !Max->IsDefined()){
|
||
|
|
return 0; // invalid option
|
||
|
|
}
|
||
|
|
int min = Min->GetIntval();
|
||
|
|
int max = Max->GetIntval();
|
||
|
|
if(min<2 || max < 2*min){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
size_t maxMem = (qp->GetMemorySize(s) * 1024); // in kB
|
||
|
|
if(MaxMem->IsDefined() && MaxMem->GetValue() > 0){
|
||
|
|
maxMem = MaxMem->GetValue();
|
||
|
|
}
|
||
|
|
|
||
|
|
ItSpatialJoinCostEstimation* costEstimation =
|
||
|
|
(ItSpatialJoinCostEstimation*) qp->getCostEstimation(s);
|
||
|
|
|
||
|
|
costEstimation -> init(NULL, NULL);
|
||
|
|
|
||
|
|
local.setAddr(new
|
||
|
|
ItSpatialJoinInfo<Type1,Type2,dim1,dim2, minDim, reopen>(
|
||
|
|
args[0], args[1],
|
||
|
|
min, max, maxMem,
|
||
|
|
((CcInt*)args[7].addr)->GetIntval(),
|
||
|
|
((CcInt*)args[8].addr)->GetIntval(),
|
||
|
|
nl->Second(GetTupleResultType(s)),
|
||
|
|
costEstimation ));
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
case REQUEST : {
|
||
|
|
if(!li){
|
||
|
|
return CANCEL;
|
||
|
|
}
|
||
|
|
result.addr = li->next();
|
||
|
|
return result.addr?YIELD:CANCEL;
|
||
|
|
|
||
|
|
}
|
||
|
|
case CLOSE :{
|
||
|
|
if(li){
|
||
|
|
delete li;
|
||
|
|
local.setAddr(0);
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return -1;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.4 Value Mapping Array
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Value mappings of itSpatialJoin (reopen == false: a second scan of
// stream 2 reads from a tuple file).
// One entry per supported combination of join attribute types, grouped
// by the type of the first attribute.
// NOTE(review): the order presumably has to match the index returned by
// the selection function of the operator - confirm before reordering.
ValueMapping ItSpatialJoinVM[] = {
  // first attribute: 2D spatial
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  StandardSpatialAttribute<2>, 2, 2, 2, false>,
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  StandardSpatialAttribute<3>, 2, 3, 2, false>,
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  temporalalgebra::MRegion, 2, 3, 2, false>,
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  temporalalgebra::MPoint, 2, 3, 2, false>,

  // first attribute: 3D spatial
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  StandardSpatialAttribute<2>, 3, 2, 2, false>,
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  StandardSpatialAttribute<3>, 3, 3, 3, false>,
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  temporalalgebra::MRegion, 3, 3, 3, false>,
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  temporalalgebra::MPoint, 3, 3, 3, false>,

  // first attribute: mregion
  itSpatialJoinVM<temporalalgebra::MRegion,
                  StandardSpatialAttribute<2>, 3, 2, 2, false>,
  itSpatialJoinVM<temporalalgebra::MRegion,
                  StandardSpatialAttribute<3>, 3, 3, 3, false>,
  itSpatialJoinVM<temporalalgebra::MRegion,
                  temporalalgebra::MRegion, 3, 3, 3, false>,
  itSpatialJoinVM<temporalalgebra::MRegion,
                  temporalalgebra::MPoint, 3, 3, 3, false>,

  // first attribute: mpoint
  itSpatialJoinVM<temporalalgebra::MPoint,
                  StandardSpatialAttribute<2>, 3, 2, 2, false>,
  itSpatialJoinVM<temporalalgebra::MPoint,
                  StandardSpatialAttribute<3>, 3, 3, 3, false>,
  itSpatialJoinVM<temporalalgebra::MPoint,
                  temporalalgebra::MRegion, 3, 3, 3, false>,
  itSpatialJoinVM<temporalalgebra::MPoint,
                  temporalalgebra::MPoint, 3, 3, 3, false>,
};
|
||
|
|
|
||
|
|
|
||
|
|
// Value mappings of itSpatialJoinR (reopen == true: stream 2 is closed
// and opened again for a further scan).
// One entry per supported combination of join attribute types, grouped
// by the type of the first attribute.
// NOTE(review): the order presumably has to match the index returned by
// the selection function of the operator - confirm before reordering.
ValueMapping ItSpatialJoinRVM[] = {
  // first attribute: 2D spatial
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  StandardSpatialAttribute<2>, 2, 2, 2, true>,
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  StandardSpatialAttribute<3>, 2, 3, 2, true>,
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  temporalalgebra::MRegion, 2, 3, 2, true>,
  itSpatialJoinVM<StandardSpatialAttribute<2>,
                  temporalalgebra::MPoint, 2, 3, 2, true>,

  // first attribute: 3D spatial
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  StandardSpatialAttribute<2>, 3, 2, 2, true>,
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  StandardSpatialAttribute<3>, 3, 3, 3, true>,
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  temporalalgebra::MRegion, 3, 3, 3, true>,
  itSpatialJoinVM<StandardSpatialAttribute<3>,
                  temporalalgebra::MPoint, 3, 3, 3, true>,

  // first attribute: mregion
  itSpatialJoinVM<temporalalgebra::MRegion,
                  StandardSpatialAttribute<2>, 3, 2, 2, true>,
  itSpatialJoinVM<temporalalgebra::MRegion,
                  StandardSpatialAttribute<3>, 3, 3, 3, true>,
  itSpatialJoinVM<temporalalgebra::MRegion,
                  temporalalgebra::MRegion, 3, 3, 3, true>,
  itSpatialJoinVM<temporalalgebra::MRegion,
                  temporalalgebra::MPoint, 3, 3, 3, true>,

  // first attribute: mpoint
  itSpatialJoinVM<temporalalgebra::MPoint,
                  StandardSpatialAttribute<2>, 3, 2, 2, true>,
  itSpatialJoinVM<temporalalgebra::MPoint,
                  StandardSpatialAttribute<3>, 3, 3, 3, true>,
  itSpatialJoinVM<temporalalgebra::MPoint,
                  temporalalgebra::MRegion, 3, 3, 3, true>,
  itSpatialJoinVM<temporalalgebra::MPoint,
                  temporalalgebra::MPoint, 3, 3, 3, true>,
};
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.5 Selection function
|
||
|
|
|
||
|
|
We use realJoinSelect here.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.6 Cost Estimation
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Factory function: creates a new cost estimation object for the
// itSpatialJoin operators. The caller receives the raw pointer.
CostEstimation* ItSpatialJoinCostEstimationFunc() {
   CostEstimation* estimation = new ItSpatialJoinCostEstimation();
   return estimation;
}
|
||
|
|
|
||
|
|
// Cost estimation factories of the itSpatialJoin operators: sixteen
// entries, all of them referring to the same factory function.
CreateCostEstimation ItSpatialJoinCostEstimationList[] = {
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,

   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,

   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,

   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc,
   ItSpatialJoinCostEstimationFunc
};
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.5.7 Operator instance
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator instance of itSpatialJoin (buffers stream 2 in a tuple file).
Operator itSpatialJoin(
    "itSpatialJoin",                 // operator name
    realJoinMMRTreeSpec,             // specification
    16,                              // number of entries in ItSpatialJoinVM
    ItSpatialJoinVM,                 // value mappings
    realJoinSelect ,                 // selection function
    realJoinMMRTreeTM,               // type mapping
    ItSpatialJoinCostEstimationList  // cost estimation factories
);
|
||
|
|
|
||
|
|
// Operator instance of itSpatialJoinR (reopens stream 2 for each scan).
Operator itSpatialJoinR(
    "itSpatialJoinR",                // operator name
    realJoinMMRTreeSpec,             // specification
    16,                              // number of entries in ItSpatialJoinRVM
    ItSpatialJoinRVM,                // value mappings
    realJoinSelect ,                 // selection function
    realJoinMMRTreeTM,               // type mapping
    ItSpatialJoinCostEstimationList  // cost estimation factories
);
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6 Similarity Join
|
||
|
|
|
||
|
|
1.6.1 Type Mapping
|
||
|
|
|
||
|
|
stream(tuple(A)) x stream(tuple(B)) x a[_]i x b[_]j x real x [ x int x int]
|
||
|
|
|
||
|
|
*/
|
||
|
|
template<bool usefun>
|
||
|
|
ListExpr SimJoinTM(ListExpr args){
|
||
|
|
|
||
|
|
string err = "stream(tuple(A)) x stream(tuple(B)) "
|
||
|
|
"x a_i x b_j x real [x int x int] expected";
|
||
|
|
|
||
|
|
int len = nl->ListLength(args);
|
||
|
|
int reqargs = usefun?6:5;
|
||
|
|
|
||
|
|
if( (len !=reqargs) && (len != (reqargs+2))){
|
||
|
|
return listutils::typeError(err);
|
||
|
|
}
|
||
|
|
ListExpr stream1 = nl->First(args);
|
||
|
|
if(!Stream<Tuple>::checkType(stream1)){
|
||
|
|
return listutils::typeError("first arg is not a tuple stream");
|
||
|
|
}
|
||
|
|
ListExpr stream2 = nl->Second(args);
|
||
|
|
if(!Stream<Tuple>::checkType(stream2)){
|
||
|
|
return listutils::typeError("second arg is not a tuple stream");
|
||
|
|
}
|
||
|
|
ListExpr aname1 = nl->Third(args);
|
||
|
|
if(!listutils::isSymbol(aname1)){
|
||
|
|
return listutils::typeError("third arg is not an attribute name");
|
||
|
|
}
|
||
|
|
ListExpr aname2 = nl->Fourth(args);
|
||
|
|
if(!listutils::isSymbol(aname2)){
|
||
|
|
return listutils::typeError("fourth arg is not an attribute name");
|
||
|
|
}
|
||
|
|
|
||
|
|
if(!CcReal::checkType(nl->Fifth(args))){
|
||
|
|
return listutils::typeError("ffth arg is not a real");
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
ListExpr attrList1 = nl->Second(nl->Second(stream1));
|
||
|
|
ListExpr attrType1;
|
||
|
|
int attrindex1 = listutils::findAttribute(attrList1,
|
||
|
|
nl->SymbolValue(aname1),attrType1);
|
||
|
|
if(attrindex1 == 0){
|
||
|
|
return listutils::typeError("Attribute " + nl->SymbolValue(aname1) +
|
||
|
|
" not known in first stream");
|
||
|
|
}
|
||
|
|
ListExpr attrList2 = nl->Second(nl->Second(stream2));
|
||
|
|
ListExpr attrType2;
|
||
|
|
int attrindex2 = listutils::findAttribute(attrList2,
|
||
|
|
nl->SymbolValue(aname2),attrType2);
|
||
|
|
if(attrindex2 == 0){
|
||
|
|
return listutils::typeError("Attribute " + nl->SymbolValue(aname2) +
|
||
|
|
" not known in second stream");
|
||
|
|
}
|
||
|
|
|
||
|
|
ListExpr resAttrList = listutils::concat(attrList1 , attrList2);
|
||
|
|
|
||
|
|
|
||
|
|
if(!listutils::isAttrList(resAttrList)){
|
||
|
|
return listutils::typeError("naming conflicts detected ");
|
||
|
|
}
|
||
|
|
|
||
|
|
if(usefun){ // the 6th argument must be a function
|
||
|
|
ListExpr funList = nl->Sixth(args);
|
||
|
|
args = nl->Rest(nl->Rest(nl->Rest(nl->Rest(nl->Rest(nl->Rest(args))))));
|
||
|
|
if(!listutils::isMap<2>(funList)){
|
||
|
|
return listutils::typeError("sixth element must be a function");
|
||
|
|
}
|
||
|
|
ListExpr farg1 = nl->Second(funList);
|
||
|
|
ListExpr farg2 = nl->Third(funList);
|
||
|
|
ListExpr fres = nl->Fourth(funList);
|
||
|
|
if(!CcReal::checkType(fres)){
|
||
|
|
return listutils::typeError("Function result not of type double");
|
||
|
|
}
|
||
|
|
if(!nl->Equal(farg1,farg2)){
|
||
|
|
return listutils::typeError("Function arguments differ");
|
||
|
|
}
|
||
|
|
if(!nl->Equal(attrType1,attrType2)){
|
||
|
|
return listutils::typeError("Type confliuct in stream attributes");
|
||
|
|
}
|
||
|
|
if(!nl->Equal(attrType1,farg1)){
|
||
|
|
return listutils::typeError("Type conflict between stream "
|
||
|
|
"attribute and function argument");
|
||
|
|
}
|
||
|
|
// map is ok
|
||
|
|
} else {
|
||
|
|
args = nl->Rest(nl->Rest(nl->Rest(nl->Rest(nl->Rest(args)))));
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
if(len==(reqargs+2)){
|
||
|
|
if(!CcInt::checkType(nl->First(args))){
|
||
|
|
return listutils::typeError("next to last element is not an int");
|
||
|
|
}
|
||
|
|
if(!CcInt::checkType(nl->Second(args))){
|
||
|
|
return listutils::typeError("last element is not an int");
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// build result
|
||
|
|
ListExpr appendList;
|
||
|
|
if(len==(reqargs+2)){
|
||
|
|
appendList = nl->TwoElemList(nl->IntAtom(attrindex1-1),
|
||
|
|
nl->IntAtom(attrindex2-1));
|
||
|
|
} else { // use standard values for min and max
|
||
|
|
appendList = nl->FourElemList(nl->IntAtom(4),
|
||
|
|
nl->IntAtom(8),
|
||
|
|
nl->IntAtom(attrindex1-1),
|
||
|
|
nl->IntAtom(attrindex2-1));
|
||
|
|
}
|
||
|
|
ListExpr resType = nl->TwoElemList( listutils::basicSymbol<Stream<Tuple> >(),
|
||
|
|
nl->TwoElemList(
|
||
|
|
listutils::basicSymbol<Tuple>(),
|
||
|
|
resAttrList));
|
||
|
|
ListExpr result = nl->ThreeElemList(
|
||
|
|
nl->SymbolAtom(Symbols::APPEND()),
|
||
|
|
appendList,
|
||
|
|
resType);
|
||
|
|
|
||
|
|
if(usefun){
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
|
||
|
|
// check for valid attribute types
|
||
|
|
if(CcInt::checkType(attrType1) && CcInt::checkType(attrType2)){
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
if(Point::checkType(attrType1) && Point::checkType(attrType2)){
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
if(CcString::checkType(attrType1) && CcString::checkType(attrType2)){
|
||
|
|
return result;
|
||
|
|
}
|
||
|
|
return listutils::typeError("Unsupported attributes");
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.2 Some distance functors
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Base class of the distance functors: a counter that derived classes
// increment for every distance computation.
class DistCount{
  public:
    // Starts with a counter of zero.
    DistCount(): cnt(0) {}

    // Sets the counter back to zero.
    void reset(){ cnt = 0; }

    // Returns the current value of the counter.
    size_t getCount() const { return cnt; }

  protected:
    size_t cnt;  // counter value
};
|
||
|
|
|
||
|
|
|
||
|
|
class IntDist: public DistCount{
|
||
|
|
public:
|
||
|
|
|
||
|
|
double operator()(const pair<CcInt*,int>& p1, const pair<CcInt*,int>& p2){
|
||
|
|
DistCount::cnt++;
|
||
|
|
assert(p1.first);
|
||
|
|
assert(p2.first);
|
||
|
|
if(!p1.first->IsDefined() && !p2.first->IsDefined()){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
if(!p1.first->IsDefined() || !p2.first->IsDefined()){
|
||
|
|
return numeric_limits<double>::max();
|
||
|
|
}
|
||
|
|
int i1 = p1.first->GetValue();
|
||
|
|
int i2 = p2.first->GetValue();
|
||
|
|
int c = i1-i2;
|
||
|
|
return c<0?-c:c;
|
||
|
|
}
|
||
|
|
|
||
|
|
ostream& print(const pair<CcInt*,int>& p, ostream& o){
|
||
|
|
o << *(p.first);
|
||
|
|
return o;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
class PointDist: public DistCount{
|
||
|
|
public:
|
||
|
|
double operator()(const pair<Point*,int>& p1, const pair<Point*,int>& p2){
|
||
|
|
cnt++;
|
||
|
|
assert(p1.first);
|
||
|
|
assert(p2.first);
|
||
|
|
if(!p1.first->IsDefined() && !p2.first->IsDefined()){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
if(!p1.first->IsDefined() || !p2.first->IsDefined()){
|
||
|
|
return numeric_limits<double>::max();
|
||
|
|
}
|
||
|
|
return p1.first->Distance(*(p2.first));
|
||
|
|
}
|
||
|
|
|
||
|
|
ostream& print(const pair<Point*,int>& p, ostream& o){
|
||
|
|
o << *(p.first);
|
||
|
|
return o;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
class StringDist: public DistCount{
|
||
|
|
public:
|
||
|
|
double operator()(const pair<CcString*,int>& p1,
|
||
|
|
const pair<CcString*,int>& p2){
|
||
|
|
cnt++;
|
||
|
|
assert(p1.first);
|
||
|
|
assert(p2.first);
|
||
|
|
if(!p1.first->IsDefined() && !p2.first->IsDefined()){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
if(!p1.first->IsDefined() || !p2.first->IsDefined()){
|
||
|
|
return numeric_limits<double>::max();
|
||
|
|
}
|
||
|
|
return stringutils::ld(p1.first->GetValue(), p2.first->GetValue());
|
||
|
|
}
|
||
|
|
|
||
|
|
ostream& print(const pair<CcString*,int>& p, ostream& o){
|
||
|
|
o << *(p.first);
|
||
|
|
return o;
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.2 LocalInfo Class
|
||
|
|
|
||
|
|
*/
|
||
|
|
template<class T, class DistComp>
|
||
|
|
class SimJoinLocalInfo{
|
||
|
|
public:
|
||
|
|
// Opens both streams, reads the first partition of the first stream and
// starts the range search for the first tuple of the second stream.
//
// _stream1, _stream2 : the argument streams
// _epsilon           : search radius of the range searches
// _min, _max         : parameters for the tree
// _a1, _a2           : attribute indexes within the tuples of the streams
// _maxMem            : memory limit
// resType            : list representation of the result tuple type
// _dc                : distance functor
SimJoinLocalInfo( Word _stream1, Word _stream2,
                  const double _epsilon,
                  const int _min, const int _max,
                  const int _a1, const int _a2,
                  const size_t _maxMem, ListExpr resType,
                  DistComp& _dc):
    stream1(_stream1), stream2(_stream2),
    epsilon(_epsilon),
    min(_min), max(_max),
    a1(_a1), a2(_a2),
    tt(0), tree(0), buffer_stream1(0),
    it1(0), stream1Finished(false), stream2Finished(false),
    maxMem(_maxMem), buffer_stream2(0), it2(0), current(0),
    dc(_dc),
    partitions(0),max_elems(0), comparisons(0){

    tt = new TupleType(resType);
    stream1.open();
    stream2.open();
    readNextPartition();
    current = getNextTuple();
    if(!current){ // no tuple available: it1 stays 0
       return;
    }
    // the second component of the search key is not a buffer position
    T searchKey = (T) current->GetAttribute(a2);
    pair<T,int> probe(searchKey,0);
    it1 = tree->rangeSearch(probe, epsilon);
}
|
||
|
|
|
||
|
|
// Prints some statistics of the join, closes both streams and releases
// the partition, the iterators and the buffer of the second stream.
~SimJoinLocalInfo(){

    cout << "Join finished with " << partitions
         << " scans of the second stream" << endl;
    cout << "The maximum number of elements within the index was "
         << max_elems << endl;

    if(it1){ // add the comparisons of the still active range search
       comparisons += it1->noComparisons();
    }

    cout << "no Comparisons : " << comparisons << endl;

    tt->DeleteIfAllowed();
    stream1.close();
    stream2.close();
    removePartition();
    if(it1){
       delete it1;
    }
    if(it2){
       delete it2;
    }
    if(buffer_stream2){
       delete buffer_stream2;
    }
}
|
||
|
|
|
||
|
|
Tuple* next(){
|
||
|
|
while(it1){ // range iterator initialized
|
||
|
|
if(it1->hasNext()){
|
||
|
|
pair<T,int> p = *(it1->next());
|
||
|
|
Tuple* t1 = (*buffer_stream1)[p.second];
|
||
|
|
Tuple* res = new Tuple(tt);
|
||
|
|
Concat(t1,current,res);
|
||
|
|
return res;
|
||
|
|
} else { // range iterator exhausted
|
||
|
|
current->DeleteIfAllowed();
|
||
|
|
current = getNextTuple();
|
||
|
|
//cout << "Comparisons for scan : " << it1->noComparisons() << endl;
|
||
|
|
//cout << "Processed stream1 : " << stream1.getCount() << endl;
|
||
|
|
//cout << "Processed stream2 : " << stream2.getCount() << endl;
|
||
|
|
comparisons += it1->noComparisons();
|
||
|
|
delete it1;
|
||
|
|
it1 = 0;
|
||
|
|
if(!current){
|
||
|
|
if(stream1Finished){
|
||
|
|
return 0;
|
||
|
|
} else {
|
||
|
|
readNextPartition();
|
||
|
|
if(it2){
|
||
|
|
delete it2;
|
||
|
|
it2 = 0;
|
||
|
|
}
|
||
|
|
current = getNextTuple();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
if(!current){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
T key = (T) current->GetAttribute(a2);
|
||
|
|
pair<T,int> p(key,0);
|
||
|
|
|
||
|
|
it1 = tree->rangeSearch(p, epsilon);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
private:
|
||
|
|
Stream<Tuple> stream1;
|
||
|
|
Stream<Tuple> stream2;
|
||
|
|
double epsilon;
|
||
|
|
int min;
|
||
|
|
int max;
|
||
|
|
int a1;
|
||
|
|
int a2;
|
||
|
|
TupleType* tt;
|
||
|
|
MMMTree<pair<T,int>, DistComp>* tree;
|
||
|
|
vector<Tuple*>* buffer_stream1;
|
||
|
|
RangeIterator<pair<T,int>,DistComp >* it1;
|
||
|
|
bool stream1Finished;
|
||
|
|
bool stream2Finished;
|
||
|
|
size_t maxMem;
|
||
|
|
Relation* buffer_stream2;
|
||
|
|
GenericRelationIterator* it2;
|
||
|
|
Tuple* current; // current tuple of stream 2
|
||
|
|
DistComp dc;
|
||
|
|
size_t partitions;
|
||
|
|
size_t max_elems;
|
||
|
|
size_t comparisons;
|
||
|
|
|
||
|
|
|
||
|
|
// replaces the current partition by the next one
|
||
|
|
void readNextPartition(){
|
||
|
|
partitions++;
|
||
|
|
removePartition();
|
||
|
|
if(stream1Finished){
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
buffer_stream1 = new vector<Tuple*>();
|
||
|
|
tree = new MMMTree<pair<T,int>,DistComp> (min,max,dc);
|
||
|
|
size_t usedSize = 0;
|
||
|
|
Tuple* t = stream1.request();
|
||
|
|
|
||
|
|
stream1Finished = t==0;
|
||
|
|
size_t nodesize = sizeof(MTreeNode<pair<T,int>,DistComp>);
|
||
|
|
|
||
|
|
size_t cnt = 0;
|
||
|
|
|
||
|
|
while(t){
|
||
|
|
cnt++;
|
||
|
|
T key = (T) t->GetAttribute(a1);
|
||
|
|
int pos = buffer_stream1->size();
|
||
|
|
buffer_stream1->push_back(t);
|
||
|
|
pair<T,int> p(key,pos);
|
||
|
|
tree->insert(p);
|
||
|
|
usedSize = usedSize + nodesize + t->GetMemSize();
|
||
|
|
if(usedSize < maxMem){
|
||
|
|
t = stream1.request();
|
||
|
|
if(t==0){
|
||
|
|
stream1Finished = true;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
t = 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
//cout << "tree created" << endl;
|
||
|
|
//cout << "comparisons required : " << tree->noComparisons() << endl;
|
||
|
|
//cout << cnt << " elements inserted" << endl;
|
||
|
|
comparisons += tree->noComparisons();
|
||
|
|
|
||
|
|
if(cnt > max_elems){
|
||
|
|
max_elems = cnt;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// removes the currently buffered partition
|
||
|
|
void removePartition(){
|
||
|
|
if(tree==0){
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
delete tree;
|
||
|
|
for(size_t i=0;i<buffer_stream1->size();i++){
|
||
|
|
(*buffer_stream1)[i]->DeleteIfAllowed();
|
||
|
|
}
|
||
|
|
delete buffer_stream1;
|
||
|
|
buffer_stream1 = 0;
|
||
|
|
tree = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
// returns the next tuple of stream 2
|
||
|
|
Tuple* getNextTuple(){
|
||
|
|
Tuple* res = 0;
|
||
|
|
if(!stream2Finished){
|
||
|
|
// read from stream
|
||
|
|
res = stream2.request();
|
||
|
|
if(res){
|
||
|
|
if(!stream1Finished){
|
||
|
|
if(!buffer_stream2){
|
||
|
|
buffer_stream2 = new Relation(res->GetTupleType(),true);
|
||
|
|
}
|
||
|
|
buffer_stream2->AppendTuple(res);
|
||
|
|
}
|
||
|
|
return res;
|
||
|
|
} else {
|
||
|
|
stream2Finished = true;
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
assert(buffer_stream2);
|
||
|
|
if(!it2){
|
||
|
|
it2 = buffer_stream2->MakeScan();
|
||
|
|
}
|
||
|
|
return it2->GetNextTuple();
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.3 Value Mapping
|
||
|
|
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
template <class T, class DistComp>
|
||
|
|
int SimJoinVMT( Word* args, Word& result, int message,
|
||
|
|
Word& local, Supplier s ) {
|
||
|
|
|
||
|
|
SimJoinLocalInfo<T,DistComp>* li = (SimJoinLocalInfo<T,DistComp>*) local.addr;
|
||
|
|
|
||
|
|
switch(message){
|
||
|
|
case OPEN: {
|
||
|
|
if(li){
|
||
|
|
delete li;
|
||
|
|
local.addr = 0;
|
||
|
|
}
|
||
|
|
CcInt* Min = (CcInt*) args[5].addr;
|
||
|
|
CcInt* Max = (CcInt*) args[6].addr;
|
||
|
|
int min = 4;
|
||
|
|
int max = 8;
|
||
|
|
if(Min->IsDefined() && Max->IsDefined()){
|
||
|
|
int m1 = Min->GetValue();
|
||
|
|
int m2 = Max->GetValue();
|
||
|
|
if((m1 >= 2) && (m2>= 2* m1)){
|
||
|
|
min = m1;
|
||
|
|
max = m2;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
CcReal* eps = (CcReal*) args[4].addr;
|
||
|
|
if(!eps->IsDefined()){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
double epsilon = eps->GetValue();
|
||
|
|
if(epsilon < 0){
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
int a1 = ((CcInt*)args[7].addr)->GetValue();
|
||
|
|
int a2 = ((CcInt*)args[8].addr)->GetValue();
|
||
|
|
size_t memSize = qp->GetMemorySize(s) * 1024 * 1024;
|
||
|
|
ListExpr resType = nl->Second(GetTupleResultType(s));
|
||
|
|
DistComp dc;
|
||
|
|
local.addr = new SimJoinLocalInfo<T,DistComp>(args[0], args[1],
|
||
|
|
epsilon, min, max, a1, a2,
|
||
|
|
memSize,
|
||
|
|
resType, dc);
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
case REQUEST: {
|
||
|
|
result.addr = li?li->next():0;
|
||
|
|
return result.addr?YIELD:CANCEL;
|
||
|
|
}
|
||
|
|
case CLOSE: {
|
||
|
|
if(li){
|
||
|
|
delete li;
|
||
|
|
local.addr = 0;
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.4 Specification
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
// Specification of the simJoin operator:
// signature, syntax, meaning and an example query.
OperatorSpec SimJoinSpec(
   " stream(tuple(A)) x stream(tuple(B)) x a_i x b_j "
   "x real [ x int x int ] -> stream (tuple(AB))",
   " _ _ simJoin [ a_i, b_j, epsilon, min, max] ",
   "Similarity join based on predefined distance functions",
   " query Orte feed Orte feed {o} simJoin[ Name, Name_o, 4.0] count"
);
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.5 Value Mapping array and selection function
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Value mappings of simJoin. The position within the array corresponds to
// the number returned by SimJoinSelect for the type of the join attribute.
ValueMapping SimJoinVM[] = {
   SimJoinVMT<CcInt*,IntDist>,       // 0 : int
   SimJoinVMT<Point*,PointDist>,     // 1 : point
   SimJoinVMT<CcString*,StringDist>  // 2 : string
};
|
||
|
|
|
||
|
|
// Selection function of simJoin.
// Looks up the attribute named by the third argument in the attribute list
// of the first stream and returns 0 for int, 1 for point, 2 for string and
// -1 for any other attribute type.
int SimJoinSelect(ListExpr args){
   ListExpr firstStream = nl->First(args);
   ListExpr attrList = nl->Second(nl->Second(firstStream));
   string attrName = nl->SymbolValue(nl->Third(args));
   ListExpr attrType;
   listutils::findAttribute(attrList, attrName, attrType);
   if(CcInt::checkType(attrType)){
      return 0;
   } else if(Point::checkType(attrType)){
      return 1;
   } else if(CcString::checkType(attrType)){
      return 2;
   }
   return -1;
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
1.6.6 Operator instance
|
||
|
|
|
||
|
|
*/
|
||
|
|
// Operator instance of simJoin (overloaded, three value mappings).
Operator SimJoinOp(
   "simJoin",             // name
   SimJoinSpec.getStr(),  // specification
   3,                     // number of entries in SimJoinVM
   SimJoinVM,             // value mappings
   SimJoinSelect,         // selection function
   SimJoinTM<false>       // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
class FunDistComp: public DistCount{
|
||
|
|
public:
|
||
|
|
FunDistComp(ArgVectorPointer _funargs, QueryProcessor* _qp, Word _fun):
|
||
|
|
funargs(_funargs), qp(_qp), fun(_fun) { }
|
||
|
|
|
||
|
|
|
||
|
|
double operator()(const pair<Attribute*, int>& p1,
|
||
|
|
const pair<Attribute*, int>& p2){
|
||
|
|
cnt++;
|
||
|
|
assert(p1.first);
|
||
|
|
assert(p2.first);
|
||
|
|
|
||
|
|
(*funargs)[0] = p1.first;
|
||
|
|
(*funargs)[1] = p2.first;
|
||
|
|
Word res;
|
||
|
|
qp->Request(fun.addr,res);
|
||
|
|
CcReal* r = (CcReal*) res.addr;
|
||
|
|
if(!r->IsDefined()){
|
||
|
|
return numeric_limits<double>::max();
|
||
|
|
}
|
||
|
|
double rd = r->GetValue();
|
||
|
|
return rd<0?-rd:rd;
|
||
|
|
}
|
||
|
|
|
||
|
|
ostream& print(const pair<Word,int>& p, ostream& o){
|
||
|
|
return o;
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
ArgVectorPointer funargs;
|
||
|
|
QueryProcessor* qp;
|
||
|
|
Word fun;
|
||
|
|
};
|
||
|
|
|
||
|
|
// Value mapping of the simjoinfun operator (similarity join using a distance
// function given in the query).
//
// Arguments used: args[0], args[1] - the two streams,
//                 args[4]          - epsilon (real),
//                 args[5]          - the distance function,
//                 args[6], args[7] - min and max parameters of the tree,
//                 args[8], args[9] - indexes of the join attributes.
// OPEN creates the local info (none is created for an undefined or negative
// epsilon), REQUEST returns the next result tuple, CLOSE destroys the
// local info.
int SimJoinFunVM( Word* args, Word& result, int message,
                  Word& local, Supplier s ) {

   typedef SimJoinLocalInfo<Attribute*, FunDistComp> LocalInfo;
   LocalInfo* li = (LocalInfo*) local.addr;

   if(message == OPEN){
      if(li){ // remove the state of a previous run
         delete li;
         local.addr = 0;
      }
      // The defaults 4 and 8 are kept unless both arguments are defined
      // and fulfil  2 <= min  and  2*min <= max.
      CcInt* minArg = (CcInt*) args[6].addr;
      CcInt* maxArg = (CcInt*) args[7].addr;
      int min = 4;
      int max = 8;
      if(minArg->IsDefined() && maxArg->IsDefined()){
         int givenMin = minArg->GetValue();
         int givenMax = maxArg->GetValue();
         if((givenMin >= 2) && (givenMax >= 2 * givenMin)){
            min = givenMin;
            max = givenMax;
         }
      }
      CcReal* epsArg = (CcReal*) args[4].addr;
      if(!epsArg->IsDefined()){
         return 0; // no local info, REQUEST will deliver CANCEL
      }
      double epsilon = epsArg->GetValue();
      if(epsilon < 0){
         return 0;
      }
      int a1 = ((CcInt*)args[8].addr)->GetValue();
      int a2 = ((CcInt*)args[9].addr)->GetValue();
      // the result of GetMemorySize is scaled by 1024*1024
      size_t memSize = qp->GetMemorySize(s) * 1024 * 1024;
      ListExpr resType = nl->Second(GetTupleResultType(s));
      // create the distance functor from the function argument (args[5])
      ArgVectorPointer funargs = qp->Argument(args[5].addr);
      Word fun = args[5];
      FunDistComp dc(funargs,qp,fun);
      local.addr = new LocalInfo(args[0], args[1],
                                 epsilon, min, max, a1, a2,
                                 memSize,
                                 resType, dc);
      return 0;
   }

   if(message == REQUEST){
      result.addr = li ? li->next() : 0;
      if(result.addr){
         return YIELD;
      }
      return CANCEL;
   }

   if(message == CLOSE){
      if(li){
         delete li;
         local.addr = 0;
      }
      return 0;
   }

   return 0;
}
|
||
|
|
|
||
|
|
|
||
|
|
// Specification of the simjoinfun operator:
// signature, syntax, meaning and an example query.
// Syntax and example use the name under which the operator is registered
// ("simjoinfun").
OperatorSpec SimJoinFunSpec(
   " stream(tuple(A)) x stream(tuple(B)) x a_i x b_j x "
   "real x (fun: t_i x t_j -> real) [ x int x int ] -> stream (tuple(AB))",
   " _ _ simjoinfun [ a_i, b_j, epsilon, distfun, min, max] ",
   "Similarity join based on user defined distance functions",
   " query Orte feed Orte feed {o} simjoinfun[ Name, Name_o, 4.0, "
   "fun(a:string, b: string) 1.0*(length(a)-length(b)) ] count"
);
|
||
|
|
|
||
|
|
// Operator instance of simjoinfun (a single value mapping).
Operator SimJoinFunOp(
   "simjoinfun",             // name
   SimJoinFunSpec.getStr(),  // specification
   SimJoinFunVM,             // value mapping
   Operator::SimpleSelect,   // selection function
   SimJoinTM<true>           // type mapping
);
|
||
|
|
|
||
|
|
|
||
|
|
|
||
|
|
/*
|
||
|
|
2 Algebra Definition
|
||
|
|
|
||
|
|
2.1 Algebra Class
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
class MMRTreeAlgebra : public Algebra {
|
||
|
|
public:
|
||
|
|
MMRTreeAlgebra() : Algebra()
|
||
|
|
{
|
||
|
|
// operators using mmrtrees
|
||
|
|
AddOperator(&insertMMRTree);
|
||
|
|
AddOperator(&statMMRTree);
|
||
|
|
AddOperator(&realJoinMMRTree);
|
||
|
|
realJoinMMRTree.SetUsesMemory();
|
||
|
|
AddOperator(&realJoinMMRTreeVec);
|
||
|
|
realJoinMMRTreeVec.SetUsesMemory();
|
||
|
|
AddOperator(&joinMMRTreeIt);
|
||
|
|
joinMMRTreeIt.SetUsesMemory();
|
||
|
|
AddOperator(&joinMMRTreeItVec);
|
||
|
|
joinMMRTreeItVec.SetUsesMemory();
|
||
|
|
AddOperator(&itSpatialJoin);
|
||
|
|
itSpatialJoin.SetUsesMemory();
|
||
|
|
|
||
|
|
AddOperator(&itSpatialJoinR);
|
||
|
|
itSpatialJoinR.SetUsesMemory();
|
||
|
|
|
||
|
|
AddOperator(&SimJoinOp);
|
||
|
|
SimJoinOp.SetUsesMemory();
|
||
|
|
|
||
|
|
AddOperator(&SimJoinFunOp);
|
||
|
|
SimJoinFunOp.SetUsesMemory();
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
/*
|
||
|
|
2.2 Algebra Initialization
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
extern "C"
|
||
|
|
Algebra*
|
||
|
|
InitializeMMRTreeAlgebra( NestedList* nlRef, QueryProcessor* qpRef )
|
||
|
|
{
|
||
|
|
nl = nlRef;
|
||
|
|
qp = qpRef;
|
||
|
|
return (new MMRTreeAlgebra());
|
||
|
|
}
|
||
|
|
|
||
|
|
|