Files
secondo/Algebras/MMRTree/MMRTreeAlgebra.h
2026-01-23 17:03:45 +08:00

507 lines
12 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2011, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software{} you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation{} either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY{} without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO{} if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//[_] [\_]
*/
#ifndef MMRTREEALGEBRA_H
#define MMRTREEALGEBRA_H
#include <sstream>
#include "NestedList.h"
#include "QueryProcessor.h"
#include "Stream.h"
#include "ListUtils.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "Algebras/Rectangle/RectangleAlgebra.h"
#include "TupleStore1.h"
#include "TupleStore2.h"
#include "TupleStore3.h"
// Global nested-list and query-processor handles. They are only declared
// here (extern); this header uses nl and qp inside joinRTreeVM.
extern NestedList* nl;
extern QueryProcessor* qp;
/*
1 MMRTreeAlgebra
This algebra provides some spatial join algorithms for small datasets.
It uses an r-tree which is held completely in main memory.
The incoming tuples of the first stream are also held in main memory in
some versions of these operators. Other operators restrict the memory usage
for tuples but not for the tree.
1.1 Auxiliary Functions
~hMem~
This function formats a byte value as a human readable value.
*/
// Declaration only; no definition is given in this header.
// mem: the byte value to format.
std::string hMem(size_t mem);
/*
1.2 TupleStore Definition
This typedef defines the tuple store to use. There are several possibilities:
----
TupleStore1: Uses a vector and, after exhausting the available memory, a
relation to store tuples
TupleStore2: Uses only a vector. All tuples are held in main memory
TupleStore3: Uses only a relation. All tuples are stored persistently.
----
*/
// Currently selected implementation: TupleStore1. RealJoinTreeLocalInfo
// below refers to the store only through this alias.
typedef TupleStore1 TupleStore;
/*
Auxiliary class ~RealJoinTreeLocalInfo~
The template parameter Tree determines the structure
used as index.
*/
template <class Tree, class Type1, class Type2, int dim1, int dim2, int minDim>
class RealJoinTreeLocalInfo{

  public:

/*
~Constructor~

The parameters are:

----
   _s1     : first stream
   _s2     : second stream
   _i1     : index of an attribute of type Type1 in _s1
   _i2     : index of an attribute of type Type2 in _s2
   _tt     : list describing the result tuple type
   maxMem  : memory limit handed to the tuple store holding the tuples
             of _s1 (documented as kB)
   _rect1,
   _rect2  : accepted for interface compatibility, not evaluated
----

The constructor is blocking. This means, the first stream is
processed completely. Each tuple is inserted into a tuple store
and into a Tree structure. The index is default constructed.

*/
    RealJoinTreeLocalInfo(Word& _s1, Word& _s2, int _i1,
                          int _i2, ListExpr _tt, size_t maxMem,
                          bool _rect1 = true, bool _rect2=true):
        ind(), s2(_s2), i2(_i2), tt(new TupleType(_tt)), lastRes(),
        currentTuple(0), tb(0) {
       Stream<Tuple> s1(_s1);
       init(s1,_i1,maxMem);
    }

/*
~Constructor~

The parameters are:

----
   _s1     : first stream
   _s2     : second stream
   _i1     : index of an attribute of type Type1 in _s1
   _i2     : index of an attribute of type Type2 in _s2
   _tt     : list describing the result tuple type
   min     : minimum number of entries within a node of the index
   max     : maximum number of entries within a node of the index
   maxMem  : memory limit handed to the tuple store holding the tuples
             of _s1 (documented as kB)
----

Like the first constructor, this one consumes the first stream completely.

*/
    RealJoinTreeLocalInfo(Word& _s1, Word& _s2, int _i1,
                          int _i2, ListExpr _tt, int min, int max,
                          size_t maxMem):
        ind(min,max), s2(_s2), i2(_i2), tt(new TupleType(_tt)),
        lastRes(), currentTuple(0), tb(0) {
       Stream<Tuple> s1(_s1);
       init(s1,_i1,maxMem);
    }

/*
~Destructor~

Closes the second stream (it was opened in ~init~) and releases the
tuple type, the tuple store and a possibly pending tuple of the
second stream.

*/
    ~RealJoinTreeLocalInfo(){
       s2.close();
       tt->DeleteIfAllowed();
       delete tb; // deleting a null pointer is a no-op
       if(currentTuple){
          currentTuple->DeleteIfAllowed();
       }
    }

/*
~nextTuple~

Returns the next result tuple or 0 if no more tuples are available.
The result is newly allocated with the result tuple type.

*/
    Tuple* nextTuple(){
       // all candidates of the current s2 tuple are consumed: release it
       if(lastRes.empty() && (currentTuple!=0)){
          currentTuple->DeleteIfAllowed();
          currentTuple = 0;
       }
       // fetch tuples from s2 until one has candidates in the index
       while(lastRes.empty()){
          Tuple* t = s2.request();
          if(t==0){
             return 0;
          }
          Rectangle<dim2>
                 r(((Type2*) t->GetAttribute(i2))->BoundingBox());
          // the box is always projected to minDim dimensions,
          // even if dim2 == minDim
          ind.findSimple(r. template project<minDim>(), lastRes);
          if(lastRes.empty()){
             t->DeleteIfAllowed();
          } else {
             currentTuple = t;
          }
       }
       // produce one result from the last pending candidate
       std::pair<Rectangle<minDim>,TupleId> p1 = lastRes.back();
       lastRes.pop_back();
       Tuple* result = new Tuple(tt);
       Tuple* t1 = tb->GetTuple(p1.second);
       Concat(t1, currentTuple, result);
       t1->DeleteIfAllowed(); // give up the reference returned by the store
       return result;
    }

  private:
    Tree ind;                 // index over the bounding boxes of s1
    Stream<Tuple> s2;         // second (probing) stream
    int i2;                   // attribute index used in s2
    TupleType* tt;            // result tuple type
    // candidates (box, tuple id) still pending for currentTuple
    std::vector<std::pair<Rectangle<minDim>,TupleId> > lastRes;
    Tuple* currentTuple;      // tuple of s2 the candidates belong to, or 0
    TupleStore* tb;           // tuples of s1; 0 if s1 was empty

    // This class owns tt, tb and currentTuple and releases them in the
    // destructor. A member-wise copy would release them twice, hence
    // copying is disabled (declared, intentionally not defined).
    RealJoinTreeLocalInfo(const RealJoinTreeLocalInfo&);
    RealJoinTreeLocalInfo& operator=(const RealJoinTreeLocalInfo&);

/*
~init~

Builds the index from the first stream: each tuple is appended to the
tuple store and its projected bounding box is inserted into the index
together with the tuple id returned by the store. The store is only
created if the stream delivers at least one tuple. Afterwards the
first stream is closed and the second stream is opened.

*/
    void init(Stream<Tuple>& s1, int _i1, size_t maxMem){
       s1.open();
       Tuple* t = s1.request();
       if(t){
          tb = new TupleStore(maxMem);
       } else {
          tb = 0;
       }
       while(t){
          TupleId id = tb->AppendTuple(t);
          Rectangle<dim1> r = ((Type1*)
                               t->GetAttribute(_i1))->BoundingBox();
          // the box is always projected to minDim dimensions,
          // even if dim1 == minDim
          ind.insert(r. template project<minDim>(),id);
          t->DeleteIfAllowed();
          t = s1.request();
       }
       s1.close();
       s2.open();
       currentTuple = 0;
    }
};
template <class Tree, class Type1, class Type2, int dim1, int dim2, int minDim >
class RealJoinTreeVecLocalInfo{

/*
Auxiliary class ~RealJoinTreeVecLocalInfo~

Variant of the join helper that keeps all tuples of the first stream
in a vector instead of a tuple store. The tuple id stored in the
index is the position of the tuple within that vector.

*/

  public:

/*
~Constructor~

The parameters are:

----
   _s1     : first stream
   _s2     : second stream
   _i1     : index of an attribute of type Type1 in _s1
   _i2     : index of an attribute of type Type2 in _s2
   _tt     : list describing the result tuple type
   maxMem  : forwarded to init, where it is not evaluated
----

The constructor is blocking. This means, the first stream is
processed completely. Each tuple is inserted into a vector
and into a Tree structure. The index is default constructed.

*/
    RealJoinTreeVecLocalInfo(Word& _s1, Word& _s2, int _i1,
                             int _i2, ListExpr _tt, size_t maxMem):
        ind(), s2(_s2), i2(_i2), tt(new TupleType(_tt)), lastRes(),
        currentTuple(0), vec() {
       Stream<Tuple> s1(_s1);
       init(s1,_i1,maxMem);
    }

/*
~Constructor~

The parameters are:

----
   _s1     : first stream
   _s2     : second stream
   _i1     : index of an attribute of type Type1 in _s1
   _i2     : index of an attribute of type Type2 in _s2
   _tt     : list describing the result tuple type
   min     : minimum number of entries within a node of the index
   max     : maximum number of entries within a node of the index
   maxMem  : forwarded to init, where it is not evaluated
----

Like the first constructor, this one consumes the first stream completely.

*/
    RealJoinTreeVecLocalInfo(Word& _s1, Word& _s2, int _i1,
                             int _i2, ListExpr _tt, int min, int max,
                             size_t maxMem):
        ind(min,max), s2(_s2), i2(_i2), tt(new TupleType(_tt)), lastRes(),
        currentTuple(0), vec() {
       Stream<Tuple> s1(_s1);
       init(s1,_i1,maxMem);
    }

/*
~Destructor~

Closes the second stream (it was opened in ~init~) and releases the
tuple type, all tuples kept in the vector and a possibly pending
tuple of the second stream.

*/
    ~RealJoinTreeVecLocalInfo(){
       s2.close();
       tt->DeleteIfAllowed();
       std::vector<Tuple*>::iterator it;
       for(it=vec.begin(); it!=vec.end(); ++it){
          (*it)->DeleteIfAllowed();
       }
       vec.clear();
       if(currentTuple){
          currentTuple->DeleteIfAllowed();
       }
    }

/*
~nextTuple~

Returns the next result tuple or 0 if no more tuples are available.
The result is newly allocated with the result tuple type.

*/
    Tuple* nextTuple(){
       // all candidates of the current s2 tuple are consumed: release it
       if(lastRes.empty() && (currentTuple!=0)){
          currentTuple->DeleteIfAllowed();
          currentTuple = 0;
       }
       // fetch tuples from s2 until one has candidates in the index
       while(lastRes.empty()){
          Tuple* t = s2.request();
          if(t==0){
             return 0;
          }
          Rectangle<dim2> r = ((Type2*)
                               t->GetAttribute(i2))->BoundingBox();
          // the box is always projected to minDim dimensions,
          // even if dim2 == minDim
          ind.findSimple(r. template project<minDim>(), lastRes);
          if(lastRes.empty()){
             t->DeleteIfAllowed();
          } else {
             currentTuple = t;
          }
       }
       // produce one result from the last pending candidate
       std::pair<Rectangle<minDim>,TupleId> p1 = lastRes.back();
       lastRes.pop_back();
       Tuple* result = new Tuple(tt);
       // t1 stays owned by vec, so it is not released here
       Tuple* t1 = vec[p1.second];
       Concat(t1, currentTuple, result);
       return result;
    }

  private:
    Tree ind;                 // index over the bounding boxes of s1
    Stream<Tuple> s2;         // second (probing) stream
    int i2;                   // attribute index used in s2
    TupleType* tt;            // result tuple type
    // candidates (box, position in vec) still pending for currentTuple
    std::vector<std::pair<Rectangle<minDim>,TupleId> > lastRes;
    Tuple* currentTuple;      // tuple of s2 the candidates belong to, or 0
    std::vector<Tuple*> vec;  // all tuples of s1

    // This class owns tt, currentTuple and the tuples in vec and releases
    // them in the destructor. A member-wise copy would release them twice,
    // hence copying is disabled (declared, intentionally not defined).
    RealJoinTreeVecLocalInfo(const RealJoinTreeVecLocalInfo&);
    RealJoinTreeVecLocalInfo& operator=(const RealJoinTreeVecLocalInfo&);

/*
~init~

Builds the index from the first stream: each tuple is appended to the
vector and its projected bounding box is inserted into the index
together with the tuple's position in the vector. Afterwards the first
stream is closed and the second stream is opened. The parameter
~maxMem~ is not used by this variant.

*/
    void init(Stream<Tuple>& s1, int _i1, size_t maxMem){
       s1.open();
       Tuple* t = s1.request();
       while(t){
          TupleId id = vec.size();
          vec.push_back(t); // the vector keeps the tuple, no release here
          Rectangle<dim1> r = ((Type1*)
                               t->GetAttribute(_i1))->BoundingBox();
          // the box is always projected to minDim dimensions,
          // even if dim1 == minDim
          ind.insert(r. template project<minDim>(), id);
          t = s1.request();
       }
       s1.close();
       s2.open();
       currentTuple = 0;
    }
};
/*
Selection function
*/
// Declaration only; no definition is given in this header.
// args: nested list passed to the selection function.
// NOTE(review): presumably the result is the index of the value mapping
// to use - confirm against the implementation.
int realJoinSelect(ListExpr args);
/*
1.4.2 Value Mapping
*/
template<class JLI>
int joinRTreeVM( Word* args, Word& result, int message,
                 Word& local, Supplier s ){

   // Stream value mapping; JLI is the local info class doing the join.
   // Arguments read here:
   //   args[0], args[1] : the two input streams (handed to JLI)
   //   args[4], args[5] : min / max number of entries of an index node
   //   args[6]          : optional memory limit
   //   args[7], args[8] : attribute indices within the two streams
   // Returns 0 for OPEN and CLOSE, YIELD or CANCEL for REQUEST and
   // -1 for any other message.

   JLI* info = static_cast<JLI*>(local.addr);

   if(message == OPEN){
      // drop a local info left over from a previous run
      if(info != 0){
         delete info;
         info = 0;
         local.addr = 0;
      }

      // default: memory reported by the query processor, scaled by 1024
      // (in kB); a defined, positive args[6] overrides it
      CcInt* memArg = static_cast<CcInt*>(args[6].addr);
      size_t maxMem = (qp->GetMemorySize(s) * 1024);
      const bool userLimit = memArg->IsDefined() && memArg->GetValue() > 0;
      if(userLimit){
         maxMem = memArg->GetValue();
      }

      // without valid node sizes no local info is created, so each
      // REQUEST answers with CANCEL
      CcInt* minArg = static_cast<CcInt*>(args[4].addr);
      CcInt* maxArg = static_cast<CcInt*>(args[5].addr);
      if(!(minArg->IsDefined() && maxArg->IsDefined())){
         return 0;
      }
      int min = minArg->GetValue();
      int max = maxArg->GetValue();
      if(!(min >= 1 && max >= (2*min))){
         return 0;
      }

      int i1 = static_cast<CcInt*>(args[7].addr)->GetValue();
      int i2 = static_cast<CcInt*>(args[8].addr)->GetValue();
      local.addr = new JLI(args[0], args[1], i1, i2,
                           nl->Second(GetTupleResultType(s)),
                           min, max, maxMem);
      return 0;
   }

   if(message == REQUEST){
      if(info == 0){
         return CANCEL;
      }
      result.addr = info->nextTuple();
      if(result.addr){
         return YIELD;
      }
      return CANCEL;
   }

   if(message == CLOSE){
      if(info != 0){
         delete info;
         local.addr = 0;
      }
      return 0;
   }

   return -1;
}
#endif