/* ---- This file is part of SECONDO. Copyright (C) 2009, University in Hagen, Faculty of Mathematics and Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- 1 Implementation File SortMergeJoin.cpp June 2009, Sven Jungnickel. Initial version 2 Includes and defines */ #include #include "stdlib.h" #include "LogMsg.h" #include "QueryProcessor.h" #include "StandardTypes.h" #include "RTuple.h" #include "Sort.h" #include "SortMergeJoin.h" /* 3 External linking */ extern QueryProcessor* qp; using namespace std; /* 4 Implementation of class ~SortMergeJoinLocalInfo~ */ namespace extrel2 { SortMergeJoinLocalInfo::SortMergeJoinLocalInfo( Word streamA, int attrIndexA, Word streamB, int attrIndexB, Supplier s, ProgressLocalInfo* p, size_t maxMemSize ) : ProgressWrapper(p) , streamA(streamA) , streamB(streamB) , tmpB(0) , cmp(0) , attrIndexA(attrIndexA) , attrIndexB(attrIndexB) , traceMode(RTFlag::isActive("ERA:TraceMergeJoin")) , continueMerge(false) { grpB = 0; iter = 0; liA = 0; sliA = 0; liB = 0; sliB = 0; // sort the input streams SortOrderSpecification specA; SortOrderSpecification specB; specA.push_back( pair(attrIndexA, true) ); specB.push_back( pair(attrIndexB, true) ); // set available main memory (MAX_MEMORY) setMemory(maxMemSize, s); liA = new SortProgressLocalInfo(); progress->firstLocalInfo = liA; size_t reservedMemory = MAX_MEMORY / 5; // 20 percent for merge if(reservedMemory < 1024){ reservedMemory = 1024; } size_t memForSort = MAX_MEMORY - reservedMemory; sliA = new SortAlgorithm(streamA, specA, liA, s, UINT_MAX, memForSort / 2 ); memForSort -= sliA->getUsedMemory(); liB = new SortProgressLocalInfo(); progress->secondLocalInfo = liB; sliB = new SortAlgorithm(streamB, specB, liB, s, UINT_MAX, memForSort); ListExpr resultType = qp->GetNumType(s); resultTupleType = new TupleType( nl->Second( resultType ) ); memForSort -= sliB->getUsedMemory(); if(memForSort>0){ reservedMemory += memForSort; } ptA.setTuple( NextTupleA() ); ptB.setTuple( NextTupleB() ); grpB = new TupleBuffer2( reservedMemory ); if ( traceMode ) { cmsg.info() << "-------------------- SortMerge-Join2 ------------------" << endl << "Memory: \t" << MAX_MEMORY / 1024 << " KByte" << endl; cmsg.send(); } } SortMergeJoinLocalInfo::~SortMergeJoinLocalInfo() { if ( sliA ) { delete sliA; sliA = 0; } if ( sliB ) { delete sliB; sliB = 0; } if ( liA ) { delete liA; liA = 0; } if ( liB ) { delete liB; liB = 0; } if ( grpB ) { delete grpB; grpB = 0; } if ( iter ) { delete iter; iter = 0; } resultTupleType->DeleteIfAllowed(); } void SortMergeJoinLocalInfo::setMemory(size_t maxMemory, Supplier s) { if ( maxMemory == UINT_MAX ) { MAX_MEMORY = qp->GetMemorySize(s) * 1024 * 1024; // in bytes } else if ( maxMemory < MIN_USER_DEF_MEMORY ) { MAX_MEMORY = MIN_USER_DEF_MEMORY; } else { MAX_MEMORY = maxMemory; } } Tuple* SortMergeJoinLocalInfo::NextResultTuple() { Tuple* resultTuple = 0; if ( !continueMerge && ptB == 0 ) { return 0; } while( ptA != 0 ) { if ( !continueMerge && ptB != 0 ) { tmpB = ptB; grpB->AppendTuple(tmpB.tuple); // advance the tuple pointer ptB.setTuple( NextTupleB() ); // collect a group of tuples from B which // have the same attribute value bool done = false; while ( !done && ptB != 0 ) { int cmp = CompareTuplesB( tmpB.tuple, ptB.tuple ); if ( cmp == 0) { // append equal tuples to group grpB->AppendTuple(ptB.tuple); // release tuple of input B ptB.setTuple( NextTupleB() ); } else { done = true; } } // end collect group cmp = CompareTuples( ptA.tuple, tmpB.tuple ); while ( ptA != 0 && cmp < 0 ) { // skip tuples from A while they are smaller than the // value of the tuples in grpB ptA.setTuple( NextTupleA() ); if (ptA != 0) { cmp = CompareTuples( ptA.tuple, tmpB.tuple ); } } } // continue or start a merge with grpB while ( ptA != 0 && cmp == 0 ) { // join ptA with grpB if (!continueMerge) { iter = grpB->MakeScan(); continueMerge = true; resultTuple = NextConcat(); if (resultTuple) { return resultTuple; } } else { // continue merging, create the next result tuple resultTuple = NextConcat(); if (resultTuple) { return resultTuple; } else { // Iteration over the group finished. // Continue with the next tuple of argument A continueMerge = false; delete iter; iter = 0; ptA.setTuple( NextTupleA() ); if (ptA != 0) { cmp = CompareTuples( ptA.tuple, tmpB.tuple ); } } } } grpB->Clear(); // tpA > tmpB if ( ptB == 0 ) { // short exit return 0; } } // end of main loop return 0; } /* 5 Value mapping function of operator ~sortmergejoin2~ */ template int SortMergeJoinValueMap( Word* args, Word& result, int message, Word& local, Supplier s ) { // if ( param = false ) // args[0] : stream A // args[1] : stream B // args[2] : attribute name of join attribute for stream A // args[3] : attribute name join attribute for stream B // args[4] : attribute index of join attribute for stream A // args[5] : attribute index of join attribute for stream B // if ( param = true ) // args[0] : stream A // args[1] : stream B // args[2] : attribute name of join attribute for stream A // args[3] : attribute name join attribute for stream B // args[4] : usable main memory in bytes (only if param is true) // args[5] : attribute index of join attribute for stream A // args[6] : attribute index of join attribute for stream B typedef LocalInfo LocalType; LocalType* li = static_cast( local.addr ); switch(message) { case OPEN: { if ( li ) { delete li; } li = new LocalType(); local.addr = li; qp->Open(args[0].addr); qp->Open(args[1].addr); li->ptr = 0; return 0; } case REQUEST: { if ( li->ptr == 0 ) //first request; //constructor put here to avoid delays in OPEN //which are a problem for progress estimation { if ( param ) { size_t maxMemSize = (size_t)StdTypes::GetInt( args[4] ); int attrIndexA = StdTypes::GetInt( args[5] ); int attrIndexB = StdTypes::GetInt( args[6] ); li->ptr = new SortMergeJoinLocalInfo( args[0], attrIndexA, args[1], attrIndexB, s, li, maxMemSize ); } else { int attrIndexA = StdTypes::GetInt( args[4] ); int attrIndexB = StdTypes::GetInt( args[5] ); li->ptr = new SortMergeJoinLocalInfo( args[0], attrIndexA, args[1], attrIndexB, s, li ); } } SortMergeJoinLocalInfo* mli = li->ptr; result.addr = mli->NextResultTuple(); li->returned++; return result.addr != 0 ? YIELD : CANCEL; } case CLOSE: { qp->Close(args[0].addr); qp->Close(args[1].addr); if (li) { delete li; local.addr = 0; } return 0; } } return 0; } /* 6 Instantiation of Template Functions For some reasons the compiler cannot expand these template functions in the file ~ExtRelation2Algebra.cpp~, thus the value mapping functions are instantiated here. */ template int SortMergeJoinValueMap( Word* args, Word& result, int message, Word& local, Supplier s ); template int SortMergeJoinValueMap( Word* args, Word& result, int message, Word& local, Supplier s); } // end of namespace extrel2