/*
----
This file is part of SECONDO.

Copyright (C) 2004-2008, University in Hagen, Faculty of Mathematics and
Computer Science, Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along
with SECONDO; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----

//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//paragraph [10] Footnote: [{\footnote{] [}}]
//[TOC] [\tableofcontents]

Dec 2008. M. Spiekermann: Integration of code implemented formerly in the
ExtRelation-Algebra.

[1] Implementation of special variants for sort merge joins

[TOC]

0 Overview

This file contains the implementation of algorithms for external sorting and
merging which integrate sampling techniques to guarantee that the output will
also start with a random prefix.
1 Includes and defines */ #include #include #include #include #include "LogMsg.h" #include "StandardTypes.h" #include "Algebras/Relation-C++/RelationAlgebra.h" #include "CPUTimeMeasurer.h" #include "QueryProcessor.h" #include "SecondoInterface.h" #include "StopWatch.h" #include "Counter.h" #include "Progress.h" #include "RTuple.h" #include "Algebras/ExtRelation-C++/Hashtable.h" #include "StreamIterator.h" #include "Algebras/ExtRelation-C++/Tupleorder.h" extern NestedList* nl; extern QueryProcessor* qp; using namespace std; /* 2 Operators 2.1 class ~SortByLocalInfo2~ This class contains some big changes compared to the basic implementation, thus for the sake of simplicity and programming time efficiency we did not try to rearrange the code in such a way that we can keep most of them in a base class. As a result we accept to have many redundant code lines !!! */ class SortByLocalInfo2 : protected ProgressWrapper { public: SortByLocalInfo2( Word stream, const bool lexicographic, void *tupleCmp, ProgressLocalInfo* p ) : ProgressWrapper(p), stream( stream ), currentIndex( 0 ), lexiTupleCmp( lexicographic ? (LexicographicalTupleSmaller*)tupleCmp : 0 ), tupleCmpBy( lexicographic ? 
0 : (TupleCompareBy*)tupleCmp ), lexicographic( lexicographic), pq( UniversalCompare() ) {} public: void PrepareResultIteration(bool mkRndSubset = false) { InitRuns(); if (!mkRndSubset) { CreateRuns(); } else { MakeRndSubset(); } FinishRuns(); InitMerge(); } void InitRuns() { currentRun = &queue[0]; nextRun = &queue[1]; c = 0, i = 0, a = 0, n = 0, m = 0, r = 0; // counter variables newRelation = true; MAX_MEMORY = qp->FixedMemory(); cmsg.info("ERA:ShowMemInfo") << "Sortby.MAX_MEMORY (" << MAX_MEMORY/1024 << " kb)" << endl; cmsg.send(); lastTuple.setTuple(0); minTuple.setTuple(0); rel=0; } void CreateRuns() { StreamIterator is(stream); while( is.valid() ) // consume the stream completely { Tuple* tuple = *is; AppendTuple(tuple); ++is; tuple->DeleteIfAllowed(); } } inline StreamIterator GetIterator() { return StreamIterator(stream); } void MakeRndSubset() { // Before a tuple will be passed to the sorting algorithm // it may be chosen as member of a random subset. StreamIterator is(stream); while( is.valid() ) // consume the stream completely { // choose random tuples size_t i = 0; bool replaced = false; Tuple* s = rtBuf.ReplacedByRandom(*is, i, replaced); if ( replaced ) { // s was replaced by *is if (s != 0) { MAX_MEMORY -= (*is)->GetSize(); MAX_MEMORY += s->GetSize(); AppendTuple(s); s->DeleteIfAllowed(); } } else { assert(s == 0); // s == 0, and *is was not stored in buffer AppendTuple(*is); (*is)->DeleteIfAllowed(); } ++is; } } HashTable* CreateHashTable(int buckets, int i, int j) { // create a hash table for the tuples stored in // rtBuf and return a pointer to it. 
ht = new HashTable(buckets, CmpTuples(i,j)); RandomTBuf::iterator tuple = rtBuf.begin(); for( ; tuple != rtBuf.end(); tuple++) { if (*tuple) { ht->add( *tuple, ((*tuple)->HashValue(i) ) ); } } return ht; } HashTable* GetHashTable() const { return ht; } void FinishRuns() { ShowPartitionInfo(c,a,n,m,r,rel); Counter::getRef("Sortby:ExternPartitions") = relations.size(); // copy the lastRun and NextRun runs into tuple buffers // which stay in memory. CopyQueue2Vector(0); CopyQueue2Vector(1); } inline void AppendTuple(Tuple* t) { progress->read++; c++; // tuple counter; TupleAndRelPos nextTuple(t, tupleCmpBy); if( MAX_MEMORY > (size_t)t->GetSize() ) { currentRun->push(nextTuple); i++; // increment Tuples in memory counter MAX_MEMORY -= t->GetSize(); } else { // memory is completely used progress->state = 1; if ( newRelation ) { // create new relation r++; rel = new TupleBuffer( 0 ); GenericRelationIterator *iter = 0; relations.push_back( std::make_pair( rel, iter ) ); newRelation = false; // get first tuple and store it in an relation currentRun->push(nextTuple); minTuple = currentRun->topTuple(); rel->AppendTuple( minTuple.tuple ); minTuple.tuple->DeleteIfAllowed(); lastTuple = minTuple; currentRun->pop(); } else { // check if nextTuple can be saved in current relation if ( nextTuple < TupleAndRelPos(lastTuple.tuple, tupleCmpBy) ) { // nextTuple is in order // Push the next tuple int the heap and append the minimum to // the current relation and push currentRun->push(nextTuple); minTuple = currentRun->topTuple(); rel->AppendTuple( minTuple.tuple ); minTuple.tuple->DeleteIfAllowed(); lastTuple = minTuple; currentRun->pop(); m++; } else { // nextTuple is smaller, save it for the next relation nextRun->push(nextTuple); n++; if ( !currentRun->empty() ) { // Append the minimum to the current relation minTuple.setTuple(currentRun->top().tuple()); rel->AppendTuple( minTuple.tuple ); minTuple.tuple->DeleteIfAllowed(); lastTuple = minTuple; currentRun->pop(); } else { //create 
a new run newRelation = true; // swap queues Heap* helpRun = currentRun; currentRun = nextRun; nextRun = helpRun; ShowPartitionInfo(c,a,n,m,r,rel); i=n; a=0; n=0; m=0; } // end new run } // end next tuple is smaller } // end of check if nextTuple can be saved in current relation }// end of memory is completely used } /* It may happen, that the localinfo object will be destroyed before all internal buffered tuples are delivered stream upwards, e.g. queries which use a ~head~ operator. In this case we need to delete also all tuples stored in memory. */ ~SortByLocalInfo2() { while( !mergeTuples.empty() ) { mergeTuples.topTuple()->DeleteIfAllowed(); mergeTuples.pop(); } for( int i = 0; i < 2; i++ ) { while( !queue[i].empty() ) { queue[i].topTuple()->DeleteIfAllowed(); queue[i].pop(); } } // delete information about sorted runs for( size_t i = 0; i < relations.size(); i++ ) { delete relations[i].second; relations[i].second = 0; //relations[i].first->Clear(); delete relations[i].first; relations[i].first = 0; } delete lexiTupleCmp; lexiTupleCmp = 0; delete tupleCmpBy; tupleCmpBy = 0; } Tuple* NextResultTuple() { if( mergeTuples.empty() ) // stream finished return 0; else { // Take the first element out of the merge heap TupleAndRelPos p = mergeTuples.top(); Tuple* result = p.tuple(); mergeTuples.pop(); // push next tuple into the merge heap Tuple* t = relations[p.pos].second->GetNextTuple(); if( t != 0 ) { // run not finished mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, p.pos) ); } return result; } } void InitMerge() { for( size_t i = 0; i < relations.size(); i++ ) { if ( relations[i].second != 0 ) { delete relations[i].second; } relations[i].second = relations[i].first->MakeScan(); // Get next tuple from each relation and push it into the heap. 
Tuple *t = relations[i].second->GetNextTuple(); if( t != 0 ) { mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, i) ); } } } protected: void ShowPartitionInfo( int c, int a, int n, int m, int r, GenericRelation* rel ) { int rs = (rel != 0) ? rel->GetNoTuples() : 0; if ( RTFlag::isActive("ERA:Sort:PartitionInfo") ) { cmsg.info() << "Current run finished: " << " processed tuples=" << c << ", append minimum=" << m << ", append next=" << n << endl << " materialized runs=" << r << ", last partition's tuples=" << rs << endl << " Runs in memory: queue1= " << queue[0].size() << ", queue2= " << queue[1].size() << endl; cmsg.send(); } } void CopyQueue2Vector(int i) { assert( i == 0 || i == 1 ); TupleBuffer* tbuf = new TupleBuffer(); GenericRelationIterator *iter = 0; relations.push_back( std::make_pair( tbuf, iter ) ); while( !queue[i].empty() ) { Tuple* t = queue[i].top().tuple(); tbuf->AppendTuple(t); queue[i].pop(); } } Word stream; size_t currentIndex; // tuple information LexicographicalTupleSmaller *lexiTupleCmp; TupleCompareBy *tupleCmpBy; bool lexicographic; // sorted runs created by in memory heap filtering size_t MAX_MEMORY; typedef std::pair SortedRun; vector< SortedRun > relations; typedef TupleQueue Heap; Heap queue[2]; Heap mergeTuples; // Alternate queue type which can be constructed with a user specific // comparison function. Currently, this is only experimental code, the // member pq is just instantiated not used. priority_queue< TupleAndRelPos, vector, UniversalCompare > pq; private: Heap* currentRun; Heap* nextRun; TupleBuffer* rel; size_t c, i, a, n, m, r; // counter variables bool newRelation; RTuple lastTuple; RTuple minTuple; RandomTBuf rtBuf; HashTable* ht; }; /* 2.1.1 Value mapping function of operator ~sortby~ The argument vector ~args~ contains in the first slot ~args[0]~ the stream and in ~args[2]~ the number of sort attributes. 
~args[3]~ contains the index of the first sort attribute, ~args[4]~ a boolean indicating wether the stream should be sorted in ascending order with regard to the sort first attribute. ~args[5]~ and ~args[6]~ contain these values for the second sort attribute and so on. */ template int SortBy(Word* args, Word& result, int message, Word& local, Supplier s) { // args[0] : stream // args[1] : ignored // args[2] : the number of sort attributes // args[3] : the index of the first sort attribute // args[4] : a boolean which indicates if sortorder should // be asc or desc. // args[5] : Same as 3 but for the second sort attribute // args[6] : Same as 4 // .... // LocalInfo* li; li = static_cast*>( local.addr ); switch(message) { case OPEN: { if ( li ) delete li; li = new LocalInfo(); local.addr = li; // at this point the local value is well defined // afterwards progress request calls are // allowed. li->ptr = 0; qp->Open(args[0].addr); return 0; } case REQUEST: { if ( li->ptr == 0 ) { void *tupleCmp = CompareObject(lexicographically, args).getPtr(); //Sorting is done in the following constructor. It was moved from //OPEN to REQUEST to avoid long delays in the OPEN method, which are //a problem for progress estimation li->ptr = new SortByLocalInfo2( args[0], lexicographically, tupleCmp, li ); li->ptr->PrepareResultIteration(); } SortByLocalInfo2* sli = li->ptr; result.setAddr( sli->NextResultTuple() ); li->returned++; return result.addr != 0 ? 
YIELD : CANCEL; } case CLOSE: qp->Close(args[0].addr); return 0; case CLOSEPROGRESS: if ( li ) { delete li; local.addr = 0; } return 0; case REQUESTPROGRESS: ProgressInfo p1; ProgressInfo *pRes; const double uSortBy = 0.000396; //millisecs per byte input and sort const double vSortBy = 0.000194; //millisecs per byte output const double oSortBy = 0.00004; //offset due to writing to disk //not yet measurable pRes = (ProgressInfo*) result.addr; if( !li ) return CANCEL; else { if (qp->RequestProgress(args[0].addr, &p1)) { pRes->Card = li->returned == 0 ? p1.Card : li->read; pRes->CopySizes(p1); pRes->Time = //li->state = 0 or 1 p1.Time + pRes->Card * p1.Size * (uSortBy + oSortBy * li->state) + pRes->Card * p1.Size * vSortBy; pRes->Progress = (p1.Progress * p1.Time + li->read * p1.Size * (uSortBy + oSortBy * li->state) + li->returned * p1.Size * vSortBy) / pRes->Time; pRes->BTime = p1.Time + pRes->Card * p1.Size * (uSortBy + oSortBy * li->state); pRes->BProgress = (p1.Progress * p1.Time + li->read * p1.Size * (uSortBy + oSortBy * li->state)) / pRes->BTime; return YIELD; } else return CANCEL; } } return 0; } /* 2.2 Operator ~mergejoin~ This operator computes the equijoin of two streams. It uses a text book algorithm as outlined in A. Silberschatz, H. F. Korth, S. Sudarshan, McGraw-Hill, 3rd. Edition, 1997. 
2.2.1 Auxiliary definitions for value mapping function of operator ~mergejoin~ */ class MergeJoinLocalInfo2: protected ProgressWrapper { protected: // buffer limits size_t MAX_MEMORY; size_t MAX_TUPLES_IN_MEMORY; // buffer related members and iterators TupleBuffer *grpB; GenericRelationIterator *iter; // members needed for sorting the input streams typedef LocalInfo LocalSRT; LocalSRT* liA; SortByLocalInfo2* sliA; LocalSRT* liB; SortByLocalInfo2* sliB; Word streamA; Word streamB; // the current pair of tuples Word resultA; Word resultB; RTuple ptA; RTuple ptB; RTuple tmpB; // the last comparison result int cmp; // the indexes of the attributes which will // be merged and the result type int attrIndexA; int attrIndexB; TupleType *resultTupleType; // a flag which indicates if sorting is needed bool expectSorted; // Members needed for the random subset option bool randomPrefix; bool continueHashjoin; bool continueProbe; bool earlyExit; StreamIterator iterB; HashTable* ht; // switch trace messages on/off const bool traceFlag; // a flag needed in function NextTuple which tells // if the merge with grpB has been finished bool continueMerge; template int CompareTuples(Tuple* t1, Tuple* t2) { Attribute* a = 0; if (BOTH_B) { a = static_cast( t1->GetAttribute(attrIndexB) ); } else { a = static_cast( t1->GetAttribute(attrIndexA) ); } Attribute* b = static_cast( t2->GetAttribute(attrIndexB) ); /* tuples with NULL-Values in the join attributes are never matched with other tuples. 
*/ if( !a->IsDefined() ) { return -1; } if( !b->IsDefined() ) { return 1; } int cmp = a->Compare(b); if (traceFlag) { cmsg.info() << "CompareTuples:" << endl << " BOTH_B = " << BOTH_B << endl << " tuple_1 = " << *t1 << endl << " tuple_2 = " << *t2 << endl << " cmp(t1,t2) = " << cmp << endl; cmsg.send(); } return cmp; } inline int CompareTuplesB(Tuple* t1, Tuple* t2) { return CompareTuples(t1, t2); } inline int CompareTuples(Tuple* t1, Tuple* t2) { return CompareTuples(t1, t2); } inline Tuple* NextTuple(Word stream, SortByLocalInfo2* sli) { bool yield = false; Word result( Address(0) ); if(!expectSorted) { return sli->NextResultTuple(); } qp->Request(stream.addr, result); yield = qp->Received(stream.addr); if(yield) { return static_cast( result.addr ); } else { result.addr = 0; return static_cast( result.addr ); } } inline Tuple* NextTupleA() { progress->readFirst++; return NextTuple(streamA, sliA); } inline Tuple* NextTupleB() { progress->readSecond++; return NextTuple(streamB, sliB); } SortByLocalInfo2* SortInput( const Word& stream, int attrIndex, LocalSRT*& li) { // sort the input streams SortOrderSpecification spec; spec.push_back( std::pair(attrIndex + 1, true) ); void* tupleCmp = new TupleCompareBy( spec ); li = new LocalSRT(); return new SortByLocalInfo2( stream, false, tupleCmp, li); } void SortInputs() { // sort the input streams SortOrderSpecification specA; SortOrderSpecification specB; specA.push_back( std::pair(attrIndexA + 1, true) ); specB.push_back( std::pair(attrIndexB + 1, true) ); void* tupleCmpA = new TupleCompareBy( specA ); void* tupleCmpB = new TupleCompareBy( specB ); liA = new LocalInfo(); progress->firstLocalInfo = liA; sliA = new SortByLocalInfo2( streamA, false, tupleCmpA, liA ); liB = new LocalInfo(); progress->secondLocalInfo = liB; sliB = new SortByLocalInfo2( streamB, false, tupleCmpB, liB ); } public: MergeJoinLocalInfo2( Word _streamA, Word wAttrIndexA, Word _streamB, Word wAttrIndexB, bool _expectSorted, Supplier s, 
ProgressLocalInfo* p, bool _randomPrefix = false, bool _earlyExit = false ) : ProgressWrapper(p), traceFlag( RTFlag::isActive("PSA:TraceMergeJoin") ) { expectSorted = _expectSorted; randomPrefix = _randomPrefix; earlyExit = _earlyExit; streamA = _streamA; streamB = _streamB; attrIndexA = StdTypes::GetInt( wAttrIndexA ) - 1; attrIndexB = StdTypes::GetInt( wAttrIndexB ) - 1; ListExpr resultType = SecondoSystem::GetCatalog()->NumericType( qp->GetType( s ) ); resultTupleType = new TupleType( nl->Second( resultType ) ); MAX_MEMORY = qp->FixedMemory(); cmsg.info("ERA:ShowMemInfo") << "MergeJoin.MAX_MEMORY (" << MAX_MEMORY/1024 << " kb)" << endl; cmsg.send(); liA = 0; sliA = 0; liB = 0; grpB = 0; sliB = 0; ht = 0; continueHashjoin = false; continueProbe = false; if ( randomPrefix ) { sliA = SortInput(streamA, attrIndexA, liA); sliA->PrepareResultIteration(true); progress->firstLocalInfo = liA; // Now a random subset S1 of 500 tuples is stored in a hash table. // Next the tuples of streamB will be joined with S1 and passed to // the Sorting-Algorithm for B. Finally, the sorted streams are merged. 
sliB = SortInput(streamB, attrIndexB, liB); if (traceFlag) cerr << "Input B sorted" << endl; progress->secondLocalInfo = liB; iterB = sliB->GetIterator(); // prime numbers: 503, 701, 1009, 2003 ht = sliA->CreateHashTable(701, attrIndexA, attrIndexB); if (traceFlag) cerr << "HashTable created" << endl; continueHashjoin = true; sliB->InitRuns(); if (traceFlag) cerr << "Input B initialized" << endl; } else { if( !expectSorted ) { sliA = SortInput(streamA, attrIndexA, liA); sliA->PrepareResultIteration(); progress->firstLocalInfo = liA; sliB = SortInput(streamB, attrIndexB, liB); sliB->PrepareResultIteration(); progress->secondLocalInfo = liB; } InitIteration(); } } ~MergeJoinLocalInfo2() { //cerr << "calling ~MergeJoinLocalInfo2()" << endl; if( !expectSorted ) { // delete the objects instantiated for sorting delete sliA; delete sliB; delete liA; delete liB; } delete grpB; resultTupleType->DeleteIfAllowed(); } inline Tuple* NextResultTuple() { Tuple* resultTuple = 0; while ( continueHashjoin ) { // probe hash buckets if ( !continueProbe ) // initialize hash bucket iteration { if (traceFlag) cerr << "Initialize hash bucket iteration" << endl; if ( iterB.valid() ) { (*iterB)->IncReference(); ht->initProbe( (*iterB)->HashValue(attrIndexB) ); continueProbe = true; sliB->AppendTuple(*iterB); } else // end of stream B and end of hashjoin { if (traceFlag) cerr << "End of stream B" << endl; continueHashjoin = false; continueProbe = false; sliB->FinishRuns(); sliB->InitMerge(); InitIteration(); } } if ( continueProbe ) { Tuple* b = *iterB; Tuple* a = ht->probe(b); if (a != 0) { // concat a and b Tuple* result = new Tuple( resultTupleType ); Concat( a, b, result ); return result; } else // switch to next tuple of B { //cout << "b:refs =" << b->GetNumOfRefs() << endl; b->DeleteIfAllowed(); ++iterB; continueProbe = false; } } } if (earlyExit) { if (traceFlag) cerr << "Early exit" << endl; return 0; } if ( !continueMerge && ptB == 0) return 0; while( ptA != 0 ) { if (!continueMerge 
&& ptB != 0) { //save ptB in tmpB tmpB = ptB; grpB->AppendTuple(tmpB.tuple); // advance the tuple pointer ptB.setTuple( NextTupleB() ); // collect a group of tuples from B which // have the same attribute value bool done = false; while ( !done && ptB != 0 ) { int cmp = CompareTuplesB( tmpB.tuple, ptB.tuple ); if ( cmp == 0) { // append equal tuples to group grpB->AppendTuple(ptB.tuple); // release tuple of input B ptB.setTuple( NextTupleB() ); } else { done = true; } } // end collect group cmp = CompareTuples( ptA.tuple, tmpB.tuple ); while ( ptA != 0 && cmp < 0 ) { // skip tuples from A while they are smaller than the // value of the tuples in grpB ptA.setTuple( NextTupleA() ); if (ptA != 0) { cmp = CompareTuples( ptA.tuple, tmpB.tuple ); } } } // continue or start a merge with grpB while ( ptA != 0 && cmp == 0 ) { // join ptA with grpB if (!continueMerge) { iter = grpB->MakeScan(); continueMerge = true; resultTuple = NextConcat(); if (resultTuple) return resultTuple; } else { // continue merging, create the next result tuple resultTuple = NextConcat(); if (resultTuple) { return resultTuple; } else { // Iteration over the group finished. 
// Continue with the next tuple of argument A continueMerge = false; delete iter; iter = 0; ptA.setTuple( NextTupleA() ); if (ptA != 0) { cmp = CompareTuples( ptA.tuple, tmpB.tuple ); } } } } grpB->Clear(); // tpA > tmpB if ( ptB == 0 ) { // short exit return 0; } } // end of main loop return 0; } inline Tuple* NextConcat() { Tuple* t = iter->GetNextTuple(); if( t != 0 ) { Tuple* result = new Tuple( resultTupleType ); Concat( ptA.tuple, t, result ); t->DeleteIfAllowed(); return result; } return 0; } void InitIteration() { // read in the first tuple of both input streams Tuple* tA = NextTupleA(); ptA = RTuple( tA ); if(tA) tA->DeleteIfAllowed(); Tuple* tB = NextTupleB(); ptB = RTuple( tB ); if(tB) tB->DeleteIfAllowed(); // initialize the status for the result // set iteration tmpB = 0; cmp = 0; continueMerge = false; if (grpB != 0) delete grpB; grpB = new TupleBuffer( MAX_MEMORY ); } }; /* 2.2.2 MergeJoinLocalInfoSHF A variant of a sortmergejoin which produces an output stream which starts with a random sample of 500 tuples. 
*/ class MergeJoinLocalInfoSHF : protected MergeJoinLocalInfo2 { public: MergeJoinLocalInfoSHF( Word _streamA, Word wAttrIndexA, Word _streamB, Word wAttrIndexB, bool _expectSorted, Supplier s, ProgressLocalInfo* p, bool rnd = false, bool earlyexit = false ) : MergeJoinLocalInfo2( _streamA, wAttrIndexA, _streamB, wAttrIndexB, _expectSorted, s, p, rnd, earlyexit ), streamPos(0), positions(500,0), memBufIter(0), memBufFinished(false), firstScanFinished(false), trace(true) {} ~MergeJoinLocalInfoSHF() { cerr << "calling ~MergeJoinLocalInfoSHF()" << endl; } inline Tuple* NextResultTuple() { Tuple* res = 0; if (!firstScanFinished) { res = MergeJoinLocalInfo2::NextResultTuple(); while (res != 0) { // decide if tuple replaces one of the buffer streamPos++; size_t i = 0; bool replaced = false; Tuple* v = rtBuf.ReplacedByRandom(res, i, replaced); if ( replaced ) { positions[i] = streamPos; // v was replaced by res if (v != 0) { //persBuf.AppendTuple(v); v->DeleteIfAllowed(); } } else { assert(v == 0); // v == 0, and t was not stored in buffer res->DeleteIfAllowed(); } res = MergeJoinLocalInfo2::NextResultTuple(); } if (trace) cerr << "copy2TupleBuf" << endl; rtBuf.copy2TupleBuf( memBuf ); // reset scan firstScanFinished = true; sliA->InitMerge(); sliB->InitMerge(); InitIteration(); sort(positions.begin(), positions.end()); posIter = positions.begin(); memBufIter = memBuf.MakeScan(); streamPos = 0; if (trace) cerr << "Start 2nd run" << endl; } if (firstScanFinished) { if (!memBufFinished) { res = memBufIter->GetNextTuple(); if (res == 0) { if (trace) { cerr << endl; cerr << "streamPos: " << streamPos << endl; cerr << "memBuf : " << memBuf.GetNoTuples() << endl; } memBufFinished = true; delete memBufIter; memBufIter = 0; } } if ( memBufFinished == true) { res = MergeJoinLocalInfo2::NextResultTuple(); streamPos++; while (streamPos == *posIter) { res->DeleteIfAllowed(); res = MergeJoinLocalInfo2::NextResultTuple(); streamPos++; posIter++; } } } return res; } private: size_t 
streamPos; TupleBuffer memBuf; vector positions; vector::const_iterator posIter; RandomTBuf rtBuf; GenericRelationIterator* memBufIter; bool memBufFinished; bool firstScanFinished; const bool trace; }; /* 2.2.3 Value mapping function of operator ~mergejoin~ */ //CPUTimeMeasurer mergeMeasurer; template int MergeJoin(Word* args, Word& result, int message, Word& local, Supplier s) { typedef LocalInfo LocalType; LocalType* li = static_cast( local.addr ); switch(message) { case OPEN: if ( li ) { delete li->ptr; delete li; } li = new LocalType(); local.addr = li; qp->Open(args[0].addr); qp->Open(args[1].addr); li->ptr = 0; return 0; case REQUEST: { //mergeMeasurer.Enter(); if ( li->ptr == 0 ) //first request; //constructor put here to avoid delays in OPEN //which are a problem for progress estimation { li->ptr = new T( args[0], args[4], args[1], args[5], SRT, s, li, RND, R3 ); } T* mli = li->ptr; result.addr = mli->NextResultTuple(); li->returned++; //mergeMeasurer.Exit(); return result.addr != 0 ? YIELD : CANCEL; } case CLOSE: //mergeMeasurer.PrintCPUTimeAndReset("CPU Time for Merging Tuples : "); qp->Close(args[0].addr); qp->Close(args[1].addr); //nothing is deleted on close because the substructures are still //needed for progress estimation. 
Instea/* //(repeated) OPEN and on CLOSEPROGRESS return 0; case CLOSEPROGRESS: if ( li ) { delete li; local.addr = 0; } return 0; case REQUESTPROGRESS: { ProgressInfo p1, p2; ProgressInfo* pRes = static_cast( result.addr ); const double uMergeJoin = 0.041; //millisecs per tuple merge (merge) const double vMergeJoin = 0.000076; //millisecs per byte merge (sortmerge) const double uSortBy = 0.00043; //millisecs per byte sort if( !li ) { return CANCEL; } else { if (qp->RequestProgress(args[0].addr, &p1) && qp->RequestProgress(args[1].addr, &p2)) { li->SetJoinSizes(p1, p2); pRes->CopySizes(li); if ( SRT ) // already sorted inputes { pRes->Time = p1.Time + p2.Time + (p1.Card + p2.Card) * uMergeJoin; pRes->Progress = (p1.Progress * p1.Time + p2.Progress * p2.Time + (((double) li->readFirst) + ((double) li->readSecond)) * uMergeJoin) / pRes->Time; pRes->CopyBlocking(p1, p2); //non-blocking in this case } else // unsorted inputs { pRes->Time = p1.Time + p2.Time + p1.Card * p1.Size * uSortBy + p2.Card * p2.Size * uSortBy + (p1.Card * p1.Size + p2.Card * p2.Size) * vMergeJoin; typedef LocalInfo LocalSRT; LocalSRT* liFirst = 0; LocalSRT* liSecond = 0; liFirst = static_cast( li->firstLocalInfo ); liSecond = static_cast( li->secondLocalInfo ); long readFirst = (liFirst ? liFirst->read : 0); long readSecond = (liSecond ? 
liSecond->read : 0); pRes->Progress = (p1.Progress * p1.Time + p2.Progress * p2.Time + ((double) readFirst) * p1.Size * uSortBy + ((double) readSecond) * p2.Size * uSortBy + (((double) li->readFirst) * p1.Size + ((double) li->readSecond) * p2.Size) * vMergeJoin) / pRes->Time; pRes->BTime = p1.Time + p2.Time + p1.Card * p1.Size * uSortBy + p2.Card * p2.Size * uSortBy; pRes->BProgress = (p1.Progress * p1.Time + p2.Progress * p2.Time + ((double) readFirst) * p1.Size * uSortBy + ((double) readSecond) * p2.Size * uSortBy) / pRes->BTime; } if (li->returned > enoughSuccessesJoin ) // stable state { pRes->Card = ((double) li->returned * (p1.Card + p2.Card) / ((double) li->readFirst + (double) li->readSecond)); } else { pRes->Card = p1.Card * p2.Card * qp->GetSelectivity(s); } return YIELD; } else return CANCEL; } } } return 0; } /* 3 Instantiation of Template Functions */ // sortmergejoin_r template int MergeJoin( Word* args, Word& result, int message, Word& local, Supplier s ); int sortmergejoinr_vm( Word* args, Word& result, int message, Word& local, Supplier s ) { return MergeJoin(args, result, message, local, s); } int sortmergejoinr2_vm( Word* args, Word& result, int message, Word& local, Supplier s ) { return MergeJoin(args, result, message, local, s); } int sortmergejoinr3_vm( Word* args, Word& result, int message, Word& local, Supplier s ) { return MergeJoin(args, result, message, local, s); }