Files
secondo/Algebras/PartitionedStream/SortMerge.cpp
2026-01-23 17:03:45 +08:00

1466 lines
36 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2004-2008, University in Hagen, Faculty of Mathematics and
Computer Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//paragraph [10] Footnote: [{\footnote{] [}}]
//[TOC] [\tableofcontents]
Dec 2008. M. Spiekermann: Integration of code implemented formerly in the
ExtRelation-Algebra.
[1] Implementation of special variants for sort merge joins
[TOC]
0 Overview
This file contains the implementation of algorithms for external sorting and
merging which integrate sampling techniques to guarantee that the output will
also start with a random prefix.
1 Includes and defines
*/
#include <vector>
#include <list>
#include <set>
#include <queue>
#include "LogMsg.h"
#include "StandardTypes.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "CPUTimeMeasurer.h"
#include "QueryProcessor.h"
#include "SecondoInterface.h"
#include "StopWatch.h"
#include "Counter.h"
#include "Progress.h"
#include "RTuple.h"
#include "Algebras/ExtRelation-C++/Hashtable.h"
#include "StreamIterator.h"
#include "Algebras/ExtRelation-C++/Tupleorder.h"
extern NestedList* nl;
extern QueryProcessor* qp;
using namespace std;
/*
2 Operators
2.1 class ~SortByLocalInfo2~
This class contains some big changes compared to the basic implementation. For the
sake of simplicity and programming time efficiency we did not try to rearrange the code
in such a way that most of it could be kept in a base class.
As a result we accept many redundant lines of code.
*/
class SortByLocalInfo2 : protected ProgressWrapper
{
public:
SortByLocalInfo2( Word stream, const bool lexicographic,
void *tupleCmp, ProgressLocalInfo* p ) :
ProgressWrapper(p),
stream( stream ),
currentIndex( 0 ),
lexiTupleCmp( lexicographic ?
(LexicographicalTupleSmaller*)tupleCmp :
0 ),
tupleCmpBy( lexicographic ? 0 : (TupleCompareBy*)tupleCmp ),
lexicographic( lexicographic),
pq( UniversalCompare<LexicographicalTupleSmaller>() )
{}
public:
void PrepareResultIteration(bool mkRndSubset = false)
{
InitRuns();
if (!mkRndSubset)
{
CreateRuns();
}
else
{
MakeRndSubset();
}
FinishRuns();
InitMerge();
}
void InitRuns()
{
currentRun = &queue[0];
nextRun = &queue[1];
c = 0, i = 0, a = 0, n = 0, m = 0, r = 0; // counter variables
newRelation = true;
MAX_MEMORY = qp->FixedMemory();
cmsg.info("ERA:ShowMemInfo")
<< "Sortby.MAX_MEMORY (" << MAX_MEMORY/1024 << " kb)" << endl;
cmsg.send();
lastTuple.setTuple(0);
minTuple.setTuple(0);
rel=0;
}
void CreateRuns()
{
StreamIterator<Tuple> is(stream);
while( is.valid() ) // consume the stream completely
{
Tuple* tuple = *is;
AppendTuple(tuple);
++is;
tuple->DeleteIfAllowed();
}
}
inline StreamIterator<Tuple> GetIterator()
{
return StreamIterator<Tuple>(stream);
}
void MakeRndSubset()
{
// Before a tuple will be passed to the sorting algorithm
// it may be chosen as member of a random subset.
StreamIterator<Tuple> is(stream);
while( is.valid() ) // consume the stream completely
{
// choose random tuples
size_t i = 0;
bool replaced = false;
Tuple* s = rtBuf.ReplacedByRandom(*is, i, replaced);
if ( replaced )
{
// s was replaced by *is
if (s != 0) {
MAX_MEMORY -= (*is)->GetSize();
MAX_MEMORY += s->GetSize();
AppendTuple(s);
s->DeleteIfAllowed();
}
}
else
{
assert(s == 0);
// s == 0, and *is was not stored in buffer
AppendTuple(*is);
(*is)->DeleteIfAllowed();
}
++is;
}
}
HashTable* CreateHashTable(int buckets, int i, int j)
{
// create a hash table for the tuples stored in
// rtBuf and return a pointer to it.
ht = new HashTable(buckets, CmpTuples(i,j));
RandomTBuf::iterator tuple = rtBuf.begin();
for( ; tuple != rtBuf.end(); tuple++) {
if (*tuple) {
ht->add( *tuple, ((*tuple)->HashValue(i) ) );
}
}
return ht;
}
HashTable* GetHashTable() const { return ht; }
void FinishRuns()
{
ShowPartitionInfo(c,a,n,m,r,rel);
Counter::getRef("Sortby:ExternPartitions") = relations.size();
// copy the lastRun and NextRun runs into tuple buffers
// which stay in memory.
CopyQueue2Vector(0);
CopyQueue2Vector(1);
}
inline void AppendTuple(Tuple* t)
{
progress->read++;
c++; // tuple counter;
TupleAndRelPos nextTuple(t, tupleCmpBy);
if( MAX_MEMORY > (size_t)t->GetSize() )
{
currentRun->push(nextTuple);
i++; // increment Tuples in memory counter
MAX_MEMORY -= t->GetSize();
}
else
{ // memory is completely used
progress->state = 1;
if ( newRelation )
{ // create new relation
r++;
rel = new TupleBuffer( 0 );
GenericRelationIterator *iter = 0;
relations.push_back( std::make_pair( rel, iter ) );
newRelation = false;
// get first tuple and store it in an relation
currentRun->push(nextTuple);
minTuple = currentRun->topTuple();
rel->AppendTuple( minTuple.tuple );
minTuple.tuple->DeleteIfAllowed();
lastTuple = minTuple;
currentRun->pop();
}
else
{ // check if nextTuple can be saved in current relation
if ( nextTuple < TupleAndRelPos(lastTuple.tuple, tupleCmpBy) )
{ // nextTuple is in order
// Push the next tuple int the heap and append the minimum to
// the current relation and push
currentRun->push(nextTuple);
minTuple = currentRun->topTuple();
rel->AppendTuple( minTuple.tuple );
minTuple.tuple->DeleteIfAllowed();
lastTuple = minTuple;
currentRun->pop();
m++;
}
else
{ // nextTuple is smaller, save it for the next relation
nextRun->push(nextTuple);
n++;
if ( !currentRun->empty() )
{
// Append the minimum to the current relation
minTuple.setTuple(currentRun->top().tuple());
rel->AppendTuple( minTuple.tuple );
minTuple.tuple->DeleteIfAllowed();
lastTuple = minTuple;
currentRun->pop();
}
else
{ //create a new run
newRelation = true;
// swap queues
Heap* helpRun = currentRun;
currentRun = nextRun;
nextRun = helpRun;
ShowPartitionInfo(c,a,n,m,r,rel);
i=n;
a=0;
n=0;
m=0;
} // end new run
} // end next tuple is smaller
} // end of check if nextTuple can be saved in current relation
}// end of memory is completely used
}
/*
It may happen, that the localinfo object will be destroyed
before all internal buffered tuples are delivered stream
upwards, e.g. queries which use a ~head~ operator.
In this case we need to delete also all tuples stored in memory.
*/
~SortByLocalInfo2()
{
while( !mergeTuples.empty() )
{
mergeTuples.topTuple()->DeleteIfAllowed();
mergeTuples.pop();
}
for( int i = 0; i < 2; i++ )
{
while( !queue[i].empty() )
{
queue[i].topTuple()->DeleteIfAllowed();
queue[i].pop();
}
}
// delete information about sorted runs
for( size_t i = 0; i < relations.size(); i++ )
{
delete relations[i].second;
relations[i].second = 0;
//relations[i].first->Clear();
delete relations[i].first;
relations[i].first = 0;
}
delete lexiTupleCmp;
lexiTupleCmp = 0;
delete tupleCmpBy;
tupleCmpBy = 0;
}
Tuple* NextResultTuple()
{
if( mergeTuples.empty() ) // stream finished
return 0;
else
{
// Take the first element out of the merge heap
TupleAndRelPos p = mergeTuples.top();
Tuple* result = p.tuple();
mergeTuples.pop();
// push next tuple into the merge heap
Tuple* t = relations[p.pos].second->GetNextTuple();
if( t != 0 )
{ // run not finished
mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, p.pos) );
}
return result;
}
}
void InitMerge()
{
for( size_t i = 0; i < relations.size(); i++ )
{
if ( relations[i].second != 0 ) {
delete relations[i].second;
}
relations[i].second = relations[i].first->MakeScan();
// Get next tuple from each relation and push it into the heap.
Tuple *t = relations[i].second->GetNextTuple();
if( t != 0 )
{
mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, i) );
}
}
}
protected:
void ShowPartitionInfo( int c, int a, int n,
int m, int r, GenericRelation* rel )
{
int rs = (rel != 0) ? rel->GetNoTuples() : 0;
if ( RTFlag::isActive("ERA:Sort:PartitionInfo") )
{
cmsg.info() << "Current run finished: "
<< " processed tuples=" << c
<< ", append minimum=" << m
<< ", append next=" << n << endl
<< " materialized runs=" << r
<< ", last partition's tuples=" << rs << endl
<< " Runs in memory: queue1= " << queue[0].size()
<< ", queue2= " << queue[1].size() << endl;
cmsg.send();
}
}
void CopyQueue2Vector(int i)
{
assert( i == 0 || i == 1 );
TupleBuffer* tbuf = new TupleBuffer();
GenericRelationIterator *iter = 0;
relations.push_back( std::make_pair( tbuf, iter ) );
while( !queue[i].empty() )
{
Tuple* t = queue[i].top().tuple();
tbuf->AppendTuple(t);
queue[i].pop();
}
}
Word stream;
size_t currentIndex;
// tuple information
LexicographicalTupleSmaller *lexiTupleCmp;
TupleCompareBy *tupleCmpBy;
bool lexicographic;
// sorted runs created by in memory heap filtering
size_t MAX_MEMORY;
typedef std::pair<TupleBuffer*, GenericRelationIterator*> SortedRun;
vector< SortedRun > relations;
typedef TupleQueue Heap;
Heap queue[2];
Heap mergeTuples;
// Alternate queue type which can be constructed with a user specific
// comparison function. Currently, this is only experimental code, the
// member pq is just instantiated not used.
priority_queue< TupleAndRelPos,
vector<TupleAndRelPos>,
UniversalCompare<LexicographicalTupleSmaller> > pq;
private:
Heap* currentRun;
Heap* nextRun;
TupleBuffer* rel;
size_t c, i, a, n, m, r; // counter variables
bool newRelation;
RTuple lastTuple;
RTuple minTuple;
RandomTBuf rtBuf;
HashTable* ht;
};
/*
2.1.1 Value mapping function of operator ~sortby~
The argument vector ~args~ contains in the first slot ~args[0]~ the stream and
in ~args[2]~ the number of sort attributes. ~args[3]~ contains the index of the
first sort attribute, ~args[4]~ a boolean indicating whether the stream should
be sorted in ascending order with regard to the first sort attribute. ~args[5]~
and ~args[6]~ contain these values for the second sort attribute and so on.
*/
// Value mapping of the sort operator.
//
// args[0] : stream
// args[1] : ignored
// args[2] : the number of sort attributes
// args[3] : the index of the first sort attribute
// args[4] : a boolean which indicates if sortorder should
//           be asc or desc.
// args[5] : Same as 3 but for the second sort attribute
// args[6] : Same as 4
// ....
//
// The local info object is created on OPEN and released on the next OPEN
// or on CLOSEPROGRESS; it survives CLOSE, which only closes the input.
// The sorter itself (li->ptr) is created lazily by the first REQUEST.
template<bool lexicographically> int
SortBy(Word* args, Word& result, int message, Word& local, Supplier s)
{
  typedef LocalInfo<SortByLocalInfo2> LocalType;
  LocalType* li = static_cast<LocalType*>( local.addr );

  switch(message)
  {
    case OPEN:
    {
      if ( li ) {
        // Release the sorter explicitly and reset the pointer before the
        // wrapper is deleted. This is correct no matter whether the
        // destructor of LocalInfo deletes ~ptr~ itself (defined outside
        // of this file).
        delete li->ptr;
        li->ptr = 0;
        delete li;
      }

      li = new LocalType();
      local.addr = li;

      // at this point the local value is well defined
      // afterwards progress request calls are
      // allowed.
      li->ptr = 0;

      qp->Open(args[0].addr);
      return 0;
    }

    case REQUEST:
    {
      if ( li->ptr == 0 )
      {
        void *tupleCmp = CompareObject(lexicographically, args).getPtr();

        //Sorting is done by the following calls. It was moved from
        //OPEN to REQUEST to avoid long delays in the OPEN method, which are
        //a problem for progress estimation
        li->ptr = new SortByLocalInfo2( args[0],
                                        lexicographically,
                                        tupleCmp, li );
        li->ptr->PrepareResultIteration();
      }

      SortByLocalInfo2* sli = li->ptr;
      result.setAddr( sli->NextResultTuple() );
      li->returned++;
      return result.addr != 0 ? YIELD : CANCEL;
    }

    case CLOSE:
    {
      // the local info is kept, see the function comment
      qp->Close(args[0].addr);
      return 0;
    }

    case CLOSEPROGRESS:
    {
      if ( li ) {
        // same teardown as in OPEN
        delete li->ptr;
        li->ptr = 0;
        delete li;
        local.addr = 0;
      }
      return 0;
    }

    case REQUESTPROGRESS:
    {
      // The braces give the declarations below their own scope.
      ProgressInfo p1;
      ProgressInfo* pRes = static_cast<ProgressInfo*>( result.addr );

      const double uSortBy = 0.000396; //millisecs per byte input and sort
      const double vSortBy = 0.000194; //millisecs per byte output
      const double oSortBy = 0.00004;  //offset due to writing to disk
                                       //not yet measurable

      if( !li ) return CANCEL;

      if ( !qp->RequestProgress(args[0].addr, &p1) ) return CANCEL;

      pRes->Card = li->returned == 0 ? p1.Card : li->read;
      pRes->CopySizes(p1);

      pRes->Time = //li->state = 0 or 1
        p1.Time
        + pRes->Card * p1.Size * (uSortBy + oSortBy * li->state)
        + pRes->Card * p1.Size * vSortBy;

      pRes->Progress =
        (p1.Progress * p1.Time
         + li->read * p1.Size * (uSortBy + oSortBy * li->state)
         + li->returned * p1.Size * vSortBy)
        / pRes->Time;

      // blocking part: reading and sorting the input
      pRes->BTime = p1.Time + pRes->Card * p1.Size *
                    (uSortBy + oSortBy * li->state);

      pRes->BProgress =
        (p1.Progress * p1.Time
         + li->read * p1.Size * (uSortBy + oSortBy * li->state))
        / pRes->BTime;

      return YIELD;
    }
  }
  return 0;
}
/*
2.2 Operator ~mergejoin~
This operator computes the equijoin of two streams. It uses a text book
algorithm as outlined in A. Silberschatz, H. F. Korth, S. Sudarshan,
McGraw-Hill, 3rd. Edition, 1997.
2.2.1 Auxiliary definitions for value mapping function of operator ~mergejoin~
*/
// State of a sort merge join of two tuple streams A and B on one attribute
// of each stream.
//
// Depending on the constructor flags the inputs are expected to be sorted
// already, are sorted by means of SortByLocalInfo2, or - random prefix
// variant - a hash join between a random subset of A and the complete
// stream B is delivered first while both inputs are being sorted.
class MergeJoinLocalInfo2: protected ProgressWrapper
{
  protected:

    // buffer limits
    size_t MAX_MEMORY;
    size_t MAX_TUPLES_IN_MEMORY;

    // buffer related members and iterators
    TupleBuffer *grpB;               // current group of equal B tuples
    GenericRelationIterator *iter;   // scan over ~grpB~, 0 if not active

    // members needed for sorting the input streams
    typedef LocalInfo<SortByLocalInfo2> LocalSRT;
    LocalSRT* liA;
    SortByLocalInfo2* sliA;
    LocalSRT* liB;
    SortByLocalInfo2* sliB;

    Word streamA;
    Word streamB;

    // the current pair of tuples
    Word resultA;
    Word resultB;
    RTuple ptA;
    RTuple ptB;
    RTuple tmpB;

    // the last comparison result
    int cmp;

    // the indexes of the attributes which will
    // be merged and the result type
    int attrIndexA;
    int attrIndexB;
    TupleType *resultTupleType;

    // a flag which indicates if sorting is needed
    bool expectSorted;

    // Members needed for the random subset option
    bool randomPrefix;
    bool continueHashjoin;
    bool continueProbe;
    bool earlyExit;
    StreamIterator<Tuple> iterB;
    HashTable* ht;

    // switch trace messages on/off
    const bool traceFlag;

    // a flag needed in function NextTuple which tells
    // if the merge with grpB has been finished
    bool continueMerge;

    // Compares the join attributes of ~t1~ and ~t2~. ~t2~ is always a
    // tuple of B; ~t1~ is a tuple of B if BOTH_B is true and a tuple of A
    // otherwise. The result is the value of Attribute::Compare unless one
    // of the attributes is undefined (see below).
    template<bool BOTH_B>
    int CompareTuples(Tuple* t1, Tuple* t2)
    {
      Attribute* a = 0;
      if (BOTH_B) {
        a = static_cast<Attribute*>( t1->GetAttribute(attrIndexB) );
      }
      else {
        a = static_cast<Attribute*>( t1->GetAttribute(attrIndexA) );
      }
      Attribute* b = static_cast<Attribute*>( t2->GetAttribute(attrIndexB) );

      /* tuples with NULL-Values in the join attributes
         are never matched with other tuples. */
      if( !a->IsDefined() )
      {
        return -1;
      }
      if( !b->IsDefined() )
      {
        return 1;
      }

      // named ~res~ in order not to shadow the member ~cmp~
      const int res = a->Compare(b);
      if (traceFlag)
      {
        cmsg.info()
          << "CompareTuples:" << endl
          << " BOTH_B = " << BOTH_B << endl
          << " tuple_1 = " << *t1 << endl
          << " tuple_2 = " << *t2 << endl
          << " cmp(t1,t2) = " << res << endl;
        cmsg.send();
      }
      return res;
    }

    // compares two tuples of B
    inline int CompareTuplesB(Tuple* t1, Tuple* t2)
    {
      return CompareTuples<true>(t1, t2);
    }

    // compares a tuple of A (~t1~) with a tuple of B (~t2~)
    inline int CompareTuples(Tuple* t1, Tuple* t2)
    {
      return CompareTuples<false>(t1, t2);
    }

    // Returns the next tuple of an input or 0 at its end. If the inputs
    // are not expected to be sorted, the tuple is taken from the sorter
    // ~sli~, otherwise it is requested from ~stream~.
    inline Tuple* NextTuple(Word stream, SortByLocalInfo2* sli)
    {
      if(!expectSorted) {
        return sli->NextResultTuple();
      }

      Word result( Address(0) );
      qp->Request(stream.addr, result);
      if( qp->Received(stream.addr) )
      {
        return static_cast<Tuple*>( result.addr );
      }
      return 0;
    }

    inline Tuple* NextTupleA()
    {
      progress->readFirst++;
      return NextTuple(streamA, sliA);
    }

    inline Tuple* NextTupleB()
    {
      progress->readSecond++;
      return NextTuple(streamB, sliB);
    }

    // Creates a sorter for ~stream~ which sorts ascending by the attribute
    // with (zero based) index ~attrIndex~. The progress object of the
    // sorter is returned in ~li~. Sorter and ~li~ belong to the caller.
    SortByLocalInfo2* SortInput( const Word& stream, int attrIndex,
                                 LocalSRT*& li)
    {
      SortOrderSpecification spec;
      spec.push_back( std::pair<int, bool>(attrIndex + 1, true) );
      void* tupleCmp = new TupleCompareBy( spec );
      li = new LocalSRT();
      return new SortByLocalInfo2( stream, false, tupleCmp, li);
    }

    // Creates the sorters for both inputs and registers their progress
    // objects. Not called inside of this class; kept since it is part of
    // the protected interface.
    void SortInputs()
    {
      SortOrderSpecification specA;
      SortOrderSpecification specB;
      specA.push_back( std::pair<int, bool>(attrIndexA + 1, true) );
      specB.push_back( std::pair<int, bool>(attrIndexB + 1, true) );

      void* tupleCmpA = new TupleCompareBy( specA );
      void* tupleCmpB = new TupleCompareBy( specB );

      liA = new LocalInfo<SortByLocalInfo2>();
      progress->firstLocalInfo = liA;
      sliA = new SortByLocalInfo2( streamA,
                                   false,
                                   tupleCmpA, liA );

      liB = new LocalInfo<SortByLocalInfo2>();
      progress->secondLocalInfo = liB;
      sliB = new SortByLocalInfo2( streamB,
                                   false,
                                   tupleCmpB, liB );
    }

  public:

    // _streamA, _streamB       : the input streams
    // wAttrIndexA, wAttrIndexB : the (one based) join attribute indexes
    // _expectSorted            : true if the inputs are already sorted
    // s                        : the operator node, used for the result type
    // p                        : progress information of the operator
    // _randomPrefix            : deliver the hash join with a random subset
    //                            of A first
    // _earlyExit               : stop after the hash join phase
    MergeJoinLocalInfo2( Word _streamA, Word wAttrIndexA,
                         Word _streamB, Word wAttrIndexB,
                         bool _expectSorted, Supplier s,
                         ProgressLocalInfo* p,
                         bool _randomPrefix = false,
                         bool _earlyExit = false )
      : ProgressWrapper(p),
        traceFlag( RTFlag::isActive("PSA:TraceMergeJoin") )
    {
      expectSorted = _expectSorted;
      randomPrefix = _randomPrefix;
      earlyExit = _earlyExit;
      streamA = _streamA;
      streamB = _streamB;
      attrIndexA = StdTypes::GetInt( wAttrIndexA ) - 1;
      attrIndexB = StdTypes::GetInt( wAttrIndexB ) - 1;

      ListExpr resultType =
        SecondoSystem::GetCatalog()->NumericType( qp->GetType( s ) );
      resultTupleType = new TupleType( nl->Second( resultType ) );

      MAX_MEMORY = qp->FixedMemory();
      MAX_TUPLES_IN_MEMORY = 0;
      cmsg.info("ERA:ShowMemInfo")
        << "MergeJoin.MAX_MEMORY (" << MAX_MEMORY/1024 << " kb)" << endl;
      cmsg.send();

      liA = 0;
      sliA = 0;
      liB = 0;
      grpB = 0;
      sliB = 0;
      ht = 0;
      continueHashjoin = false;
      continueProbe = false;

      // In the random prefix case InitIteration() is not called before the
      // hash join phase has ended. Give the merge state defined values
      // right now so that it is never read (or deleted) uninitialized.
      iter = 0;
      cmp = 0;
      continueMerge = false;

      if ( randomPrefix ) {
        sliA = SortInput(streamA, attrIndexA, liA);
        sliA->PrepareResultIteration(true);
        progress->firstLocalInfo = liA;

        // Now a random subset S1 of the tuples of A is stored in a hash
        // table. Next the tuples of streamB will be joined with S1 and
        // passed to the Sorting-Algorithm for B. Finally, the sorted
        // streams are merged.
        sliB = SortInput(streamB, attrIndexB, liB);
        if (traceFlag)
          cerr << "Input B sorted" << endl;
        progress->secondLocalInfo = liB;
        iterB = sliB->GetIterator();

        // prime numbers: 503, 701, 1009, 2003
        ht = sliA->CreateHashTable(701, attrIndexA, attrIndexB);
        if (traceFlag)
          cerr << "HashTable created" << endl;

        continueHashjoin = true;
        sliB->InitRuns();
        if (traceFlag)
          cerr << "Input B initialized" << endl;
      }
      else
      {
        if( !expectSorted ) {
          sliA = SortInput(streamA, attrIndexA, liA);
          sliA->PrepareResultIteration();
          progress->firstLocalInfo = liA;

          sliB = SortInput(streamB, attrIndexB, liB);
          sliB->PrepareResultIteration();
          progress->secondLocalInfo = liB;
        }
        InitIteration();
      }
    }

    ~MergeJoinLocalInfo2()
    {
      //cerr << "calling ~MergeJoinLocalInfo2()" << endl;

      // A scan over grpB is still open if the join is destroyed in the
      // middle of a group; release it before the buffer goes away.
      delete iter;
      iter = 0;

      // Delete the objects instantiated for sorting. The pointers are 0 if
      // no sorter was created, hence no test of ~expectSorted~ is needed;
      // testing it would leak the sorters of the random prefix variant,
      // which are created regardless of that flag.
      delete sliA;
      delete sliB;
      delete liA;
      delete liB;

      delete grpB;
      resultTupleType->DeleteIfAllowed();
    }

    // Returns the next result tuple or 0 if there are no more results.
    //
    // Phase 1 (random prefix only): every tuple of B is handed over to the
    // sorter of B and probed against the hash table; the matches are
    // returned. At the end of B the merge phase is initialized.
    // Phase 2: text book merge join. A group of B tuples with equal join
    // value is collected in ~grpB~ and joined with every matching tuple of
    // A. The state needed to resume after a returned tuple is kept in
    // ~continueMerge~, ~cmp~ and ~iter~.
    inline Tuple* NextResultTuple()
    {
      Tuple* resultTuple = 0;

      while ( continueHashjoin ) { // probe hash buckets
        if ( !continueProbe ) // initialize hash bucket iteration
        {
          if (traceFlag)
            cerr << "Initialize hash bucket iteration" << endl;

          if ( iterB.valid() )
          {
            (*iterB)->IncReference();
            ht->initProbe( (*iterB)->HashValue(attrIndexB) );
            continueProbe = true;
            sliB->AppendTuple(*iterB);
          }
          else // end of stream B and end of hashjoin
          {
            if (traceFlag)
              cerr << "End of stream B" << endl;
            continueHashjoin = false;
            continueProbe = false;
            sliB->FinishRuns();
            sliB->InitMerge();
            InitIteration();
          }
        }

        if ( continueProbe )
        {
          Tuple* b = *iterB;
          Tuple* a = ht->probe(b);
          if (a != 0) { // concat a and b
            Tuple* result = new Tuple( resultTupleType );
            Concat( a, b, result );
            return result;
          }
          else // switch to next tuple of B
          {
            //cout << "b:refs =" << b->GetNumOfRefs() << endl;
            b->DeleteIfAllowed();
            ++iterB;
            continueProbe = false;
          }
        }
      }

      if (earlyExit) {
        if (traceFlag)
          cerr << "Early exit" << endl;
        return 0;
      }

      if ( !continueMerge && ptB == 0)
        return 0;

      while( ptA != 0 ) {

        if (!continueMerge && ptB != 0) {

          //save ptB in tmpB
          tmpB = ptB;
          grpB->AppendTuple(tmpB.tuple);

          // advance the tuple pointer
          ptB.setTuple( NextTupleB() );

          // collect a group of tuples from B which
          // have the same attribute value
          bool done = false;
          while ( !done && ptB != 0 ) {
            // local result, must not touch the member ~cmp~
            const int grpCmp = CompareTuplesB( tmpB.tuple, ptB.tuple );
            if ( grpCmp == 0)
            {
              // append equal tuples to group
              grpB->AppendTuple(ptB.tuple);
              // release tuple of input B
              ptB.setTuple( NextTupleB() );
            }
            else
            {
              done = true;
            }
          } // end collect group

          cmp = CompareTuples( ptA.tuple, tmpB.tuple );
          while ( ptA != 0 && cmp < 0 )
          {
            // skip tuples from A while they are smaller than the
            // value of the tuples in grpB
            ptA.setTuple( NextTupleA() );
            if (ptA != 0) {
              cmp = CompareTuples( ptA.tuple, tmpB.tuple );
            }
          }
        }

        // continue or start a merge with grpB
        while ( ptA != 0 && cmp == 0 )
        {
          // join ptA with grpB
          if (!continueMerge)
          {
            iter = grpB->MakeScan();
            continueMerge = true;
            resultTuple = NextConcat();
            if (resultTuple)
              return resultTuple;
          }
          else
          {
            // continue merging, create the next result tuple
            resultTuple = NextConcat();
            if (resultTuple) {
              return resultTuple;
            }
            else
            {
              // Iteration over the group finished.
              // Continue with the next tuple of argument A
              continueMerge = false;
              delete iter;
              iter = 0;
              ptA.setTuple( NextTupleA() );
              if (ptA != 0) {
                cmp = CompareTuples( ptA.tuple, tmpB.tuple );
              }
            }
          }
        }

        grpB->Clear();
        // tpA > tmpB
        if ( ptB == 0 ) {
          // short exit
          return 0;
        }
      } // end of main loop

      return 0;
    }

    // Concatenates the current tuple of A with the next tuple of the group
    // scan ~iter~. Returns 0 if the scan is finished.
    inline Tuple* NextConcat()
    {
      Tuple* t = iter->GetNextTuple();
      if( t != 0 ) {
        Tuple* result = new Tuple( resultTupleType );
        Concat( ptA.tuple, t, result );
        t->DeleteIfAllowed();
        return result;
      }
      return 0;
    }

    // (Re-)initializes the merge phase: reads the first tuple of both
    // inputs, resets the merge state and creates an empty group buffer.
    void InitIteration()
    {
      // read in the first tuple of both input streams
      Tuple* tA = NextTupleA();
      ptA = RTuple( tA );
      if(tA) tA->DeleteIfAllowed();

      Tuple* tB = NextTupleB();
      ptB = RTuple( tB );
      if(tB) tB->DeleteIfAllowed();

      // initialize the status for the result
      // set iteration
      tmpB = 0;
      cmp = 0;
      continueMerge = false;

      // a scan over the old group buffer must not survive the buffer
      delete iter;
      iter = 0;

      if (grpB != 0)
        delete grpB;
      grpB = new TupleBuffer( MAX_MEMORY );
    }
};
/*
2.2.2 MergeJoinLocalInfoSHF
A variant of a sortmergejoin which produces an output stream which starts with
a random sample of 500 tuples.
*/
class MergeJoinLocalInfoSHF : protected MergeJoinLocalInfo2
{
public:
MergeJoinLocalInfoSHF( Word _streamA,
Word wAttrIndexA,
Word _streamB,
Word wAttrIndexB,
bool _expectSorted,
Supplier s,
ProgressLocalInfo* p,
bool rnd = false, bool earlyexit = false )
: MergeJoinLocalInfo2( _streamA, wAttrIndexA,
_streamB, wAttrIndexB,
_expectSorted, s, p, rnd, earlyexit ),
streamPos(0),
positions(500,0),
memBufIter(0),
memBufFinished(false),
firstScanFinished(false),
trace(true)
{}
~MergeJoinLocalInfoSHF() {
cerr << "calling ~MergeJoinLocalInfoSHF()" << endl;
}
inline Tuple* NextResultTuple()
{
Tuple* res = 0;
if (!firstScanFinished)
{
res = MergeJoinLocalInfo2::NextResultTuple();
while (res != 0)
{
// decide if tuple replaces one of the buffer
streamPos++;
size_t i = 0;
bool replaced = false;
Tuple* v = rtBuf.ReplacedByRandom(res, i, replaced);
if ( replaced )
{
positions[i] = streamPos;
// v was replaced by res
if (v != 0) {
//persBuf.AppendTuple(v);
v->DeleteIfAllowed();
}
}
else
{
assert(v == 0);
// v == 0, and t was not stored in buffer
res->DeleteIfAllowed();
}
res = MergeJoinLocalInfo2::NextResultTuple();
}
if (trace)
cerr << "copy2TupleBuf" << endl;
rtBuf.copy2TupleBuf( memBuf );
// reset scan
firstScanFinished = true;
sliA->InitMerge();
sliB->InitMerge();
InitIteration();
sort(positions.begin(), positions.end());
posIter = positions.begin();
memBufIter = memBuf.MakeScan();
streamPos = 0;
if (trace)
cerr << "Start 2nd run" << endl;
}
if (firstScanFinished)
{
if (!memBufFinished)
{
res = memBufIter->GetNextTuple();
if (res == 0) {
if (trace) {
cerr << endl;
cerr << "streamPos: " << streamPos << endl;
cerr << "memBuf : " << memBuf.GetNoTuples() << endl;
}
memBufFinished = true;
delete memBufIter;
memBufIter = 0;
}
}
if ( memBufFinished == true)
{
res = MergeJoinLocalInfo2::NextResultTuple();
streamPos++;
while (streamPos == *posIter)
{
res->DeleteIfAllowed();
res = MergeJoinLocalInfo2::NextResultTuple();
streamPos++;
posIter++;
}
}
}
return res;
}
private:
size_t streamPos;
TupleBuffer memBuf;
vector<size_t> positions;
vector<size_t>::const_iterator posIter;
RandomTBuf rtBuf;
GenericRelationIterator* memBufIter;
bool memBufFinished;
bool firstScanFinished;
const bool trace;
};
/*
2.2.3 Value mapping function of operator ~mergejoin~
*/
//CPUTimeMeasurer mergeMeasurer;
// Value mapping shared by the merge join variants of this file.
//
// T   : class which computes the join (MergeJoinLocalInfo2 or
//       MergeJoinLocalInfoSHF)
// SRT : passed to T as ~expectSorted~ and used to select the cost formula
// RND : passed to T as random prefix flag
// R3  : passed to T as early exit flag
//
// args[0], args[1] are the input streams and args[4], args[5] the join
// attribute indexes.
//
// The local info object is created on OPEN and released on the next OPEN
// or on CLOSEPROGRESS. The join object (li->ptr) is created lazily by the
// first REQUEST.
template<class T, bool SRT, bool RND, bool R3> int
MergeJoin(Word* args, Word& result, int message, Word& local, Supplier s)
{
  typedef LocalInfo<T> LocalType;
  LocalType* li = static_cast<LocalType*>( local.addr );

  switch(message)
  {
    case OPEN:

      if ( li ) {
        // Release the join object and reset the pointer before the
        // wrapper is deleted. Resetting makes the sequence correct no
        // matter whether the destructor of LocalInfo deletes ~ptr~ itself
        // (defined outside of this file): neither a double delete nor a
        // leak can occur.
        delete li->ptr;
        li->ptr = 0;
        delete li;
      }
      li = new LocalType();
      local.addr = li;

      qp->Open(args[0].addr);
      qp->Open(args[1].addr);
      li->ptr = 0;
      return 0;

    case REQUEST: {
      //mergeMeasurer.Enter();
      if ( li->ptr == 0 ) //first request;
                          //constructor put here to avoid delays in OPEN
                          //which are a problem for progress estimation
      {
        li->ptr = new T( args[0], args[4], args[1],
                         args[5], SRT, s, li, RND, R3 );
      }

      T* mli = li->ptr;
      result.addr = mli->NextResultTuple();
      li->returned++;
      //mergeMeasurer.Exit();
      return result.addr != 0 ? YIELD : CANCEL;
    }

    case CLOSE:
      //mergeMeasurer.PrintCPUTimeAndReset("CPU Time for Merging Tuples : ");
      qp->Close(args[0].addr);
      qp->Close(args[1].addr);

      //nothing is deleted on close because the substructures are still
      //needed for progress estimation. Instead, everything is released on
      //(repeated) OPEN and on CLOSEPROGRESS
      return 0;

    case CLOSEPROGRESS:
      if ( li ) {
        // same teardown as in OPEN
        delete li->ptr;
        li->ptr = 0;
        delete li;
        local.addr = 0;
      }
      return 0;

    case REQUESTPROGRESS:
    {
      ProgressInfo p1, p2;
      ProgressInfo* pRes = static_cast<ProgressInfo*>( result.addr );

      const double uMergeJoin = 0.041;    //millisecs per tuple merge (merge)
      const double vMergeJoin = 0.000076; //millisecs per byte merge
                                          //(sortmerge)
      const double uSortBy = 0.00043;     //millisecs per byte sort

      if( !li )
      {
        return CANCEL;
      }
      else
      {
        if (qp->RequestProgress(args[0].addr, &p1)
            && qp->RequestProgress(args[1].addr, &p2))
        {
          li->SetJoinSizes(p1, p2);
          pRes->CopySizes(li);

          if ( SRT ) // already sorted inputs
          {
            pRes->Time = p1.Time + p2.Time +
                         (p1.Card + p2.Card) * uMergeJoin;

            pRes->Progress =
              (p1.Progress * p1.Time + p2.Progress * p2.Time +
               (((double) li->readFirst) + ((double) li->readSecond))
               * uMergeJoin)
              / pRes->Time;

            pRes->CopyBlocking(p1, p2); //non-blocking in this case
          }
          else // unsorted inputs
          {
            pRes->Time =
              p1.Time +
              p2.Time +
              p1.Card * p1.Size * uSortBy +
              p2.Card * p2.Size * uSortBy +
              (p1.Card * p1.Size + p2.Card * p2.Size) * vMergeJoin;

            // the number of tuples read by the sorters is taken from
            // their own progress objects, if these exist already
            typedef LocalInfo<SortByLocalInfo2> LocalSRT;
            LocalSRT* liFirst = 0;
            LocalSRT* liSecond = 0;
            liFirst = static_cast<LocalSRT*>( li->firstLocalInfo );
            liSecond = static_cast<LocalSRT*>( li->secondLocalInfo );

            long readFirst = (liFirst ? liFirst->read : 0);
            long readSecond = (liSecond ? liSecond->read : 0);

            pRes->Progress =
              (p1.Progress * p1.Time +
               p2.Progress * p2.Time +
               ((double) readFirst) * p1.Size * uSortBy +
               ((double) readSecond) * p2.Size * uSortBy +
               (((double) li->readFirst) * p1.Size +
                ((double) li->readSecond) * p2.Size)
               * vMergeJoin)
              / pRes->Time;

            // blocking part: reading and sorting both inputs
            pRes->BTime = p1.Time + p2.Time
                          + p1.Card * p1.Size * uSortBy
                          + p2.Card * p2.Size * uSortBy;

            pRes->BProgress =
              (p1.Progress * p1.Time + p2.Progress * p2.Time
               + ((double) readFirst) * p1.Size * uSortBy
               + ((double) readSecond) * p2.Size * uSortBy)
              / pRes->BTime;
          }

          if (li->returned > enoughSuccessesJoin ) // stable state
          {
            // extrapolate the observed ratio of results to tuples read
            pRes->Card = ((double) li->returned * (p1.Card + p2.Card)
                          / ((double) li->readFirst + (double) li->readSecond));
          }
          else
          {
            pRes->Card = p1.Card * p2.Card * qp->GetSelectivity(s);
          }
          return YIELD;
        }
        else return CANCEL;
      }
    }
  }
  return 0;
}
/*
3 Instantiation of Template Functions
*/
// sortmergejoin_r: explicit instantiation of the value mapping template
// for the variant implemented by class MergeJoinLocalInfoSHF
// (SRT = false, RND = false, R3 = false).
template int
MergeJoin<MergeJoinLocalInfoSHF, false, false, false>(
  Word* args, Word& result, int message, Word& local, Supplier s );
// Value mapping of the variant implemented by MergeJoinLocalInfoSHF:
// inputs are sorted by the operator, no random prefix, no early exit.
int
sortmergejoinr_vm( Word* args, Word& result,
                   int message, Word& local, Supplier s )
{
  const int rc =
    MergeJoin<MergeJoinLocalInfoSHF, false, false, false>( args, result,
                                                           message, local,
                                                           s );
  return rc;
}
// Value mapping of the variant implemented by MergeJoinLocalInfo2 with
// the random prefix flag set: inputs are sorted by the operator, no early
// exit.
int
sortmergejoinr2_vm( Word* args, Word& result,
                    int message, Word& local, Supplier s )
{
  const int rc =
    MergeJoin<MergeJoinLocalInfo2, false, true, false>( args, result,
                                                        message, local, s );
  return rc;
}
// Value mapping of the variant implemented by MergeJoinLocalInfo2 with
// both the random prefix flag and the early exit flag set; inputs are
// sorted by the operator.
int
sortmergejoinr3_vm( Word* args, Word& result,
                    int message, Word& local, Supplier s )
{
  const int rc =
    MergeJoin<MergeJoinLocalInfo2, false, true, true>( args, result,
                                                       message, local, s );
  return rc;
}