/* ---- This file is part of SECONDO. Copyright (C) 2004, University in Hagen, Department of Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- //paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}] //paragraph [10] Footnote: [{\footnote{] [}}] //[TOC] [\tableofcontents] //[_] [\_] //[&] [\&] //[x] [\ensuremath{\times}] //[->] [\ensuremath{\rightarrow}] //[>] [\ensuremath{>}] //[<] [\ensuremath{<}] //[ast] [\ensuremath{\ast}] */ /* [1] ExtRelation2AlgebraCostEstimation Mai, 2012. Jan Kristof Nidzwetzki [TOC] 0 Description This file provides some CostEstimationClasses for the ExtRelation2Algebra. Mai 2012, JKN, First version of this file */ /* 0.1 Defines */ #ifndef COST_EST_EXT2_RELATION_ALG_H #define COST_EST_EXT2_RELATION_ALG_H #define DEBUG false #include #include #include /* 1.0 Prototyping Local info for operator */ class ItHashJoinDInfo; /* 1.1 The class ~ItHashJoinCostEstimation~ provides cost estimation capabilities for the operator itHashJoin */ class ItHashJoinCostEstimation : public CostEstimation { public: ItHashJoinCostEstimation() : buckets(999997) { pli = new ProgressLocalInfo(); } /* 1.2 Free local datastructures */ virtual ~ItHashJoinCostEstimation() { if(pli) { delete pli; } }; virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo, bool argsAvialable) { // no progress info available => cancel if(! argsAvialable) { return CANCEL; } // Determination of constants in file bin/UpdateProgressConstants // Time for processing one tuple in stream 1 static const double uItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "uItHashJoin"); // Time for processing one tuple in stream 2 (partitions = 1) static const double vItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "vItHashJoin"); // msecs per byte written and read from/to TupleFile static const double wItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "wItHashJoin"); // msecs per byte read from TupleFile static const double xItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "xItHashJoin"); // msecs per attr in result tuple static const double yItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "yItHashJoin"); if (qp->RequestProgress(args[0].addr, &p1) && qp->RequestProgress(args[1].addr, &p2)) { pli->SetJoinSizes(p1, p2); // Read memory for operator in bytes size_t maxmem = qp->GetMemorySize(supplier) * 1024 * 1024; // Calculate number of partitions size_t partitions = getNoOfPartitions(p1.Card, p1.Size, maxmem); // Number of tuples per iteration size_t tuplesPerIteration = p2.Card; // is the tuplefile written completely? Otherwise we assume // that all tuples of p2 are written to tuplefile if(tupleFileWritten) { tuplesPerIteration = tuplesInTupleFile; } if(partitions > 1) { // For partition 1: write 'tuplesInTupleFile' to tuplefile // For partition 1+n: read 'tuplesInTupleFile' from tuplefile pRes->Time = p2.Time + (tuplesPerIteration * wItHashJoin * p2.Size) + ((partitions - 1) * tuplesPerIteration * xItHashJoin * p2.Size) + p1.Card * uItHashJoin + p1.Time; // Calculate Elapsed time size_t elapsedTime = p2.Time * p2.Progress + (p1.Progress * p1.Card * uItHashJoin) + (p1.Progress * p1.Time); if(iteration <= 1) { elapsedTime += readInIteration * wItHashJoin * p2.Size; } else { // 1st iteration: Tuples are written to tuplefile elapsedTime += tuplesPerIteration * wItHashJoin * p2.Size; // Time for the completed iterations elapsedTime += (iteration - 2) * tuplesPerIteration * xItHashJoin * p2.Size; // Current iteration elapsedTime += readInIteration * xItHashJoin * p2.Size; } // Calculate progress pRes->Progress = (double) elapsedTime / (double) pRes->Time; if(DEBUG) { cout << "DEBUG: ellapsed / it " << elapsedTime << " of " << pRes->Time << " / " << iteration << endl; cout << "DEBUG: iteration / tuplefile " << iteration << " / " << tupleFileWritten << endl; cout << "DEBUG: read in iteration " << readInIteration << endl; } } else { if(DEBUG) { cout << p2 << endl; } pRes->Progress = p2.Progress; pRes->Time = p2.Time + p2.Card * vItHashJoin + p1.Card * uItHashJoin + p1.Time ; } // Blocking time is: adding p1.Card tuples to hashtable // and the blocking time of our predecessors pRes->BTime = p1.Card * uItHashJoin + p1.Time + p1.BTime + p2.BTime; pRes->BProgress = ((p1.Progress * p1.Card * uItHashJoin) + (p1.Progress * p1.Time) + (p1.BProgress * p1.BTime) + (p2.BProgress * p2.BTime)) / pRes->BTime; // Calculate cardinality // Warm state or cold state? if(qp->GetSelectivity(supplier) == 0.1 && returned >= (size_t) enoughSuccessesJoin) { pRes->Card = returned / pRes->Progress; } else { pRes->Card = qp->GetSelectivity(supplier) * p1.Card * p2.Card; } // is computation done? if(stream1Exhausted && stream2Exhausted) { pRes->Progress = 1.0; pRes->BProgress = 1.0; pRes->Card = returned; } // Append time for creating new tuples. Assume that the creation // of new tuples is equally distributed during the calculation. So // we can add the time without affecting the progress calculation pRes->Time += (p1.noAttrs + p2.noAttrs) * yItHashJoin * pRes->Card; if(DEBUG) { cout << "Progress is " << pRes->Progress << endl; cout << "Time is " << pRes->Time << endl; cout << "BProgress is " << pRes->BProgress << endl; cout << "BTime is " << pRes->BTime << endl; cout << "Card is: " << pRes->Card << endl; cout << "Partitions is: " << partitions << endl; cout << "Card is: " << pRes->Card << endl; cout << "Returned / Progress" << returned << " / " << pRes->Progress << endl; } pRes->CopySizes(pli); return YIELD; } // default: send cancel return CANCEL; } /* 1.3 getCosts Returns the estimated time in ms for given arguments. */ virtual bool getCosts(const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, const double memoryMB, double &costs) const{ // Init calculation size_t maxmem = memoryMB * 1024 * 1024; // Read variables //cerr << __PRETTY_FUNCTION__ << endl // << "TODO: implement use of noAttributes and selectivity" << endl; // now done in optimizer // Time for processing one tuple in stream 2 (partitions = 1) static const double uItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "uItHashJoin"); // Time for processing one tuple in stream 2 (partitions = 1) static const double vItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "vItHashJoin"); // msecs per byte written and read from/to TupleFile static const double wItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "wItHashJoin"); // msecs per byte read from TupleFile static const double xItHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "itHashJoin", "xItHashJoin"); //Calculate number of partitions size_t partitions = getNoOfPartitions(NoTuples1, sizeOfTuple1, maxmem); if(partitions > 1) { // For partition 1: write 'tuplesInTupleFile' to tuplefile // For partition 2+n: read 'tuplesInTupleFile' from tuplefile costs = NoTuples1 * uItHashJoin // place tuples in hash table // write tuples in stream 2 to buffer + (NoTuples2 * wItHashJoin * sizeOfTuple2) // read tuples from buffer + ((partitions - 1) * xItHashJoin * sizeOfTuple2); } else { costs = NoTuples1 * uItHashJoin + NoTuples2 * vItHashJoin; } // Write Debug output to file. Needed if called from optimizer if(DEBUG) { std::ofstream file; file.open("/tmp/secondolog", std::ios::out | std::ios::app); file << "Called with NoTuples1 " << NoTuples1 << " sizeOfTuple1 " << sizeOfTuple1 << endl; file << "Called with NoTuples2 " << NoTuples2 << " sizeOfTuple2 " << sizeOfTuple2 << endl; file << "Partitions is " << partitions << endl; file << "Memory is " << maxmem << endl; file << "Costs " << costs << endl << endl; file.close(); } return true; } /* 1.4 Calculate the sufficent memory for this operator. */ double calculateSufficientMemory(size_t NoTuples1, size_t sizeOfTuple1) const { // calculate size for one bucket datastructure std::vector* bucket = new std::vector(); size_t sizePerBucket = sizeof(bucket); sizePerBucket += sizeof(void*) * bucket->capacity(); delete bucket; bucket = NULL; // calculate size of the whole datastructure size_t memoryOfDatastruct = sizePerBucket * buckets; size_t memory = memoryOfDatastruct + (NoTuples1 * sizeOfTuple1); double suffMemory = ceil(memory / (1024 * 1024)); // At least 16 mb are required return std::max(16.0, suffMemory); } /* 1.6 getFunction This function approximates the costfunction by an parametrizable function. Allowed types are: 1: linear function 2: a / x */ virtual bool getFunction( const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, int& functionType, double& sufficientMemory, double& timeAtSuffMemory, double& timeAt16MB, double& a, double& b, double& c, double& d) const { // Function is a/x + b functionType=2; // Init variables a = 0; b = 0; c = 0; d = 0; // Calculate sufficientMemory and time at sufficientMemory and 16MB sufficientMemory=calculateSufficientMemory(NoTuples2, sizeOfTuple2); // Points for resolving parameter double point1, point2, timeAtPoint1, timeAtPoint2; calculateXPoints(sufficientMemory, point1, point2); // Calculate costs for first point getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, point1, timeAtPoint1); // Calculate costs for second point getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, point2, timeAtPoint2); // Calculate a and b for function f(x) = a/x+b resolveInverseProportionality(point1, timeAtPoint1, point2, timeAtPoint2, a, b); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, sufficientMemory, timeAtSuffMemory); // is point1 at 16mb? => We have already costs for 16mb if(point1 == 16) { timeAt16MB = timeAtPoint1; } else { getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, 16, timeAt16MB); } return true; } /* 1.7 Calculate the numer of partitions for this operator */ size_t getNoOfPartitions(size_t s1Card, size_t s1Size, size_t maxmem) const { // if the first stream is exhausted, we are in the last // partition / iteration if(stream1Exhausted) { return iteration; } // if we have a partition size // use them if(partitionSize > 0) { return ceil(s1Card / partitionSize) + 1; } // otherwise we must estimate // calculate size for one bucket std::vector* bucket = new std::vector(); size_t sizePerBucket = sizeof(bucket); delete bucket; bucket = NULL; // Handle low memory situations, hashtable is to big for memory // Recalcualte size of buckets (needed if called from optimizer and // not from operator) size_t realBuckets = buckets; if(buckets * sizePerBucket > maxmem / 5){ // reduce size of table when table structure takes more than // 20 percent of the available memory realBuckets = maxmem / (5 * sizeof(void*)); if(realBuckets < 3){ realBuckets = 3; } } // calculate size of the whole datastructure size_t memoryOfDatastruct = sizePerBucket * realBuckets; // calculate max number of tuples in hashtable size_t tuplesInMemory = (maxmem - memoryOfDatastruct) / (s1Size + sizeof(void*)); // calculate number of partitions size_t noOfPartitions = ceil((double) s1Card / (double) tuplesInMemory); if(DEBUG) { std::ofstream file; file.open("/tmp/secondolog", std::ios::out | std::ios::app); file << "s1Card " << s1Card << " s1Size " << s1Size << endl; file << "Memory is " << maxmem << endl; file << "Real buckets " << realBuckets << endl; file << "DEBUG: Size of datastucture is: " << memoryOfDatastruct << endl; file << "DEBUG: Size per Bucket: " << sizePerBucket << endl; file << "DEBUG: Tuples is memory are: " << tuplesInMemory << endl; file << "DEBUG: total Tuples are: " << s1Card << endl; file << "DEBUG: No of partitons is: " << noOfPartitions << endl; file << endl << endl; file.close(); } return noOfPartitions; } /* 1.8 Setter for stream1Exhausted */ void setStream1Exhausted(bool exhausted) { stream1Exhausted = exhausted; } /* 1.9 Setter for stream2Exhausted */ void setStream2Exhausted(bool exhausted) { stream2Exhausted = exhausted; } /* 1.10 Update processed tuples in stream1 */ void processedTupleInStream1() { readStream1++; } /* 1.11 Update processed tuples in stream2 */ void processedTupleInStream2() { readStream2++; } /* 1.12 Setter for iterattion */ void setIteration(size_t iter) { // reset read counter if(iteration != iter) { readInIteration = 0; } iteration = iter; } /* 1.13 Setter for Buckets */ void setBuckets(size_t bucketno) { buckets = bucketno; } /* 1.14 Setter for readInIteration */ void incReadInIteration() { readInIteration++; } /* 1.15 Reset read in iteration */ void resetReadInIteration() { readInIteration = 0; } /* 1.16 Set number of tuples in tuplefile */ void incTuplesInTupleFile() { tuplesInTupleFile++; } /* 1.17 Set number of tuples in tuplefile */ void setTuplesInTupleFile(size_t tuples) { tuplesInTupleFile = tuples; } /* 1.18 Set tupleFileWritten state */ void setTupleFileWritten(bool state) { tupleFileWritten = state; } /* 1.19 Set readPartitionDone state */ void readPartitionDone() { if(partitionSize == 0) { partitionSize = readStream1; } } /* 1.20 init our class */ virtual void init(Word* args, void* localInfo) { returned = 0; stream1Exhausted = false; stream2Exhausted = false; tupleFileWritten = false; readStream1 = 0; readStream2 = 0; iteration = 0; readInIteration = 1; buckets = 999997; // default buckets tuplesInTupleFile = 0; partitionSize = 0; } private: ProgressLocalInfo *pli; // Local Progress info ProgressInfo p1, p2; // Progress info for stream 1 / 2 bool stream1Exhausted; // is stream 1 exhaused? bool stream2Exhausted; // is stream 2 exhaused? bool tupleFileWritten; // is the tuplefile completely written? size_t readStream1; // processed tuple in stream1 size_t readStream2; // processes tuple in stream2 size_t iteration; // number of iteration in operator size_t readInIteration; // no of tuples read in this iteration size_t buckets; // number of buckets size_t tuplesInTupleFile; // number of tuples in tuplefile size_t partitionSize; // size of a partition }; /* 2.0 The class ~GraceHashJoinCostEstimation~ provides cost estimation capabilities for the operator gracehashjoin */ class GraceHashJoinCostEstimation : public CostEstimation { public: GraceHashJoinCostEstimation() { pli = new ProgressLocalInfo(); } /* 2.1 Free local datastructures */ virtual ~GraceHashJoinCostEstimation() { if(pli) { delete pli; } }; virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo, bool argsAvialable) { // no progress info available => cancel if(! argsAvialable) { return CANCEL; } extrel2::GraceHashJoinLocalInfo* li; li = static_cast( localInfo ); if( !li ) { // if localInfo is deleted, and we have // a old ProgressInfo. Use them and assume // that the calculation is done if(pi.Time > 0) { pRes->Copy(pi); pRes -> BProgress = 1; pRes -> Progress = 1; pRes -> Card = returned; return YIELD; } return CANCEL; } if (qp->RequestProgress(args[0].addr, &p1) && qp->RequestProgress(args[1].addr, &p2)) { li->CalcProgress(p1, p2, pRes, supplier); // Copy values, if the localInfo value of the // operator is deleted, we can use the last progress // estimation pi.Copy(*pRes); return YIELD; } // default: send cancel return CANCEL; } /* 2.2 getCosts Returns the estimated time in ms for given arguments. */ virtual bool getCosts(const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, const double memoryMB, double &costs) const{ //cerr << __PRETTY_FUNCTION__ << endl // << "TODO: implement use of noAttributes and selectivity" << endl; // now done in optimizer double uHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "uHashJoin"); double vHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "vHashJoin"); double t_read = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "tread"); double t_write = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "twrite"); double t_probe = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "tprobe"); double t_hash = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "thash"); // Internal or external mode? if(calculateSufficientMemory(NoTuples1, sizeOfTuple1, NoTuples2, sizeOfTuple2) > memoryMB) { // External mode costs = NoTuples1 * ( t_probe + t_hash + t_read + t_write ) + NoTuples2 * ( t_hash + t_read + t_write ); } else { // Internal mode costs = NoTuples1 * vHashJoin // reading stream B into hash table + NoTuples2 * uHashJoin; // probing stream A against hash table } return true; } /* 2.3 Calculate the sufficent memory for this operator. */ double calculateSufficientMemory(size_t NoTuples1, size_t sizeOfTuple1, const size_t NoTuples2, const size_t sizeOfTuple2) const { // Space for placing all tuples in memory double suffMemory = ceil((NoTuples2 * sizeOfTuple2) / (1024 * 1024)); // At least 16 mb are required return std::max(16.0, suffMemory); } /* 2.4 Get Linear Params Input: NoTuples1, sizeOfTuple1 NoTuples2, sizeOfTuple2, Output: sufficientMemory = sufficientMemory for this operator with the given input timeAtSuffMemory = Time for the calculation with sufficientMemory timeAt16MB - Time for the calculation with 16MB Memory */ virtual bool getLinearParams( const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, double& sufficientMemory, double& timeAtSuffMemory, double& timeAt16MB ) const { sufficientMemory=calculateSufficientMemory(NoTuples1, sizeOfTuple1, NoTuples2, sizeOfTuple2); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, sufficientMemory, timeAtSuffMemory); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, 16, timeAt16MB); return true; } /* 2.5 getFunction This function approximates the costfunction by an parametrizable function. Allowed types are: 1: linear function 2: a / x */ virtual bool getFunction( const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, int& functionType, double& sufficientMemory, double& timeAtSuffMemory, double& timeAt16MB, double& a, double& b, double& c, double& d) const { cout << __PRETTY_FUNCTION__ << endl << "TODO : use of noAttributes and selectivity" << endl; functionType=1; a=0;b=0;c=0;d=0; return getLinearParams(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, sufficientMemory, timeAtSuffMemory, timeAt16MB); } /* 2.6 init our class */ virtual void init(Word* args, void* localInfo) { returned = 0; } private: ProgressLocalInfo *pli; // Local Progress info ProgressInfo p1, p2; // Progress info for stream 1 / 2 ProgressInfo pi; // Progress Info }; /* 3.0 The class ~HybridHashJoinCostEstimation~ provides cost estimation capabilities for the operator hybridhashjoin */ class HybridHashJoinCostEstimation : public CostEstimation { public: HybridHashJoinCostEstimation() { pli = new ProgressLocalInfo(); } /* 3.1 Free local datastructures */ virtual ~HybridHashJoinCostEstimation() { if(pli) { delete pli; } }; virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo, bool argsAvialable) { // no progress info available => cancel if(! argsAvialable) { return CANCEL; } extrel2::HybridHashJoinLocalInfo* li; li = static_cast( localInfo ); if( !li ) { // if localInfo is deleted, and we have // a old ProgressInfo. Use them and assume // that the calculation is done if(pi.Time > 0) { pRes->Copy(pi); pRes -> BProgress = 1; pRes -> Progress = 1; pRes -> Card = returned; return YIELD; } return CANCEL; } if (qp->RequestProgress(args[0].addr, &p1) && qp->RequestProgress(args[1].addr, &p2)) { li->CalcProgress(p1, p2, pRes, supplier); // Copy values, if the localInfo value of the // operator is deleted, we can use the last progress // estimation pi.Copy(*pRes); return YIELD; } // default: send cancel return CANCEL; } /* 3.2 getCosts Returns the estimated time in ms for given arguments. */ virtual bool getCosts(const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, const double memoryMB, double &costs) const{ //cerr << __PRETTY_FUNCTION__ << endl // << "TODO: implement use of noAttributes and selectivity" << endl; // now done in optimizer double uHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "uHashJoin"); double vHashJoin = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "vHashJoin"); double t_read = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "tread"); double t_write = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "twrite"); double t_probe = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "tprobe"); double t_hash = ProgressConstants::getValue("ExtRelation2Algebra", "gracehashjoin", "thash"); // Internal or external mode? if(calculateSufficientMemory(NoTuples1, sizeOfTuple1, NoTuples2, sizeOfTuple2) > memoryMB) { // External mode // No of tuples in partition 0 double card0 = (memoryMB * 1024 * 1024) / sizeOfTuple2; costs = NoTuples1 * ( t_probe + t_hash + t_read + t_write ) + NoTuples2 * ( t_hash + t_read + t_write ) // Tuples in Partition 0 are hold in memory - card0 * ( t_read + t_write ); } else { // Internal mode costs = + NoTuples1 * vHashJoin // reading stream B into hash table + NoTuples2 * uHashJoin; // probing stream A against hash table } return true; } /* 3.3 Calculate the sufficent memory for this operator. */ double calculateSufficientMemory(size_t NoTuples1, size_t sizeOfTuple1, const size_t NoTuples2, const size_t sizeOfTuple2) const { // Space for placing all tuples in memory double suffMemory = ceil((NoTuples2 * sizeOfTuple2) / (1024 * 1024)); // At least 16 mb are required return std::max(16.0, suffMemory); } /* 3.4 Get Linear Params Input: NoTuples1, sizeOfTuple1 NoTuples2, sizeOfTuple2, Output: sufficientMemory = sufficientMemory for this operator with the given input timeAtSuffMemory = Time for the calculation with sufficientMemory timeAt16MB - Time for the calculation with 16MB Memory */ virtual bool getLinearParams( const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, size_t NoTuples2, size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, double& sufficientMemory, double& timeAtSuffMemory, double& timeAt16MB ) const { sufficientMemory=calculateSufficientMemory(NoTuples1, sizeOfTuple1, NoTuples2, sizeOfTuple2); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, sufficientMemory, timeAtSuffMemory); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, 16, timeAt16MB); return true; } /* 3.5 init our class */ virtual void init(Word* args, void* localInfo) { returned = 0; } private: ProgressLocalInfo *pli; // Local Progress info ProgressInfo p1, p2; // Progress info for stream 1 / 2 ProgressInfo pi; // Progress Info }; /* 4.0 The class ~SortMergeJoinCostEstimation~ provides cost estimation capabilities for the operator sortmergejoin */ class SortMergeJoinCostEstimation : public CostEstimation { public: SortMergeJoinCostEstimation() { pli = new LocalInfo(); } /* 4.1 Free local datastructures */ virtual ~SortMergeJoinCostEstimation() { if(pli) { delete pli; } pli = 0; }; virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo, bool argsAvialable) { // no progress info available => cancel if(! argsAvialable) { return CANCEL; } ProgressInfo p1, p2; //millisecs per byte read in sort step static const double uSortBy = ProgressConstants::getValue("ExtRelationAlgebra", "mergejoin", "uSortBy"); //millisecs per byte read in merge step (sortmerge) const double wMergeJoin = ProgressConstants::getValue("ExtRelationAlgebra", "mergejoin", "wMergeJoin"); // millisecs per attr in result tuple (0.0001072) static const double yMergeJoin = ProgressConstants::getValue("Global", "ResultTuple", "attr"); extrel2::SortProgressLocalInfo* liFirst; extrel2::SortProgressLocalInfo* liSecond; if( !localInfo ) { // if localInfo is deleted and we have // an old ProgressInfo. Use them and assume // that the calculation is done if(pi.Time > 0) { pRes->Copy(pi); pRes -> BProgress = 1; pRes -> Progress = 1; pRes -> Card = returned; return YIELD; } return CANCEL; } typedef LocalInfo LocalType; LocalType* li = static_cast( localInfo ); liFirst = static_cast (li->firstLocalInfo); liSecond = static_cast (li->secondLocalInfo); if (qp->RequestProgress(args[0].addr, &p1) && qp->RequestProgress(args[1].addr, &p2)) { pli->SetJoinSizes(p1, p2); pRes->CopySizes(pli); long readFirst = (liFirst ? liFirst->read : 0); long readSecond = (liSecond ? liSecond->read : 0); double factor = (double) li->readFirst / p1.Card; // Calculate result cardinality if ( returned > (size_t) enoughSuccessesJoin ) { double m = (double)returned; double k1 = (double)li->readFirst; double k2 = (double)li->readSecond; // estimated selectivity double sel = m / ( k1 * k2 ); // warm state if ( qp->GetSelectivity(supplier) != 0.1 ) { // estimated selectivity from optimizer is used // as more tuples are processed the weight of the // optimizer estimation is reduced pRes->Card = ( p1.Card * p2.Card ) * ( factor * sel + ( 1.0 - factor ) * qp->GetSelectivity(supplier) ); } else { // if optimizer is not used use only estimation pRes->Card = sel * p1.Card * p2.Card; } } else { // cold state pRes->Card = p1.Card * p2.Card * qp->GetSelectivity(supplier); } // total time pRes->Time = p1.Time + p2.Time + + p1.Card * p1.Size * uSortBy + + p2.Card * p2.Size * uSortBy + + (p1.Card * p1.Size + p2.Card * p2.Size) * wMergeJoin + pRes->Card * (pRes->noAttrs * yMergeJoin); pRes->Progress = ( p1.Progress * p1.Time + p2.Progress * p2.Time + ((double) readFirst) * p1.Size * uSortBy + ((double) readSecond) * p2.Size * uSortBy + (((double) li->readFirst) * p1.Size + ((double) li->readSecond) * p2.Size) * wMergeJoin + ((double) li->returned) * (pRes->noAttrs * yMergeJoin) ) / pRes->Time; // first result tuple is possible after both input streams // deliver the first tuples and both sort algorithm have // consumed their streams completely pRes->BTime = p1.BTime + p2.BTime + p1.Card * p1.Size * uSortBy + p2.Card * p2.Size * uSortBy; // blocking progress pRes->BProgress = ( p1.BProgress * p1.BTime + p2.BProgress * p2.BTime + ((double) readFirst) * p1.Size * uSortBy + ((double) readSecond) * p2.Size * uSortBy ) / pRes->BTime; // Copy values, if the localInfo value of the // operator is deleted, we can use the last progress // estimation pi.Copy(*pRes); return YIELD; } else { return CANCEL; } // default: send cancel return CANCEL; } /* 4.2 getCosts Returns the estimated time in ms for given arguments. */ virtual bool getCosts(const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, const double memoryMB, double &costs) const{ //cerr << __PRETTY_FUNCTION__ << endl // << "TODO: implement use of noAttributes and selectivity" << endl; // now done in optimizer //millisecs per byte read in sort step static const double uSortBy = ProgressConstants::getValue("ExtRelationAlgebra", "mergejoin", "uSortBy"); //millisecs per byte read in merge step (sortmerge) const double wMergeJoin = ProgressConstants::getValue("ExtRelationAlgebra", "mergejoin", "wMergeJoin"); // Time for sorting (uSortBy) + time for merging (wMergeJoin) costs = NoTuples1 * sizeOfTuple1 * uSortBy + NoTuples2 * sizeOfTuple2 * uSortBy + (NoTuples1 * sizeOfTuple1 + NoTuples2 * sizeOfTuple2) * wMergeJoin; return true; } /* 4.3 Calculate the sufficent memory for this operator. */ double calculateSufficientMemory(size_t NoTuples1, size_t sizeOfTuple1, size_t NoTuples2, size_t sizeOfTuple2) const { // Space for in memory sorting of both streams // + 20% memory for merge double suffMemory = ceil(((NoTuples1 * sizeOfTuple1 + NoTuples2 * sizeOfTuple2) * 1.2) / (1024 * 1024)); // At least 16 mb are required return std::max(16.0, suffMemory); } /* 4.4 Get Linear Params Input: NoTuples1, sizeOfTuple1 NoTuples2, sizeOfTuple2, Output: sufficientMemory = sufficientMemory for this operator with the given input timeAtSuffMemory = Time for the calculation with sufficientMemory timeAt16MB - Time for the calculation with 16MB Memory */ virtual bool getLinearParams( const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, const size_t NoTuples2, const size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, double& sufficientMemory, double& timeAtSuffMemory, double& timeAt16MB ) const { sufficientMemory=calculateSufficientMemory(NoTuples1, sizeOfTuple1, NoTuples2, sizeOfTuple2); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, sufficientMemory, timeAtSuffMemory); getCosts(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, 16, timeAt16MB); return true; } /* 4.5 getFunction This function approximates the costfunction by an parametrizable function. Allowed types are: 1: linear function 2: a / x */ virtual bool getFunction( const size_t NoTuples1, const size_t sizeOfTuple1, const size_t noAttributes1, size_t NoTuples2, size_t sizeOfTuple2, const size_t noAttributes2, const double selectivity, int& functionType, double& sufficientMemory, double& timeAtSuffMemory, double& timeAt16MB, double& a, double& b, double& c, double& d) const { functionType=1; a=0;b=0;c=0;d=0; return getLinearParams(NoTuples1, sizeOfTuple1, noAttributes1, NoTuples2, sizeOfTuple2, noAttributes2, selectivity, sufficientMemory, timeAtSuffMemory, timeAt16MB); } /* 4.6 init our class */ virtual void init(Word* args, void* localInfo) { returned = 0; } private: LocalInfo *pli; // Local Progress info ProgressInfo pi; // Progress info ProgressInfo p1, p2; // Progress info for stream 1 / 2 }; #endif