Files
secondo/Algebras/CostEstimation/ExtRelationAlgebraCostEstimation.h

845 lines
21 KiB
C
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
----
This file is part of SECONDO.
Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//paragraph [10] Footnote: [{\footnote{] [}}]
//[TOC] [\tableofcontents]
//[_] [\_]
//[&] [\&]
//[x] [\ensuremath{\times}]
//[->] [\ensuremath{\rightarrow}]
//[>] [\ensuremath{>}]
//[<] [\ensuremath{<}]
//[ast] [\ensuremath{\ast}]
*/
/*
[1] ExtRelationAlgebraCostEstimation
Jun, 2012. First Revision - Jan Kristof Nidzwetzki
[TOC]
0 Description
This file provides some CostEstimationClasses for the ExtRelationAlgebra.
Jun 2012, JKN, First version of this file
*/
/*
0.1 Defines
*/
#ifndef COST_EST_EXT_RELATION_ALG_H
#define COST_EST_EXT_RELATION_ALG_H
#define DEBUG false
/*
0.2 Includes
*/
#include "SortByLocalInfo.h"
/*
1.0 Prototyping
Local info for operator
*/
/*
1.1 The class ~LoopJoinCostEstimation~ provides cost estimation
capabilities for the operators loopjoin and loopsel
*/
// Operation mode: join (true) or select (false).
// This class is used by loopjoin
// and loopsel.
template<bool join>
class LoopJoinCostEstimation : public CostEstimation
{
public:
LoopJoinCostEstimation()
{
pli = new ProgressLocalInfo();
}
/*
1.2 Free local datastructures
*/
virtual ~LoopJoinCostEstimation() {
if(pli) {
delete pli;
}
};
virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo,
bool argsAvialable) {
// no progress info available => cancel
if(! argsAvialable) {
return CANCEL;
}
if (qp->RequestProgress(args[0].addr, &p1)
&& qp->RequestProgress(args[1].addr, &p2))
{
if(join) {
pli->SetJoinSizes(p1, p2);
pRes->CopySizes(pli);
} else {
// tuples are axtracted from fun relation
// for each tuplex, all matching tupley will be collected,
// therefore the time for the query for tupley is multiplied
pRes->CopySizes(p2);
}
if (returned > (size_t) enoughSuccessesJoin) {
pRes->Card = p1.Card *
((double) (returned) /
(double) (readStream1));
} else {
pRes->Card = p1.Card * p2.Card;
}
// millisecs per attr in result tuple
static const double tattr =
ProgressConstants::getValue("Global",
"ResultTuple", "attr");
// attrs in result tuple
size_t resultAttr;
if(join) {
resultAttr = p1.noAttrs + p2.noAttrs;
} else {
resultAttr = p2.noAttrs;
}
pRes->Time = p1.Time + p1.Card * p2.Time
+ pRes->Card * tattr * resultAttr;
if (stream1Exhausted) {
pRes->Progress = 1.0;
} else if ( p1.BTime < 0.1 && pipelinedProgress ) {
pRes->Progress = p1.Progress;
} else {
pRes->Progress =
(p1.Progress * p1.Time + (double) readStream1 * p2.Time
+ returned * tattr * resultAttr)
/ pRes->Time;
pRes->CopyBlocking(p1); //non-blocking operator;
//second argument assumed not to block
}
return YIELD;
}
// default: send cancel
return CANCEL;
}
/*
1.8 Setter for stream1Exhausted
*/
void setStream1Exhausted(bool exhausted) {
stream1Exhausted = exhausted;
}
/*
1.9 Update processed tuples in stream1
*/
void processedTupleInStream1() {
readStream1++;
}
/*
1.10 init our class
*/
virtual void init(Word* args, void* localInfo)
{
returned = 0;
stream1Exhausted = false;
readStream1 = 0;
}
private:
ProgressLocalInfo *pli; // Local Progress info
ProgressInfo p1, p2; // Progress info for stream 1 / 2
bool stream1Exhausted; // is stream 1 exhaused?
size_t readStream1; // processed tuple in stream1
};
/*
1.1 The class ~SymmjoinCostEstimation~ provides cost estimation
capabilities for the operator symmjoin
*/
class SymmjoinCostEstimation : public CostEstimation
{
public:
SymmjoinCostEstimation()
{
pli = new ProgressLocalInfo();
}
/*
1.2 Free local datastructures
*/
virtual ~SymmjoinCostEstimation() {
if(pli) {
delete pli;
}
};
virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo,
bool argsAvialable) {
// no progress info available => cancel
if(! argsAvialable) {
return CANCEL;
}
// Read memory for operator in bytes
size_t maxmem = qp->GetMemorySize(supplier) * 1024 * 1024;
if (qp->RequestProgress(args[0].addr, &p1)
&& qp->RequestProgress(args[1].addr, &p2))
{
pli->SetJoinSizes(p1, p2);
// millisecs per tuple pair 0.2
static const double uSymmJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"symmjoin", "uSymmJoin");
// millisecs per byte written
static const double twrite =
ProgressConstants::getValue("Global",
"TupleFile", "twrite");
// millisecs per byte read
static const double tread =
ProgressConstants::getValue("Global",
"TupleFile", "tread");
// millisecs per attr in result tuple
static const double tattr =
ProgressConstants::getValue("Global",
"ResultTuple", "attr");
if (!pli){
return CANCEL;
}
pRes->CopySizes(pli);
double predCost =
(qp->GetPredCost(supplier) == 0.1 ? 0.004
: qp->GetPredCost(supplier));
//the default value of 0.1 is only suitable for selections
if (returned > (size_t) enoughSuccessesJoin ) {
// stable state assumed now
pRes->Card = p1.Card * p2.Card *
((double) returned /
(double) (readStream1 * readStream2));
} else {
pRes->Card = p1.Card * p2.Card * qp->GetSelectivity(supplier);
}
// % of tuples in stream 1 are written to disk
double t1d = percentTupleOnDisk(p1.Card, p1.Size, maxmem / 2);
// % of tuples in stream 2 are written to disk
double t2d = percentTupleOnDisk(p2.Card, p2.Size, maxmem / 2);
// Tuples in Buffer 1 or Buffer 2 are written to disk?
double wtd1 = 0;
double wtd2 = 0;
if(t1d > 0) {
wtd1 = 1;
}
if(t2d > 0) {
wtd2 = 1;
}
pRes->Time = p1.Time + p2.Time
+ p1.Card * p2.Card * predCost * uSymmJoin
+ pRes->Card * tattr * (p1.noAttrs + p2.noAttrs)
+ wtd1 * p1.Size * p1.Card * twrite
+ wtd2 * p2.Size * p2.Card * twrite
+ t1d * p1.Size * p1.Card * tread * p2.Card
+ t2d * p2.Size * p2.Card * tread * p1.Card;
if(stream1Exhausted && stream2Exhausted) {
pRes->Progress = 1.0;
} else {
pRes->Progress =
(p1.Progress * p1.Time + p2.Progress * p2.Time
+ readStream1 * readStream2 * predCost * uSymmJoin
+ returned * tattr * (p1.noAttrs + p2.noAttrs)
+ tupleBuffer1OnDisk * p1.Size * p1.Card * twrite
+ tupleBuffer2OnDisk * p2.Size * p2.Card * twrite
+ t1d * p1.Size * p1.Card * tread * p2.Card * p2.Progress
+ t2d * p2.Size * p2.Card * tread * p1.Card * p1.Progress
) / pRes->Time;
/* Debug
cout << "r1 " << readStream1 << endl;
cout << "r2 " << readStream2 << endl;
cout << "wtd1 " << wtd1 << endl;
cout << "t1d " << t1d << endl;
cout << "wtd2 " << wtd2 << endl;
cout << "t2d " << t2d << endl;
*/
}
pRes->CopyBlocking(p1, p2); //non-blocking oprator
return YIELD;
}
// default: send cancel
return CANCEL;
}
/*
1.3 getCosts
Returns the estimated time in ms for given arguments.
*/
virtual bool getCosts(const size_t NoTuples1, const size_t sizeOfTuple1,
const size_t noAttributes1,
const size_t NoTuples2, const size_t sizeOfTuple2,
const size_t noAttributes2,
const double selectivity,
const double memoryMB, double &costs) const{
std::cerr << __PRETTY_FUNCTION__ << std::endl
<< "TODO: use of noAttributes and selectivity" << std::endl;
// millisecs per tuple pair (0.2)
static const double uSymmJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"symmjoin", "uSymmJoin");
// millisecs per byte written
static const double twrite =
ProgressConstants::getValue("Global",
"TupleFile", "twrite");
// millisecs per byte read
static const double tread =
ProgressConstants::getValue("Global",
"TupleFile", "tread");
// Tuples in Buffer 1 or Buffer 2 are written to disk?
double wtd1, wtd2;
// % of tuples in stream 1 are written to disk
double t1d = percentTupleOnDisk(NoTuples1, sizeOfTuple1, memoryMB / 2);
// % of tuples in stream 2 are written to disk
double t2d = percentTupleOnDisk(NoTuples2, sizeOfTuple2, memoryMB / 2);
if(t1d > 0) {
wtd1 = 1;
} else {
wtd1 = 0;
}
if(t2d > 0) {
wtd2 = 1;
} else {
wtd2 = 0;
}
costs = NoTuples1 * NoTuples2 * uSymmJoin
+ wtd1 * NoTuples1 * sizeOfTuple1 * twrite
+ t1d * NoTuples1 * sizeOfTuple1 * tread * NoTuples2
+ wtd2 * NoTuples2 * sizeOfTuple2 * twrite
+ t2d * NoTuples2 * sizeOfTuple2 * tread * NoTuples1;
return true;
}
/*
1.4 percecent of Tuple written to disk
*/
double percentTupleOnDisk(const size_t NoTuples,
const size_t sizeOfTuple, const double memoryMB) const {
double tupleInMemory =
1 - (memoryMB / (NoTuples * sizeOfTuple / 1024 * 1024));
if(tupleInMemory < 0) {
tupleInMemory = 0;
}
return tupleInMemory;
}
/*
1.4 Calculate the sufficent memory for this operator.
*/
double calculateSufficientMemory(size_t NoTuples1,
size_t sizeOfTuple1, size_t NoTuples2,
size_t sizeOfTuple2) const {
// Space for placing all tuples in memory
double suffMemory = ceil((NoTuples1 * sizeOfTuple1
+ NoTuples2 * sizeOfTuple2) / (1024 * 1024));
// At least 16 mb are required
return std::max(16.0, suffMemory);
}
/*
1.5 Get Linear Params
Input:
NoTuples1, sizeOfTuple1
NoTuples2, sizeOfTuple2,
Output:
sufficientMemory = sufficientMemory for this operator with the given
input
timeAtSuffMemory = Time for the calculation with sufficientMemory
timeAt16MB - Time for the calculation with 16MB Memory
*/
virtual bool getLinearParams(
const size_t NoTuples1, const size_t sizeOfTuple1,
const size_t noAttributes1,
const size_t NoTuples2, const size_t sizeOfTuple2,
const size_t noAttributes2,
const double selectivity,
double& sufficientMemory, double& timeAtSuffMemory,
double& timeAt16MB ) const {
sufficientMemory=calculateSufficientMemory(NoTuples1, sizeOfTuple1,
NoTuples2, sizeOfTuple2);
getCosts(NoTuples1, sizeOfTuple1, noAttributes1,
NoTuples2, sizeOfTuple2, noAttributes2,
selectivity,
sufficientMemory, timeAtSuffMemory);
getCosts(NoTuples1, sizeOfTuple1,noAttributes1,
NoTuples2, sizeOfTuple2, noAttributes2,
selectivity,
16, timeAt16MB);
return true;
}
/*
1.8 Setter for stream1Exhausted
*/
void setStream1Exhausted(bool exhausted) {
stream1Exhausted = exhausted;
}
/*
1.9 Setter for stream2Exhausted
*/
void setStream2Exhausted(bool exhausted) {
stream2Exhausted = exhausted;
}
/*
1.10 Update processed tuples in stream1
*/
void processedTupleInStream1() {
readStream1++;
}
/*
1.11 Tuple Buffer 1 on disk
*/
void setTupleBuffer1OnDisk(bool onDisk) {
tupleBuffer1OnDisk = onDisk;
}
/*
1.12 Tuple Buffer 2 on disk
*/
void setTupleBuffer2OnDisk(bool onDisk) {
tupleBuffer2OnDisk = onDisk;
}
/*
1.13 Update processed tuples in stream2
*/
void processedTupleInStream2() {
readStream2++;
}
/*
1.14 init our class
*/
virtual void init(Word* args, void* localInfo)
{
returned = 0;
stream1Exhausted = false;
stream2Exhausted = false;
tupleBuffer1OnDisk = false;
tupleBuffer2OnDisk = false;
readStream1 = 0;
readStream2 = 0;
}
private:
ProgressLocalInfo *pli; // Local Progress info
ProgressInfo p1, p2; // Progress info for stream 1 / 2
bool stream1Exhausted; // is stream 1 exhaused?
bool stream2Exhausted; // is stream 2 exhaused?
bool tupleBuffer1OnDisk; // is tuple buffer 1 written to disk?
bool tupleBuffer2OnDisk; // is tuple buffer 2 written to disk?
size_t readStream1; // processed tuple in stream1
size_t readStream2; // processes tuple in stream2
};
/*
1.1 The class ~MergeJoinCostEstimation~ provides cost estimation
capabilities for the operator mergejoin and sortmergejoin\_old
*/
/*
1.1.1 Prototyping
*/
template<bool expectSorted>
class MergeJoinCostEstimation : public CostEstimation
{
public:
MergeJoinCostEstimation()
{
pli = new ProgressLocalInfo();
}
/*
1.2 Free local datastructures
*/
virtual ~MergeJoinCostEstimation() {
if(pli) {
delete pli;
}
}
virtual int requestProgress(Word* args, ProgressInfo* pRes, void* localInfo,
bool argsAvialable) {
// no progress info available => cancel
if(! argsAvialable) {
return CANCEL;
}
if (qp->RequestProgress(args[0].addr, &p1)
&& qp->RequestProgress(args[1].addr, &p2)) {
//millisecs per byte read in sort step (0.00043)
static const double uSortBy =
ProgressConstants::getValue("ExtRelationAlgebra",
"mergejoin", "uSortBy");
//millisecs per byte read in merge step (sortmerge)
// (0.0001738)
static const double uMergeJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"mergejoin", "uMergeJoin");
//millisecs per byte read in merge step (sortmerge)
// (0.0001738)
static const double wMergeJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"mergejoin", "wMergeJoin");
//millisecs per result tuple in merge step (0.0012058)
static const double xMergeJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"mergejoin", "xMergeJoin");
//millisecs per result attribute in merge step
// (0.0001072)
static const double yMergeJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"mergejoin", "yMergeJoin");
pli->SetJoinSizes(p1, p2);
pRes->CopySizes(pli);
double factor = (double) readStream1 / p1.Card;
if ( (qp->GetSelectivity(supplier) != 0.1) &&
( returned > (size_t) enoughSuccessesJoin) ) {
pRes->Card = factor * ((double) returned) * p1.Card
/ ((double) readStream1) +
(1.0 - factor) * p1.Card * p2.Card
* qp->GetSelectivity(supplier);
} else {
if ( returned > (size_t) enoughSuccessesJoin ) {
pRes->Card = ((double) returned) * p1.Card
/ ((double) readStream1);
} else {
pRes->Card = p1.Card * p2.Card * qp->GetSelectivity(supplier);
}
}
if ( expectSorted ) {
pRes->Time = p1.Time + p2.Time +
p1.Card * uMergeJoin + // Stream 1
p2.Card * uMergeJoin + // Stream 2
pRes->Card * (pRes->noAttrs * yMergeJoin);
pRes->Progress =
(p1.Progress * p1.Time + p2.Progress * p2.Time +
readStream1 * uMergeJoin +
readStream2 * uMergeJoin +
returned * (pRes->noAttrs * yMergeJoin))
/ pRes->Time;
//non-blocking in this case
pRes->CopyBlocking(p1, p2);
} else {
pRes->Time =
p1.Time + p2.Time +
p1.Card * p1.Size * uSortBy +
p2.Card * p2.Size * uSortBy +
(p1.Card * p1.Size + p2.Card * p2.Size) * wMergeJoin +
pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
pRes->Progress =
(p1.Progress * p1.Time +
p2.Progress * p2.Time +
readFirst * p1.Size * uSortBy +
readSecond * p2.Size * uSortBy +
(((double) readStream1) * p1.Size +
((double) readStream2) * p2.Size) * wMergeJoin +
((double) returned)
* (xMergeJoin + pRes->noAttrs * yMergeJoin))
/ pRes->Time;
pRes->BTime = p1.Time + p2.Time
+ p1.Card * p1.Size * uSortBy
+ p2.Card * p2.Size * uSortBy;
pRes->BProgress =
(p1.Progress * p1.Time + p2.Progress * p2.Time
+ ((double) readFirst) * p1.Size * uSortBy
+ ((double) readSecond) * p2.Size * uSortBy)
/ pRes->BTime;
}
return YIELD;
}
// default: send cancel
return CANCEL;
}
/*
1.3 getCosts
Returns the estimated time in ms for given arguments.
*/
virtual bool getCosts(const size_t NoTuples1, const size_t sizeOfTuple1,
const size_t noAttributes1,
const size_t NoTuples2, const size_t sizeOfTuple2,
const size_t noAttributes2,
const double selectivity,
const double memoryMB, double &costs) const{
std::cerr << __PRETTY_FUNCTION__ << std::endl
<< "TODO: use of noAttributes and selectivity" << std::endl;
// Cost Estimation is only implemented for mergejoin
if ( expectSorted ) {
//millisecs per tuple read in merge step
static const double uMergeJoin =
ProgressConstants::getValue("ExtRelationAlgebra",
"mergejoin", "uMergeJoin");
// costs for merging tuples of stream 1 and stream 2
costs = (NoTuples1 + NoTuples2) * uMergeJoin;
return true;
}
return false;
}
/*
1.5 Get Linear Params
Input:
NoTuples1, sizeOfTuple1
NoTuples2, sizeOfTuple2,
Output:
sufficientMemory = sufficientMemory for this operator with the given
input
timeAtSuffMemory = Time for the calculation with sufficientMemory
timeAt16MB - Time for the calculation with 16MB Memory
*/
virtual bool getLinearParams(
const size_t NoTuples1, const size_t sizeOfTuple1,
const size_t noAttributes1,
const size_t NoTuples2, const size_t sizeOfTuple2,
const size_t noAttributes2,
const double selectivity,
double& sufficientMemory, double& timeAtSuffMemory,
double& timeAt16MB ) const {
// Cost Estimation is only implemented for mergejoin
if ( expectSorted ) {
sufficientMemory = 16;
getCosts(NoTuples1, sizeOfTuple1, noAttributes1,
NoTuples2, sizeOfTuple2, noAttributes2,
selectivity,
sufficientMemory, timeAtSuffMemory);
getCosts(NoTuples1, sizeOfTuple1, noAttributes1,
NoTuples2, sizeOfTuple2, noAttributes2,
selectivity,
16, timeAt16MB);
return true;
}
return false;
}
/*
1.7 Update processed tuples in stream1
*/
void processedTupleInStream1() {
readStream1++;
}
/*
1.8 Update processed tuples in stream2
*/
void processedTupleInStream2() {
readStream2++;
}
/*
1.9 Update processed tuples in readFirst
*/
void processedTupleInReadFirst() {
readFirst++;
}
/*
1.19 Update processed tuples in readSecond
*/
void processedTupleInReadSecond() {
readSecond++;
}
/*
1.11 init our class
*/
virtual void init(Word* args, void* localInfo)
{
returned = 0;
readStream1 = 0;
readStream2 = 0;
readFirst = 0;
readSecond = 0;
}
private:
ProgressLocalInfo *pli; // Local Progress info
ProgressInfo p1, p2; // Progress info for stream 1 / 2
size_t readStream1; // processed tuple in stream1
size_t readStream2; // processes tuple in stream2
size_t readFirst; // processed tuple in sort (stream 1)
size_t readSecond; // processed tuple in sort (stream 2)
};
#endif