Files
secondo/Algebras/PartitionedStream/PartitionedStream.cpp
2026-01-23 17:03:45 +08:00

4235 lines
98 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2004-2008, University in Hagen, Faculty Mathematics and Computer
Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [1] title: [{\Large \bf ] [}]
[1] Partitioned Stream Algebra
January 2006 M. Spiekermann. Framework of the algebra
May 2006 M. Spiekermann. Implementation of ~pjoin2~ and cost functions
June 2006 M. Spiekermann. Corrections for bugs in the cardinality estimation. Implementation
of operator ~pjoin1~.
This algebra implements a type constructor ~ptuple~ which represents ~normal~ tuples
of the relational algebra or ~marker~ tuples which contain information about a bunch
of tuples. Since tuples are normally processed in a stream, the markers define partitions
of the stream.
Operations like ~puse~ and ~pjoin~ are implemented to support adaptive techniques
for query processing.
1 Preliminaries
1.1 Includes and global declarations
*/
#include <assert.h>
#include <iostream>
#include <sstream>
#include <queue>
#include <algorithm>
#include <random>
#undef TRACE_ON
//#define TRACE_ON 1
#include "LogMsg.h"
#include "CharTransform.h"
#include "StopWatch.h"
#include "Algebra.h"
#include "NList.h"
#include "QueryProcessor.h"
#include "SecondoSystem.h"
#include "Attribute.h"
#include "Algebras/Array/FunVector.h"
#include "CostFunction.h"
#include "SystemInfoRel.h"
#include "Environment.h"
#include "Symbols.h"
#include "Stream.h"
#include "ListUtils.h"
/*
Dependencies with other algebras: RelationAlgebra, StandardAlgebra
*/
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "StandardTypes.h"
extern NestedList* nl;
extern QueryProcessor *qp;
using namespace std;
// Rounds d up to the next integral value (ceil) and converts it to int.
inline int
nextInt(const double d)
{
  const double roundedUp = ceil(d);
  return static_cast<int>( roundedUp );
}
// Rounds d down to the previous integral value (floor) and converts it to int.
inline int
previousInt(const double d)
{
  const double roundedDown = floor(d);
  return static_cast<int>( roundedDown );
}
// Row of a system table which stores some internals about the adaptive
// processing: estimated and real values for both join arguments, the
// result and the selectivity, plus data gathered by a probe join.
class PJoinTuple : public InfoTuple
{
public:
  int id;
  string join;
  int arg1_est;
  int arg1_real;
  int arg1_err;
  int arg2_est;
  int arg2_real;
  int arg2_err;
  int result_est;
  int result_real;
  int result_err;
  float sel_est;
  float sel_real;
  int sel_err;
  int probe_result;
  float probe_seconds;
  int probe_arg1;
  int probe_arg2;
  int probe_cpuOps;
  string usedFunction;

  // All numeric members start at zero, both strings are empty.
  PJoinTuple() :
    id(0),
    join(""),
    arg1_est(0),
    arg1_real(0),
    arg1_err(0),
    arg2_est(0),
    arg2_real(0),
    arg2_err(0),
    result_est(0),
    result_real(0),
    result_err(0),
    sel_est(0),
    sel_real(0),
    sel_err(0),
    probe_result(0),
    probe_seconds(0.0),
    probe_arg1(0),
    probe_arg2(0),
    probe_cpuOps(0),
    usedFunction("")
  {}

  virtual ~PJoinTuple() {}

  // Returns the members as a nested list. Note that probe_cpuOps is
  // emitted before probe_arg1 and probe_arg2.
  virtual NList valueList() const
  {
    NList row;
    row.makeHead( NList().intAtom(id) );
    row.append( NList().stringAtom(join) );

    row.append( NList().intAtom(arg1_est) );
    row.append( NList().intAtom(arg1_real) );
    row.append( NList().intAtom(arg1_err) );

    row.append( NList().intAtom(arg2_est) );
    row.append( NList().intAtom(arg2_real) );
    row.append( NList().intAtom(arg2_err) );

    row.append( NList().intAtom(result_est) );
    row.append( NList().intAtom(result_real) );
    row.append( NList().intAtom(result_err) );

    row.append( NList().realAtom(sel_est) );
    row.append( NList().realAtom(sel_real) );
    row.append( NList().intAtom(sel_err) );

    row.append( NList().intAtom(probe_result) );
    row.append( NList().realAtom(probe_seconds) );
    row.append( NList().intAtom(probe_cpuOps) );
    row.append( NList().intAtom(probe_arg1) );
    row.append( NList().intAtom(probe_arg2) );

    row.append( NList().stringAtom(usedFunction) );
    return row;
  }

  // Prints a short form: id, join and the estimated result size.
  virtual ostream& print(ostream& os) const
  {
    os << id << sep;
    os << join << sep;
    os << result_est << endl;
    return os;
  }
};
class CostTuple : public InfoTuple
{
public:
int id;
int param_arg1_card;
int param_arg2_card;
int param_res_card;
int param_arg1_pages;
int param_arg2_pages;
SEC_STD_REAL param_join_sel;
string cost_name;
int cost_write;
int cost_read;
int cost_cpu;
SEC_STD_REAL cost_value;
int real_write;
int real_read;
int real_cpu;
SEC_STD_REAL real_runtime;
CostTuple() {
id=0;
param_arg1_card = 0;
param_arg2_card = 0;
param_res_card = 0;
param_arg1_pages = 0;
param_arg2_pages = 0;
param_join_sel = 0.0;
cost_name = "";
cost_write = 0;
cost_read = 0;
cost_cpu = 0;
cost_value = 0;
real_write = 0;
real_read = 0;
real_cpu = 0;
real_runtime = 0;
}
CostTuple(const CostParams& cp, const CostResult& cr) {
id=0;
param_arg1_card = cp.cardA;
param_arg2_card = cp.cardB;
param_res_card = 0;
param_arg1_pages = cp.pagesA;
param_arg2_pages = cp.pagesB;
param_join_sel = cp.sel;
cost_name = "";
cost_write = cr.write;
cost_read = cr.read;
cost_cpu = cr.cpu;
cost_value = cr.value;
real_write = 0;
real_read = 0;
real_cpu = 0;
real_runtime = 0;
}
virtual ~CostTuple() {}
virtual NList valueList() const
{
NList list;
list.makeHead( NList().intAtom(id) );
list.append( NList().intAtom(param_arg1_card) );
list.append( NList().intAtom(param_arg2_card) );
list.append( NList().intAtom(param_res_card) );
list.append( NList().intAtom(param_arg1_pages) );
list.append( NList().intAtom(param_arg2_pages) );
list.append( NList().realAtom(param_join_sel) );
list.append( NList().stringAtom(cost_name) );
list.append( NList().intAtom(cost_write) );
list.append( NList().intAtom(cost_read) );
list.append( NList().intAtom(cost_cpu) );
list.append( NList().realAtom(cost_value) );
list.append( NList().intAtom(real_write) );
list.append( NList().intAtom(real_read) );
list.append( NList().intAtom(real_cpu) );
list.append( NList().realAtom(real_runtime) );
return list;
}
virtual ostream& print(ostream& os) const
{
os << id << sep
<< cost_name << sep
<< cost_value << endl;
return os;
}
};
// System relation whose schema has one attribute per value emitted by
// PJoinTuple::valueList(), in the same order.
class PJoinRel : public SystemInfoRel
{
public:
  PJoinRel(const string& name) : SystemInfoRel(name) {}

  virtual ~PJoinRel() {}

  // Registers the attribute names and their types.
  virtual void initSchema()
  {
    const string intType    = CcInt::BasicType();
    const string realType   = CcReal::BasicType();
    const string stringType = CcString::BasicType();

    addAttribute("Id", intType );
    addAttribute("Join", stringType );

    addAttribute("Arg1_guess", intType );
    addAttribute("Arg1_value", intType );
    addAttribute("Arg1_error", intType );

    addAttribute("Arg2_guess", intType );
    addAttribute("Arg2_value", intType );
    addAttribute("Arg2_error", intType );

    addAttribute("Result_guess", intType );
    addAttribute("Result_value", intType );
    addAttribute("Result_error", intType );

    addAttribute("Sel_guess", realType );
    addAttribute("Sel_value", realType );
    addAttribute("Sel_error", intType );

    addAttribute("Probe_Result", intType );
    addAttribute("Probe_Seconds", realType );
    addAttribute("Probe_CPU_Ops", intType );
    addAttribute("Probe_Arg1", intType );
    addAttribute("Probe_Arg2", intType );

    addAttribute("Used_Function", stringType );
  }
};
// System relation whose schema has one attribute per value emitted by
// CostTuple::valueList(), in the same order.
class CostRel : public SystemInfoRel
{
public:
  CostRel(const string& name) : SystemInfoRel(name) {}

  virtual ~CostRel() {}

  // Registers the attribute names and their types.
  virtual void initSchema()
  {
    const string intType    = CcInt::BasicType();
    const string realType   = CcReal::BasicType();
    const string stringType = CcString::BasicType();

    addAttribute("Id", intType );

    addAttribute("Param_arg1_card", intType );
    addAttribute("Param_arg2_card", intType );
    addAttribute("Param_res_card", intType );
    addAttribute("Param_arg1_pages", intType );
    addAttribute("Param_arg2_pages", intType );
    addAttribute("Param_join_sel", realType );

    addAttribute("Cost_name", stringType );
    addAttribute("Cost_write", intType );
    addAttribute("Cost_read", intType );
    addAttribute("Cost_cpu", intType );
    addAttribute("Cost_value", realType );

    addAttribute("Real_write", intType );
    addAttribute("Real_read", intType );
    addAttribute("Real_cpu", intType );
    addAttribute("Real_runtime", realType );
  }
};
// Pointers to the system relation instances (one for the pjoin
// statistics, one for the cost records). Both start as null pointers.
// NOTE(review): presumably they are assigned during algebra
// initialization outside this section - confirm before dereferencing.
PJoinRel* pjoinRel = 0;
CostRel* costRel = 0;
// extern InObject InRel;
// Stream output for CostResult; forwards to CostResult::print.
ostream& operator<<(ostream& os, const CostResult& c)
{
  ostream& out = c.print(os);
  return out;
}
// Stream output for CostInfo; forwards to CostInfo::print.
ostream& operator<<(ostream& os, const CostInfo& c)
{
  ostream& out = c.print(os);
  return out;
}
// Stream output for CostParams; forwards to CostParams::print.
ostream& operator<<(ostream& os, const CostParams& c)
{
  ostream& out = c.print(os);
  return out;
}
/*
2 Data structures
*/
// A marker carries the partition information which is injected into a
// stream of ptuples.
struct Marker
{
  int num;    // sequence number
  int parts;  // expected total number of partitions
  int tuples; // (requested) partition size

  // A default marker has all values set to zero.
  Marker() :
    num(0),
    parts(0),
    tuples(0)
  {}

  Marker(const int n, const int p, const int t) :
    num(n),
    parts(p),
    tuples(t)
  {}

  // Writes the marker in the form "(num, parts, [tuples])".
  inline void print(ostream& os) const
  {
    os << "(" << num;
    os << ", " << parts;
    os << ", [" << tuples << "])";
  }
};
// Stream output for markers; the format is defined by Marker::print.
ostream& operator<<(ostream& os, const Marker& m)
{
  m.print(os);

  return os;
}
// Element of a partitioned stream: it wraps either a marker or a tuple.
// The destructor releases the marker but never the tuple.
struct PTuple
{
  const Marker* marker;
  Tuple* tuple;

  PTuple() :
    marker(0),
    tuple(0)
  {}

  PTuple(const Marker* m) :
    marker(m),
    tuple(0)
  {}

  PTuple(Tuple* t) :
    marker(0),
    tuple(t)
  {}

  ~PTuple()
  {
    // deleting a null pointer is a no-op, hence no explicit test
    delete marker;
  }

  // Name of the type constructor.
  static const string BasicType()
  {
    return "ptuple";
  }
};
// Counts the tuples of the current partition and creates a marker
// whenever a new partition starts.
struct PartCtr
{
  int tuples; // number of tuples per partition
  int parts;  // number of partitions
  int num;    // sequence number of the next marker
  mutable int ctr; // remaining calls until the next marker is due

  PartCtr() :
    tuples(0),
    parts(0),
    num(0),
    ctr(0)
  {}

  PartCtr(const int pSize, const int card)
  {
    init(pSize, card);
  }

  // Resets both counters and derives the partition size and the number
  // of partitions from the requested size and the cardinality.
  void init(const int pSize, const int card)
  {
    ctr = 0;
    num = 0;
    adjustPartSize(pSize);
    computeParts(card);
    assert(tuples > 0);
  }

  // The partition size is the absolute value of p, but at least 2.
  void adjustPartSize(const int p)
  {
    const int magnitude = abs(p);
    tuples = (magnitude < 2) ? 2 : magnitude;
  }

  // Integer division of the cardinality by the partition size.
  void computeParts(const int card)
  {
    parts = card / tuples;
  }

/*
After construction we have ctr == 0, hence the first call
returns true. A call which returns true sets the counter to ~tuples~;
each of the following ~tuples~ calls decrements it and returns false.

*/
  inline bool resetIfNeeded()
  {
    if (ctr == 0) {
      ctr = tuples;
      return true;
    }
    --ctr;
    return false;
  }

  // Creates the marker for the partition which just started. It must
  // only be called directly after resetIfNeeded() returned true.
  inline Marker* newMarker()
  {
    assert(ctr == tuples);
    const int seq = num++;
    return new Marker( seq, parts, tuples );
  }
};
/*
Below we define a class which helps to maintain streams in value
mappings.
*/
// Static (CRTP) interface for stream-like sources. The derived class T
// provides open(), close() and getNext(); this base forwards to them and
// keeps the stream state.
template<class T>
struct StreamBase
{
  typedef enum {opened, finished, closed} StreamState;

private:
  // read-only view of the derived object, used by const members
  const T& child() const {
    return static_cast<const T&>( *this );
  }
  // Mutable view of the derived object. open() and close() change the
  // derived object, so they must not go through the const view; with
  // only the const overload, calling them via a StreamBase reference
  // did not compile.
  T& child() {
    return static_cast<T&>( *this );
  }

public:
  // mutable because getNext() is const but may mark the stream finished
  mutable StreamState state;

  inline void open() {
    child().open();
  }

  inline void close() {
    child().close();
  }

  // Returns the next element of the derived stream as untyped pointer.
  inline void* getNext() const
  {
    return child().getNext();
  }

  // A new stream is in state closed.
  StreamBase() : state(closed) {}
  StreamBase(const StreamBase& rhs) : state(rhs.state) {}
};
// Wraps a stream supplier of the query processor so that it can be
// handled through the StreamBase interface.
struct StreamOpAddr : public StreamBase<StreamOpAddr>
{
public:
  void* stream;

  // The default constructor used to leave ~stream~ uninitialized; it is
  // a null pointer now.
  StreamOpAddr( ) : StreamBase<StreamOpAddr>(), stream(0) {}
  StreamOpAddr( Supplier s ) : stream(s) {}
  StreamOpAddr( const StreamOpAddr& rhs ) :
    StreamBase<StreamOpAddr>(rhs),
    stream(rhs.stream)
  {}

  // Opens the stream via the query processor unless it is already open.
  inline void open()
  {
    if (state == closed) {
      TRACE("StreamOpAddr()::open");
      qp->Open(stream);
      state = opened;
    }
  }

  // Closes the stream via the query processor unless it is closed.
  inline void close()
  {
    if (state != closed) {
      TRACE("StreamOpAddr()::close");
      qp->Close(stream);
      state = closed;
    }
  }

/*
The next function requests the next element of the wrapped stream and
returns its address. The element is passed on unchanged; this class does
not create markers itself. When the stream delivers no more elements the
state changes to ~finished~ and 0 is returned, also for all later calls.

*/
  inline void* getNext() const
  {
    static Word element;
    if (state != finished) {
      qp->Request(stream, element);
      if( qp->Received(stream) ) {
        return element.addr;
      }
      else {
        state = finished;
        return 0;
      }
    }
    return 0;
  }
};
/*
The class below can take a relation but can be handled like a stream
of PTuple
*/
// Presents a relation as a stream of PTuple: markers are injected in
// front of every partition, as dictated by the PartCtr member.
struct RelationAddr : public StreamBase<RelationAddr>
{
private:
  GenericRelation* rel;
  GenericRelationIterator* rit; // scan iterator, 0 while not open
  mutable PartCtr parts;

public:
  // r: the relation to scan; tuples: requested partition size.
  RelationAddr( GenericRelation* r, const int tuples = 100 ) : rel(r), rit(0)
  {
    int card = r->GetNoTuples();
    parts.init(tuples, card);
  }

  ~RelationAddr() {}

  // Starts a scan on the relation unless the stream is already open.
  inline void open()
  {
    if (state == closed) {
      TRACE("RelationAddr()::open")
      rit = rel->MakeScan();
      state = opened;
    }
  }

  // Releases the scan iterator unless the stream is already closed.
  inline void close()
  {
    if (state != closed) {
      TRACE("RelationAddr()::close")
      delete rit;
      rit = 0;
      state = closed;
    }
  }

  // Returns a new PTuple wrapping either a marker or the next tuple of
  // the scan, or 0 when no element can be delivered. The caller becomes
  // the owner of the returned PTuple.
  inline void* getNext() const
  {
    // Only an open stream delivers elements. Without this guard a call
    // before open() dereferenced the null iterator, and calls after the
    // end of the scan could still produce markers.
    if (state != opened)
      return 0;

    if ( parts.resetIfNeeded() )
      return new PTuple( parts.newMarker() );

    Tuple* t = rit->GetNextTuple();
    if (t)
      return new PTuple(t);

    state = finished;
    return 0;
  }
};
/*
3 The Type Constructor ~ptuple~
Since type constructor ~ptuple~ will not be used to construct values from a
list representation it should never be called. Moreover, the ~create~, ~open~
~close~, ~clone~, etc. functions are useless since persistence of ~ptuple~
needs not to be supported.
3.1 Creation of the Type Constructor Instance
Many template functions are used for generic implementations. However,
most of them are not needed and should never be called. Defining ~open~
and ~save~ functions is only needed when the datatype should be used as
attribute type in relations. In and out functions are only interesting if
the object needs to be converted to or from a textual representation.
Data types like indexes don't need this.
*/
/*
4.1 Type mapping and value mapping functions of operator ~pfeed~
This operator makes the following type mapping
----
((rel(tuple(y))) int) -> (stream(ptuple(y)))
----
and creates a marker tuple after a number of tuples which is
specified in the second argument has been created.
*/
struct PartStreamMappings {
static Symbols sym;
PartStreamMappings() {}
template<class T>
inline static T* nextOfStream(const Word& w)
{
Word wTuple = SetWord(Address(0));
qp->Request(w.addr, wTuple);
if( qp->Received(w.addr) )
return static_cast<T*>( wTuple.addr );
else
return 0;
}
template<class T>
inline static T* nextOfStream2(const Supplier s)
{
Word wTuple = SetWord(Address(0));
qp->Request(s, wTuple);
if( qp->Received(s) )
return static_cast<T*>( wTuple.addr );
else
return 0;
}
// Next element of the stream w interpreted as Tuple, 0 at the end.
inline static Tuple* nextTuple(const Word& w)
{
  Tuple* next = nextOfStream<Tuple>(w);
  return next;
}
// Next element of the stream w interpreted as PTuple, 0 at the end.
inline static PTuple* nextPTuple(const Word& w)
{
  PTuple* next = nextOfStream<PTuple>(w);
  return next;
}
// Next element of the supplier s interpreted as Tuple, 0 at the end.
inline static Tuple* nextTuple(const Supplier s)
{
  Tuple* next = nextOfStream2<Tuple>(s);
  return next;
}
// Next element of the supplier s interpreted as PTuple, 0 at the end.
inline static PTuple* nextPTuple(const Supplier s)
{
  PTuple* next = nextOfStream2<PTuple>(s);
  return next;
}
// Interprets the address stored in w as a pointer to T.
template<class T>
inline static T* getArg(const Word& w)
{
  T* typed = static_cast<T*>( w.addr );
  return typed;
}
// Builds the pattern "(s1(s2(attr)))" which is used in type error
// messages; attr defaults to "...".
inline static string expects( const string& s1,
                              const string& s2,
                              const string attr="..." )
{
  string pattern("(");
  pattern += s1;
  pattern += "(";
  pattern += s2;
  pattern += "(";
  pattern += attr;
  pattern += ")))";
  return pattern;
}
/*
The function ~checkMap~ tests if a list represents a map of
~n~ arguments. It stores the ~n~ arguments and the result type
in the given vector reference. This can be used for subsequent
integrity checks for the arguments and result type.
*/
static bool checkMap(const NList& l, Cardinal n, vector<NList>& sig)
{
Cardinal len = l.length();
if ( len != (n+2) )
return false;
if ( !l.first().isSymbol( Symbol::MAP() ) )
return false;
sig.resize(n+1);
for(Cardinal i = 0; i <= n; i++)
{
sig[i] = l.elem(2+i);
}
return true;
}
// Tests l against the pattern (rel(tuple(attrs))).
static bool checkRelTuple(const NList& l, NList& attrs)
{
  const bool matches =
    checkDepth3(l, Relation::BasicType(), Tuple::BasicType(), attrs);
  return matches;
}
// Tests l against the pattern (stream(tuple(attrs))).
static bool checkStreamTuple(const NList& l, NList& attrs)
{
  const bool matches =
    checkDepth3(l, Symbol::STREAM(), Tuple::BasicType(), attrs);
  return matches;
}
// Tests l against the pattern (stream(ptuple(attrs))).
static bool checkStreamPTuple(const NList& l, NList& attrs)
{
  const bool matches =
    checkDepth3(l, Symbol::STREAM(), PTuple::BasicType(), attrs);
  return matches;
}
// Builds the type list (stream(tuple(attrs))).
static NList makeStreamTuple(const NList& attrs)
{
  NList tupleType( NList(Tuple::BasicType()), attrs );
  NList streamSym( Symbol::STREAM() );
  return NList( streamSym, tupleType );
}
// Builds the type list (stream(ptuple(attrs))).
static NList makeStreamPTuple(const NList& attrs)
{
  NList ptupleType( NList(PTuple::BasicType()), attrs );
  NList streamSym( Symbol::STREAM() );
  return NList( streamSym, ptupleType );
}
// Returns true if l has exactly len elements. Otherwise err is replaced
// by "List length unequal <len>, " followed by its previous content.
static bool checkLength(const NList& l, const int len, string& err)
{
  if ( l.hasLength(len) )
    return true;

  ostringstream msg;
  msg << "List length unequal " << len;
  msg << ", " << err;
  err = msg.str();
  return false;
}
// Returns the message prefix "Type of argument no. <n> not correct, ".
static string argNotCorrect( const int n)
{
  ostringstream msg;
  msg << "Type of argument no. ";
  msg << n;
  msg << " not correct, ";
  return msg.str();
}
static bool checkDepth3( const NList& l, const string& s1,
const string& s2, NList& attrs )
{
if ( !l.hasLength(2) )
return false;
if ( !l.first().isSymbol(s1) )
return false;
NList s = l.second();
if ( !s.hasLength(2) )
return false;
if ( !s.first().isSymbol(s2) )
return false;
if ( !s.second().isList() )
return false;
attrs = s.second();
return true;
}
// Type mapping of pfeed: ((rel(tuple(y))) int) -> (stream(ptuple(y)))
static ListExpr pfeed_tm(ListExpr args)
{
  NList argList(args);
  string e1 = expects(Relation::BasicType(),Tuple::BasicType());
  static const string err1 = "pfeed expects (" + e1 + " "
                             + CcInt::BasicType() + ")!";

  if ( !argList.hasLength(2) )
    return argList.typeError(err1);

  // first argument: the relation
  NList attrs;
  const bool relOk = checkRelTuple( argList.first(), attrs );
  if ( !relOk )
    return argList.typeError("First list element should be " + e1 + ".");

  // second argument: the partition size
  const bool intOk = argList.second().isSymbol( CcInt::BasicType() );
  if ( !intOk )
    return argList.typeError( "Second list element should be symbol '"
                              + CcInt::BasicType() + "'." );

  return makeStreamPTuple(attrs).listExpr();
}
// Local information of pfeed: a relation which is opened as a stream
// of PTuple on construction and closed on destruction.
struct PartStreamInfo
{
  RelationAddr relStream;

  PartStreamInfo( GenericRelation* r, const int partSize ) :
    relStream(r, partSize)
  {
    relStream.open();
  }

  ~PartStreamInfo()
  {
    relStream.close();
  }

  // Next marker or tuple of the relation stream, 0 at the end.
  inline PTuple* nextPTuple()
  {
    void* next = relStream.getNext();
    return static_cast<PTuple*>( next );
  }
};
// Consumes a complete tuple stream into a tuple buffer and offers the
// buffered tuples as a partitioned stream of PTuple.
struct BufferedStreamInfo {
private:
  TupleBuffer* tupBuf;  // holds all tuples of the input stream
  PartStreamInfo* pi;   // partitioned scan over tupBuf

public:
  // stream: the tuple stream to be buffered; pSize: partition size.
  BufferedStreamInfo(Word& stream, const int pSize) : pi(0)
  {
    qp->Open(stream.addr);
    tupBuf = new TupleBuffer();
    Tuple *t = nextTuple( stream );
    while (t != 0)
    {
      tupBuf->AppendTuple(t);
      t = nextTuple( stream );
    }
    qp->Close(stream.addr);
    GenericRelation* r = static_cast<GenericRelation*>( tupBuf );
    pi = new PartStreamInfo(r, pSize);
  }

  // Next marker or tuple of the buffered stream, 0 at the end.
  inline PTuple* nextPTuple() {
    return pi->nextPTuple();
  }

  ~BufferedStreamInfo()
  {
    // The scan held by ~pi~ refers to ~tupBuf~. Hence the scan must be
    // released first; the former order deleted the buffer while its
    // iterator was still alive.
    delete pi;
    delete tupBuf;
  }
};
// Value mapping of pfeed.
// args[0]: Input Relation
// args[1]: An integer defining the partition size
static int
pfeed_vm(Word* args, Word& result, int message,
         Word& local, Supplier s)
{
  static const string pre = "pfeed: ";
  PartStreamInfo* info = static_cast<PartStreamInfo*>( local.addr );

  if ( message == OPEN )
  {
    // create the local information; its constructor opens the scan
    GenericRelation* r = getArg<GenericRelation>( args[0] );
    int partSize = StdTypes::GetInt( args[1] );
    info = new PartStreamInfo( r, partSize );
    local.addr = info;
    return 0;
  }

  if ( message == REQUEST )
  {
    // the nextPTuple function returns a marker or a tuple
    result.addr = info->nextPTuple();
    return result.addr ? YIELD : CANCEL;
  }

  if ( message == CLOSE )
  {
    delete info;
    local.addr=0;
    return 0;
  }

  // no other message is expected
  assert(false);
  return 0;
}
/*
4.2 Operator ~pdelete~
This operator does the following type mapping:
----
(stream(ptuple(y))) -> (stream(tuple(y)))
----
The value mapping removes all marker tuples from the input stream.
*/
// Type mapping of pdelete: (stream(ptuple(y))) -> (stream(tuple(y)))
static ListExpr pdelete_tm(ListExpr args)
{
  NList argList(args);
  string e1 = expects( Symbol::STREAM(), PTuple::BasicType() );
  static const string err1 = "pdelete expects " + e1 + "!";

  // exactly one argument which must be a stream of ptuples
  NList attrs;
  const bool valid = argList.hasLength(1)
                     && checkStreamPTuple( argList.first(), attrs );
  if ( !valid )
    return argList.typeError(err1);

  return makeStreamTuple(attrs).listExpr();
}
// Value mapping of pdelete.
// args[0]: Input stream(ptuple(y)))
static int pdelete_vm( Word* args, Word& result, int message,
                       Word& local, Supplier s)
{
  static const string pre = "pdelete: ";
  switch (message)
  {
    case OPEN :
    {
      qp->Open( args[0].addr );
      return 0;
    }
    case REQUEST :
    {
      // skip markers until a normal tuple or the end of the stream
      while ( PTuple* pt = nextPTuple( args[0] ) )
      {
        if (pt->marker) // marker tuple detected
        {
          delete pt;
          continue;
        }
        // a normal tuple: pass it on and release the wrapper
        result.addr = pt->tuple;
        delete pt;
        return YIELD;
      }
      return CANCEL;
    }
    case CLOSE :
    {
      qp->Close( args[0].addr );
      local.addr=0;
      return 0;
    }
    default : { assert(false); }
  }
  return 0;
}
/*
4.2 Operator ~pshow~
This operator does the following type mapping:
----
(stream(ptuple(y))) -> (stream(ptuple(y)))
----
the value mapping prints out all markers of the input stream.
*/
// Type mapping of pshow: (stream(ptuple(y))) -> (stream(ptuple(y)))
static ListExpr pshow_tm(ListExpr args)
{
  NList argList(args);
  string e1 = expects( Symbol::STREAM(), PTuple::BasicType() );
  static const string err1 = "pshow expects " + e1 + "!";

  // exactly one argument which must be a stream of ptuples
  NList attrs;
  const bool valid = argList.hasLength(1)
                     && checkStreamPTuple( argList.first(), attrs );
  if ( !valid )
    return argList.typeError(err1);

  // the result type equals the type of the argument
  return argList.first().listExpr();
}
// Value mapping of pshow.
// args[0]: Input stream(ptuple(y)))
static int pshow_vm( Word* args, Word& result, int message,
                     Word& local, Supplier s)
{
  static const string pre = "pshow: ";
  static const bool showall = RTFlag::isActive("pshow:all");

  if ( message == OPEN )
  {
    qp->Open( args[0].addr );
    return 0;
  }

  if ( message == REQUEST )
  {
    PTuple *pt = nextPTuple( args[0] );
    if ( !pt )
    {
      result.addr = 0;
      return CANCEL;
    }
    if (pt->marker) // marker tuple detected
    {
      cerr << *(pt->marker) << endl;
    }
    else if (showall) // print the tuple's address only on request
    {
      cerr << static_cast<void*>( pt->tuple ) << endl;
    }
    // every element is passed on unchanged
    result.addr = pt;
    return YIELD;
  }

  if ( message == CLOSE )
  {
    qp->Close( args[0].addr );
    local.addr=0;
    return 0;
  }

  // no other message is expected
  assert(false);
  return 0;
}
/*
4.3 Type Mapping Operators ~PSTREAM1~ and ~PSTREAM2~
These operators do the following type mappings:
----
(stream(ptuple(y)) ... ) -> (stream(tuple(y)))
((...) (stream(ptuple(y))) ... ) -> (stream(tuple(y)))
----
*/
// Type mapping operator PSTREAM1:
// (stream(ptuple(y)) ... ) -> (stream(tuple(y)))
static ListExpr PSTREAM1_tm(ListExpr args)
{
  NList argList(args);
  string e1 = expects( Symbol::STREAM(), PTuple::BasicType() );
  static const string err1 = "PSTREAM1 expects (" + e1 + "...)!";

  // at least one argument is needed
  if ( argList.length() < 1 )
    return argList.typeError(err1);

  // the first one must be a stream of ptuples
  NList attrs;
  const bool firstOk = checkStreamPTuple( argList.first(), attrs );
  if ( !firstOk )
    return argList.typeError(err1);

  return makeStreamTuple(attrs).listExpr();
}
// Type mapping operator PSTREAM2:
// ((...) (stream(ptuple(y))) ... ) -> (stream(tuple(y)))
static ListExpr PSTREAM2_tm(ListExpr args)
{
  NList argList(args);
  string e1 = expects( Symbol::STREAM(), PTuple::BasicType() );
  static const string err1 = "PSTREAM2 expects ((...) " + e1 + "...)!";

  // at least two arguments are needed
  if ( argList.length() < 2 )
    return argList.typeError(err1);

  // the second one must be a stream of ptuples
  NList attrs;
  const bool secondOk = checkStreamPTuple( argList.second(), attrs );
  if ( !secondOk )
    return argList.typeError(err1);

  return makeStreamTuple(attrs).listExpr();
}
/*
4.3 Operator ~puse~
This operator does the following type mapping:
----
(stream(ptuple(y)) (map (stream(tuple(y))) (stream(tuple(z)))) )
-> (stream(ptuple(z)))
----
The value mapping passes over all marker tuples from
the input stream to the output stream. Therefore it uses internally a
marker queue which stores incoming markers until the first tuple of output
is produced. This is needed since calling function qp->Request() with the
parameter functions node of the query tree will block until a first tuple is
returned, but meanwhile there could be received some markers from the input
stream which need to be filtered out by the ~puse~ operator.
*/
// Type mapping of puse:
// (stream(ptuple(y)) (map (stream(tuple(y))) (stream(tuple(z)))) )
//   -> (stream(ptuple(z)))
static ListExpr puse_tm(ListExpr args)
{
  NList l(args);
  static const string e1 = expects( Symbol::STREAM(), PTuple::BasicType() );
  static const string inputErr = "Expecting input (" + e1 + ")!";

  // checkLength() prepends a note to the message passed to it. The
  // message used to be a static variable, hence it grew with every
  // failed call. Now each call works on its own copy.
  string err1 = inputErr;
  if ( !checkLength( l, 2, err1 ) )
    return l.typeError( err1 );

  NList attrs;
  if ( !checkStreamPTuple( l.first(), attrs) )
    return l.typeError(err1);

  // Test the parameter function's signature
  static const string err2 = "Expecting as second argument a function "
                             "(map (stream(tuple(y))) (stream(tuple(z))))";
  vector<NList> sig;
  if ( !checkMap( l.second(), 1, sig ) )
    return l.typeError( argNotCorrect(2) + err2);

  // the function's argument must be a tuple stream of the input type
  NList attrs2;
  if ( !checkStreamTuple(sig[0], attrs2) )
    return l.typeError( "First argument of parameter function not correct!\n"
                        "Received " + sig[0].convertToString() + "." );
  if ( !(attrs == attrs2) )
    return l.typeError( "Tuple types do not match!\n"
                        "Received " + attrs2.convertToString() + "." );

  // the function's result defines the tuple type of the output
  if ( !checkStreamTuple(sig[1], attrs2) )
    return l.typeError( "Result type of parameter function not correct!\n"
                        "Received " + sig[1].convertToString() + "." );
  return makeStreamPTuple(attrs2).listExpr();
}
struct MarkerQueue {
MarkerQueue() : max(0), endOfStream(false) {}
~MarkerQueue()
{
assert( q.empty() );
//cout << "Max. queue size: " << max << endl;
}
inline const Marker* removeFront()
{
const Marker* m = q.front();
q.pop();
//TRACE( "puse: remove Marker: " << *m )
return m;
}
inline bool empty() const { return q.empty(); }
inline void push(const Marker* m)
{
q.push(m);
if (q.size() > max)
max = q.size();
}
inline Tuple* getNextTuple(Word& fun) {
if (endOfStream)
return 0;
Tuple* t = nextTuple(fun);
if (!t)
endOfStream = true;
return t;
};
queue< const Marker* > q;
size_t max;
bool endOfStream;
};
// Value mapping of puse.
// args[0]: Input stream(ptuple(y)))
// args[1]: map stream(tuple(y))) -> stream(tuple(z))
//
// Besides the stream messages of the parent node, the FUNMSG messages
// are handled: they are sent when the parameter function requests its
// stream argument.
static int puse_vm( Word* args, Word& result, int message,
                    Word& local, Supplier s)
{
  static const string pre("puse: ");
  ArgVectorPointer funargs;
  Word inStream = args[0];
  Word fun = args[1];
  MarkerQueue* m = static_cast<MarkerQueue*>( local.addr );
  switch ( message )
  {
    case OPEN: {
      // retrieve argument vector
      funargs = qp->Argument(fun.addr);
      // store the supplier of this operator at the last argument
      // position. This indicates that the argument is a stream.
      (*funargs)[MAXARG-1] = SetWord(s);
      // initialize local information. The first tuple of a nonempty
      // ptuple stream must be a marker tuple.
      m = new MarkerQueue();
      local.addr = m;
      // The parameter function's return type is a stream
      // hence we need to open it
      qp->Open(fun.addr);
      return 0;
    }
    case REQUEST: {
      bool ok = false;
      do {
        if ( !m->empty() ) // is a marker present?
        {
          // queued markers are passed on before any further tuple
          result.addr = new PTuple( m->removeFront() );
          ok = true;
          break;
        }
        else
        {
          // return elements computed by the param function
          Tuple* t = m->getNextTuple(fun);
          // a side effect of nextTuple(fun) could be that new markers
          // are pushed to the marker queue
          if (t) // handle tuple pointer
          {
            result.addr = new PTuple(t);
            ok = true;
            break;
          }
          else
          {
            result.addr=0;
            ok = false;
          }
        }
        // the function is exhausted, but markers may have arrived
      } while ( !m->empty() );
      if (ok) {
        assert(result.addr);
        return YIELD;
      }
      else {
        return CANCEL;
      }
    }
    case FUNMSG+OPEN: { // just open the input stream
      qp->Open(inStream.addr);
      return 0;
    }
    // The message below maps a ptuple of the input stream to a normal
    // tuple which is evaluated by the parameter function. Markers are
    // not passed to the function; they are saved in the marker queue.
    case FUNMSG+REQUEST: {
      bool endOfStream = false;
      do
      {
        PTuple* pt = nextPTuple(inStream);
        if (pt)
        {
          if (pt->tuple) // return tuple to parameter function
          {
            result = SetWord( pt->tuple );
            // This operator is the consumer of the PTuple, hence the
            // wrapper must be released here (it was leaked before).
            // The destructor of PTuple does not touch the tuple.
            delete pt;
            return YIELD;
          }
          else // save marker in queue
          {
            m->push( pt->marker );
            // The queue owns the marker now. It is detached before
            // the wrapper is released, since the destructor of PTuple
            // would delete it otherwise.
            pt->marker = 0;
            delete pt;
          }
        }
        else
        {
          endOfStream=true;
        }
      }
      while (!endOfStream);
      return CANCEL;
    }
    case FUNMSG+CLOSE: {
      // This message must be ignored since we will send a CLOSE message
      // to our input stream when requested by our parent node
      TRACE(pre << "Message FUNMSG+CLOSE received")
      return 0;
    }
    case CLOSE: {
      // close the input stream
      qp->Close(inStream.addr);
      // send a close message to the parameter fun in order that it can be
      // propagated to its childs.
      qp->Close(fun.addr);
      delete m;
      local.addr=0;
      return 0;
    }
    default: {
      cerr << pre << "Cannot handle message " << message << endl;
    }
  }
  return 0;
}
/*
4.3 Operator ~pjoin2~
This operator does the following type mapping:
----
( stream(ptuple(y1)) stream(ptuple(y2))
( ( map (stream(tuple(y1))) (stream(tuple(y2))) (stream(tuple(z))) )
... N repeats ... )
)
-> (stream(ptuple(z)))
----
During type mapping the names of the function identifier are appended as string
arguments. The names must correspond to a join method. For each join method
a cost function is present and will be used for determining the best function.
The value mapping is organized in two phases:
1 Probe phase: First some input is used to do a probe join. This will give
estimations about the input and output sizes.
2 Eval phase: The cost functions are used to determine the algorithm which
computes the join for the whole inputs. Therefore the buffered inputs must be
streamed again.
The implementation of the first phase is done partly in the OPEN and
FUNMSG+REQUEST messages. In the case of a REQUEST message the operator will return
a tuple computed by the chosen evaluation function.
After the first phase a marker tuple with a first size estimation will be returned.
Since it may happen that the output is much smaller than the inputs we need to maintain
a marker queue as in the implementation of the ~puse~ operator.
*/
// Type mapping of pjoin2:
// ( stream(ptuple(y1)) stream(ptuple(y2))
//   ( (name (map (stream(tuple(y1))) (stream(tuple(y2)))
//                (stream(tuple(z))))) ... ) )
//   -> (stream(ptuple(z)))
// The function names are appended as string atoms (APPEND).
static ListExpr pjoin2_tm(ListExpr args)
{
  NList l(args);
  static const string e1 = expects( Symbol::STREAM(), PTuple::BasicType(),"y1");
  static const string e2 = expects( Symbol::STREAM(), PTuple::BasicType(),"y2");
  static const string inputErr = "Expecting input ( "
                                 + e1 + " " + e2
                                 + " (list of join-expressions) ";

  // checkLength() prepends a note to the message passed to it. The
  // message used to be a static variable, hence it grew with every
  // failed call. Now each call works on its own copy.
  string err1 = inputErr;
  if ( !checkLength( l, 3, err1 ) )
    return l.typeError( err1 );

  NList attrs1;
  if ( !checkStreamPTuple( l.first(), attrs1) )
    return l.typeError( argNotCorrect(1) + err1);
  NList attrs2;
  if ( !checkStreamPTuple( l.second(), attrs2) )
    return l.typeError( argNotCorrect(2) + err1);

  // Test the parameter function's signature
  // (the second argument was erroneously named y1 in this message)
  static const string
    err2 = "Expecting as third argument a list of functions of type "
           "(map (stream(tuple(y1))) (stream(tuple(y2))) (stream(tuple(z))))";
  NList joinMaps = l.third();
  NList lastResultAttrs;
  NList fNames;

  // At least one function is needed. The former test "< 0" never
  // detected an empty list, which led to an empty result type.
  int joinMapsLength = joinMaps.length();
  if (joinMapsLength < 1)
    return l.typeError( "Empty list of functions! " + argNotCorrect(3) + err2);

  for (int i=1; i < (joinMapsLength+1); i++)
  {
    vector<NList> sig;
    NList funDesc = joinMaps.elem(i);
    NList fName = funDesc.first();
    if ( !fName.isSymbol() )
      return l.typeError( "symbol atom for function name expected!" );
    string funcStr = fName.str();
    string pre = "Function " + funcStr + ": ";

    if ( !checkMap( funDesc.second(), 2, sig ) )
      return l.typeError( argNotCorrect(3) + err2);

    NList leftAttrs;
    if ( !checkStreamTuple(sig[0], leftAttrs) )
      return l.typeError( pre + "First argument not correct!\n"
                          "Received " + sig[0].convertToString() + "." );
    NList rightAttrs;
    if ( !checkStreamTuple(sig[1], rightAttrs) )
      return l.typeError( pre + "Second argument not correct!\n"
                          "Received " + sig[1].convertToString() + "." );
    NList resultAttrs;
    if ( !checkStreamTuple(sig[2], resultAttrs) )
      return l.typeError( pre + "Result type not correct!\n"
                          "Received " + sig[2].convertToString() + "." );

    // The argument types of every function must match the input
    // streams. Formerly the first function was not checked at all.
    if ( !(attrs1 == leftAttrs) )
      return l.typeError( pre +
                          "Tuple type of the first arg. does not match! \n"
                          "Received " + leftAttrs.convertToString() + "." );
    if ( !(attrs2 == rightAttrs) )
      return l.typeError( pre +
                          "Tuple type of the second arg. does not match! \n"
                          "Received " + rightAttrs.convertToString() + "." );

    if ( i==1 ) // first time define reference mapping
    {
      lastResultAttrs = resultAttrs;
      fNames.makeHead( fName.toStringAtom() );
    }
    else // compare the result type with the previous mapping
    {
      fNames.append( fName.toStringAtom() );
      if ( !(lastResultAttrs == resultAttrs) )
        return l.typeError( pre +
                            "Tuple type of the result does not match! \n"
                            "Received " + resultAttrs.convertToString()
                            + ". But the result type of the "
                            + "previous function was "
                            + lastResultAttrs.convertToString() );
    }
  }

  // result: (APPEND (names...) (stream(ptuple(z))))
  NList appendSym(Symbol::APPEND());
  NList resultType(appendSym, fNames, makeStreamPTuple(lastResultAttrs));
  return resultType.listExpr();
}
/*
Class ~PTupleBuffer~ captures the first part of a stream of tuples
in a tuple buffer and allows it to be used again.
*/
template<class T>
struct PTupleBuffer
{
private:
const string pre;
TupleBuffer* buf;
GenericRelationIterator* rit;
StreamBase<T>& is;
bool scanBuffer;
bool bufferOnly;
bool endReached;
bool markerRead;
size_t maxSize;
Marker lastMarker;
int tuplesCurrentPart;
int tuplesCompleteParts;
int requestedTuples;
public:
PTupleBuffer(const string& id, StreamBase<T>& stream, const int size ) :
pre(id+": "),
rit(0),
is(stream),
scanBuffer(false),
bufferOnly(true),
endReached(false),
markerRead(false),
maxSize(size),
tuplesCurrentPart(0),
tuplesCompleteParts(0),
requestedTuples(0)
{
buf = new TupleBuffer(size);
}
~PTupleBuffer()
{
showInfo();
delete buf;
}
void showInfo()
{
TRACE(pre << "Stored tuples " << getNoTuples())
SHOW(tuplesCompleteParts)
SHOW(tuplesCurrentPart)
}
TupleBuffer* getBufPtr() {
return buf;
}
int getNoTuples(bool useCtrs = false, int ctrNum = 0) {
if (!useCtrs)
return buf->GetNoTuples();
return qp->GetCounter(ctrNum+1);
}
int getNoTuplesOfCompleteParts() {
return tuplesCompleteParts;
}
int getRequestedTuples() {
return tuplesCurrentPart + tuplesCompleteParts;
}
// used by pjoin1 to correct the number of received tuples
void setRequestedTuples(const int num) {
tuplesCurrentPart = 0;
tuplesCompleteParts = num;
}
int getTotalSize(bool useCtrs=false, int ctrNum = 0)
{
if (!useCtrs)
return nextInt( buf->GetTotalSize() );
double f = (1.0 * qp->GetCounter(ctrNum+1)) / qp->GetCounter(ctrNum);
return nextInt(f * buf->GetTotalSize());
}
inline bool end() const {
return endReached;
}
void setLastMarker(const Marker& m) {
lastMarker = m;
}
const Marker& getLastMarker() {
assert(markerRead);
return lastMarker;
}
/*
The ~getNext~ function will remove marker tuples and stores the tuples in
th ~TupleBuffer~ until a memory overflow is reached.
*/
inline bool storeNextTuple() {
// get next ptuple from input
PTuple* pt = (PTuple*) is.getNext();
if (pt)
{
if (pt->tuple) // save tuple
{
TRACE( pre << "Tuple received!" )
buf->AppendTuple(pt->tuple);
tuplesCurrentPart++;
return true;
}
else // store last marker information
{
TRACE( pre << "Marker: " << *(pt->marker) )
lastMarker = *(pt->marker);
// delete the marker, since the join operator
// will create new ones.
// delete pt;
tuplesCompleteParts += tuplesCurrentPart;
tuplesCurrentPart = 0;
markerRead=true;
return true;
}
}
//cerr << pre << ": End Reached!" << endl;
endReached = true;
return false;
}
bool getNextTuple(Word& result)
{
// use the internal Buffer if requested
if (scanBuffer)
{
result.addr = rit->GetNextTuple();
if (!result.addr) {
TRACE(pre << "Buffer usage finished!")
scanBuffer = false;
// release stored tuples
//buf->Clear();
if (bufferOnly)
return false;
else
return getNextTuple(result);
}
return true;
}
else
{
if (!endReached)
{
do {
// get next ptuple from input
PTuple* pt = (PTuple*) is.getNext();
if (pt)
{
if (pt->tuple) // return tuple to parameter function
{
tuplesCurrentPart++;
result.addr = pt->tuple;
return true;
}
else // store last marker information
{
//TRACE( pre << "Marker: " << *(pt->marker) )
lastMarker = *(pt->marker);
// delete the marker, since the join operator
// will create new ones.
// delete pt;
tuplesCompleteParts += tuplesCurrentPart;
tuplesCurrentPart = 0;
}
}
else
{
TRACE(pre << "End of input stream")
// indicate end of stream
endReached = true;
result.addr = 0;
}
} while (!endReached);
}
}
return false;
}
/*
The function ~estimateInputCard~ uses the last read marker Information and
the number of read tuples (only complete partitions)
to estimate the cardinality of the input stream
*/
int getTuplesOfCompleteParts()
{
return max(tuplesCompleteParts, tuplesCurrentPart);
}
inline int readPartitions() { return lastMarker.num; }
inline int partSize() { return lastMarker.tuples; }
int maxInputCard() {
assert(markerRead);
return max(lastMarker.parts,1) * lastMarker.tuples;
}
int estimateInputCard(const bool useCtrs = false, int ctrNum = 0)
{
TRACE("Input cardinality estimation")
SHOW(useCtrs)
SHOW(ctrNum)
SHOW(lastMarker.num)
SHOW(lastMarker.parts)
// expected tuples
double expected = max(lastMarker.num,1) * lastMarker.tuples;
// received tuples
double received = getTuplesOfCompleteParts();
if (endReached)
received = getNoTuples();
if (useCtrs)
{
// determine input selectivity by counters used in the
// plan of the probe join.
double Ctr1 = qp->GetCounter(ctrNum);
double Ctr2 = qp->GetCounter(ctrNum+1);
SHOW(Ctr1)
SHOW(Ctr2)
expected = max(Ctr1,1.0);
received = max(Ctr2,1.0);
}
SHOW(expected)
SHOW(received)
// cardinality. If not a complete partition of tuples
// was read in then the formula below may underestimate
// the input card.
double card = 0.0;
if (lastMarker.parts != 0)
{
card = lastMarker.tuples
* max(lastMarker.parts,1)
* (received / expected);
}
else
{
card = received;
}
SHOW(card)
return static_cast<int>( ceil(card) );
}
void reset(const bool bufOnly = true)
{
rit = buf->MakeScan();
scanBuffer = true;
bufferOnly = bufOnly;
}
inline bool overFlow() {
return getTotalSize() > maxSize;
}
};
template<class StreamType>
struct PJoinInfo {
typedef enum { probe, eval } JoinState;
// used to maintain the evaluation functions
FunVector* evalFuns;
// used to maintain the cost functions
CostFunctions* costFuns;
StreamOpAddr leftIs;
StreamType rightIs;
PTupleBuffer<StreamOpAddr>* leftBuf;
PTupleBuffer<StreamType>* rightBuf;
JoinState state;
const int maxMem;
// index between [0,n-1] pointing to the best evaluation function
int bestPos;
StreamOpAddr bestFun;
// estimated cardinalites of input and output
int leftCard;
int rightCard;
int resultCard;
int leftAvgTupSize;
int rightAvgTupSize;
int resultTuples;
float joinSel;
bool isSelfJoin;
bool useCtrs;
bool doProbeJoin;
int& instanceNum;
int ctrNum;
bool bufReset[2];
PartCtr parts;
PJoinTuple* info;
PJoinInfo( StreamOpAddr left,
StreamType right,
int& instanceCtr,
int num = 0,
int mem = 1024*1024 ) :
evalFuns(0),
leftIs(left),
rightIs(right),
state(probe),
maxMem(mem),
bestPos(1),
leftCard(0),
rightCard(0),
resultCard(0),
leftAvgTupSize(0),
rightAvgTupSize(0),
resultTuples(0),
joinSel(0.0),
isSelfJoin(false),
useCtrs(false),
doProbeJoin(true),
instanceNum(instanceCtr),
ctrNum(num)
{
TRACE_FILE("pjoin.traces")
info = new PJoinTuple();
leftBuf = new PTupleBuffer<StreamOpAddr>( "left", leftIs, maxMem );
rightBuf = new PTupleBuffer<StreamType>( "right", rightIs, maxMem );
evalFuns = new FunVector();
bufReset[0] = false;
bufReset[1] = false;
}
~PJoinInfo()
{
// close the input streams
leftIs.close();
rightIs.close();
// send a close message to the parameter functions
// in order that it can be propagated to its childs.
bestFun.close();
const int leftTuples = leftBuf->getRequestedTuples();
const int rightTuples = rightBuf->getRequestedTuples();
const int resultErr = relErr(resultCard, resultTuples);
const int leftErr = relErr(leftCard, leftTuples);
const int rightErr = relErr(rightCard, rightTuples);
const float joinExact = (resultTuples / (max(leftTuples,1) * 1.0))
/ (max(rightTuples,1) * 1.0);
const int joinSelErr = relErr( joinSel, joinExact);
instanceNum++;
info->id = instanceNum;
info->arg1_est = leftCard;
info->arg1_real = leftTuples;
info->arg1_err = leftErr;
info->arg2_est = rightCard;
info->arg2_real = rightTuples;
info->arg2_err = rightErr;
info->result_est = resultCard;
info->result_real = resultTuples;
info->result_err = resultErr;
info->sel_est = joinSel;
info->sel_real = joinExact;
info->sel_err = joinSelErr;
FunInfo& f = evalFuns->get(bestPos);
info->usedFunction = f.getName();
pjoinRel->append(info, false);
delete leftBuf;
delete rightBuf;
delete evalFuns;
delete costFuns;
}
int relErr(const float a, const float b) {
float c = b;
if (b == 0.0) // avoid infinite values!
c = 1.0;
return static_cast<int>( ceil(((abs(a - b) * 1.0) / c) * 100.0) );
}
void setRequestedTuples() {
rightBuf->setRequestedTuples( qp->GetCounter(ctrNum+2) );
}
/*
Function ~computeCards~ will compute estimated cardinalities for the
input, output, and result stream.
*/
void computeCards(const int resultTuples)
{
SHOW(resultTuples)
SHOW(ctrNum)
leftCard = leftBuf->estimateInputCard();
rightCard = rightBuf->estimateInputCard(useCtrs, ctrNum);
//SHOW(leftCard)
//SHOW(rightCard)
int leftRead = max( leftBuf->getNoTuples(), 1 );
int rightRead = max( rightBuf->getNoTuples(useCtrs, ctrNum), 1);
//avg tuple size
leftAvgTupSize = nextInt( leftBuf->getTotalSize() / leftRead );
rightAvgTupSize =
previousInt(
rightBuf->getTotalSize(useCtrs, ctrNum) / rightRead );
// join selectivity
if (resultTuples == 0)
cerr << "Warning: probejoin returned no tuples!" << endl;
/*
* Hook for correction of the selectivity estimation
* in the case of self joins. To be continued!
*
if (isSelfJoin)
{
float selfJoinTuples = (1.0 * leftRead * leftRead / leftCard);
}
*/
if ( (leftCard == 0) || (rightCard == 0) )
{
// in some cases we may have recognized that the input card is zero!
joinSel = 0.0;
}
else
{
joinSel = (1.0 * max(resultTuples,1)) / (leftRead * rightRead);
}
SHOW(joinSel)
// cardinality of the join result
const float leftCardF = leftCard;
const float rightCardF = rightCard;
const float resultCardF = ceil(joinSel * leftCardF * rightCardF);
resultCard = static_cast<int>( resultCardF );
SHOW(resultCard)
}
/*
The probe join will be stopped if
* a memory overflow happens
* more than ~n~ tuples (only complete parts) for each input are read
*/
inline bool stopLoading(const int n=1000)
{
const int bufferSize = leftBuf->getTotalSize()
+ rightBuf->getTotalSize();
const bool overFlow = bufferSize > 2*maxMem;
bool stop = false;
if (overFlow)
{
TRACE("*** OVERFLOW! ***")
stop = true;
}
else
{
int storedTuples = leftBuf->getNoTuples() + rightBuf->getNoTuples();
stop = (storedTuples >= n);
stop = ( stop || (leftBuf->end() && rightBuf->end()) );
}
if (stop)
{
TRACE("*** Enough tuples read ***")
SHOW(bufferSize)
// assign values to the ~info~ tuple
info->probe_arg1 = leftBuf->getNoTuples();
info->probe_arg2 = rightBuf->getNoTuples();
// reset streams
leftBuf->reset();
leftBuf->showInfo();
rightBuf->reset();
rightBuf->showInfo();
}
return stop;
}
const long cpuOps() {
const long cpu1 = Counter::getRef("CcInt::Compare");
const long cpu2 = Counter::getRef("CcInt::Less");
const long cpu3 = Counter::getRef("CcInt::Equal");
const long cpu4 = Counter::getRef("CcInt::HashValue");
return (cpu1 + cpu2 + cpu3 + cpu4);
}
// By default 0,05% of the cartesian product will
// be used as input for the probe join
int computeProbeSize( int est, double scaleP, int minVal, int maxVal )
{
int ps = nextInt( est * scaleP );
SHOW(ps)
ps = min( max(ps, minVal), maxVal );
SHOW(est)
SHOW(ps)
return ps;
}
void loadTupleBuffers() {
TRACE("*** load TupleBuffers ***")
int k=2;
for(int i=0; i < k; i++) {
leftBuf->storeNextTuple();
rightBuf->storeNextTuple();
}
cerr << leftBuf->getLastMarker() << endl;;
cerr << rightBuf->getLastMarker() << endl;
int leftEst = leftBuf->maxInputCard();
int rightEst = rightBuf->maxInputCard();
// Define 0.05% of the cartesian product as sample size
Environment& env = Environment::getInstance();
double pScale = env.getFloat("SEC_pScale", 0.05);
int pMinRead = env.getInt("SEC_pMinRead", 500);
int pMaxRead = env.getInt("SEC_pMaxRead", 500);
SHOW(pScale)
SHOW(pMinRead)
SHOW(pMaxRead)
int leftTuples = computeProbeSize(leftEst, pScale, pMinRead, pMaxRead);
int rightTuples = computeProbeSize(rightEst, pScale, pMinRead, pMaxRead);
k = leftTuples + rightTuples;
// adapt the sample to the currently estimated
// input sizes
while ( !stopLoading(k) )
{
if (leftBuf->getNoTuples() < leftTuples || rightBuf->end() )
leftBuf->storeNextTuple();
if (rightBuf->getNoTuples() < rightTuples || leftBuf->end() )
rightBuf->storeNextTuple();
static int ctr=50;
ctr--;
if (ctr == 0)
{
ctr=50;
int leftEst = leftBuf->estimateInputCard();
int rightEst = rightBuf->estimateInputCard(useCtrs, ctrNum);
leftTuples = computeProbeSize(leftEst, pScale, pMinRead, pMaxRead);
rightTuples = computeProbeSize(rightEst, pScale, pMinRead, pMaxRead);
k = leftTuples + rightTuples;
}
}
}
void showBufResetStates() {
SHOW(bufReset[0]);
SHOW(bufReset[1]);
}
bool resetBuffer(const int no) {
assert( (no == 1) || (no == 2) );
TRACE("resetBuffer: " << no)
if ( bufReset[no-1] == false ) {
if (no == 1)
leftBuf->reset(false);
else
rightBuf->reset(false);
bufReset[no-1] = true;
return true;
}
return false;
}
void runProbeJoin()
{
StopWatch probeTimer;
Supplier first = (evalFuns->get(0)).getSupplier();
info->join = (evalFuns->get(0)).getName();
// open both input streams
// Recurses into the operator tree and calls
// ~runProbeJoin~ for subordinated pjoins!
leftIs.open();
rightIs.open();
loadTupleBuffers();
// Loading the tuple buffers will cause
// requests to subtrees. Hence the time
// measurement can only start after the tuple
// buffers are filled.
probeTimer.start();
cout << "* Start Probe Join *" << endl;
qp->Open(first);
Tuple* t = nextTuple(first);
int probeReceived = 0;
while( t )
{
probeReceived++;
t = nextTuple(first);
}
qp->Close(first);
cout << "* End Probe Join *" << probeTimer.diffTimes() << endl;
TRACE( "\n*** Probe join finished! ***\n" )
computeCards(probeReceived);
computeBestFunction();
// assgin values to the info tuple
info->probe_cpuOps = cpuOps();
info->probe_seconds = probeTimer.diffSecondsReal();
info->probe_result = probeReceived;
// open the chosen evaluation function
bestFun.open();
doProbeJoin=false;
}
inline void nextPTuple(Word& result)
{
static int call=0;
call++;
/*
if (doProbeJoin) {
cout << call << "-pjoin: runProbeJoin()" << endl;
runProbeJoin();
}*/
if ( parts.resetIfNeeded() )
{
result.addr = new PTuple( parts.newMarker() );
}
else
{
Tuple* t = (Tuple*) bestFun.getNext();
if (t) {
result.addr = new PTuple( t);
resultTuples++;
} else {
result.addr = 0;
}
}
}
void computeBestFunction()
{
TRACE( "\n*** START Computing best function ***\n" )
// initialize ouput partSize and expected parts.
const int tuples = max( leftBuf->partSize(), rightBuf->partSize() );
//const int numOfParts = max( resultCard / tuples, 1 );
parts.init(tuples, resultCard);
SHOW(parts.tuples)
SHOW(parts.parts)
//SHOW(numOfParts)
CostParams cp( leftCard, leftAvgTupSize,
rightCard, rightAvgTupSize, joinSel );
SHOW(cp)
costFuns = new CostFunctions();
bool useHint = false;
size_t useIndex = 0;
for (size_t i=1; i<evalFuns->size(); i++)
{
string name = (evalFuns->get(i)).getName();
if ( hasPrefix("use_", name) ) {
removePrefix("use_", name);
useHint = true;
useIndex = i;
}
SHOW(name)
bool ok = costFuns->append( name, i );
assert(ok);
}
const CostInfo ci = costFuns->findBest(cp);
SHOW(ci)
bestPos = ci.cf->index;
bool pAllowHints =
Environment::getInstance().getBool("SEC_pAllowHints", false);
SHOW(pAllowHints)
if (pAllowHints && useHint) {
bestPos = useIndex;
cout << "Using hint with index " << useIndex << endl;
}
assert( bestPos < (int)evalFuns->size() );
FunInfo& f = evalFuns->get(bestPos);
bestFun = StreamOpAddr(f.getSupplier());
cout << "Using " << f.getName() << endl;
TRACE( "\n*** END Computing best function ***\n" )
}
};
// Value mapping of ~pjoin2~.
//
// args[0]: input stream(ptuple(y1))
// args[1]: input stream(ptuple(y2))
// args[2], args[3]: handed over to FunVector::load(); presumably the list
//          of evaluation functions and the function names appended by the
//          type mapping - verify against FunVector::load.
//
// Besides OPEN, REQUEST and CLOSE the operator handles the messages
// k*FUNMSG + {OPEN, REQUEST, CLOSE} (k = 1: left input, k = 2: right
// input).
static int pjoin2_vm( Word* args, Word& result, int message,
                      Word& local, Supplier s)
{
  static const string pre("pjoin2: ");
  static int instanceCtr = 0;

  typedef PJoinInfo<StreamOpAddr> PJoin2_Info;
  PJoin2_Info* pj = static_cast<PJoin2_Info*>( local.addr );

  switch ( message )
  {
    case OPEN: {

      TRACE(pre << "Open received")

      // create the local storage for this operator instance
      StreamOpAddr leftStream(args[0].addr);
      StreamOpAddr rightStream(args[1].addr);

      // A possible interface for triggering self join correction.
      //string joinType = StdTypes::GetString(args[2]);
      //SHOW(joinType)
      //bool isSelfJoin = joinType == "selfjoin";

      pj = new PJoin2_Info(leftStream, rightStream, instanceCtr);
      local.addr = pj;

      // load the functions into the function vector
      FunVector& funs = *(pj->evalFuns);
      funs.load(args[2], &args[3]);

      // register this node as stream argument 1 and 2 of every function
      for (size_t idx = 0; idx < funs.size(); idx++ )
      {
        Supplier funNode = funs.get(idx).getSupplier();
        qp->SetupStreamArg(funNode, 1, s);
        qp->SetupStreamArg(funNode, 2, s);
      }

      // run the probe join with the first function
      pj->runProbeJoin();
      return 0;
    }

    case REQUEST: {

      //TRACE(pre << "Request received")
      // ask the chosen evaluation function for the next ptuple
      pj->nextPTuple(result);
      //SHOW(result.addr)
      return result.addr ? YIELD : CANCEL;
    }

    case (1*FUNMSG)+OPEN: {

      // a parameter function opens the left input
      TRACE(pre << "Message 1*FUNMSG+OPEN received")
      (pj->leftIs).open();
      return 0;
    }

    case (2*FUNMSG)+OPEN: {

      // a parameter function opens the right input
      TRACE(pre << "Message 2*FUNMSG+OPEN received")
      (pj->rightIs).open();
      return 0;
    }

    // The two messages below hand the next tuple of the respective
    // PTupleBuffer to the requesting parameter function.
    case (1*FUNMSG)+REQUEST: {

      //TRACE(pre << "Message 1*FUNMSG+REQUEST received")
      pj->leftBuf->getNextTuple( result );
      return (result.addr != 0) ? YIELD : CANCEL;
    }

    case (2*FUNMSG)+REQUEST: {

      //TRACE(pre << "Message 2*FUNMSG+REQUEST received")
      pj->rightBuf->getNextTuple( result );
      return (result.addr != 0) ? YIELD : CANCEL;
    }

    // A CLOSE of a parameter function is not forwarded to the input
    // streams (these are closed when the PJoinInfo instance is destroyed).
    // Only the corresponding buffer is reset.
    case (1*FUNMSG)+CLOSE: {

      TRACE(pre << "Message 1*FUNMSG+CLOSE received")
      pj->resetBuffer(1);
      return 0;
    }

    case (2*FUNMSG)+CLOSE: {

      TRACE(pre << "Message 2*FUNMSG+CLOSE received")
      pj->resetBuffer(2);
      return 0;
    }

    case CLOSE: {

      TRACE(pre << "Message CLOSE received")
      // the destructor of the PJoinInfo instance closes the streams
      delete pj;
      local.addr=0;
      return 0;
    }

    default: {
      cerr << pre << "Cannot handle message " << message << endl;
      break;
    }
  }
  return 0;
}
/*
4.4 Operator ~pjoin1~
This operator does the following type mapping:
----
( stream(ptuple(y1)) rel(tuple(y2))
( ( map (stream(tuple(y1))) (rel(tuple(y2))) (stream(tuple(z))) )
... N repeats ... )
)
-> (stream(ptuple(z)))
----
During type mapping the names of the function identifiers are appended as string
arguments. The names must correspond to a join method. For each join method
a cost function is present and will be used for determining the best function.
The value mapping is organized like that of ~pjoin2~.
*/
// Type mapping of ~pjoin1~.
//
// Expects four arguments: stream(ptuple(y1)), rel(tuple(y2)), int and a
// non-empty list of named functions. Every function must be a map with two
// arguments, stream(tuple(..)) and rel(tuple(..)), and the result
// stream(tuple(..)). From the second function on, the argument tuple types
// must be y1 and y2 and the result tuple type must be that of the first
// function. The result is (APPEND <function names as strings>
// stream(ptuple(z))).
static ListExpr pjoin1_tm(ListExpr args)
{
  NList l(args);

  static const string e1 = expects( Symbol::STREAM(), PTuple::BasicType(),"y1");
  static const string e2 = expects( Relation::BasicType(),
                                    Tuple::BasicType(), "y2" );
  static string err1 = "Expecting input ("
                        + e1 + " " + e2
                        + " int (<list of functions>) ";

  if ( !checkLength( l, 4, err1 ) )
    return l.typeError( err1 );

  NList attrs1;
  if ( !checkStreamPTuple( l.first(), attrs1) )
    return l.typeError( argNotCorrect(1) + err1);

  NList attrs2;
  if ( !checkRelTuple( l.second(), attrs2) )
    return l.typeError( argNotCorrect(2) + err1);

  if ( !l.third().isSymbol( CcInt::BasicType() ) )
    return l.typeError( argNotCorrect(3) + err1);

  // Test the parameter function's signature. For pjoin1 the function list
  // is the fourth argument and the second function argument is a relation
  // over y2.
  static const string
    err2 = "Expecting as fourth argument a list of functions of type "
           "(map (stream(tuple(y1))) (rel(tuple(y2))) (stream(tuple(z))))";

  NList joinMaps = l.fourth();
  NList lastResultAttrs;
  NList fNames;

  int joinMapsLength = joinMaps.length();

  // Without any function neither a result type nor a probe join exists
  // (the value mapping uses function 0 for the probe join).
  if ( joinMapsLength <= 0 )
    return l.typeError( "Empty list of functions! " + argNotCorrect(4) + err2);

  for (int i=1; i < (joinMapsLength+1); i++)
  {
    vector<NList> sig;
    NList funDesc = joinMaps.elem(i);

    NList fName = funDesc.first();
    if ( !fName.isSymbol() )
      return l.typeError( "symbol atom for function name expected!" );

    string funcStr = fName.str();
    string pre = "Function " + funcStr + ": ";

    if ( !checkMap( funDesc.second(), 2, sig ) )
      return l.typeError( argNotCorrect(4) + err2);

    NList leftAttrs;
    if ( !checkStreamTuple(sig[0], leftAttrs) )
      return l.typeError( pre + "First argument not correct!\n"
                          "Received " + sig[0].convertToString() + "." );

    NList rightAttrs;
    if ( !checkRelTuple(sig[1], rightAttrs) )
      return l.typeError( pre + "Second argument not correct!\n"
                          "Received " + sig[1].convertToString() + "." );

    NList resultAttrs;
    if ( !checkStreamTuple(sig[2], resultAttrs) )
      return l.typeError( pre + "Result type not correct!\n"
                          "Received " + sig[2].convertToString() + "." );

    if ( i==1 ) // first time define reference mapping
    {
      // NOTE(review): the argument types of the first function are not
      // compared with attrs1 / attrs2 - confirm that this is intended.
      lastResultAttrs = resultAttrs;
      fNames.makeHead( fName.toStringAtom() );
    }
    else // compare with previous mapping
    {
      fNames.append( fName.toStringAtom() );
      if ( !(attrs1 == leftAttrs) )
        return l.typeError( pre +
                  "Tuple type of the first arg. does not match! \n"
                  "Received " + leftAttrs.convertToString() + "." );

      if ( !(attrs2 == rightAttrs) )
        return l.typeError( pre +
                  "Tuple type of the second arg. does not match! \n"
                  "Received " + rightAttrs.convertToString() + "." );

      if ( !(lastResultAttrs == resultAttrs) )
        return l.typeError( pre +
                  "Tuple type of the result does not match! \n"
                  "Received " + resultAttrs.convertToString()
                  + ". But the result type of the "
                  + "previous function was "
                  + lastResultAttrs.convertToString() );
    }
  }

  NList appendSym(Symbol::APPEND());
  NList resultType(appendSym, fNames, makeStreamPTuple(lastResultAttrs));
  return resultType.listExpr();
}
// Value mapping of ~pjoin1~.
//
// args[0]: input stream(ptuple(y1))
// args[1]: input rel(tuple(y2))
// args[2]: number of the first query processor counter
// args[3], args[4]: handed over to FunVector::load(); according to the
//          original comments the list of evaluation functions and the
//          list of their names.
//
// Only the left input is a stream, hence only the messages
// 1*FUNMSG + {OPEN, REQUEST, CLOSE} are handled besides the standard ones.
static int pjoin1_vm( Word* args, Word& result, int message,
                      Word& local, Supplier s)
{
  static const string pre("pjoin1: ");
  static int instanceCtr = 0;

  typedef PJoinInfo<RelationAddr> PJoin1_Info;
  PJoin1_Info* pj = static_cast<PJoin1_Info*>( local.addr );

  switch ( message )
  {
    case OPEN: {

      TRACE(pre << "Open received")

      // create the local storage for this operator instance
      StreamOpAddr leftStream(args[0].addr);
      GenericRelation* baseRel = getArg<GenericRelation>( args[1] );
      RelationAddr rightRel(baseRel);
      rightRel.open();

      int ctrNum = StdTypes::GetInt(args[2]);
      pj = new PJoin1_Info(leftStream, rightRel, instanceCtr, ctrNum);
      local.addr = pj;

      // load the functions into the function vector
      FunVector& funs = *(pj->evalFuns);
      funs.load(args[3], &args[4]);

      // Register this node as first (stream) argument of every function.
      // The second argument is the tuple buffer of the right input for
      // function 0 (the probe join) and the relation itself for all others.
      for (size_t idx = 0; idx < funs.size(); idx++ )
      {
        Supplier funNode = funs.get(idx).getSupplier();
        qp->SetupStreamArg(funNode, 1, s);
        ArgVectorPointer funArgs = qp->Argument(funNode);
        if (idx != 0) {
          (*funArgs)[1] = SetWord(baseRel);
        } else {
          (*funArgs)[1] = SetWord( pj->rightBuf->getBufPtr() );
        }
      }

      // run the probe join with the first function
      pj->useCtrs = true;
      pj->runProbeJoin();
      return 0;
    }

    case REQUEST: {

      //TRACE(pre << "Request received")
      // ask the chosen evaluation function for the next ptuple
      pj->nextPTuple(result);
      //SHOW(result.addr)
      return result.addr ? YIELD : CANCEL;
    }

    case (1*FUNMSG)+OPEN: {

      // a parameter function opens the left input
      TRACE(pre << "Message 1*FUNMSG+OPEN received")
      (pj->leftIs).open();
      return 0;
    }

    // hand the next tuple of the left PTupleBuffer to the requesting
    // parameter function
    case (1*FUNMSG)+REQUEST: {

      //TRACE(pre << "Message 1*FUNMSG+REQUEST received")
      pj->leftBuf->getNextTuple( result );
      return (result.addr != 0) ? YIELD : CANCEL;
    }

    // A CLOSE of a parameter function is not forwarded to the input stream
    // (it is closed when the PJoinInfo instance is destroyed). Only the
    // left buffer is reset.
    case (1*FUNMSG)+CLOSE: {

      TRACE(pre << "Message 1*FUNMSG+CLOSE received")
      pj->resetBuffer(1);
      return 0;
    }

    case CLOSE: {

      TRACE(pre << "Message CLOSE received")

      // The parameter functions read the base relation directly and not
      // the right PTupleBuffer, so the buffer only knows the number of
      // probe tuples. Correct its counter before the statistics are
      // written by the destructor.
      pj->setRequestedTuples();

      // the destructor of the PJoinInfo instance closes the streams
      delete pj;
      local.addr=0;
      return 0;
    }

    default: {
      cerr << pre << "Cannot handle message " << message << endl;
      break;
    }
  }
  return 0;
}
// Type mapping of ~pcreate~: (stream(tuple(y)) int) -> stream(ptuple(y)).
static ListExpr pcreate_tm(ListExpr args)
{
  static const string e1 = expects( Symbol::STREAM(), Tuple::BasicType() );
  string err1 = "pcreate expects (" + e1 + "int)!";

  NList l(args);
  NList attrs;

  const bool lengthOk = checkLength( l, 2, err1 );
  if ( !lengthOk ) {
    return l.typeError( err1 );
  }

  const bool streamOk = checkStreamTuple( l.first(), attrs);
  if ( !streamOk ) {
    return l.typeError( argNotCorrect(1) + err1);
  }

  const bool intOk = l.second().isSymbol(CcInt::BasicType());
  if ( !intOk ) {
    return l.typeError( argNotCorrect(2) + err1);
  }

  return makeStreamPTuple(attrs).listExpr();
}
// Value mapping of ~pcreate~.
//
// args[0]: input stream(tuple(y))
// args[1]: int, passed to BufferedStreamInfo as partition size
static int pcreate_vm( Word* args, Word& result, int message,
                       Word& local, Supplier s )
{
  static const string pre = "pcreate: ";
  BufferedStreamInfo* info = static_cast<BufferedStreamInfo*>( local.addr );

  switch (message)
  {
    case OPEN :
    {
      TRACE(pre << "OPEN")
      int partSize = StdTypes::GetInt(args[1]);
      info = new BufferedStreamInfo( args[0], partSize );
      local.addr = info;
      return 0;
    }

    case REQUEST :
    {
      // nextPTuple() delivers either a marker or a tuple
      result.addr = info->nextPTuple();
      return result.addr ? YIELD : CANCEL;
    }

    case CLOSE :
    {
      //TRACE(pre << "CLOSE received!")
      delete info;
      local.addr=0;
      return 0;
    }

    default :
    {
      // no other message is expected
      assert(false);
      return 0;
    }
  }
}
// Type mapping of ~pcreate2~:
// (stream(tuple(y)) int int) -> stream(ptuple(y)).
static ListExpr pcreate2_tm(ListExpr args)
{
  static const string e1 = expects( Symbol::STREAM(), Tuple::BasicType() );
  static string err1 = "pcreate2 expects (" + e1 + "int int)!";

  NList l(args);

  const bool lengthOk = checkLength(l, 3, err1);
  if ( !lengthOk ) {
    return l.typeError( err1 );
  }

  NList attrs;
  const bool streamOk = checkStreamTuple( l.first(), attrs);
  if ( !streamOk ) {
    return l.typeError( argNotCorrect(1) + err1);
  }

  // both remaining arguments must be of type int
  const bool secondIsInt = l.second().isSymbol(CcInt::BasicType());
  if ( !secondIsInt ) {
    return l.typeError( argNotCorrect(2) + err1);
  }

  const bool thirdIsInt = l.third().isSymbol(CcInt::BasicType());
  if ( !thirdIsInt ) {
    return l.typeError( argNotCorrect(3) + err1);
  }

  return makeStreamPTuple(attrs).listExpr();
}
// Value mapping of ~pcreate2~. This is only a placeholder: no message is
// evaluated, no argument is read and 0 is returned in every case.
// NOTE(review): looks like an unfinished operator - confirm that it is not
// used in queries.
static int pcreate2_vm( Word* args, Word& result, int message,
Word& local, Supplier s)
{
// args[0]: Input stream(tuple(y)) (the type mapping pcreate2_tm checks
//          for a stream of tuples)
// args[1]: int
// args[2]: int
return 0;
}
/*
4.4 The shuffling operators ~shuffle~ and ~memshuffle~
These operators are overloaded. Both have the same type mapping
hence they can use the same functions for type checking and evaluation
function selection.
*/
// Common type mapping of the shuffle operators. A single argument of type
// stream(tuple(y)) or stream(ptuple(y)) is accepted and returned unchanged
// as result type. ~op~ is the operator name used in the error message.
static ListExpr shuffleX_tm(ListExpr args, const string& op)
{
  static const string e1 = expects( Symbol::STREAM(), Tuple::BasicType() );
  static const string e2 = expects( Symbol::STREAM(), PTuple::BasicType() );
  string err1 = op + " expects (" + e1 + ") or (" + e2 + ")!";

  NList l(args);
  if ( !checkLength(l, 1, err1) ) {
    return l.typeError( err1 );
  }

  // the tuple stream check is tried first, then the ptuple stream check
  NList attrs;
  const bool isTupleOrPTupleStream =
       checkStreamTuple( l.first(), attrs)
    || checkStreamPTuple( l.first(), attrs);

  if ( !isTupleOrPTupleStream ) {
    return l.typeError( argNotCorrect(1) + err1);
  }
  return l.first().listExpr();
}
// Type mapping of ~shuffle~; delegates to shuffleX_tm.
static ListExpr shuffle_tm(ListExpr args)
{
  const string opName("shuffle");
  return shuffleX_tm(args, opName);
}
// Type mapping of ~shuffle2~: (stream(tuple(y)) int) -> stream(tuple(y)).
static ListExpr shuffle2_tm(ListExpr args)
{
  static const string e1 = expects( Symbol::STREAM(), Tuple::BasicType() );
  string err1 = "expecting (" + e1 + " x int)";

  // the elements are only inspected if the list has exactly two of them
  const bool signatureOk =
       nl->HasLength(args,2)
    && Stream<Tuple>::checkType(nl->First(args))
    && CcInt::checkType(nl->Second(args));

  if ( !signatureOk ) {
    return listutils::typeError(err1);
  }
  return nl->First(args);
}
/*
The shuffle operator uses a tuple buffer of size $M$ and stores all incoming
tuples there. In the case of a memory overflow it will work in the following
way
1 the tuples of the buffer are written in random order to disk
2 Fill the tuple buffer again. Hence the input relation is partitioned into
$N = StreamSize / M$ parts.
3 When the end of the input stream is reached read in tuples from each partition
$N$ in chunks of a logical page size $Lps$. In order to guarantee that all tuples
will be mixed we assume that $N * Lps = M$ which implies $StreamSize =
\frac{M^2}{Lps}$. For example, if $M=16MB$ and $Lps=32k$ we can randomize
relations with a size up to 8GB.
*/
// In-memory buffer of tuple pointers which can be brought into random
// order. Usage: append() the tuples, shuffle(), open() and then call
// getNext() until it returns 0. The buffer holds one reference for every
// appended tuple and gives it up in the destructor.
struct ShuffleBuf {

  typedef vector<Tuple*> TupleBuf;
  TupleBuf buffer;
  size_t pos;      // 1-based read position; 0 means open() was not called
  size_t M;
  size_t freeMem;
  int Lps;

  ShuffleBuf(int MaxMem, int LogicalPageSize) :
    pos(0),
    M(MaxMem),
    freeMem(MaxMem),
    Lps(LogicalPageSize)
  {}

  ~ShuffleBuf() {

    TupleBuf::const_iterator it = buffer.begin();
    while( it != buffer.end() ) {
      //(*it)->DecReference();
      (*it)->DeleteIfAllowed();
      ++it;
    }
  }

  // Brings the buffered tuples into random order.
  void shuffle() {
    std::shuffle(buffer.begin(), buffer.end(),
                 std::mt19937(std::random_device()()));
  }

  // Accounts ~tupleSize~ bytes. Returns true (without accounting) if less
  // than ~tupleSize~ bytes are free.
  inline bool overFlow(const size_t tupleSize) {

    if ( freeMem < tupleSize )
      return true;

    freeMem -= tupleSize;
    return false;
  }

  // todo: implementation of the persistent case
  void handleOverFlow(Tuple* t) {

    cerr << "shuffle: A memory overflow happend!" << endl;
    buffer.push_back(t);
  }

  // Stores ~t~ and increments its reference counter. The overflow
  // handling is currently disabled.
  void append(Tuple* t)
  {
    t->IncReference();
    // if ( overFlow(t->GetExtSize()) )
    //   handleOverFlow(t);
    // else
    buffer.push_back(t);
  }

  // Positions the read pointer behind the last tuple.
  void open()
  {
    pos = buffer.size() + 1;
  }

  // Returns the stored tuples from the last to the first one and 0
  // afterwards.
  inline Tuple* getNext() {

    //cout << "pos: " << pos << endl;
    // pos == 1: all tuples were returned. pos == 0: open() was never
    // called; decrementing would wrap around the unsigned counter and
    // index far beyond the buffer, hence this case also yields 0.
    if ( pos <= 1 )
      return 0;

    // Return values and start with buffer.size() - 1
    pos--;
    return buffer[pos-1];
  }

};
// Buffer which returns the stored tuple pointers in random order.
// append() and getNext() may be interleaved. The buffer does not touch the
// reference counters of the tuples and does not release tuples that are
// still stored when it is destroyed.
struct ShuffleBuf2 {

  vector<Tuple*> buffer;
  unsigned int size;   // number of stored tuples, equals buffer.size()

  // Seeds the global random generator used by getNext().
  ShuffleBuf2():size(0) {srand ( time(NULL) );}
  ~ShuffleBuf2() { }

  void append(Tuple* t)
  {
    buffer.push_back(t);
    ++size;
  }

  // Removes a randomly chosen tuple from the buffer and returns it;
  // returns 0 if the buffer is empty.
  inline Tuple* getNext()
  {
    if ( size == 0)
      return 0;

    int i= rand() % size;
    Tuple* res= buffer[i];

    // Close the gap with the last element and really remove the last slot.
    // Formerly only ~size~ was decremented; a following append() then
    // pushed the new tuple behind the stale slot, so that it was never
    // returned while an already returned pointer could be delivered again.
    buffer[i]= buffer[size-1];
    buffer.pop_back();
    --size;
    return res;
  }

};
/*
struct ShuffleInfo {
ShuffleInfo(int maxMem = 4 * 1024 * 1024, int maxTuples = 512)
: persBuf(3/4 * maxMem),
memBuf(maxMem),
streamPos(0),
pos(maxTuples/2),
memTuples(maxTuples),
skip(2),
run(1),
stepwidth(2),
bufIter(0),
memBufFinished(false),
memCtr(0),
persCtr(0)
{}
~ShuffleInfo()
{
if ( bufIter != 0) {
delete bufIter;
bufIter = 0;
}
}
void append(Tuple* t) {
if ( (streamPos % skip) == 0 ) {
// tuple will be selected for the front part of the
// output stream.
if ( memBuf.GetNoTuples() == memTuples )
{
size_t p = nextReplacePos();
if ( p >= memTuples )
{
// restart
initReplacePos();
p = nextReplacePos();
}
assert( p < memTuples );
// store t in buffer
t->IncReference();
memBuf.SetTupleAtPos(p, t);
assert( memBuf.InMemory() );
}
else
{
memBuf.AppendTuple(t);
}
} // end of if ( (streamPos % skip) == 0 )
else
{
persCtr++;
persBuf.AppendTuple(t);
}
streamPos++;
}
Tuple* getNext() {
Tuple* result = 0;
// return the tuples of the memory buffer
if ( memBufFinished == false )
{
if ( bufIter == 0) {
bufIter = memBuf.MakeScan();
}
memCtr++;
result = bufIter->GetNextTuple();
if (result == 0) {
cerr << endl;
cerr << "streamPos" << streamPos << endl;
cerr << "memBuf: " << memBuf.GetNoTuples() << endl;
cerr << "memCtr: " << memCtr << endl;
cerr << "persBuf: " << persBuf.GetNoTuples() << endl;
cerr << "persCtr: " << persCtr << endl;
memBufFinished = true;
delete bufIter;
bufIter = 0;
}
}
// return the tuples of the persistent buffer
if ( memBufFinished == true)
{
if ( bufIter == 0) {
bufIter = persBuf.MakeScan();
}
result = bufIter->GetNextTuple();
}
return result;
}
inline void initReplacePos()
{
skip = max((size_t)2, streamPos / memTuples);
run++;
pos = memTuples / 2;
// compress buffer: replace i by 2i. The replaced positions
// are transferred to persBuf.
cerr << endl << "compressing buffer" << endl;
for(size_t i=1; i < memTuples/2; i++)
{
// transfer pos 2*i-1 to persBuf
Tuple* t = memBuf.GetTupleAtPos(2*i-1);
assert(t != 0);
t->DecReference();
persCtr++;
persBuf.AppendTuple(t);
}
for(size_t i=1; i < memTuples/2; i++)
{
// override pos i
Tuple* t = memBuf.GetTupleAtPos(2*i);
assert(t != 0);
memBuf.SetTupleAtPos(i, t);
memBuf.SetTupleAtPos(2*i, 0);
}
bool trace = false;
if (trace) {
for(size_t i=0; i < memTuples/2; i++) {
Tuple* t = memBuf.GetTupleAtPos(i);
cerr << i << ": ";
if (t == 0)
cerr << "null pointer!";
else
cerr << *t;
cerr << endl;
}
}
cerr << endl;
cerr << endl
<< "run: " << run
<< " streamPos: " << streamPos
<< " bufindex: " << pos
<< " skip: " << skip
<< endl;
}
inline size_t nextReplacePos() {
cerr << "p" << pos << ", ";
size_t res = pos;
pos++;
return res;
}
TupleBuffer persBuf;
TupleBuffer memBuf;
size_t streamPos;
size_t pos;
size_t memTuples;
size_t skip;
size_t run;
size_t stepwidth;
GenericRelationIterator* bufIter;
bool memBufFinished;
size_t memCtr;
size_t persCtr;
};
*/
/*
Operator ~shuffle~
*/
struct ShuffleInfo {

  // Creates the two tuple buffers, both constructed with maxMem
  // (presumably the memory limit in bytes - confirm against TupleBuffer).
  // maxTuples is the expected input size: every (maxTuples/500)-th tuple
  // is routed to memBuf, all other tuples to persBuf.
  //
  // The step width is clamped to at least 1. Without the clamp a value of
  // maxTuples below 500 yields skip == 0, and append() would then evaluate
  // streamPos % 0; a negative value would wrap around to a huge size_t.
  ShuffleInfo(int maxMem = 4 * 1024 * 1024, int maxTuples = 100000 )
    : persBuf(maxMem),
      memBuf(maxMem),
      memTuples(500),
      skip( maxTuples > 500 ? static_cast<size_t>(maxTuples) / 500 : 1 ),
      streamPos(0),
      bufIter(0),
      memBufFinished(false),
      trace(false)
  {}

  // Releases a scan which may still be open.
  ~ShuffleInfo()
  {
    if ( bufIter != 0 ) {
      delete bufIter;
      bufIter = 0;
    }
  }

  // Stores the next tuple of the input stream. Tuples at stream positions
  // which are multiples of skip form the front part of the output stream.
  void append(Tuple* t) {

    if ( (streamPos % skip) == 0 )
    {
      // tuple is selected for the front part of the output stream
      memBuf.AppendTuple(t);
    }
    else
    {
      persBuf.AppendTuple(t);
    }
    streamPos++;
  }

  // Returns the buffered tuples: first all tuples of memBuf, afterwards
  // all tuples of persBuf. Returns 0 when the persBuf scan is exhausted.
  Tuple* getNext() {

    if ( !memBufFinished )
    {
      if ( bufIter == 0 ) {
        bufIter = memBuf.MakeScan();
      }
      Tuple* front = bufIter->GetNextTuple();
      if ( front != 0 ) {
        return front;
      }

      // memBuf is exhausted, switch over to persBuf
      if (trace) {
        cerr << endl;
        cerr << "streamPos: " << streamPos << endl;
        cerr << "memBuf : " << memBuf.GetNoTuples() << endl;
        cerr << "persBuf : " << persBuf.GetNoTuples() << endl;
      }
      memBufFinished = true;
      delete bufIter;
      bufIter = 0;
    }

    // return the tuples of the second buffer
    if ( bufIter == 0 ) {
      bufIter = persBuf.MakeScan();
    }
    return bufIter->GetNextTuple();
  }

  TupleBuffer persBuf;   // tuples returned after memBuf is exhausted
  TupleBuffer memBuf;    // tuples returned first
  size_t memTuples;      // fixed to 500, not read inside this struct
  size_t skip;           // step width of the selection, always >= 1
  size_t streamPos;      // number of tuples appended so far
  GenericRelationIterator* bufIter; // current scan, 0 if none is open
  bool memBufFinished;   // true once the memBuf scan returned 0
  bool trace;            // enables the diagnostic output in getNext()
};
// Value mapping of operator shuffle2.
//
// args[0]: stream(tuple(y))
// args[1]: int, handed over to ShuffleInfo as expected stream size
//
// OPEN consumes the complete input stream into a ShuffleInfo instance,
// REQUEST returns the buffered tuples one by one and CLOSE closes the
// input stream and releases the instance.
static int shuffle2_vm( Word* args,
                        Word& result, int message,
                        Word& local, Supplier s )
{
  static const string pre = "shuffle2: ";

  ShuffleInfo* state = static_cast<ShuffleInfo*>( local.addr );
  Word input = args[0];
  int numTuples = StdTypes::GetInt( args[1] );

  if ( message == OPEN )
  {
    state = new ShuffleInfo(4 * 1024 * 1024, numTuples);
    qp->Open(input.addr);

    // materialize the whole input
    for ( Tuple* t = nextTuple(input); t != 0; t = nextTuple(input) )
    {
      state->append(t);
    }
    local.addr = state;
    return 0;
  }
  else if ( message == REQUEST )
  {
    Tuple* next = state->getNext();
    if ( next == 0 )
    {
      result.addr = 0;
      return CANCEL;
    }
    result.addr = next;
    return YIELD;
  }
  else if ( message == CLOSE )
  {
    qp->Close(input.addr);
    delete state;
    local.addr=0;
    return 0;
  }
  else
  {
    assert(false);
  }
  return 0;
}
/*
Operator ~shuffle~
*/
struct ShuffleInfoRAND : public ShuffleInfo {
ShuffleInfoRAND() : ShuffleInfo(),
lastTuple(0)
{}
~ShuffleInfoRAND()
{}
void append(Tuple* t)
{
size_t i = 0;
bool replaced = false;
Tuple* v = rtBuf.ReplacedByRandom(t, i, replaced);
if ( replaced )
{
// v was replaced by t
if (v != 0) {
persBuf.AppendTuple(v);
v->DeleteIfAllowed();
}
}
else
{
assert(v == 0);
// v == 0, and t was not stored in buffer
persBuf.AppendTuple(t);
}
}
inline size_t freeBytes() const {
return persBuf.FreeBytes();
}
/*
Copy all tuples of the random buffer into the memory buffer of
the parent class.
*/
void finish()
{
rtBuf.copy2TupleBuf( memBuf );
}
Tuple* lastTuple;
// Will be used to store the last tuple which could not be hold
// in memory. Refer to shuffle3_vm.
private:
RandomTBuf rtBuf;
};
// Value mapping of operator shuffle.
//
// args[0]: stream(tuple(y))
//
// OPEN consumes the complete input stream into a ShuffleInfoRAND
// instance and finishes it, REQUEST returns the buffered tuples and
// CLOSE closes the input stream and releases the instance.
static int shuffle_vm( Word* args,
                       Word& result, int message,
                       Word& local, Supplier s )
{
  static const string pre = "shuffle: ";

  ShuffleInfoRAND* state = static_cast<ShuffleInfoRAND*>( local.addr );
  Word input = args[0];

  if ( message == OPEN )
  {
    state = new ShuffleInfoRAND();
    qp->Open(input.addr);

    for ( Tuple* t = nextTuple(input); t != 0; t = nextTuple(input) )
    {
      state->append(t);
    }
    // move the content of the random buffer into the memory buffer
    state->finish();
    local.addr = state;
    return 0;
  }
  else if ( message == REQUEST )
  {
    Tuple* next = state->getNext();
    if ( next == 0 )
    {
      result.addr = 0;
      return CANCEL;
    }
    result.addr = next;
    return YIELD;
  }
  else if ( message == CLOSE )
  {
    qp->Close(input.addr);
    delete state;
    local.addr=0;
    return 0;
  }
  else
  {
    assert(false);
  }
  return 0;
}
// Value mapping of operator shuffle3.
//
// args[0]: stream(tuple(y))
//
// OPEN reads tuples into a ShuffleInfoRAND instance as long as the size
// of the next tuple is below the free space reported by the instance.
// REQUEST first returns the buffered tuples, then the tuple which did
// not fit (if any) and finally the remaining tuples of the input stream.
static int shuffle3_vm( Word* args,
                        Word& result, int message,
                        Word& local, Supplier s )
{
  static const string pre = "shuffle: ";

  ShuffleInfoRAND* state = static_cast<ShuffleInfoRAND*>( local.addr );
  Word input = args[0];

  if ( message == OPEN )
  {
    state = new ShuffleInfoRAND();
    qp->Open(input.addr);

    Tuple* t = nextTuple(input);
    size_t ctr = 0;

    // buffer tuples while they still fit
    while ( t != 0 && (size_t)t->GetExtSize() < state->freeBytes() )
    {
      state->append(t);
      t = nextTuple(input);
      ctr++;
    }

    if ( t != 0 ) {
      // keep the tuple which did not fit for the next REQUEST message
      state->lastTuple = t;
    }

    cout << "ctr = " << ctr << endl;
    state->finish();
    local.addr = state;
    return 0;
  }
  else if ( message == REQUEST )
  {
    // iterate over the buffers first
    Tuple* next = state->getNext();

    if ( next == 0 )
    {
      // buffers are exhausted, continue with the input stream
      if ( state->lastTuple != 0 ) {
        next = state->lastTuple;
        state->lastTuple = 0;
      } else {
        next = nextTuple(input);
      }
    }

    if ( next == 0 )
    {
      result.addr = 0;
      return CANCEL;
    }
    result.addr = next;
    return YIELD;
  }
  else if ( message == CLOSE )
  {
    qp->Close(input.addr);
    delete state;
    local.addr=0;
    return 0;
  }
  else
  {
    assert(false);
  }
  return 0;
}
// Type mapping of operator memshuffle; the check itself is done by
// shuffleX_tm which gets the operator name as second argument.
static ListExpr memshuffle_tm(ListExpr args)
{
  ListExpr resultType = shuffleX_tm(args, "memshuffle");
  return resultType;
}
// Value mapping of operator memshuffle.
//
// args[0]: Input stream(ptuple(y)) or stream(tuple(y))
//
// OPEN appends the complete input stream to a ShuffleBuf, shuffles and
// opens it. REQUEST returns the tuples delivered by the buffer and CLOSE
// closes the input stream and releases the buffer.
static int memshuffle_vm( Word* args,
                          Word& result, int message,
                          Word& local, Supplier s )
{
  static const string pre = "memshuffle: ";

  ShuffleBuf* buffer = static_cast<ShuffleBuf*>( local.addr );
  Word input = args[0];

  if ( message == OPEN )
  {
    // constructor arguments of the shuffle buffer
    const int M = 40 * 4096 * 4096;
    const int Lps = 32 * 1024;
    buffer = new ShuffleBuf(M, Lps);

    qp->Open(input.addr);
    for ( Tuple* t = nextTuple(input); t != 0; t = nextTuple(input) )
    {
      buffer->append(t);
    }

    buffer->shuffle();
    buffer->open();
    local.addr = buffer;
    return 0;
  }
  else if ( message == REQUEST )
  {
    Tuple* next = buffer->getNext();
    if ( next == 0 )
    {
      result.addr = 0;
      return CANCEL;
    }
    result.addr = next;
    return YIELD;
  }
  else if ( message == CLOSE )
  {
    qp->Close(input.addr);
    delete buffer;
    local.addr=0;
    return 0;
  }
  else
  {
    assert(false);
  }
  return 0;
}
// Type mapping of operator memshuffle2; the check itself is done by
// shuffleX_tm which gets the operator name as second argument.
static ListExpr memshuffle2_tm(ListExpr args)
{
  ListExpr resultType = shuffleX_tm(args, "memshuffle2");
  return resultType;
}
// Value mapping of operator memshuffle2.
//
// args[0]: Input stream(ptuple(y)) or stream(tuple(y))
//
// OPEN appends the complete input stream to a ShuffleBuf2, REQUEST
// returns the tuples delivered by the buffer and CLOSE closes the input
// stream and releases the buffer.
static int memshuffle2_vm( Word* args,
                           Word& result, int message,
                           Word& local, Supplier s )
{
  static const string pre = "memshuffle2: ";

  ShuffleBuf2* buffer = static_cast<ShuffleBuf2*>( local.addr );
  Word input = args[0];

  if ( message == OPEN )
  {
    buffer = new ShuffleBuf2();
    qp->Open(input.addr);

    for ( Tuple* t = nextTuple(input); t != 0; t = nextTuple(input) )
    {
      buffer->append(t);
    }
    local.addr = buffer;
    return 0;
  }
  else if ( message == REQUEST )
  {
    Tuple* next = buffer->getNext();
    if ( next == 0 )
    {
      result.addr = 0;
      return CANCEL;
    }
    result.addr = next;
    return YIELD;
  }
  else if ( message == CLOSE )
  {
    qp->Close(input.addr);
    delete buffer;
    local.addr=0;
    return 0;
  }
  else
  {
    assert(false);
  }
  return 0;
}
// Type mapping of operator runtime:
//
//   string x int x int x real x real x real x int -> int
//
// Returns a type error naming the first argument which does not match.
static ListExpr runtime_tm(ListExpr list)
{
  NList args(list);

  if ( !args.hasLength(7) )
    return NList::typeError("Expecting 7 arguments");

  // expected type symbol and error message for each argument position
  struct ArgSpec {
    string type;
    const char* error;
  };

  const ArgSpec expected[] = {
    { CcString::BasicType(), "Expecting a string as first argument" },
    { CcInt::BasicType(),    "Expecting an int as second argument" },
    { CcInt::BasicType(),    "Expecting an int as third argument" },
    { CcReal::BasicType(),   "Expecting a real as fourth argument" },
    { CcReal::BasicType(),   "Expecting a real as fifth argument" },
    { CcReal::BasicType(),   "Expecting a real as sixth argument" },
    { CcInt::BasicType(),    "Expecting an int as seventh argument" }
  };

  // list elements are addressed starting with index 1
  for ( int i = 0; i < 7; i++ )
  {
    if ( !args.elem(i + 1).isSymbol(expected[i].type) )
      return NList::typeError(expected[i].error);
  }

  return NList(CcInt::BasicType()).listExpr();
}
// Value mapping of operator runtime.
//
// args[0]: string, name of the cost function
// args[1]: int,  c1
// args[2]: int,  c2
// args[3]: real, rounded to the int s1
// args[4]: real, rounded to the int s2
// args[5]: real, sel
// args[6]: int expression whose evaluation time is measured
//
// All arguments are evaluated explicitly by qp->Request. The value of
// args[6] becomes the result of the operator. The measured run time,
// the computed costs and some counter values are appended to costRel.
//
// Note: the local names are kept since they are passed to the SHOW macro.
static int runtime_vm( Word* args, Word& result, int message,
                       Word& local, Supplier s)
{
  result = qp->ResultStorage(s);
  Word w;

  qp->Request(args[0].addr, w);
  string name = StdTypes::GetString(w);

  qp->Request(args[1].addr, w);
  int c1 = StdTypes::GetInt(w);

  qp->Request(args[2].addr, w);
  int c2 = StdTypes::GetInt(w);

  // the real valued sizes are rounded to the nearest integer
  qp->Request(args[3].addr, w);
  int s1 = (int)floor( StdTypes::GetReal(w) + 0.5 );

  qp->Request(args[4].addr, w);
  int s2 = (int)floor( StdTypes::GetReal(w) + 0.5 );

  qp->Request(args[5].addr, w);
  SEC_STD_REAL sel = StdTypes::GetReal(w);

  SHOW(name)
  SHOW(c1)
  SHOW(s1)

  CostParams p(c1, s1, c2, s2, sel);
  cout << p << endl;

  StopWatch t;

  // evaluate the last argument
  t.start();
  qp->Request(args[6].addr, w);
  double rt = t.diffSecondsReal();
  cout << rt << endl;

  // compute the costs for the function given by name
  CostFunctions* costFuns = new CostFunctions();
  costFuns->append( name, 1 );
  const CostInfo ci = costFuns->findBest(p);
  SHOW(ci)

  // set result value
  CcInt* funRes = static_cast<CcInt*>( w.addr);
  CcInt* res = static_cast<CcInt*>( result.addr );
  res->CopyFrom(funRes);

  // store values in system table
  static int id = 0;
  id++;
  CostTuple* cost = new CostTuple(p, ci.costs);
  cost->cost_name = name;
  cost->id = id;
  cost->param_res_card = res->GetIntval();
  cost->real_runtime = rt;
  cost->real_write = Counter::getRef( Symbol::CTR_TBUF_PAGES_W() );
  cost->real_read = Counter::getRef( Symbol::CTR_TBUF_PAGES_R() );

  long& intHash = Counter::getRef(Symbol::CTR_INT_HASH());
  long& intLess = Counter::getRef(Symbol::CTR_INT_LESS());
  long& intEqual = Counter::getRef(Symbol::CTR_INT_EQUAL());
  long& intCompare = Counter::getRef(Symbol::CTR_INT_COMPARE());
  cost->real_cpu = intHash + intLess + intEqual + intCompare;

  costRel->append(cost, false);

  // The cost function container is only needed within this call. It is
  // released after the last use of ci; formerly it was never deleted.
  delete costFuns;

  return 0;
}
}; // end of PartStreamMappings
// Helper which assembles the textual signature of an operator, e.g.
// "int x real -> bool". The last constructor argument which is not empty
// is the result type, all preceding ones are argument types. The first
// two arguments are always used, even if they are empty.
class SIG {

  public:
  SIG( const string& p1,
       const string& p2,
       const string& p3 = "",
       const string& p4 = "",
       const string& p5 = "",
       const string& p6 = "",
       const string& p7 = "",
       const string& p8 = "")
  {
    parts.push_back(p1);
    parts.push_back(p2);

    // the remaining parameters are optional, empty ones are skipped
    const string* optional[] = { &p3, &p4, &p5, &p6, &p7, &p8 };
    for ( size_t k = 0; k < 6; k++ ) {
      if ( !optional[k]->empty() ) {
        parts.push_back( *optional[k] );
      }
    }
  }

  // Returns "t1 x t2 x ... -> tn".
  string str() {

    const size_t n = parts.size();
    assert(n >= 2);

    string text = parts.front();
    for ( size_t k = 1; k + 1 < n; k++ ) {
      text += " x ";
      text += parts[k];
    }
    text += " -> ";
    text += parts.back();
    return text;
  }

  private:
  vector<string> parts;
};
struct runtimeInfo : OperatorInfo {
runtimeInfo()
{
name = "runtime";
signature = SIG(CcString::BasicType(), CcInt::BasicType(),
CcInt::BasicType(),
CcReal::BasicType(),CcReal::BasicType(),CcReal::BasicType(),
"(map ... -> int)", CcInt::BasicType()).str();
syntax = "runtime(s,t1,t2,s1,s2,sel,f)";
meaning = "Determines the runtime for a join given by function f and "
"applies the cost function named by s with the parameters: "
"t1,t2,s1,s2 (input cardinalities and avg. tuple sizes), "
"sel (join selectivity)";
}
};
// Operator description shared by the sortmergejoin_r variants; only the
// operator name differs and is passed to the constructor.
struct sortmergejoinrInfo : OperatorInfo {

  sortmergejoinrInfo(const string& _name)
  {
    supportsProgress = true;

    name      = _name;
    syntax    = "_ _ sortmergejoin_r[an, bm]";
    signature =
      "stream(tuple(...)) x stream(tuple(...)) -> stream(tuple(...))";

    meaning =
      "Computes a join by sorting the inputs but returns the result R = S1 "
      "u S2 with a random subset S1 and a sorted subset S2. The variant _r "
      "does this by generating all output tuples two times, the variant "
      "_r2 does this in a more sophisticated way with less and variant _r3 "
      "outputs only the random sample S1";
  }
};
// Value mappings of the operators sortmergejoin_r, sortmergejoin_r2 and
// sortmergejoin_r3. They are only declared here; the definitions are not
// part of this section of the file.
extern int
sortmergejoinr_vm( Word* args, Word& result,
int message, Word& local, Supplier s );
extern int
sortmergejoinr2_vm( Word* args, Word& result,
int message, Word& local, Supplier s );
extern int
sortmergejoinr3_vm( Word* args, Word& result,
int message, Word& local, Supplier s );
// Declaration of the type mapping template from namespace extrelationalg.
// The instance JoinTypeMap<false, 1> is registered as type mapping of the
// sortmergejoin_r variants in the constructor of PartStreamAlgebra.
namespace extrelationalg{
template<bool, int> extern ListExpr
JoinTypeMap(ListExpr args);
}
// Defining static member sym
Symbols PartStreamMappings::sym;
/*
4 Partitioned Stream Algebra
*/
// The algebra object: its constructor registers the type constructor
// ptuple and all operators of the algebra. Type and value mappings are
// taken from PartStreamMappings (abbreviated as psm).
class PartStreamAlgebra : public Algebra
{
  typedef PartStreamMappings psm;

  public:
  PartStreamAlgebra() : Algebra()
  {
    //assert(false);
    ConstructorInfo ci;
    ci.name = PTuple::BasicType();
    ci.signature = "(ident x DATA)+ -> PTUPLE";
    ci.typeExample = PTuple::BasicType();
    ci.listRep = "Not supported! ";
    ci.valueExample = " - ";
    ci.remarks = "Objects are only used in streams!";

    ConstructorFunctions<PTuple> ptf;
    TypeConstructor* ptuple = new TypeConstructor( ci, ptf );
    ptuple->AssociateKind( Kind::PTUPLE() );
    AddTypeConstructor( ptuple,true );

/*
4 Operators

*/
    // The description object oi is reused: its fields are overwritten
    // before each operator is created.
    Operator* op = 0;
    OperatorInfo oi;

    oi.name = "pfeed";
    oi.signature = "rel(tuple(y)) -> stream(ptuple(y))";
    oi.syntax = "_ pfeed[int k]";
    oi.meaning = "Creates a stream containing marker tuples. "
                 "Every max(|k|,2) tuples a new marker tuple will "
                 "be inserted.";
    op = new Operator(
           oi,
           psm::pfeed_vm,
           psm::pfeed_tm
         );
    AddOperator( op,true );

    oi.name = "pdelete";
    oi.signature = "stream(ptuple(y)) -> stream(tuple(y))";
    oi.syntax = "_ pdelete";
    oi.meaning = "Removes marker tuples from a stream";
    op = new Operator(
           oi,
           psm::pdelete_vm,
           psm::pdelete_tm
         );
    AddOperator( op,true );

    // PSTREAM1 and PSTREAM2 are type mapping operators, hence they are
    // registered without a value mapping.
    oi.name = "PSTREAM1";
    oi.signature = "((stream(ptuple(y))) ...) -> stream(tuple(y))";
    oi.syntax = "Not available";
    oi.meaning = "Type mapping operator";
    op = new Operator(
           oi,
           0,
           psm::PSTREAM1_tm
         );
    AddOperator( op,true );

    oi.name = "PSTREAM2";
    oi.signature = "((...) (stream(ptuple(y))) ...) -> stream(tuple(y))";
    oi.syntax = "Not available";
    oi.meaning = "Type mapping operator";
    op = new Operator(
           oi,
           0,
           psm::PSTREAM2_tm
         );
    AddOperator( op,true );

    oi.name = "puse";
    oi.signature = "stream(ptuple(y)) x ( stream(tuple(y)) "
                   "-> stream(tuple(y)) ) -> stream(tuple(y))";
    oi.syntax = "_ puse[ _ ]";
    oi.meaning = "Hides the marker tuples for the parameter function "
                 "and inserts them again into the result stream";
    op = new Operator(
           oi,
           psm::puse_vm,
           psm::puse_tm
         );
    AddOperator( op,true );

    oi.name = "pjoin1";
    oi.signature = "( stream(ptuple(y1)) rel(tuple(y2)) "
                   "( ( map (stream(tuple(y1))) (rel(tuple(y2))) "
                   "(stream(tuple(z))) ) ... N repeats ... )) "
                   "-> (stream(ptuple(z))).";
    oi.syntax = "_ _ pjoin1[ f1: expr1, f2: expr2, ... ]";
    // A longer description naming the accepted function labels (symj,
    // smj, hj, ilj) used to be assigned here but was overwritten by the
    // text below right away; only the effective text is kept.
    oi.meaning = "Implements the adaptive join for a tuple stream "
                 "and a base relation.";
    op = new Operator(
           oi,
           psm::pjoin1_vm,
           psm::pjoin1_tm
         );
    AddOperator( op,true );

    oi.name = "pjoin2";
    oi.signature = "( stream(ptuple(y1)) stream(ptuple(y2)) "
                   "( ( map (stream(tuple(y1))) (stream(tuple(y2))) "
                   "(stream(tuple(z))) ) ... N repeats ... )) "
                   "-> (stream(ptuple(z))).";
    oi.syntax = "_ _ pjoin2[ f1: expr1, f2: expr2, ... ]";
    oi.meaning = "Implements the adaptive join for two relations "
                 "given as streams. Function f1 will be used for the "
                 "probe join. Each function must have one of the names "
                 "symj (symmjoin), smj (sortmergejoin), hj (hashjoin).";
    op = new Operator(
           oi,
           psm::pjoin2_vm,
           psm::pjoin2_tm
         );
    AddOperator( op,true );

    oi.name = "pcreate";
    oi.signature = "stream(tuple(y)) x int -> stream(ptuple(y))";
    oi.syntax = "_ pcreate[ _ ]";
    oi.meaning = "Consumes a stream and creates a partitioned "
                 "stream like pfeed";
    op = new Operator(
           oi,
           psm::pcreate_vm,
           psm::pcreate_tm
         );
    AddOperator( op,true );

    oi.name = "pcreate2";
    oi.signature = "stream(tuple(y)) x int x int -> stream(ptuple(y))";
    oi.syntax = "_ pcreate2[ _, _ ]";
    oi.meaning = "Creates a partitioned stream but gets the inputs "
                 "stream size as third parameter. This is useful if "
                 "the input stream is returned "
                 "by an index structure and the size is known.";
    op = new Operator(
           oi,
           psm::pcreate2_vm,
           psm::pcreate2_tm
         );
    AddOperator( op,true );

    oi.name = "shuffle";
    oi.signature = "stream(tuple(y)) -> stream(tuple(y))";
    oi.syntax = "_ shuffle";
    oi.meaning = "Randomizes its input: Materializes a stream and "
                 "returns the first 500 tuples in random order.";
    op = new Operator(
           oi,
           psm::shuffle_vm,
           psm::shuffle_tm
         );
    AddOperator( op,true );

    oi.name = "shuffle2";
    oi.signature = "stream(tuple(y)) x int -> stream(tuple(y))";
    oi.syntax = "_ shuffle2[ n ]";
    oi.meaning = "Randomizes its input: Every n/500-th tuple is passed "
                 "directly to the output (non-blocking). The other "
                 "tuples are buffered or materialized if necessary and "
                 "returned afterwards.";
    op = new Operator(
           oi,
           psm::shuffle2_vm,
           psm::shuffle2_tm
         );
    AddOperator( op,true );

    // shuffle3 has the same signature as shuffle and shares its type
    // mapping.
    oi.name = "shuffle3";
    oi.signature = "stream(tuple(y)) -> stream(tuple(y))";
    oi.syntax = "_ shuffle3";
    oi.meaning = "Randomizes its input: Tuples up to the maximum allowed "
                 "memory per operator are read into a buffer. Out of "
                 "this a random sample of 500 tuples is drawn and "
                 "returned first. Afterwards all remaining tuples of "
                 "the buffer and stream are returned.";
    op = new Operator(
           oi,
           psm::shuffle3_vm,
           psm::shuffle_tm
         );
    AddOperator( op,true );

    oi.name = "memshuffle";
    oi.signature = "stream(ptuple(y)) -> stream(ptuple(y)), "
                   "stream(tuple(y)) -> stream(tuple(y))";
    oi.syntax = "_ memshuffle";
    oi.meaning = "Overloaded operator which shuffles tuples in a "
                 "memory buffer and outputs them in random order. "
                 "For large input streams the randomness might be "
                 "insufficient.";
    op = new Operator(
           oi,
           psm::memshuffle_vm,
           psm::memshuffle_tm
         );
    AddOperator( op,true );

    oi.name = "memshuffle2";
    oi.signature = "stream(ptuple(y)) -> stream(ptuple(y)), "
                   "stream(tuple(y)) -> stream(tuple(y))";
    oi.syntax = "_ memshuffle2";
    oi.meaning = "Shuffles a stream of tuples in a "
                 "memory buffer and outputs them in random order. ";
    op = new Operator(
           oi,
           psm::memshuffle2_vm,
           psm::memshuffle2_tm
         );
    AddOperator( op,true );

    oi.name = "pshow";
    oi.signature = "stream(ptuple(y)) -> stream(ptuple(y))";
    oi.syntax = "_ pshow";
    oi.meaning = "Display the marker tuples' information.";
    op = new Operator(
           oi,
           psm::pshow_vm,
           psm::pshow_tm
         );
    AddOperator( op,true );

    // runtime evaluates its arguments on its own (see runtime_vm)
    op = new Operator( runtimeInfo(), psm::runtime_vm, psm::runtime_tm );
    AddOperator( op,true );
    op->SetRequestsArguments();

    // the three sortmergejoin_r variants share description and type
    // mapping, only name and value mapping differ
    AddOperator( sortmergejoinrInfo("sortmergejoin_r"),
                 sortmergejoinr_vm,
                 extrelationalg::JoinTypeMap<false, 1> );

    AddOperator( sortmergejoinrInfo("sortmergejoin_r2"),
                 sortmergejoinr2_vm,
                 extrelationalg::JoinTypeMap<false, 1> );

    AddOperator( sortmergejoinrInfo("sortmergejoin_r3"),
                 sortmergejoinr3_vm,
                 extrelationalg::JoinTypeMap<false, 1> );
  }

  // We don't care about the deletion of Algebra and TypeConstructor
  // instances since they will be destroyed when the process terminates.
  ~PartStreamAlgebra() {}
};
/*
5 Initialization
*/
// Entry point of the algebra: stores the nested list and query processor
// references in the global variables nl and qp, creates the relations
// SEC2PJOIN and SEC2PJOINCOST, inserts them into the SystemTables
// instance and returns a new algebra object.
extern "C"
Algebra*
InitializePartitionedStreamAlgebra(NestedList *nlRef, QueryProcessor *qpRef)
{
  nl = nlRef;
  qp = qpRef;

  pjoinRel = new PJoinRel("SEC2PJOIN");
  costRel = new CostRel("SEC2PJOINCOST");

  SystemTables& tables = SystemTables::getInstance();
  tables.insert(pjoinRel);
  tables.insert(costRel);

  Algebra* algebra = new PartStreamAlgebra();
  return algebra;
}