secondo/Algebras/Groupby/GroupbyAlgebra.cpp

/*

This file is part of SECONDO.

Copyright (C) 2004-2012, University in Hagen, Faculty of Mathematics and
Computer Science, Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


//paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}]
//paragraph [10] Footnote: [{\footnote{] [}}]
//[TOC] [\tableofcontents]
//[_] [\_]
//[x] [\ensuremath{\times}]
//[->] [\ensuremath{\rightarrow}]
//[>] [\ensuremath{>}]
//[<] [\ensuremath{<}]

\newpage

[1] Implementation of Module Groupby Algebra

April 2012. Dieter Capek

[TOC]

\newpage


1 Includes and Defines

*/


#include <vector>
#include <stack>
#include <limits.h>

#undef TRACE_ON
#include "LogMsg.h"
#define TRACE_OFF

#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "QueryProcessor.h"
#include "AlgebraManager.h"
#include "CPUTimeMeasurer.h"
#include "StandardTypes.h"
#include "Counter.h"
#include "Algebras/TupleIdentifier/TupleIdentifier.h"
#include "Progress.h"
#include "RTuple.h"
#include "Symbols.h"
#include "ListUtils.h"
#include "DateTime.h"
#include "Stream.h"
#include "Algebras/FText/FTextAlgebra.h"
#include "SecondoCatalog.h"

#define NUMBUCKETS 99997
#define Normal_Merge 0
#define Symmetric_Merge 1


extern NestedList* nl;
extern QueryProcessor* qp;
extern AlgebraManager* am;

using namespace listutils;
using namespace std;


/*
2 Operator groupby2

Groups an input tuple stream using hashing.
The operator supports aggregation with or without grouping.
A list of aggregation functions is supported.
The operator supports normal and symmetric merging of attributes.
The operator respects main memory limits and partitions the problem into phases.
The operator supports Secondo progress estimation.

2.1 Specification

Operator description for the Secondo user.

*/

// Operator specification shown to the Secondo user. It is a nested list with
// the headings "Signature", "Syntax" and "Meaning" followed by one text
// element per heading. The parentheses inside the signature text are
// balanced, so each (name x function x init) triple and the result stream
// type are closed where they end.
const string groupby2Spec =
  "( ( \"Signature\" \"Syntax\" \"Meaning\"  ) "
  "( <text>stream(Tuple) x AttrList x "
  "(NewAttr-1 x (Tuple x Data -> Data) x Data) .. "
  "(NewAttr-j x (Data x Data -> Data) x (Tuple -> Data)) -> "
  "stream(Tuple(Attrlist) o Tuple([NewAttr-1: Data]..[NewAttr-j: Data]))"
  "</text--->"

  "<text>_ groupby2 [list; funlist]</text--->"

  "<text>Groups a tuple stream according to the attributes "
  "in AttrList and computes aggregate functions for each group. "
  "The aggregation attributes are appended to the grouping attributes."
  "</text--->"
  ") )";


// One element of the per-group stack that is used for symmetric merging.
// A stack of these entries is linked into the group tuple in place of the
// aggregate attribute until the stack is collapsed again.
struct AggrStackEntry {
  int level;          // number of merge steps that produced the value;
                      // 0 for a value computed directly from an input tuple
  Attribute* value;   // (partial) aggregate value held by this entry
};


/*
2.2 Type Mapping Function

Checks if the supplied data types are correct for the operator.
Returns the data types which result from the operator run.
Using the Secondo Append mechanism the number of grouping attributes, their
positions, the number of aggregate functions and the types of merging required
are forwarded to the value mapping function.

*/


// Type Mapping Function for groupby2 =========================================
//
// Expected arguments: (stream(tuple(X)) (g1 .. gn) (fun-1 .. fun-j)).
// Each fun-i is a triple, either
//   (name function initial-value)   -> normal merging, or
//   (name function1 function2)      -> symmetric merging.
// On success the result is
//   (APPEND (n pos-1 .. pos-n j merge-1 .. merge-j) (stream (tuple (...))))
// where n is the number of grouping attributes, pos-i the value returned by
// FindAttribute for grouping attribute i, j the number of aggregate
// functions and merge-i is Normal_Merge or Symmetric_Merge. The result tuple
// consists of the grouping attributes followed by the new attributes.
// Any violation yields a type error.
ListExpr groupby2TypeMap(ListExpr args)
{
  string tupleSymbolStr = Tuple::BasicType();

  // ---- argument count and basic shape --------------------------------------
  if (!nl->HasLength(args, 3)) {
    return listutils::typeError("Need to specify three parameters.");
  }
  ListExpr streamArg = nl->First(args);    // input stream
  ListExpr groupArg  = nl->Second(args);   // list of grouping attributes
  ListExpr funArg    = nl->Third(args);    // aggregation functions

  // only the list of grouping attributes may be empty
  if (nl->IsEmpty(streamArg) || nl->IsEmpty(funArg)) {
    return listutils::typeError("Mandatory argument is missing.");
  }
  if (!Stream<Tuple>::checkType(streamArg)) {
    return listutils::typeError("First argument must be of type stream.");
  }
  if (nl->IsAtom(groupArg)) {
    return listutils::typeError("AttributeList is an atom");
  }

  // ---- grouping attributes -------------------------------------------------
  // nameList collects (name type) pairs of the result tuple, posList the
  // positions of the grouping attributes; the *Tail variables always refer
  // to the last element so that Append can extend the lists.
  ListExpr nameList = nl->TheEmptyList();
  ListExpr nameTail = nl->TheEmptyList();
  ListExpr posList  = nl->TheEmptyList();
  ListExpr posTail  = nl->TheEmptyList();
  bool haveGroupAttr = false;

  for (ListExpr remaining = groupArg; !nl->IsEmpty(remaining);
       remaining = nl->Rest(remaining))
  {
    ListExpr attrSymbol = nl->First(remaining);
    if (nl->AtomType(attrSymbol) != SymbolType) {
      return listutils::typeError("Wrong format for an attribute name");
    }
    string attrname = nl->SymbolValue(attrSymbol);

    // look the attribute up in the tuple description of the stream
    ListExpr attrtype = nl->TheEmptyList();
    int position =
      FindAttribute(nl->Second(nl->Second(streamArg)), attrname, attrtype);
    if (!position) {
      string errMsg = "groupby2: Attribute " + attrname +
        " not present in input stream";
      return listutils::typeError(errMsg);
    }

    if (haveGroupAttr) {
      nameTail = nl->Append(nameTail, nl->TwoElemList(attrSymbol, attrtype));
      posTail  = nl->Append(posTail, nl->IntAtom(position));
    } else {
      haveGroupAttr = true;
      nameList = nl->OneElemList(nl->TwoElemList(attrSymbol, attrtype));
      nameTail = nameList;
      posList  = nl->OneElemList(nl->IntAtom(position));
      posTail  = posList;
    }
  }

  // ---- aggregate functions -------------------------------------------------
  if (nl->ListLength(funArg) < 1) {
    return listutils::typeError("Must specify one aggregation function.");
  }

  ListExpr mergeList = nl->TheEmptyList();   // one merge type per function
  ListExpr mergeTail = nl->TheEmptyList();

  for (ListExpr remaining = funArg; !nl->IsEmpty(remaining);
       remaining = nl->Rest(remaining))
  {
    ListExpr funSpec = nl->First(remaining);
    if (nl->ListLength(funSpec) != 3) {
      return listutils::typeError("Each function must have three elements.");
    }
    ListExpr newAttr   = nl->First(funSpec);    // name of the new attribute
    ListExpr mapDef    = nl->Second(funSpec);   // aggregate function
    ListExpr firstInit = nl->Third(funSpec);    // initial value or function2

    if (!nl->IsAtom(newAttr) || nl->AtomType(newAttr) != SymbolType) {
      return listutils::typeError("Attribut name for function is not valid.");
    }
    if (!listutils::isMap<2>(mapDef)) {
      return listutils::typeError("Aggregation function is not valid.");
    }
    // Type of the new attribute: the second argument type of the function.
    // The checks below force it to be equal to the function's result type.
    ListExpr mapOut = nl->Third(mapDef);

    int mergeKind;
    if (listutils::isDATA(firstInit)) {
      // normal merging:  name : function : initial value
      ListExpr streamTuple = nl->Second(streamArg);
      if (!nl->Equal(streamTuple, nl->Second(mapDef))) {
        return listutils::typeError(
          "Map argument 1 must be tuple from stream.");
      }
      if (!nl->Equal(firstInit, nl->Third(mapDef))) {
        return listutils::typeError(
          "Map argument 2 and start value must have same type.");
      }
      if (!nl->Equal(firstInit, nl->Fourth(mapDef))) {
        return listutils::typeError(
          "Map result and start value must have same type.");
      }
      mergeKind = Normal_Merge;
    } else {
      // symmetric merging:  name : function1 : function2
      if (!listutils::isMap<1>(firstInit)) {
        return listutils::typeError("Function2 must have one argument.");
      }
      ListExpr streamTuple = nl->Second(streamArg);
      if (!nl->Equal(streamTuple, nl->Second(firstInit))) {
        return listutils::typeError(
          "Function2 argument must be stream tuple.");
      }
      ListExpr fun1Arg1   = nl->Second(mapDef);
      ListExpr fun1Arg2   = nl->Third(mapDef);
      ListExpr fun1Result = nl->Fourth(mapDef);
      ListExpr fun2Result = nl->Third(firstInit);

      if (!(listutils::isDATA(fun1Arg1) && listutils::isDATA(fun1Arg2) &&
            listutils::isDATA(fun1Result) && listutils::isDATA(fun2Result))) {
        return listutils::typeError(
      "Function1 arguments and both functions results must be of kind DATA.");
      }
      if (!(nl->Equal(fun1Arg1, fun1Arg2) && nl->Equal(fun1Arg1, fun1Result)
            && nl->Equal(fun1Arg1, fun2Result))) {
        return listutils::typeError(
      "Function1 arguments and both functions results must be of same type.");
      }
      mergeKind = Symmetric_Merge;
    }

    // remember the merge type of this function
    if (nl->IsEmpty(mergeList)) {
      mergeList = nl->OneElemList(nl->IntAtom(mergeKind));
      mergeTail = mergeList;
    } else {
      mergeTail = nl->Append(mergeTail, nl->IntAtom(mergeKind));
    }

    // append (name type) of the new attribute to the result description
    const bool resultHasEntries = nl->EndOfList(nameTail)
                                  && !nl->IsEmpty(nameTail)
                                  && !nl->IsAtom(nameTail);
    if (resultHasEntries) {
      nameTail = nl->Append(nameTail, nl->TwoElemList(newAttr, mapOut));
    } else {
      // no grouping attribute was given, this is the first entry
      nameList = nl->OneElemList(nl->TwoElemList(newAttr, mapOut));
      nameTail = nameList;
    }
  }

  // ---- information forwarded to the value mapping via APPEND ---------------
  // number of grouping attributes, their positions,
  // number of aggregate functions, their merge types
  ListExpr groupCount = nl->OneElemList(nl->IntAtom(nl->ListLength(posList)));
  ListExpr funCount = nl->OneElemList(nl->IntAtom(nl->ListLength(mergeList)));
  ListExpr groupInfo  = concat(groupCount, posList);
  ListExpr mergeInfo  = concat(funCount, mergeList);
  ListExpr appendInfo = concat(groupInfo, mergeInfo);

  // names of grouping attributes and new attributes must not collide
  if (!CompareNames(nameList)) {
    return listutils::typeError("Attribute names are not unique.");
  }

  return nl->ThreeElemList(
    nl->SymbolAtom(Symbol::APPEND()),
    appendInfo,
    nl->TwoElemList(
      nl->SymbolAtom(Symbol::STREAM()),
      nl->TwoElemList(nl->SymbolAtom(tupleSymbolStr), nameList)));
}  // end of type mapping for groupby2 operator


/*
2.3 C++ Class groupby2LocalInfo

This class keeps all local operator information between the individual
calls from the query processor. The class contains all methods required to
initialize groups, aggregate tuples into groups and estimate operator cost.

2.3.1 Local Data Elements

Local data contains all aggregation results, information on tuple buffers and
tuple scans, available and used memory and progress information.

*/


// groupby2LocalInfo class ====================================================

class groupby2LocalInfo: public ProgressLocalInfo
{
public:
  Tuple *t;                    // first input tuple, stored during OPEN
  TupleType *resultTupleType;  // tuple type of the result, created in OPEN

  int numberatt;             // number of grouping attributes
  int noOffun;               // number of aggregate functions to build
  int *MergeType;            // array with noOffun entries, each either
                             // Normal_Merge or Symmetric_Merge

  // Actual buffers are created during OPEN
  TupleBuffer *TB_In, *TB_Out, *TB_Temp;   // to save unprocessed input tuples
  TupleBuffer *TB_Group;    // to save group tuples in memory out situations
  GenericRelationIterator* In_Rit;  // presumably the scan over TB_In
                                    // - TODO confirm
  bool newGroupsAllowed;    // presumably false once memory is exhausted in
                            // the current phase - TODO confirm

  long MAX_MEMORY, Used_Memory;   // max. memory and actually used memory
  bool FirstREQUEST;   // indicates the start of a phase
  // ReturnBucket/ReturnElem: presumably the position (bucket, element) of
  // the result scan within hBucket - TODO confirm
  unsigned int ReturnBucket, ReturnElem,
    No_RetTuples,      // number of result tuples returned
    No_GTuples,        // number of group tuples in memory
    Phase,             // phase the operator currently runs in
    SumGTuples,        // sum of group tuples returned
    SumITuples,        // sum of input tuples processed
    SumDiskData,       // data volume written to secondary storage
    tup_aggr,          // number of input tuples aggregated into groups
    tup_n,             // number of input tuples (available from phase 2)
    read_this_phase;   // tuples read from TB_In this phase (from phase 2)

  // progress information
  unsigned int stableValue;
  bool sizesFinal;
  double *attrSizeTmp;      // array, released with delete[] in the destructor
  double *attrSizeExtTmp;   // array, released with delete[] in the destructor

  // Hash buckets holding the group tuples. A group tuple is stored in the
  // bucket (sum of the hash values of its grouping attributes) % NUMBUCKETS.
  vector<Tuple*> hBucket[NUMBUCKETS];   // data structure for hash buckets

  // Creates an empty local info object: all pointers are null, all counters
  // are zero, new groups are allowed and the next REQUEST starts a phase.
  // ReturnElem is part of the initializer list as well, so that no scalar
  // member starts with an indeterminate value. The initializers are listed
  // in declaration order, which is the order in which they are executed.
  groupby2LocalInfo() : t(NULL), resultTupleType(NULL),
    numberatt(0), noOffun(0), MergeType(NULL),
    TB_In(NULL), TB_Out(NULL), TB_Temp(NULL), TB_Group(NULL),
    In_Rit (NULL), newGroupsAllowed(true),
    MAX_MEMORY(0), Used_Memory(0), FirstREQUEST(true),
    ReturnBucket(0), ReturnElem(0),
    No_RetTuples(0), No_GTuples(0), Phase(0),
    SumGTuples(0), SumITuples(0), SumDiskData(0), tup_aggr(0), tup_n(0),
    read_this_phase(0),
    stableValue(50),sizesFinal(false),
    attrSizeTmp(0), attrSizeExtTmp(0)
  {}


/*
2.3.2 Function InitTuple

Get first function values from tuple and initial values.
tres is the group tuple, s the tuple from the input stream.

*/


  // Fills the aggregate slots of a new group tuple from its first input
  // tuple.
  //   tres : group tuple; aggregate k is stored at position numberatt + k
  //   s    : input tuple that opens the group
  //   addr : supplier of the function list
  // Normal merging stores function(s, initial value) as attribute.
  // Symmetric merging stores a pointer to a new stack (cast to Attribute*)
  // that holds function2(s) as its only entry with level 0.
  // Used_Memory is increased by the size of everything that was stored.
  void InitTuple (Tuple* tres, Tuple* s, Supplier addr)
  {
    Word evalResult;

    for (int fn = 0; fn < noOffun; fn++) {
      // entry fn of the function list
      Supplier funEntry = qp->GetSupplier( addr, fn);

      if (MergeType[fn] != Normal_Merge) {
        // symmetric merge: evaluate the tuple->data function on s
        Supplier tupleFun = qp->GetSupplier( funEntry, 2);
        ArgVectorPointer tupleFunArgs = qp->Argument(tupleFun);
        (*tupleFunArgs)[0].setAddr(s);           // tuple is only argument
        qp->Request( tupleFun, evalResult);
        Attribute *firstValue = ((Attribute*)evalResult.addr)->Clone();

        // the value becomes the bottom entry of a fresh stack
        AggrStackEntry bottom;
        bottom.level = 0;
        bottom.value = firstValue;
        stack<AggrStackEntry> *aggrStack = new stack<AggrStackEntry> ();
        aggrStack->push(bottom);
        // the stack takes the place of the aggregate attribute
        tres->PutAttribute(numberatt + fn, (Attribute*) aggrStack);

        Used_Memory += sizeof( *aggrStack)
          + sizeof(AggrStackEntry)
          + firstValue->Sizeof() + firstValue->getUncontrolledFlobSize();
        continue;
      }

      // normal merge: evaluate the initial value, then function(s, initial)
      Supplier mergeFun = qp->GetSupplier( funEntry, 1);
      ArgVectorPointer mergeArgs = qp->Argument(mergeFun);
      Supplier initSupplier = qp->GetSupplier( funEntry, 2);
      qp->Request( initSupplier, evalResult);
      Attribute *initValue = ((Attribute*)evalResult.addr)->Clone();

      (*mergeArgs)[0].setAddr(s);
      (*mergeArgs)[1].setAddr(initValue);
      qp->Request( mergeFun, evalResult);
      Attribute *aggregate = ((Attribute*)evalResult.addr)->Clone();

      // aggregates follow the grouping attributes
      tres->PutAttribute( numberatt + fn, aggregate);
      Used_Memory +=
        aggregate->Sizeof() + aggregate->getUncontrolledFlobSize();
      // the cloned initial value was only needed as function argument
      initValue->DeleteIfAllowed();
    }
  } // end of InitTuple


/*
2.3.3 Function AggregateTuple

Aggregate input tuple s into group tuple tres.
tres is the group tuple, s the tuple from the input stream.

*/

  // Merges input tuple s into the existing group tuple tres.
  //   tres : group tuple; aggregate k lives at position numberatt + k
  //   s    : input tuple belonging to the group
  //   addr : supplier of the function list
  // Normal merging replaces the stored aggregate by function(s, aggregate).
  // Symmetric merging evaluates function2(s) and combines it with the top
  // of the stack as long as the top entry has the same level as the carried
  // value; each combination raises the level by one. The final value is
  // pushed with the level reached.
  // Used_Memory follows every attribute that is removed or added.
  void AggregateTuple (Tuple* tres, Tuple* s, Supplier addr)
  {
    Word evalResult;

    for (int fn = 0; fn < noOffun; fn++) {
      Supplier funEntry = qp->GetSupplier( addr, fn);
      Supplier mergeFun = qp->GetSupplier( funEntry, 1);
      ArgVectorPointer mergeArgs = qp->Argument(mergeFun);
      const int slot = numberatt + fn;

      if (MergeType[fn] == Normal_Merge) {
        Attribute *oldValue = tres->GetAttribute( slot);
        (*mergeArgs)[0].setAddr(s);
        (*mergeArgs)[1].setAddr(oldValue);
        qp->Request( mergeFun, evalResult);
        Attribute *newValue = ((Attribute*)evalResult.addr)->Clone();

        // the attribute size can change during the merge
        Used_Memory -=
          oldValue->Sizeof() + oldValue->getUncontrolledFlobSize();
        Used_Memory +=
          newValue->Sizeof() + newValue->getUncontrolledFlobSize();
        // PutAttribute does an implicit DeleteIfAllowed on the old attr
        tres->PutAttribute( slot, newValue);
        continue;
      }

      // symmetric merge: the slot holds a pointer to the stack
      stack<AggrStackEntry> *aggrStack =
        (stack<AggrStackEntry> *) tres->GetAttribute(slot);

      // evaluate tuple->data for the new tuple
      Supplier tupleFun = qp->GetSupplier( funEntry, 2);
      ArgVectorPointer tupleFunArgs = qp->Argument(tupleFun);
      (*tupleFunArgs)[0].setAddr(s);
      qp->Request( tupleFun, evalResult);
      Attribute *carry = ((Attribute*)evalResult.addr)->Clone();

      // combine with the top entry while the levels match
      int level = 0;
      for ( ; !aggrStack->empty() && (level == aggrStack->top().level);
            level++) {
        mergeArgs = qp->Argument(mergeFun);
        (*mergeArgs)[0].setAddr(carry);
        Attribute *topValue = aggrStack->top().value;
        (*mergeArgs)[1].setAddr(topValue);
        qp->Request( mergeFun, evalResult);

        carry->DeleteIfAllowed();
        carry = ((Attribute*)evalResult.addr)->Clone();

        // remove the consumed top entry
        Used_Memory -= sizeof(AggrStackEntry)
          + topValue->Sizeof() + topValue->getUncontrolledFlobSize();
        topValue->DeleteIfAllowed();
        aggrStack->pop();
      }

      // push the (possibly merged) value with the level reached
      AggrStackEntry merged;
      merged.level = level;
      merged.value = carry;
      aggrStack->push(merged);

      Used_Memory += sizeof(AggrStackEntry)
        + carry->Sizeof() + carry->getUncontrolledFlobSize();
    }
  } // end of AggregateTuple


/*
2.3.4 Function ShrinkStack

Shrink the stacks for symmetric merging to a regular tuple.

*/

  // Turns every symmetric-merge stack of group tuple t into a regular
  // attribute.
  //   t    : group tuple whose symmetric slots hold stack pointers
  //   addr : supplier of the function list
  // The entries are folded from the top of the stack downwards with the
  // aggregate function; the result replaces the stack in the tuple and the
  // stack is deleted. Used_Memory is reduced by the stack, its entries and
  // the intermediate values. The stack must hold at least one entry.
  // NOTE(review): the slot still contains the stack pointer when
  // PutAttribute is called - confirm how PutAttribute treats the previous
  // slot content here.
  void ShrinkStack (Tuple* t, Supplier addr)
  {
    Word evalResult;

    for (int fn = 0; fn < noOffun; fn++) {
      if (MergeType[fn] != Symmetric_Merge) continue;

      // aggregate function of entry fn and its argument vector
      Supplier funEntry = qp->GetSupplier( addr, fn);
      Supplier mergeFun = qp->GetSupplier( funEntry, 1);
      ArgVectorPointer mergeArgs = qp->Argument(mergeFun);

      stack<AggrStackEntry> *aggrStack =
        (stack<AggrStackEntry> *) t->GetAttribute(numberatt+fn);

      // start with the top entry; its value stays accounted in Used_Memory
      Attribute *folded = aggrStack->top().value;
      Used_Memory -= sizeof(AggrStackEntry);
      aggrStack->pop();

      for ( ; !aggrStack->empty(); aggrStack->pop()) {
        Attribute *below = aggrStack->top().value;
        (*mergeArgs)[0].setAddr(folded);
        (*mergeArgs)[1].setAddr(below);
        qp->Request( mergeFun, evalResult);

        // both inputs are released, the result takes their place
        Used_Memory -= sizeof(AggrStackEntry)
          + below->Sizeof() + below->getUncontrolledFlobSize()
          + folded->Sizeof() + folded->getUncontrolledFlobSize();
        below->DeleteIfAllowed();
        folded->DeleteIfAllowed();

        folded = ((Attribute*)evalResult.addr)->Clone();
        Used_Memory += folded->Sizeof() + folded->getUncontrolledFlobSize();
      }

      // the folded value becomes the aggregate attribute
      t->PutAttribute(numberatt+fn, folded);
      Used_Memory -= sizeof(*aggrStack);
      delete aggrStack;
    }
  } // end ShrinkStack

/*
2.3.5 Function RestoreGroup

Restore a group tuple to memory.

*/

  // Puts group tuple t back into the in-memory hash table.
  //   t : group tuple with regular attributes in all aggregate slots
  // Used_Memory is increased by the extended size of the tuple. For every
  // symmetric-merge function the aggregate attribute is wrapped into a new
  // one-element stack (level 0) that replaces the attribute in the tuple.
  // Finally the tuple is appended to the bucket selected by the sum of the
  // hash values of its grouping attributes modulo NUMBUCKETS.
  void RestoreGroup (Tuple* t)
  {
    Used_Memory += t->GetExtSize();

    // hash code over the grouping attributes; size_t, so never negative
    size_t hashSum = 0;
    for (int k = 0; k < numberatt; k++) hashSum += t->HashValue(k);
    const size_t bucket = hashSum % NUMBUCKETS;

    // create one element stacks for symmetric merge
    for (int i = 0; i < noOffun; i++) {
      if (MergeType[i] != Symmetric_Merge) continue;

      AggrStackEntry entry;
      entry.level = 0;
      // The stack needs its own reference to the attribute so that the
      // value survives the PutAttribute below. The pointer returned by
      // Copy() is stored instead of the original one: Copy() is assumed to
      // return a clone when it cannot add a further reference to the
      // object it was called on (see Attribute.h) - with the return value
      // ignored that clone would be lost and the stack would keep a pointer
      // without owning a reference.
      entry.value = t->GetAttribute(numberatt + i)->Copy();

      stack<AggrStackEntry> *newstack = new stack<AggrStackEntry> ();
      newstack->push(entry);
      Used_Memory += sizeof( *newstack) + sizeof(AggrStackEntry);
      // Link the stack into the tuple after the group attributes
      t->PutAttribute(numberatt + i, (Attribute*) newstack);
    }

    // insert group tuple into its bucket
    hBucket[bucket].push_back(t);
  } // end RestoreGroup


  // Releases the result tuple type, the tuple buffers TB_Group, TB_In and
  // TB_Out and the arrays MergeType, attrSizeTmp and attrSizeExtTmp.
  // TB_Temp and the tuples referenced from hBucket are not touched here.
  ~groupby2LocalInfo() {
     if (resultTupleType != 0) {
       resultTupleType->DeleteIfAllowed();
     }
     resultTupleType = 0;

     delete TB_Group;
     delete TB_In;
     delete TB_Out;

     // delete[] accepts null pointers, no guard is required
     delete[] MergeType;
     MergeType = 0;

     delete[] attrSizeTmp;
     attrSizeTmp = 0;

     delete[] attrSizeExtTmp;
     attrSizeExtTmp = 0;
  } // end destructor


/*
2.3.6 Function ReportStatus

Test function: report status information on processing in phases.

*/

  // Test helper: writes the caller supplied headline followed by the phase,
  // the memory and the tuple counters to cout, one value per line.
  //   text : headline printed before the values
  // TB_Group and TB_Out are dereferenced without a check, so both buffers
  // must exist when this function is called.
  void ReportStatus (string text)
  {
    cout << endl << text << endl;
    cout << "Phase:             " << Phase << endl;
    cout << "Used_Memory:       " << Used_Memory << endl;
    cout << "#groups returned. No_RetTuples = " << No_RetTuples << endl;
    cout << "#groups created. No_GTuples    = " << No_GTuples << endl;
    cout << "#groups in TB_Group:    " << TB_Group->GetNoTuples() << endl;
    cout << "#raw tuples in TB_Out:  " << TB_Out->GetNoTuples()  << endl;
    cout << "GetTotalSize() of TB_Out:  " << TB_Out->GetTotalSize()  << endl;
    cout << "Sum GTuples returned:  " << SumGTuples  << endl;
    // NOTE(review): prints the member read, not SumITuples, under the
    // label "Sum ITuples processed" - confirm that this is intended.
    cout << "Sum ITuples processed: " << read  << endl;
    cout << "Sum Data sec.storage:  " << SumDiskData  << endl;
  } // end of ReportStatus


/*
2.3.7 Function M3Cost

Calculate cost model M3 in milliseconds.
n=no input tuples, g=no groups, tpct=fraction of input tuples merged already
tpct=0: cost of complete model; tpct in 0-1: cost of remaining problem

*/

  float M3Cost (float n, float g, float tpct)
  {
    const double model_ct2 = 8.35E-08;
    const double model_cg  = 3.35E-07;
    const double model_cf  = 1.46E-06;
    const double model_cio = 6.45E-07;
    double model_n;
    double model_g;
    double model_np;
    double model_gp;
    double model_P;
    double groupby2_cost;
    float tuple_size;
    float completed;

    model_n = n;
    model_g = g;

    model_gp = (Phase == 1) ?
      (float)MAX_MEMORY / (float)Used_Memory * No_GTuples
      : (float)SumGTuples/(float)(Phase-1);
    model_P = ceil(model_g / model_gp);
    model_np = model_n / model_g * model_gp;

    // this is the model M3 cost formula; cost in milliseconds
    // tuple processing cost
    groupby2_cost = (1.0-tpct) * max(0.0, (model_P*(model_n-
      (model_P-1)/2*model_np)-model_g)*ceil(model_gp/(2*NUMBUCKETS))*model_ct2);

    // group building cost
    groupby2_cost += (1.0-tpct) *
                     model_g * ceil(model_gp/(2*NUMBUCKETS)) * model_cg;
    // function evaluation cost
    groupby2_cost += (1.0-tpct) * model_n * noOffun * model_cf;

    // disk storage cost
    if (Phase > 1 && TB_In->GetNoTuples() > 0)
      tuple_size = TB_In->GetTotalSize() / TB_In->GetNoTuples();
    else if (Phase == 1 && TB_Out->GetNoTuples() > 0)
      tuple_size = TB_Out->GetTotalSize() / TB_Out->GetNoTuples();
    else
      tuple_size = 100; // just to assume something

    // calculate the fraction of spooled input tuples
    if (tpct < 0.05)
      completed = 0;
    else if (Phase ==1)
      completed = TB_Out->GetNoTuples()/
                  (2.0*(model_P-1)*(model_n-model_P/2.0*model_np));
    else
      completed =
        (SumITuples - (TB_In->GetNoTuples()-read_this_phase)/2
         + TB_Out->GetNoTuples()/2)
        / ((model_P-1)*(model_n-model_P/2.0*model_np));

    groupby2_cost += max(0.0, (1.0-completed) * tuple_size * model_cio *
      (model_P-1)*(model_n-model_P/2.0*model_np));

    groupby2_cost *= 1000;
    return (groupby2_cost);
  } // end of M3Cost

}; // end of class groupby2LocalInfo


/*
2.4 Value Mapping Function

The argument vector contains the following values:                            \\
args[0] =     input stream of tuples                                          \\
args[1] =     list of grouping attributes                                     \\
args[2] =     list of functions (with elements name, function, initial value) \\
args[3] =     number of grouping attributes (added by APPEND)                 \\
args[4..m-1] = positions of grouping attributes (added by APPEND)            \\
args[m] =     number of aggregate functions (added by APPEND)                 \\
args[m+1..] = type of merging for each aggregate function                     \\

Sample with three grouping attributes and two aggregate functions:            \\
     APPEND (3 1 2 3 2 1 0) is created during type mapping.

The result is:    \\
     arg[3] = 3    Number of grouping attributes                              \\
     arg[4] = 1    Index of first grouping attribute within tuple             \\
     arg[5] = 2    Index of second grouping attribute within tuple            \\
     arg[6] = 3    Index of third grouping attribute within tuple             \\
     arg[7] = 2    Number of aggregate functions                              \\
     arg[8] = 1    Symmetric merging required for function 1                  \\
     arg[9] = 0    Normal merging required for function 2


*/

// Value mapping of the groupby2 stream operator.
//
// Dispatches on the query processor message:
//   OPEN            - opens the input stream, fetches the first tuple and
//                     sets up the local info object (none for empty input)
//   REQUEST         - the first call of a phase aggregates all pending input,
//                     later calls hand out the finished group tuples one by
//                     one; returns YIELD with a tuple in result or CANCEL
//   CLOSE           - closes the input stream
//   CLOSEPROGRESS   - releases the local info object
//   REQUESTPROGRESS - fills the ProgressInfo passed in result.addr; returns
//                     YIELD on success, CANCEL otherwise
//
// args holds the input stream, the grouping attribute list, the function
// list and the values added by APPEND (count and positions of the grouping
// attributes, count and merge types of the aggregate functions).
int groupby2ValueMapping (Word* args, Word& result, int message, Word& local,
                          Supplier supplier)
{
  Word sWord(Address(0));
  groupby2LocalInfo *gbli = (groupby2LocalInfo *) local.addr;
  ListExpr resultType;
  Tuple *current = NULL, *s = NULL, *tres = NULL;
  int attribIdx = 0, vectorpos = 0;
  int PosCountArgument = 3; // position of numberatt info
  // start of positions for grouping attributes
  int PosExtraArguments = PosCountArgument + 1;
  int i, j, k;
  int TuplesInBucket = 0;
  size_t myhash1, myhash2;
  bool SameGroup, DuplicateGroup;
  int funstart;      // position of aggr. function info
  // for progress estimation
  ProgressInfo p1;
  ProgressInfo *pRes;

  // constants for cost estimation
  const double model_cf  = 1.46E-06;
  double mod_cost, mod_cost_rest;
  double mod_progress;
  double mod_n;
  double mod_g;
  double tuple_consumed;
  double read_fraction;   // share of the predecessor's tuples read so far
  double groups_seen;     // groups built in this and all earlier phases


  switch(message)
  {
/*
2.4.1 OPEN message processing

*/
    case OPEN:
      qp->Open(args[0].addr);
      qp->Request(args[0].addr, sWord);

      // Allocate localinfo class and store first tuple
      if (qp->Received(args[0].addr)) {
        if (gbli) delete gbli;
        gbli = new groupby2LocalInfo();
        local.setAddr(gbli);

        gbli->read = 1;
        gbli->t = (Tuple*)sWord.addr;
        resultType = GetTupleResultType(supplier);
        gbli->resultTupleType = new TupleType(nl->Second(resultType));

        // no of grouping attributes
        gbli->numberatt =
          ((CcInt*)args[PosCountArgument].addr)->GetIntval();
        // get the APPEND info on merge type for aggregate functions
        funstart = PosCountArgument + gbli->numberatt + 1;
        // no of attribute functions to build
        gbli->noOffun = ((CcInt*)args[funstart].addr)->GetIntval();
        gbli->noAttrs = gbli->numberatt + gbli->noOffun;

        // check how the aggregates need to be merged
        gbli->MergeType = new int [gbli->noOffun];
        for (i=0; i < gbli->noOffun; i++)
          gbli->MergeType[i] = ((CcInt*)args[funstart+i+1].addr)->GetIntval();

        // TupleBuffers without memory, all tuples go to disk
        gbli->Phase = 1;
        gbli->newGroupsAllowed = true;
        gbli->TB_Group = new TupleBuffer(0);
        gbli->TB_In = new TupleBuffer(0);
        gbli->TB_Out = new TupleBuffer(0);
        gbli->MAX_MEMORY = (qp->GetMemorySize(supplier) * 1024 * 1024);

        cmsg.info("ERA:ShowMemInfo") << "groupby2.MAX_MEMORY ("
                   << (gbli->MAX_MEMORY)/1024 << " KiloByte): " << endl;
        cmsg.send();

        // Adjust memory available for LocalInfo (around 1.2 MB)
        gbli->MAX_MEMORY -= sizeof(*gbli);
        // need at least 1 MB memory to run
        assert(gbli->MAX_MEMORY > 1048576);

        // initialize data for progress estimation
        gbli->attrSizeTmp = new double[gbli->noAttrs];
        gbli->attrSizeExtTmp = new double[gbli->noAttrs];
        for (i = 0; i < gbli->noAttrs; i++) {
          gbli->attrSizeTmp[i] = 0.0;
          gbli->attrSizeExtTmp[i] = 0.0;
        }
      } else {
        // no tuple received: release the state a previous OPEN may have
        // left behind (it would be leaked otherwise) and mark the stream
        // as empty
        delete gbli;
        local.setAddr(0);
      }
      return 0;

/*
2.4.2 REQUEST message processing

*/
    case REQUEST:
      if (!gbli) return CANCEL;                 // empty input stream
      if (gbli->t == 0) return CANCEL;          // stream has ended

      if (gbli->FirstREQUEST) {
        // Test
        // gbli->ReportStatus( "Start (FirstREQUEST) of a Phase." );

        // first REQUEST call: aggregate and return first result tuple
        s = gbli->t;        // first tuple is available from OPEN
        gbli->FirstREQUEST = false;
/*
2.4.3 First REQUEST message: Aggregation without grouping

*/
        if (gbli->numberatt == 0) {
          // there is a single result tuple with function aggregates only
          tres = new Tuple(gbli->resultTupleType);

          // process the first input tuple
          gbli->InitTuple( tres, s, (Supplier) args[2].addr);
          s->DeleteIfAllowed();

          qp->Request(args[0].addr, sWord);      // get next tuple
          s = (Tuple*) sWord.addr;

          // main loop to process input tuples
          while (qp->Received(args[0].addr)) {
            gbli->read++;

            // Get function value from tuple and current value
            gbli->AggregateTuple( tres, s, (Supplier) args[2].addr);
            s->DeleteIfAllowed();
            // get next tuple
            qp->Request(args[0].addr, sWord);
            s = (Tuple*) sWord.addr;
          }  // end-while: get tuples

          // end processing for symmetric merging
          gbli->ShrinkStack (tres, (Supplier) args[2].addr);
          // tres is the completed result tuple
          result.addr = tres;
          return YIELD;
        } // end-if: aggregation without grouping

/*
2.4.4 First REQUEST message: Aggregation with grouping

*/
        // process individual tuple s
        while (s) {
          // get the hash value from the grouping attributes
          myhash1 = 0;
          for (k = 0; k < gbli->numberatt; k++) {
            attribIdx =
              ((CcInt*)args[PosExtraArguments+k].addr)->GetIntval();
            j = attribIdx-1;                // 0 based
            myhash1 += s->HashValue(j);
          }
          // myhash1 is a size_t, so the bucket index can never be negative
          myhash2 = myhash1 % NUMBUCKETS;

          // check the hash bucket if the group is created already
          DuplicateGroup = false;
          TuplesInBucket = gbli->hBucket[myhash2].size();

          // Compare new tuple s to the tuples available in the bucket
          // s: group attributes Gi o non grouping attributes Ai
          // tuples from bucket: group attributes Gi o function values
          for (i=0; (i<TuplesInBucket) && !DuplicateGroup; i++) {
            current = gbli->hBucket[myhash2][i];
            if (!current) break;
            // Compare new tuple to a single one from the hash bucket
            SameGroup = true;
            // Compare the grouping attributes
            for (j=0; (j < gbli->numberatt) && SameGroup && current; j++) {
              attribIdx =
                ((CcInt*)args[PosExtraArguments+j].addr)->GetIntval();
              k = attribIdx - 1;        // 0 based
              // Compare each attribute
              // k is index for input tuple. j is index for aggregate tuple
              if (s->GetAttribute(k)->Compare(current->GetAttribute(j)) != 0){
                SameGroup = false;
                break;
              }
            } // end-for
            if (SameGroup) {
              DuplicateGroup = true;
              vectorpos = i;    // save the index in the vector for deleting
            }
          } // end for

          if ((DuplicateGroup == false) &&
              (gbli->Used_Memory >= gbli->MAX_MEMORY ||
               gbli->newGroupsAllowed == false)  ) {
            // new group required but no more heap space available or
            // no new groups allowed: write original input tuple to tuple buffer
            // NOTE(review): s is not released after AppendTuple; confirm
            // whether the tuple buffer takes ownership or this leaks s
            gbli->TB_Out->AppendTuple(s);

          } else if (DuplicateGroup == false) {  // new group ==================
            // Build group aggregate: grouping attributes o function values
            tres = new Tuple(gbli->resultTupleType);
            // add the memory used by this raw tuple (without attributes)
            gbli->Used_Memory += sizeof(*tres);
            gbli->No_GTuples++;
            gbli->tup_aggr++;

            // copy grouping attributes
            for(i = 0; i < gbli->numberatt; i++) {
              attribIdx =
                ((CcInt*)args[PosExtraArguments+i].addr)->GetIntval();
              tres->CopyAttribute(attribIdx-1, s, i);
            }

            // process first input tuple for this group
            gbli->InitTuple( tres, s, (Supplier) args[2].addr);

            // store aggregate tuple in hash bucket
            gbli->hBucket[myhash2].push_back(tres);
            s->DeleteIfAllowed();

          } else {  // group exists already ===================================
            // current still refers to the matching bucket entry: the search
            // loop stops right after the match was found
            gbli->AggregateTuple( current, s, args[2].addr);
            gbli->tup_aggr++;

            // memory exceeded: group tuple needs to be transferred to disk
            if (gbli->Used_Memory >= gbli->MAX_MEMORY) {
              gbli->newGroupsAllowed = false;
              gbli->ShrinkStack( current, args[2].addr);
              // NOTE(review): current is removed from the bucket but not
              // released here; confirm that TB_Group owns it afterwards
              gbli->TB_Group->AppendTuple( current);
              gbli->Used_Memory -= current->GetExtSize();
              gbli->hBucket[myhash2].
                erase( gbli->hBucket[myhash2].begin()+vectorpos);
            }
            s->DeleteIfAllowed();
          }  // end-if

          // get next tuple
          if (gbli->Phase == 1) {
            // read from input stream
            qp->Request(args[0].addr, sWord);
            s = (Tuple*)sWord.addr;
            gbli->read++;
          } else {
            // read from tuple buffer
            s = gbli->In_Rit->GetNextTuple();
            gbli->read++;
            gbli->read_this_phase++;
          }
        } // end-while

        // all input processed; delete the tuple buffer scan and forget the
        // pointer so that nothing can use or delete the dead scan again
        delete gbli->In_Rit;
        gbli->In_Rit = 0;

        // Aggregates are built completely. Find and return first result tuple.
        result.addr = 0;
        // Find the first hash bucket containing a result tuple
        for(i = 0; i<NUMBUCKETS; i++) {
          TuplesInBucket = gbli->hBucket[i].size();

          if (TuplesInBucket > 0) {
            gbli->ReturnElem = 1;        // next element (0 based)
            gbli->ReturnBucket = i+1;    // next hash bucket
            current = gbli->hBucket[i][0];
            // end processing for symmetric merging attributes
            gbli->ShrinkStack (current, (Supplier) args[2].addr);
            result.setAddr(current);
            gbli->No_RetTuples = 1;
            return YIELD;
          }
        } // end-for
        // empty result
        return CANCEL;
/*
2.4.5 Following REQUEST messages: return result tuples

*/
      } else {
        result.addr = 0;          // did not yet find a tuple

        // no grouping: there is a single result tuple,
        // this was returned already
        if (gbli->numberatt == 0) return CANCEL;

        // If a scan is active or there are still buckets to process
        if (gbli->ReturnElem || gbli->ReturnBucket < NUMBUCKETS) {
          // Find the next result tuple. Process an active scan
          if (gbli->ReturnElem) {
            // There are still tuples available in this bucket
            if (gbli->ReturnElem
                < gbli->hBucket[(gbli->ReturnBucket)-1].size() ) {
              current = gbli->hBucket[(gbli->ReturnBucket)-1][gbli->ReturnElem];
              (gbli->ReturnElem)++;
              result.setAddr(current);
            } else
              gbli->ReturnElem = 0;
          } // end-if

          // No tuples found so far, check next bucket
          if (result.addr == 0 && (gbli->ReturnBucket) < NUMBUCKETS) {
            for(i = gbli->ReturnBucket; i<NUMBUCKETS; i++) {
              TuplesInBucket = gbli->hBucket[i].size();

              if (TuplesInBucket > 0) {
                current = gbli->hBucket[i][0];      // first tuple in bucket
                result.setAddr(current);
                gbli->ReturnElem = 1;        // next tuple
                gbli->ReturnBucket = i+1;    // next bucket
                break;
              } else {
                gbli->ReturnElem = 0;
              }
            } // end-for
          }  // end-if
        } // end-if

        if (current)
          // found a group tuple to return: end processing
          gbli->ShrinkStack (current, (Supplier) args[2].addr);
        else if (gbli->TB_Out->GetNoTuples() == 0  &&
                 gbli->TB_Group->GetNoTuples() > 0)
        { // all groups from memory returned, all input tuples processed
          // completed group tuples stored in TB_Group to return
          // end of program run, no next phase
          if (gbli->Phase > 0) {
            gbli->Phase = 0;        // indicate end of program run
            gbli->In_Rit = gbli->TB_Group->MakeScan();
          }
          current = gbli->In_Rit->GetNextTuple();
          result.setAddr(current);
        }

        if (result.addr) {
          (gbli->No_RetTuples)++;

          // sum up attribute sizes for progress information
          if ( gbli->No_RetTuples <= gbli->stableValue ) {
            for (i = 0; i < gbli->noAttrs; i++) {
              gbli->attrSizeTmp[i] += current->GetSize(i);
              gbli->attrSizeExtTmp[i] += current->GetExtSize(i);
            }
          }
/*
2.4.6 Phase change

*/
          // last group tuple from memory is returned,
          // output buffer contains unprocessed tuples
          if ((gbli->No_RetTuples + gbli->TB_Group->GetNoTuples()
               == gbli->No_GTuples)
              && (gbli->TB_Out->GetNoTuples() > 0))
          {
            // update progress data
            gbli->SumITuples += gbli->TB_Out->GetNoTuples();
            gbli->SumDiskData += gbli->TB_Out->GetTotalSize();
            gbli->SumGTuples += gbli->No_RetTuples;
            if (gbli->Phase==1) gbli->tup_n = gbli->read;

            // Test
            // gbli->ReportStatus( "End of a Phase." );

            // initialize counters for next phase
            gbli->FirstREQUEST = true;   // need to aggregate on next REQUEST
            gbli->No_RetTuples = 0;      // init for next phase
            gbli->No_GTuples = 0;
            gbli->read_this_phase=0;
            (gbli->Phase)++;

            // clear all vectors, the tuples are not touched
            for (i=0; i<NUMBUCKETS; i++) gbli->hBucket[i].clear();
            // the result tuples are now owned by the following operator
            gbli->Used_Memory = 0;

            // bring back to memory as many group tuples as possible
            if (gbli->TB_Group->GetNoTuples() > 0) {
              gbli->In_Rit = gbli->TB_Group->MakeScan();
              current = gbli->In_Rit->GetNextTuple();

              while (current && (gbli->Used_Memory < gbli->MAX_MEMORY)) {
                gbli->RestoreGroup( current);
                gbli->No_GTuples++;
                current = gbli->In_Rit->GetNextTuple();
              }
              gbli->TB_Temp = new TupleBuffer(0);
              if (current) {
                // write the remaining tuples to a new tuple buffer
                // NOTE(review): the tuples read from the scan are not
                // released after AppendTuple; confirm ownership
                while (current) {
                  gbli->TB_Temp->AppendTuple(current);
                  current = gbli->In_Rit->GetNextTuple();
                }
              } else {
                // all groups could be restored new ones can be created
                gbli->newGroupsAllowed = true;
              }
              delete gbli->In_Rit;
              delete gbli->TB_Group;
              gbli->TB_Group = gbli->TB_Temp;
            } // end-if; bring back group tuples

            delete gbli->TB_In;   // input was processed completely
            // the output buffer of the current phase becomes the input for
            // the next phase
            gbli->TB_In = gbli->TB_Out;
            gbli->TB_Out = new TupleBuffer(0);

            // get the first input tuple
            gbli->In_Rit = gbli->TB_In->MakeScan();
            gbli->t = gbli->In_Rit->GetNextTuple();
          } // end of phase change

          return YIELD;
        } else {
          // update progress data
          gbli->SumITuples += gbli->TB_Out->GetNoTuples();
          gbli->SumDiskData += gbli->TB_Out->GetTotalSize();
          gbli->SumGTuples += gbli->No_RetTuples;

          // Test
          // gbli->ReportStatus( "End of a Phase." );
          return CANCEL;
        }
     } // end-if REQUEST processing

/*
2.4.7 CLOSE message processing

*/
    case CLOSE:
      qp->Close(args[0].addr);      // close input stream
      return 0;

/*
2.4.8 CLOSEPROGRESS message processing

*/
    case CLOSEPROGRESS:
      if (gbli) {
        delete gbli;
        local.setAddr(0);
      }
      return 0;

/*
2.4.9 REQUESTPROGRESS message processing

*/
    case REQUESTPROGRESS:
      pRes = (ProgressInfo*) result.addr;
      if (!gbli) return CANCEL;

      if (qp->RequestProgress(args[0].addr, &p1) ) {
        gbli->sizesChanged = false;

        if (!gbli->sizesInitialized) {
          gbli->attrSize = new double[gbli->noAttrs];
          gbli->attrSizeExt = new double[gbli->noAttrs];
        }

        // the third condition makes sure that the actual sizes are used
        if (!gbli->sizesInitialized || p1.sizesChanged ||
            (gbli->No_RetTuples > gbli->stableValue && !gbli->sizesFinal)) {

          if (gbli->No_RetTuples < gbli->stableValue) {
            // for grouping atts: copy predecessor info
            for (i=0; i < gbli->numberatt; i++) {
              attribIdx = ((CcInt*)args[PosExtraArguments+i].addr)->GetIntval();
              gbli->attrSize[i] = p1.attrSize[attribIdx-1];
              gbli->attrSizeExt[i] = p1.attrSizeExt[attribIdx-1];
            }
            // for aggregation results: assume integer
            for (i=0; i < gbli->noOffun; i++) {
              gbli->attrSize[i+gbli->numberatt] = 12;
              gbli->attrSizeExt[i+gbli->numberatt] = 12;
            }
          } else {
            // actual sizes from returned group tuples
            for (i = 0; i < gbli->noAttrs; i++) {
              gbli->attrSize[i] = gbli->attrSizeTmp[i] / gbli->stableValue;
              gbli->attrSizeExt[i] = gbli->attrSizeExtTmp[i]/gbli->stableValue;
            }
            // this is run only once
            gbli->sizesFinal = true;
          }

          // summary sizes
          gbli->Size = 0.0;
          gbli->SizeExt = 0.0;
          for (i = 0; i < gbli->noAttrs; i++) {
            gbli->Size += gbli->attrSize[i];
            gbli->SizeExt += gbli->attrSizeExt[i];
          }

          gbli->sizesInitialized = true;
          gbli->sizesChanged = true;
        }
        pRes->CopySizes(gbli);
        pRes->noAttrs = gbli->noAttrs;

        // write progress information
        if (gbli->numberatt == 0 ||
             (gbli->Phase == 1 && gbli->newGroupsAllowed &&
              gbli->No_GTuples < NUMBUCKETS)) {
          // aggregate without grouping, use model M1   OR
          // Phase 1, small number of groups only; cost in milliseconds
          mod_cost = p1.Card * gbli->noOffun * model_cf * 1000;

          pRes->Card = (gbli->numberatt == 0) ? 1 : gbli->No_GTuples;
          pRes->Time = p1.Time + mod_cost;
          // an estimated input cardinality or total time of zero must not
          // turn the progress value into NaN/inf
          read_fraction = (p1.Card != 0) ? gbli->read / p1.Card : 0.0;
          pRes->Progress = (pRes->Time != 0) ?
            (p1.Progress*p1.Time + read_fraction*mod_cost) / pRes->Time
            : p1.Progress;
          pRes->BTime = pRes->Time;
          pRes->BProgress = pRes->Progress;

        } else {
          mod_n = (gbli->Phase == 1) ? p1.Card : gbli->tup_n;
          // fraction of input tuples already aggregated; computed in double
          // so that large tuple counts are not rounded
          if (mod_n != 0) {
            tuple_consumed = (gbli->Phase == 1) ?
              static_cast<double>(gbli->read - gbli->TB_Out->GetNoTuples())
                / mod_n
              : static_cast<double>(gbli->tup_aggr) / mod_n;
          } else {
            tuple_consumed = 0.0;
          }
          // extrapolate the number of groups; without any consumed input
          // only the groups seen so far are known
          groups_seen = gbli->No_GTuples + gbli->SumGTuples;
          mod_g = (tuple_consumed != 0) ?
            groups_seen / tuple_consumed : groups_seen;

          // cost for complete problem
          mod_cost = gbli->M3Cost( mod_n, mod_g, 0.0);
          // cost for remaining piece of problem
          mod_cost_rest = gbli->M3Cost( mod_n, mod_g, tuple_consumed);

          pRes->Card = mod_g;
          pRes->Time = p1.Time + mod_cost;
          mod_progress = (mod_cost != 0) ?
            (mod_cost - mod_cost_rest) / mod_cost : 0.0;
          pRes->Progress = (pRes->Time != 0) ?
            (p1.Progress*p1.Time + mod_progress*mod_cost) / pRes->Time
            : p1.Progress;
          pRes->BTime = (gbli->Phase == 1) ? pRes->Time : 0;
          pRes->BProgress = (gbli->Phase == 1) ? pRes->Progress : 1;
        }
        return YIELD;
      } else {
        return CANCEL;
      }
  } // end message switch

  return(0);
} // end of groupby2ValueMapping ==============================================


/*
2.5 Operator definition

*/

// Operator instance for "groupby2"; it ties together the specification,
// the value mapping, the selection function and the type mapping, and is
// registered in the GroupbyAlgebra constructor.
Operator groupby2 (
         "groupby2",             // name
         groupby2Spec,           // specification
         groupby2ValueMapping,   // value mapping
         Operator::SimpleSelect, // trivial selection function
         groupby2TypeMap         // type mapping
);


/*

3 Class GroupbyAlgebra

A new subclass GroupbyAlgebra of class Algebra is declared. The only
specialization with respect to class Algebra takes place within the
constructor: all type constructors and operators are registered at the
actual algebra.

*/


class GroupbyAlgebra : public Algebra
{
 public:
  GroupbyAlgebra() : Algebra()
  {
    AddOperator(&groupby2);
    groupby2.SetUsesMemory();
    groupby2.EnableProgress();
  };

  ~GroupbyAlgebra() {};
};

/*

4 Initialization

Each algebra module needs an initialization function. The algebra manager
has a reference to this function if this algebra is included in the list
of required algebras, thus forcing the linker to include this module.

The algebra manager invokes this function to get a reference to the instance
of the algebra class and to provide references to the global nested list
container (used to store constructor, type, operator and object information)
and to the query processor.

*/

extern "C" Algebra*
InitializeGroupbyAlgebra( NestedList* nlRef,
                          QueryProcessor* qpRef,
                          AlgebraManager* amRef )
{
  // store the references handed in by the caller in the file-level
  // globals nl, qp and am
  am = amRef;
  qp = qpRef;
  nl = nlRef;

  // create the algebra instance and hand it back to the caller
  Algebra* const algebra = new GroupbyAlgebra();
  return algebra;
}