/* ---- This file is part of SECONDO. Copyright (C) 2004, University in Hagen, Department of Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- //paragraph [1] Title: [{\Large \bf \begin {center}] [\end {center}}] //[TOC] [\tableofcontents] //[_][\_] [1] Header File of the Graph Algebra December 2007, S.H[oe]cher,M.H[oe]ger,A.Belz,B.Poneleit [TOC] 1 Defines and includes */ #include "Histogram2d.h" #include "ListUtils.h" #include #include "Algebras/ExtRelation-C++/Tupleorder.h" #include "AlmostEqual.h" #include "Symbols.h" #include "Stream.h" using namespace std; namespace hgr { /* 2 Type constructor */ histogram2dInfo h2_i; histogram2dFunctions h2_f; TypeConstructor histogram2dTC( h2_i, h2_f ); /* 3 Constructors and destructor */ Histogram2d::Histogram2d() { } Histogram2d::Histogram2d(bool _defined, size_t sizeX, size_t sizeY) : BaseHistogram(_defined, sizeX * sizeY), rangeX(sizeX + 1), rangeY(sizeY + 1) { } Histogram2d::Histogram2d(const Histogram2d& rhs) : BaseHistogram(rhs), rangeX(rhs.rangeX.Size()), rangeY(rhs.rangeY.Size()) { for (int i = 0; i < rhs.GetNoRangeX(); i++) rangeX.Append( rhs.GetRangeX(i) ); for (int i = 0; i < rhs.GetNoRangeY(); i++) rangeY.Append( rhs.GetRangeY(i) ); } Histogram2d::~Histogram2d() { } Histogram2d& Histogram2d::operator = (const Histogram2d& h) { CopyFrom(&h); return *this; } /* 4 Helper functions 4.1 GetRangeX, GetRangeY */ HIST_REAL Histogram2d::GetRangeX(int i) const { //cout << "rangeX" << endl; assert( 0 <= i && i < GetNoRangeX() ); HIST_REAL p; rangeX.Get(i, &p); return p; } HIST_REAL Histogram2d::GetRangeY(int i) const { //cout << "rangeY" << endl; assert( 0 <= i && i < GetNoRangeY() ); HIST_REAL p; rangeY.Get(i, &p); return p; } /* 4.2 AppendRangeX, AppendRangeY */ void Histogram2d::AppendRangeX(const HIST_REAL& r) { rangeX.Append(r); } void Histogram2d::AppendRangeY(const HIST_REAL& r) { rangeY.Append(r); } /* 4.3 GetNoRangeX, GetNoRangeY */ int Histogram2d::GetNoRangeX() const { return rangeX.Size(); } int Histogram2d::GetNoRangeY() const { return rangeY.Size(); } /* 4.4 IsEmpty */ bool Histogram2d::IsEmpty() const { return (rangeX.Size() == 0) && (rangeY.Size() == 0) && (bin.Size() == 0); } /* 4.5 Clear, Destroy */ void Histogram2d::Clear() { rangeX.clean(); rangeY.clean(); bin.clean(); } void Histogram2d::Destroy() { rangeX.Destroy(); rangeY.Destroy(); bin.Destroy(); } /* 4.6 ResizeRangeX, ResizeRangeY */ void Histogram2d::ResizeRangeX(const int newSize) { if (newSize > 0) { rangeX.clean(); rangeX.resize(newSize); } } void Histogram2d::ResizeRangeY(const int newSize) { if (newSize > 0) { rangeY.clean(); rangeY.resize(newSize); } } /* 4.7 Compare */ int Histogram2d::Compare(const BaseHistogram& b) const { const Histogram2d* h = static_cast(&b); if ( !IsDefined() && !h->IsDefined() ) return 0; if ( !IsDefined() && h->IsDefined() ) return -1; if (IsDefined() && !h->IsDefined() ) return 1; // Both histograms are defined... if (!IsConsistent(false) || !h->IsConsistent(false)) return 1; // and consistent (without order test)... if (GetNoRangeX() != h->GetNoRangeX()) return 1; if (GetNoRangeY() != h->GetNoRangeY()) return 1; // and are of the same size ... if (IsEmpty() && h->IsEmpty()) return 0; // and are not empty. // Comparison of all couples of ranges and bins: bool equalBins = true; bool lessBins = true; int i = 0; while ((equalBins || lessBins) && i < GetNoBin()) { if (i < GetNoRangeX() && (GetRangeX(i) != h->GetRangeX(i))) return 1; if (i < GetNoRangeY() && (GetRangeY(i) != h->GetRangeY(i))) return 1; if (GetBin(i) == h->GetBin(i)) { lessBins = false; } else if (GetBin(i) < h->GetBin(i)) { equalBins = false; } else // GetBin(i) > h->GetBin(i) { equalBins = false; lessBins = false; } i++; } if (equalBins) return 0; if (lessBins) return -1; // Neither smaller nor equal: return 1; } /* 4.8 CompareAlmost */ int Histogram2d::CompareAlmost(const BaseHistogram& b) const { const Histogram2d* h = static_cast(&b); if ( !IsDefined() && !h->IsDefined() ) return 0; if ( !IsDefined() && h->IsDefined() ) return -1; if (IsDefined() && !h->IsDefined() ) return 1; // Both histograms are defined... if (!IsConsistent(false) || !h->IsConsistent(false)) return 1; // and consistent (without order test)... if (GetNoRangeX() != h->GetNoRangeX()) return 1; if (GetNoRangeY() != h->GetNoRangeY()) return 1; // and are of the same size... if (IsEmpty() && h->IsEmpty()) return 0; // and are not empty. // Comparison of all couples of ranges and bins: bool equalBins = true; bool lessBins = true; int i = 0; int cmp; while ((equalBins || lessBins) && i < GetNoBin()) { if (i < GetNoRangeX() && !AlmostEqual(GetRangeX(i), h->GetRangeX(i))) return 1; if (i < GetNoRangeY() && !AlmostEqual(GetRangeY(i), h->GetRangeY(i))) return 1; cmp = CmpReal(GetBin(i), h->GetBin(i)); if (cmp == 0) { lessBins = false; } else if (cmp == -1) { equalBins = false; } else // cmp == 1 { equalBins = false; lessBins = false; } i++; } if (equalBins) return 0; if (lessBins) return -1; // Neither smaller nor equal: return 1; } /* 4.9 IsRefinement */ bool Histogram2d::IsRefinement(const BaseHistogram& b) const { const Histogram2d* h = static_cast(&b); if ( !IsDefined() || !h->IsDefined() ) return false; // Both histograms are defined... if (!IsConsistent(false) || !h->IsConsistent(false)) return false; // and consistent (without order test)... if (IsEmpty() || h->IsEmpty()) return false; // and are not empty... if (!AlmostEqual( GetRangeX(GetNoRangeX()-1), h->GetRangeX(h->GetNoRangeX()-1))) return false; if (!AlmostEqual( GetRangeY(GetNoRangeY()-1), h->GetRangeY(h->GetNoRangeY()-1))) return false; // and their ends are equal. // Now we check each element of the given range array to be also an element // of our own array. If that is the case, our own histogram is a // refinement of the given one. int i = 0; // Index of the range array given as parameter. int j = 0; // Index of our own range array. while (i < h->GetNoRangeX()) { while (!AlmostEqual(h->GetRangeX(i), this->GetRangeX(j))) { if (++j == this->GetNoRangeX()) return false; } i++; } i = 0; j = 0; while (i < h->GetNoRangeY()) { while (!AlmostEqual(h->GetRangeY(i), this->GetRangeY(j))) { if (++j == this->GetNoRangeY()) return false; } i++; } return true; } /* 4.10 IsConsistent A defined histogram2d is consistent if it is either empty or all of the following statements are true: (1) GetNoRangeX() $>$ 1 and (2) GetNoRangeY() $>$ 1 and (3) GetNoBin() == (GetNoRangeX() - 1) * (GetNoRangeY() - 1) and (4) both range arrays are sorted in ascending order and (5) it contains no duplicate entries. */ bool Histogram2d::IsConsistent(const bool checkOrder) const { if (IsEmpty()) return true; if (!(GetNoRangeX() > 1 && GetNoRangeY() > 1 && GetNoBin() == (GetNoRangeX() - 1) * (GetNoRangeY() - 1))) return false; if (checkOrder) { HIST_REAL min = GetRangeX(0); for (int i = 1; i < GetNoRangeX(); i++) { if (CmpReal(GetRangeX(i), min) < 1) // GetRangeX(i) <= min return false; min = GetRangeX(i); } min = GetRangeY(0); for (int i = 1; i < GetNoRangeY(); i++) { if (CmpReal(GetRangeY(i), min) < 1) // GetRangeY(i) <= min return false; min = GetRangeY(i); } } return true; } /* 4.11 Coarsen */ void Histogram2d::Coarsen(const BaseHistogram* b) { const Histogram2d* pattern = static_cast(b); // Precondition: this->IsRefinement(pattern) == true DbArray* newBin = new DbArray(GetNoBin()); HIST_REAL binVal; // Upper range of the current bin in the original histogram int myRangeX = 1; int myRangeY = 1; // Lower range of the current bin in the example histogram int patternRangeX = 1; int patternRangeY = 1; // int maxY = myRangeY; int initY = myRangeY; int maxX = myRangeX; int initX = myRangeX; for (int i = 0; i < pattern->GetNoBin(); i++) { binVal = 0; // Traversing the bins by columns patternRangeY = i%(pattern->GetNoRangeY() - 1) + 1; patternRangeX = i/(pattern->GetNoRangeY() - 1) + 1; if (maxY == GetNoRangeY()) { maxY = 1; initX = maxX; } initY = maxY; myRangeX = initX; // Merging bins while (myRangeX < GetNoRangeX() && CmpReal( GetRangeX(myRangeX), pattern->GetRangeX(patternRangeX)) != 1) { myRangeY = initY; while ( myRangeY < GetNoRangeY() && CmpReal(GetRangeY(myRangeY), pattern->GetRangeY(patternRangeY)) != 1) { binVal += GetBin((myRangeX - 1)*(GetNoRangeY() - 1) + myRangeY - 1); myRangeY++; maxY = max(maxY, myRangeY); } myRangeX++; maxX = max(maxX, myRangeX); } newBin->Append(binVal); } // (int i = 0; i < pattern->GetNoBin(); i++) // Copying new bins and ranges HIST_REAL p; rangeX.clean(); rangeY.clean(); bin.clean(); for (int i = 0; i < pattern->GetNoRangeX(); i++) { rangeX.Append(pattern->GetRangeX(i)); } for (int i = 0; i < pattern->GetNoRangeY(); i++) { rangeY.Append(pattern->GetRangeY(i)); } for (int i = 0; i < newBin->Size(); i++) { newBin->Get(i, p); bin.Append(p); } delete newBin; } /* 5 Mandatory functions 5.1 Operators == and $<$ */ bool Histogram2d::operator == (const BaseHistogram& h) const { return CompareAlmost(h) == 0; } bool Histogram2d::operator < (const BaseHistogram& h) const { return CompareAlmost(h) == -1; } /* 5.2 In */ Word Histogram2d::In(const ListExpr typeInfo, const ListExpr instance, const int errorPos, ListExpr& errorInfo, bool& correct) { NList in(instance); if ( listutils::isSymbolUndefined(instance) || ((in.length()==1) && (listutils::isSymbolUndefined(nl->First(instance))))){ correct = true; return SetWord(new Histogram2d(false)); } Histogram2d* newHist = new Histogram2d(true); const ListExpr *concise = &instance; ListExpr rangeXList; ListExpr rangeYList; ListExpr binList; ListExpr restItems; HIST_REAL minimum = -1000 * FLT_MAX; HIST_REAL currentValue = 0.0; HIST_REAL lastValue = minimum; /* We expect the incoming data to be formatted as follows: * ((rangeX*)(rangeY*)(bin*)) * with rangeX a list of (n+1), rangeY a list of (m+1) class limits * each in ascending order and bin a list of (n * m) values. * To address the content of a certain bin we will indicate * first the rangeX value and second the rangeY value. * All values must be of type real. * * Example: * y=2 0.1 0.5 1.0 1.5 1.5 will be addressed as rangeX=3, rangeY=2. * y=1 1.1 2.2 6.6 7.7 4.4 will be addressed as rangeX=1, rangeY=0. * y=0 5.5 4.4 3.3 2.2 * x=0 x=1 x=2 x=3 */ if ( (nl->IsEmpty(*concise)) // incoming list is empty || (nl->ListLength(*concise) != 3) // or is too short or too long || (nl->IsAtom(nl->First(*concise))) //or does not contain lists || (nl->IsAtom(nl->Second(*concise))) || (nl->IsAtom(nl->Third(*concise))) //or number of bins is not (number of rangeX -1 )(number of rangeY - 1) || (nl->ListLength(nl->Third(*concise)) != (nl->ListLength(nl->First(*concise))-1) // * (nl->ListLength(nl->Second(*concise))-1)) ){ correct = false; newHist->DeleteIfAllowed(); return SetWord( Address(0) ); } // if ( (nl->IsEmpty(*concise)) // incoming list is empty rangeXList = nl->First(*concise); rangeYList = nl->Second(*concise); binList = nl->Third(*concise); // Examine the rangeXList restItems = rangeXList; while ( !nl->IsEmpty(restItems) ){ const ListExpr curr = nl->First(restItems); if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)){ currentValue = nl->RealValue(curr); restItems = nl->Rest(restItems); if (currentValue > lastValue){ newHist->AppendRangeX(currentValue); lastValue = currentValue; } else{// if (currentValue > lastValue) correct = false; newHist->DeleteIfAllowed(); return SetWord( Address(0) ); } // else // if (currentItem > lastItem) } else{// if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)) correct = false; newHist->DeleteIfAllowed(); return SetWord( Address(0) ); } // else // if(nl->IsAtom(nl->First(rangeList) && } // while ( !nl->IsEmpty(rest) ) currentValue = 0.0; lastValue = minimum; // Examine the rangeYList restItems = rangeYList; while ( !nl->IsEmpty(restItems) ){ const ListExpr curr = nl->First(restItems); if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)){ currentValue = nl->RealValue(curr); restItems = nl->Rest(restItems); if (currentValue > lastValue){ newHist->AppendRangeY(currentValue); lastValue = currentValue; } else {// if (currentValue > lastValue) correct = false; newHist->DeleteIfAllowed(); return SetWord( Address(0) ); } // else // if (currentValue > lastValue) } else { // if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)) correct = false; newHist->DeleteIfAllowed(); return SetWord( Address(0) ); } // else // if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)) } // while ( !nl->IsEmpty(rest) ) currentValue = 0.0; // Now examine the bin list restItems = binList; while ( !nl->IsEmpty(restItems) ){ const ListExpr curr = nl->First(restItems); if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)){ currentValue = nl->RealValue(curr); restItems = nl->Rest(restItems); newHist->AppendBin(currentValue); } // if(nl->IsAtom(curr) && (nl->AtomType(curr) == RealType)) else {// if(nl->IsAtom(nl->First(rangeList) && ... correct = false; newHist->DeleteIfAllowed(); return SetWord( Address(0) ); } // if(nl->IsAtom(nl->First(binList) && ... } // while ( !nl->IsEmpty(restItems) ) correct = true; //cout << *newHist << " hash = " << newHist->HashValue() << endl; return SetWord(newHist); } // Word Histogram2d::In(...) /* 5.3 Out */ ListExpr Histogram2d::Out(ListExpr typeInfo, Word value) { Histogram2d* hist = static_cast(value.addr); const int numberRangesX = hist->GetNoRangeX(); const int numberRangesY = hist->GetNoRangeY(); const int numberBins = hist->GetNoBin(); //cout << *hist << " hash = " << hist->HashValue() << endl; if (!hist->IsDefined()) { NList result = NList(Symbol::UNDEFINED()); return result.listExpr(); } else if(hist->IsEmpty()) { return (nl->TheEmptyList()); } else // else if( newHist->IsEmpty() ) { { ListExpr rangeXExpr = nl->Empty(); ListExpr rangeYExpr = nl->Empty(); ListExpr binExpr = nl->Empty(); // set up rangeXList ListExpr last = rangeXExpr; int i = 0; for (int i=0; i < numberRangesX; i++) { const ListExpr newElem = nl->RealAtom(hist->GetRangeX(i)); if (nl->IsEmpty(rangeXExpr)) { rangeXExpr = nl->OneElemList(newElem); last = rangeXExpr; } // if (nl->IsEmpty(rangeXExpr)) else last = nl->Append(last, newElem); } // for (i=0; i < numberRangesX; i++) // set up rangeYList last = rangeYExpr; for (i=0; i < numberRangesY; i++) { const ListExpr newElem = nl->RealAtom(hist->GetRangeY(i)); if (nl->IsEmpty(rangeYExpr)) { rangeYExpr = nl->OneElemList(newElem); last = rangeYExpr; } // if (nl->IsEmpty(rangeYExpr)) else last = nl->Append(last, newElem); } // for (i=0; i < numberRangesY; i++) // set up binList i = 0; last = binExpr; for (i=0; i < numberBins; i++) { const ListExpr newElem = nl->RealAtom(hist->GetBin(i)); if (nl->IsEmpty(binExpr)) { binExpr = nl->OneElemList(newElem); last = binExpr; } else last = nl->Append(last, newElem); } // for (i=0; i < numberBins; i++) return nl->ThreeElemList(rangeXExpr, rangeYExpr, binExpr); } // else { // if( newHist->IsEmpty() ) { // will (hopefully) never be reached: return (nl->TheEmptyList()); } // ListExpr Histogram2d::Out(ListExpr typeInfo, Word value) /* 5.4 Insert */ void Histogram2d::Insert(const int index, const HIST_REAL& val) { assert(index >= 0 && index < GetNoBin()); HIST_REAL oldVal; bin.Get(index, &oldVal); HIST_REAL newVal = val + oldVal; bin.Put(index, newVal); } /* Increases the content of the corresponding container with the value weight. Returns true if a correspondig container (bin) exists, false if not. */ bool Histogram2d::Insert(const HIST_REAL& x, const HIST_REAL& y, const HIST_REAL& weight) { CcInt index = FindBin(x, y); if (index.IsDefined()) { Insert(index.GetValue(), weight); return true; } else return false; } /* 5.5 FindBin, FindBinX, FindBinY */ const CcInt Histogram2d::FindBinX(const HIST_REAL& x) const { if (GetNoRangeX() < 2 || x < GetRangeX(0) || x >= GetRangeX(GetNoRangeX() - 1)) { return CcInt(false, 0); } int posX; rangeX.Find( &x, BaseHistogram::CmpBinSearch, posX); // bin-Koordinate ist Range-Koordinate - 1 return CcInt(true, posX-1); } const CcInt Histogram2d::FindBinY(const HIST_REAL& y) const { if (GetNoRangeY() < 2 || y < GetRangeY(0) || y >= GetRangeY(GetNoRangeY() - 1)) { return CcInt(false, 0); } int posY; rangeY.Find( &y, BaseHistogram::CmpBinSearch, posY); // bin-Koordinate ist Range-Koordinate - 1 return CcInt(true, posY-1); } CcInt Histogram2d::FindBin(const HIST_REAL& x, const HIST_REAL& y) const { // Vorbedingung: rangeX und rangeY sind aufsteigend sortiert. if (GetNoRangeX() < 2 || x < GetRangeX(0) || x >= GetRangeX(GetNoRangeX() - 1)) { return CcInt(false, 0); } if (GetNoRangeY() < 2 || y < GetRangeY(0) || y >= GetRangeY(GetNoRangeY() - 1)) { return CcInt(false, 0); } // Binaere Suche nach dem richtigen Behaelter: int posX, posY; rangeX.Find( &x, BaseHistogram::CmpBinSearch, posX); rangeY.Find( &y, BaseHistogram::CmpBinSearch, posY); int binArrayIndex = (posX - 1) * (GetNoRangeY() - 1) + (posY - 1); return CcInt(true, binArrayIndex); } /* 5.6 GetBinCoords Projection of the bin array index to 2d coordinates */ pair Histogram2d::GetBinCoords(const int index) const { if (IsEmpty() || index < 0 || index >= GetNoBin()) return pair(CcInt(false, -1), CcInt(false, -1)); int x = index / (GetNoRangeY() - 1); int y = index % (GetNoRangeY() - 1); return pair(CcInt(true, x), CcInt(true, y)); } /* 5.7 CopyRangesFrom */ void Histogram2d::CopyRangesFrom(const BaseHistogram* h) { Histogram2d* src = (Histogram2d*)h; rangeX.clean(); rangeY.clean(); for (int i = 0; i < src->GetNoRangeX(); i++) this->AppendRangeX(src->GetRangeX(i)); for (int i = 0; i < src->GetNoRangeY(); i++) this->AppendRangeY(src->GetRangeY(i)); } /* 5.8 NoBinsX, NoBinsY */ int Histogram2d::NoBinsX() const { return max(0, GetNoRangeX() - 1); } int Histogram2d::NoBinsY() const { return max(0, GetNoRangeY() - 1); } /* 5.9 GetCount */ CcReal Histogram2d::GetCount(const int x, const int y) const { if (!IsDefined() || IsEmpty() || !IsConsistent(false)) return CcReal(false, 0.0); if (x < 0 || x >= NoBinsX()) return CcReal(false, 0.0); if (y < 0 || y >= NoBinsY()) return CcReal(false, 0.0); return CcReal(true, GetBin(x * NoBinsY() + y)); } /* 6 Operators 6.1 MeanX, MeanY Implementation copied from gsl[_]histogram[_]stat2d.c of the GNU Scientific Library (gsl-1.9). Author: Achim Gaedke, Jan. 2002. */ CcReal Histogram2d::MeanX() const { if (!IsDefined() || IsEmpty() || !IsConsistent(false)) return CcReal(false, 0.0); const size_t nx = this->NoBinsX(); const size_t ny = this->NoBinsY(); size_t i; size_t j; /* Compute the bin-weighted arithmetic mean M of a histogram using the recurrence relation M(n) = M(n-1) + (x[n] - M(n-1)) (w(n)/(W(n-1) + w(n))) W(n) = W(n-1) + w(n) We skip negative values in the bins, since those correspond to negative weights (BJG). */ long double wmean = 0; long double W = 0; for (i = 0; i < nx; i++) { double xi = (GetRangeX(i + 1) + GetRangeX(i)) / 2.0; double wi = 0; for (j = 0; j < ny; j++) { double wij = GetBin(i * ny + j); if (wij > 0) wi += wij; } if (wi > 0) { W += wi; wmean += (xi - wmean) * (wi / W); } } return CcReal(true, wmean); } CcReal Histogram2d::MeanY() const { if (!IsDefined() || IsEmpty() || !IsConsistent(false)) return CcReal(false, 0.0); const size_t nx = this->NoBinsX(); const size_t ny = this->NoBinsY(); size_t i; size_t j; /* Compute the bin-weighted arithmetic mean M of a histogram using the recurrence relation M(n) = M(n-1) + (x[n] - M(n-1)) (w(n)/(W(n-1) + w(n))) W(n) = W(n-1) + w(n) */ long double wmean = 0; long double W = 0; for (j = 0; j < ny; j++) { double yj = (GetRangeY(j + 1) + GetRangeY(j)) / 2.0; double wj = 0; for (i = 0; i < nx; i++) { double wij = GetBin(i * ny + j); if (wij > 0) wj += wij; } if (wj > 0) { W += wj; wmean += (yj - wmean) * (wj / W); } } return CcReal(true, wmean); } /* 6.2 VarianceX, VarianceY Implementation copied from gsl[_]histogram[_]stat2d.c of the GNU Scientific Library (gsl-1.9). Author: Achim Gaedke, Jan. 2002. */ CcReal Histogram2d::VarianceX() const { if (!IsDefined() || IsEmpty() || !IsConsistent(false)) return CcReal(false, 0.0); const double xmean = MeanX().GetValue(); const size_t nx = this->NoBinsX(); const size_t ny = this->NoBinsY(); size_t i; size_t j; long double wvariance = 0; long double W = 0; for (i = 0; i < nx; i++) { double xi = (GetRangeX(i + 1) + GetRangeX(i)) / 2.0 - xmean; double wi = 0; for (j = 0; j < ny; j++) { double wij = GetBin(i * ny + j); if (wij > 0) wi += wij; } if (wi > 0) { W += wi; wvariance += ((xi * xi) - wvariance) * (wi / W); } } return CcReal(true, wvariance); } CcReal Histogram2d::VarianceY() const { if (!IsDefined() || IsEmpty() || !IsConsistent(false)) return CcReal(false, 0.0); const double ymean = MeanY().GetValue(); const size_t nx = this->NoBinsX(); const size_t ny = this->NoBinsY(); size_t i; size_t j; long double wvariance = 0; long double W = 0; for (j = 0; j < ny; j++) { double yj = (GetRangeY(j + 1) + GetRangeY(j)) / 2.0 - ymean; double wj = 0; for (i = 0; i < nx; i++) { double wij = GetBin(i * ny + j); if (wij > 0) wj += wij; } if (wj > 0) { W += wj; wvariance += ((yj * yj) - wvariance) * (wj / W); } } return CcReal(true, wvariance); } /* 6.3 Covariance Implementation copied from gsl[_]histogram[_]stat2d.c of the GNU Scientific Library (gsl-1.9). Author: Achim Gaedke, Jan. 2002. */ CcReal Histogram2d::Covariance() const { if (!IsDefined() || IsEmpty() || !IsConsistent(false)) return CcReal(false, 0.0); const double xmean = MeanX().GetValue(); const double ymean = MeanY().GetValue(); const size_t nx = this->NoBinsX(); const size_t ny = this->NoBinsY(); size_t i; size_t j; long double wcovariance = 0; long double W = 0; for (j = 0; j < ny; j++) { for (i = 0; i < nx; i++) { double xi = (GetRangeX(i + 1) + GetRangeX(i)) / 2.0 - xmean; double yj = (GetRangeY(j + 1) + GetRangeY(j)) / 2.0 - ymean; double wij = GetBin(i * ny + j); if (wij > 0) { W += wij; wcovariance += ((xi * yj) - wcovariance) * (wij / W); } } } return CcReal(true, wcovariance); } /* 6.4 Sizeof */ size_t Histogram2d::Sizeof() const { return sizeof(*this); } /* 6.5 Compare, CompareAlmost */ int Histogram2d::Compare(const Attribute *rhs) const { return Compare(*(Histogram2d*)rhs); } int Histogram2d::CompareAlmost(const Attribute *rhs) const { return CompareAlmost(*(Histogram2d*)rhs); } /* 6.6 Adjacent */ bool Histogram2d::Adjacent(const Attribute *attrib) const { return false; } /* 6.7 Clone */ Attribute* Histogram2d::Clone() const { return new Histogram2d(*this); } /* 6.8 HashValue Algorithm according to John Hamer. Hashing revisited. In ITiCSE2002 Seventh Conference on Innovation and Technology in Computer Science Education, pages 80-83, Aarhus, Denmark, June 2002. */ size_t Histogram2d::HashValue() const { if (!IsDefined()) return 0; size_t h = 0; for (int i = 0; i < GetNoRangeX(); i++) { h = hgr::Rotate(h) ^ hgr::HashValue(GetRangeX(i)); } for (int i = 0; i < GetNoRangeY(); i++) { h = hgr::Rotate(h) ^ hgr::HashValue(GetRangeY(i)); } for (int i = 0; i < GetNoBin(); i++) { h = hgr::Rotate(h) ^ hgr::HashValue(GetBin(i)); } return h; } /* 6.9 CopyFrom */ void Histogram2d::CopyFrom(const Attribute* right) { const Histogram2d* hist = static_cast(right); SetDefined(hist->IsDefined()); CopyRangesFrom(hist); CopyBinsFrom(hist); } /* 6.10 NumOfFLOBs */ int Histogram2d::NumOfFLOBs() const { return 3; } /* 6.11 GetFLOB */ Flob* Histogram2d::GetFLOB( const int i ) { assert( i >= 0 && i < NumOfFLOBs() ); if (i == 0) return &rangeX; else if(i == 1) return &rangeY; else return &bin; } /* 6.12 Cast */ void* Histogram2d::Cast(void* addr) { return (new (addr) Histogram2d); } /* 6.13 Create */ Word Histogram2d::Create( const ListExpr typeInfo ) { return SetWord( new Histogram2d( true ) ); } /* 6.14 KindCheck or CheckKind function */ bool Histogram2d::KindCheck(ListExpr type, ListExpr& errorInfo) { return (nl->IsEqual(type, Histogram2d::BasicType())); } /* 6.15 Print */ ostream& Histogram2d::Print( ostream& os ) const { return (os << *this); } /* 6.16 Operator << */ ostream& operator << (ostream& os, const Histogram2d& h) { if (!h.IsDefined()) { os << "(" << Histogram2d::BasicType() << " undef)"; return os; } os << "(" << Histogram2d::BasicType() << "( ( "; for (int i = 0; i < h.GetNoRangeX(); i++) os << h.GetRangeX(i) << " "; os << ") ( "; for (int i = 0; i < h.GetNoRangeY(); i++) os << h.GetRangeY(i) << " "; os << ") ( "; for (int i = 0; i < h.GetNoBin(); i++) os << h.GetBin(i) << " "; os << ") ))"; return os; } /* 7 The histogram2dInfo struct */ histogram2dInfo::histogram2dInfo() { name = Histogram2d::BasicType(); signature = "-> DATA"; typeExample = Histogram2d::BasicType(); listRep = "((rangesX*)(rangesY*)(bins*))"; valueExample= "( (0.0 1.0 ) (0.0 1.5) (2.0) )"; remarks = "All coordinates must be of type real."; } // histogram2dInfo() { /* 8 Creation of the Type Constructor Instance */ histogram2dFunctions::histogram2dFunctions() { //reassign some function pointers in = Histogram2d::In; out = Histogram2d::Out; cast = Histogram2d::Cast; kindCheck = Histogram2d::KindCheck; create = Histogram2d::Create; } // histogram2dFunctions() { /* 9 Type and value mapping functions 9.1 set[_]histogram2d */ ListExpr SetHistogram2dTypeMap(ListExpr args) { if(!nl->HasLength(args,2)){ return listutils::typeError("2 arguments expected"); } if(!Stream::checkType(nl->First(args)) || !Stream::checkType(nl->Second(args))){ return listutils::typeError("stream(real) x stream(real) expected"); } return listutils::basicSymbol(); } int SetHistogram2dFun(Word* args, Word& result, int message, Word& local, Supplier s) { Word elemX, elemY; CcReal* streamObjX; CcReal* streamObjY; HIST_REAL valueX=0, valueY=0, lastValueX=0, lastValueY=0; bool firstElemX, firstElemY; bool receivedX, receivedY; // The query processor provided an empty Histogram2d-instance: result = qp->ResultStorage(s); Histogram2d* resultHg = (Histogram2d*) result.addr; resultHg->Clear(); qp->Open(args[0].addr); qp->Open(args[1].addr); firstElemX = true; firstElemY = true; // Request the first elements of the streams: qp->Request(args[0].addr, elemX); qp->Request(args[1].addr, elemY); receivedX = qp->Received(args[0].addr); receivedY = qp->Received(args[1].addr); while (receivedX || receivedY) { if (receivedX) { streamObjX = (CcReal*) elemX.addr; valueX = streamObjX->GetRealval(); if (!streamObjX->IsDefined()) { cout << "Error in operator set_histogram2d: " << "The first stream contains an undefined value." << endl; resultHg->Clear(); resultHg->SetDefined(false); qp->Close(args[0].addr); qp->Close(args[1].addr); return 0; } if (!firstElemX && (CmpReal(valueX, lastValueX) < 1)) { cout << "Error in operator set_histogram2d: " << "The first stream is not sorted or contains duplicates." << endl; resultHg->Clear(); resultHg->SetDefined(false); qp->Close(args[0].addr); qp->Close(args[1].addr); return 0; } resultHg->AppendRangeX(valueX); lastValueX = valueX; firstElemX = false; // Consume the stream object: streamObjX->DeleteIfAllowed(); // Request the next element of the first stream: qp->Request(args[0].addr, elemX); receivedX = qp->Received(args[0].addr); if (!receivedX) qp->Close(args[0].addr); } // if (receivedX) if (receivedY) { streamObjY = (CcReal*) elemY.addr; valueY = streamObjY->GetRealval(); if (!streamObjY->IsDefined()) { cout << "Error in operator set_histogram2d: " << "The second stream contains an undefined value." << endl; resultHg->Clear(); resultHg->SetDefined(false); qp->Close(args[0].addr); qp->Close(args[1].addr); return 0; } if (!firstElemY && (CmpReal(valueY, lastValueY) < 1)) { cout << "Error in operator set_histogram2d: " << "The second stream is not sorted or contains duplicates." << endl; resultHg->Clear(); resultHg->SetDefined(false); qp->Close(args[0].addr); qp->Close(args[1].addr); return 0; } resultHg->AppendRangeY(valueY); lastValueY = valueY; firstElemY = false; // Consume the current stream object: streamObjY->DeleteIfAllowed(); // Request the next element of the second stream: qp->Request(args[1].addr, elemY); receivedY = qp->Received(args[1].addr); if (!receivedY) qp->Close(args[1].addr); } // if (receivedY) } // while (receivedX || receivedY) if (resultHg->GetNoRangeX() < 2 || resultHg->GetNoRangeY() < 2) { cout << "Error in operator set_histogram2d: " << "A stream contains less than two elements." << endl; resultHg->Clear(); resultHg->SetDefined(false); return 0; } int noOfBins = (resultHg->GetNoRangeX() - 1) * (resultHg->GetNoRangeY() - 1); resultHg->ResizeBin(noOfBins); for (int i = 0; i < noOfBins; i++) resultHg->AppendBin(0.0); return 0; } /* 9.2 BinsX, BinsY */ int BinsXFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h1 = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcInt* i = static_cast(result.addr); if (!h1->IsDefined()) { i->Set(false, -1); return 0; } i->Set(true, h1->NoBinsX()); return 0; } int BinsYFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h1 = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcInt* i = static_cast(result.addr); if (!h1->IsDefined()) { i->Set(false, -1); return 0; } i->Set(true, h1->NoBinsY()); return 0; } ListExpr BinsXYTypeMap(ListExpr args) { NList list(args); const string errMsg = "Expecting (" + Histogram2d::BasicType() + ")"; if (list.length() != 1) return list.typeError(errMsg); NList arg1 = list.first(); // histogram1d -> int if (arg1.isSymbol(Histogram2d::BasicType())) return NList(CcInt::BasicType()).listExpr(); return list.typeError(errMsg); } /* 9.3 binrange[_]minX, binrange[_]maxX, binrange[_]minY, binrange[_]maxY */ int binrange_minXFun( Word* args, Word& result, int message, Word& local, Supplier s ) { const Histogram2d *hg = static_cast(args[0].addr); const CcInt* binNumber = static_cast(args[1].addr); // cerr << "binNumber: " << binNumber->GetIntval( ) << endl; result = qp->ResultStorage(s); CcReal *r = (CcReal*)result.addr; if (!hg->IsDefined() || !binNumber->IsDefined()) { r->Set(false, 0.0); return 0; } int i = binNumber->GetIntval(); // binNumber i must be on the histogram's X axis if ((i >= 0) && (i < (hg->GetNoRangeX()-1))) { r->Set(hg->GetRangeX(i)); return 0; } // if ((i >= 0) && (i < hg->GetNoRangeX())) else { cerr << "Please indicate as second parameter " "an integer value between 0 and " << hg->GetNoRangeX() -2 << "." << endl; return 0; } // else // if ((i >= 0) && (i < hg->GetNoRangeX())) } // int binrange_minXFun( Word* args, Word& result, ... int binrange_maxXFun( Word* args, Word& result, int message, Word& local, Supplier s ) { const Histogram2d *hg = static_cast(args[0].addr); const CcInt* binNumber = static_cast(args[1].addr); // cerr << "binNumber: " << binNumber->GetIntval( ) << endl; result = qp->ResultStorage(s); CcReal *r = (CcReal*)result.addr; if (!hg->IsDefined() || !binNumber->IsDefined()) { r->Set(false, 0.0); return 0; } int i = binNumber->GetIntval(); // binNumber i must be on the histogram's X axis if ((i >= 0) && (i < (hg->GetNoRangeX()-1))) { r->Set(hg->GetRangeX(i+1)); return 0; } // if ((i >= 0) && (i < hg->GetNoRangeX())) else { cerr << "Please indicate as second parameter " "an integer value between 0 and " << hg->GetNoRangeX() -2 << "." << endl; return 0; } // else // if ((i >= 0) && (i < hg->GetNoRangeX())) } // int binrange_maxXFun( Word* args, Word& result, ... int binrange_minYFun( Word* args, Word& result, int message, Word& local, Supplier s ) { const Histogram2d *hg = static_cast(args[0].addr); const CcInt* binNumber = static_cast(args[1].addr); // cerr << "binNumber: " << binNumber->GetIntval( ) << endl; result = qp->ResultStorage(s); CcReal *r = (CcReal*)result.addr; if (!hg->IsDefined() || !binNumber->IsDefined()) { r->Set(false, 0.0); return 0; } int i = binNumber->GetIntval(); // binNumber i must be on the histogram's Y axis if ((i >= 0) && (i < (hg->GetNoRangeY()-1))) { r->Set(hg->GetRangeY(i)); return 0; } // if ((i >= 0) && (i < hg->GetNoRangeY())) else { cerr << "Please indicate as second parameter " "an integer value between 0 and " << hg->GetNoRangeY() -2 << "." << endl; return 0; } // else // if ((i >= 0) && (i < hg->GetNoRangeY())) } // int binrange_minYFun( Word* args, Word& result, ... int binrange_maxYFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d *hg = static_cast(args[0].addr); const CcInt* binNumber = static_cast(args[1].addr); // cerr << "binNumber: " << binNumber->GetIntval( ) << endl; result = qp->ResultStorage(s); CcReal *r = (CcReal*)result.addr; if (!hg->IsDefined() || !binNumber->IsDefined()) { r->Set(false, 0.0); return 0; } int i = binNumber->GetIntval(); // binNumber i must be on the histogram's Y axis if ((i >= 0) && (i < (hg->GetNoRangeY()-1))) { r->Set(hg->GetRangeY(i+1)); return 0; } // if ((i >= 0) && (i < hg->GetNoRangeY())) else { cerr << "Please indicate as second parameter an integer " "value between 0 and " << hg->GetNoRangeY() -2 << "." << endl; return 0; } // else // if ((i >= 0) && (i < hg->GetNoRangeY())) } // int binrange_minYFun( Word* args, Word& result, ... ListExpr binrange_minXY_maxXYTypeMap (ListExpr args) { NList argList = NList(args); short int ok = 0; // we are expecting exactly two arguments if (argList.length() == 2) { NList hist = argList.first(); NList numberBin = argList.second(); // the first argument is to be a histogram2d if ((hist.isSymbol(Histogram2d::BasicType()))) { ok++; } // if ( (hist.isSymbol(Histogram2d::BasicType()) ) ) else { argList.typeError("Expecting a histogram2d as first input parameter."); } // else // if ( (hist.isSymbol(Histogram2d::BasicType()) ) ) // and the second argument is to be an integer value if (numberBin.isSymbol(CcInt::BasicType())) { ok++; } // if (numberBin.isSymbol(CcInt::BasicType())) else { argList.typeError("Expecting an integer value as " "second input parameter."); } // else // if (numberBin.isSymbol(CcInt::BasicType())) } // if (argList.length() == 2) else { // return list.typeError(errMsg); argList.typeError("Expecting exactly two input parameters: " "(1) a histogram2d, (2) an integer value " "indicating the bin number."); } if (ok == 2) return NList(CcReal::BasicType()).listExpr(); else return argList.typeError( "Expecting exactly two input parameters: " "(1) a histogram2d, (2) an integer value " "indicating the bin number."); } // ListExpr binrange_minX_maxXTypeMap (ListExpr args) /* 9.4 create[_]histogram2d Argument 0 tuple stream, 1 attribute name X, 2 attribute name Y, 3 histogram2d, 4 index of attribute X, 5 index of attribute Y */ int CreateHistogram2dFun(Word* args, Word& result, int message, Word& local, Supplier s) { Word elem; CcInt bin; HIST_REAL valX, valY; CcReal* attrValX; CcReal* attrValY; const Histogram2d* hist = (Histogram2d*)args[3].addr; const CcInt* indexX = (CcInt*)args[4].addr; const CcInt* indexY = (CcInt*)args[5].addr; //cout << "indexX " << indexX->GetIntval() << endl; //cout << "indexY " << indexY->GetIntval() << endl; Tuple* currentTuple; qp->Open(args[0].addr); qp->Request(args[0].addr, elem); result = qp->ResultStorage(s); Histogram2d* res = (Histogram2d*)result.addr; res->Clear(); res->CopyFrom(hist); while (qp->Received(args[0].addr) ) { // Find bin for current attribute values currentTuple = (Tuple*)elem.addr; attrValX = (CcReal*)currentTuple->GetAttribute( indexX->GetIntval()-1); attrValY = (CcReal*)currentTuple->GetAttribute( indexY->GetIntval()-1); valX = attrValX->GetRealval(); valY = attrValY->GetRealval(); // catch empty (== undefined) histograms if (res->IsDefined()) { bin = res->FindBin(valX, valY); // Increment bin, if it was found if (bin.IsDefined()) res->Insert(bin.GetIntval()); } currentTuple->DeleteIfAllowed();// consume the stream objects qp->Request(args[0].addr, elem); } // return filled histogram2d qp->Close(args[0].addr); return 0; } // int CreateHistogram2dFun ListExpr CreateHistogram2dTypeMap(ListExpr args) { NList argList = NList(args); if(argList.length() != 4) { return listutils::typeError("Expects 4 arguments."); } NList stream = argList.first(); NList attrNameX = argList.second(); NList attrNameY = argList.third(); NList hist = argList.fourth(); if(stream.length() != 2 || !stream.first().isSymbol(Symbol::STREAM()) || stream.second().length() != 2 || !stream.second().first().isSymbol(Tuple::BasicType()) || !IsTupleDescription(stream.second().second().listExpr())) { return listutils::typeError("Expecting stream of tuples"); } // Check if tuple has supplied attributes NList tupleDescr = stream.second().second(); int len = tupleDescr.length(); bool foundX = false, foundY = false; int indexX=0; int indexY=0; int index = 1; for (; index <= len; ++index) { NList attr = tupleDescr.first(); tupleDescr.rest(); if (attrNameX == attr.first()) { if(!attr.second().isSymbol(CcReal::BasicType())) { return listutils::typeError("AttributeX not of type real"); } foundX = true; indexX = index; } if (attrNameY == attr.first()) { if(!attr.second().isSymbol(CcReal::BasicType())) { return listutils::typeError("AttributeY not of type real"); } foundY = true; indexY = index; } // found both attributes if (foundX && foundY) break; } if(!foundX) { return listutils::typeError("AttributeX not found"); } if(!foundY) { return listutils::typeError("AttributeY not found"); } if(!hist.isSymbol(Histogram2d::BasicType())) { return listutils::typeError("Histogram2d has wrong type"); } ListExpr result = nl->ThreeElemList(nl->SymbolAtom(Symbol::APPEND()), nl->TwoElemList(nl->IntAtom(indexX), nl->IntAtom(indexY)), nl->SymbolAtom(Histogram2d::BasicType())); return result; } // CreateHistogram2dTypeMap(ListExpr args) /* 9.5 CreateHistogram2dEquiwidth */ ListExpr CreateHistogram2dEquiwidthTypeMap(ListExpr args) { string errorMsg; NList list(args); // Check the list: errorMsg = "Operator create_histogram2d_equiwidth " "expects a list of length five."; if (list.length() != 5) return list.typeError(errorMsg); NList arg1 = list.first(); // (stream (tuple ((a1 x1) ... (an xn)))) NList arg2 = list.second(); // (ax) NList arg3 = list.third(); // (ay) NList arg4 = list.fourth(); // (int) NList arg5 = list.fifth(); // (int) // Check the first argument: // (stream (tuple ((a1 x1) ... (an xn)))) errorMsg = "Operator create_histogram2d_equiwidth expects as first argument " "a list with structure\n" "(stream (tuple ((a1 t1)...(an tn))))"; if (!IsStreamDescription(arg1.listExpr())) return list.typeError(errorMsg); // Check the second argument: // (ax) errorMsg = "Operator create_histogram2d_equiwidth expects as " "second argument an attribute."; if (!arg2.isSymbol()) return list.typeError(errorMsg); ListExpr attrlist; string attrname; int attrindex; ListExpr attrtype; NList indexlist; // Find the attribute in the tuple: attrname = arg2.str(); attrlist = arg1.second().second().listExpr(); attrindex = FindAttribute(attrlist, attrname, attrtype); errorMsg = "Attributename '" + attrname + "' is not known.\n" "Known Attribute(s): " + NList(attrlist).convertToString(); if (attrindex == 0) return list.typeError(errorMsg); errorMsg = "Operator create_histogram2d_equiwidth expects as " "second argument an attribute of type real."; if (!NList(attrtype).isSymbol(CcReal::BasicType())) return list.typeError(errorMsg); // Remember the position of the attribute: indexlist.append(NList(attrindex)); // Check the third argument: // (ay) errorMsg = "Operator create_histogram2d_equiwidth expects as " "third argument an attribute."; if (!arg3.isSymbol()) return list.typeError(errorMsg); // Find the attribute in the tuple: attrname = arg3.str(); attrindex = FindAttribute(attrlist, attrname, attrtype); errorMsg = "Attributename '" + attrname + "' is not known.\n" "Known Attribute(s): " + NList(attrlist).convertToString(); if (attrindex == 0) return list.typeError(errorMsg); errorMsg = "Operator create_histogram2d_equiwidth expects as " "second argument an attribute of type real."; if (!NList(attrtype).isSymbol(CcReal::BasicType())) return list.typeError(errorMsg); // Remember the position of the attribute: indexlist.append(NList(attrindex)); // Check the fourth argument: // (int) errorMsg = "Operator create_histogram2d_equiwidth expects as " "fourth argument the symbol 'int'."; if (!arg4.isSymbol(CcInt::BasicType())) return list.typeError(errorMsg); // Check the fifth argument: // (int) errorMsg = "Operator create_histogram2d_equiwidth expects as " "fifth argument the symbol 'int'."; if (!arg5.isSymbol(CcInt::BasicType())) return list.typeError(errorMsg); // Everything should be fine. // We can build the outlist: NList outlist(NList(Symbol::APPEND()), indexlist, NList(Histogram2d::BasicType()) ); //cout << "outlist: " << outlist.convertToString() << endl; return outlist.listExpr(); } int CreateHistogram2dEquiwidthFun(Word* args, Word& result, int message, Word& local, Supplier s) { Word elem; const size_t MAX_MEMORY = qp->FixedMemory(); TupleBuffer* buffer = new TupleBuffer(MAX_MEMORY); // The query processor provided an empty Histogram2d-instance: result = qp->ResultStorage(s); Histogram2d* resultHg = (Histogram2d*) result.addr; resultHg->Clear(); // We don't use the attrnames, // delivered in args[1].addr and args[2].addr. // Get the index of the attributes instead: const int attrXIndex = ((CcInt*)args[5].addr)->GetIntval() - 1; const int attrYIndex = ((CcInt*)args[6].addr)->GetIntval() - 1; if ( !((CcInt*)args[3].addr)->IsDefined() || !((CcInt*)args[4].addr)->IsDefined() ) { resultHg->SetDefined(false); delete buffer; return 0; } // Get the max. number of bins: int maxBinsX = ((CcInt*)args[3].addr)->GetIntval(); int maxBinsY = ((CcInt*)args[4].addr)->GetIntval(); if (maxBinsX < 1) { cout << "Error in operator create_histogram2d_equiwidth: " << "The fourth argument must be greater than zero." << endl; resultHg->SetDefined(false); delete buffer; return 0; } if (maxBinsY < 1) { cout << "Error in operator create_histogram2d_equiwidth: " << "The fifth argument must be greater than zero." << endl; resultHg->SetDefined(false); delete buffer; return 0; } // Open the tuple-stream: qp->Open(args[0].addr); // Request the first tuple of the stream: qp->Request(args[0].addr, elem); Tuple* tuplePtr; CcReal* attrXPtr; CcReal* attrYPtr; HIST_REAL attrXValue=0; HIST_REAL attrYValue=0; HIST_REAL maxX=0; HIST_REAL minX=0; HIST_REAL maxY=0; HIST_REAL minY=0; bool firstTuple = true; while (qp->Received(args[0].addr)) { // Get a pointer to the current tuple: tuplePtr = (Tuple*) elem.addr; // Get a pointer to the current attributes: attrXPtr = (CcReal*)(tuplePtr->GetAttribute(attrXIndex)); attrYPtr = (CcReal*)(tuplePtr->GetAttribute(attrYIndex)); // Get the value of the current attributes: attrXValue = attrXPtr->GetRealval(); attrYValue = attrYPtr->GetRealval(); // We accept only defined attribute-pairs: if (attrXPtr->IsDefined() && attrYPtr->IsDefined()) { if (firstTuple) { maxX = minX = attrXValue; maxY = minY = attrYValue; firstTuple = false; } // Is it a new min./max. value? if (attrXValue < minX) minX = attrXValue; else if (attrXValue > maxX) maxX = attrXValue; if (attrYValue < minY) minY = attrYValue; else if (attrYValue > maxY) maxY = attrYValue; // Push the current tuple in the buffer: buffer->AppendTuple(tuplePtr); } // Consume the tuple: //tuplePtr->DecReference(); tuplePtr->DeleteIfAllowed(); // Request the next tuple of the stream: qp->Request(args[0].addr, elem); } // while (qp->Received(args[0].addr)) qp->Close(args[0].addr); if (buffer->IsEmpty()) { cout << "Error in operator create_histogram2d_equiwidth: " << "The stream contains no valid elements." << endl; resultHg->SetDefined(false); delete buffer; return 0; } if (AlmostEqual(minX, maxX)) { // We got just one, or equal x-values: maxX += FACTOR; minX -= FACTOR; // We need just one bin: maxBinsX = 1; } if (AlmostEqual(minY, maxY)) { // We got just one, or equal y-values: maxY += FACTOR; minY -= FACTOR; // We need just one bin: maxBinsY = 1; } const HIST_REAL valueRangeX = fabs(maxX - minX); const HIST_REAL valueRangeY = fabs(maxY - minY); const HIST_REAL binWidthX = valueRangeX / maxBinsX; const HIST_REAL binWidthY = valueRangeY / maxBinsY; const HIST_REAL firstBoundX = minX; const HIST_REAL firstBoundY = minY; // Create the rangeX-array. // It's size is: maxBinsX + 1 resultHg->ResizeRangeX(maxBinsX + 1); for (int i = 0; i < maxBinsX; i++) { resultHg->AppendRangeX(firstBoundX + binWidthX * i); } // Append the last rangeX: resultHg->AppendRangeX(maxX + 10 * FACTOR); // Create the rangeY-array. // It's size is: maxBinsY + 1 resultHg->ResizeRangeY(maxBinsY + 1); for (int i = 0; i < maxBinsY; i++) { resultHg->AppendRangeY(firstBoundY + binWidthY * i); } // Append the last rangeY: resultHg->AppendRangeY(maxY + 10 * FACTOR); // Init the bin-array with empty bins. // It's size is: maxBinsX * maxBinsY const int noBins = maxBinsX * maxBinsY; resultHg->ResizeBin(noBins); for (int i = 0; i < noBins; i++) { resultHg->AppendBin(0.0); } GenericRelationIterator* it = buffer->MakeScan(); // Fill the histogram with the buffered values: while ((tuplePtr = it->GetNextTuple()) != 0) { attrXPtr = (CcReal*)(tuplePtr->GetAttribute(attrXIndex)); attrYPtr = (CcReal*)(tuplePtr->GetAttribute(attrYIndex)); attrXValue = attrXPtr->GetRealval(); attrYValue = attrYPtr->GetRealval(); resultHg->Insert(attrXValue, attrYValue); tuplePtr->DeleteIfAllowed(); } delete it; buffer->Clear(); delete buffer; return 0; } /* 9.6 Shrink2d Argument 0 histogram, 1 lower bound X, 2 upper bound X, 3 lower bound Y, 4 upper bound Y */ template int Shrink2dFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* hist = (const Histogram2d*)args[0].addr; const CcReal* loX = (const CcReal*)args[1].addr; const CcReal* hiX = (const CcReal*)args[2].addr; const CcReal* loY = (const CcReal*)args[3].addr; const CcReal* hiY = (const CcReal*)args[4].addr; result = qp->ResultStorage(s); Histogram2d* res = (Histogram2d*)result.addr; res->Clear(); if (!hist->IsDefined() || hist->IsEmpty() || !hist->IsConsistent(false) || !loX->IsDefined() || !hiX->IsDefined() || !loY->IsDefined() || !hiY->IsDefined()) { res->SetDefined(false); return 0; } const HIST_REAL lowX = loX->GetRealval(); const HIST_REAL highX = hiX->GetRealval(); const HIST_REAL lowY = loY->GetRealval(); const HIST_REAL highY = hiY->GetRealval(); if (CmpReal(highX, lowX) != 1 || CmpReal(highY, lowY) != 1) { cout << "Error in operator shrink_x: " << "The upper bound must be greater than the lower bound." << endl; res->SetDefined(false); return 0; } if ((lowX < hist->GetRangeX(0) && highX < hist->GetRangeX(0)) || (lowX > hist->GetRangeX(hist->GetNoRangeX()-1) && highX > hist->GetRangeX(hist->GetNoRangeX()-1))) { cout << "Error in operator shrink_x: " << "The specified x-interval is outside the histogram-xrange." << endl; res->SetDefined(false); return 0; } if ((lowY < hist->GetRangeY(0) && highY < hist->GetRangeY(0)) || (lowY > hist->GetRangeY(hist->GetNoRangeY()-1) && highY > hist->GetRangeY(hist->GetNoRangeY()-1))) { cout << "Error in operator shrink_x: " << "The specified y-interval is outside the histogram-yrange." << endl; res->SetDefined(false); return 0; } CcInt lowerX = hist->FindBinX(lowX); CcInt upperX = hist->FindBinX(highX); CcInt lowerY = hist->FindBinY(lowY); CcInt upperY = hist->FindBinY(highY); int loRangeX = 0; int upRangeX = hist->GetNoRangeX() - 1; int loRangeY = 0; int upRangeY = hist->GetNoRangeY() - 1; if (lowerX.IsDefined()) loRangeX = lowerX.GetIntval(); if (upperX.IsDefined()) upRangeX = upperX.GetIntval() + 1; if (lowerY.IsDefined()) loRangeY = lowerY.GetIntval(); if (upperY.IsDefined()) upRangeY = upperY.GetIntval() + 1; if (eager) { if (lowerX.IsDefined() && !AlmostEqual(lowX, hist->GetRangeX(loRangeX))) { // Drop the first xrange, // if the lower bound is inside the histogram-xrange and // we do not hit the bound of the bin. loRangeX++; } if (upperX.IsDefined()) { // Drop the last xrange, // if the upper bound is inside the histogram-xrange. upRangeX--; } if (lowerY.IsDefined() && !AlmostEqual(lowY, hist->GetRangeY(loRangeY))) { // Drop the first yrange, // if the lower bound is inside the histogram-yrange and // we do not hit the bound of the bin. loRangeY++; } if (upperY.IsDefined()) { // Drop the last yrange, // if the upper bound is inside the histogram-yrange. upRangeY--; } } res->ResizeRangeX(upRangeX - loRangeX + 1); for (int i = loRangeX; i <= upRangeX; ++i) res->AppendRangeX(hist->GetRangeX(i)); res->ResizeRangeY(upRangeY - loRangeY + 1); for (int j = loRangeY; j <= upRangeY; ++j) res->AppendRangeY(hist->GetRangeY(j)); const int ny = hist->NoBinsY(); res->ResizeBin((upRangeX - loRangeX) * (upRangeY - loRangeY)); for (int i = loRangeX; i < upRangeX; ++i) for (int j = loRangeY; j < upRangeY; ++j) res->AppendBin(hist->GetBin(i * ny + j)); if (res->IsEmpty() || !res->IsConsistent(false)) { res->Clear(); res->SetDefined(false); } return 0; } /* Instantiation of Template Functions The compiler cannot expand these template functions in the file ~HistogramAlgebra.cpp~. */ template int Shrink2dFun(Word* args, Word& result, int message, Word& local, Supplier s); template int Shrink2dFun(Word* args, Word& result, int message, Word& local, Supplier s); ListExpr Shrink2dTypeMap(ListExpr args) { NList argList(args); if(argList.length() != 5) { return listutils::typeError("Expects 5 arguments."); } NList hist = argList.first(); NList loX = argList.second(); NList hiX = argList.third(); NList loY = argList.fourth(); NList hiY = argList.fifth(); if(!hist.isSymbol(Histogram2d::BasicType())) { return listutils::typeError("Expects '"+ Histogram2d::BasicType()+"' as 1st argument"); } if(!loX.isSymbol(CcReal::BasicType()) || !hiX.isSymbol(CcReal::BasicType()) || !loY.isSymbol(CcReal::BasicType()) || !hiY.isSymbol(CcReal::BasicType())) { return listutils::typeError("Expects 'real' for lower an upper bound"); } return hist.listExpr(); } /* 9.7 Getcount2d */ int Getcount2dFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr); const CcInt* binIndexX = static_cast(args[1].addr); const CcInt* binIndexY = static_cast(args[2].addr); result = qp->ResultStorage(s); CcReal* res = static_cast(result.addr ); if (!binIndexX->IsDefined() || !binIndexY->IsDefined()) { res->SetDefined(false); return 0; } CcReal count = h->GetCount(binIndexX->GetValue(), binIndexY->GetValue()); res->Set(count.IsDefined(), count.GetValue()); return 0; } ListExpr Getcount2dTypeMap(ListExpr args) { NList list(args); const string errMsg = "Expecting (" + Histogram2d::BasicType() + " " + CcInt::BasicType() + " " + CcInt::BasicType() + ")"; if (list.length() != 3) return list.typeError(errMsg); NList arg1 = list.first(); NList arg2 = list.second(); NList arg3 = list.third(); // histogram2d x int x int -> real if (arg1.isSymbol(Histogram2d::BasicType()) && arg2.isSymbol(CcInt::BasicType()) && arg3.isSymbol(CcInt::BasicType())) return NList(CcReal::BasicType()).listExpr(); return list.typeError(errMsg); } /* 9.8 Insert2d Argument 0 Histogram2d, 1 X real value, 2 Y real value, 3 incVal */ template int Insert2dFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = (const Histogram2d*)args[0].addr; const CcReal* x = (const CcReal*)args[1].addr; const CcReal* y = (const CcReal*)args[2].addr; // return histogram2d result = qp->ResultStorage(s); Histogram2d* hist = (Histogram2d*)result.addr; hist->Clear(); if (!h->IsDefined() || !x->IsDefined() || !y->IsDefined()) { hist->SetDefined(false); return 0; } hist->CopyFrom(h); CcInt bin = hist->FindBin(x->GetRealval(), y->GetRealval()); if (bin.IsDefined()) { if (incValSupplied) { const CcReal* incVal = (const CcReal*)args[3].addr; if (!incVal->IsDefined()){ hist->SetDefined(false); return 0; } hist->Insert(bin.GetIntval(), incVal->GetRealval()); } else{ hist->Insert(bin.GetIntval()); } } return 0; } template ListExpr Insert2dTypeMap(ListExpr args) { NList argList = NList(args); if (incValSupplied) { if(argList.length() != 4) { return listutils::typeError("Expects 4 arguments."); } } else { if(argList.length() != 3) { return listutils::typeError("Expects 3 arguments."); } } NList hist = argList.first(); NList x = argList.second(); NList y = argList.third(); if(!hist.isSymbol(Histogram2d::BasicType())) { return listutils::typeError("1st argument not of type " + Histogram2d::BasicType()); } if(!x.isSymbol(CcReal::BasicType())) { return listutils::typeError("2nd argument not of type " + CcReal::BasicType()); } if(!y.isSymbol(CcReal::BasicType())) { return listutils::typeError("3rd argument not of type " + CcReal::BasicType()); } if (incValSupplied) { NList incVal = argList.fourth(); if(!incVal.isSymbol(CcReal::BasicType())) { return listutils::typeError("4th argument not of type " + CcReal::BasicType()); } } return NList(Histogram2d::BasicType()).listExpr(); } /* 9.10 FindBin2d Argument 0 Histogram2d, 1 real value */ template int FindBin2dFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* hist = (const Histogram2d*)args[0].addr; const CcReal* value = (const CcReal*)args[1].addr; CcInt bin; result = qp->ResultStorage(s); if (value->IsDefined()) { if (X) bin = hist->FindBinX(value->GetRealval()); else bin = hist->FindBinY(value->GetRealval()); } else { bin = CcInt(false, 0); } ((CcInt*)result.addr)->Set(bin.IsDefined(), bin.GetIntval()); return 0; } /* 9.11 Find[_]minbin, find[_]maxbi */ template int FindMinMaxBinFun2d(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr); MinMaxBuffer* buffer; CcInt index; HIST_REAL value; int i; Tuple* resultTuple; pair coords; switch (message) { case OPEN: // Find the first bin with min./max. value, // and store it's index and value in local.addr: if (isMin) index = h->GetMinBin(); else index = h->GetMaxBin(); if (index.IsDefined()) value = h->GetBin(index.GetValue()); else value = -1; local.addr = new MinMaxBuffer(index, value, GetResultTupleTypeInfo2d()); return 0; case REQUEST: // Read the buffer: buffer = static_cast(local.addr); index = buffer->index; i = index.GetValue(); value = buffer->value; if (!index.IsDefined()) { // We are ready: result.addr = 0; return CANCEL; } // Create the next stream object; resultTuple = new Tuple(buffer->tupleTypeInfo); coords = h->GetBinCoords(index.GetValue()); resultTuple->PutAttribute(0, new CcInt(coords.first)); // x resultTuple->PutAttribute(1, new CcInt(coords.second)); // y result.addr = resultTuple; // Find the next bin with min./max. value: while (++i < h->GetNoBin() && !AlmostEqual(h->GetBin(i), value)) ; if (i < h->GetNoBin()) { // Store it for the next request: buffer->index = CcInt(true, i); } else { // Where is no next bin anymore: buffer->index = CcInt(false, -1); } return YIELD; case CLOSE: if (local.addr != 0) { delete (MinMaxBuffer*)local.addr; local = SetWord(Address(0)); } return 0; } return 1; // This should never happen... } /* 9.12 MeanX, MeanY */ int MeanXFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcReal* res = static_cast(result.addr ); CcReal mean = h->MeanX(); res->Set(mean.IsDefined(), mean.GetValue()); return 0; } ListExpr Hist2dRealTypeMap(ListExpr args) { NList list(args); const string errMsg = "Expecting " + Histogram2d::BasicType(); if (list.length() != 1) return list.typeError(errMsg); NList arg1 = list.first(); // histogram2d -> real if (arg1.isSymbol(Histogram2d::BasicType())) return NList(CcReal::BasicType()).listExpr(); return list.typeError(errMsg); } int MeanYFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcReal* res = static_cast(result.addr ); CcReal mean = h->MeanY(); res->Set(mean.IsDefined(), mean.GetValue()); return 0; } /* 9.13 Class TupleAndRelPos */ // class TupleAndRelPos // { // public: // // TupleAndRelPos() : // tuple(0), // pos(0), // cmpPtr(0) // {}; // // TupleAndRelPos(Tuple* newTuple, TupleCompareBy* cmpObjPtr = 0, // int newPos = 0) : // tuple(newTuple), // pos(newPos), // cmpPtr(cmpObjPtr) // {}; // // inline bool operator<(const TupleAndRelPos& ref) const // { // // by default < is used to define a sort order // // the priority queue creates a maximum heap, hence // // we change the result to create a minimum queue. // // It would be nice to have also an < operator in the class // // Tuple. Moreover lexicographical comparison should be done by means // // of TupleCompareBy and an appropriate sort order specification, // // if (!this->tuple || !ref.tuple) // { // return true; // } // if ( cmpPtr ) // { // return !(*(TupleCompareBy*)cmpPtr)( this->tuple, ref.tuple ); // } // else // { // return !lexSmaller( this->tuple, ref.tuple ); // } // } // // Tuple* tuple; // int pos; // // private: // void* cmpPtr; // // }; /* 9.14 Class SortStream */ class SortStream { public: SortStream( Word stream, const bool lexicographic, void *tupleSmaller ): stream( stream ), lexiTupleSmaller( lexicographic ? (LexicographicalTupleSmaller*)tupleSmaller : 0 ), tupleCmpBy( lexicographic ? 0 : (TupleCompareBy*)tupleSmaller ), count( 0 ) { // Note: Is is not possible to define a Cmp object using the // constructor // mergeTuples( PairTupleCompareBy( tupleCmpBy )). // It does only work if mergeTuples is a local variable which // does not help us in this case. Is it a Compiler bug or C++ feature? // Hence a new class TupleAndRelPos was defined which implements // the comparison operator '<'. TupleQueue* currentRun = &queue[0]; TupleQueue* nextRun = &queue[1]; Word wTuple = SetWord(Address(0)); size_t i = 0, a = 0, n = 0, m = 0, r = 0; // counter variables bool newRelation = true; MAX_MEMORY = qp->FixedMemory(); cmsg.info("ERA:ShowMemInfo") << "Sortby.MAX_MEMORY (" << MAX_MEMORY/1024 << " kb)" << endl; cmsg.send(); TupleBuffer *rel=0; TupleAndRelPos lastTuple(0, tupleCmpBy); qp->Request(stream.addr, wTuple); TupleAndRelPos minTuple(0, tupleCmpBy); while(qp->Received(stream.addr)) // consume the stream completely { count++; // tuple counter; Tuple* t = static_cast( wTuple.addr ); TupleAndRelPos nextTuple(t, tupleCmpBy); if( MAX_MEMORY > (size_t)t->GetSize() ) { nextTuple.ref->IncReference(); currentRun->push(nextTuple); i++; // increment Tuples in memory counter MAX_MEMORY -= t->GetSize(); } else { // memory is completely used if ( newRelation ) { // create new relation r++; rel = new TupleBuffer( 0 ); GenericRelationIterator *iter = 0; relations.push_back( make_pair( rel, iter ) ); newRelation = false; // get first tuple and store it in an relation nextTuple.ref->IncReference(); currentRun->push(nextTuple); minTuple = currentRun->top(); minTuple.ref->DeleteIfAllowed(); rel->AppendTuple( minTuple.ref ); lastTuple = minTuple; currentRun->pop(); } else { // check if nextTuple can be saved in current relation TupleAndRelPos copyOfLast = lastTuple; if ( nextTuple < lastTuple ) { // nextTuple is in order // Push the next tuple int the heap and append the minimum to // the current relation and push nextTuple.ref->IncReference(); currentRun->push(nextTuple); minTuple = currentRun->top(); minTuple.ref->DeleteIfAllowed(); rel->AppendTuple( minTuple.ref ); lastTuple = minTuple; currentRun->pop(); m++; } else { // nextTuple is smaller, save it for the next relation nextTuple.ref->IncReference(); nextRun->push(nextTuple); n++; if ( !currentRun->empty() ) { // Append the minimum to the current relation minTuple = currentRun->top(); minTuple.ref->DeleteIfAllowed(); rel->AppendTuple( minTuple.ref ); lastTuple = minTuple; currentRun->pop(); } else { //create a new run newRelation = true; // swap queues TupleQueue *helpRun = currentRun; currentRun = nextRun; nextRun = helpRun; ShowPartitionInfo(count,a,n,m,r,rel); i=n; a=0; n=0; m=0; } // end new run } // end next tuple is smaller // delete last tuple if saved to relation and // not referenced by minTuple if ( copyOfLast.ref && (copyOfLast.ref != minTuple.ref) ) { copyOfLast.ref->DeleteIfAllowed(); } } // check if nextTuple can be saved in current relation }// memory is completely used qp->Request(stream.addr, wTuple); } ShowPartitionInfo(count,a,n,m,r,rel); // delete lastTuple and minTuple if allowed if ( lastTuple.ref ) { lastTuple.ref->DeleteIfAllowed(); } if ( (minTuple.ref != lastTuple.ref) ) { minTuple.ref->DeleteIfAllowed(); } // the lastRun and NextRun runs in memory having // less than MAX_TUPLE elements if( !queue[0].empty() ) { Tuple* t = queue[0].top().ref; queue[0].pop(); mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, -2) ); } if( !queue[1].empty() ) { Tuple* t = queue[1].top().ref; queue[1].pop(); mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, -1) ); } // Get next tuple from each relation and push it into the heap. for( size_t i = 0; i < relations.size(); i++ ) { relations[i].second = relations[i].first->MakeScan(); Tuple *t = relations[i].second->GetNextTuple(); if( t != 0 ) { t->IncReference(); mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, i+1) ); } } Counter::getRef("Sortby:ExternPartitions") = relations.size(); } /* It may happen, that the localinfo object will be destroyed before all internal buffered tuples are delivered stream upwards, e.g. queries which use a ~head~ operator. In this case we need to delete also all tuples stored in memory. */ ~SortStream() { while( !mergeTuples.empty() ) { mergeTuples.top().ref->DeleteIfAllowed(); mergeTuples.pop(); } for( int i = 0; i < 2; i++ ) { while( !queue[i].empty() ) { queue[i].top().ref->DeleteIfAllowed(); queue[i].pop(); } } // delete information about sorted runs for( size_t i = 0; i < relations.size(); i++ ) { delete relations[i].second; relations[i].second = 0; delete relations[i].first; relations[i].first = 0; } delete lexiTupleSmaller; lexiTupleSmaller = 0; delete tupleCmpBy; tupleCmpBy = 0; } Tuple *NextResultTuple() { if( mergeTuples.empty() ) // stream finished return 0; else { // Take the first one. TupleAndRelPos p = mergeTuples.top(); //p.ref->DecReference(); mergeTuples.pop(); Tuple *result = p.ref; tuples.push(p); Tuple *t = 0; if (p.pos > 0) t = relations[p.pos-1].second->GetNextTuple(); else { int idx = p.pos+2; if ( !queue[idx].empty() ) { t = queue[idx].top().ref; t->DeleteIfAllowed(); queue[idx].pop(); } else t = 0; } if( t != 0 ) { // run not finished p.ref = t; t->IncReference(); mergeTuples.push( p ); } return result; } } size_t TupleCount() { return count; } priority_queue GetTuples() { return tuples; } private: void ShowPartitionInfo( int c, int a, int n, int m, int r, GenericRelation* rel ) { int rs = (rel != 0) ? rel->GetNoTuples() : 0; if ( RTFlag::isActive("ERA:Sort:PartitionInfo") ) { cmsg.info() << "Current run finished: " << " processed tuples=" << c << ", append minimum=" << m << ", append next=" << n << endl << " materialized runs=" << r << ", last partition's tuples=" << rs << endl << " Runs in memory: queue1= " << queue[0].size() << ", queue2= " << queue[1].size() << endl; cmsg.send(); } } Word stream; // tuple information LexicographicalTupleSmaller *lexiTupleSmaller; TupleCompareBy *tupleCmpBy; size_t count; // sorted runs created by in memory heap filtering size_t MAX_MEMORY; typedef pair SortedRun; vector< SortedRun > relations; typedef priority_queue TupleQueue; TupleQueue queue[2]; TupleQueue mergeTuples; TupleQueue tuples; }; /* 9.15 Class SortQueue */ class SortQueue { public: SortQueue( priority_queue stream, const bool lexicographic, void *tupleSmaller ): stream( stream ), lexiTupleSmaller( lexicographic ? (LexicographicalTupleSmaller*)tupleSmaller : 0 ), tupleCmpBy( lexicographic ? 0 : (TupleCompareBy*)tupleSmaller ), count( 0 ) { // Note: Is is not possible to define a Cmp object using the // constructor // mergeTuples( PairTupleCompareBy( tupleCmpBy )). // It does only work if mergeTuples is a local variable which // does not help us in this case. Is it a Compiler bug or C++ feature? // Hence a new class TupleAndRelPos was defined which implements // the comparison operator '<'. TupleQueue* currentRun = &queue[0]; TupleQueue* nextRun = &queue[1]; //Word wTuple = SetWord(Address(0)); size_t i = 0, a = 0, n = 0, m = 0, r = 0; // counter variables bool newRelation = true; MAX_MEMORY = qp->FixedMemory(); cmsg.info("ERA:ShowMemInfo") << "Sortby.MAX_MEMORY (" << MAX_MEMORY/1024 << " kb)" << endl; cmsg.send(); TupleBuffer *rel=0; TupleAndRelPos lastTuple(0, tupleCmpBy); //qp->Request(stream.addr, wTuple); TupleAndRelPos currentTuple; TupleAndRelPos minTuple(0, tupleCmpBy); while(!stream.empty()) // consume the stream completely { currentTuple = stream.top(); currentTuple.ref->DeleteIfAllowed(); stream.pop(); count++; // tuple counter; Tuple* t = static_cast( currentTuple.ref ); TupleAndRelPos nextTuple(t, tupleCmpBy); if( MAX_MEMORY > (size_t)t->GetSize() ) { nextTuple.ref->IncReference(); currentRun->push(nextTuple); i++; // increment Tuples in memory counter MAX_MEMORY -= t->GetSize(); } else { // memory is completely used if ( newRelation ) { // create new relation r++; rel = new TupleBuffer( 0 ); GenericRelationIterator *iter = 0; relations.push_back( make_pair( rel, iter ) ); newRelation = false; // get first tuple and store it in an relation nextTuple.ref->IncReference(); currentRun->push(nextTuple); minTuple = currentRun->top(); minTuple.ref->DeleteIfAllowed(); rel->AppendTuple( minTuple.ref ); lastTuple = minTuple; currentRun->pop(); } else { // check if nextTuple can be saved in current relation TupleAndRelPos copyOfLast = lastTuple; if ( nextTuple < lastTuple ) { // nextTuple is in order // Push the next tuple int the heap and append the minimum to // the current relation and push nextTuple.ref->IncReference(); currentRun->push(nextTuple); minTuple = currentRun->top(); minTuple.ref->DeleteIfAllowed(); rel->AppendTuple( minTuple.ref ); lastTuple = minTuple; currentRun->pop(); m++; } else { // nextTuple is smaller, save it for the next relation nextTuple.ref->IncReference(); nextRun->push(nextTuple); n++; if ( !currentRun->empty() ) { // Append the minimum to the current relation minTuple = currentRun->top(); minTuple.ref->DeleteIfAllowed(); rel->AppendTuple( minTuple.ref ); lastTuple = minTuple; currentRun->pop(); } else { //create a new run newRelation = true; // swap queues TupleQueue *helpRun = currentRun; currentRun = nextRun; nextRun = helpRun; ShowPartitionInfo(count,a,n,m,r,rel); i=n; a=0; n=0; m=0; } // end new run } // end next tuple is smaller // delete last tuple if saved to relation and // not referenced by minTuple if ( copyOfLast.ref && (copyOfLast.ref != minTuple.ref) ) { copyOfLast.ref->DeleteIfAllowed(); } } // check if nextTuple can be saved in current relation }// memory is completely used //qp->Request(stream.addr, wTuple); } ShowPartitionInfo(count,a,n,m,r,rel); // delete lastTuple and minTuple if allowed if ( lastTuple.ref ) { lastTuple.ref->DeleteIfAllowed(); } if ( (minTuple.ref != lastTuple.ref) ) { minTuple.ref->DeleteIfAllowed(); } // the lastRun and NextRun runs in memory having // less than MAX_TUPLE elements if( !queue[0].empty() ) { Tuple* t = queue[0].top().ref; queue[0].pop(); mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, -2) ); } if( !queue[1].empty() ) { Tuple* t = queue[1].top().ref; queue[1].pop(); mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, -1) ); } // Get next tuple from each relation and push it into the heap. for( size_t i = 0; i < relations.size(); i++ ) { relations[i].second = relations[i].first->MakeScan(); Tuple *t = relations[i].second->GetNextTuple(); if( t != 0 ) { t->IncReference(); mergeTuples.push( TupleAndRelPos(t, tupleCmpBy, i+1) ); } } Counter::getRef("Sortby:ExternPartitions") = relations.size(); } /* It may happen, that the localinfo object will be destroyed before all internal buffered tuples are delivered stream upwards, e.g. queries which use a ~head~ operator. In this case we need to delete also all tuples stored in memory. */ ~SortQueue() { while( !mergeTuples.empty() ) { mergeTuples.top().ref->DeleteIfAllowed(); mergeTuples.pop(); } for( int i = 0; i < 2; i++ ) { while( !queue[i].empty() ) { queue[i].top().ref->DeleteIfAllowed(); queue[i].pop(); } } // delete information about sorted runs for( size_t i = 0; i < relations.size(); i++ ) { delete relations[i].second; relations[i].second = 0; delete relations[i].first; relations[i].first = 0; } delete lexiTupleSmaller; lexiTupleSmaller = 0; delete tupleCmpBy; tupleCmpBy = 0; } Tuple *NextResultTuple() { if( mergeTuples.empty() ) // stream finished return 0; else { // Take the first one. TupleAndRelPos p = mergeTuples.top(); //p.ref->DecReference(); mergeTuples.pop(); tuples.push(p); Tuple *result = p.ref; Tuple *t = 0; if (p.pos > 0) t = relations[p.pos-1].second->GetNextTuple(); else { int idx = p.pos+2; if ( !queue[idx].empty() ) { t = queue[idx].top().ref; t->DeleteIfAllowed(); queue[idx].pop(); } else t = 0; } if( t != 0 ) { // run not finished p.ref = t; t->IncReference(); mergeTuples.push( p ); } return result; } } size_t TupleCount() { return count; } priority_queue GetTuples() { return tuples; } private: void ShowPartitionInfo( int c, int a, int n, int m, int r, GenericRelation* rel ) { int rs = (rel != 0) ? rel->GetNoTuples() : 0; if ( RTFlag::isActive("ERA:Sort:PartitionInfo") ) { cmsg.info() << "Current run finished: " << " processed tuples=" << c << ", append minimum=" << m << ", append next=" << n << endl << " materialized runs=" << r << ", last partition's tuples=" << rs << endl << " Runs in memory: queue1= " << queue[0].size() << ", queue2= " << queue[1].size() << endl; cmsg.send(); } } priority_queue stream; // tuple information LexicographicalTupleSmaller *lexiTupleSmaller; TupleCompareBy *tupleCmpBy; size_t count; // sorted runs created by in memory heap filtering size_t MAX_MEMORY; typedef pair SortedRun; vector< SortedRun > relations; typedef priority_queue TupleQueue; TupleQueue queue[2]; TupleQueue mergeTuples; TupleQueue tuples; }; /* 9.16 CreateHistogram2dEquicount Argument 0 tuple stream, 1 attribute name X, 2 attribute name Y, 3 maxCategories X, 4 maxCategories Y, 5 index of attribute X, 6 Index of attribute Y */ int CreateHistogram2dEquicountFun(Word* args, Word& result, int message, Word& local, Supplier s) { Tuple* currentTuple; // The query processor provided an empty Histogram1d-instance: result = qp->ResultStorage(s); Histogram2d* hist = (Histogram2d*) result.addr; hist->Clear(); const CcInt* indexX = (const CcInt*)args[5].addr; const CcInt* indexY = (const CcInt*)args[6].addr; const CcInt* maxCategoriesX = (const CcInt*)args[3].addr; const CcInt* maxCategoriesY = (const CcInt*)args[4].addr; qp->Open(args[0].addr); // number of bins must be a positive integer value, // else the result is undefined if (maxCategoriesX->IsDefined() && maxCategoriesY->IsDefined() && maxCategoriesX->GetIntval() > 0 && maxCategoriesY->GetIntval() > 0) { // First sort values SortOrderSpecification spec; spec.push_back(pair(indexX->GetIntval(), true)); void *tupleCmp = new TupleCompareBy(spec); SortStream* sli; sli = new SortStream( args[0], false, tupleCmp); int noTuples = sli->TupleCount(); int noBins = noTuples / maxCategoriesX->GetIntval(); int rest = noTuples % maxCategoriesX->GetIntval(); // maxCategories > tupleCount if (noBins == 0) { noBins = 1; rest = 0; } if (noTuples == 0) hist->SetDefined(false); int count = 0; HIST_REAL value = -numeric_limits::max(); HIST_REAL lastvalue; int currentBin = -1; int tupleCount = 0; while ((currentTuple = sli->NextResultTuple()) != 0) { tupleCount++; CcReal* val = (CcReal*)currentTuple->GetAttribute(indexX->GetIntval()-1); if (!val->IsDefined()) continue; lastvalue = value; value = val->GetRealval(); //currentTuple->DecReference(); //currentTuple->DeleteIfAllowed(); // reached next bin? // If we have got a remainder r, the count of the first r bins is // = noBins + 1 if ((currentBin >= rest && count % noBins == 0) || (currentBin < rest && count % (noBins + 1) == 0)) { // current value not yet greater than precedent value if (CmpReal(value, lastvalue) != 1) { //hist->Insert(currentBin); continue; } // if only a few values are left, merge the last bins else if ((count + noTuples - tupleCount) < noBins * 1.1) { //hist->Insert(currentBin); continue; } else { hist->AppendRangeX(value); //hist->AppendBin(0.0); currentBin++; count = 0; } } // (count % noBins == 0) //hist->Insert(currentBin); count++; } // add the last range // Given that FACTOR does not cause value to be $<$ lastrange, here // the less elegant way will do. HIST_REAL lastrange = value+FACTOR; HIST_REAL fact = FACTOR; int max = 0; while (max < 10 && !(valueAppendRangeX(lastrange); // Now applying the same to the Y values // First sort values //SortOrderSpecification spec; spec.clear(); spec.push_back(pair(indexY->GetIntval(), true)); tupleCmp = new TupleCompareBy(spec); //SortByLocalInfo* sli; SortQueue* sq = new SortQueue( sli->GetTuples(), false, tupleCmp); noTuples = sq->TupleCount(); noBins = noTuples / maxCategoriesY->GetIntval(); rest = noTuples % maxCategoriesY->GetIntval(); // maxCategories > tupleCount if (noBins == 0) { noBins = 1; rest = 0; } if (noTuples == 0) hist->SetDefined(false); count = 0; value = -numeric_limits::max(); currentBin = -1; tupleCount = 0; while ((currentTuple = sq->NextResultTuple()) != 0) { tupleCount++; CcReal* val = (CcReal*)currentTuple->GetAttribute(indexY->GetIntval()-1); if (!val->IsDefined()) continue; lastvalue = value; value = val->GetRealval(); //currentTuple->DecReference(); //currentTuple->DeleteIfAllowed(); // next bin reached? // If we have got a remainder r, the count of the first r bins is // = noBins+1 if ((currentBin >= rest && count % noBins == 0) || (currentBin < rest && count % (noBins + 1) == 0)) { // current value not yet greater than precedent value if (CmpReal(value, lastvalue) != 1) { //hist->Insert(currentBin); continue; } // If only a few values are left, merge the last bins. else if ((count + noTuples - tupleCount) < noBins * 1.1) { //hist->Insert(currentBin); continue; } else { hist->AppendRangeY(value); //hist->AppendBin(0.0); currentBin++; count = 0; } } // (count % noBins == 0) //hist->Insert(currentBin); count++; } // add the last range // Given that FACTOR does not cause value to be $<$ lastrange, here // less elegant way will do. lastrange = value+FACTOR; fact = FACTOR; max = 0; while (max < 10 && !(valueAppendRangeY(lastrange); int noBin = (hist->GetNoRangeX()-1)*(hist->GetNoRangeY()-1); for (int i=0; i < noBin; ++i) { hist->AppendBin(0.0); } priority_queue tuples = sq->GetTuples(); TupleAndRelPos tuplePos; Tuple* t; CcReal* x; CcReal* y; while (!tuples.empty()) { tuplePos = tuples.top(); tuples.pop(); t = tuplePos.ref; x = (CcReal*)t->GetAttribute(indexX->GetIntval()-1); y = (CcReal*)t->GetAttribute(indexY->GetIntval()-1); hist->Insert((HIST_REAL)x->GetRealval(), (HIST_REAL)y->GetRealval()); t->DeleteIfAllowed(); } delete sli; delete sq; } else { Word elem; qp->Request(args[0].addr, elem); while(qp->Received(args[0].addr)) { ((Tuple*)elem.addr)->DeleteIfAllowed(); qp->Request(args[0].addr, elem); } hist->SetDefined(false); } qp->Close(args[0].addr); return 0; } ListExpr CreateHistogram2dEquicountTypeMap(ListExpr args) { if(nl->ListLength(args)!=5){ return listutils::typeError("5 Arguments expected"); } ListExpr stream = nl->First(args); ListExpr attrX = nl->Second(args); ListExpr attrY = nl->Third(args); ListExpr maxCatX = nl->Fourth(args); ListExpr maxCatY = nl->Fifth(args); if(!listutils::isTupleStream(stream)){ return listutils::typeError("first argument must be a tuplestream"); } if(!listutils::isSymbol(attrX)){ return listutils::typeError("second argument is not a " "valid attribute name"); } if(!listutils::isSymbol(attrY)){ return listutils::typeError("third argument is not a " "valid attribute name"); } if(!listutils::isSymbol(maxCatX, CcInt::BasicType())){ return listutils::typeError("fourth argument mut be of type int"); } if(!listutils::isSymbol(maxCatY, CcInt::BasicType())){ return listutils::typeError("fifth argument mut be of type int"); } ListExpr attrList = nl->Second(nl->Second(stream)); ListExpr attrTypeX; string attrXname = nl->SymbolValue(attrX); int indexX = listutils::findAttribute(attrList, attrXname , attrTypeX); if(indexX==0){ return listutils::typeError(attrXname + " is not an attribute " "of the stream"); } if(!listutils::isSymbol(attrTypeX,CcReal::BasicType())){ return listutils::typeError("attribute " + attrXname + " is not of type real"); } ListExpr attrTypeY; string attrYname = nl->SymbolValue(attrY); int indexY = listutils::findAttribute(attrList, attrYname , attrTypeY); if(indexY==0){ return listutils::typeError(attrYname + " is not an attribute " "of the stream"); } if(!listutils::isSymbol(attrTypeY,CcReal::BasicType())){ return listutils::typeError("attribute " + attrYname + " is not of type real"); } return nl->ThreeElemList(nl->SymbolAtom(Symbol::APPEND()), nl->TwoElemList(nl->IntAtom(indexX), nl->IntAtom(indexY)), nl->SymbolAtom(Histogram2d::BasicType())); } // CreateHistogram2dEquicountTypeMap(ListExpr args) /* 9.17 VarianceX, VarianceY, Covariance */ int VarianceXFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcReal* res = static_cast(result.addr ); CcReal var = h->VarianceX(); res->Set(var.IsDefined(), var.GetValue()); return 0; } int VarianceYFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcReal* res = static_cast(result.addr ); CcReal var = h->VarianceY(); res->Set(var.IsDefined(), var.GetValue()); return 0; } int CovarianceFun(Word* args, Word& result, int message, Word& local, Supplier s) { const Histogram2d* h = static_cast(args[0].addr ); result = qp->ResultStorage(s); CcReal* res = static_cast(result.addr ); CcReal covar = h->Covariance(); res->Set(covar.IsDefined(), covar.GetValue()); return 0; } /* 9.18 Instantiation of Template Functions The compiler cannot expand these template functions in the file ~HistogramAlgebra.cpp~. */ template int Insert2dFun(Word* args, Word& result, int message, Word& local, Supplier s); template int Insert2dFun(Word* args, Word& result, int message, Word& local, Supplier s); template ListExpr Insert2dTypeMap(ListExpr args); template ListExpr Insert2dTypeMap(ListExpr args); template int FindBin2dFun(Word* args, Word& result, int message, Word& local, Supplier s); template int FindBin2dFun(Word* args, Word& result, int message, Word& local, Supplier s); template int FindMinMaxBinFun2d(Word* args, Word& result, int message, Word& local, Supplier s); template int FindMinMaxBinFun2d(Word* args, Word& result, int message, Word& local, Supplier s); } // namespace hgr