420 lines
13 KiB
C++
420 lines
13 KiB
C++
|
|
/*
----
This file is part of SECONDO.

Copyright (C) 2019,
Faculty of Mathematics and Computer Science,
Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----


//[<] [\ensuremath{<}]
//[>] [\ensuremath{>}]

\setcounter{tocdepth}{2}
\tableofcontents


1 Merger

*/
|
||
|
|
|
||
|
|
#include <assert.h>
|
||
|
|
#include <iostream>
|
||
|
|
#include <iomanip>
|
||
|
|
|
||
|
|
#include "Merger.h"
|
||
|
|
#include "Utils.h"
|
||
|
|
|
||
|
|
using namespace cdacspatialjoin;
|
||
|
|
using namespace std;
|
||
|
|
|
||
|
|
|
||
|
|
/*
1 Merger class

*/
|
||
|
|
// Creates a Merger for the two given areas: stores the parameters, allocates
// the result MergedArea, resets the progress state (report type / sub type
// and the four resume indices) and pre-computes the temporary edge vectors.
// When the constructor returns, currentTask is TASK::report.
Merger::Merger(MergedAreaPtr area1_, MergedAreaPtr area2_,
        const bool isLastMerge_, IOData* const ioData_) :
        area1(area1_),
        area2(area2_),
        isLastMerge(isLastMerge_),
        ioData(ioData_),
        result(new MergedArea(area1, area2)),
        currentTask(TASK::initialize),
        reportType(0),
        reportSubType(0),
        indexSBegin(0),
        indexS(0),
        indexTBegin(0),
        indexT(0) {

#ifdef CDAC_SPATIAL_JOIN_DETAILED_REPORT_TO_CONSOLE
   cout << endl
        << "merge " << area1->toString() << " | " << area2->toString() << endl;
#endif

   // values shared by both pre-processing calls below
   const EdgeIndex_t edgeStart = result->edgeIndexStart;
   const EdgeIndex_t edgeEnd = result->edgeIndexEnd;
   const bool notLastMerge = !isLastMerge;

   // pre-process the given areas: from the left edges of area1 and the right
   // edges of area2, calculate the temporary vectors left...Span,
   // right...Span and leftRight... - first for set A ...
   JoinEdge::removeCompleteRectangles(area1->leftA, area2->rightA,
           leftASpan, rightASpan, leftRightA,
           notLastMerge, edgeStart, edgeEnd);

   // ... then for set B
   JoinEdge::removeCompleteRectangles(area1->leftB, area2->rightB,
           leftBSpan, rightBSpan, leftRightB,
           notLastMerge, edgeStart, edgeEnd);

   // initialization is complete, merge() starts with reporting
   currentTask = TASK::report;
}
|
||
|
|
|
||
|
|
// Frees the two source areas that were passed to the constructor.
// NOTE(review): "result" is allocated in the constructor but not deleted
// here - presumably its ownership is passed on to the caller; confirm.
Merger::~Merger() {
   delete area1; // first source area
   delete area2; // second source area
}
|
||
|
|
|
||
|
|
bool Merger::merge() {
|
||
|
|
static constexpr unsigned REPORT_TYPE_COUNT = 4;
|
||
|
|
|
||
|
|
// outer loop of reportPairs() calls
|
||
|
|
while (currentTask == TASK::report && reportType < REPORT_TYPE_COUNT) {
|
||
|
|
bool done;
|
||
|
|
switch(reportType) {
|
||
|
|
case 0:
|
||
|
|
done = leftASpan.empty() ||
|
||
|
|
reportPairs(leftASpan, area2->leftB, area2->completeB);
|
||
|
|
break;
|
||
|
|
case 1:
|
||
|
|
done = rightASpan.empty() ||
|
||
|
|
reportPairs(rightASpan, area1->leftB, area1->completeB);
|
||
|
|
break;
|
||
|
|
case 2:
|
||
|
|
done = leftBSpan.empty() ||
|
||
|
|
reportPairs(leftBSpan, area2->leftA, area2->completeA);
|
||
|
|
break;
|
||
|
|
case 3:
|
||
|
|
done = rightBSpan.empty() ||
|
||
|
|
reportPairs(rightBSpan, area1->leftA, area1->completeA);
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
assert (false); // unexpected case
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!done)
|
||
|
|
return false; // outTBlock full, continue at next merge() call
|
||
|
|
|
||
|
|
// reportPairs() was completed for the given reportType,
|
||
|
|
// proceed with next reportType
|
||
|
|
++reportType;
|
||
|
|
if (reportType == REPORT_TYPE_COUNT)
|
||
|
|
currentTask = TASK::postProcess; // report fully completed
|
||
|
|
}
|
||
|
|
|
||
|
|
// post-processing: create the result MergedArea
|
||
|
|
if (currentTask == TASK::postProcess) {
|
||
|
|
if (isLastMerge) {
|
||
|
|
// do nothing: the result areas are obsolete
|
||
|
|
} else {
|
||
|
|
JoinEdge::merge(leftASpan, 0, area2->leftA, 0, result->leftA);
|
||
|
|
JoinEdge::merge(rightASpan, 0, area1->rightA, 0, result->rightA);
|
||
|
|
JoinEdge::merge(area1->completeA, area2->completeA, leftRightA,
|
||
|
|
result->completeA);
|
||
|
|
|
||
|
|
JoinEdge::merge(leftBSpan, 0, area2->leftB, 0, result->leftB);
|
||
|
|
JoinEdge::merge(rightBSpan, 0, area1->rightB, 0, result->rightB);
|
||
|
|
JoinEdge::merge(area1->completeB, area2->completeB, leftRightB,
|
||
|
|
result->completeB);
|
||
|
|
}
|
||
|
|
currentTask = TASK::done;
|
||
|
|
}
|
||
|
|
|
||
|
|
// assert (currentTask == TASK::done);
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool Merger::reportPairs(const JoinEdgeVec& span,
|
||
|
|
const JoinEdgeVec& left,
|
||
|
|
const JoinEdgeVec& complete) {
|
||
|
|
static constexpr unsigned REPORT_SUB_TYPE_COUNT = 2;
|
||
|
|
|
||
|
|
// inner loop of reportPairs() calls
|
||
|
|
while (reportSubType < REPORT_SUB_TYPE_COUNT) {
|
||
|
|
bool done = false;
|
||
|
|
if (reportSubType == 0) {
|
||
|
|
done = left.empty() || reportPairsSub(span, left);
|
||
|
|
} else { // reportSubType == 1
|
||
|
|
done = complete.empty() || reportPairsSub(span, complete);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!done)
|
||
|
|
return false; // outTBlock full, continue at next merge() call
|
||
|
|
|
||
|
|
// reportPairsSub() was completed for the given reportSubType,
|
||
|
|
// proceed with next reportSubType
|
||
|
|
++reportSubType;
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
++MergerStats::onlyInstance->reportPairsCount;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// report type completed, reset reportSubType to 0 for next reportType
|
||
|
|
reportSubType = 0;
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool Merger::reportPairsSub(const JoinEdgeVec& edgesS,
|
||
|
|
const JoinEdgeVec& edgesT) {
|
||
|
|
|
||
|
|
// get some values frequently used in the loop below; use specialized
|
||
|
|
// functions if there is only one edge in one of the vectors
|
||
|
|
const size_t sizeS = edgesS.size();
|
||
|
|
const size_t sizeT = edgesT.size();
|
||
|
|
|
||
|
|
// refer special cases to specialized functions. sizeS/T can be passed, too
|
||
|
|
// since up to 4 parameters can be passed in registers
|
||
|
|
if (sizeS == 1 && sizeT == 1)
|
||
|
|
return reportPairsSub11(edgesS[0], edgesT[0]);
|
||
|
|
if (sizeS == 1)
|
||
|
|
return reportPairsSub1(edgesS[0], edgesT, sizeT);
|
||
|
|
if (sizeT == 1)
|
||
|
|
return reportPairsSub1(edgesT[0], edgesS, sizeS);
|
||
|
|
// otherwise, both sets contain multiple edges which will be treated below
|
||
|
|
|
||
|
|
// improve performance by using local variables for fields
|
||
|
|
IOData* const ioData_ = ioData;
|
||
|
|
size_t indexSBegin_ = indexSBegin;
|
||
|
|
size_t indexTBegin_ = indexTBegin;
|
||
|
|
size_t indexS_ = indexS;
|
||
|
|
size_t indexT_ = indexT;
|
||
|
|
|
||
|
|
while (indexSBegin_ < sizeS && indexTBegin_ < sizeT) {
|
||
|
|
// determine the current edgeS/TBegin
|
||
|
|
const JoinEdge& edgeSBegin = edgesS[indexSBegin_];
|
||
|
|
const JoinEdge& edgeTBegin = edgesT[indexTBegin_];
|
||
|
|
|
||
|
|
if (edgeSBegin.yMin < edgeTBegin.yMin) {
|
||
|
|
const double yMaxS = edgeSBegin.yMax;
|
||
|
|
// speed up the most frequent case (for many large joins, 99%)
|
||
|
|
if (edgeTBegin.yMin > yMaxS) {
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
MergerStats::onlyInstance->add(0);
|
||
|
|
#endif
|
||
|
|
++indexSBegin_;
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
// find intersections of the y range of edgeSBegin and edges in
|
||
|
|
// edgesT[] (starting from indexTBegin)
|
||
|
|
if (indexT_ < indexTBegin_)
|
||
|
|
indexT_ = indexTBegin_;
|
||
|
|
while (indexT_ < sizeT && edgesT[indexT_].yMin <= yMaxS) {
|
||
|
|
// report pair of rectangles (represented by the two edges);
|
||
|
|
// edges can be passed to the output function in any order
|
||
|
|
const bool outTBlockFull =
|
||
|
|
!ioData_->appendToOutput(edgeSBegin, edgesT[indexT_]);
|
||
|
|
|
||
|
|
// increase indexT before a possible "return false", so this pair
|
||
|
|
// will not be reported again
|
||
|
|
++indexT_;
|
||
|
|
|
||
|
|
if (outTBlockFull) {
|
||
|
|
// outTBlock is full. Index positions remain stored in
|
||
|
|
// this Merger instance for the next call (with a new outTBlock)
|
||
|
|
|
||
|
|
// write back local copies to fields
|
||
|
|
indexSBegin = indexSBegin_;
|
||
|
|
indexTBegin = indexTBegin_;
|
||
|
|
indexS = indexS_;
|
||
|
|
indexT = indexT_;
|
||
|
|
return false;
|
||
|
|
} // otherwise continue reporting
|
||
|
|
}
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
MergerStats::onlyInstance->add(indexT_ - indexTBegin_);
|
||
|
|
#endif
|
||
|
|
indexT_ = indexTBegin_;
|
||
|
|
|
||
|
|
// proceed to next edge in edgesS
|
||
|
|
++indexSBegin_;
|
||
|
|
|
||
|
|
} else { // edgeTBegin.yMin <= edgeSBegin.yMin
|
||
|
|
const double yMaxT = edgeTBegin.yMax;
|
||
|
|
// speed up the most frequent case (for many large joins, 99%)
|
||
|
|
if (edgeSBegin.yMin > yMaxT) {
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
MergerStats::onlyInstance->add(0);
|
||
|
|
#endif
|
||
|
|
++indexTBegin_;
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
// find intersections of the y range of edgeTBegin and edges in
|
||
|
|
// edgesS[] (starting from indexSBegin)
|
||
|
|
if (indexS_ < indexSBegin_)
|
||
|
|
indexS_ = indexSBegin_;
|
||
|
|
while (indexS_ < sizeS && edgesS[indexS_].yMin <= yMaxT) {
|
||
|
|
// report pair of rectangles (represented by the two edges);
|
||
|
|
// edges can be passed to the output function in any order
|
||
|
|
const bool outTBlockFull =
|
||
|
|
!ioData_->appendToOutput(edgesS[indexS_], edgeTBegin);
|
||
|
|
|
||
|
|
// increase indexS before a possible "return false", so this pair
|
||
|
|
// will not be reported again
|
||
|
|
++indexS_;
|
||
|
|
|
||
|
|
if (outTBlockFull) {
|
||
|
|
// outTBlock is full. Index positions remain stored in
|
||
|
|
// this Merger instance for the next call (with a new outTBlock)
|
||
|
|
|
||
|
|
// write back local copies to fields
|
||
|
|
indexSBegin = indexSBegin_;
|
||
|
|
indexTBegin = indexTBegin_;
|
||
|
|
indexS = indexS_;
|
||
|
|
indexT = indexT_;
|
||
|
|
return false;
|
||
|
|
} // otherwise continue reporting
|
||
|
|
}
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
MergerStats::onlyInstance->add(indexS_ - indexSBegin_);
|
||
|
|
#endif
|
||
|
|
indexS_ = indexSBegin_;
|
||
|
|
|
||
|
|
// proceed to next edge in edgesT
|
||
|
|
++indexTBegin_;
|
||
|
|
}
|
||
|
|
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
++MergerStats::onlyInstance->reportPairsSubCount;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// reset indices for next call
|
||
|
|
indexSBegin = 0;
|
||
|
|
indexS = 0;
|
||
|
|
indexTBegin = 0;
|
||
|
|
indexT = 0;
|
||
|
|
|
||
|
|
// report subtype completed
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool Merger::reportPairsSub1(const JoinEdge& edgeS,
|
||
|
|
const JoinEdgeVec& edgesT, const size_t sizeT) {
|
||
|
|
|
||
|
|
// improve performance by using local variable for field "indexTBegin"
|
||
|
|
IOData* const ioData_ = ioData;
|
||
|
|
size_t indexTBegin_ = indexTBegin;
|
||
|
|
|
||
|
|
// iterating over the edgesT[] entries,
|
||
|
|
// first treat all cases in which edgeT.yMin <= edgeS.yMin
|
||
|
|
const double yMinS = edgeS.yMin;
|
||
|
|
while (indexTBegin_ < sizeT) {
|
||
|
|
const JoinEdge& edgeTBegin = edgesT[indexTBegin_];
|
||
|
|
if (edgeTBegin.yMin > yMinS)
|
||
|
|
break;
|
||
|
|
++indexTBegin_; // prepare next iteration in case of "return false"
|
||
|
|
if (yMinS <= edgeTBegin.yMax) {
|
||
|
|
if (!ioData_->appendToOutput(edgeS, edgeTBegin)) {
|
||
|
|
// write back local copy to field
|
||
|
|
indexTBegin = indexTBegin_;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// now, for all remaining edgeT, yMinS < edgeT.yMin
|
||
|
|
|
||
|
|
// continue iterating while edgeT.yMin is in the interval (yMinS, yMaxS]
|
||
|
|
const double yMaxS = edgeS.yMax;
|
||
|
|
if (indexTBegin_ < sizeT) {
|
||
|
|
// improve performance by using local variable for field "indexT"
|
||
|
|
size_t indexT_ = indexT;
|
||
|
|
if (indexT_ < indexTBegin_)
|
||
|
|
indexT_ = indexTBegin_;
|
||
|
|
|
||
|
|
while (indexT_ < sizeT) {
|
||
|
|
const JoinEdge& edgeT = edgesT[indexT_];
|
||
|
|
if (edgeT.yMin > yMaxS)
|
||
|
|
break;
|
||
|
|
++indexT_; // prepare next iteration in case of "return false"
|
||
|
|
if (!ioData_->appendToOutput(edgeS, edgeT)) {
|
||
|
|
// write back local copies to fields
|
||
|
|
indexTBegin = indexTBegin_;
|
||
|
|
indexT = indexT_;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// now all possible intersections were reported
|
||
|
|
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
++MergerStats::onlyInstance->reportPairsSub1Count;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// reset indices for next call (indexSBegin and indexS were not used here)
|
||
|
|
indexTBegin = 0;
|
||
|
|
indexT = 0;
|
||
|
|
|
||
|
|
// report subtype completed
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
bool Merger::reportPairsSub11(const JoinEdge& edgeS,
|
||
|
|
const JoinEdge& edgeT) {
|
||
|
|
if (indexTBegin == 0) {
|
||
|
|
if ((edgeS.yMax >= edgeT.yMin && edgeS.yMin <= edgeT.yMax)) {
|
||
|
|
// report intersection
|
||
|
|
if (!ioData->appendToOutput(edgeS, edgeT)) {
|
||
|
|
indexTBegin = 1; // prevent multiple reporting
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#ifdef CDAC_SPATIAL_JOIN_METRICS
|
||
|
|
++MergerStats::onlyInstance->reportPairsSub11Count;
|
||
|
|
#endif
|
||
|
|
|
||
|
|
// reset indices for next call
|
||
|
|
indexTBegin = 0;
|
||
|
|
|
||
|
|
// report subtype completed
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
size_t Merger::getJoinEdgeCount(const bool includeAreas) const {
|
||
|
|
size_t joinEdgeCount =
|
||
|
|
leftASpan.size() + rightASpan.size() + leftRightA.size()
|
||
|
|
+ leftBSpan.size() + rightBSpan.size() + leftRightB.size();
|
||
|
|
if (includeAreas) {
|
||
|
|
joinEdgeCount += area1->getJoinEdgeCount() + area2->getJoinEdgeCount();
|
||
|
|
joinEdgeCount += result->getJoinEdgeCount();
|
||
|
|
}
|
||
|
|
return joinEdgeCount;
|
||
|
|
}
|
||
|
|
|
||
|
|
size_t Merger::getUsedMemory() const {
|
||
|
|
size_t usedMemory = sizeof(Merger)
|
||
|
|
+ getJoinEdgeCount(false) * sizeof(JoinEdge)
|
||
|
|
+ area1->getUsedMemory() + area2->getUsedMemory()
|
||
|
|
+ result->getUsedMemory();
|
||
|
|
return usedMemory;
|
||
|
|
}
|