/*
----
This file is part of SECONDO.

Copyright (C) 2019,
Faculty of Mathematics and Computer Science,
Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----

//[<] [\ensuremath{<}]
//[>] [\ensuremath{>}]

\setcounter{tocdepth}{3}
\tableofcontents


1 Hybrid Hash Join

*/

#pragma once

// NOTE(review): the two include directives below have no target in this
// copy of the header; the header names appear to have been lost. They are
// kept verbatim - restore the targets from the original file.
#include
#include
#include "Operator.h"
#include "vector"
#include "thread"
#include "condition_variable"
// NOTE(review): same problem as above - include targets missing.
#include
#include

// NOTE(review): throughout this header std::pair, std::shared_ptr and
// std::vector are written without template arguments and some declarations
// carry stray '>' characters. The argument lists appear to have been lost
// together with the include targets. All such declarations are reproduced
// verbatim; the element types must be restored from the original file.
// The descriptions below are derived from the declared names and signatures
// only - the definitions are not part of this header.

namespace mthreaded {

// Bucket-indexed store for the tuples of two join inputs called R and S.
// The interface has a write side (PushR/PushS, CloseWrite) and a read side
// (OpenRead, PullR/PullS), plus accessors for per-bucket overflow data of S.
// Judging by the names (setSPersist, maxMem, freeMem) it presumably moves
// data out of memory when the budget is exhausted - TODO confirm in the
// implementation file.
class HashTablePersist {
  public:
  // _bucketsNo     -> stored in bucketsNo
  // _coreNoWorker  -> stored in coreNoWorker
  // _maxMem        -> stored in maxMem
  // _ttR, _ttS     -> tuple types of the R and S input (raw pointers;
  //                   ownership is not visible from this header)
  // _joinAttr      -> pair describing the join attributes of both inputs
  //                   (element types lost, see note at top of file)
  HashTablePersist(const size_t _bucketsNo, const size_t _coreNoWorker,
                   const size_t _maxMem,
                   TupleType* _ttR, TupleType* _ttS,
                   const std::pair _joinAttr);

  ~HashTablePersist();

  // Write side: pass a tuple of input R resp. S for bucket index `bucket`.
  void PushR(Tuple* tuple, const size_t bucket);

  void PushS(Tuple* tuple, const size_t bucket);

  // Read side: fetch a tuple of input R resp. S for bucket index `bucket`.
  // NOTE(review): the end-of-data signal (presumably a null pointer) is not
  // visible here - confirm in the implementation.
  Tuple* PullR(const size_t bucket) const;

  Tuple* PullS(const size_t bucket) const;

  // Counterpart to the Push calls; presumably ends the write phase.
  void CloseWrite();

  // Prepares bucket `bucket` for the Pull calls. The meaning of the
  // returned size_t is defined in the implementation.
  size_t OpenRead(size_t bucket);

  // Informs the table about `usedMem`, presumably the memory taken by the
  // in-memory hash table of the caller - TODO confirm.
  void UseMemHashTable(size_t usedMem);

  // Setter for the member hashMod.
  void SetHashMod(size_t hashMod);

  void CalcOverflow();

  // Overflow data of input S for bucket `bucket` (pointee type lost).
  std::shared_ptr GetOverflowS(const size_t bucket) const;

  // Per-bucket value, presumably taken from overflowBucketNo.
  size_t GetOverflowBucketNo(const size_t bucket) const;

  private:
  // Per-bucket containers for S, R and the overflow of S.
  std::vector> hashBucketsS;
  std::vector> hashBucketsR;
  std::vector> hashBucketsOverflowS;
  // Per-bucket bookkeeping for R and S (element type lost).
  std::vector sizeR;
  std::vector sizeS;
  bool setSPersist;
  size_t lastMemBufferR;
  size_t lastMemBufferS;
  size_t hashMod;
  std::vector overflowBucketNo;
  // Constructor arguments, kept for the lifetime of the object. The const
  // members make the class non-copy-assignable.
  const size_t bucketsNo;
  const size_t coreNoWorker;
  const size_t maxMem;
  TupleType* ttR;
  TupleType* ttS;
  const std::pair joinAttr;
  size_t freeMem;
};


// Callable worker object: the work is started through operator()().
// It holds handles to a shared tuple buffer, to one partition buffer per
// input (R and S) and to a HashTablePersist, and splits its work into
// phase1, phase2 and recursiveOverflow.
class HashJoinWorker {
  private:
  size_t maxMem;
  size_t coreNoWorker;
  size_t streamInNo;
  // Buffers handed in through the constructor (pointee types lost).
  std::shared_ptr tupleBuffer;
  std::shared_ptr> partBufferR;
  std::shared_ptr> partBufferS;
  // Default member initializers. NOTE(review): the rationale for the
  // values 20 and 999997 is not documented in this header.
  size_t bucketNo = 20;
  size_t bucketsInMem1st = 999997;
  std::pair joinAttr;
  TupleType* resultTupleType;
  std::shared_ptr hashTablePersist;
  TupleType* ttR;
  TupleType* ttS;
  std::shared_ptr overflowBufferR;
  std::shared_ptr overflowBufferS;

  // First phase; `countOverflow` is taken by non-const reference, so the
  // function can write to it.
  size_t phase1(size_t &countOverflow);

  // Phase 2 of the hash join; per the original note it returns
  // countOverflow.
  size_t phase2(size_t hashMod, size_t pos);

  // Processes a pair of overflow containers for R and S; by its name it
  // presumably calls itself for overflow that still does not fit.
  void recursiveOverflow(std::shared_ptr overflowR,
                         std::shared_ptr overflowS,
                         size_t sizeR, size_t xMod);

  // Comparison of two tuples; does not modify the worker (const).
  bool tupleEqual(Tuple* a, Tuple* b) const;

  public:
  // NOTE(review): the parameter list has _ttS but no _ttR although the
  // class has a member ttR. How ttR is set is not visible here - verify in
  // the implementation.
  HashJoinWorker(
          size_t _maxMem,
          size_t _coreNoWorker,
          size_t _streamInNo,
          std::shared_ptr _tupleBuffer,
          std::shared_ptr> _partBufferR,
          std::shared_ptr> _partBufferS,
          std::pair _joinAttr,
          TupleType* _resultTupleType,
          TupleType* _ttS);

  ~HashJoinWorker();

  // Call operator; according to the original comment ("Thread") this is
  // the body executed by a worker thread.
  void operator()();
};


// State of one running hybrid hash join: the two input streams, the
// threads in joinThreads, the shared tuple buffer and one collection of
// partition buffers per input.
class hybridHashJoinLI {
  private:
  Stream streamR;
  Stream streamS;
  const std::pair joinAttr;
  std::vector joinThreads;
  const size_t maxMem;
  const size_t coreNo;
  const size_t coreNoWorker;
  TupleType* resultTupleType;
  std::shared_ptr tupleBuffer;
  std::vector>> partBufferR;
  std::vector>> partBufferS;
  // Initialized from MThreadedSingleton::getCoresToUse().
  // NOTE(review): non-static members are initialized in declaration order,
  // and `cores` is declared after coreNo and coreNoWorker. Check that the
  // constructor does not compute those two from `cores`.
  const size_t cores = MThreadedSingleton::getCoresToUse();

  public:
  // Constructor: takes both input streams as Word, the join attribute
  // pair, the memory limit and the result type as ListExpr.
  hybridHashJoinLI(
          Word _streamR,
          Word _streamS,
          const std::pair _joinAttr,
          const size_t _maxMem,
          ListExpr resultType);

  // Destructor
  ~hybridHashJoinLI();

  // Output: hands out the next result tuple.
  // NOTE(review): the end-of-stream value (presumably a null pointer) is
  // not visible here.
  Tuple* getNext();

  private:
  // Scheduler: internal helper, not callable from outside the class.
  void Scheduler();
};


// Bundles the pieces of the operator and exposes them via getOperator().
class op_hybridHashJoin {
  // No access specifier precedes the next three members, so they are
  // private (class default).
  // By SECONDO naming convention TM / VM presumably stand for type mapping
  // and value mapping - TODO confirm.
  static ListExpr hybridHashJoinTM(ListExpr args);

  static int hybridHashJoinVM(Word* args, Word &result, int message,
                              Word &local, Supplier s);

  std::string getOperatorSpec();

  public:
  explicit op_hybridHashJoin() = default;

  ~op_hybridHashJoin() = default;

  // Returns the operator object (pointee type lost, see note at top).
  std::shared_ptr getOperator();
};

} // end of namespace mthreaded