Files
secondo/Algebras/ParThread/ConcurrentTupleBuffer/DataPartitioner.h

196 lines
5.5 KiB
C
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
----
This file is part of SECONDO.
Copyright (C) 2019, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [10] title: [{\Large \bf ] [}]
//paragraph [21] table1column: [\begin{quote}\begin{tabular}{l}] [\end{tabular}\end{quote}]
//paragraph [22] table2columns: [\begin{quote}\begin{tabular}{ll}] [\end{tabular}\end{quote}]
//paragraph [23] table3columns: [\begin{quote}\begin{tabular}{lll}] [\end{tabular}\end{quote}]
//paragraph [24] table4columns: [\begin{quote}\begin{tabular}{llll}] [\end{tabular}\end{quote}]
//[--------] [\hline]
//characters [1] verbatim: [$] [$]
//characters [2] formula: [$] [$]
//characters [3] capital: [\textsc{] [}]
//characters [4] teletype: [\texttt{] [}]
//[ae] [\"a]
//[oe] [\"o]
//[ue] [\"u]
//[ss] [{\ss}]
//[<=] [\leq]
//[#] [\neq]
//[tilde] [\verb|~|]
//[Contents] [\tableofcontents]
1 Header File: DataPartitioner
September 2019, Fischer Thomas
1.1 Overview
The ~DataPartitioner~ is part of a ~ConcurrentTupleBufferWriter~ and is responsible
for distributing the written tuple blocks to the queues of the ~ConcurrentTupleBuffer~.
1.2 Imports
*/
#ifndef SECONDO_PARTHREAD_DATA_PARTITONER_H
#define SECONDO_PARTHREAD_DATA_PARTITONER_H
#include "../../ExtRelation-2/HashJoin.h"
namespace parthread
{
/*
1.3 DataPartitioner
The ~ConcurrentTupleBuffer~ uses the ~DataPartitioner~ to decide which type of
~ConcurrentTupleReader~ is necessary to fetch the data from the buffer.
Partitioners that distribute tuples to more than one partition in the buffer are
marked as "SharedPartitions". Otherwise they feed only one partition with tuple
blocks; in this case the distribution type is "DedicatedPartition".
*/
// Distribution type reported by a partitioner through
// DataPartitioner::DistributionType(). In this header the
// RoundRobinDataPartitioner passes SharedPartitions to its base class and
// the HashDataPartitioner passes DedicatedPartition.
enum class DistributionTypes
{
  SharedPartitions = 0,   // same value the implicit numbering assigns
  DedicatedPartition = 1
};
// Abstract base class of the partitioning strategies in this header. It keeps
// the partition count and the distribution type handed to the constructor and
// leaves copying and the per-tuple distribution decision to derived classes.
class DataPartitioner
{
public:
  // Stores both arguments unchanged; they are reported later by
  // NumPartitions() and DistributionType().
  DataPartitioner(size_t numPartitions, DistributionTypes distType)
      : m_numPartitions{numPartitions},
        m_distType{distType}
  {
  }

  virtual ~DataPartitioner() = default;

  // Number of partitions passed to the constructor.
  size_t NumPartitions() const { return m_numPartitions; }

  // Distribution type passed to the constructor.
  DistributionTypes DistributionType() const { return m_distType; }

  virtual DataPartitioner *Copy() const = 0;
  virtual size_t DistributeValue(Tuple *tuple) = 0;
/*
~Copy~ creates a deep copy of the partitioner; the tuple buffer uses it to give
each writer it creates its own instance. ~DistributeValue~ is abstract and returns
the index of a tuple block in the writer's block vector. Implementations must
provide a distribution logic that matches their distribution type.
*/

private: //member
  size_t m_numPartitions;
  DistributionTypes m_distType;
};
// Shared-ownership handle to a partitioner of any concrete type.
using IDataPartitionerPtr = std::shared_ptr<DataPartitioner>;
/*
1.4 HashDataPartitioner and RoundRobinDataPartitioner
Both are implementations of ~DataPartitioner~ and differ in the way they distribute
the tuples to the tuple blocks processed by the writer.
~HashDataPartitioner~ uses an attribute of the tuple to calculate the hash value.
The index of the attribute is passed as a parameter to the constructor. ~DistributeValue~
returns the index of the tuple block in the block vector (a value between 0 and
~numPartitions~) where the tuple reference should be stored.
~RoundRobinDataPartitioner~ uses all tuple blocks of the writer in an alternating
fashion. ~DistributeValue~ returns the next block index on each call. When
the last block in the vector has been used, it starts again with the first
block (index 0).
*/
// Partitioner that delegates the choice of the block index to an
// extrel2::HashFunction constructed from the partition count and the
// attribute index. It registers itself with the base class as
// DistributionTypes::DedicatedPartition.
class HashDataPartitioner : public DataPartitioner
{
public: //methods
  // numPartitions: forwarded to the base class and to the hash function.
  // attrIndex:     forwarded to the hash function and remembered for Copy().
  HashDataPartitioner(const size_t numPartitions, const int attrIndex)
      : DataPartitioner(numPartitions, DistributionTypes::DedicatedPartition),
        m_attrIndex(attrIndex),
        m_hashFunction(numPartitions, attrIndex)
  {
  }

  ~HashDataPartitioner() = default;

  // Allocates a new HashDataPartitioner with the same partition count and
  // attribute index. The object is created with new and returned as a raw
  // pointer; this class does not keep track of it.
  virtual DataPartitioner *Copy() const
  {
    DataPartitioner *clone =
        new HashDataPartitioner(NumPartitions(), m_attrIndex);
    return clone;
  }

  // Returns whatever the hash function yields for the given tuple.
  virtual size_t DistributeValue(Tuple *tuple)
  {
    const size_t blockIndex = m_hashFunction.Value(tuple);
    return blockIndex;
  }

private: //member
  int m_attrIndex;
  extrel2::HashFunction m_hashFunction;
};
// Partitioner that hands out block indices in cyclic order, independent of
// the tuple content. It registers itself with the base class as
// DistributionTypes::SharedPartitions.
class RoundRobinDataPartitioner : public DataPartitioner
{
public: //methods
  // The cursor starts at numPartitions (one past the last valid index) so
  // that the first call to DistributeValue() wraps around and yields 0.
  RoundRobinDataPartitioner(const size_t numPartitions)
      : DataPartitioner(numPartitions, DistributionTypes::SharedPartitions),
        m_currentPartition(numPartitions)
  {
  }

  ~RoundRobinDataPartitioner() = default;

  // Allocates a new partitioner with the same partition count. The copy
  // starts its own cycle from the beginning; the current cursor position is
  // not carried over.
  virtual DataPartitioner *Copy() const
  {
    DataPartitioner *clone = new RoundRobinDataPartitioner(NumPartitions());
    return clone;
  }

  // Advances the cursor by one, wrapping to 0 once it reaches the partition
  // count, and returns the new cursor position. The tuple is not inspected.
  virtual size_t DistributeValue(Tuple *tuple)
  {
    const size_t advanced = m_currentPartition + 1;
    m_currentPartition = (advanced >= NumPartitions()) ? 0 : advanced;
    return m_currentPartition;
  }

private: //member
  size_t m_currentPartition;
};
} // namespace parthread
#endif //SECONDO_PARTHREAD_DATA_PARTITONER_H