Files
secondo/Algebras/CRel/TBlock.h

717 lines
16 KiB
C
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
----
This file is part of SECONDO.
Copyright (C) 2004-2009, University in Hagen, Faculty of Mathematics
and Computer Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
*/
#pragma once
#include "AttrArray.h"
#include "Attribute.h"
#include <cstdint>
#include "NestedList.h"
#include "ReadWrite.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "SecondoSMI.h"
#include "Shared.h"
namespace CRelAlgebra
{
class TBlock;
class TBlockInfo;
class TBlockEntry;
class TBlockIterator;
class FilteredTBlockIterator;
typedef Shared<const TBlockInfo> PTBlockInfo;
/*
Represents data of a ~TBlock~ which can be stored separately.
It also is used as a ~AttrArrayHeader~ on column restoring to avoid redundant
data.
*/
class TBlockHeader
{
public:
  //Row count and size recorded for the block.
  uint64_t rowCount;
  uint64_t size;

  //Ids of the files used for column data and for flob data.
  SmiFileId columnFileId;
  SmiFileId flobFileId;

  TBlockHeader();

  //Creates a header from explicitly provided values.
  TBlockHeader(uint64_t rowCount, uint64_t size,
               SmiFileId columnFileId, SmiFileId flobFileId);

  //Implicit conversion which allows this header to be passed where a
  //~AttrArrayHeader~ is expected.
  operator AttrArrayHeader() const;
};
/*
This class adds support to filter a ~TBlock~'s tuples without copying them to
a new ~TBlock~.
It does so by providing its own row numbers which are mapped to the
~TBlock~'s row numbers.
*/
class TBlockFilter
{
public:
/*
Creates a ~TBlockFilter~ for the passed ~block~ containing all
tuples.
Note: The ~TBlock~'s refcounter is not touched
*/
TBlockFilter(const TBlock &block) :
m_block(&block)
{
}
/*
Creates a ~TBlockFilter~ for the passed ~bloch~ containing all only the
tuples specified by the passed row numbers in ~filter~.
Note: The ~TBlock~'s refcounter is not touched
*/
TBlockFilter(const TBlock &block,
const SharedArray<const uint64_t> &filter) :
m_block(&block),
m_filter(filter)
{
}
/*
Returns the ~TBlocks~'s row number for this ~TBlockFilter~'s row number
~row~.
*/
uint64_t GetAt(uint64_t row) const
{
return m_filter.IsNull() ? row : m_filter[row];
}
uint64_t operator [] (uint64_t row) const
{
return m_filter.IsNull() ? row : m_filter[row];
}
/*
Returns number of row numbers contained in this ~TBlockFilter~.
*/
uint64_t GetRowCount() const;
/*
Determines if this filter contains / projects indices.
*/
bool HasFilter() const {
return !m_filter.IsNull();
}
/*
Returns a iterator over the ~TBlock~'s tuples taking this
~TBlockFilter~ into account
*/
FilteredTBlockIterator GetIterator() const;
/*
Only for range-loop support!
*/
FilteredTBlockIterator begin() const;
FilteredTBlockIterator end() const;
protected:
friend class TBlock;
const TBlock * const m_block;
const SharedArray<const uint64_t> m_filter;
};
/*
Implements a column-oriented tuple-block.
The main functionality provided is appending tuples, retrieving tuples
and persisting / restoring the block.
The tuple-block is split into columns which themselves are ~AttrArray~s.
Each of them is restored only when necessary.
This class is reference counted because it's heavily used in stream operators.
Note: When accessing a ~TBlock~'s tuples you probably want to use the
~TBlockFilter~ provided by ~GetFilter~!
*/
class TBlock
{
public:
/*
Returns the size the metadata of a ~TBlock~ with the specified number of
columns would occupy.
~includeHeader~ determines if ~TBlockHeader~ data should be included.
*/
static uint64_t GetSaveSize(uint64_t columnCount,
bool includeHeader = true);
/*
Creates a ~TBlock~ parameterized by a ~TBlockInfo~ and the provided column-
and flob-file id.
~columnFile~ provides a (potentially shared) ~SmiRecordFile~ instance.
*/
TBlock(const PTBlockInfo &info, SmiFileId columnFileId,
SmiFileId flobFileId);
TBlock(const PTBlockInfo &info, SmiFileId columnFileId,
SmiFileId flobFileId, Shared<SmiRecordFile> columnFile);
/*
Restores a ~TBlock~ parameterized by a ~TBlockInfo~ from the provided
~source~.
~columnFile~ provides a (potentially shared) ~SmiRecordFile~ instance.
Precondition: ~source~ must hold data saved by a ~TBlock~ with similar
~info~ and ~includeHeader~ == true.
*/
TBlock(const PTBlockInfo &info, Reader &source);
TBlock(const PTBlockInfo &info, Reader &source,
Shared<SmiRecordFile> columnFile);
/*
Restores a ~TBlock~ parameterized by a ~TBlockInfo~ from the provided
~source~ and ~header~.
~columnFile~ provides a (potentially shared) ~SmiRecordFile~ instance.
Precondition: ~source~ must hold data saved by a ~TBlock~ with similar
~info~ and ~includeHeader~ == false.
*/
TBlock(const PTBlockInfo &info, const TBlockHeader &header, Reader &source);
TBlock(const PTBlockInfo &info, const TBlockHeader &header, Reader &source,
Shared<SmiRecordFile> columnFile);
/*
Creates the projection of an existing ~block~.
The indices of the columns to project on are provided by the array pointer
~columnIndices~. The number of columns in ~columnIndices~ is passed by
~columnCount~.
The returned ~TBlock~ is supposed to be read only.
God knows what might happen if one tries to actually modify or save it.
*/
TBlock(const TBlock &block, const uint64_t *columnIndices,
uint64_t columnCount);
/*
Creates the projection of an existing ~block~ and extends it with the passed
columns.
The indices of the columns to project on are provided by the array pointer
~projectionIndices~. The number of columns in ~projectionIndices~ is passed
by ~projectionIndexCount~. The columns for the extension are provided
through ~extensionColumns~ with the number of columns ~extensionColumnCount~
and the types ~extensionColumnTypes~.
The returned ~TBlock~ is supposed to be read only.
God knows what might happen if one tries to actually modify or save it.
*/
TBlock(const TBlock &block, const uint64_t *projectionIndices,
uint64_t projectionIndexCount, AttrArray **extensionColumns,
uint64_t extensionColumnCount, const ListExpr extensionColumnTypes);
/*
Creates the projection of an existing ~block~ and applies a filter to it.
The indices of the columns to project on are provided by the array pointer
~columnIndices~. The number of columns in ~columnIndices~ is passed by
~columnCount~. The rows to include are specified by ~filter~.
The returned ~TBlock~ is supposed to be read only.
God knows what might happen if one tries to actually modify or save it.
*/
TBlock(const TBlock &block, const uint64_t *columnIndices,
uint64_t columnCount, const SharedArray<const uint64_t> &filter);
/*
Creates a filtered ~TBlock~ from the passed ~block~.
The rows to include are specified by ~filter~.
*/
TBlock(const TBlock &block, const SharedArray<const uint64_t> &filter);
virtual ~TBlock();
/*
Returns the ~TBlockInfo~ used by this ~TBlock~.
*/
const PTBlockInfo &GetInfo() const;
/*
Returns the applied ~TBlockFilter~.
*/
const TBlockFilter &GetFilter() const;
/*
Writes this ~TBlock~ into the specified ~target~ either with or without
~TBlockHeader~ data.
*/
void Save(Writer &target, bool includeHeader = true);
/*
Deletes persistent data created by this tuple-block.
*/
void DeleteRecords();
/*
Returns the number of columns of this ~TBlock~.
*/
uint64_t GetColumnCount() const;
/*
Returns the number of tuples in this ~TBlock~.
*/
uint64_t GetRowCount() const;
/*
Returns the size of this ~TBlock~ in bytes.
*/
uint64_t GetSize() const;
/*
Appends a tuple to this tuple-block.
Preconditions:
*~tuple~ must point to an array of ~AttrArrayEntry~
*~tuple~ must contain ~GetColumnCount()~ entries
*the attribute-array types of the entries must match the attribute-array
types of this tuple-block's columns
*/
void Append(const AttrArrayEntry* tuple);
/*
Appends a tuple to this tuple-block.
This function does not touch the ~Attribute~'s reference counters.
Preconditions:
*~tuple~ must point to an array of ~Attribute~ pointers
*~tuple~ must contain ~GetColumnCount()~ entries
*the attribute types of the entries must match the attribute types of this
tuple-block's columns
*/
void Append(Attribute** tuple);
/*
Appends a tuple to this tuple-block.
Preconditions:
*~tuple~ must represent a valid ~TBlockEntry~
*~tuple~'s ~TBlock~ must have a similar ~TBlockInfo~ to this tuple-block
*/
void Append(const TBlockEntry &tuple);
/*
Appends a tuple to this tuple-block.
This function does not touch the ~Tuple~'s reference counter.
Preconditions:
*~tuple~ must contain ~GetColumnCount()~ attributes
*the attribute types of the tuple's attributes must match the attribute
types of this tuple-block's columns
*/
void Append(const Tuple &tuple);
/*
Accesses the column with the specified ~index~.
Both accessors are const but hand out a non-const ~AttrArray~ reference.
*/
AttrArray &GetAt(uint64_t index) const;
AttrArray &operator[](uint64_t index) const;
/*
Returns a ~TBlockIterator~ over this tuple-block's tuples.
~GetFilteredIterator~ returns a ~FilteredTBlockIterator~ constructed from
this block; it is defined inline at the end of this header.
*/
TBlockIterator GetIterator() const;
FilteredTBlockIterator GetFilteredIterator() const;
/*
~TBlockIterator~s used (only!) for range-loop support.
*/
TBlockIterator begin() const;
TBlockIterator end() const;
/*
Increases the reference counter by one.
*/
void IncRef() const;
/*
Decreases the reference counter by one.
If the reference counter reaches zero this object is deleted.
*/
void DecRef() const;
/*
Returns the reference count.
*/
uint64_t GetRefCount() const;
private:
//Per-column bookkeeping consisting of a size and a record id.
//NOTE(review): presumably the persisted size of the column and the id of
//its record in the column file - confirm against the implementation.
class ColumnInfo
{
public:
uint64_t size;
SmiRecordId recordId;
};
//Row count, size and file ids of this block (see ~TBlockHeader~).
TBlockHeader m_header;
//The shared column configuration returned by ~GetInfo~.
PTBlockInfo m_info;
//The count of columns in this block.
//Corresponds to the count of attributes in each tuple.
const uint64_t m_columnCount;
//One ~ColumnInfo~ per column (presumably ~m_columnCount~ entries).
ColumnInfo *m_columnInfos;
//The attribute-blocks.
AttrArray **m_columns;
//The (potentially shared) file holding the columns. Mutable, so const
//member functions may change it.
mutable Shared<SmiRecordFile> m_columnFile;
//The filter returned by ~GetFilter~.
TBlockFilter m_filter;
//Counter behind ~IncRef~ / ~DecRef~. Mutable because both are const.
mutable uint64_t m_refCount;
//Copy construction is disabled.
TBlock(const TBlock&) = delete;
};
/*
Returns the number of rows visible through this filter: the block's row count
if no row number array is attached, otherwise the capacity of that array.
*/
inline uint64_t TBlockFilter::GetRowCount() const
{
  if (m_filter.IsNull())
  {
    return m_block->GetRowCount();
  }

  return m_filter.GetCapacity();
}
/*
Class used to configure a ~TBlock~'s columns.
It can be shared among multiple ~TBlock~s.
*/
class TBlockInfo
{
public:
  //Number of configured columns.
  uint64_t columnCount;

  //Type expressions of the columns and of their attributes.
  //NOTE(review): presumably arrays holding ~columnCount~ entries each -
  //confirm against the implementation.
  ListExpr *columnTypes;
  ListExpr *columnAttributeTypes;

  //Managers for the columns' attribute arrays (presumably one per column).
  AttrArrayManager **columnFactories;

  TBlockInfo();

  /*
  Creates a ~TBlockInfo~ from a list of attribute-array types.
  */
  TBlockInfo(ListExpr columnTypes);

  ~TBlockInfo();
};
/*
This class represents the tuple of a ~TBlock~ by a pointer to the block and
a row number.
If either the pointer doesn't point to a ~TBlock~ instance or the row
number is out of the block's range of rows, the ~TBlockEntry~ is considered
invalid.
This class doesn't change a ~TBlock~'s reference count.
If the pointed to block is deleted this ~TBlockEntry~ becomes invalid.
Using a invalid ~TBlockEntry~ is considered undefined behaviour.
*/
class TBlockEntry
{
public:
  /*
  Creates an invalid ~TBlockEntry~ which points to no block.

  Both members are initialized explicitly so that copying or comparing a
  default constructed entry never reads indeterminate values.
  */
  TBlockEntry() :
    m_block(nullptr),
    m_row(0)
  {
  }

  /*
  Creates a ~TBlockEntry~ representing the tuple in row ~row~ of ~block~.
  The block's reference count is not touched.
  */
  TBlockEntry(const TBlock *block, uint64_t row) :
    m_block(block),
    m_row(row)
  {
  }

  /*
  Returns the row number inside the block.
  */
  uint64_t GetRow() const
  {
    return m_row;
  }

  /*
  Returns the block this entry points to (may be null).
  */
  const TBlock *GetBlock() const
  {
    return m_block;
  }

  /*
  Returns the attribute of this tuple in the column with the passed ~index~.

  Precondition: this entry must be valid. The block pointer is dereferenced
  without a check.
  */
  const AttrArrayEntry operator[](uint64_t index) const
  {
    return AttrArrayEntry(&m_block->GetAt(index), m_row);
  }

  /*
  Two entries are equal if they point to the same row of the same block
  instance. Attribute values are not compared.
  */
  bool operator == (const TBlockEntry &other) const
  {
    return m_row == other.m_row && m_block == other.m_block;
  }

  bool operator != (const TBlockEntry &other) const
  {
    return !(*this == other);
  }

private:
  const TBlock *m_block;

  uint64_t m_row;

  //The iterators advance ~m_row~ in place.
  friend class TBlockIterator;
  friend class FilteredTBlockIterator;
};
/*
An iterator over a ~TBlock~'s tuples.
Changes of the ~TBlock~ invalidate the iterator which is not reflected by
~TBlockIterator.IsValid~. Further usage is considered undefined behaviour.
The functions are defined in this header file to enable inlining.
*/
class TBlockIterator
{
public:
  /*
  Creates an iterator over nothing. ~IsValid~ returns false for it, which
  makes it usable as the end marker of a range-loop.
  */
  TBlockIterator() :
    m_tuple(nullptr, 0),
    m_rowCount(0)
  {
  }

  /*
  Creates an iterator positioned on the first row of ~block~.
  The row count is read once, here.

  Precondition: ~block~ must not be null.
  */
  TBlockIterator(const TBlock *block) :
    m_tuple(block, 0),
    m_rowCount(block->GetRowCount())
  {
  }

  /*
  Determines if the iterator points to a row inside the captured row count.
  */
  bool IsValid() const
  {
    return m_tuple.m_row < m_rowCount;
  }

  /*
  Returns the current tuple. Only meaningful while ~IsValid~ holds.
  */
  const TBlockEntry &Get() const
  {
    return m_tuple;
  }

  /*
  Advances to the next row.
  Returns true if the iterator is valid afterwards. An iterator which is
  already invalid is left unchanged.
  */
  bool MoveToNext()
  {
    if (m_tuple.m_row >= m_rowCount)
    {
      return false;
    }

    ++m_tuple.m_row;

    return m_tuple.m_row < m_rowCount;
  }

  const TBlockEntry &operator * () const
  {
    return Get();
  }

  TBlockIterator &operator ++ ()
  {
    MoveToNext();

    return *this;
  }

  /*
  Two valid iterators are equal if they point to the same tuple.
  All invalid iterators are equal to each other and unequal to any valid one.
  */
  bool operator == (const TBlockIterator &other) const
  {
    const bool thisValid = IsValid(),
      otherValid = other.IsValid();

    if (thisValid && otherValid)
    {
      return m_tuple == other.m_tuple;
    }

    return thisValid == otherValid;
  }

  bool operator != (const TBlockIterator &other) const
  {
    return !(*this == other);
  }

private:
  //The current position (block + row).
  TBlockEntry m_tuple;

  //The row count captured on construction.
  uint64_t m_rowCount;
};
/*
An iterator over a ~TBlock~'s tuples which visits only the rows provided by
the block's ~TBlockFilter~ (see ~TBlock::GetFilter~).

The iterator keeps its own position ~m_row~ in the filter and translates it
into the block's row number by ~TBlockFilter::GetAt~.
*/
class FilteredTBlockIterator
{
public:
  /*
  Creates an iterator over nothing. ~IsValid~ returns false for it, which
  makes it usable as the end marker of a range-loop.
  */
  FilteredTBlockIterator() :
    m_filter(nullptr),
    m_rowCount(0),
    m_row(0),
    m_tuple(nullptr, 0)
  {
  }

  /*
  Creates an iterator positioned on the first row of ~block~'s filter.
  A null ~block~ results in an invalid iterator.

  Note: The initializers depend on each other and rely on the declaration
  order of the members (~m_filter~, ~m_rowCount~, ~m_row~, ~m_tuple~).
  */
  FilteredTBlockIterator(const TBlock *block) :
    m_filter(block != nullptr ? &block->GetFilter() : nullptr),
    m_rowCount(m_filter != nullptr ? m_filter->GetRowCount() : 0),
    m_row(0),
    m_tuple(block, m_rowCount > 0 ? m_filter->GetAt(0) : 0)
  {
  }

  /*
  Determines if the iterator points to a row of the filter.
  */
  bool IsValid() const
  {
    return m_row < m_rowCount;
  }

  /*
  Returns the current tuple. Only meaningful while ~IsValid~ holds.
  */
  const TBlockEntry &Get() const
  {
    return m_tuple;
  }

  /*
  Advances to the next row of the filter.
  Returns true if the iterator is valid afterwards. An iterator which is
  already invalid is left unchanged.
  */
  bool MoveToNext()
  {
    if (m_row < m_rowCount)
    {
      if (++m_row < m_rowCount)
      {
        //Translate the filter position into the block's row number.
        m_tuple.m_row = m_filter->GetAt(m_row);

        return true;
      }
    }

    return false;
  }

  const TBlockEntry &operator * () const
  {
    return Get();
  }

  FilteredTBlockIterator &operator ++ ()
  {
    MoveToNext();

    return *this;
  }

  /*
  Two valid iterators are equal if they point to the same tuple.
  All invalid iterators are equal to each other and unequal to any valid one.
  This matches the semantics of ~TBlockIterator~.
  */
  bool operator == (const FilteredTBlockIterator &other) const
  {
    if (IsValid())
    {
      if (other.IsValid())
      {
        return m_tuple == other.m_tuple;
      }

      return false;
    }

    return !other.IsValid();
  }

  /*
  Negation of ~operator ==~.
  The former implementation returned the result of the tuple equality test
  when both iterators were valid, so iterators on the same tuple compared
  unequal and iterators on different tuples compared equal.
  */
  bool operator != (const FilteredTBlockIterator &other) const
  {
    return !(*this == other);
  }

private:
  //The filter providing the row numbers. Null for the default iterator.
  const TBlockFilter * m_filter;

  //The filter's row count captured on construction and the current position
  //in the filter.
  uint64_t m_rowCount,
    m_row;

  //The current tuple (block + translated row number).
  TBlockEntry m_tuple;
};
/*
Returns a ~FilteredTBlockIterator~ for the filtered block.

NOTE(review): the iterator is constructed from ~m_block~ and therefore reads
the filter returned by the block's ~GetFilter~. That is only this filter if
this instance is the block's own filter - confirm against callers.
*/
inline FilteredTBlockIterator TBlockFilter::GetIterator() const
{
  FilteredTBlockIterator iterator(m_block);

  return iterator;
}
/*
Range-loop support: an iterator positioned on the first row, constructed
from the filtered block.
*/
inline FilteredTBlockIterator TBlockFilter::begin() const
{
  FilteredTBlockIterator first(m_block);

  return first;
}
/*
Range-loop support: a default constructed (invalid) iterator serves as the
end marker.
*/
inline FilteredTBlockIterator TBlockFilter::end() const
{
  FilteredTBlockIterator pastTheEnd;

  return pastTheEnd;
}
/*
Returns a ~FilteredTBlockIterator~ constructed from this ~TBlock~.
*/
inline FilteredTBlockIterator TBlock::GetFilteredIterator() const
{
  FilteredTBlockIterator iterator(this);

  return iterator;
}
}