Files
secondo/Algebras/CRel/TBlock.h
2026-01-23 17:03:45 +08:00

717 lines
16 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
----
This file is part of SECONDO.
Copyright (C) 2004-2009, University in Hagen, Faculty of Mathematics
and Computer Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
*/
#pragma once
#include "AttrArray.h"
#include "Attribute.h"
#include <cstdint>
#include "NestedList.h"
#include "ReadWrite.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "SecondoSMI.h"
#include "Shared.h"
namespace CRelAlgebra
{
class TBlock;
class TBlockInfo;
class TBlockEntry;
class TBlockIterator;
class FilteredTBlockIterator;
typedef Shared<const TBlockInfo> PTBlockInfo;
/*
Represents data of a ~TBlock~ which can be stored separately.
It is also used as an ~AttrArrayHeader~ on column restoring to avoid
redundant data.
*/
class TBlockHeader
{
public:
  /*
  Row count and size stored for a ~TBlock~.
  NOTE(review): presumably the values reported by ~TBlock::GetRowCount~ and
  ~TBlock::GetSize~ -- confirm in the implementation file.
  */
  uint64_t rowCount;
  uint64_t size;

  /*
  Ids of the column file and the flob file associated with the ~TBlock~.
  */
  SmiFileId columnFileId;
  SmiFileId flobFileId;

  /*
  Default constructor; the resulting member values are defined in the
  implementation file.
  */
  TBlockHeader();

  /*
  Creates a ~TBlockHeader~ from the four passed values.
  */
  TBlockHeader(uint64_t rowCount, uint64_t size, SmiFileId columnFileId,
               SmiFileId flobFileId);

  /*
  Implicit conversion to a ~AttrArrayHeader~, so that a ~TBlockHeader~ can be
  passed wherever a ~AttrArrayHeader~ is expected.
  */
  operator AttrArrayHeader() const;
};
/*
This class adds support to filter a ~TBlock~'s tuples without copying them to
a new ~TBlock~.
It does so by providing its own row numbers which are mapped to the
~TBlock~'s row numbers.
*/
class TBlockFilter
{
public:
  /*
  Creates a ~TBlockFilter~ for the passed ~block~ which selects all of the
  block's tuples. No row mapping is stored, so row numbers are passed through
  unchanged.
  Note: The ~TBlock~'s refcounter is not touched
  */
  TBlockFilter(const TBlock &block) :
    m_block(&block)
  {
  }

  /*
  Creates a ~TBlockFilter~ for the passed ~block~ which selects only the
  tuples specified by the row numbers in ~filter~.
  Note: The ~TBlock~'s refcounter is not touched
  */
  TBlockFilter(const TBlock &block,
               const SharedArray<const uint64_t> &filter) :
    m_block(&block),
    m_filter(filter)
  {
  }

  /*
  Maps this ~TBlockFilter~'s row number ~row~ to the ~TBlock~'s row number.
  Without a filter array the passed row number is returned as is.
  */
  uint64_t GetAt(uint64_t row) const
  {
    if (m_filter.IsNull())
    {
      return row;
    }

    return m_filter[row];
  }

  /*
  Subscript form of ~GetAt~.
  */
  uint64_t operator [] (uint64_t row) const
  {
    return GetAt(row);
  }

  /*
  Returns the number of row numbers contained in this ~TBlockFilter~.
  */
  uint64_t GetRowCount() const;

  /*
  Determines if this filter holds a filter array, i.e. if it maps row numbers
  instead of passing them through.
  */
  bool HasFilter() const
  {
    return m_filter.IsNull() ? false : true;
  }

  /*
  Returns an iterator over the ~TBlock~'s tuples taking a ~TBlockFilter~ into
  account.
  */
  FilteredTBlockIterator GetIterator() const;

  /*
  Only for range-loop support!
  */
  FilteredTBlockIterator begin() const;

  FilteredTBlockIterator end() const;

protected:
  friend class TBlock;

  //The filtered block. Not reference counted by this class.
  const TBlock * const m_block;

  //The row numbers to include. A null array means 'all rows'.
  const SharedArray<const uint64_t> m_filter;
};
/*
Implements a column-oriented tuple-block.
The main functionality provided is appending tuples, retrieving tuples
and persisting / restoring the block.
The tuple-block is split into columns which themselves are ~AttrArray~s.
Each of them is restored only when necessary.
This class is reference counted because it's heavily used in stream operators.
Note: When accessing a ~TBlock~'s tuples you probably want to use the
~TBlockFilter~ provided by ~GetFilter~!
*/
class TBlock
{
/*
Column-oriented, reference counted tuple-block. The public interface covers
creation / restoring, projection and filtering, persisting, appending tuples,
column access, iteration and reference counting.
*/
public:
/*
Returns the size the metadata of a ~TBlock~ with the specified number of
columns would occupy.
~includeHeader~ determines if ~TBlockHeader~ data should be included.
*/
static uint64_t GetSaveSize(uint64_t columnCount,
bool includeHeader = true);
/*
Creates a ~TBlock~ parameterized by a ~TBlockInfo~ and the provided column-
and flob-file id.
~columnFile~ provides a (potentially shared) ~SmiRecordFile~ instance.
*/
TBlock(const PTBlockInfo &info, SmiFileId columnFileId,
SmiFileId flobFileId);
TBlock(const PTBlockInfo &info, SmiFileId columnFileId,
SmiFileId flobFileId, Shared<SmiRecordFile> columnFile);
/*
Restores a ~TBlock~ parameterized by a ~TBlockInfo~ from the provided
~source~.
~columnFile~ provides a (potentially shared) ~SmiRecordFile~ instance.
Precondition: ~source~ must hold data saved by a ~TBlock~ with similar
~info~ and ~includeHeader~ == true.
*/
TBlock(const PTBlockInfo &info, Reader &source);
TBlock(const PTBlockInfo &info, Reader &source,
Shared<SmiRecordFile> columnFile);
/*
Restores a ~TBlock~ parameterized by a ~TBlockInfo~ from the provided
~source~ and ~header~.
~columnFile~ provides a (potentially shared) ~SmiRecordFile~ instance.
Precondition: ~source~ must hold data saved by a ~TBlock~ with similar
~info~ and ~includeHeader~ == false.
*/
TBlock(const PTBlockInfo &info, const TBlockHeader &header, Reader &source);
TBlock(const PTBlockInfo &info, const TBlockHeader &header, Reader &source,
Shared<SmiRecordFile> columnFile);
/*
Creates the projection of an existing ~block~.
The indices of the columns to project on are provided by the array pointer
~columnIndices~. The number of columns in ~columnIndices~ is passed by
~columnCount~.
The returned ~TBlock~ is supposed to be read only.
Modifying or saving it is not supported; the outcome is unspecified.
*/
TBlock(const TBlock &block, const uint64_t *columnIndices,
uint64_t columnCount);
/*
Creates the projection of an existing ~block~ and extends it with the passed
columns.
The indices of the columns to project on are provided by the array pointer
~projectionIndices~. The number of columns in ~projectionIndices~ is passed
by ~projectionIndexCount~. The columns for the extension are provided
through ~extensionColumns~ with the number of columns ~extensionColumnCount~
and the types ~extensionColumnTypes~.
The returned ~TBlock~ is supposed to be read only.
Modifying or saving it is not supported; the outcome is unspecified.
*/
TBlock(const TBlock &block, const uint64_t *projectionIndices,
uint64_t projectionIndexCount, AttrArray **extensionColumns,
uint64_t extensionColumnCount, const ListExpr extensionColumnTypes);
/*
Creates the projection of an existing ~block~ and applies a filter to it.
The indices of the columns to project on are provided by the array pointer
~columnIndices~. The number of columns in ~columnIndices~ is passed by
~columnCount~. The rows to include are specified by ~filter~.
The returned ~TBlock~ is supposed to be read only.
Modifying or saving it is not supported; the outcome is unspecified.
*/
TBlock(const TBlock &block, const uint64_t *columnIndices,
uint64_t columnCount, const SharedArray<const uint64_t> &filter);
/*
Creates a filtered ~TBlock~ from the passed ~block~.
The rows to include are specified by ~filter~.
*/
TBlock(const TBlock &block, const SharedArray<const uint64_t> &filter);
virtual ~TBlock();
/*
Returns the ~TBlockInfo~ used by this ~TBlock~.
*/
const PTBlockInfo &GetInfo() const;
/*
Returns the applied ~TBlockFilter~.
*/
const TBlockFilter &GetFilter() const;
/*
Writes this ~TBlock~ into the specified ~target~ either with or without
~TBlockHeader~ data.
*/
void Save(Writer &target, bool includeHeader = true);
/*
Deletes persistent data created by this tuple-block.
*/
void DeleteRecords();
/*
Returns the number of columns of this ~TBlock~.
*/
uint64_t GetColumnCount() const;
/*
Returns the number of tuples in this ~TBlock~.
*/
uint64_t GetRowCount() const;
/*
Returns the size of this ~TBlock~ in bytes.
*/
uint64_t GetSize() const;
/*
Appends a tuple to this tuple-block.
Preconditions:
*~tuple~ must point to an array of ~AttrArrayEntry~
*~tuple~ must contain ~GetColumnCount()~ entries
*the attribute-array types of the entries must match the attribute-array
types of this tuple-block's columns
*/
void Append(const AttrArrayEntry* tuple);
/*
Appends a tuple to this tuple-block.
This function does not touch the ~Attribute~'s reference counters.
Preconditions:
*~tuple~ must point to an array of ~Attribute~ pointers
*~tuple~ must contain ~GetColumnCount()~ entries
*the attribute types of the entries must match the attribute types of this
tuple-block's columns
*/
void Append(Attribute** tuple);
/*
Appends a tuple to this tuple-block.
Preconditions:
*~tuple~ must represent a valid ~TBlockEntry~
*~tuple~'s ~TBlock~ must have a similar ~TBlockInfo~ to this tuple-block
*/
void Append(const TBlockEntry &tuple);
/*
Appends a tuple to this tuple-block.
This function does not touch the ~Tuple~'s reference counter.
Preconditions:
*~tuple~ must contain ~GetColumnCount()~ attributes
*the attribute types of the tuple's attributes must match the attribute
types of this tuple-block's columns
*/
void Append(const Tuple &tuple);
/*
Accesses the column with the specified ~index~.
Note: both accessors are const but return a non-const ~AttrArray~ reference.
*/
AttrArray &GetAt(uint64_t index) const;
AttrArray &operator[](uint64_t index) const;
/*
Returns a ~TBlockIterator~ over this tuple-block's tuples.
*/
TBlockIterator GetIterator() const;
/*
Returns a ~FilteredTBlockIterator~ over this tuple-block's tuples which takes
the ~TBlockFilter~ returned by ~GetFilter~ into account.
*/
FilteredTBlockIterator GetFilteredIterator() const;
/*
~TBlockIterator~s used (only!) for range-loop support.
*/
TBlockIterator begin() const;
TBlockIterator end() const;
/*
Increases the reference counter by one.
*/
void IncRef() const;
/*
Decreases the reference counter by one.
If the reference counter reaches zero this object is deleted.
*/
void DecRef() const;
/*
Returns the reference count.
*/
uint64_t GetRefCount() const;
private:
//Per-column bookkeeping: a size and a record id.
//NOTE(review): presumably the persisted size of the column and the record
//in the column file holding it -- confirm in the implementation file.
class ColumnInfo
{
public:
uint64_t size;
SmiRecordId recordId;
};
//Header data of this block (row count, size, column- and flob-file id).
TBlockHeader m_header;
//NOTE(review): presumably the ~TBlockInfo~ returned by ~GetInfo~.
PTBlockInfo m_info;
//The number of columns of this block.
//NOTE(review): the former comment called this the count of tuples, which
//contradicts the member's name; presumably it is the value returned by
//~GetColumnCount~ -- confirm in the implementation file.
const uint64_t m_columnCount;
//NOTE(review): presumably one ~ColumnInfo~ per column -- confirm.
ColumnInfo *m_columnInfos;
//The attribute-blocks.
AttrArray **m_columns;
//The (potentially shared) record file; mutable so that const member
//functions may use or replace it.
mutable Shared<SmiRecordFile> m_columnFile;
//NOTE(review): presumably the filter returned by ~GetFilter~.
TBlockFilter m_filter;
//The reference counter; mutable because ~IncRef~ and ~DecRef~ are const.
mutable uint64_t m_refCount;
//Copy construction is disabled.
TBlock(const TBlock&) = delete;
};
/*
Returns the number of rows selected by this ~TBlockFilter~: the capacity of
the filter array if there is one, the row count of the block otherwise.
*/
inline uint64_t TBlockFilter::GetRowCount() const
{
  if (m_filter.IsNull())
  {
    return m_block->GetRowCount();
  }

  return m_filter.GetCapacity();
}
/*
Class used to configure a ~TBlock~'s columns.
It can be shared among multiple ~TBlock~s.
*/
class TBlockInfo
{
public:
  //The number of columns described by this ~TBlockInfo~.
  uint64_t columnCount;

  //NOTE(review): presumably arrays with one entry per column holding the
  //attribute-array type and the attribute type of the column -- confirm in
  //the implementation file.
  ListExpr *columnTypes;
  ListExpr *columnAttributeTypes;

  //NOTE(review): presumably one ~AttrArrayManager~ per column, used to create
  //the column's ~AttrArray~ -- confirm in the implementation file.
  AttrArrayManager **columnFactories;

  TBlockInfo();

  /*
  Creates a ~TBlockInfo~ from a list of attribute-array types.
  */
  TBlockInfo(ListExpr columnTypes);

  /*
  NOTE(review): a destructor is declared but copying is neither defined nor
  deleted. If the destructor releases the arrays above, copies of a
  ~TBlockInfo~ would be unsafe -- confirm and share it through ~PTBlockInfo~
  only.
  */
  ~TBlockInfo();
};
/*
This class represents the tuple of a ~TBlock~ by a pointer to the block and
a row number.
If either the pointer doesn't point to a ~TBlock~ instance or the row
number is out of the block's range of rows, the ~TBlockEntry~ is considered
invalid.
This class doesn't change the ~TBlock~'s reference count.
If the pointed to block is deleted this ~TBlockEntry~ becomes invalid.
Using an invalid ~TBlockEntry~ is considered undefined behaviour.
*/
class TBlockEntry
{
public:
  /*
  Creates an invalid ~TBlockEntry~ (no block, row number zero).
  Both members are initialized so that default constructed entries can be
  copied and compared without reading indeterminate values.
  */
  TBlockEntry() :
    m_block(nullptr),
    m_row(0)
  {
  }

  /*
  Creates a ~TBlockEntry~ referring to the tuple with row number ~row~ of the
  passed ~block~. The ~block~'s reference counter is not touched.
  */
  TBlockEntry(const TBlock *block, uint64_t row) :
    m_block(block),
    m_row(row)
  {
  }

  /*
  Returns the row number of the represented tuple.
  */
  uint64_t GetRow() const
  {
    return m_row;
  }

  /*
  Returns the ~TBlock~ holding the represented tuple.
  */
  const TBlock *GetBlock() const
  {
    return m_block;
  }

  /*
  Returns the attribute of the represented tuple in the column with the
  specified ~index~.
  Precondition: this ~TBlockEntry~ is valid. The block pointer is
  dereferenced without a check.
  */
  const AttrArrayEntry operator[](uint64_t index) const
  {
    return AttrArrayEntry(&m_block->GetAt(index), m_row);
  }

  /*
  Two entries are equal if they refer to the same row of the same ~TBlock~
  instance (pointer comparison, the tuple values are not compared).
  */
  bool operator == (const TBlockEntry &other) const
  {
    return m_row == other.m_row && m_block == other.m_block;
  }

  bool operator != (const TBlockEntry &other) const
  {
    return !(*this == other);
  }

private:
  const TBlock *m_block;

  uint64_t m_row;

  //The iterators advance ~m_row~ in place.
  friend class TBlockIterator;
  friend class FilteredTBlockIterator;
};
/*
An iterator over a ~TBlock~'s tuples.
Changes of the ~TBlock~ invalidate the iterator which is not reflected by
~TBlockIterator.IsValid~. Further usage is considered undefined behaviour.
The functions are defined in this header file to enable inlining.
*/
class TBlockIterator
{
public:
  /*
  Creates an invalid iterator (no block, zero rows).
  */
  TBlockIterator() :
    m_tuple(nullptr, 0),
    m_rowCount(0)
  {
  }

  /*
  Creates an iterator positioned on the first tuple of the passed ~block~.
  The row count is read once on construction.
  Passing a null pointer yields an invalid iterator, which matches the
  behaviour of ~FilteredTBlockIterator~.
  */
  TBlockIterator(const TBlock *block) :
    m_tuple(block, 0),
    m_rowCount(block != nullptr ? block->GetRowCount() : 0)
  {
  }

  /*
  Determines if the iterator points to a tuple, i.e. if the current row number
  is smaller than the row count read on construction.
  */
  bool IsValid() const
  {
    return m_tuple.m_row < m_rowCount;
  }

  /*
  Returns the current tuple.
  Precondition: ~IsValid()~
  */
  const TBlockEntry &Get() const
  {
    return m_tuple;
  }

  /*
  Moves the iterator to the next tuple.
  Returns true if the iterator is valid afterwards. An iterator which is
  already invalid is not moved any further.
  */
  bool MoveToNext()
  {
    if (!IsValid())
    {
      return false;
    }

    ++m_tuple.m_row;

    return IsValid();
  }

  const TBlockEntry &operator * () const
  {
    return Get();
  }

  TBlockIterator &operator ++ ()
  {
    MoveToNext();

    return *this;
  }

  /*
  Two valid iterators are equal if they point to the same tuple.
  All invalid iterators are equal to each other, which allows a default
  constructed iterator to serve as the end of a range-loop.
  */
  bool operator == (const TBlockIterator &other) const
  {
    const bool valid = IsValid(),
      otherValid = other.IsValid();

    if (valid && otherValid)
    {
      return m_tuple == other.m_tuple;
    }

    return valid == otherValid;
  }

  bool operator != (const TBlockIterator &other) const
  {
    return !(*this == other);
  }

private:
  //The current tuple; its row number is advanced in place.
  TBlockEntry m_tuple;

  //The block's row count at the time of construction.
  uint64_t m_rowCount;
};
/*
An iterator over a ~TBlock~'s tuples which takes the block's ~TBlockFilter~
into account: it walks the filter's row numbers and maps each of them to the
block's row number.
*/
class FilteredTBlockIterator
{
public:
  /*
  Creates an invalid iterator (no filter, zero rows).
  */
  FilteredTBlockIterator() :
    m_filter(nullptr),
    m_rowCount(0),
    m_row(0),
    m_tuple(nullptr, 0)
  {
  }

  /*
  Creates an iterator positioned on the first tuple selected by the filter
  returned by ~block->GetFilter()~.
  Passing a null pointer or a block without selected rows yields an invalid
  iterator.
  Note: the initializers rely on the declaration order of the members
  (~m_filter~ before ~m_rowCount~ before ~m_tuple~).
  */
  FilteredTBlockIterator(const TBlock *block) :
    m_filter(block != nullptr ? &block->GetFilter() : nullptr),
    m_rowCount(m_filter != nullptr ? m_filter->GetRowCount() : 0),
    m_row(0),
    m_tuple(block, m_rowCount > 0 ? m_filter->GetAt(0) : 0)
  {
  }

  /*
  Determines if the iterator points to a tuple, i.e. if the current filter row
  number is smaller than the filter's row count read on construction.
  */
  bool IsValid() const
  {
    return m_row < m_rowCount;
  }

  /*
  Returns the current tuple.
  Precondition: ~IsValid()~
  */
  const TBlockEntry &Get() const
  {
    return m_tuple;
  }

  /*
  Moves the iterator to the next selected tuple.
  Returns true if the iterator is valid afterwards. An iterator which is
  already invalid is not moved any further.
  */
  bool MoveToNext()
  {
    if (m_row >= m_rowCount)
    {
      return false;
    }

    if (++m_row >= m_rowCount)
    {
      //Moved past the last row: the tuple keeps its last row number.
      return false;
    }

    m_tuple.m_row = m_filter->GetAt(m_row);

    return true;
  }

  const TBlockEntry &operator * () const
  {
    return Get();
  }

  FilteredTBlockIterator &operator ++ ()
  {
    MoveToNext();

    return *this;
  }

  /*
  Two valid iterators are equal if they point to the same tuple.
  All invalid iterators are equal to each other, which allows a default
  constructed iterator to serve as the end of a range-loop.
  */
  bool operator == (const FilteredTBlockIterator &other) const
  {
    const bool valid = IsValid(),
      otherValid = other.IsValid();

    if (valid && otherValid)
    {
      return m_tuple == other.m_tuple;
    }

    return valid == otherValid;
  }

  /*
  Negation of ~operator ==~.
  The former implementation returned ~m_tuple == other.m_tuple~ for two valid
  iterators, so two iterators on the same tuple compared as different and two
  iterators on different tuples compared as equal.
  */
  bool operator != (const FilteredTBlockIterator &other) const
  {
    return !(*this == other);
  }

private:
  //The filter providing the row mapping. Null for an invalid iterator.
  const TBlockFilter * m_filter;

  //The filter's row count at the time of construction.
  uint64_t m_rowCount;

  //The current row number within the filter.
  uint64_t m_row;

  //The current tuple, holding the block's (mapped) row number.
  TBlockEntry m_tuple;
};
/*
Returns a ~FilteredTBlockIterator~ positioned on the first selected tuple.
NOTE(review): the iterator is created from the block pointer and therefore
follows the filter returned by the block's ~GetFilter~. If this
~TBlockFilter~ is not the block's own filter the two may differ -- confirm
that this is intended.
*/
inline FilteredTBlockIterator TBlockFilter::GetIterator() const
{
  FilteredTBlockIterator iterator(m_block);

  return iterator;
}
/*
Range-loop support: returns the same iterator as ~GetIterator~.
*/
inline FilteredTBlockIterator TBlockFilter::begin() const
{
  return GetIterator();
}
/*
Range-loop support: returns a default constructed (invalid) iterator, which
serves as the end of the range.
*/
inline FilteredTBlockIterator TBlockFilter::end() const
{
  FilteredTBlockIterator pastTheEnd;

  return pastTheEnd;
}
/*
Returns a ~FilteredTBlockIterator~ over this tuple-block's tuples which takes
the block's ~TBlockFilter~ into account.
*/
inline FilteredTBlockIterator TBlock::GetFilteredIterator() const
{
  FilteredTBlockIterator iterator(this);

  return iterator;
}
}