Files
secondo/Algebras/CRel/CRel.h
2026-01-23 17:03:45 +08:00

563 lines
12 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
----
This file is part of SECONDO.
Copyright (C) 2004-2009, University in Hagen, Faculty of Mathematics
and Computer Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
*/
#pragma once
#include "AttrArray.h"
#include "Attribute.h"
#include "LRUCache.h"
#include "ReadWrite.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include "SecondoSMI.h"
#include "Shared.h"
#include "SmiUtils.h"
#include <string>
#include "TBlock.h"
namespace CRelAlgebra
{
class CRelIterator;
class CRelBlockIterator;
/*
Implements a column-oriented relation.
The main functionality provided is appending tuples, retrieving tuple-blocks
and persisting / restoring the relation.
The whole relation (sequence of tuples) is split up into tuple-blocks.
This happens on insertion.
Once a tuple-block exceeds a specified size (in bytes) a new tuple-block is
appended.
This class uses ~TBlock~s as tuple-block representation, which implements the
column-oriented aspects and can be further parameterized by a ~TBlockInfo~.
To potentially improve random access of tuple-blocks a specified number of
blocks is cached.
*/
class CRel
{
public:
/*
Creates a ~CRel~ using ~TBlock~s specified by ~blockInfo~, the
~desiredBlockSize~ in bytes and ~cacheSize~ representing the number of
blocks to cache.
*/
CRel(const PTBlockInfo &blockInfo, uint64_t desiredBlockSize,
uint64_t cacheSize);
/*
Creates a ~CRel~ using ~TBlock~s specified by ~blockInfo~ and ~cacheSize~
representing the number of blocks to cache.
The ~CRel~ is restored from the data provided by ~source~.
Precondition: the data held by ~source~ was created by a ~CRel~ with similar
~blockInfo~
*/
CRel(const PTBlockInfo &blockInfo, uint64_t cacheSize, Reader &source);
/*
Destroys this ~CRel~ instance.
*/
~CRel();
/*
Returns the ~TBlockInfo~ used by the ~TBlock~s in this relation.
*/
const PTBlockInfo &GetBlockInfo() const;
/*
Persists this relation into the provided ~target~.
This actually only saves metadata.
The rest is stored in three files created by this relation.
*/
void Save(Writer &target);
/*
Removes all tuple-blocks from this relation.
*/
void Clear();
/*
Deletes the files created by this relation.
*/
void DeleteFiles();
/*
Returns the number of columns of this ~CRel~.
*/
uint64_t GetColumnCount() const;
/*
Returns the number of tuples in this ~CRel~.
*/
uint64_t GetRowCount() const;
/*
Returns the number of tuple-blocks in this ~CRel~.
*/
uint64_t GetBlockCount() const;
/*
Returns the desired block size (in bytes) of this ~CRel~.
*/
uint64_t GetDesiredBlockSize() const;
/*
Returns the maximum number of tuple-blocks cached by this ~CRel~.
*/
uint64_t GetCacheSize() const;
/*
Appends a tuple to this relation.
Preconditions:
*~tuple~ must point to an array of ~AttrArrayEntry~
*~tuple~ must contain ~GetColumnCount()~ entries
*the attribute-array types of the entries must match the attribute-array
types of this relation's columns
*/
void Append(const AttrArrayEntry* tuple);
/*
Appends a tuple to this relation.
This function does not touch the ~Attribute~'s reference counters.
Preconditions:
*~tuple~ must point to an array of ~Attribute~ pointers
*~tuple~ must contain ~GetColumnCount()~ entries
*the attribute types of the entries must match the attribute types of this
relation's columns
*/
void Append(Attribute** tuple);
/*
Appends a tuple to this relation.
Preconditions:
*~tuple~ must represent a valid ~TBlockEntry~
*~tuple~'s ~TBlock~ must have a similar ~TBlockInfo~ to this relation
*/
void Append(const TBlockEntry &tuple);
/*
Appends a tuple to this relation.
This function does not touch the ~Tuple~'s reference counter.
Preconditions:
*~tuple~ must contain ~GetColumnCount()~ attributes
*the attribute types of the tuple's attributes must match the attribute
types of this relation's columns
*/
void Append(const Tuple &tuple);
/*
Accesses a tuple-block by its ~index~.
NOTE(review): presumably ~index~ must be smaller than ~GetBlockCount()~; the
iterators in this header only call it with such indices. Confirm against the
implementation.
*/
TBlock &GetBlock(uint64_t index) const;
/*
Returns a ~CRelIterator~ over this relation's tuples.
*/
CRelIterator GetIterator() const;
/*
Returns a ~CRelBlockIterator~ over this relation's tuple-blocks.
*/
CRelBlockIterator GetBlockIterator() const;
/*
~CRelIterator~s used (only!) for range-loop support.
*/
CRelIterator begin() const;
CRelIterator end() const;
private:
/*
Class representing an entry of the tuple-block cache.
If an entry leaves the cache and was marked ~modified~ the ~block~ gets
saved.
*/
class BlockCacheEntry
{
public:
TBlock *block;
bool modified;
BlockCacheEntry();
};
/*
Used to save and restore part of a ~TBlockHeader~'s data in one read / write
operation.
~ToHeader~ builds a full ~TBlockHeader~ from the stored ~rowCount~ / ~size~
and the file ids passed in.
*/
class BlockHeader
{
public:
uint64_t rowCount,
size;
BlockHeader();
BlockHeader(const TBlock &block);
TBlockHeader ToHeader(SmiFileId columnFileId, SmiFileId flobFileId);
};
/*
Represents a block of ~TBlock~ metadata.
Saving ~TBlock~ metadata in blocks saves read / write operations.
We always keep one ~BlockRecord~ in memory so ~sizeLimit~ should be chosen
keeping that in mind.
NOTE(review): the copy-constructor is deleted but the copy-assignment
operator is not. If the destructor releases ~data~ (not visible in this
header) an assignment would lead to a double release; consider deleting the
copy-assignment operator as well.
*/
class BlockRecord
{
public:
// 1 MiB; presumably an upper bound in bytes for one record - TODO confirm.
static const uint64_t sizeLimit = 1024 * 1024;
uint64_t index;
char *data;
bool modified;
BlockRecord();
BlockRecord(const BlockRecord &instance) = delete;
~BlockRecord();
};
/*
Data members.
Several members are ~mutable~, presumably so that the const member functions
declared in this class (~GetBlock~, ~SaveBlock~, ~LoadBlock~,
~GetBlockRecord~) can update cached state and access the files.
The declaration order determines the initialization order in the
constructors and must not be changed without adjusting them.
*/
PTBlockInfo m_blockInfo;
const uint64_t m_desiredBlockSize;
uint64_t m_rowCount,
m_blockCount,
m_nextBlockIndex;
mutable uint64_t m_recordCount;
const uint64_t m_blockRecordEntrySize,
m_blockRecordEntryCount,
m_blockRecordSize;
// The one ~BlockRecord~ kept in memory (see the ~BlockRecord~ description).
mutable BlockRecord m_blockRecord;
SmiFileId m_blockFileId,
m_columnFileId;
mutable SmiRecordFile m_blockFile;
Shared<SmiRecordFile> m_columnFile;
mutable LRUCache<BlockCacheEntry> m_blockCache;
/*
Deleted copy-constructor to prevent pointer copies
*/
CRel(const CRel&) = delete;
/*
This internal append function accepts all ~T~ which are valid in
~TBlock.Append(T)~. This saves us some redundant code.
*/
template<class T>
void TAppend(T tuple);
/*
Saves a ~TBlock~ with the provided ~index~.
*/
void SaveBlock(uint64_t index, TBlock &block) const;
/*
Restores a ~TBlock~ with the provided ~index~.
*/
TBlock *LoadBlock(uint64_t index) const;
/*
Gets the ~BlockRecord~ containing the metadata for the requested
~blockIndex~.
*/
BlockRecord &GetBlockRecord(uint64_t blockIndex) const;
};
/*
An iterator over a ~CRel~'s tuple-blocks.
Changes of the ~CRel~ invalidate the iterator, which is not reflected by
~CRelBlockIterator.IsValid~. Further usage is considered undefined behaviour.
The functions are defined in this header file to enable inlining.
*/
class CRelBlockIterator
{
public:
/*
Creates a invalid iterator.
*/
CRelBlockIterator() :
m_relation(nullptr),
m_block(nullptr),
m_index(0)
{
}
/*
Creates a iterator pointing at the first block in the passed ~relation~.
If the ~relation~ is empty the iterator is invalid.
*/
CRelBlockIterator(const CRel *relation) :
m_relation(relation),
m_block(relation->GetBlockCount() > 0 ? &relation->GetBlock(0) : nullptr),
m_index(0)
{
if (m_block != nullptr)
{
m_block->IncRef();
}
}
~CRelBlockIterator()
{
if (m_block != nullptr)
{
m_block->DecRef();
}
}
/*
Determines if the iterator's current position is valid.
*/
bool IsValid()
{
return m_block != nullptr;
}
/*
Returns the ~TBlock~ at the iterator's current position.
Precondition: ~IsValid()~
*/
TBlock &Get()
{
return *m_block;
}
/*
Moves the iterator to the next position.
Returns true if that position is still valid.
Precondition: ~IsValid()~
*/
bool MoveToNext()
{
if (m_block != nullptr)
{
m_block->DecRef();
if (++m_index < m_relation->GetBlockCount())
{
m_block = &m_relation->GetBlock(m_index);
m_block->IncRef();
return true;
}
m_block = nullptr;
}
return false;
}
private:
const CRel *m_relation;
TBlock *m_block;
uint64_t m_index;
};
/*
An iterator over a ~CRel~'s tuples.
Changes of the ~CRel~ invalidate the iterator, which is not reflected by
~CRelIterator.IsValid~. Further usage is considered undefined behaviour.
The functions are defined in this header file to enable inlining.
*/
class CRelIterator
{
public:
  /*
  Default construction: both the block iterator and the tuple iterator are
  default-constructed, i.e. the iterator points at nothing.
  */
  CRelIterator()
  {
  }

  /*
  Starts iterating the passed ~relation~.
  The tuple iterator is created over the block the block iterator points at.
  If the block iterator is invalid (the ~relation~ has no blocks) a
  default-constructed tuple iterator is used instead.
  */
  CRelIterator(const CRel *relation) :
    m_blockIterator(relation),
    m_tupleIterator(m_blockIterator.IsValid()
                      ? TBlockIterator(&m_blockIterator.Get())
                      : TBlockIterator())
  {
  }

  /*
  Tells whether the iterator currently points at a tuple.
  */
  bool IsValid()
  {
    return m_tupleIterator.IsValid();
  }

  /*
  Accesses the tuple at the current position as a ~TBlockEntry~.
  Precondition: ~IsValid()~
  */
  const TBlockEntry &Get()
  {
    return m_tupleIterator.Get();
  }

  /*
  Dereferencing is equivalent to ~Get~ (range-loop support).
  */
  const TBlockEntry &operator * () const
  {
    return m_tupleIterator.Get();
  }

  /*
  Advances to the next tuple and reports whether such a tuple exists.
  When the current block is exhausted the following blocks are visited until
  one provides a valid tuple iterator or no block is left.
  An iterator that is already invalid stays untouched and false is returned.
  */
  bool MoveToNext()
  {
    if (!m_tupleIterator.IsValid())
    {
      return false;
    }

    // Fast path: there is another tuple within the current block.
    if (m_tupleIterator.MoveToNext())
    {
      return true;
    }

    // The current block is done. Walk the remaining blocks, skipping those
    // which do not yield a valid tuple iterator.
    for (;;)
    {
      if (!m_blockIterator.MoveToNext())
      {
        return false;
      }

      m_tupleIterator = m_blockIterator.Get().GetIterator();

      if (m_tupleIterator.IsValid())
      {
        return true;
      }
    }
  }

  /*
  Pre-increment: same as ~MoveToNext~ with the result discarded.
  */
  CRelIterator &operator ++ ()
  {
    MoveToNext();

    return *this;
  }

  /*
  Equality is decided by the tuple iterators only; the block iterators are
  not taken into account.
  */
  bool operator == (const CRelIterator &other) const
  {
    return m_tupleIterator == other.m_tupleIterator;
  }

  /*
  Inequality is decided by the tuple iterators only; the block iterators are
  not taken into account.
  */
  bool operator != (const CRelIterator &other) const
  {
    return m_tupleIterator != other.m_tupleIterator;
  }

private:
  // Position within the relation's sequence of tuple-blocks.
  CRelBlockIterator m_blockIterator;

  // Position within the block ~m_blockIterator~ points at.
  TBlockIterator m_tupleIterator;
};
}