462 lines
12 KiB
C++
462 lines
12 KiB
C++
|
|
/*
|
||
|
|
----
|
||
|
|
This file is part of SECONDO.
|
||
|
|
|
||
|
|
Copyright (C) 2004-2008, University in Hagen, Faculty of Mathematics and
|
||
|
|
Computer Science, Database Systems for New Applications.
|
||
|
|
|
||
|
|
SECONDO is free software; you can redistribute it and/or modify
|
||
|
|
it under the terms of the GNU General Public License as published by
|
||
|
|
the Free Software Foundation; either version 2 of the License, or
|
||
|
|
(at your option) any later version.
|
||
|
|
|
||
|
|
SECONDO is distributed in the hope that it will be useful,
|
||
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
|
GNU General Public License for more details.
|
||
|
|
|
||
|
|
You should have received a copy of the GNU General Public License
|
||
|
|
along with SECONDO; if not, write to the Free Software
|
||
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||
|
|
----
|
||
|
|
|
||
|
|
//paragraph [1] Title: [{\Large \bf] [}]
|
||
|
|
//paragraph [10] Footnote: [{\footnote{] [}}]
|
||
|
|
//[TOC] [\tableofcontents]
|
||
|
|
|
||
|
|
[1] Implementation of Page Hash Table in UrelAlgebra
|
||
|
|
|
||
|
|
December 2009 Jiamin Lu
|
||
|
|
|
||
|
|
1 Includes
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include "PageHash.h"
|
||
|
|
|
||
|
|
|
||
|
|
using namespace std;
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
2 Implementation of PageHashTable
|
||
|
|
|
||
|
|
2.1 Constructor of PageHashTable
|
||
|
|
|
||
|
|
If the memory pool file is created for the first time,
|
||
|
|
allocate continuous ~bucketsNum~ pages in the memory pool file
|
||
|
|
for holding the hash table, and record the first page number.
|
||
|
|
|
||
|
|
*/
|
||
|
|
PageHashTable::PageHashTable(SmiUpdateFile* ufp,
|
||
|
|
int _bucketPageNum, int startPageNo) :
|
||
|
|
upFile(ufp), bucketsNum(_bucketPageNum),
|
||
|
|
firstPageNo(startPageNo)
|
||
|
|
{
|
||
|
|
|
||
|
|
if (!startPageNo)
|
||
|
|
{
|
||
|
|
for (int i = 0; i < bucketsNum; i++)
|
||
|
|
{
|
||
|
|
SmiUpdatePage *nPage;
|
||
|
|
if (upFile->AppendNewPage(nPage))
|
||
|
|
{
|
||
|
|
if (i == 0)
|
||
|
|
firstPageNo = nPage->GetPageNo();
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
cerr << "Update Relation: Append new page for"
|
||
|
|
<< " hash table in update file failed." << endl;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
PageHashTable::PageHashTable(PageHashTable& rhs):
|
||
|
|
upFile(rhs.upFile), bucketsNum(rhs.bucketsNum),
|
||
|
|
firstPageNo(rhs.firstPageNo)
|
||
|
|
{}
|
||
|
|
|
||
|
|
PageHashTable::~PageHashTable()
|
||
|
|
{
|
||
|
|
upFile = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
2.3 The FindEntry method of PageHashTable
|
||
|
|
|
||
|
|
Find the entry of the object with ~oid~ in the hash table.
|
||
|
|
If the entry doesn't exist when inserting the first unit of an object,
|
||
|
|
as well as there is still have space, then insert a new entry.
|
||
|
|
|
||
|
|
*/
|
||
|
|
bool PageHashTable::FindEntry(const Oid_t _oID,
|
||
|
|
PageHashEntry& objEntry, bool wantAdd)
|
||
|
|
{
|
||
|
|
//assert(upFile != 0);
|
||
|
|
|
||
|
|
try
|
||
|
|
{
|
||
|
|
const string errMsgPrefix("PageHashTable::FindEntry: ");
|
||
|
|
|
||
|
|
Oid_t oID = _oID;
|
||
|
|
PageHashBucket *bucket;
|
||
|
|
int firstBucketNo = oID % bucketsNum;
|
||
|
|
//Get the first possible bucket number
|
||
|
|
int bucketNo = firstBucketNo;
|
||
|
|
bool found = false;
|
||
|
|
|
||
|
|
int bCounter = 1;
|
||
|
|
PageNo_t bucketPageNo;
|
||
|
|
bool isBucketFull;
|
||
|
|
while (1)
|
||
|
|
{
|
||
|
|
bucketPageNo = firstPageNo + bucketNo;
|
||
|
|
SmiUpdatePage *bPage;
|
||
|
|
if (!upFile->GetPage(bucketPageNo, bPage))
|
||
|
|
throw SecondoException(errMsgPrefix
|
||
|
|
+ "Can't get bucket page" +int2Str(bucketPageNo)
|
||
|
|
+ " in update file. Line " + int2Str(__LINE__));
|
||
|
|
|
||
|
|
bucket = new PageHashBucket(bPage);
|
||
|
|
found = bucket->FindEntry(oID, objEntry);
|
||
|
|
isBucketFull = bucket->disk;
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
There are three results after finding an entry in this bucket:
|
||
|
|
|
||
|
|
1 If found, then the entry has already copied into ~objEntry~.
|
||
|
|
|
||
|
|
2 If not found, but this bucket is not full filled, then add a new entry
|
||
|
|
to this bucket with ascendant order on OID.
|
||
|
|
|
||
|
|
3 If not found, and this bucket is full filled, then keep looking in
|
||
|
|
the next bucket.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
if ((!found) && wantAdd && (!isBucketFull))
|
||
|
|
{
|
||
|
|
//Append a new page, and add a new entry
|
||
|
|
SmiUpdatePage *newPage;
|
||
|
|
if (!upFile->AppendNewPage(newPage))
|
||
|
|
throw SecondoException(errMsgPrefix
|
||
|
|
+ "Can't append a new page in update file. Line "
|
||
|
|
+ int2Str(__LINE__));
|
||
|
|
|
||
|
|
objEntry.oID = _oID;
|
||
|
|
objEntry.pageNo = newPage->GetPageNo();
|
||
|
|
objEntry.pageAddr = newPage->GetPageAddr();
|
||
|
|
|
||
|
|
if (!bucket->AddEntry(oID, objEntry))
|
||
|
|
throw SecondoException(errMsgPrefix
|
||
|
|
+ "Can't add new entry to the bucket. Line "
|
||
|
|
+ int2Str(__LINE__));
|
||
|
|
found = true;
|
||
|
|
}
|
||
|
|
delete bucket;
|
||
|
|
|
||
|
|
if (found)
|
||
|
|
break;
|
||
|
|
|
||
|
|
//Get the next bucketNo
|
||
|
|
int newBucketNo = firstBucketNo + bCounter;
|
||
|
|
bucketNo = newBucketNo < bucketsNum ? newBucketNo :
|
||
|
|
abs(newBucketNo - bucketsNum);
|
||
|
|
|
||
|
|
//there is no other possible buckets
|
||
|
|
if (++bCounter > bucketsNum)
|
||
|
|
{
|
||
|
|
cerr << "Through all buckets, still can't find "
|
||
|
|
"the entry in the hash table" << endl;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return found;
|
||
|
|
}
|
||
|
|
catch (SecondoException& e)
|
||
|
|
{
|
||
|
|
cerr << e.msg() << endl;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
2.4 The ChangeEntry method of PageHashTable
|
||
|
|
|
||
|
|
Change an exist entry in the hash table.
|
||
|
|
|
||
|
|
*/
|
||
|
|
|
||
|
|
bool PageHashTable::ChangeEntry(const Oid_t _oID,
|
||
|
|
SmiUpdatePage* _newPage)
|
||
|
|
{
|
||
|
|
//assert(upFile != 0);
|
||
|
|
|
||
|
|
try
|
||
|
|
{
|
||
|
|
const string errMsgPrefix("PageHashTable::ChangeEntry: ");
|
||
|
|
|
||
|
|
PageHashBucket* bucket;
|
||
|
|
int firstBucketNo = _oID % bucketsNum;
|
||
|
|
PageNo_t bucketNo = firstBucketNo;
|
||
|
|
bool found = false;
|
||
|
|
PageNo_t bucketPageNo;
|
||
|
|
PageHashEntry tgtEntry;
|
||
|
|
bool isBucketFull;
|
||
|
|
|
||
|
|
int bCounter = 1;
|
||
|
|
while (bCounter <= bucketsNum)
|
||
|
|
{
|
||
|
|
bucketPageNo = firstPageNo + bucketNo;
|
||
|
|
SmiUpdatePage *bPage;
|
||
|
|
if (!upFile->GetPage(bucketPageNo, bPage))
|
||
|
|
throw SecondoException(errMsgPrefix
|
||
|
|
+ "Can't get hash page in update file. Line "
|
||
|
|
+ int2Str(__LINE__));
|
||
|
|
|
||
|
|
bucket = new PageHashBucket(bPage);
|
||
|
|
isBucketFull = bucket->disk;
|
||
|
|
found = bucket->FindEntry(_oID, tgtEntry);
|
||
|
|
|
||
|
|
if (found)
|
||
|
|
{
|
||
|
|
//if found, replace the old entry
|
||
|
|
PageHashEntry newEntry;
|
||
|
|
newEntry.oID = _oID;
|
||
|
|
newEntry.pageNo = _newPage->GetPageNo();
|
||
|
|
newEntry.pageAddr = _newPage->GetPageAddr();
|
||
|
|
bucket->ChangeEntry(_oID, newEntry);
|
||
|
|
}
|
||
|
|
|
||
|
|
delete bucket;
|
||
|
|
|
||
|
|
if (found)
|
||
|
|
break;
|
||
|
|
else if (!isBucketFull)
|
||
|
|
{
|
||
|
|
//Can't find an entry in an unfilled page
|
||
|
|
cerr << "Can't find page entry with oID: "
|
||
|
|
<< _oID << endl;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
//search the next bucket
|
||
|
|
int newBucketNo = firstBucketNo + bCounter;
|
||
|
|
bucketNo = newBucketNo < bucketsNum ?
|
||
|
|
newBucketNo : abs(newBucketNo - bucketsNum);
|
||
|
|
bCounter++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return found;
|
||
|
|
}
|
||
|
|
catch (SecondoException& e)
|
||
|
|
{
|
||
|
|
cerr << e.msg() << endl;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
int PageHashTable::GetBucketPagesNum()
|
||
|
|
{
|
||
|
|
return bucketsNum;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
3 Implementation of PageHashBucket
|
||
|
|
|
||
|
|
3.1 The constructor of the PageHashBucket
|
||
|
|
|
||
|
|
Initialize the bucket by reading the head parameter in this page.
|
||
|
|
|
||
|
|
*/
|
||
|
|
PageHashBucket::PageHashBucket(SmiUpdatePage* page)
|
||
|
|
: disk(false), extEntryNum(0),
|
||
|
|
pagePt(page), isInitialized(false)
|
||
|
|
{
|
||
|
|
// assert(pagePt->isAvailable());
|
||
|
|
|
||
|
|
SmiSize offset = 0;
|
||
|
|
pagePt->Read(&disk, sizeof(bool), offset);
|
||
|
|
offset += sizeof(bool);
|
||
|
|
pagePt->Read(&extEntryNum, sizeof(int), offset);
|
||
|
|
offset += sizeof(int);
|
||
|
|
headOffset = offset;
|
||
|
|
|
||
|
|
maxBucketsNum = (pagePt->GetPageSize() -
|
||
|
|
headOffset) / sizeof(PageHashEntry);
|
||
|
|
isInitialized = true;
|
||
|
|
}
|
||
|
|
|
||
|
|
PageHashBucket::~PageHashBucket()
|
||
|
|
{
|
||
|
|
pagePt = 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
3.2 AddEntry method of PageHashBucket
|
||
|
|
|
||
|
|
Insert a newEntry to the array on this bucket by ascending order.
|
||
|
|
After adding, update the ~extEntryNum~,
|
||
|
|
and if the bucket is full, mark the ~disk~ as true,
|
||
|
|
so there will be no more entries want to be added into this bucket.
|
||
|
|
|
||
|
|
*/
|
||
|
|
bool PageHashBucket::AddEntry(const Oid_t _oID, PageHashEntry& newEntry)
|
||
|
|
{
|
||
|
|
// assert(pagePt->isAvailable());
|
||
|
|
|
||
|
|
if (isInitialized)
|
||
|
|
{
|
||
|
|
int eCounter = extEntryNum;
|
||
|
|
SmiSize entrySize = sizeof(PageHashEntry);
|
||
|
|
|
||
|
|
//Move all entries have larger ~oID~ to the back of the array
|
||
|
|
size_t offset = headOffset + extEntryNum * entrySize;
|
||
|
|
while (eCounter-- > 0)
|
||
|
|
{
|
||
|
|
PageHashEntry lmEntry;//The last maximum entry
|
||
|
|
pagePt->Read(&lmEntry, entrySize, offset - entrySize);
|
||
|
|
if (lmEntry.oID > _oID)
|
||
|
|
pagePt->Write(&lmEntry, entrySize, offset);
|
||
|
|
else
|
||
|
|
break;
|
||
|
|
offset -= entrySize;
|
||
|
|
}
|
||
|
|
|
||
|
|
//Put the new Entry into the suitable place
|
||
|
|
pagePt->Write(&newEntry, entrySize, offset);
|
||
|
|
|
||
|
|
//Update the ~exist entry number~, if this bucket is full,
|
||
|
|
//mark it with the ~disk~
|
||
|
|
extEntryNum++;
|
||
|
|
pagePt->Write(&extEntryNum, sizeof(int), sizeof(bool));
|
||
|
|
if (extEntryNum == maxBucketsNum)
|
||
|
|
{
|
||
|
|
disk = true;
|
||
|
|
pagePt->Write(&disk, sizeof(bool));
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
cerr << "The Bucket is not initialized for adding entry yet !" << endl;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
3.3 The FindEntry method of PageHashBucket
|
||
|
|
|
||
|
|
Use binary search to find the entry to the data page in this bucket,
|
||
|
|
if the entry is found, its value will be returned by ~objEntry~.
|
||
|
|
|
||
|
|
*/
|
||
|
|
bool PageHashBucket::FindEntry(const Oid_t _oID, PageHashEntry &objEntry)
|
||
|
|
{
|
||
|
|
// assert(pagePt->isAvailable());
|
||
|
|
|
||
|
|
if (isInitialized)
|
||
|
|
{
|
||
|
|
if (extEntryNum == 0)
|
||
|
|
return false;
|
||
|
|
else
|
||
|
|
{
|
||
|
|
SmiSize stOffset = headOffset;
|
||
|
|
SmiSize endOffset = headOffset
|
||
|
|
+ (extEntryNum - 1) * sizeof(PageHashEntry);
|
||
|
|
SmiSize tgtOffset = binSearchEntryAddr(_oID, stOffset, endOffset);
|
||
|
|
if (tgtOffset)
|
||
|
|
{
|
||
|
|
pagePt->Read(&objEntry, sizeof(PageHashEntry), tgtOffset);
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
cerr << "The Bucket is not initialized for "
|
||
|
|
"searching entry yet !" << endl;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
|
||
|
|
3.4 The ChangeEntry method of PageHashBucket
|
||
|
|
|
||
|
|
Change an exist entry in this bucket, if the entry doesn't exist, return false.
|
||
|
|
|
||
|
|
*/
|
||
|
|
bool PageHashBucket::ChangeEntry(const Oid_t _oID, PageHashEntry newEntry)
|
||
|
|
{
|
||
|
|
// assert(pagePt->isAvailable());
|
||
|
|
if (isInitialized)
|
||
|
|
{
|
||
|
|
if (extEntryNum == 0)
|
||
|
|
return false;
|
||
|
|
else
|
||
|
|
{
|
||
|
|
SmiSize stOffset = headOffset;
|
||
|
|
SmiSize endOffset = headOffset
|
||
|
|
+ (extEntryNum -1) * sizeof(PageHashEntry);
|
||
|
|
SmiSize tgtOffset = binSearchEntryAddr(_oID, stOffset, endOffset);
|
||
|
|
if (tgtOffset)
|
||
|
|
{
|
||
|
|
//Found, and change the entry
|
||
|
|
pagePt->Write(&newEntry, sizeof(PageHashEntry), tgtOffset);
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
cerr << "Can't find the entry in this bucket." << endl;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
cerr << "The Bucket is not initialized "
|
||
|
|
"for searching entry yet !" << endl;
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
SmiSize PageHashBucket::binSearchEntryAddr(const Oid_t _oID,
|
||
|
|
SmiSize _LPt, SmiSize _HPt)
|
||
|
|
{
|
||
|
|
// assert(pagePt->isAvailable());
|
||
|
|
|
||
|
|
if (_LPt > _HPt)
|
||
|
|
return 0;
|
||
|
|
else
|
||
|
|
{
|
||
|
|
SmiSize entrySize = sizeof(PageHashEntry);
|
||
|
|
int EntryNum = (_HPt - _LPt) / entrySize + 1;
|
||
|
|
int mid = EntryNum / 2;
|
||
|
|
Oid_t midID;
|
||
|
|
pagePt->Read(&midID, sizeof(Oid_t), _LPt + mid * entrySize);
|
||
|
|
if (_oID == midID)
|
||
|
|
return (_LPt + mid * entrySize);
|
||
|
|
else if (mid == 0)
|
||
|
|
return 0;
|
||
|
|
else if (_oID > midID)
|
||
|
|
return binSearchEntryAddr(_oID,
|
||
|
|
_LPt + (mid + 1) * entrySize, _HPt);
|
||
|
|
else
|
||
|
|
return binSearchEntryAddr(_oID, _LPt, _LPt + (mid - 1) * entrySize);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|