/* ---- This file is part of SECONDO. Copyright (C) 2012, University in Hagen Faculty of Mathematic and Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- */ #include "SecondoSMI.h" #include "LRU.h" #include #include #include "NestedList.h" #include "ListUtils.h" #include "StringUtils.h" #include #ifndef VTRIE_H #define VTRIE_H namespace vtrie{ static const unsigned int CHARS = 256; /* 1 Class VTrieNode This class represents a single node within a Vtrie. Besides the content of type I, it contains an array of 256 SmiRecordIds for referring the sons. */ template class VTrieNode{ public: /* ~Constructor~ This constructor creates a new Node without any successors. */ VTrieNode():content(0){ memset(links, 0, CHARS*sizeof(SmiRecordId)); } /* ~Constructor~ This constructor creates a Vtrie node from the record stored if file at position recID. */ VTrieNode(SmiRecordFile* file, const SmiRecordId& recID){ readFrom(file, recID); } /* ~readFrom~ The functions read this node from the specifified record. It overwrites the content of this node. If an error occurs, the return value of this function will be false. */ bool readFrom(SmiRecordFile* file, const SmiRecordId& id){ if(id==0){ return false; } SmiSize length; char* data = file->GetData(id, length, true); if(data==0){ return false; } readFrom((unsigned char*)data,length); free(data); return true; } /* ~writeToFile~ Writes the content of this node to a specified record. */ void writeToFile(SmiRecordFile* f, SmiRecordId rid){ SmiSize bufferLength; char* buffer = getBuffer(bufferLength); SmiSize written; f->Write(rid, buffer, bufferLength,0, written); assert(written == bufferLength); delete[] buffer; } /* ~appendToFile~ This function appends this node to a file and returns the record id where this node was stored. */ SmiRecordId appendToFile(SmiRecordFile* file){ SmiRecord record; SmiSize bufferLength; char* buffer = getBuffer(bufferLength); SmiRecordId id; file->AppendRecord(id, record); record.Write(buffer, bufferLength); delete[] buffer; return id; } /* ~copyFromTo~ This function copyies the subtree rooted by this node into the file ~dest~. */ SmiRecordId copyFromTo(SmiRecordFile* src, SmiRecordFile* dest){ VTrieNode copy; copy.content = content; for(unsigned int i=0; i sc(src,links[i]); copy.links[i] = copyFromTo(src,dest); } } return copy.appendToFile(dest); } /* ~print~ Prints a textual representation of this node to ~o~. */ std::ostream& print(std::ostream& o){ o << "[ " << content << ", " << " ( " ; bool first = true; for(unsigned int i=0;i. */ template class VTrieNodeCache{ public: /* 2.1 Contructor Creates a cache with size ~maxMem~ for file ~file~. */ VTrieNodeCache(size_t _maxMem, SmiRecordFile* _file): file(_file), lru(_maxMem / sizeof(VTrieNode)){ } /* ~Destructir~ Clears the cache and writes back all contained nodes. */ ~VTrieNodeCache(){ clear(); } /* ~getNode~ Returns the node specified by it's record id. */ VTrieNode* getNode(const SmiRecordId id ){ VTrieNode** n = lru.get(id); if(n!=0){ return *n; } VTrieNode* node = new VTrieNode(file,id); LRUEntry*>* victim = lru.use(id, node); if(victim!=0){ victim->value->writeToFile(file, victim->key); delete victim->value; delete victim; } return node; } /* ~clear~ Removes all entries form this cache and writes back the contained nodes. */ void clear(){ LRUEntry*>* victim; while( (victim = lru.deleteLast())!=0){ victim->value->writeToFile(file, victim->key); delete victim->value; delete victim; } } /* ~appendBlankNode~ Creates a new node, appends it to the underlying file and inserts this node into the cache. */ VTrieNode* appendBlankNode( SmiRecordId& id){ VTrieNode* node = new VTrieNode(); id = node->appendToFile(file); LRUEntry*>* victim = lru.use(id, node); if(victim!=0){ victim->value->writeToFile(file, victim->key); delete victim->value; delete victim; } return node; } private: SmiRecordFile* file; LRU*> lru; }; /* 3 Class StackEntry A helper class for the VTrieIterator. */ template class StackEntry{ public: StackEntry(const VTrieNode& _node, unsigned int _pos, const std::string& _str): node(_node),pos(_pos),str(_str){} VTrieNode node; unsigned int pos; std::string str; }; /* 4 Class VTrieIterator */ template class VTrieIterator{ public: /* 4.1 Constructor */ VTrieIterator(SmiRecordFile* _file, const SmiRecordId& rid, const std::string& _str): file(_file), st(){ if(rid!=0){ VTrieNode s(_file,rid); StackEntry* entry = new StackEntry(s,0,_str); st.push(entry); } } /* 4.2 Destructor Destroys the underlying data structure. */ ~VTrieIterator(){ while(!st.empty()){ StackEntry* victim = st.top(); st.pop(); delete victim; } } /* 4.3 ~next~ If there are more entries starting with the prefix specified in the constructor, this function will return true and set ~str~ to the complete word and set content to the content of the corresponding TrieNode. */ bool next(std::string& str, I& content){ while(!st.empty()){ StackEntry* top = st.top(); st.pop(); unsigned int pos = top->pos; while(posnode.getNext(pos)==0){ pos++; } if(pos >= CHARS){ if(top->node.getContent()!=0){ str = top->str; content = top->node.getContent(); delete top; return true; } else { delete top; } } else { SmiRecordId c = top->node.getNext(pos); VTrieNode s(file,c); std::stringstream ss; ss << top->str; ss << (char) pos; StackEntry* entry = new StackEntry(s,0,ss.str()); top->pos = pos+1; st.push(top); st.push(entry); } } return false; } private: SmiRecordFile* file; std::stack*> st; }; /* 5 Class VTrie This class represents a prefix tree. */ template class VTrie{ public: /* 5.1 Constructor */ VTrie(): file(false,0,false), rootId(0){ file.Create(); } /* 5.2 Copy Constructor. This constructor creates a flat copy for src. */ VTrie(const VTrie& src):file(src.file), rootId(src.rootId) { } /* 5.3 Constructor Creates a Vtrie with given root. */ VTrie(SmiFileId fid, SmiRecordId rid) : file(false,0,false), rootId(rid){ file.Open(fid); } /* 5.4 Destructor */ ~VTrie(){ if(file.IsOpen()){ file.Close(); } } /* 5.5 insert Inserts id at the node specified by s. If there is already an entry, this entry will be overwritten by id. */ void insert(const std::string& s, const T id){ if(id==0){ return; } VTrieNode node; SmiRecordId id2; getInsertNode(s,node,id2); node.setContent(id); node.writeToFile(&file, id2); } /* 5.6 deleteFile Removes ths underlying file from disk. */ void deleteFile(){ if(file.IsOpen()){ file.Close(); } file.Drop(); } /* 5.7 search Returns the content stored under the specified prefix. */ T search(const std::string& str){ SmiRecordId id = rootId; size_t pos = 0; while((id!=0) && (pos node(&file,id); id = node.getNext(str[pos]); pos++; } if(id==0){ return 0; } return VTrieNode(&file,id).getContent(); } /* 5.8 contains Checks whether str is a member of this Vtrie. If acceptPrefix is true, the return value of this function will also be true, if str is a prefix of a stored word. */ bool contains(const std::string& str, const bool acceptPrefix){ unsigned int pos=0; SmiRecordId id = rootId; VTrieNode son; while( (id!=0) && pos < str.length()){ son.readFrom(&file,id); id = son.getNext(str[pos]); pos++; } if(id==0){ return false; } son.readFrom(&file,id); return acceptPrefix || (son.getContent()!=0); } /* 5.9 clone Creates a depth copy of this VTrie. */ VTrie* clone(){ VTrie * res = new VTrie(); if(rootId!=0){ VTrieNode son(&file,rootId); res->rootId = son.copyFromTo( &file, &res->file); } return res; } /* 5.10 copyFrom Sets this Vtrie to be identically to src. */ void copyFrom( VTrie& src){ if(rootId!=0){ file.ReCreate(); rootId = 0; } if(src.rootId!=0){ VTrieNode srcSon(&src.file , src.rootId); rootId = srcSon.copyFromTo(&src.file, &file); } } /* 5.11 BasicType Returns Secondo's type description for this class. */ static std::string BasicType(){ return "Vtrie"; } /* 5.12 checkType Checks whether ~t~ corresponds the Secondo's type description of this class. */ static bool checkType(ListExpr t){ return listutils::isSymbol(t,BasicType()); } /* 5.13 getFileId Returns the file id of the underlying file. */ SmiFileId getFileId() { return file.GetFileId(); } /* 5.14 Returns the record id referring to the root of this Vtrie. */ SmiRecordId getRootId()const { return rootId; } /* 5.15 getEntries Returns an iterator iterating over all entries with ~prefix~ as prefix. The caller of this function is responsible to delete the returned instance. */ VTrieIterator * getEntries(const std::string& prefix){ unsigned int pos=0; SmiRecordId id = rootId; while(pos s(&file,id); id = s.getNext(prefix[pos]); pos++; } return new VTrieIterator(&file,id,prefix); } /* 5.16 getFileInfo Returns statistical information about the underlying file. */ void getFileInfo( SmiStatResultType& result){ result = file.GetFileStatistics(SMI_STATS_LAZY); result.push_back( std::pair("FilePurpose", "Secondary VTrie Index")); } protected: SmiRecordFile file; SmiRecordId rootId; /* 5.17 ~getInsertNode~ Returns the node corresponding to str. ~node~ and ~nodeId~ will be set to these values. If the node was not present before calling this function, the return value will be true. */ bool getInsertNode(const std::string& str, VTrieNode& node, SmiRecordId& nodeId){ SmiRecordId lastId = 0; SmiRecordId id = rootId; size_t pos = 0; VTrieNode son; while(id != 0 && pos < str.length()){ son.readFrom(&file,id); lastId = id; id = son.getNext(str[pos]); if(id!=0){ pos++; } } if(id!=0){ node.readFrom(&file,id); nodeId = id; return false; } // the string is not member of the Vtrie, // we extend the Vtrie id = lastId; if(rootId == 0 ){ rootId = son.appendToFile(&file); id = rootId; } VTrieNode newNode; SmiRecordId newId = 0; while(pos& node, SmiRecordId& nodeId, VTrieNodeCache* cache){ SmiRecordId lastId = 0; SmiRecordId id = rootId; size_t pos = 0; VTrieNode* son=0; while(id != 0 && pos < str.length()){ son = cache->getNode(id); lastId = id; id = son->getNext(str[pos]); if(id!=0){ pos++; } } if(id!=0){ node = *(cache->getNode(id)); nodeId = id; return false; } // the string is not member of the Vtrie, // we extend the Vtrie id = lastId; if(rootId == 0 ){ son = cache->appendBlankNode(rootId); id = rootId; } SmiRecordId newId = 0; while(pos* newNode = cache->appendBlankNode(newId); son->setNext(str[pos],newId); son = newNode; id = newId; pos++; } node = *son; nodeId = id; return true; } }; } // end of namespace Vtrie #endif