/* ---- This file is part of SECONDO. Copyright (C) 2007, Faculty of Mathematics and Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- 1 class RegExPattern2 This class is the implementation of a finite automaton. In contrast to RegExPattern, this class is a pure main memory implementation which is faster than RegExPattern but cannot be used as an attribute data type. The automaton is kept in two tables: transitions holds NUMCHARS target states per state (constructFrom stores -1 where no transition exists) and finalStates holds one flag per state. Processing starts in state 0. */ #ifndef REGEXPATTERN2_H #define REGEXPATTERN2_H #define NUMCHARS 256 /* Number of input symbols per state; the transition table is indexed as state*NUMCHARS + symbol. */ #include "IntNfa.h" #include "NestedList.h" #include "ListUtils.h" #include "SecondoSMI.h" /* Parser entry point, declared here and defined elsewhere. constructFrom treats a non-zero result as failure and otherwise uses the automaton handed back through T. */ int parseRegEx(const char* argument, IntNfa** T); class RegExPattern2{ public: /* 1.1 Constructors  The default constructor has an empty initializer list and an empty body, so it assigns no value to defined, numOfStates or srcDefined; Cast invokes it through placement new. The bool constructor creates an automaton with the given defined flag and without states, transitions or source. The copy constructor copies every member of p. */ RegExPattern2() {} RegExPattern2(bool _defined): defined(_defined), numOfStates(0), transitions(), finalStates(), src(""), srcDefined(false) {} RegExPattern2(const RegExPattern2& p): defined(p.defined), numOfStates(p.numOfStates), transitions(p.transitions), finalStates(p.finalStates), src(p.src), srcDefined(p.srcDefined){ } /* Source access: hasSource reports whether a source string has been stored, getSource returns a copy of it, and setSource stores a new one and marks it as present. constructFrom stores the regular expression here. */ bool hasSource() const{ return srcDefined; } std::string getSource() const{ return src; } void setSource(const std::string& src){ this->src = src; srcDefined = true; } /* Assignment operator: copies every member of p and returns a reference to this object. */ RegExPattern2& operator=(const RegExPattern2& p){ defined = p.defined; numOfStates = p.numOfStates; transitions = p.transitions; finalStates = p.finalStates; src = p.src; srcDefined = p.srcDefined; 
return *this; } /* 1.3 Destructor  Empty body; nothing is released explicitly. */ ~RegExPattern2() {} /* 1.4 Clone function  Returns a copy of this object allocated with new. */ RegExPattern2* Clone() const{ return new RegExPattern2(*this); } /* 1.5 SetDefined  Sets the defined flag. When def is false, it additionally resets the number of states to 0, clears both tables and discards the stored source. */ void SetDefined(const bool def){ defined = def; if(!def){ numOfStates = 0; transitions.clear(); finalStates.clear(); src = ""; srcDefined = false; } } /* SetDefinedKeepSource  Same as SetDefined, except that src and srcDefined are left untouched. */ void SetDefinedKeepSource(const bool def){ defined = def; if(!def){ numOfStates = 0; transitions.clear(); finalStates.clear(); } } /* 1.6 IsDefined Checks whether this automaton is a defined one. */ inline bool IsDefined() const{ return defined; } /* 1.7 constructFrom  Creates a dfa from a regular expression. The expression is stored as source first. If parseRegEx reports an error, the automaton is set to undefined (keeping the source) and false is returned. Otherwise the parsed automaton is made deterministic, minimized and its start state is brought to the top; its final flags and transitions are then copied into the two tables, with -1 stored where a state has no transition for a symbol, and true is returned. NOTE(review): the headers of the two copy loops below look truncated in this copy (the text between a less-than sign and the following greater-than sign seems to be missing); restore them from the repository before compiling. NOTE(review): nfa is not initialized before the call and is not deleted on the failure path; whether parseRegEx allocates it in that case is not visible here - confirm. */ bool constructFrom(const std::string& regex){ IntNfa* nfa; setSource(regex); if(parseRegEx(regex.c_str(),&nfa)!=0){ SetDefinedKeepSource(false); return false; } nfa->nfa.makeDeterministic(); nfa->nfa.minimize(); nfa->nfa.bringStartStateToTop(); transitions.clear(); finalStates.clear(); numOfStates = nfa->nfa.numOfStates(); for(int i=0;infa.isFinalState(i)); finalStates.push_back(isFinal); } for(int i=0;i s = nfa->nfa.getState(i); for(int c=0;c t = s.getTransitions(c); if(t.empty()){ transitions.push_back(-1); /* no transition for this symbol */ } else { assert(t.size()==1); /* exactly one target is expected per symbol at this point */ transitions.push_back(*t.begin()); } } } delete nfa; SetDefined(true); return true; } /* 1.8 matches Checks whether the given string is a word of the language represented by this dfa. Returns false right away if the automaton is undefined, has no states or has an empty final state table. NOTE(review): in this copy the text following the loop header of matches is missing up to the middle of a later routine (section 1.9 is absent, and the remaining code uses length, lastFinal, findMax, allowEmpty and isFinal, none of which are declared in the visible text). The rest of matches and the header of that routine must be restored from the repository. 
*/ bool matches(const std::string& text){ if(!IsDefined()){ return false; } if((finalStates.size()==0) || (numOfStates==0)){ return false; } int state = 0; /* processing starts in state 0 */ const char* tc = text.c_str(); for(size_t i=0;i0) || allowEmpty){ return true; } } state = transitions[state*NUMCHARS+(unsigned char)text[i]]; /* the cast to unsigned char keeps the symbol index non-negative */ length++; if(state<0){ /* negative entry: no transition for this symbol; with findMax the last recorded final position is used if there is one */ if(findMax){ if((lastFinal>0) || ((lastFinal>=0) && allowEmpty)){ length = lastFinal; return true; } else { return false; } } else { return false; } } } // end of text reached isFinal = finalStates[state]; if(findMax){ if(isFinal){ return allowEmpty || (length>0); } else if((lastFinal>0) || ((lastFinal>=0)&&allowEmpty)){ length = lastFinal; return true; } else { return false; } } else { return isFinal && ((length>0) || allowEmpty); } } /* 1.10 readFrom  Reads this automaton from a nested list representation. Accepted forms: the undefined symbol (the automaton becomes undefined and true is returned); a string or text atom, which is handed to constructFrom; or a list of three or four elements (number of states, transitions, final states and optionally a source given as string or text atom). Whenever one of the checks fails, the automaton is set to undefined and false is returned. NOTE(review): no check against a negative number of states is visible, and the loop header following numOfStates=ns looks truncated in this copy. */ bool ReadFrom(const ListExpr value, const ListExpr typeInfo){ if(listutils::isSymbolUndefined(value)){ SetDefined(false); return true; } if(nl->AtomType(value)==StringType){ return constructFrom(nl->StringValue(value)); } if(nl->AtomType(value)==TextType){ return constructFrom(nl->Text2String(value)); } transitions.clear(); finalStates.clear(); SetDefined(true); if(!nl->HasLength(value,3) && !nl->HasLength(value,4)){ SetDefined(false); return false; } if(nl->HasLength(value,4)){ ListExpr sourceList = nl->Fourth(value); if( (nl->AtomType(sourceList)==StringType) || (nl->AtomType(sourceList)==TextType)){ setSource(listutils::stringValue(sourceList)); } else { SetDefined(false); return false; } } if(nl->AtomType(nl->First(value))!=IntType){ SetDefined(false); return false; } int ns = nl->IntValue(nl->First(value)); numOfStates=ns; for(int i=0;iSecond(value); while(!nl->IsEmpty(T)){ ListExpr t = nl->First(T); T = nl->Rest(T); if(!nl->HasLength(t,3)){ SetDefined(false); return false; } if((nl->AtomType(nl->First(t))!=IntType) || (nl->AtomType(nl->Second(t))!=IntType) || (nl->AtomType(nl->Third(t))!=IntType)){ SetDefined(false); return false; } int 
source = nl->IntValue(nl->First(t)); int arg = nl->IntValue(nl->Second(t)); int target = nl->IntValue(nl->Third(t)); /* each transition is a triple of ints: source state, symbol, target state */ if(arg<0 || arg >= NUMCHARS){ SetDefined(false); return false; } if(source <0 || source >= numOfStates || target<0 || target >=NUMCHARS){ SetDefined(false); return false; } /* NOTE(review): target is stored in the table as a state number and later used to index finalStates, yet it is bounded by NUMCHARS rather than numOfStates here - looks like a typo; confirm. */ transitions[source*NUMCHARS+arg] = target; } /* third element: the final states, each an int in the range from 0 to ns-1 */ ListExpr F = nl->Third(value); while(!nl->IsEmpty(F)){ ListExpr f = nl->First(F); F = nl->Rest(F); if(nl->AtomType(f)!=IntType){ SetDefined(false); return false; } int fi = nl->IntValue(f); if(fi<0 || fi>=ns){ SetDefined(false); return false; } finalStates[fi] = true; } return true; } /* 1.11 ToListExpr  Converts this dfa into its nested list representation. An undefined automaton yields the result of listutils::getUndefined(). Otherwise the result is a list made of the number of states, a list of transition triples and a list of state numbers, followed by the source as a text atom if one is stored. A table index i stands for state i/NUMCHARS and symbol i%NUMCHARS. NOTE(review): the headers of both loops below look truncated in this copy. */ ListExpr ToListExpr(const ListExpr typeInfo){ if(!IsDefined()){ return listutils::getUndefined(); } ListExpr ns = nl->IntAtom(numOfStates); ListExpr tr = nl->TheEmptyList(); ListExpr last = nl->TheEmptyList(); bool first = true; for(unsigned int i=0;i=0){ ListExpr trans = nl->ThreeElemList(nl->IntAtom(i/NUMCHARS), nl->IntAtom(i%NUMCHARS), nl->IntAtom(t)); if(first){ tr = nl->OneElemList(trans); last = tr; first = false; } else { last = nl->Append(last,trans); } } } ListExpr fin = nl->TheEmptyList(); first = true; for(unsigned int i=0;iOneElemList(nl->IntAtom(i)); last = fin; first = false; } else { last = nl->Append(last,nl->IntAtom(i)); } } } if(!srcDefined){ return nl->ThreeElemList(ns,tr,fin); } else { return nl->FourElemList(ns,tr,fin, nl->TextAtom(src)); } } /* Secondo specific operators  BasicType returns the type name "regex2", checkType tests whether a type expression is that symbol, and Property returns a two-row list of descriptive strings (signature, example type list, list representation, example list). */ static const std::string BasicType(){ return "regex2"; } static bool checkType(ListExpr type){ return listutils::isSymbol(type, BasicType()); } static ListExpr Property(){ return (nl->TwoElemList( nl->FourElemList(nl->StringAtom("Signature"), nl->StringAtom("Example Type List"), nl->StringAtom("List Rep"), nl->StringAtom("Example List")), nl->FourElemList(nl->StringAtom("-> Simple"), nl->StringAtom(BasicType()), nl->StringAtom("(numStates (transitions)" " (finalstates)"), 
nl->StringAtom("(2 ( (0 97 1)) (1)) ")))); } /* Check: true if type equals the type name; errorInfo is not used. */ static bool Check(const ListExpr type, ListExpr& errorInfo){ return nl->IsEqual(type,BasicType()); } /* Out: treats value.addr as a RegExPattern2 and returns its list representation. */ static ListExpr Out(const ListExpr typeInfo, const Word value){ RegExPattern2* p = (RegExPattern2*) value.addr; return p->ToListExpr(typeInfo); } /* In: creates a new undefined instance and fills it with ReadFrom. correct receives the result of ReadFrom; on failure the instance is deleted and the returned address is 0. errorPos and errorInfo are not used. */ static Word In(const ListExpr typeInfo, const ListExpr value, const int errorPos, ListExpr& errorInfo, bool& correct){ RegExPattern2* p = new RegExPattern2(false); correct = p->ReadFrom(value,typeInfo); Word res; if(!correct){ delete p; res.addr = 0; } else { res.addr = p; } return res; } /* Create: returns a new undefined instance. */ static Word Create(const ListExpr typeInfo){ Word res; res.addr = new RegExPattern2(false); return res; } /* Delete and Close do the same thing: delete the instance behind value.addr and set the address to 0. */ static void Delete(const ListExpr typeInfo, Word& value){ delete (RegExPattern2*) value.addr; value.addr = 0; } static void Close(const ListExpr typeInfo, Word& value){ delete (RegExPattern2*) value.addr; value.addr = 0; } /* Clone: returns a copy, allocated with new, of the instance behind w.addr. */ static Word Clone(const ListExpr typeInfo,const Word& w){ Word res; res.addr = ((RegExPattern2*)w.addr)->Clone(); return res; } /* SizeOf: size of the class object itself as given by sizeof. */ static int SizeOf(){ return sizeof(RegExPattern2); } /* Cast: runs the default constructor on the memory at w using placement new. */ static void* Cast(void* w){ return new(w) RegExPattern2(); } /* Open: reads an instance from valueRecord starting at offset and advances offset past everything read. Record layout: defined (bool), numOfStates (int), numOfStates*NUMCHARS transition targets (int each), numOfStates final flags (bool each), srcLength (int, negative meaning that no source is stored), then srcLength bytes of source. Always returns true. NOTE(review): the results of Read are not inspected and numOfStates is used as read from the record without validation; the headers of the two read loops look truncated in this copy. */ static bool Open(SmiRecord& valueRecord, size_t& offset, const ListExpr typeInfo, Word& value ) { RegExPattern2* p = new RegExPattern2(true); valueRecord.Read(&(p->defined), sizeof(bool), offset); offset += sizeof(bool); valueRecord.Read(&(p->numOfStates),sizeof(int), offset); offset += sizeof(int); for(int i=0;inumOfStates*NUMCHARS;i++){ int target; valueRecord.Read(&target,sizeof(int),offset); offset+=sizeof(int); p->transitions.push_back(target); } for(int i=0;inumOfStates;i++){ bool isFinal; valueRecord.Read(&isFinal,sizeof(bool), offset); offset += sizeof(bool); p->finalStates.push_back(isFinal); } int srcLength; valueRecord.Read(&srcLength,sizeof(int), offset); offset += sizeof(int); if(srcLength<0){ p->srcDefined = false; p->src = ""; } else { p->srcDefined = true; if(srcLength==0){ p->src = ""; } else { /* temporary buffer for the source bytes, released right after the copy into src */ char* buffer = new 
char[srcLength]; valueRecord.Read(buffer,srcLength,offset); offset += srcLength; p->src = std::string(buffer,srcLength); delete [] buffer; } } value.addr = p; return true; } /* Save: writes the instance behind value.addr to valueRecord starting at offset, in the order defined, numOfStates, all transition targets, all final flags, srcLength and the source bytes, and advances offset accordingly. A srcLength of -1 is written when no source is stored. The two asserts require the table sizes to agree with numOfStates. The source length is taken with strlen, so a source containing a NUL byte would be written only up to that byte. Always returns true. NOTE(review): the header of the loop over the final flags looks truncated in this copy. */ static bool Save(SmiRecord& valueRecord, size_t& offset, const ListExpr typeInfo, Word& value ) { RegExPattern2* p = (RegExPattern2*) value.addr; valueRecord.Write(&(p->defined),sizeof(bool),offset); offset+=sizeof(bool); valueRecord.Write(&(p->numOfStates),sizeof(int),offset); offset += sizeof(int); assert((int) p->transitions.size() == NUMCHARS*p->numOfStates); for(unsigned int i = 0; i< p->transitions.size();i++){ int target = p->transitions[i]; valueRecord.Write(&target,sizeof(int),offset); offset += sizeof(int); } assert((int) p->finalStates.size() == p->numOfStates); for(int i=0;inumOfStates;i++){ bool isFinal = p->finalStates[i]; valueRecord.Write(&isFinal,sizeof(bool), offset); offset += sizeof(bool); } if(!p->srcDefined){ int srcLength = -1; valueRecord.Write(&srcLength, sizeof(int), offset); offset += sizeof(int); } else { int srcLength = strlen(p->src.c_str()); valueRecord.Write(&srcLength, sizeof(int), offset); offset += sizeof(int); valueRecord.Write(p->src.c_str(), srcLength, offset); offset += srcLength; } return true; } /* computeRegEx: feeds this automaton into a regexcreator::DeaRegEx (an edge for every non-negative table entry, start state 0) and prints the resulting object to ss; the visible code always returns true. NOTE(review): the loop headers and the statements between setStart and printTo look truncated in this copy, and no header declaring regexcreator is included in the visible text. */ bool computeRegEx(std::stringstream& ss ){ regexcreator::DeaRegEx automaton; for(int s=0;s=0){ automaton.addEdge(s,state,c); } } } automaton.setStart(0); for(int i=0;iprintTo(ss); delete re; return true; } /* isFinal: returns the final flag of state i, or false for an index rejected by the range check. NOTE(review): the check uses i>numOfStates, so i equal to numOfStates passes it although Save asserts that finalStates has exactly numOfStates entries - presumably this should be a greater-or-equal comparison; confirm. */ bool isFinal(int i){ if(i<0 || i>numOfStates) return false; return finalStates[i]; } private: /* defined: whether the automaton holds a value. numOfStates: number of states. transitions: target state per (state, symbol) pair at index state*NUMCHARS + symbol. finalStates: one flag per state. src and srcDefined: the stored source string and whether one is present. NOTE(review): the element types of the two vectors are missing in this copy. */ bool defined; int numOfStates; std::vector transitions; std::vector finalStates; std::string src; bool srcDefined; /* nextState: returns the table entry for state and symbol c. NOTE(review): the second assert admits c equal to NUMCHARS, which addresses the first entry of the following state (or one past the end of the table for the last state) - presumably a strict comparison was intended; confirm. */ int nextState(const int state, const int c) const{ assert(state >= 0 && state < numOfStates); assert(c>=0 && c<=NUMCHARS); int pos = state*NUMCHARS+c; return transitions[pos]; } }; #endif