/* ---- This file is part of SECONDO. Copyright (C) 2004, University in Hagen, Department of Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- //paragraph [1] Title: [{\Large \bf \begin{center}] [\end{center}}] //paragraph [10] Footnote: [{\footnote{] [}}] //[TOC] [\tableofcontents] [1] Implementation of Class DbDacScanGen August-February 2015, Daniel Fuchs [TOC] 1 Class DbDacScanGen This is a generic implemention of the DBDACSCAN algorithm. Template parameters are MEMB\_TYP\_CLASS, the realization of Member Class and TYPE, the Secondo Type. */ #include "AlgebraTypes.h" #include "Algebras/Relation-C++/RelationAlgebra.h" #include "StandardTypes.h" #include "Stream.h" #include "Cluster.h" #include "Member.h" #include #include "SecondoCatalog.h" #include "Algebras/Standard-C++/LongInt.h" #include "Algebras/FText/FTextAlgebra.h" #include "BinRelWriteRead.h" #include "MergeSort.h" #ifndef DBDACSCANGEN_H #define DBDACSCANGEN_H namespace distributedClustering{ const static std::string NEIGH_REL_MEMBER_ID = "MemberId"; const static std::string NEIGH_REL_NEIGHBOR_ID = "NeighborId"; template class DbDacScanGen{ private: /* 1.2 members */ int minPts, geoPos, clIdPos,clTypePos,xPicRefPos; double eps; std::string outRelFileName, outNFileName; bool meltTwoClusters,relNameFound, clusterProcessed,appendPictureRefs; TupleBuffer* buffer; GenericRelationIterator* resIt; // iterator TupleType* tt ,*neighborType; // the result tuple type std::ofstream outRel, outNRel; std::vector membArrayUntouched,membArrayUntouchedSec; std::vector membArrayPtr, membArrayPtrSec; Cluster* leftCluster,*rightCluster; SecondoCatalog* sc; ListExpr neighborTupleTypeInfo, neighborTypeInfo , relTupleTypeInfo,relTypeInfo; Tuple *neighborTuple; //for Picture TYPE *xRefPic, *yRefPic; double maxDist; public: /* 1.3 constructor This constructor is for execute the DBDACSCAN algorithm. */ DbDacScanGen(Word &_inStream, ListExpr &_tupleResultType, std::string& _relName, double _eps, int _minPts, int _attrPos, int _xPicRefPos, bool _appendPictureRefs, size_t _maxMem): minPts(_minPts), geoPos(_attrPos), clIdPos(0),clTypePos(0),xPicRefPos(_xPicRefPos), eps(_eps),outRelFileName(""),outNFileName(_relName) ,meltTwoClusters(false),clusterProcessed(false), appendPictureRefs(_appendPictureRefs),buffer(0), resIt(0),tt(0),neighborType(0), leftCluster(0),rightCluster(0), xRefPic(0),yRefPic(0),maxDist(-1.0) { ListExpr empty; if(createOutputFiles(empty,false)) { relNameFound = true; tt = new TupleType(_tupleResultType); buffer = new TupleBuffer(_maxMem); init(_inStream,membArrayPtr,membArrayUntouched); if(membArrayPtr.size()){ clusterProcessed = true; mergeSort(membArrayPtr,0,membArrayPtr.size()); leftCluster = dbDacScan(membArrayPtr,0,membArrayPtr.size()-1,eps,minPts); } initOutput(); }else{ std::cout << "NeighborRelaiton not created!" << std::endl; std::cout << "Filename: " << outNFileName << std::endl; relNameFound = false; } } /* 1.4 Constructor for operator distClMerge This constructor is for execute the Melt CLUSTERS algorithm. */ DbDacScanGen(const std::string& _leftFN, const std::string& _leftNFN, const std::string& _rightFN, const std::string& _rightNFN, const int _geoPos, const int _clIdPos, const int _clTypePos, int _xPicRefPos, const size_t _maxMem, ListExpr &_tupleResultType, ListExpr& _relFt, const std::string& _outRelName , std::string& _outNName, double _eps,int _minPts): minPts(_minPts),geoPos(_geoPos),clIdPos(_clIdPos), clTypePos(_clTypePos) ,xPicRefPos(_xPicRefPos), eps(_eps) ,outRelFileName(_outRelName),outNFileName(_outNName) ,meltTwoClusters(true),relNameFound(false) , clusterProcessed(false),appendPictureRefs(false) ,buffer(0), resIt(0),tt(0),neighborType(0), leftCluster(0),rightCluster(0), xRefPic(0),yRefPic(0),maxDist(-1.0) { bool readFileCorrect = true; bool readSecFileCorrect = true; buffer = new TupleBuffer(_maxMem); std::string errMsg; //read left rel and nrel file if(!readFile( _leftFN, _tupleResultType ,errMsg,membArrayPtr, membArrayUntouched ,buffer, geoPos,xPicRefPos,DISTMERGE ,clIdPos,clTypePos)) { std::cout << "read left File failed: " << errMsg << std::endl; readFileCorrect = false; } if ( !readFile( _leftNFN, _tupleResultType ,errMsg,membArrayPtr, membArrayUntouched ,buffer, geoPos,xPicRefPos,NEIGHBOR,clIdPos,clTypePos)) { std::cout << "read left Neighbor File failed: " << errMsg << std::endl; readFileCorrect = false; } //read right rel and nrel file if( !readFile( _rightFN, _tupleResultType ,errMsg,membArrayPtrSec, membArrayUntouchedSec ,buffer, geoPos,xPicRefPos,DISTMERGE,clIdPos,clTypePos ,membArrayPtr.size())) { std::cout << "read right File failed: " << errMsg << std::endl; readSecFileCorrect = false; } if (!readFile( _rightNFN, _tupleResultType ,errMsg,membArrayPtrSec, membArrayUntouchedSec ,buffer, geoPos,xPicRefPos,NEIGHBOR,clIdPos,clTypePos ,membArrayPtr.size())) { std::cout << "read right Neighbor File failed: " << errMsg << std::endl; readSecFileCorrect = false; } if(readFileCorrect && createOutputFiles(_relFt)) { relNameFound = true; tt = new TupleType(_tupleResultType); if(readSecFileCorrect){ if(membArrayPtr.size()){ mergeSort (membArrayPtr,0,membArrayPtr.size()); } if(membArrayPtrSec.size()){ mergeSort (membArrayPtrSec,0,membArrayPtrSec.size()); } //define border Points TYPE* leftInnerPoint=0; TYPE* rightInnerPoint=0; //create a right and a left Cluster if (membArrayPtr.size() && membArrayPtrSec.size() ) { clusterProcessed = true; if(membArrayPtr.at(0)->getXVal() > membArrayPtrSec.at(0)->getXVal() ) { leftCluster = new Cluster(membArrayPtr,eps,minPts); rightCluster = new Cluster(membArrayPtrSec,eps,minPts); leftInnerPoint = membArrayPtr.back()->getPoint(); rightInnerPoint = membArrayPtrSec.front()->getPoint(); }else{ leftCluster = new Cluster(membArrayPtrSec,eps,minPts); rightCluster = new Cluster(membArrayPtr,eps,minPts); leftInnerPoint = membArrayPtrSec.back()->getPoint(); rightInnerPoint = membArrayPtr.front()->getPoint(); } //melt Clusters leftCluster-> meltClusters(rightCluster,leftInnerPoint,rightInnerPoint); } } initOutput(); }else{ std::cout << "Files not created!" << std::endl; std::cout << "RelFilename: " << outRelFileName << "RelNFileName: " << outNFileName << std::endl; relNameFound = false; } } /* 1.5 ~Destructor~ */ ~DbDacScanGen(){ if(outRel.is_open()){ finish(outRel); outRel.close(); } if(outNRel.is_open()){ finish(outNRel); outNRel.close(); } deleteEachTuple(); if(buffer) delete buffer; if(leftCluster) delete leftCluster; if(resIt) delete resIt; if(tt) tt->DeleteIfAllowed(); if(neighborType) neighborType->DeleteIfAllowed(); if(neighborTuple) neighborTuple->DeleteIfAllowed(); } /* 1.6 ~deleteEachTuple~ Delete each tuple from tuple buffer. */ void deleteEachTuple() { if(resIt) delete resIt; if(buffer){ resIt = buffer->MakeScan(); Tuple* tuple = resIt->GetNextTuple(); while(tuple) { tuple->DeleteIfAllowed(); tuple = resIt->GetNextTuple(); } } } /* 1.7 ~initOutput~ Initialize the buffer to returning tuples. */ void initOutput(){ if(resIt) delete resIt; resIt = buffer->MakeScan(); } /* 1.8 ~next~ Returns the next output tuple which are expandet with ClusterId, ClusterType, isCluster and the filname of neighbor relationship file. Requires the call of initOutput before. */ Tuple* next(){ if(relNameFound){ if(resIt){ Tuple* tuple = resIt->GetNextTuple(); if(!tuple){ return 0; } TupleId id = resIt->GetTupleId(); Tuple* resTuple = new Tuple(tt); int noAttr = tuple->GetNoAttributes(); if (clusterProcessed) { if(meltTwoClusters){ noAttr = noAttr-5; } for(int i = 0; iCopyAttribute(i,tuple,i); } if(appendPictureRefs){ resTuple->PutAttribute(noAttr, xRefPic->Clone()); noAttr++; } if(id < membArrayUntouched.size()){ putAttribute(resTuple, noAttr,id, membArrayUntouched); }else{ id = id - membArrayUntouched.size(); putAttribute(resTuple, noAttr,id, membArrayUntouchedSec); } } else { //only important for distClMerge for(int i = 0; i< noAttr; i++){ resTuple->CopyAttribute(i,tuple,i); } if(id < membArrayUntouched.size()){ writeFiles(resTuple,membArrayUntouched[id]); }else{ id = id - membArrayUntouched.size(); writeFiles(resTuple,membArrayUntouchedSec[id]); } } return resTuple; } else { return 0; } } else { return 0; } } private: /* 1.9 ~PutAttribute~ Auxiliary function to put attribute into result Tuple. */ void putAttribute(Tuple* resTuple,int noAttr, TupleId& id, std::vector & array) { if(appendPictureRefs){ resTuple->PutAttribute(noAttr, new CcReal(true,array[id]->getYVal())); noAttr++; } resTuple->PutAttribute(noAttr, new LongInt(true, array[id]->getTupleId())); // append attribute ClusterNo resTuple->PutAttribute(noAttr+1, new CcInt(true, array[id]->getClusterNo())); //append attribute isCluser resTuple->PutAttribute(noAttr+2, new CcBool(true, array[id]->updateDensityReachable(minPts))); //append attribute ClusterType if(array[id]->getClusterType() > 4){ resTuple->PutAttribute(noAttr+3, new CcInt(true,-1)); }else{ resTuple->PutAttribute(noAttr+3, new CcInt(true, array[id]->getClusterType())); } //append attribute outNFileName resTuple->PutAttribute(noAttr+4, new FText(true,outNFileName)); //write File writeFiles(resTuple,array[id]); } /* 1.10 ~writeFiles~ write tuples to file */ void writeFiles(Tuple* resTuple,MEMB_TYP_CLASS* member) { if(meltTwoClusters){ //write RelFile if(!writeNextTuple(outRel,resTuple)){ std::cerr << "Problem in writing tuple" << std::endl; } } //write NRelFile if(member->getClusterType() != 1){ //for type CLUSTER you don't nead information about neighbors writeNeighborFileTuples(member); } } /* 1.11 ~writeNeighborFileTuples~ write neighbor tuples to file */ void writeNeighborFileTuples(MEMB_TYP_CLASS* member) { typename std::list::iterator nIt = member->getEpsNeighborhood(true); while(nIt != member->getEpsNeighborhood(false)) { neighborTuple->PutAttribute(0, new LongInt(true,member->getTupleId())); neighborTuple->PutAttribute(1, new LongInt(true,(*nIt)->getTupleId())); if(!writeNextTuple(outNRel,neighborTuple)){ std::cerr << "Problem in writing tuple" << std::endl; } nIt++; } } /* 1.12 ~initialize~ Read in the tuple streams and store them in a vector. */ void init(Word& _stream, std::vector & membArray, std::vector & membArrayUnt ) { Tuple* tuple; Stream inStream(_stream); inStream.open(); int id = 0; bool firstRun = true; bool secondRun = true; bool findPictureCoordRefs = false; bool pictureRefsExist = false; if(TYPE::BasicType() == Picture::BasicType() && appendPictureRefs) { findPictureCoordRefs = true; pictureRefsExist = false; }else if(TYPE::BasicType() == Picture::BasicType() && !appendPictureRefs && xPicRefPos >=0 ) { findPictureCoordRefs = false; pictureRefsExist = true; } while((tuple = inStream.request())){ buffer->AppendTuple(tuple); TYPE* obj = (TYPE*) tuple->GetAttribute(geoPos); if(obj->IsDefined()){ tuple->SetTupleId(id); MEMB_TYP_CLASS* member = new MEMB_TYP_CLASS(obj); member->setTupleId(id); membArrayUnt.push_back(member); membArray.push_back(member); if(pictureRefsExist){ xRefPic = (TYPE*) tuple->GetAttribute(xPicRefPos); double yRefPicVal = ((CcReal*) tuple->GetAttribute(xPicRefPos+1))->GetValue(); member->setCoordinates(xRefPic,yRefPicVal); } if(findPictureCoordRefs) { if(firstRun) { xRefPic = obj; firstRun = false; } else // if(!firstRun && findPictureCoordRefs) { if(secondRun){ yRefPic = obj; maxDist = member->calcDistanz(xRefPic); } else if(member->calcDistanz(xRefPic) > maxDist) { yRefPic = obj; maxDist = member->calcDistanz(xRefPic); } } } } tuple->DeleteIfAllowed(); id++; } if(findPictureCoordRefs) { //search maxDist for (size_t i = 1; i < membArray.size()-1;i++) { if( membArray.at(i)->calcDistanz(yRefPic) > maxDist) { xRefPic = membArray.at(i)->getPoint(); maxDist =membArray.at(i)->calcDistanz(yRefPic); } } //set coordinates to each member for (size_t i = 0; i < membArray.size();i++) { membArray.at(i)->setCoordinates(xRefPic,yRefPic); } } inStream.close(); } /* 1.13 ~createOutputFiles~ Creates the output files in which tuples where stored. */ bool createOutputFiles(ListExpr& _relFt, bool both=true) { std::string errMsg; if(both){ relTupleTypeInfo = _relFt; relTypeInfo = nl->TwoElemList( listutils::basicSymbol(), nl->Second(_relFt)); //create output relation file if(!writeHeader(outRel,outRelFileName,relTypeInfo,errMsg)) { std::cerr << errMsg << std::endl; return false; } } neighborTupleTypeInfo = defineNRel(); neighborTypeInfo = nl->TwoElemList( listutils::basicSymbol(), neighborTupleTypeInfo); sc = SecondoSystem::GetCatalog(); neighborType = new TupleType(sc->NumericType(neighborTupleTypeInfo)); neighborTuple = new Tuple(neighborType); //create output neighbor relation file if(!writeHeader(outNRel, outNFileName,neighborTypeInfo,errMsg)) { std::cerr << "writeHeader not Successfully: " << errMsg << std::endl; return false; } return true; } /* 1.14 ~defineNRel~ Define neigbor relation type. */ ListExpr defineNRel() { return nl->TwoElemList(nl->SymbolAtom(Tuple::BasicType()), nl->TwoElemList(nl->TwoElemList(nl->SymbolAtom(NEIGH_REL_MEMBER_ID), nl->SymbolAtom(LongInt::BasicType())), nl->TwoElemList(nl->SymbolAtom(NEIGH_REL_NEIGHBOR_ID), nl->SymbolAtom(LongInt::BasicType())))); } /* 1.15 ~dbDacScan~ Execute the DBDACSCAN algorithm. */ Cluster* dbDacScan(std::vector& _membArray, int left , int right , double eps, int minPts) { if(right==left){//Array contains only one element return new Cluster(_membArray[left], eps,minPts); }else{ int globMedian = (right + left)/2;//position to the right subarray //get left and right cluster Cluster *rightCl, *leftCl; leftCl = dbDacScan(_membArray,left, globMedian,eps,minPts); rightCl = dbDacScan(_membArray, globMedian+1,right,eps,minPts); int leftInnerIdex = globMedian; int rightInnerIdex = globMedian+1; leftCl->meltClusters(rightCl, _membArray[leftInnerIdex]->getPoint(), _membArray[rightInnerIdex]->getPoint()); return leftCl; } return 0; //should never reached } }; } #endif