/*
----
This file is part of SECONDO.

Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----

//paragraph [1] Title: [{\Large \bf \begin {center}] [\end {center}}]
//[TOC] [\tableofcontents]

[1] Source File of the Symbolic Trajectory Algebra

Started March 2012, Fabio Vald\'{e}s

[TOC]

\section{Overview}
This algebra contains basic types and operations for symbolic trajectories and
has been split from the SymbolicTrajectoryAlgebra in April 2020.

*/

#include "SymbolicTrajectoryBasicAlgebra.h"
#ifdef RECODE
// NOTE(review): no header name follows this directive in the reviewed text;
// presumably the header of the recode library belongs here -- TODO confirm.
#include
#endif

using namespace std;
using namespace temporalalgebra;
using namespace datetime;

// NOTE(review): several signatures below spell container types without
// template arguments ("set&", "pair&", "set >&", "vector&"). They are
// reproduced exactly as they appear in the reviewed text -- TODO confirm
// against the declarations in the header.

namespace stj {

/*
\section{Implementation of class ~BasicDistanceFuns~}

\subsection{Function ~distance~ for two strings}

For ~TRIVIAL~, the result is 0 for equal strings and 1 otherwise. For every
other label function, two empty strings yield 0 and exactly one empty string
yields 1. Otherwise a value is divided by the length of the longer string;
this value is the result of ~stringutils::ld~ for ~EDIT~ and 1 for any
remaining label function.

*/
double BasicDistanceFuns::distance(const string& str1, const string& str2,
                                   const LabelFunction lf /* = TRIVIAL */) {
  if (lf == TRIVIAL) {
    return (str1 == str2 ? 0.0 : 1.0);
  }
  if (str1.empty() && str2.empty()) {
    return 0.0;
  }
  if (str1.empty() || str2.empty()) {
    return 1.0;
  }
  double ld = 1.0;
  if (lf == EDIT) {
    ld = stringutils::ld(str1, str2);
  }
  return ld / max(str1.length(), str2.length());
}

/*
\subsection{Function ~distance~ for two pairs}

One half of the result is the distance of the first components, computed
with the string version above. The other half is 0 if the second components
are equal and 0.5 otherwise.

*/
double BasicDistanceFuns::distance(const pair& val1, const pair& val2,
                                   const LabelFunction lf /* = TRIVIAL */) {
  const double ld = BasicDistanceFuns::distance(val1.first, val2.first, lf);
  return ld / 2 + (val1.second == val2.second ? 0 : 0.5);
}

/*
\subsection{Function ~distance~ for two sets}

Two empty sets yield 0, exactly one empty set yields 1. Otherwise both sets
are traversed simultaneously in their iteration order. If the distance of the
two current elements is below 1, it is added to the sum and both iterators
advance. Otherwise 1 is added and only the iterator pointing to the smaller
element advances. When one set is exhausted, the larger number of unvisited
elements is added. The sum is finally divided by the total number of elements
of both sets.

*/
double BasicDistanceFuns::distance(const set& values1, const set& values2,
                                   const LabelFunction lf) {
  if (values1.empty() && values2.empty()) {
    return 0;
  }
  if (values1.empty() || values2.empty()) {
    return 1;
  }
  auto i1 = values1.begin();
  auto i2 = values2.begin();
  const int m(values1.size()), n(values2.size());
  int i1count(0), i2count(0);
  double distsum = 0.0;
  while (i1 != values1.end() && i2 != values2.end()) {
    const double dist = BasicDistanceFuns::distance(*i1, *i2, lf);
    if (dist < 1) { // elements are matched; consume both
      ++i1;
      ++i1count;
      ++i2;
      ++i2count;
      distsum += dist;
    }
    else { // no match; skip the smaller element only
      if (*i1 < *i2) {
        ++i1;
        ++i1count;
      }
      else {
        ++i2;
        ++i2count;
      }
      distsum += 1.0;
    }
  }
  // elements that were never visited
  distsum += std::max(m - i1count, n - i2count);
  return distsum / (m + n);
}

/*
\subsection{Function ~distance~ for two sets of pairs}

Two empty sets yield 0, exactly one empty set yields 1. Otherwise the result
is the average of the pair distance (half of the distance of the first
components, plus 0.5 if the second components differ) over all combinations
of an element of ~values1~ with an element of ~values2~.

*/
double BasicDistanceFuns::distance(set >& values1, set >& values2,
                                   const LabelFunction lf /* = TRIVIAL */) {
  if (values1.empty() && values2.empty()) {
    return 0;
  }
  if (values1.empty() || values2.empty()) {
    return 1;
  }
  double distsum = 0;
  for (const auto& v1 : values1) {
    for (const auto& v2 : values2) {
      const double dist = BasicDistanceFuns::distance(v1.first, v2.first, lf);
      distsum += dist / 2 + (v1.second == v2.second ? 0 : 0.5);
    }
  }
  return distsum / (values1.size() * values2.size());
}

/*
\subsection{Function ~recode~}

Converts ~src~ with the recode library, applying the request string built
from the trimmed arguments ~from~ and ~to~, separated by two dots. Returns
~true~ and stores the converted text in ~result~ on success. If the request
cannot be scanned or the conversion yields a null pointer, ~result~ is
cleared and ~false~ is returned. The library objects are released on every
path.

*/
#ifdef RECODE
bool RecodeFun::recode(const string &src, const string &from,
                       const string &to, string &result) {
  const string rs = trim(from)+".."+trim(to);
  RECODE_OUTER outer = recode_new_outer(true);
  RECODE_REQUEST request = recode_new_request(outer);
  // the conversion is attempted only if the request string was accepted
  char* recoded = nullptr;
  if (recode_scan_request(request, rs.c_str())) {
    recoded = recode_string(request, src.c_str());
  }
  recode_delete_request(request);
  recode_delete_outer(outer);
  if (recoded == nullptr) {
    result.clear();
    return false;
  }
  result = recoded;
  free(recoded); // the buffer returned by recode_string is released here
  return true;
}
#endif

/*
\section{Implementation of class ~Label~}

\subsection{Constructor}

Copy constructor; the content is taken over by ~CopyFrom~.

*/
Label::Label(const Label& rhs) : Attribute(rhs.IsDefined()),
                                 value(rhs.value.getSize()) {
  CopyFrom(&rhs);
}

/*
\subsection{Function ~GetValue~}

Reads the stored text into ~text~; an empty flob results in an empty string.
The label has to be defined. The bytes are read into a temporary string, so
no memory is lost if reading fails, and ~text~ is modified only after the
read has finished.

*/
void Label::GetValue(string& text) const {
  assert(IsDefined());
  const auto size = value.getSize();
  if (size == 0) {
    text.clear();
    return;
  }
  string buffer(size, '\0');
  value.read(&buffer[0], size);
  text.swap(buffer);
}

string Label::GetValue() const {
  assert(IsDefined());
  string result;
  GetValue(result);
  return result;
}

/*
\subsection{Function ~GetValues~}

Replaces the content of ~values~ by the single text of this label.

*/
void Label::GetValues(set& values) const {
  assert(IsDefined());
  string text;
  GetValue(text);
  values.clear();
  values.insert(text);
}

/*
\subsection{Function ~buildValue~}

Copies ~text~ into ~result~; ~unit~ is not used.

*/
void Label::buildValue(const string& text, const unitelem& unit, base& result) {
  result = text;
}

/*
\subsection{Function ~SetValue~}

Calls ~Clean~ and then writes ~text~ into the flob, unless it is empty.

*/
void Label::SetValue(const string &text) {
  Clean();
  if (!text.empty()) {
    value.write(text.c_str(), text.length());
  }
}

/*
\subsection{Operator ~==~}

Two undefined labels are equal; a defined and an undefined label are not.
Two defined labels are equal iff their texts are equal.

*/
bool Label::operator==(const Label& lb) const {
  if (IsDefined() != lb.IsDefined()) {
    return false;
  }
  if (!IsDefined()) { // both undefined
    return true;
  }
  string str1, str2;
  GetValue(str1);
  lb.GetValue(str2);
  return (str1 == str2);
}

/*
\subsection{Operator ~==~}

An undefined label is never equal to a string.

*/
bool Label::operator==(const string& text) const {
  if (!IsDefined()) {
    return false;
  }
  string str;
  GetValue(str);
  return (str == text);
}

/*
\subsection{Function ~Distance~}

Returns 0 for two undefined labels and 1 if exactly one label is undefined.
Otherwise the string distance of ~BasicDistanceFuns~ is applied to both
texts.

*/
double Label::Distance(const Label& lb, const LabelFunction lf /* = EDIT */)
                                                                        const {
  if (!IsDefined() && !lb.IsDefined()) {
    return 0;
  }
  if (!IsDefined() || !lb.IsDefined()) {
    return 1;
  }
  string str1, str2;
  GetValue(str1);
  lb.GetValue(str2);
  return BasicDistanceFuns::distance(str1, str2, lf);
}

/*
\subsection{Function ~InsertLabels~}

Adds the result of ~GetLabel~ to ~result~ if the label is defined.

*/
void Label::InsertLabels(vector& result) const {
  if (IsDefined()) {
    result.push_back(GetLabel());
  }
}

void Label::InsertLabels(set& result) const {
  if (IsDefined()) {
    result.insert(GetLabel());
  }
}

/*
\subsection{Function ~UpdateFrequencies~}

For a defined label, an exact iterator for the label is requested from
~inv~. If it delivers an entry, the component of ~fv~ addressed by the
delivered tuple id is increased by 1.

*/
void Label::UpdateFrequencies(InvertedFile& inv, vector& fv) const {
  if (!IsDefined()) {
    return;
  }
  TupleId id;
  uint32_t wc, cc;
  // NOTE(review): 16777216 is presumably a size limit of the iterator --
  // TODO confirm against InvertedFile.
  InvertedFile::exactIterator* eit = inv.getExactIterator(GetLabel(), 16777216);
  if (eit->next(id, wc, cc)) {
    fv[id] += 1.0;
  }
  delete eit;
}

/*
\subsection{Function ~readValueFrom~}

Accepts atoms only. The atom is written to ~text~; if this text is empty,
~unit.pos~ is set to ~UINT\_MAX~.

*/
bool Label::readValueFrom(ListExpr LE, string& text, unitelem& unit) {
  if (!nl->IsAtom(LE)) {
    return false;
  }
  nl->WriteToString(text, LE);
  if (text.empty()) {
    unit.pos = UINT_MAX;
  }
  return true;
}

/*
\subsection{Function ~ReadFrom~}

The undefined symbol yields an undefined label. For any other atom, its
string representation without the first and the last character is taken as
the new value. A representation of less than two characters yields the empty
text, which avoids an ~out\_of\_range~ exception of ~substr~ for an empty
representation. Non-atomic lists are rejected and leave the label undefined.

*/
bool Label::ReadFrom(ListExpr LE, ListExpr typeInfo) {
  if (listutils::isSymbolUndefined(LE)) {
    SetDefined(false);
    return true;
  }
  if (nl->IsAtom(LE)) {
    string text;
    nl->WriteToString(text, LE);
    const string content = (text.length() >= 2
                            ? text.substr(1, text.length() - 2) : string());
    Set(true, content);
    return true;
  }
  SetDefined(false);
  return false;
}

/*
\subsection{Function ~ToListExpr~}

Returns the undefined symbol for an undefined label and a text atom holding
the value otherwise.

*/
ListExpr Label::ToListExpr(ListExpr typeInfo) {
  if (!IsDefined()) {
    return nl->SymbolAtom(Symbol::UNDEFINED());
  }
  string text;
  GetValue(text);
  return nl->TextAtom(text);
}

/*
\subsection{Function ~CheckKind~} */ bool Label::CheckKind(ListExpr type, ListExpr& errorInfo) { return (nl->IsEqual(type, Label::BasicType())); } /* \subsection{Function ~Property~} */ ListExpr Label::Property() { return gentc::GenProperty("-> DATA", BasicType(), "()", "\'Dortmund\'"); } /* \subsection{Function ~checkType~} */ const bool Label::checkType(const ListExpr type) { return listutils::isSymbol(type, BasicType()); } /* \subsection{Function ~CopyFrom~} */ void Label::CopyFrom(const Attribute* right) { SetDefined(right->IsDefined()); if (IsDefined()) { string text; ((Label*)right)->GetValue(text); SetValue(text); } } /* \subsection{Function ~Compare~} */ int Label::Compare(const Attribute* arg) const { string str1, str2; GetValue(str1); ((Label*)arg)->GetValue(str2); return str1.compare(str2); } /* \subsection{Function ~Print~} */ ostream& Label::Print(ostream& os) const { string text; GetValue(text); return os << text; } /* \subsection{Operator ~<<~} */ ostream& operator<<(ostream& os, const Label& lb) { string text; lb.GetValue(text); os << "\'" << text << "\'"; return os; } /* \section{Implementation of class ~Labels~} \subsection{Constructors} */ Labels::Labels(const Labels& src, const bool sort /* = false */) : Attribute(src.IsDefined()), values(src.GetLength()), pos(src.GetNoValues()) { if (sort) { set labels; for (int i = 0; i < src.GetNoValues(); i++) { string text; src.GetValue(i, text); labels.insert(text); // automatic sorting } for (set::iterator it = labels.begin(); it != labels.end(); it++) { Append(*it); // append in alphabetical order } } else { CopyFrom(&src); // keep original order } } /* \subsection{Operator ~=~} */ Labels& Labels::operator=(const Labels& src) { Attribute::operator=(src); values.copyFrom(src.values); pos.copyFrom(src.pos); return *this; } /* \subsection{Operator ~==~} */ bool Labels::operator==(const Labels& src) const { if (!IsDefined() && !src.IsDefined()) { return true; } if (IsDefined() != src.IsDefined()) { return 
false; } if ((GetNoValues() != src.GetNoValues()) || (GetLength() != src.GetLength())){ return false; } set strings1, strings2; string str; for (int i = 0; i < GetNoValues(); i++) { GetValue(i, str); strings1.insert(str); src.GetValue(i, str); strings2.insert(str); } return strings1 == strings2; } /* \subsection{Functions ~Append~} */ void Labels::Append(const Label& lb) { string text; lb.GetValue(text); Append(text); } void Labels::Append(const string& text) { if (!Contains(text)) { if (text.length() > 0) { pos.Append(values.getSize()); const char *bytes = text.c_str(); values.write(bytes, text.length(), values.getSize()); } else { pos.Append(UINT_MAX); } } } void Labels::Append(const set& values) { for (set::iterator it = values.begin(); it != values.end(); it++) { Append(*it); } } /* \subsection{Function ~Get~} */ void Labels::Get(int i, Label& lb) const { lb.SetDefined(IsDefined()); assert((0 <= i) && (i < GetNoValues())); if (IsDefined()) { string text; GetValue(i, text); lb.Set(true, text); } } /* \subsection{Function ~GetValue~} */ void Labels::GetValue(int i, string& text) const { assert((0 <= i) && (i < GetNoValues()) && IsDefined()); unsigned int cur(-1), next(-1); pos.Get(i, cur); if (cur != UINT_MAX) { int j = i + 1; bool finished = false; while (!finished && (j < GetNoValues())) { pos.Get(j, next); if (next != UINT_MAX) { finished = true; } j++; } if (!finished) { next = GetLength(); } char *bytes = new char[next - cur]; values.read(bytes, next - cur, cur); string text2(bytes, next - cur); delete[] bytes; text = text2; } else { text.clear(); } } /* \subsection{Function ~GetValues~} */ void Labels::GetValues(set& values) const { values.clear(); string value; for (int i = 0; i < GetNoValues(); i++) { GetValue(i, value); values.insert(value); } } /* \subsection{Function ~getRefToLastElem~} */ void Labels::getRefToLastElem(const int size, unsigned int& result) { result = size; } /* \subsection{Function ~getFlobPos~} */ unsigned int Labels::getFlobPos(const 
arrayelem elem) { return elem; } /* \subsection{Function ~valuesToListExpr~} */ void Labels::valuesToListExpr(const set& values, ListExpr& result) { if (values.empty()) { result = nl->Empty(); return; } set::iterator it = values.begin(); result = nl->OneElemList(nl->TextAtom(*it)); it++; ListExpr last = result; while (it != values.end()) { last = nl->Append(last, nl->TextAtom(*it)); it++; } } /* \subsection{Function ~getString~} */ void Labels::getString(const ListExpr& list, string& result) { nl->WriteToString(result, list); } /* \subsection{Function ~getElemFromList~} */ void Labels::getElemFromList(const ListExpr& list, const unsigned int size, unsigned int& result) { result = size; } /* \subsection{Function ~buildValue~} */ void Labels::buildValue(const string& text, const unsigned int pos, string& result) { result = text; } /* \subsection{Function ~Contains~} */ bool Labels::Contains(const string& text) const { string str; for (int i = 0; i < GetNoValues(); i++) { GetValue(i, str); if (str == text) { return true; } } return false; } /* \subsection{Function ~Intersects~} */ bool Labels::Intersects(const Labels &lbs) const { set values1, values2; GetValues(values1); lbs.GetValues(values2); Labels res(true); res.Intersection(values1, values2); return !res.IsEmpty(); } /* \subsection{Function ~Union~} */ void Labels::Union(const set& values1, const set& values2) { SetDefined(true); Clean(); Append(values1); Append(values2); } /* \subsection{Function ~Intersection~} */ void Labels::Intersection(const set& values1, const set& values2) { SetDefined(true); Clean(); set intersection; set_intersection(values1.begin(), values1.end(), values2.begin(), values2.end(), std::inserter(intersection, intersection.begin())); Append(intersection); } /* \subsection{Function ~Minus~} */ void Labels::Minus(const set& values1, const set& values2) { SetDefined(true); Clean(); set difference; set_difference(values1.begin(), values1.end(), values2.begin(), values2.end(), 
std::inserter(difference, difference.begin())); Append(difference); } #ifdef RECODE /* \subsection{Function ~Recode~} */ bool Labels::Recode(const std::string& from, const std::string& to, Labels& result) { result.SetDefined(IsDefined()); if (!IsDefined()) { return true; } result.Clean(); string value, recoded; for (int i = 0; i < GetNoValues(); i++) { GetValue(i, value); if (!RecodeFun::recode(value, from, to, recoded)) { result.SetDefined(false); return false; } result.Append(recoded); } return true; } #endif /* \subsection{Operator ~<<~} */ ostream& operator<<(ostream& os, const Labels& lbs) { Label lb(true); string text; for (int i = 0; i < lbs.GetNoValues() - 1; i++) { lbs.GetValue(i, text); os << text << " "; } lbs.GetValue(lbs.GetNoValues() - 1, text); os << text; return os; } /* \subsection{Function ~Distance~} */ double Labels::Distance(const Labels& lbs, const LabelFunction lf /* = EDIT */) const { if (!IsDefined() && !lbs.IsDefined()) { return 0; } set values1, values2; if (IsDefined()) { GetValues(values1); } if (lbs.IsDefined()) { lbs.GetValues(values1); } return BasicDistanceFuns::distance(values1, values2, lf); } /* \subsection{Function ~InsertLabels~} */ void Labels::InsertLabels(vector& result) const { if (IsDefined()) { set values; GetValues(values); for (auto it : values) { result.push_back(it); } } } void Labels::InsertLabels(set& result) const { if (IsDefined()) { set values; GetValues(values); result.insert(values.begin(), values.end()); } } /* \subsection{Function ~UpdateFrequencies~} */ void Labels::UpdateFrequencies(InvertedFile& inv, vector& fv) const { if (IsDefined()) { set values; GetValues(values); InvertedFile::exactIterator* eit = 0; TupleId id; uint32_t wc, cc; for (auto label : values) { eit = inv.getExactIterator(label, 16777216); if (eit->next(id, wc, cc)) { fv[id] += 1.0; } } delete eit; } } /* \subsection{Function ~GetFLOB~} */ Flob *Labels::GetFLOB(const int i) { assert(i >= 0 && i < NumOfFLOBs()); return (i == 0 ? 
&values : &pos); } /* \subsection{Function ~Compare~} */ int Labels::Compare(const Attribute* arg) const { if (GetNoValues() > ((Labels*)arg)->GetNoValues()) { return 1; } if (GetNoValues() < ((Labels*)arg)->GetNoValues()) { return -1; } string str1, str2; for (int i = 0; i < GetNoValues(); i++) { // equal size; compare labels GetValue(i, str1); ((Labels*)arg)->GetValue(i, str2); int comp = str1.compare(str2); if (comp != 0) { return comp; } } return 0; } /* \subsection{Function ~Property~} */ ListExpr Labels::Property() { return gentc::GenProperty("-> DATA", BasicType(), "(