Files
secondo/Algebras/AssociationAnalysis/Common.cpp

194 lines
6.5 KiB
C++
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
----
This file is part of SECONDO.
Copyright (C) 2021, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [1] Title: [{\Large \bf \begin {center}] [\end {center}}]
[1] Association Analysis Algebra Implementation
January 2021 - April 2021, P. Fedorow for bachelor thesis.
*/
#include "Common.h"
#include "Algebras/Collection/IntSet.h"
#include "NList.h"
#include "StandardTypes.h"
namespace AssociationAnalysis {
frequentItemsetStreamLI::frequentItemsetStreamLI(
std::vector<std::pair<std::vector<int>, double>> &&frequentItemsets)
: frequentItemsets(frequentItemsets), it(this->frequentItemsets.cbegin()),
tupleType(new TupleType(SecondoSystem::GetCatalog()->NumericType(
frequentItemsetTupleType()))) {}
frequentItemsetStreamLI::~frequentItemsetStreamLI() {
this->tupleType->DeleteIfAllowed();
}
Tuple *frequentItemsetStreamLI::getNext() {
if (this->it != this->frequentItemsets.cend()) {
auto &[itemset, support] = *this->it;
auto tuple = new Tuple(this->tupleType);
tuple->PutAttribute(0, new collection::IntSet(std::set<int>(
itemset.cbegin(), itemset.cend())));
tuple->PutAttribute(1, new CcReal(support));
this->it++;
return tuple;
} else {
return nullptr;
}
}
ListExpr frequentItemsetTupleType() {
NList attrs =
NList(NList(NList().symbolAtom("Itemset"),
NList().symbolAtom(collection::IntSet::BasicType())),
NList(NList().symbolAtom("Support"),
NList().symbolAtom(CcReal::BasicType())));
ListExpr type = NList().tupleOf(attrs).listExpr();
return type;
}
ListExpr transactionsTupleType() {
NList attrs = NList(
NList(NList().symbolAtom("Id"), NList().symbolAtom(CcInt::BasicType())),
NList(NList().symbolAtom("Itemset"),
NList().symbolAtom(collection::IntSet::BasicType())));
ListExpr type = NList().tupleOf(attrs).listExpr();
return type;
}
// Type mapping for a frequent itemset mining operator.
ListExpr mineTM(ListExpr args, ListExpr returnType) {
NList type(args);
NList appendList;
bool relativeSupport = false;
NList attrs;
if (type.length() == 3 || type.length() == 4) {
if (!type.elem(1).first().checkRel(attrs)) {
return NList::typeError(
"Argument number 1 must be of type rel(tuple(...)).");
}
if (!type.elem(2).isSymbol(1)) {
return NList::typeError("Argument number 2 must name an attribute in the "
"relation given as the first argument.");
}
if (type.elem(3).first().isSymbol(CcInt::BasicType())) {
if (type.elem(3).second().intval() <= 0) {
return NList::typeError("Argument number 3 must be of type int and > 0 "
"or of type real and in the interval (0, 1).");
}
} else if (type.elem(3).first().isSymbol(CcReal::BasicType())) {
if (type.elem(3).second().realval() <= 0.0 ||
type.elem(3).second().realval() >= 1.0) {
return NList::typeError("Argument number 3 must be of type int and > 0 "
"or of type real and in the interval (0, 1).");
} else {
relativeSupport = true;
}
} else {
return NList::typeError("Argument number 3 must be of type int and > 0 "
"or of type real and in the interval (0, 1).");
}
if (!type.elem(3).first().isSymbol(CcInt::BasicType()) ||
type.elem(3).second().intval() <= 0) {
}
if (type.length() == 4) {
if (!type.elem(4).first().isSymbol(CcInt::BasicType())) {
return NList::typeError(
"The optional argument number 4 must be of type int.");
}
} else {
// Add default value via the append-functionality.
appendList.append(NList().intAtom(0));
}
} else {
return NList::typeError("3 or 4 arguments expected but " +
std::to_string(type.length()) + " received.");
}
std::string itemsetAttrName = type.elem(2).first().str();
int itemsetAttr = -1;
for (int i = 1; i <= (int)attrs.length(); i += 1) {
NList attr = attrs.elem(i);
if (attr.elem(1).isSymbol(itemsetAttrName)) {
itemsetAttr = i;
}
}
if (itemsetAttr == -1) {
return NList::typeError("Argument number 2 must name an attribute in the "
"relation given as the first argument.");
}
appendList.append(NList().intAtom(itemsetAttr - 1));
appendList.append(NList().boolAtom(relativeSupport));
return NList(Symbols::APPEND(), appendList, returnType).listExpr();
}
ListExpr mineTM(ListExpr args) {
NList tupleType = NList(frequentItemsetTupleType());
return mineTM(args, NList().streamOf(tupleType).listExpr());
}
// Increments the integer that the given vector of booleans/bits represents.
// Returns true on overflow, false otherwise. This function is used as helper
// to built all subsets of an another vector of the same size.
bool increment(std::vector<bool> &bs) {
for (std::size_t i = 0; i < bs.size(); i += 1) {
if (bs[i]) {
bs[i] = false;
} else {
bs[i] = true;
return true;
}
}
return false;
}
void TriangularMatrix::insert(std::size_t a, std::size_t b) {
auto [_a, _b] = std::minmax(a, b);
if (_b >= this->matrix.size()) {
// We need to enlarge the 2d matrix vector as it is not large enough to
// store the support count of the itemset {a,b}.
std::size_t size = _b + 1;
this->matrix.resize(size);
for (std::size_t i = 0; i < size; i += 1) {
this->matrix[i].resize(i + 1);
}
}
// Increase the support count.
this->matrix[_b][_a] += 1;
}
int TriangularMatrix::count(size_t a, size_t b) {
auto [_a, _b] = std::minmax(a, b);
if (_b < this->matrix.size() && _a < this->matrix[_b].size()) {
return this->matrix[_b][_a];
} else {
return 0;
}
}
} // namespace AssociationAnalysis