/* ---- This file is part of SECONDO. Copyright (C) 2021, University in Hagen, Department of Computer Science, Database Systems for New Applications. SECONDO is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with SECONDO; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ---- //paragraph [1] Title: [{\Large \bf \begin {center}] [\end {center}}] [1] Association Analysis Algebra Implementation January 2021 - April 2021, P. Fedorow for bachelor thesis. */ #pragma once #include "Algebras/Relation-C++/RelationAlgebra.h" // rel, trel, tuple #include "NestedList.h" #include "Operator.h" #include #include namespace AssociationAnalysis { // Type mapping for the genTransactions operator. ListExpr genTransactionsTM(ListExpr args); class getTransactionsLI { public: // Prepares the set of potentially frequent itemsets and everything else that // is needed for the generation of transactions. getTransactionsLI(std::size_t numOfTransactions, std::size_t transactionSizeMean, std::size_t frequentItemsetSizeMean, std::size_t numOfFrequentItemsets, std::size_t numOfItems); ~getTransactionsLI() { this->tupleType->DeleteIfAllowed(); } // Returns the next generated transaction as a tuple. Tuple *getNext(); private: // The amount of transactions to be generated. std::size_t numOfTransactions; // The amount of transactions already generated. std::size_t t; // Random number distributions that are used to generate transactions. std::mt19937 gen; std::poisson_distribution genTransactionSize; std::discrete_distribution genPotentialFrequentItemset; // Potentially frequent itemsets that are used to fill a generated // transaction. std::vector> potentialFrequentItemsets; // While a transaction is generated some items of a chosen potentially // frequent itemset are left out and will not be part of the transaction. The // amount of items to be left out is specified for each potentially frequent // itemset in this vector. std::vector corruptionLevels; // Oversized transactions (a transaction is oversized if it is larger // than the size generated by the genTransactionSize random number // distribution) are allowed in half the cases. This boolean is used to // implement this behavior. bool allowOversizedTransaction; // Describes the resulting tuple type: tuple(Id: int, Itemset: intset). TupleType *tupleType; }; // Value mapping for the genTransactions operator. int genTransactionsVM(Word *args, Word &result, int message, Word &local, Supplier s); // Operator info for the genTransactions operator. struct genTransactionsInfo : OperatorInfo { genTransactionsInfo() : OperatorInfo() { this->name = "genTransactions"; this->signature = "int int int int int -> stream(tuple([Id: int, Itemset: intset]))"; this->syntax = "genTransaction(_, _, _, _, _)"; this->meaning = "Generates a stream of transactions. The expected arguments are: the " "number of transactions, the mean of the transaction size, the mean of " "the frequent itemset size, the number of frequent itemsets and the " "number of items."; this->usesArgsInTypeMapping = true; } }; } // namespace AssociationAnalysis