Files
secondo/Algebras/Histogram/HistogramBase.h
2026-01-23 17:03:45 +08:00

549 lines
13 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [1] Title: [{\Large \bf \begin {center}] [\end {center}}]
//[TOC] [\tableofcontents]
//[_][\_]
//[&][\&]
//[<][\<]
//characters [1] verbatim: [\verb@] [@]
""[1]
[1] Header File of the Histogram Algebra
December 2007, S. H[oe]cher, M. H[oe]ger, A. Belz, B. Poneleit
[TOC]
1 Overview
The file "HistogramBase.h" contains basic defines and includes as well as
declarations of functions and operators common to both 1d and 2d histograms.
This documentation explains their interfaces ("what") in the header file
and restricts itself to brief commentaries ("how") in the implementation file
"BaseHistogram.cpp".
2 Defines and includes
*/
#ifndef HISTOGRAMBASE_H_
#define HISTOGRAMBASE_H_
#include "Symbols.h"
#include "Algebra.h"
#include "NestedList.h"
#include "NList.h"
#include "LogMsg.h"
#include "QueryProcessor.h"
#include "ConstructorTemplates.h"
#include "StandardTypes.h"
//#include "DBArray.h"
#include "Tools/Flob/Flob.h"
#include "Tools/Flob/DbArray.h"
#include "Algebras/Relation-C++/RelationAlgebra.h"
#include <vector>
#include <list>
#include <set>
#include <queue>
#include <limits.h>
#ifndef HIST_REAL
#define HIST_REAL double
#endif // #ifndef HIST_REAL
namespace hgr {
class BaseHistogram : public Attribute
{
public:
/*
3 Functions
The functions and operators mentioned in this section are applicable to
any kind of histogram ("BaseHistogram").
3.1 Constructors and destructor
*/
BaseHistogram();
BaseHistogram(bool _defined, size_t size = 0);
BaseHistogram(const BaseHistogram& rhs);
virtual ~BaseHistogram();
//virtual BaseHistogram& operator = (const BaseHistogram& h) const = 0;
/*
3.2 Functions and operators
3.2.1 GetBin(int)
Takes a bin index number and
returns a pointer to the value of this bin.
*/
HIST_REAL GetBin(int i) const;
/*
3.2.2 AppendBin(HIST[_]REAL)
Appends the passed real value to the array of bins.
*/
void AppendBin(const HIST_REAL& b);
/*
3.2.3 GetNoBin()
Returns the number of bins in the histogram.
*/
int GetNoBin() const;
/*
3.2.4 ResizeBin(int)
Resizes the bin array to the given initial size.
*/
void ResizeBin(const int newSize);
/*
3.2.5 IsDefined()
Returns TRUE if the histogram is defined and FALSE if it is not.
Overwriting the function is not required.
3.2.6 SetDefined(bool)
Sets the histogram's "defined" value to TRUE or FALSE, according to the given
bool value. Overwriting the function is not required.
3.2.7 Distance(BaseHistogram p)
Compares the similarity of two histograms: Returns the sum of the squared
difference values between the couples of bins with equal number.
Example:
histogram1: ((0.0 1.0 2.0)(0.5 0.5))
histogram2: ((0.0 1.0 2.0)(0.6 0.7))
return value: (0.1 x 0.1) + (0.2 x 0.2) = 0.001 + 0.004 = 0.005
*/
HIST_REAL Distance(const BaseHistogram* h);
/*
3.2.8 GetMinBin(), GetMaxBin()
Returns the number of the bin with the lowest(highest) value. If there is more
than one bin with equal low (high) value, the number of the first of them
is returned.
*/
CcInt GetMinBin() const;
CcInt GetMaxBin() const;
/*
3.2.9 CopyBinsFrom(BaseHistogram)
Copies all bin values from the given histogram and appends them to the
calling histogram's bin array.
*/
void CopyBinsFrom(const BaseHistogram* h);
/*
3.2.10 Virtual functions
to be overwritten in histogram1d and histogram2d:
*/
virtual void Clear() = 0;
virtual void CopyRangesFrom(const BaseHistogram* h) = 0;
virtual bool IsEmpty() const = 0;
virtual bool IsConsistent(const bool checkOrder = true) const = 0;
virtual int Compare(const BaseHistogram& h) const = 0;
virtual int Compare(const Attribute* rhs) const{
return Compare(*((BaseHistogram*)rhs));
}
virtual int CompareAlmost(const BaseHistogram& h) const = 0;
virtual int CompareAlmost(const Attribute* rhs) const{
return CompareAlmost(*((BaseHistogram*)rhs));
}
virtual bool IsRefinement(const BaseHistogram& h) const = 0;
virtual bool operator ==(const BaseHistogram& h) const = 0;
virtual bool operator <(const BaseHistogram& h) const = 0;
virtual void Coarsen(const BaseHistogram* h) = 0;
virtual std::ostream& Print( std::ostream& os ) const = 0;
/*
3.2.11 Definition of bin
The bin array is declared here, whereas the ranges array depends on the
histogram's number of dimensions.
*/
protected:
DbArray<HIST_REAL> bin;
/*
3.2.12 CmpBinSearch(real, real)
The function takes two pointers to real values, compares them and returns
-1 (the first real value is smaller than the second) or 1 (else).
It is passed as comparison function to the method DBArray::Find()
to find the right bin for a given value using binary search.
*/
static int CmpBinSearch(const void* v1, const void* v2);
}; // class BaseHistogram : public Attribute
/*
2.4 Operators
2.4.1 is[_]undefined
Sets the result value to (true, true) if the given histogram is undefined,
and to (true, false) if it is defined.
*/
// Type mapping function of is_undefined: takes the argument type list and
// returns the result type list.
ListExpr is_undefinedTypeMap( ListExpr args );
// Selection function of is_undefined; presumably picks between the 1d and 2d
// value mappings declared below (confirm in the implementation file).
int is_undefinedSelect( ListExpr args );
// Value mapping for histogram1d arguments (judging by the "1d" suffix).
int is_undefined1dFun ( Word* args, Word& result, int message,
Word& local, Supplier s );
// Value mapping for histogram2d arguments (judging by the "2d" suffix).
int is_undefined2dFun(Word* args, Word& result, int message, Word& local,
Supplier s);
/*
2.4.2 IsEqual and IsLess (operators = and $<$)
Compares two histograms of the same type; the same type is assured by the
HistHistBoolTypeMap function. The operator returns TRUE, if the histograms are
equal or the first is smaller than the second, and FALSE else.
Two histograms are equal if their ranges are equal and the corresponding pairs
of bins are equal.
Histogram A is smaller than histogram B if their ranges are equal and each bin
value in A is smaller than the corresponding bin value in B.
*/
// Type mapping that assures both arguments are histograms of the same type;
// judging by the name, the result type is bool.
ListExpr HistHistBoolTypeMap(ListExpr args);
// Value mapping of the equality operator (=).
int IsEqualFun(Word* args, Word& result, int message, Word& local,
Supplier s);
// Value mapping of the less-than operator (<).
int IsLessFun(Word* args, Word& result, int message, Word& local,
Supplier s);
/*
2.4.3 is[_]refinement
Sets the result value to (true, true) if the first given histogram is a
refinement of the second one.
This is the case if all ranges of the first histogram are completely contained
in the ranges of the second histogram.
*/
// Value mapping of is_refinement. No dedicated type mapping is declared in
// this header; presumably HistHistBoolTypeMap is reused (confirm at the
// operator registration).
int IsRefinementFun(Word* args, Word& result, int message, Word& local,
Supplier s);
/*
2.4.4 translate
Transforms the first histogram into the second, provided that the first is a
refinement of the second. I.e., the first histogram will be coarsened: If in the
first histogram bin 1 = 200.0 and bin 2 = 100.0 and their ranges get merged to
one common range, its bin value will be 300.0.
*/
// Type mapping function of translate.
ListExpr TranslateTypeMap(ListExpr args);
// Selection function of translate; presumably chooses the value mapping
// variant by histogram type (confirm in the implementation file).
int TranslateSelect(ListExpr args);
// Value mapping of translate.
int TranslateFun(Word* args, Word& result, int message, Word& local,
Supplier s);
/*
2.4.5 use
Transforms the given histogram by recalculating all its bins as defined by
the given parameter function with at least one bin value parameter.
This allows f. ex. to scale the histogram.
*/
// Type mapping function of use.
ListExpr UseTypeMap(ListExpr args);
// Value mapping of use.
int UseFun(Word* args, Word& result, int message, Word& local, Supplier s);
/*
2.4.6 use2
Works like "use" but accepts two histograms of the same type, which need to
have the same ranges or one to be a refinement of the other. If the latter is
the case, "use2" first uses "translate" to coarsen the finer one. If that is
not possible, the operator returns "UNDEF".
The parameter function expects at least two bin values, one for each histogram.
The operator allows thus f. ex. to add two histograms by adding their
corresponding bin values.
*/
// Type mapping function of use2.
ListExpr Use2TypeMap(ListExpr args);
// Value mapping of use2.
int Use2Fun(Word* args, Word& result, int message, Word& local, Supplier s);
/*
2.4.7 fold
Expects three parameters: a histogram, a parameter function and a start value.
The parameter function accepts two real values, representing an intermediate
value and a bin value.
If the given histogram is not defined, the operator returns UNDEF. Otherwise it
assigns the start value to the first function parameter and the content of the
first bin to the second function parameter, executes the function, assigns its
result to the first function parameter and the content of the next bin to the
second, and so on, thus taking all bin values into account. The return value is
the result of the last function call.
*/
// Type mapping function of fold.
ListExpr FoldTypeMap(ListExpr args);
// Value mapping of fold.
int FoldFun(Word* args, Word& result, int message, Word& local, Supplier s);
/*
2.4.8 distance
Calculates the distance between two histograms of equal type.
If the two histograms are equal, the distance is 0.0. If one is a refinement
of the other, it is first "translated" (coarsened) to make it comparable to
the other. If none of the histograms is a refinement of the other, the result
is UNDEF.
The distance is calculated by summing up the squared differences between
corresponding bins.
Example:
hist1 = ((0.0 1.0 2.0)(5.0 6.0))
hist2 = ((0.0 1.0 2.0)(5.2 6.1))
result = (0.2 x 0.2) + (0.1 x 0.1) = 0.04 + 0.01 = 0.05
*/
// Type mapping function of distance.
ListExpr DistanceTypeMap(ListExpr args);
// Value mapping of distance.
int DistanceFun(Word* args, Word& result, int message, Word& local,
Supplier s);
/*
2.4.9 The type mapping function of findbin, findbinX and findbinY
*/
// Shared type mapping of findbin, findbinX and findbinY. The template flag
// presumably selects the histogram1d (true) or histogram2d (false) variant.
// Only the declaration is visible in this header, so the definition and its
// instantiations must be provided elsewhere.
template<bool histogram1d>
ListExpr FindBinTypeMap(ListExpr args);
/*
2.4.10 find[_]minbin, find[_]maxbin
Expects a histogram and returns a stream of integer values (histogram1d) or
of couples of integer values (histogram2d) that designates the index numbers
of all bins having minimum resp. maximum values.
*/
// Type mapping shared by find_minbin and find_maxbin (judging by the name).
ListExpr FindMinMaxBinTypeMap(ListExpr args);
// Selection function shared by find_minbin and find_maxbin; presumably picks
// the histogram1d or histogram2d value mapping (confirm in the implementation).
int FindMinMaxBinSelect(ListExpr args);
/*
3.5 Helper functions and classes for operators
3.5.1 GetResultTuple2d()
Returns the ListExpr: (tuple ((X int)(Y int)))
It is used by the operators find[_]minbin and find[_]maxbin
to construct the resultstream.
*/
// Takes no arguments; builds the symbolic tuple type list for (X int)(Y int).
ListExpr GetResultTuple2d();
/*
3.5.2 GetResultTupleTypeInfo2d()
Returns the numeric type of the ListExpr: (tuple ((X int)(Y int)))
using the class SecondoCatalog.
It is used by the operators find[_]minbin and find[_]maxbin
to construct the resultstream.
*/
// Takes no arguments; numeric-type counterpart of GetResultTuple2d().
ListExpr GetResultTupleTypeInfo2d();
/*
3.5.3 class MinMaxBuffer
Represents the local storage of the operators find[_]minbin and find[_]maxbin.
*/
class MinMaxBuffer
{
public:
  // State kept between calls of the operator. Only index can be modified
  // after construction; value and tupleTypeInfo are const members.
  CcInt index;
  const HIST_REAL value;
  const ListExpr tupleTypeInfo;

  // Stores copies of the three arguments. _tupleTypeInfo may be omitted,
  // in which case 0 is stored.
  MinMaxBuffer(const CcInt& _index,
               const HIST_REAL& _value,
               const ListExpr& _tupleTypeInfo = 0)
    : index(_index),
      value(_value),
      tupleTypeInfo(_tupleTypeInfo)
  {}
};
/*
4 Info structs
4.1 is[_]undefinedInfo
*/
struct is_undefinedInfo : OperatorInfo
{
  // Fills in the operator description: name, one signature per histogram
  // type (1d first, 2d appended), syntax pattern and meaning text.
  is_undefinedInfo()
    : OperatorInfo()
  {
    name      = "is_undefined";
    signature = "histogram1d -> bool";
    appendSignature( "histogram2d -> bool" );
    syntax    = "is_undefined(_)";
    meaning   = "Returns TRUE if the histogram is undefined.";
  }
};
/*
4.2 isRefinementInfo
*/
// Operator description, presumably for is_refinement. Only the constructor is
// declared; it is not defined in this header.
struct IsRefinementInfo : OperatorInfo
{
IsRefinementInfo();
};
/*
4.3 isEqualInfo
*/
// Operator description, presumably for the equality operator (=). Only the
// constructor is declared; it is not defined in this header.
struct IsEqualInfo : OperatorInfo {
IsEqualInfo();
};
/*
4.4 isLessInfo
*/
// Operator description, presumably for the less-than operator (<). Only the
// constructor is declared; it is not defined in this header.
struct IsLessInfo : OperatorInfo {
IsLessInfo();
};
/*
4.5 TranslateInfo
*/
// Operator description, presumably for translate. Only the constructor is
// declared; it is not defined in this header.
struct TranslateInfo : OperatorInfo {
TranslateInfo();
};
/*
4.6 UseInfo
*/
// Operator description, presumably for use. Only the constructor is declared;
// it is not defined in this header.
struct UseInfo : OperatorInfo {
UseInfo();
};
/*
4.7 Use2Info
*/
// Operator description, presumably for use2. Only the constructor is declared;
// it is not defined in this header.
struct Use2Info : OperatorInfo {
Use2Info();
};
/*
4.8 FoldInfo
*/
// Operator description, presumably for fold. Only the constructor is declared;
// it is not defined in this header.
struct FoldInfo : OperatorInfo {
FoldInfo();
};
/*
4.9 DistanceInfo
*/
// Operator description, presumably for distance. Only the constructor is
// declared; it is not defined in this header.
struct DistanceInfo : OperatorInfo {
DistanceInfo();
};
/*
4.10 FindMinBinInfo
*/
// Operator description, presumably for find_minbin. Only the constructor is
// declared; it is not defined in this header.
struct FindMinBinInfo : OperatorInfo{
FindMinBinInfo();
};
/*
4.11 FindMaxBinInfo
*/
// Operator description, presumably for find_maxbin. Only the constructor is
// declared; it is not defined in this header.
struct FindMaxBinInfo : OperatorInfo{
FindMaxBinInfo();
};
} // namespace hgr
#endif /*HISTOGRAMBASE_H_*/