Files
secondo/Optimizer/Entropy/old/statistics.pl
2026-01-23 17:03:45 +08:00

555 lines
14 KiB
Prolog

/*
----
This file is part of SECONDO.
Copyright (C) 2004, University in Hagen, Department of Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
1 Information about Selectivity of Predicates
[File ~statistics.pl~]
1.1 Rules about Commutativity of Predicates
*/
commute(X = Y, Y = X).
commute(X < Y, Y > X).
commute(X <= Y, Y >= X).
commute(X > Y, Y < X).
commute(X >= Y, Y <= X).
commute(X # Y, Y # X).
/*
1.2 Hard-Coded Selectivities of Predicates
*/
sel(plz:ort = staedte:sName, 0.0031).
sel(plz:pLZ = (plz:pLZ)+1, 0.00001644).
sel((plz:pLZ)-1 = plz:pLZ, 0.00001644).
sel(plz:pLZ = (plz:pLZ)*5, 0.0000022).
sel(plz:pLZ = plz:pLZ, 0.000146).
sel(plz:pLZ * 3 = plz:pLZ * 3, 0.000146).
sel(plz:pLZ > 40000, 0.55).
sel(plz:pLZ > 50000, 0.48).
sel(plz:pLZ < 60000, 0.64).
sel((plz:pLZ mod 5) = 0, 0.17).
sel(plz:ort contains "burg", 0.060).
sel(plz:ort starts "M", 0.071).
sel(staedte:bev > 500000, 0.21).
sel(staedte:bev > 300000, 0.26).
sel(staedte:bev < 500000, 0.79).
sel(staedte:kennzeichen starts "F", 0.034).
sel(staedte:kennzeichen starts "W", 0.068).
/*
1.3 Determine the Simple Form of Predicates
Simple forms of predicates are stored in predicate ~sel~ or
in predicate ~storedSel~.
---- simple(Term, Rel1, Rel2, Simple) :-
----
The simple form of a term ~Term~ containing attributes of ~Rel1~ and/or ~Rel2~
is ~Simple~.
*/
simple(attr(Var:Attr, 0, _), rel(Rel, Var, _), _, Rel:Attr) :- !.
simple(attr(Attr, 0, _), rel(Rel, *, _), _, Rel:Attr) :- !.
simple(attr(Var:Attr, 1, _), rel(Rel, Var, _), _, Rel:Attr) :- !.
simple(attr(Attr, 1, _), rel(Rel, *, _), _, Rel:Attr) :- !.
simple(attr(Var:Attr, 2, _), _, rel(Rel, Var, _), Rel:Attr) :- !.
simple(attr(Attr, 2, _), _, rel(Rel, *, _), Rel:Attr) :- !.
simple(Term, Rel1, Rel2, Simple) :-
compound(Term),
functor(Term, T, 1), !,
arg(1, Term, Arg1),
simple(Arg1, Rel1, Rel2, Arg1Simple),
functor(Simple, T, 1),
arg(1, Simple, Arg1Simple).
simple(Term, Rel1, Rel2, Simple) :-
compound(Term),
functor(Term, T, 2), !,
arg(1, Term, Arg1),
arg(2, Term, Arg2),
simple(Arg1, Rel1, Rel2, Arg1Simple),
simple(Arg2, Rel1, Rel2, Arg2Simple),
functor(Simple, T, 2),
arg(1, Simple, Arg1Simple),
arg(2, Simple, Arg2Simple).
simple(Term, _, _, Term).
/*
---- simplePred(Pred, Simple) :-
----
The simple form of predicate ~Pred~ is ~Simple~.
*/
simplePred(pr(P, A, B), Simple) :- simple(P, A, B, Simple), !.
simplePred(pr(P, A), Simple) :- simple(P, A, A, Simple), !.
simplePred(X, Y) :-
throw(error_Internal(statistics_simplePred(X,Y):malformedExpression).
/*
1.4 Retrieving, Storing, and Loading Selectivities
Auxiliary predicates for ~selectivity~.
*/
sampleS(rel(Rel, Var, Case), rel(Rel2, Var, Case)) :-
atom_concat(Rel, '_sample_s', Rel2).
sampleJ(rel(Rel, Var, Case), rel(Rel2, Var, Case)) :-
atom_concat(Rel, '_sample_j', Rel2).
sampleNameS(Name, Sample) :-
atom_concat(Name, '_sample_s', Sample).
sampleNameJ(Name, Sample) :-
atom_concat(Name, '_sample_j', Sample).
sampleNameSmall(Name, Small) :-
atom_concat(Name, '_small', Small).
possiblyRename(Rel, Renamed) :-
Rel = rel(_, *, _),
!,
Renamed = feed(Rel).
possiblyRename(Rel, Renamed) :-
Rel = rel(_, Name, _),
Renamed = rename(feed(Rel), Name).
dynamicPossiblyRenameJ(Rel, Renamed) :-
Rel = rel(_, *, _),
!,
sampleSizeJoin(JoinSize),
Renamed = sample(Rel, JoinSize, 0.00001).
dynamicPossiblyRenameJ(Rel, Renamed) :-
Rel = rel(_, Name, _),
sampleSizeJoin(JoinSize),
Renamed = rename(sample(Rel, JoinSize, 0.00001), Name).
dynamicPossiblyRenameS(Rel, Renamed) :-
Rel = rel(_, *, _),
!,
sampleSizeSelection(SelectionSize),
Renamed = sample(Rel, SelectionSize, 0.00001).
dynamicPossiblyRenameS(Rel, Renamed) :-
Rel = rel(_, Name, _),
sampleSizeSelection(SelectionSize),
Renamed = rename(sample(Rel, SelectionSize, 0.00001), Name).
cardQuery(Pred, Rel, Query) :-
sampleS(Rel, RelS),
possiblyRename(RelS, RelQuery),
Query = count(filter(RelQuery, Pred)).
cardQuery(Pred, Rel1, Rel2, Query) :-
sampleJ(Rel1, Rel1S),
sampleJ(Rel2, Rel2S),
possiblyRename(Rel1S, Rel1Query),
possiblyRename(Rel2S, Rel2Query),
transformPred(Pred, t, 1, Pred2),
Query = count(loopsel(Rel1Query, fun([param(t, tuple)],
filter(Rel2Query, Pred2)))).
/*
---- transformPred(Pred, Param, Arg, Pred2) :-
----
~Pred2~ is ~Pred~ transformed such that the attribute X of relation ~ArgNo~ is written
as ``attribute(Param, attrname(X))''
*/
transformPred(attr(Attr, Arg, Case), Param, Arg,
attribute(Param, attrname(attr(Attr, Arg, Case)))) :- !.
transformPred(attr(Attr, Arg, Case), _, _, attr(Attr, Arg, Case)) :- !.
transformPred(Pred, Param, Arg, Pred2) :-
compound(Pred),
functor(Pred, T, 1), !,
arg(1, Pred, Arg1),
transformPred(Arg1, Param, Arg, Arg1T),
functor(Pred2, T, 1),
arg(1, Pred2, Arg1T).
transformPred(Pred, Param, Arg, Pred2) :-
compound(Pred),
functor(Pred, T, 2), !,
arg(1, Pred, Arg1),
arg(2, Pred, Arg2),
transformPred(Arg1, Param, Arg, Arg1T),
transformPred(Arg2, Param, Arg, Arg2T),
functor(Pred2, T, 2),
arg(1, Pred2, Arg1T),
arg(2, Pred2, Arg2T).
transformPred(Pred, _, _, Pred).
% Query = count(filter(product(Rel1Query, Rel2Query), Pred)).
dynamicCardQuery(Pred, Rel, Query) :-
dynamicPossiblyRenameS(Rel, RelQuery),
Query = count(filter(RelQuery, Pred)).
dynamicCardQuery(Pred, Rel1, Rel2, Query) :-
dynamicPossiblyRenameJ(Rel1, Rel1Query),
dynamicPossiblyRenameJ(Rel2, Rel2Query),
%Query = count(filter(product(Rel1Query, Rel2Query), Pred)).
transformPred(Pred, t, 1, Pred2),
Query = count(loopsel(Rel1Query, fun([param(t, tuple)],
filter(Rel2Query, Pred2)))).
sels(Pred, Sel) :-
sel(Pred, Sel),
!.
sels(Pred, Sel) :-
commute(Pred, Pred2),
sel(Pred2, Sel),
!.
sels(Pred, Sel) :-
storedSel(Pred, Sel),
!.
sels(Pred, Sel) :-
commute(Pred, Pred2),
storedSel(Pred2, Sel).
/*
---- selectivity(P, Sel) :-
----
The selectivity of predicate ~P~ is ~Sel~.
If ~selectivity~ is called, it first tries to look up
the selectivity via the predicate ~sel~. If no selectivity
is found, a Secondo query is issued, which determines the
selectivity. The retrieved selectitivity is then stored in
predicate ~storedSel~. This ensures that a selectivity has to
be retrieved only once.
*/
% Selectivities must not be 0
selectivity(pr(Pred, Rel, Rel), Sel) :-
selectivity(pr(Pred, Rel), Sel), !.
selectivity(P, Sel) :-
simplePred(P, PSimple),
sels(PSimple, Sel),
!.
selectivity(pr(Pred, Rel1, Rel2), Sel) :-
set_dynamic_sample(off),
Rel1 = rel(BaseName1, _, _),
sampleNameJ(BaseName1, SampleName1),
card(SampleName1, SampleCard1),
Rel2 = rel(BaseName2, _, _),
sampleNameJ(BaseName2, SampleName2),
card(SampleName2, SampleCard2),
cardQuery(Pred, Rel1, Rel2, Query),
plan_to_atom(Query, QueryAtom1),
atom_concat('query ', QueryAtom1, QueryAtom),
%write('selectivity query : '),
%write(QueryAtom),
get_time(Time1),
secondo(QueryAtom, [int, ResCard]),
get_time(Time2),
Time is Time2 - Time1,
my_convert_time(Time, _, _, _, _, Minute, Sec, MilliSec),
MSs is Minute *60000 + Sec*1000 + MilliSec,
write('Elapsed Time: '),
write(MSs),
write(' ms'),nl,
MSsRes is MSs / (SampleCard1 * SampleCard2),
Sel is (ResCard + 1) / (SampleCard1 * SampleCard2), % must not be 0
write('Predicate Cost: '),
write(MSsRes),
write(' ms'),nl,
write('Selectivity : '),
write(Sel),
nl,
simplePred(pr(Pred, Rel1, Rel2), PSimple),
assert(storedPET(PSimple, MSsRes)),
assert(storedSel(PSimple, Sel)),
!.
selectivity(pr(Pred, Rel), Sel) :-
set_dynamic_sample(off),
Rel = rel(BaseName, _, _),
sampleNameS(BaseName, SampleName),
card(SampleName, SampleCard),
cardQuery(Pred, Rel, Query),
plan_to_atom(Query, QueryAtom1),
atom_concat('query ', QueryAtom1, QueryAtom),
%write('selectivity query : '),
%write(QueryAtom),
get_time(Time1),
secondo(QueryAtom, [int, ResCard]),
get_time(Time2),
Time is Time2 - Time1,
my_convert_time(Time, _, _, _, _, Minute, Sec, MilliSec),
MSs is Minute *60000 + Sec*1000 + MilliSec,
write('Elapsed Time: '),
write(MSs),
write(' ms'),nl,
MSsRes is MSs / SampleCard,
Sel is (ResCard + 1)/ SampleCard, % must not be 0
write('Predicate Cost: '),
write(MSsRes),
write(' ms'),nl,
write('Selectivity : '),
write(Sel),
nl,
simplePred(pr(Pred, Rel), PSimple),
assert(storedPET(PSimple, MSsRes)),
assert(storedSel(PSimple, Sel)),
!.
selectivity(pr(Pred, Rel1, Rel2), Sel) :-
set_dynamic_sample(on),
Rel1 = rel(BaseName1, _, _),
card(BaseName1, Card1),
sampleSizeJoin(JoinSize),
SampleCard1 is min(Card1, max(JoinSize, Card1 * 0.00001)),
Rel2 = rel(BaseName2, _, _),
card(BaseName2, Card2),
SampleCard2 is min(Card2, max(JoinSize, Card2 * 0.00001)),
dynamicCardQuery(Pred, Rel1, Rel2, Query),
plan_to_atom(Query, QueryAtom1),
atom_concat('query ', QueryAtom1, QueryAtom),
%write('selectivity query : '),
%write(QueryAtom),
get_time(Time1),
secondo(QueryAtom, [int, ResCard]),
get_time(Time2),
Time is Time2 - Time1,
my_convert_time(Time, _, _, _, _, Minute, Sec, MilliSec),
MSs is Minute *60000 + Sec*1000 + MilliSec,
write('Elapsed Time: '),
write(MSs),
write(' ms'),nl,
MSsRes is MSs / (SampleCard1 * SampleCard2),
Sel is (ResCard + 1) / (SampleCard1 * SampleCard2), % must not be 0
write('Predicate Cost: '),
write(MSsRes),
write(' ms'),nl,
write('Selectivity : '),
write(Sel),
nl,
simplePred(pr(Pred, Rel1, Rel2), PSimple),
assert(storedSel(PSimple, Sel)),
!.
selectivity(pr(Pred, Rel), Sel) :-
set_dynamic_sample(on),
Rel = rel(BaseName, _, _),
card(BaseName, Card),
sampleSizeSelection(SelectionSize),
SampleCard is min(Card, max(SelectionSize, Card * 0.00001)),
dynamicCardQuery(Pred, Rel, Query),
plan_to_atom(Query, QueryAtom1),
atom_concat('query ', QueryAtom1, QueryAtom),
%write('selectivity query : '),
%write(QueryAtom),
get_time(Time1),
secondo(QueryAtom, [int, ResCard]),
get_time(Time2),
Time is Time2 - Time1,
my_convert_time(Time, _, _, _, _, Minute, Sec, MilliSec),
MSs is Minute *60000 + Sec*1000 + MilliSec,
write('Elapsed Time: '),
write(MSs),
write(' ms'),nl,
MSsRes is MSs / SampleCard,
Sel is (ResCard + 1)/ SampleCard, % must not be 0
write('Predicate Cost: '),
write(MSsRes),
write(' ms'),nl,
write('Selectivity : '),
write(Sel),
nl,
simplePred(pr(Pred, Rel), PSimple),
assert(storedSel(PSimple, Sel)),
!.
selectivity(P, _) :- write('Error in optimizer: cannot find selectivity for '),
simplePred(P, PSimple), write(PSimple), nl, fail.
/*
The selectivities retrieved via Secondo queries can be loaded
(by calling ~readStoredSels~) and stored (by calling
~writeStoredSels~).
*/
readStoredSels :-
retractall(storedSel(_, _)),
[storedSels].
/*
The following functions are auxiliary functions for ~writeStoredSels~. Their
purpose is to convert a list of character codes (e.g. [100, 99, 100]) to
an atom (e.g. "dcd"), which makes the stored selectitivities more
readable.
*/
isIntList([]).
isIntList([X | Rest]) :-
integer(X),
isIntList(Rest).
charListToAtom(CharList, Atom) :-
atom_codes(A, CharList),
my_concat_atom([' "', A, '"'], Atom).
%handle string values in order to avoid problems with starting capital letters
replaceCharList(value_expr(string,InString), value_expr(string,OutString)) :-
term_string(InString,OutString),
!.
%handle text values in order to avoid problems with starting capital letters
replaceCharList(value_expr(text,InText), value_expr(text,OutString)) :-
term_string(InText,OutString),
!.
replaceCharList(InTerm, OutTerm) :-
isIntList(InTerm),
!,
charListToAtom(InTerm, OutTerm).
replaceCharList(InTerm, OutTerm) :-
compound(InTerm),
!,
InTerm =.. TermAsList,
maplist(replaceCharList, TermAsList, OutTermAsList),
OutTerm =.. OutTermAsList.
replaceCharList(X, X).
writeStoredSels :-
open('storedSels.pl', write, FD),
write(FD, '/* Automatically generated file, do not edit by hand. */\n'),
findall(_, writeStoredSel(FD), _),
close(FD).
writeStoredSel(Stream) :-
storedSel(X, Y),
replaceCharList(X, XReplaced),
write(Stream, storedSel(XReplaced, Y)),
write(Stream, '.\n').
:-
dynamic(storedSel/2),
at_halt(writeStoredSels),
readStoredSels.
readStoredPETs :-
retractall(storedPET(_, _)),
[storedPETs].
writeStoredPETs :-
open('storedPETs.pl', write, FD),
write(FD, '/* Automatically generated file, do not edit by hand. */\n'),
findall(_, writeStoredPET(FD), _),
close(FD).
writeStoredPET(Stream) :-
storedPET(X, Y),
replaceCharList(X, XReplaced),
write(Stream, storedPET(XReplaced, Y)),
write(Stream, '.\n').
:-
dynamic(storedPET/2),
at_halt(writeStoredPETs),
readStoredPETs.
writePETs :-
findall(_, writePET, _).
writePET :-
storedPET(X, Y),
replaceCharList(X, XReplaced),
write('Predicate: '),
write(XReplaced),
write(', Cost: '),
write(Y),
write(' ms\n').
/*
1.5 Examples
Example 22:
*/
example24 :- optimize(
select *
from [staedte as s, ten]
where [
s:plz < 1000 * no,
s:plz < 4578]
).
/*
Example 23:
*/
example23 :- optimize(
select *
from [thousand as th, ten]
where [
(th:no mod 10) < 5,
th:no * 100 < 50000,
(th:no mod 7) = no]
).