Files
secondo/Optimizer/LargeQueries/querygenerator.pl

819 lines
20 KiB
Perl
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
----
This file is part of SECONDO.
Copyright (C) 2012, University Hagen, Faculty of Mathematics and
Computer Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [10] title: [{\Large \bf ] [}]
//characters [1] formula: [$] [$]
//[ae] [\"{a}]
//[oe] [\"{o}]
//[ue] [\"{u}]
//[ss] [{\ss}]
//[Ae] [\"{A}]
//[Oe] [\"{O}]
//[Ue] [\"{U}]
//[**] [$**$]
//[star] [$*$]
//[->] [$\rightarrow$]
//[toc] [\tableofcontents]
//[=>] [\verb+=>+]
//[newpage] [\newpage]
//[_] [\_]
[10] Predicates for Automatic Query Generation and Benchmark
By Gero Willmes, January 2012
Implementations for my master thesis
[newpage]
[toc]
[newpage]
*/
/*
1 Predicates for Automatic Query Generation and Benchmark
*/
% Scratch facts that exist only while a random query is being assembled:
%   queryComponent(relList(Rels)) - one group of relation numbers that are
%                                   already connected by generated predicates
%   queryPred(Pred)               - one generated join predicate
:- dynamic
    queryComponent/1,
    queryPred/1.
/*
1.1 generateQueryTerm
*/
/*
---- generateQueryTerm(+NumberRels, -QueryTerm)
----
Description:
Generates a query term ~QueryTerm~ of type 'select count([star]) from RelList where PredList.'
over ~NumberRels~ relations.
The relations 1..NumberRels must exist in the database and must be named r1...r(NumberRels).
Each relation must have a single attribute of type integer which is named (Relationname)i.
E.g. if ~NumberRels~ = 2, then there must exist two relations 'r1' and 'r2' where
'r1' has the integer attribute 'r1i' and 'r2' has the integer attribute 'r2i'.
The generated predicates of ~PredList~ build a spanning tree over the relations in ~RelList~.
This predicate has no ~Factor~ argument.
*/
% generateQueryTerm(+NumberRels, -QueryTerm)
% Builds a random 'select count(*) from Rels where Preds' term over the
% relations r1 .. r<NumberRels>.
generateQueryTerm(NumberRels, QueryTerm) :-
    % Discard scratch facts left over from an earlier run.
    retractall(queryComponent(_)),
    retractall(queryPred(_)),
    % Start with one component per relation, then merge them pairwise;
    % every merge asserts one queryPred/1 fact.
    createQueryComponents(NumberRels, _),
    joinAllQueryComponents(NumberRels, _),
    findall(JoinPred, queryPred(JoinPred), JoinPreds),
    % generateRelNames/2 counts down from NumberRels, so the list is
    % reversed to put r1 first.
    generateRelNames(NumberRels, DescendingNames),
    reverse(DescendingNames, AscendingNames),
    QueryTerm = (select count(*) from AscendingNames where JoinPreds).
/*
1.2 generate
*/
/*
---- generate(+DatabaseName, +NumberRelations)
----
Description:
Generates a query over ~NumberRelations~ relations in database ~DatabaseName~ and prints it on the screen
*/
% generate(+DatabaseName, +NumberRelations)
% Generates a random query over NumberRelations relations and prints it.
% DatabaseName is only used when no database is open yet.
%
% The two original clauses overlapped: with a database already open, a
% failing generateQueryTerm/2 (or backtracking into generate/2) fell through
% to the second clause, which tried to open DatabaseName and generated a
% second query. The if-then-else commits to exactly one of the two ways of
% getting an open database.
generate(DB, X) :-
    (   isDatabaseOpen
    ->  true
    ;   open database DB
    ),
    generateQueryTerm(X, Query),
    nl, nl, write('Query: '), nl, nl, write(Query), nl, nl.
/*
1.3 generateRelNames
*/
/*
---- generateRelNames(+Number, -RelNameList)
----
Description:
Generates ~Number~ relation names and returns them in the list ~RelNameList~
*/
% generateRelNames(+Number, -RelNameList)
% RelNameList holds the relation names for Number, Number-1, ..., 1
% (in that descending order), each built by concatrelname/2.
% Fails for anything that is not an integer >= 1.
generateRelNames(1, [RelName]) :-
    concatrelname(1, RelName).
generateRelNames(N, [RelName|Rest]) :-
    % Without this guard the clause also matched N = 1 (and N < 1), so
    % backtracking into the predicate recursed through 0, -1, ... forever.
    integer(N),
    N > 1,
    concatrelname(N, RelName),
    Nminus1 is N - 1,
    generateRelNames(Nminus1, Rest).
/*
1.4 joinAllQueryComponents
*/
/*
---- joinAllQueryComponents(+N, +Factor)
----
Description:
Joins in a loop two randomly chosen query components of type 'queryComponent(relList(RelList))'.
Finally there is a single resulting query component.
~Factor is not used~
*/
% joinAllQueryComponents(+N, +Factor)
% Merges two randomly chosen queryComponent/1 facts per step until the
% counter reaches 1. Factor is ignored.
joinAllQueryComponents(1, _).
joinAllQueryComponents(N, _) :-
    % Guard against overlap with the base case: previously this clause was
    % also tried for N = 1 on backtracking, where only one component is
    % left and no second one can be drawn.
    integer(N),
    N > 1,
    findall(queryComponent(relList(Rels)),
            queryComponent(relList(Rels)),
            Components),
    selectRandomListElement(Components, First),
    % Remove the first pick so the second one is a different component.
    select(First, Components, Others),
    selectRandomListElement(Others, Second),
    joinQueryComponents(First, Second, _),
    Nminus1 is N - 1,
    joinAllQueryComponents(Nminus1, _).
/*
1.5 joinQueryComponents
*/
/*
---- joinQueryComponents(+QueryComponent1, +QueryComponent2, +Factor)
----
Description:
Joins ~QueryComponent1~ and ~QueryComponent2~ which are of type
'queryComponent(relList(RelList))'.
Joining means generating
a predicate and merging the relation lists of the two components into a single resulting component.
The selectivity of the generated predicate is adjusted in a way that the result size of
this predicate multiplies the cardinality of the larger relation by a random factor between 0,1...10.
Finally the two input components are retracted and the resulting component and the generated predicate
are asserted as global facts.
~Factor is not used~
*/
% joinQueryComponents(+QueryComponent1, +QueryComponent2, +Factor)
% Connects two components: picks one random relation from each, generates a
% join predicate between them, asserts that predicate and the merged
% component, and retracts the two input components. Factor is ignored.
joinQueryComponents(QueryComponent1, QueryComponent2, _) :-
    QueryComponent1 = queryComponent(relList(LeftRels)),
    QueryComponent2 = queryComponent(relList(RightRels)),
    % One relation from each side becomes an endpoint of the new predicate.
    selectRandomListElement(LeftRels, LeftRel),
    selectRandomListElement(RightRels, RightRel),
    generatePredicate(LeftRel, RightRel, _, JoinPred),
    append(LeftRels, RightRels, MergedRels),
    assert(queryPred(JoinPred)),
    assert(queryComponent(relList(MergedRels))),
    retract(QueryComponent1),
    retract(QueryComponent2).
/*
1.6 selectRandomListElement
*/
/*
---- selectRandomListElement(+List, -Element)
----
Description:
Returns a random element ~Element~ of the input ~List~
*/
% selectRandomListElement(+List, -Element)
% Element is a randomly chosen member of List.
selectRandomListElement(List, Element) :-
    length(List, Size),
    % random/3 draws an integer from the half-open interval [0, Size), so
    % Position is always a valid zero-based index into List.
    random(0, Size, Position),
    nth0(Position, List, Element).
/*
1.7 concatrelname
*/
/*
---- concatrelname(+N, -RelName)
----
Description:
Concats the name of the relation ~RelName~ from the input number ~N~.
*/
% concatrelname(+N, -RelName)
% RelName is the atom 'r' followed by the number N, e.g. 3 gives r3.
% Fails when N is not a number.
concatrelname(N, RelName) :-
    number(N),
    atom_concat(r, N, RelName).
/*
1.8 getAttrName
*/
/*
---- getAttrName(+RelName, -AttrName)
----
Description:
Concats the attribute name ~AttrName~ from the name of the relation ~RelName~
*/
% getAttrName(+RelName, -AttrName)
% AttrName is RelName with the suffix 'i' appended, e.g. r3 gives r3i.
getAttrName(RelName, AttrName) :-
    atom_concat(RelName, i, AttrName).
/*
1.9 generatePredicate
*/
/*
---- generatePredicate(+N1, +N2, +Factor, -Pred)
----
Description:
Generates a join ~Predicate~ for a relation number ~N1~ and ~N2~ .
Input:
~N1~: Number of the first input relation
~N2~: Number of the second input relation
~Factor~: not used
Output:
~Pred~: generated predicate
*/
% generatePredicate(+N1, +N2, +Factor, -Pred)
% Pred is a join predicate between the relations numbered N1 and N2.
% The attribute of the smaller relation (as decided by compareRelations/6)
% becomes the left-hand side. Factor is ignored.
generatePredicate(N1, N2, _, Pred) :-
    concatrelname(N1, RelA),
    concatrelname(N2, RelB),
    compareRelations(RelA, RelB, Smaller, Larger, _, _),
    getAttrName(Smaller, SmallerAttr),
    getAttrName(Larger, LargerAttr),
    genPred(SmallerAttr, LargerAttr, Pred).
/*
1.10 genPred
*/
/*
---- genPred(+AttrSmaller, +AttrLarger, -Pred)
----
Description:
Generates a join predicate ~Pred~ for the two participating relations represented by the
attribute names ~AttrSmaller~ and ~AttrLarger~.
Input:
~AttrSmaller~: Attribute name of the smaller relation
~AttrLarger~: Attribute name of the larger relation
Output:
~Pred~: generated predicate
*/
% genPred(+AttrSmaller, +AttrLarger, -Pred)
% Pred is the equi-join term AttrSmaller = AttrLarger.
% (An earlier variant built an atom instead:
%  my_concat_atom([AttrSmaller,'=',AttrLarger], Pred).)
genPred(AttrSmaller, AttrLarger, (AttrSmaller = AttrLarger)).
% list2codes(+Atoms, -Codes)
% Codes is the character-code list of the atoms in Atoms joined by commas,
% e.g. [a,b,c] gives the codes of 'a,b,c'. The empty list gives [].
%
% Changes against the previous version:
%  * The recursive clause now requires at least two elements. Before, a
%    singleton list matched both the singleton clause and the recursive one,
%    so backtracking produced a second, wrong result with a trailing comma.
%  * The empty result and the separator no longer use double-quoted
%    literals, whose meaning depends on the double_quotes flag.
list2codes([], []).
list2codes([Atom], Codes) :-
    atom_codes(Atom, Codes).
list2codes([Atom, Next|Rest], Codes) :-
    atom_codes(Atom, AtomCodes),
    atom_codes(',', SeparatorCodes),
    list2codes([Next|Rest], RestCodes),
    append(SeparatorCodes, RestCodes, SeparatedRest),
    append(AtomCodes, SeparatedRest, Codes).
/*
1.11 list2string
*/
/*
---- list2string(+List, -String)
----
Description:
Converts an input ~List~ to an output ~String~
*/
% list2string(+List, -String)
% String is the comma-separated text of List wrapped in square brackets.
% List must be ground; otherwise the predicate fails.
list2string(List, String) :-
    ground(List),
    list2codes(List, InnerCodes),
    atom_codes(Inner, InnerCodes),
    my_concat_atom(['[', Inner, ']'], String).
/*
1.12 sqlbenchmark
*/
/*
---- sqlbenchmark(+Term, -PlanBuild, -PlanExec)
----
Description:
Executes an sql term ~Term~ and returns the optimization time ~PlanBuild~ in ms
and the execution time ~PlanExec~ in ms.
Input:
~Term~: sql term (e.g. select count([star]) from ... where...)
Output:
~PlanBuild~: Optimization time in ms
~PlanExec~: Execution time in ms
*/
% sqlbenchmark(+Term, -PlanBuild, -PlanExec)
% Optimizes the SQL term Term, prints the chosen plan and its estimated
% cost, executes the plan and appends the run to 'SqlHistory'.
% PlanBuild is the value getTime/2 reports for the optimizer call and
% PlanExec the value query/2 reports for the execution.
% The whole sequence runs inside defaultExceptionHandler/1.
sqlbenchmark(Term, PlanBuild, PlanExec) :-
    defaultExceptionHandler((
        isDatabaseOpen,
        getTime(mOptimize(Term, Plan, EstimatedCost), PlanBuild),
        nl, write('The best plan is: '), nl, nl,
        write(Plan), nl, nl,
        write('Estimated Cost: '), write(EstimatedCost), nl, nl,
        query(Plan, PlanExec),
        appendToRel('SqlHistory', Term, Plan, EstimatedCost,
                    PlanBuild, PlanExec)
    )).
/*
1.13 testloop
*/
/*
---- testloop(+NumberLoops, +AkkOpt, +AkkExec, -ResultOpt,
-ResultExec, +NumberRels)
----
Description:
Generates queries and executes them in a loop. Accumulates optimization time and execution time.
Input:
~NumberLoops~: Number of test loops
~AkkOpt~: Accumulator for optimization time (should be initialized with 0)
~AkkExec~: Accumulator for execution time (should be initialized with 0)
~NumberRels~: Number of Relations (for query generation)
Output:
~ResultOpt~: Resulting optimization time in ms
~ResultExec~: Resulting execution time in ms
*/
% testloop(+NumberLoops, +AkkOpt, +AkkExec, -ResultOpt, -ResultExec,
%          +NumberRels)
% Runs NumberLoops rounds. Each round generates a fresh random query over
% NumberRels relations, benchmarks it with sqlbenchmark/3, adds the two
% measured times to the accumulators and calls dropTempRels/0.
% ResultOpt and ResultExec are the accumulator values after the last round.
testloop(0, ResultOpt, ResultExec, ResultOpt, ResultExec, _).
testloop(N, AkkOpt, AkkExec, ResultOpt, ResultExec, NumberRels) :-
    % Guard against overlap with the base case: without it this clause was
    % also tried for N = 0 on backtracking and then kept running with
    % negative counters.
    integer(N),
    N > 0,
    generateQueryTerm(NumberRels, Query),
    nl, write('Query:'), nl,
    nl, write(Query),
    sqlbenchmark(Query, OptTime, ExecTime),
    NewAkkOpt is AkkOpt + OptTime,
    NewAkkExec is AkkExec + ExecTime,
    % Disabled alternative: run every query four times and accumulate the
    % mean of the four measurements.
    %   sqlbenchmark(Query, OptTime1, ExecTime1),
    %   sqlbenchmark(Query, OptTime2, ExecTime2),
    %   sqlbenchmark(Query, OptTime3, ExecTime3),
    %   sqlbenchmark(Query, OptTime4, ExecTime4),
    %   NewAkkOpt is AkkOpt +
    %       ((OptTime1 + OptTime2 + OptTime3 + OptTime4)/4),
    %   NewAkkExec is AkkExec +
    %       ((ExecTime1 + ExecTime2 + ExecTime3 + ExecTime4)/4),
    NewN is N - 1,
    dropTempRels,
    testloop(NewN, NewAkkOpt, NewAkkExec, ResultOpt,
             ResultExec, NumberRels).
/*
1.14 benchmark
*/
/*
---- benchmark(+DBName, +NumberLoops, +NumberRels,
-AverageResultOpt, -AverageResultExec)
----
Description:
Generates in a loop ~NumberLoops~ random queries over ~NumberRels~ relations and returns the average optimization time,
average execution time (and average query processing time as textual output).
Input:
~DBName~: Name of the database
~NumberLoops~: Number of test loops
~NumberRels~: Number of relations in a single query
Output:
~AverageResultOpt~: Average optimization time,
~AverageResultExec~: Average execution time
Average query processing time
as textual output.
call e.g. benchmark(testdb50, 30, 50, AverageResultOpt, AverageResultExec).
*/
% benchmark(+DBName, +NumberLoops, +NumberRels,
%           -AverageResultOpt, -AverageResultExec)
% Runs testloop/6 with NumberLoops random queries over NumberRels relations
% in database DBName, returns the mean optimization and execution time and
% prints a summary that also contains their sum.
benchmark(DBName, NumberLoops, NumberRels,
          AverageResultOpt, AverageResultExec) :-
    % The database is opened once only to delete the samples and is then
    % closed and reopened before measuring (presumably so that the
    % measurement starts from a freshly opened database).
    open database DBName,
    deleteAllSamples,
    closedb,
    open database DBName,
    testloop(NumberLoops, 0, 0, TotalOpt, TotalExec, NumberRels),
    AverageResultOpt is TotalOpt / NumberLoops,
    AverageResultExec is TotalExec / NumberLoops,
    nl, write('DBName: '), write(DBName),
    nl, write('Number of Predicates: '), write(NumberRels),
    nl, write('Testloops: '), write(NumberLoops),
    nl, write('Average Optimization Time: '), write(AverageResultOpt),
    nl, write('AVerage Execution Time: '), write(AverageResultExec),
    nl, write('AVerage Query Processing Time: '),
    AverageTotal is AverageResultOpt + AverageResultExec,
    write(AverageTotal),
    closedb.
/*
1.15 benchmarkComponentSize
*/
/*
---- benchmarkComponentSize
----
Description:
Returns average optimization time, average execution time and average processing time
of a given query, depending on the component size (maximum number of edges per component)
*/
% benchmarkComponentSize
% With option largeQueries(qgd) set, runs one benchmark on testdb120A
% (1 loop, 120 relations) for each maximum component size 10, 9, ..., 5 and
% afterwards prints the average optimization, execution and processing
% time per size. All runs are finished before anything is reported.
benchmarkComponentSize :-
    setOption(largeQueries(qgd)),
    benchmarkComponentSizeRuns(
        ['10'-10, '9'-9, '8'-8, '7'-7, '6'-6, '5'-5],
        Results),
    benchmarkComponentSizeReport(Results).

% benchmarkComponentSizeRuns(+Settings, -Results)
% Settings is a list of Label-MaxEdges pairs; runs one benchmark per pair
% and returns Label-times(AvgOpt, AvgExec) in the same order.
benchmarkComponentSizeRuns([], []).
benchmarkComponentSizeRuns([Label-MaxEdges|Settings],
                           [Label-times(AvgOpt, AvgExec)|Results]) :-
    setMaxEdgesPerComponent(MaxEdges),
    benchmark(testdb120A, 1, 120, AvgOpt, AvgExec),
    benchmarkComponentSizeRuns(Settings, Results).

% benchmarkComponentSizeReport(+Results)
% Prints one four-line section per measured component size.
benchmarkComponentSizeReport([]).
benchmarkComponentSizeReport([Label-times(AvgOpt, AvgExec)|Results]) :-
    nl, write(Label),
    nl, write('Average Opt: '), write(AvgOpt),
    nl, write('Average Exec: '), write(AvgExec),
    AvgProc is AvgOpt + AvgExec,
    nl, write('Average Proc: '), write(AvgProc), nl,
    benchmarkComponentSizeReport(Results).
% benchmarkComponentSizeSmall
% With option largeQueries(qgdm) set, generates one random query over 40
% relations in testdb40A and benchmarks that same query with a maximum
% component size of 2, 3 and 4. Samples are deleted before each run.
% Prints optimization, execution and processing time per size, then closes
% the database.
benchmarkComponentSizeSmall :-
    setOption(largeQueries(qgdm)),
    open database testdb40A,
    deleteAllSamples,
    generateQueryTerm(40, Query),
    nl, write('Query:'), nl,
    nl, write(Query),
    setMaxEdgesPerComponent(2),
    sqlbenchmark(Query, Opt2, Exec2),
    deleteAllSamples,
    setMaxEdgesPerComponent(3),
    sqlbenchmark(Query, Opt3, Exec3),
    deleteAllSamples,
    setMaxEdgesPerComponent(4),
    sqlbenchmark(Query, Opt4, Exec4),
    benchmarkComponentSizeSmallReport('2', Opt2, Exec2),
    benchmarkComponentSizeSmallReport('3', Opt3, Exec3),
    benchmarkComponentSizeSmallReport('4', Opt4, Exec4),
    closedb.

% benchmarkComponentSizeSmallReport(+Label, +Opt, +Exec)
% Prints the section for one component size: the label, both measured
% times and their sum.
benchmarkComponentSizeSmallReport(Label, Opt, Exec) :-
    nl, write(Label),
    nl, write('Average Opt: '), write(Opt),
    nl, write('Average Exec: '), write(Exec),
    Proc is Opt + Exec,
    nl, write('Average Proc: '), write(Proc), nl.
/*
1.16 benchmarkStandardOptimizer
*/
/*
---- benchmarkStandardOptimizer
----
Description:
Returns average optimization time
of queries with n predicates (n is varied in the range [1..11]),
Time is average optimization time from 30 random generated queries with simple equi-join predicates.
*/
% benchmarkStandardOptimizer
% Removes the three largeQueries options, clears the samples of testdb12A
% and then runs benchmark/5 with 30 loops for 2, 3, ..., 12 relations.
% Afterwards prints the average optimization time of every run under the
% labels 1 .. 11 (one less than the number of relations of that run).
benchmarkStandardOptimizer :-
    delOption(largeQueries(qgd)),
    delOption(largeQueries(qgdm)),
    delOption(largeQueries(aco)),
    open database testdb12A,
    deleteAllSamples,
    closedb,
    benchmarkStandardOptimizerRuns(
        ['1: '-2, '2: '-3, '3: '-4, '4: '-5, '5: '-6, '6: '-7,
         '7: '-8, '8: '-9, '9: '-10, '10: '-11, '11: '-12],
        Results),
    nl, write('Average Opt: '), nl,
    benchmarkStandardOptimizerReport(Results),
    nl.

% benchmarkStandardOptimizerRuns(+Plan, -Results)
% Plan is a list of Label-NumberRels pairs; runs one benchmark per pair and
% returns Label-AvgOpt in the same order. The execution time is discarded.
benchmarkStandardOptimizerRuns([], []).
benchmarkStandardOptimizerRuns([Label-NumberRels|Plan],
                               [Label-AvgOpt|Results]) :-
    benchmark(testdb12A, 30, NumberRels, AvgOpt, _),
    benchmarkStandardOptimizerRuns(Plan, Results).

% benchmarkStandardOptimizerReport(+Results)
% Prints one 'Label Value' line per result.
benchmarkStandardOptimizerReport([]).
benchmarkStandardOptimizerReport([Label-AvgOpt|Results]) :-
    nl, write(Label), write(AvgOpt),
    benchmarkStandardOptimizerReport(Results).
/*
1.17 deleteAllSamples
*/
/*
---- deleteAllSamples
----
Description:
Deletes all samples from the current opened database
*/
% deleteAllSamples
% Clears the rewriteCache/2 facts, collects every stored relation of the
% current database whose name ends in '_sample_s' or '_sample_j', prints
% that list and hands it to deleteTempRels/1. Any exception raised by the
% deletion is caught and ignored.
deleteAllSamples :-
    retractall(rewriteCache(_, _)),
    findall(SampleRel,
            ( databaseName(DB),
              storedRel(DB, SampleRel, _),
              (   atom_concat(_, '_sample_s', SampleRel)
              ;   atom_concat(_, '_sample_j', SampleRel)
              )
            ),
            SampleRels),
    write(SampleRels),
    write_list(['\nINFO:\tRemoving samples...']), nl,
    catch(deleteTempRels(SampleRels), _, true).
% dropRelList(+RelList)
% Calls drop_Relation/1 on every element of RelList, front to back.
% Succeeds immediately for the empty list.
dropRelList(RelList) :-
    maplist(drop_Relation, RelList).
% compareRelations(+RelName1, +RelName2, -SmallerRel, -LargerRel,
%                  -CardSmallerRel, -CardLargerRel)
% Orders two relations by the cardinality that card/2 reports for them and
% returns both names together with their cardinalities. On a tie RelName1
% is returned as the larger relation.

% Case 1: RelName1 has at least as many tuples as RelName2.
compareRelations(RelName1, RelName2, SmallerRel, LargerRel,
                 CardSmallerRel, CardLargerRel) :-
    card(RelName1, Card1),
    card(RelName2, Card2),
    Card1 >= Card2,
    SmallerRel = RelName2,
    LargerRel = RelName1,
    CardSmallerRel is Card2,
    CardLargerRel is Card1.
% Case 2: RelName1 has strictly fewer tuples than RelName2.
compareRelations(RelName1, RelName2, SmallerRel, LargerRel,
                 CardSmallerRel, CardLargerRel) :-
    card(RelName1, Card1),
    card(RelName2, Card2),
    Card1 < Card2,
    SmallerRel = RelName1,
    LargerRel = RelName2,
    CardSmallerRel is Card1,
    CardLargerRel is Card2.
% createQueryComponents(+N, -Components)
% Asserts one single-relation fact queryComponent(relList([K])) for every
% K = N, N-1, ..., 1 and returns the asserted terms in that order.
% Fails for anything that is not an integer >= 1.
createQueryComponents(1, [queryComponent(relList([1]))]) :-
    assert(queryComponent(relList([1]))).
createQueryComponents(N, [queryComponent(relList([N]))|RestList]) :-
    % Guard against overlap with the base case: without it this clause was
    % also tried for N = 1 on backtracking, asserted a duplicate fact and
    % then recursed through 0, -1, ... without end.
    integer(N),
    N > 1,
    assert(queryComponent(relList([N]))),
    Nminus1 is N - 1,
    createQueryComponents(Nminus1, RestList).
/*
---- optimalitybenchmark
----
Description:
Compares in a loop of 100 Queries with 10 predicates each
the average optimization time, execution time and
processing time of the algorithms
STD (standard optimizer), ACO, QGD and QGDM.
*/
% optimalitybenchmark
% Runs benchmark/5 on testdb11 (100 loops, 11 relations) four times: first
% with all largeQueries options removed (STD), then after setting the
% options aco, qgd and qgdm in turn. Afterwards prints optimization,
% execution and processing time (their sum) for each of the four runs.
optimalitybenchmark :-
    delOption(largeQueries(qgd)),
    delOption(largeQueries(qgdm)),
    delOption(largeQueries(aco)),
    benchmark(testdb11, 100, 11, OptSTD, ExecSTD),
    ProcSTD is OptSTD + ExecSTD,
    setOption(largeQueries(aco)),
    benchmark(testdb11, 100, 11, OptACO, ExecACO),
    ProcACO is OptACO + ExecACO,
    setOption(largeQueries(qgd)),
    benchmark(testdb11, 100, 11, OptQGD, ExecQGD),
    ProcQGD is OptQGD + ExecQGD,
    setOption(largeQueries(qgdm)),
    benchmark(testdb11, 100, 11, OptQGDM, ExecQGDM),
    ProcQGDM is OptQGDM + ExecQGDM,
    optimalitybenchmarkReport(
        ['STD:OptTime : '-OptSTD,
         'STD:ExecTime: '-ExecSTD,
         'STD:ProcTime: '-ProcSTD,
         'ACO:OptTime : '-OptACO,
         'ACO:ExecTime: '-ExecACO,
         'ACO:ProcTime: '-ProcACO,
         'QGD:OptTime : '-OptQGD,
         'QGD:ExecTime: '-ExecQGD,
         'QGD:ProcTime: '-ProcQGD,
         'QGDM:OptTime : '-OptQGDM,
         'QGDM:ExecTime: '-ExecQGDM,
         'QGDM:ProcTime: '-ProcQGDM]).

% optimalitybenchmarkReport(+Lines)
% Prints every Label-Value pair on a line of its own (newline first).
optimalitybenchmarkReport([]).
optimalitybenchmarkReport([Label-Value|Lines]) :-
    nl, write(Label), write(Value),
    optimalitybenchmarkReport(Lines).
/*
---- largeRelationsbenchmark
----
Description:
Compares Queries over 16 relations (15 predicates)
where 50% of the relations are of class XXL (1000000 tuples)
the average optimization time, execution time and
processing time of the algorithms
ACO, QGD and QGDM.
*/
% largeRelationsbenchmark
% Runs benchmark/5 on testdb16 (5 loops, 16 relations) three times, after
% setting the largeQueries options aco, qgd and qgdm in turn. Afterwards
% prints optimization, execution and processing time (their sum) for each
% of the three runs.
largeRelationsbenchmark :-
    setOption(largeQueries(aco)),
    benchmark(testdb16, 5, 16, OptACO, ExecACO),
    ProcACO is OptACO + ExecACO,
    setOption(largeQueries(qgd)),
    benchmark(testdb16, 5, 16, OptQGD, ExecQGD),
    ProcQGD is OptQGD + ExecQGD,
    setOption(largeQueries(qgdm)),
    benchmark(testdb16, 5, 16, OptQGDM, ExecQGDM),
    ProcQGDM is OptQGDM + ExecQGDM,
    largeRelationsbenchmarkReport(
        ['ACO:OptTime : '-OptACO,
         'ACO:ExecTime: '-ExecACO,
         'ACO:ProcTime: '-ProcACO,
         'QGD:OptTime : '-OptQGD,
         'QGD:ExecTime: '-ExecQGD,
         'QGD:ProcTime: '-ProcQGD,
         'QGDM:OptTime : '-OptQGDM,
         'QGDM:ExecTime: '-ExecQGDM,
         'QGDM:ProcTime: '-ProcQGDM]).

% largeRelationsbenchmarkReport(+Lines)
% Prints every Label-Value pair on a line of its own (newline first).
largeRelationsbenchmarkReport([]).
largeRelationsbenchmarkReport([Label-Value|Lines]) :-
    nl, write(Label), write(Value),
    largeRelationsbenchmarkReport(Lines).