Files
secondo/bin/UpdateProgressConstants.sec
2026-01-23 17:03:45 +08:00

2034 lines
57 KiB
Plaintext

# Re-create the ProgressConstants relation: it collects the measured
# progress-estimation cost constants (one tuple per constant) that are
# inserted by the measurement sections below.
open database berlintest
delete ProgressConstants
let ProgressConstants = [const rel(tuple([
Algebra: string,
Operator: string,
ConstantName: string,
ConstantValue: real,
Meaning: string,
Meaning2: string
]))
value ()]
# ... csvimport['ProgressConstants.csv', 1, ""] consume
# machine factor, needed as long as not all constants are determined in this way.
# All measured times below are scaled by MF before being stored.
delete MF;
let MF = 3.35;
# remove old relations
#delete plzbig;
#delete plzsmall;
#delete plzsmallWidth10;
#delete plzbig_select0
# build some relations
# fifty/hundred: small helper relations derived from the standard
# 'thousand' relation of the berlintest database
let fifty = thousand feed filter[.No <= 50] consume
let hundred = thousand feed filter[.No <= 100] consume
# plzbig has 2000000 tuples
let plzbig = plz feed head[40000] fifty feed product consume;
# plzbig_select0 has 2000000 tuples, Selectivity of plzbig and plzbig_select0 is 0
let plzbig_select0 = plzbig feed extend[PLZ2: .PLZ + 100000] remove[PLZ] renameattr[PLZ : PLZ2] consume;
# plzsmall has 1000000 tuples
let plzsmall = plzbig feed head[1000000] renameattr[PLZ1 : PLZ, Ort1 : Ort, No1 : No] consume;
# plzsmallWidth10 has 30 attributes and 1000000 tuples
# (the 3 attributes of plzsmall duplicated ten times)
let plzsmallWidth10 =
plzsmall feed extend[
PLZ2: .PLZ1,
Ort2: .Ort1,
No2: .No1,
PLZ3: .PLZ1,
Ort3: .Ort1,
No3: .No1,
PLZ4: .PLZ1,
Ort4: .Ort1,
No4: .No1,
PLZ5: .PLZ1,
Ort5: .Ort1,
No5: .No1,
PLZ6: .PLZ1,
Ort6: .Ort1,
No6: .No1,
PLZ7: .PLZ1,
Ort7: .Ort1,
No7: .No1,
PLZ8: .PLZ1,
Ort8: .Ort1,
No8: .No1,
PLZ9: .PLZ1,
Ort9: .Ort1,
No9: .No1,
PLZ10: .PLZ1,
Ort10: .Ort1,
No10: .No1]
consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete uHashJoin;
delete vHashJoin;
delete wHashJoin;
delete t_read;
delete t_write;
delete t_probe;
delete t_hash;
delete t_result;
# gracehashjoin / hybridhashjoin #####################################
#
# Cost formula:
#
# // calculate total time
# pRes->Time = p1.Time + p2.Time
# + p2.Card * vHashJoin // reading stream B into hash table
# + p1.Card * uHashJoin // probing stream A against hash table
# + pRes->Card * wHashJoin; // output of result tuples
#
# double t2 = p1.Card * ( t_probe + t_hash + t_read + t_write );
#
#
#
# const double GraceHashJoinProgressLocalInfo::uHashJoin = 0.023;
# const double GraceHashJoinProgressLocalInfo::vHashJoin = 0.0067;
# const double GraceHashJoinProgressLocalInfo::wHashJoin = 0.0025;
# const double GraceHashJoinProgressLocalInfo::t_read = 0.001090;
# const double GraceHashJoinProgressLocalInfo::t_write = 0.001090;
# const double GraceHashJoinProgressLocalInfo::t_probe = 0.001557;
# const double GraceHashJoinProgressLocalInfo::t_hash = 0.004163;
# const double GraceHashJoinProgressLocalInfo::t_result = 0.0044;
#
#
# The following queries are taken from Sven Jungnickels
# Master thesis. Page 76.
#
#####################################################################
let plz100 = plz feed thousand feed head[100] product
extend[Ran: randint(50000)] sortby[Ran asc] remove[Ran] consume
let plz100Even = plz100 feed extend[PLZE: .PLZ * 2]
project[PLZE, Ort, No] consume
let plz100Odd = plz100 feed extend[PLZO: (.PLZ * 2) + 1]
project[PLZO, Ort, No] consume
##
# t_result
##
# Even-Odd join: keys are disjoint, so the result is empty (baseline time)
query plz100Even feed head[41267*10] {a} plz100Odd feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Odd feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Odd feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Odd feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Even-Even join: same inputs, but now every key matches and result
# tuples are produced; the first run also captures the result count in x3
let x3 = plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x3 is the tuplecount of the second query
# (x2 - x1) is the extra time caused by producing x3 result tuples
let tresult = (((x2 - x1) * 1000) / x3) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tresult", tresult, "millisecs per result tuple", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# t_write
##
# default parameters: partitioning writes tuples to disk
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# explicit parameters (buckets/memory/io-size) chosen so no partition
# has to be written to disk
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x3 is the tuplecount of the second query
# 412670 = 41267 * 10 tuples written in the first variant
let twrite = (((x2 - x1) * 1000) / 412670) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "twrite", twrite, "millisecs per written tuple", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# t_read
##
# Assume that twrite = tread
# See Jungnickel - page 78
#
let tread = twrite;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tread", tread, "millisecs per read tuple", ""] consume
##
# t_hash
##
# right stream with 41267*50 tuples
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# right stream with 41267*100 tuples (41267*50 tuples more)
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# (x2 - x1) is the extra time for the additional 41267*50 tuples;
# subtract twrite to isolate the pure hashing cost per tuple
let thash = ((((x2 - x1) * 1000) / (41267 * 50)) * MF) - twrite;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "thash", thash, "millisecs per hash value written to disk", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# t_probe
##
# left stream with 41267*20 tuples
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# left stream with 41267*30 tuples (41267*10 tuples more)
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# 412670 = 41267 * 10 additional probe tuples; subtract the already
# measured per-tuple costs to isolate the pure probing cost
let tprobe = ((((x2 - x1) * 1000) / 412670) * MF) - twrite - thash - tread;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tprobe", tprobe, "millisecs per hash table lookup", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# vHashJoin
#
# The following constants are determined by using hashjoin
# See Jungnickel Page 138
##
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
# measure time for processing 2000000 inserts
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# same join with 1000000 inserts (plzsmall)
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x5 is the number of additional tuples
let x5 = (plzbig count) - (plzsmall count);
# Calculate time to process one hashtable insert
# x1 - x2 is the time to insert (x5) tuples more
# multiply by 1000 to get the time in milliseconds
let vHashJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "vHashJoin", vHashJoin, "msecs per hash table insert", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# uHashJoin
# (section computes uHashJoin, msecs per right input tuple)
##
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
# measure time for processing 40000 tuples
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 20000 tuples
query plzsmall feed {r1} plzbig feed head [20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Calculate time to process one tuple of the right input
# x1 - x2 is the time to insert (20000) tuples more
# multiply by 1000 to get the time in milliseconds
let uHashJoin = (((x1 - x2) * 1000) / 20000) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "uHashJoin", uHashJoin, "msecs per right input tuple", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# wHashJoin
##
# measure time for processing 100000 * 100000 tuples with 6 attributes in result
# the first run also captures the result cardinality in x3
let x3 = plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 100000 * 100000 tuples with 20 attributes in result
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# calculate time for processing one attribute
# (x2 - x1) is the time for processing 10 attributes more on 1000000 tuples
#
# x3 is the size of the result relation
# so (14 * x3) is the number of additionally processed attributes
#
# NOTE(review): the attribute counts in the comments above look
# inconsistent with the relations used (plzsmallWidth10 is built with
# 30 attributes, and the inputs are head[100000], not 1000000 tuples);
# the divisor (14 * x3) should be verified against the thesis.
#
# multiply by 1000 to get the time in milliseconds
let wHashJoin = (((x2 - x1) * 1000) / (14 * x3)) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "wHashJoin", wHashJoin, "msecs per attr in result relation", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete uSortBy;
delete uMergeJoin;
delete wMergeJoin;
delete xMergeJoin;
delete yMergeJoin;
# mergejoin / sortmergejoin_old #####################################
#
# Cost formula (mergejoin):
# pRes->Time = p1.Time + p2.Time +
# (p1.Card + p2.Card) * uMergeJoin +
# pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
#
#
# Cost formular (sortmergejoin_old)
# pRes->Time = p1.Time + p2.Time +
# p1.Card * p1.Size * uSortBy +
# p2.Card * p2.Size * uSortBy +
# (p1.Card * p1.Size + p2.Card * p2.Size) * wMergeJoin +
# pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
#
#
#
# const double uSortBy = 0.00043; //millisecs per byte read in sort step
#
# const double uMergeJoin = 0.0008077; //millisecs per tuple read
# //in merge step (merge)
#
# const double wMergeJoin = 0.0001738; //millisecs per byte read in
# //merge step (sortmerge)
#
# const double xMergeJoin = 0.0012058; //millisecs per result tuple in
# //merge step
#
# const double yMergeJoin = 0.0001072; //millisecs per result attribute in
# //merge step
#
#
# The following queries are taken from
# secondo/Algebras/ExtRelation-C++/ConstantsSortmergejoin.txt
#
#####################################################################
# plz10: 10 copies of plz in random order; Even/Odd variants again have
# disjoint key domains so Even-Odd joins produce no result tuples
let plz10 = plz feed ten feed product extend[Ran: randint(50000)]
sortby[Ran asc] remove[Ran] consume
let plz10Even = plz10 feed extend[PLZE: .PLZ * 2] project[PLZE, Ort, No] consume
let plz10Odd = plz10 feed extend[PLZO: (.PLZ * 2) + 1] project[PLZO, Ort, No] consume
let FirstEven = plz10Even feed head[80] consume
##
# uSortBy (millisecs per byte read in sort step)
##
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# baseline: scanning plz10Even alone
query plz10Even feed count
query plz10Even feed count
query plz10Even feed count
query plz10Even feed count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
let x4 = (plz10Even count) * (FirstEven count);
# NOTE(review): x4 is a tuple-pair count, although uSortBy is documented
# as millisecs per byte read - verify against ConstantsSortmergejoin.txt
# multiply by 1000 to get the time in milliseconds
let uSortBy = (((x1 - x2) * 1000) / x4) * MF;
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "uSortBy", uSortBy, "millisecs per byte read in sort step", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# uMergeJoin (millisecs per tuple read in merge step (merge))
##
# pre-sorted inputs, so the concat/scans below isolate the merge cost
let plz50Even = plz10Even feed ten feed filter[.No < 6] {t} product remove[No_t] consume
let plz50EvenSortedPLZE = plz50Even feed sortby[PLZE asc] consume
let plz10EvenSorted = plz10Even feed sortby[PLZE asc] consume
let plz10OddSorted = plz10Odd feed sortby[PLZO asc] consume
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# baseline scans of both inputs
query plz50EvenSortedPLZE feed count
query plz50EvenSortedPLZE feed count
query plz50EvenSortedPLZE feed count
query plz50EvenSortedPLZE feed count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
query plz10OddSorted feed count
query plz10OddSorted feed count
query plz10OddSorted feed count
query plz10OddSorted feed count
# calculate avg. time to process these tuples
let x3 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
let x4 = plz50EvenSortedPLZE count;
# multiply by 1000 to get the time in milliseconds
let uMergeJoin = (((x1 - (x2 + x3)) * 1000) / x4) * MF;
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "uMergeJoin", uMergeJoin, "millisecs per tuple read in merge step (merge)", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# wMergeJoin (millisecs per byte read in merge step (sortmerge))
##
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
# widen plz10Even/plz10Odd by 12 integer attributes (3 + 12 = 15 attrs)
let plz10Even15Attrs = plz10Even feed extend[
A1: 1,
A2: 1,
A3: 1,
A4: 1,
A5: 1,
A6: 1,
A7: 1,
A8: 1,
A9: 1,
A10: 1,
A11: 1,
A12: 1]
consume
let plz10Odd15Attrs = plz10Odd feed extend[
A1: 1,
A2: 1,
A3: 1,
A4: 1,
A5: 1,
A6: 1,
A7: 1,
A8: 1,
A9: 1,
A10: 1,
A11: 1,
A12: 1]
consume
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
let x3 = plz10Even15Attrs tuplesize;
let x4 = plz10Even tuplesize;
let x5 = plz10Even count;
# Count all processed bytes
let x6 = x5 * (x3 + x4);
# multiply by 1000 to get the time in milliseconds
let wMergeJoin = (((x2 - x1) * 1000) / x6) * MF;
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "wMergeJoin", wMergeJoin, "millisecs per byte read in merge step (sortmerge)", ""] consume
##
# xMergeJoin (millisecs per result tuple in merge step)
# yMergeJoin (millisecs per result attribute in merge step)
##
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
# Result 0 tuples
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Result Tuples 24879300
# first run also captures the result cardinality in x3
let x3 = plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Difference: time for result tuple with 6 attributes
# NOTE(review): x4 is computed but not used afterwards
let x4 = ((x2 - x1) / x3);
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
let x5 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Time per tuple
# NOTE(review): the numerator (x6 - x5 - x2 - x1) subtracts x1; a
# difference of differences would be (x6 - x5) - (x2 - x1) = x6 - x5
# - x2 + x1 - verify against ConstantsSortmergejoin.txt before changing
let xMergeJoin = (((x6 - x5 - x2 - x1) * 1000) / x3) * MF;
# Time for additional 12 attributes
let yMergeJoin = (((x6 - x5 - x2 - x1) * 1000) / (x3 * 12)) * MF;
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "xMergeJoin", xMergeJoin, "millisecs per result tuple in merge step", ""] consume
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "yMergeJoin", yMergeJoin, "millisecs per result attribute in merge step", ""] consume
delete uSymmJoin;
# symmjoim ##########################################################
#
# Cost formula:
# pRes->Time = p1.Time + p2.Time +
# p1.Card * p2.Card * predCost * uSymmJoin;
#
#
# const double uSymmJoin = 0.2; //millisecs per tuple pair
#
#
#####################################################################
##
# measure time for processing a tuple pair (uSymmJoin)
##
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
# measure time for processing 5000 * 5000 = 25.000.000 Tuples
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 6000 * 6000 = 36.000.000 Tuples
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x5 is the size of additinal tuples
let x5 = 36000000 - 25000000;
# x1 - x2 is the time to process (x5) tuples more
# multiply by 1000 to get the time in milliseconds
let uSymmJoin = (((x2 - x1) * 1000) / x5) * MF;
query ProgressConstants inserttuple["ExtRelationAlgebra", "symmjoin", "uSymmJoin", uSymmJoin, "millisecs per tuple pair", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete uItHashJoin;
delete vItHashJoin;
delete wItHashJoin;
delete xItHashJoin;
delete yItHashJoin;
# ithashjoin #######################################################
#
# Cost formula:
# pRes->Time = p1.Time + p2.Time + tuplesInTupleFile * wItHashJoin + (partitions - 1) * xItHashJoin;
#
# double uItHashJoin = 0.002; //millisecs per insert in hash table
# double vItHashJoin = 0.021; //millisecs for processing a tupe in right stream
# double wItHashJoin = 0.004; //millisecs for writing one byte to tuplefile
# double xItHashJoin = 0.001; //millisecs for reading one byte from tuplefile
# double yItHashJoin = 0.001; //millisecs for creating a attr in result
#
#####################################################################
##
# measure time for one hash table insert (uItHashJoin)
##
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
# measure time for processing 2000000 inserts
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x5 is the size of additinal tuples
let x5 = (plzbig count) - (plzsmall count);
# Calculate time to process one hashtable insert
# x1 - x2 is the time to insert (x5) tuples more
# multiply by 1000 to get the time in milliseconds
let uItHashJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "uItHashJoin", uItHashJoin, "msecs per hash table insert", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for processing one tuple in right stream, partition = 1 (vItHashJoin)
##
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
# measure time for processing 40000 tuples
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 20000 tuples
query plzsmall feed {r1} plzbig feed head [20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head [20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Calculate time to process one tuple of the right input
# x1 - x2 is the time to process 20.000 tuples more
# multiply by 1000 to get the time in milliseconds
let vItHashJoin = (((x1 - x2) * 1000) / 20000) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "vItHashJoin", vItHashJoin, "msecs per tuple in right input stream (part. 1)", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for reading and writing one byte into TupleFile (wItHashJoin)
#
# Set memory to 128 MB, so itHashJoin runs with 2 partitions and
# has to write the tuples from the right stream to disk
#
##
# we always run 4 times and take the time of the last three, to have consistent
# warm state
# NOTE(review): this warm-up run lacks the {memory 128} annotation that
# the three measured runs below use - confirm this is intentional
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# baseline: default memory, no spill to TupleFile
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
let x3 = plzbig_select0 count
let x4 = plzbig_select0 tuplesize;
# Amount of bytes written to Disk
let x5 = x3 * x4;
# Calculate time to read and write one byte to Tuplefile
# x1 - x2 is the time to write and read x5 bytes to Tuplefile
# multiply by 1000 to get the time in milliseconds
let wItHashJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "wItHashJoin", wItHashJoin, "msecs per byte written to TupleFile", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for reading one byte from TupleFile (xItHashJoin)
#
# We run itHashJoin with 2 and 3 partitions
# the run with 3 partitions has to write the tuples and read them twice (80 MB)
# the run with 2 partitions writes and reads the tuples only once (128 MB)
##
# we always run 4 times and take the time of the last three, to have consistent
# warm state
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
let x3 = plzbig_select0 count
let x4 = plzbig_select0 tuplesize;
# Amount of bytes written to Disk
let x5 = x3 * x4;
# Calculate time to read one byte more from Tuplefile
# x1 - x2 is the time for the additional read of x5 bytes
# multiply by 1000 to get the time in milliseconds
let xItHashJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "xItHashJoin", xItHashJoin, "msecs per byte read from TupleFile", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for processing one result attr (yItHashJoin)
##
# measure time for processing 100000 * 100000 tuples with 6 attributes in result
let x3 = plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 100000 * 100000 tuples with 20 attributes in result
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# calculate time for processing one attribute
# (x2 - x1) is the time for processing 10 attributes more on 1000000 tuples
#
# x3 is the size of the result relation
# so (14 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let yItHashJoin = (((x2 - x1) * 1000) / (14 * x3)) * MF;
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "yItHashJoin", yItHashJoin, "msecs per attr in result relation", ""] consume
#query ProgressConstants feed csvexport['ProgressConstants.csv', FALSE, TRUE] count
#close database
#quit
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete uFeedProject;
delete vFeedProject;
delete wFeedProject;
# feedproject #######################################################
#
# Cost formula:
#
# pRes->Time = (fli->total + 1) *
# (uFeedProject
# + fli->argTupleSize * vFeedProject
# + fli->noAttrs * wFeedProject);
#
#
# previous hard-coded values:
# double uFeedProject = 0.002; //millisecs per tuple
# double vFeedProject = 0.000036; //millisecs per byte input
# double wFeedProject = 0.0018; //millisecs per attr
#
#####################################################################
##
# measure time for processing one tuple (uFeedProject)
##
# measure time for processing 2000000 tuples
query plzbig feedproject[Ort] count;
query plzbig feedproject[Ort] count;
query plzbig feedproject[Ort] count;
query plzbig feedproject[Ort] count;
# calculate avg. time of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 1000000 tuples
query plzsmall feedproject[Ort1] count;
query plzsmall feedproject[Ort1] count;
query plzsmall feedproject[Ort1] count;
query plzsmall feedproject[Ort1] count;
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x5 is the number of additional tuples processed in the first measurement
let x5 = (plzbig count) - (plzsmall count);
# Calculate time to process one tuple
# x1 - x2 is the time to process (x5) tuples more
# multiply by 1000 to get the time in milliseconds
let uFeedProject = (((x1 - x2) * 1000) / x5) * MF
query ProgressConstants inserttuple["Relation-C++", "feedproject", "uFeedProject", uFeedProject, "msecs per tuple", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for processing one byte in input (vFeedProject)
#
# Create two relations, each with 2000000 tuples and 2 attributes.
# The attributes have different sizes, so we can measure the processing
# time per byte.
#
#
##
delete plzbig1;
delete plzbig2;
# each derived relation has 2000000 tuples (same cardinality as plzbig)
let plzbig1 = plzbig feedproject[Ort, No] consume;
let plzbig2 = plzbig feedproject[PLZ, No] consume;
# measure time to process two attributes on 2000000 tuples (Ort and No)
query plzbig1 feedproject[Ort, No] count;
query plzbig1 feedproject[Ort, No] count;
query plzbig1 feedproject[Ort, No] count;
query plzbig1 feedproject[Ort, No] count;
# calculate avg. time of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# measure time to process two attributes on 2000000 tuples (PLZ and No)
query plzbig2 feedproject[PLZ, No] count;
query plzbig2 feedproject[PLZ, No] count;
query plzbig2 feedproject[PLZ, No] count;
query plzbig2 feedproject[PLZ, No] count;
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# x3: tuple size difference (bytes) between the two relations
let x3 = ((plzbig1 tuplesize) - (plzbig2 tuplesize));
# x4: number of tuples
let x4 = plzbig2 count;
# calculate the time to process one byte in input
# (x1 - x2) is the time difference between processing two relations with
# 2000000 tuples and two attributes of different attribute size
#
# x3 is the difference in bytes for the attributes in x1 and x2
# x4 is the number of tuples
# so (x3 * x4) is the number of additional bytes processed in x1
#
# multiply by 1000 to get the time in milliseconds
let vFeedProject = (((x1 - x2) * 1000) / (x3 * x4) ) * MF;
query ProgressConstants inserttuple["Relation-C++", "feedproject", "vFeedProject", vFeedProject, "msecs per byte input", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for processing one attr (wFeedProject)
##
# measure time for processing 1000000 tuples, projecting to 20 attributes
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
# avg. of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 1000000 tuples, projecting to 10 attributes
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x3: cardinality of plzsmallWidth10
let x3 = plzsmallWidth10 count;
# calculate time for processing one attribute
# (x1 - x2) is the time for processing 10 attributes more on 1000000 tuples
#
# x3 is the cardinality of the relation
# so (10 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let wFeedProject = (((x1 - x2) * 1000) / (10 * x3)) * MF;
query ProgressConstants inserttuple["Relation-C++", "feedproject", "wFeedProject", wFeedProject, "msecs per attr", ""] consume
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
# product ###########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p2.Time +
# p2.Card * p2.Size * uProduct +
# p1.Card * p2.Card * pRes->Size * vProduct;
#
# Note:
#
# Product will write the right tuple stream to disk,
# if the size of the stream exceeds the memory limit
# of the operator. Otherwise the operator works completely
# in memory.
#
# // millisecs per byte (right input stream) if data is written to disk
# double uProduct = 0.0003;
#
# //millisecs per byte (output stream) if data is read from disk
# double vProduct = 0.000042;
#
#####################################################################
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete uProduct;
delete vProduct;
##
# measure time for processing one byte in right stream (uProduct)
#
# we use a small left tuple stream and a big right tuple stream,
# so the right stream must be written to disk
##
# measure time to process 2000000 tuples (= 50 * 40000)
query fifty feed plz feed head[40000] product count;
query fifty feed plz feed head[40000] product count;
query fifty feed plz feed head[40000] product count;
query fifty feed plz feed head[40000] product count;
# avg. of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time to process 1000000 tuples (= 50 * 20000)
query fifty feed plz feed head[20000] product count;
query fifty feed plz feed head[20000] product count;
query fifty feed plz feed head[20000] product count;
query fifty feed plz feed head[20000] product count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# calculate size (in bytes) of right tuple stream (x1)
let sizex1 = ((plz tuplesize) * 40000);
# calculate size (in bytes) of right tuple stream (x2)
let sizex2 = ((plz tuplesize) * 20000);
# calculate the time to process one byte in right input stream
#
# (x1 - x2) * 1000 is the time (in msec) to process
# 20000 tuples more as right input
#
# (sizex1 - sizex2) is the amount of bytes for 20000 tuples
let uProduct = (((x1 - x2) * 1000) / (sizex1 - sizex2)) * MF;
query ProgressConstants inserttuple["Relation-C++", "product", "uProduct", uProduct, "msecs per byte (right tuple input stream)", ""] consume
# delete variables used before
delete sizex1;
delete sizex2;
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for processing one byte in output stream (vProduct)
#
# assume that the right tuple stream is written to disk and
# must be read for every tuple in the left stream
##
# x5 is the cardinality of the left stream
let x5 = 1000;
# x6 is the cardinality of the right stream
let x6 = 20000;
# measure time to process 20000000 tuples (= 1000 * 20000), left input with 30 attributes
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
# avg. of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time to process 20000000 tuples (= 1000 * 20000), left input with 3 attributes
query plzsmall feed head[x5] plz feed head[x6] product count;
query plzsmall feed head[x5] plz feed head[x6] product count;
query plzsmall feed head[x5] plz feed head[x6] product count;
query plzsmall feed head[x5] plz feed head[x6] product count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# calculate the time to process one byte in output stream
# x1 - x2 is the time to process 27 attributes more on 20000000 tuples
# multiply by 1000 to get the time in milliseconds
#
# additionalSize is the difference in bytes for the attributes in x1 and x2
# for all tuples
#
# (x5 * x6) is the amount of tuples
delete additionalSize;
let additionalSize = ((plzsmallWidth10 tuplesize - plzsmall tuplesize) * (x5 * x6));
let vProduct = (((x1 - x2) * 1000) / (additionalSize)) * MF;
query ProgressConstants inserttuple["Relation-C++", "product", "vProduct", vProduct, "msecs per byte (output stream)", ""] consume
# project ###########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p1.Card * (uProject + pli->noAttrs * vProject);
#
# previous hard-coded values:
# double uProject = 0.00073; //millisecs per tuple
# double vProject = 0.0004; //millisecs per tuple and attribute
#
#####################################################################
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete uProject;
delete vProject;
##
# measure time for processing one tuple (uProject)
##
# measure time for processing 2000000 tuples
query plzbig feed project[Ort] count;
query plzbig feed project[Ort] count;
query plzbig feed project[Ort] count;
query plzbig feed project[Ort] count;
# calculate avg. time of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 1000000 tuples
query plzsmall feed project[Ort1] count;
query plzsmall feed project[Ort1] count;
query plzsmall feed project[Ort1] count;
query plzsmall feed project[Ort1] count;
# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Calculate the time to process one tuple
# x1 - x2 is the time to process 1000000 tuples more
# multiply by 1000 to get the time in milliseconds
let uProject = (((x1 - x2) * 1000) / 1000000) * MF
query ProgressConstants inserttuple["Relation-C++", "project", "uProject", uProject, "msecs per tuple", ""] consume
#####
# delete old variables
delete x1;
delete x2;
##
# measure time for processing one attribute (vProject)
##
# measure time for processing 1000000 tuples, projecting to 20 attributes
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
# avg. of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure time for processing 1000000 tuples, projecting to 10 attributes
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# x3: cardinality of plzsmallWidth10
let x3 = plzsmallWidth10 count;
# calculate the time for processing one attribute
# (x1 - x2) is the time for processing 10 attributes more on 1000000 tuples
#
# x3 is the cardinality of the relation plzsmallWidth10
# so (10 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let vProject = (((x1 - x2) * 1000) / (10 * x3)) * MF;
query ProgressConstants inserttuple["Relation-C++", "project", "vProject", vProject, "msecs per attribute", ""] consume
###
# Debug
###
#query ProgressConstants;
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
# extendstream #######################################################
#
# Cost formula:
#
# pRes->Time = p1.Time +
# p1.Card * wExtendStream + // time per input tuple without results
# pRes->Card * (uExtendStream + eli->noAttrs * vExtendStream);
# // time per output tuple created
#
#####################################################################
delete uExtendStream;
delete vExtendStream;
delete wExtendStream;
# Trains100: product of Trains and hundred, shuffled into random order
let Trains100 = Trains feed hundred feed product extend[N: randint(999999)] sortby[N asc] remove[N] consume
# Trains100 has 56200 tuples
# plz50: product of plz and fifty, shuffled into random order
let plz50 = plz feed fifty feed product extend[N: randint(999999)] sortby[N asc] remove[N] consume
# plz50 has 2063350 tuples
######### wExtendStream: time per tuple read
# measure time for query plz50 feed count
# we always run 4 times and take the time of the last three, to have consistent
# warm state
query plz50 feed count
query plz50 feed count
query plz50 feed count
query plz50 feed count
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# measure with extendstream on 2063350 input tuples producing no result tuples
# (intstream(1, 0) is an empty stream, so only per-input-tuple overhead remains)
query plz50 feed extendstream[U: intstream(1, 0)] count
query plz50 feed extendstream[U: intstream(1, 0)] count
query plz50 feed extendstream[U: intstream(1, 0)] count
query plz50 feed extendstream[U: intstream(1, 0)] count
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# get the time difference and divide by 2063350. Yields time in seconds.
# Multiply by 1000 to get the time in milliseconds
let wExtendStream = ((x2 - x1) / 2063350) * 1000.0 * MF
query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "wExtendStream", wExtendStream, "msecs per input tuple", ""] consume
######### vExtendStream: time per attribute returned
query Trains100 feed extendstream[UTrip: units(.Trip)] count
query Trains100 feed extendstream[UTrip: units(.Trip)] count
query Trains100 feed extendstream[UTrip: units(.Trip)] count
query Trains100 feed extendstream[UTrip: units(.Trip)] count
let x3 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Result 5154400 (result tuples produced by the unit expansion)
# Version of Trains100 with 5 additional attributes:
let Trains100B = Trains100 feed extend[IdB: .Id, LineB: .Line, UpB: .Up, TripB: .Trip, NoB: .No] consume
query Trains100B feed extendstream[UTrip: units(.Trip)] count
query Trains100B feed extendstream[UTrip: units(.Trip)] count
query Trains100B feed extendstream[UTrip: units(.Trip)] count
query Trains100B feed extendstream[UTrip: units(.Trip)] count
let x4 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# the time difference between x3 and x4 is for processing 5 more attributes
# for 5154400 tuples. Hence we have
let vExtendStream = (((x4 -x3) * 1000) / (5 * 5154400)) * MF
query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "vExtendStream", vExtendStream, "msecs per attribute", ""] consume
########### uExtendStream: time per tuple returned
# If we subtract the time for all 10 attributes from x4 (= x4 - 2 * (x4 - x3),
# since (x4 - x3) is the time for 5 attributes) and further subtract the time
# for the empty query "Trains100 feed count", the remainder must be the time
# per result tuple.
query Trains100 feed count
query Trains100 feed count
query Trains100 feed count
query Trains100 feed count
let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
let x5 = (x4 - (2 * (x4 - x3))) - x6
let uExtendStream = (MF * (x5 * 1000)) / 5154400
query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "uExtendStream", uExtendStream, "msecs per result tuple", ""] consume
#########################################################################
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
# feed ##############################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p1.Card * (uFeed + p1.noAttrs * vFeed);
#
#####################################################################
delete uFeed;
delete vFeed;
##### vFeed: time per attribute in input tuple
# plz50Width10: plz50 widened from 3 to 30 attributes (same 2063350 tuples)
let plz50Width10 =
plz50 feed extend[
PLZ2: .PLZ,
Ort2: .Ort,
No2: .No,
PLZ3: .PLZ,
Ort3: .Ort,
No3: .No,
PLZ4: .PLZ,
Ort4: .Ort,
No4: .No,
PLZ5: .PLZ,
Ort5: .Ort,
No5: .No,
PLZ6: .PLZ,
Ort6: .Ort,
No6: .No,
PLZ7: .PLZ,
Ort7: .Ort,
No7: .No,
PLZ8: .PLZ,
Ort8: .Ort,
No8: .No,
PLZ9: .PLZ,
Ort9: .Ort,
No9: .No,
PLZ10: .PLZ,
Ort10: .Ort,
No10: .No]
consume
# plz50 has 3 attributes
# plz50Width10 has 30 attributes
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
# avg. of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x1 = 3.6913163333
query plz50Width10 feed count;
query plz50Width10 feed count;
query plz50Width10 feed count;
query plz50Width10 feed count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x2 = 20.4570833333
# x3: number of additional attributes processed (27 extra attrs per tuple)
let x3 = (30 - 3) * (plz50 count)
# recorded sample value: query x3 = 55710450
# hence the time per attribute in milliseconds is
let x4 = ((x2 - x1) * 1000) / x3
# recorded sample value: query x4 = 0.0003009447
let vFeed = x4 * MF
# recorded sample value: query vFeed = 0.0010081649
query ProgressConstants inserttuple["Relation-C++", "feed", "vFeed", vFeed, "msecs per attribute in input tuple", ""] consume
##### uFeed: time per input tuple
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
# plzbig has 2.000.000 tuples
# plzsmall has 1.000.000 tuples
query plzbig feed count;
query plzbig feed count;
query plzbig feed count;
query plzbig feed count;
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
query plzsmall feed count;
query plzsmall feed count;
query plzsmall feed count;
query plzsmall feed count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Time difference for processing 1.000.000 tuples more
let x3 = x1 - x2;
# this is the time for processing one tuple (in milliseconds)
let x6 = (x3 / 1000000) * 1000
let uFeed = x6 * MF
# recorded sample value: query uFeed = 0.002968628
query ProgressConstants inserttuple["Relation-C++", "feed", "uFeed", uFeed, "msecs per input tuple", ""] consume
#####################################################################
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete x10;
delete x11;
delete x12;
# consume ###########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time +
# p1.Card * (uConsume + p1.SizeExt * vConsume
# + (p1.Size - p1.SizeExt) * wConsume);
#
# previous values
# const double uConsume = 0.024; //millisecs per tuple
# const double vConsume = 0.0003; //millisecs per byte in
# // root/extension
# const double wConsume = 0.001338; //millisecs per byte in FLOB
#
#####################################################################
delete uConsume;
delete vConsume;
# NOTE(review): the delete below lacks the trailing ';' used elsewhere in this
# script — presumably both forms are accepted by the script runner; confirm
delete wConsume
##### vConsume: time per byte in root/extension
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
# avg. of the last three runs (first run is warm-up)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x1 = 3.8398513333
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x2 = 8.2118556667
# x3: additional root/extension bytes stored by the wide relation
let x3 = (plz50Width10 tuplesize - plz50 tuplesize) * 200000
# recorded sample value: query x3 = 45013347.22659753
# time per byte in msecs
let x4 = ((x2 - x1) / x3) * 1000
# recorded sample value: query x4 = 9.71268e-05
let vConsume = x4 * MF
# recorded sample value: query vConsume = 0.0003253749
query ProgressConstants inserttuple["Relation-C++", "consume", "vConsume", vConsume, "msecs per byte in root/extension tuple", ""] consume
##### uConsume: time per tuple
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
let x5 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x5 = 3.5346763333
query plz50 feed consume count;
query plz50 feed consume count;
query plz50 feed consume count;
query plz50 feed consume count;
let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x6 = 36.8306773333
# the time difference between x6 and x5 is the time used for consume. From
# this we subtract the time corresponding to the number of bytes written
# (reusing the per-byte rate (x2 - x1) / x3 measured above)
let x10 = (plz50 tuplesize * plz50 count) * ((x2 - x1) / x3)
# recorded sample value: query x10 = 5.0116528562 secs
let x11 = (((x6 - x5) - x10) / (plz50 count)) * 1000
# recorded sample value: query x11 = 0.013707974
let uConsume = x11 * MF
# recorded sample value: query uConsume = 0.0459217129
query ProgressConstants inserttuple["Relation-C++", "consume", "uConsume", uConsume, "msecs per tuple", ""] consume
##### wConsume: time per byte in FLOBs
delete x7;
delete x8;
delete x9;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
let x7 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x7 = 1.440998
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
let x8 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# recorded sample value: query x8 = 25.8285243333
# The difference between the two queries is storing the Trip attribute.
# This attribute has size
# query Trains100 extattrsize[Trip] = 144
# query Trains100 attrsize[Trip] = 13351.0035587189
# we consider the entire time difference as spent on writing FLOB attributes.
# Hence we have the time per byte in milliseconds for writing FLOBs
# (13351 bytes per tuple, 56200 tuples in Trains100):
let x9 = ((x8 - x7) / (13351 * 56200)) * 1000
# recorded sample value: query x9 = 3.25026e-05
let wConsume = x9 * MF
# recorded sample value: query wConsume = 0.0001088836
query ProgressConstants inserttuple["Relation-C++", "consume", "wConsume", wConsume, "msecs per FLOB byte written", ""] consume
#####################################################################
## Adding Global consts
# NOTE(review): wItHashJoin is presumably defined in an earlier section of
# this script (not visible here, unlike xItHashJoin and yItHashJoin) —
# confirm it exists before this point, otherwise the first insert fails
query ProgressConstants inserttuple["Global", "TupleFile", "twrite", wItHashJoin, "msecs per byte written to TupleFile", ""] consume
query ProgressConstants inserttuple["Global", "TupleFile", "tread", xItHashJoin, "msecs per byte read from TupleFile", ""] consume
query ProgressConstants inserttuple["Global", "ResultTuple", "attr", yItHashJoin, "msecs per attr in result relation", ""] consume
# export all measured constants and close the database
query ProgressConstants feed csvexport['ProgressConstants.csv', FALSE, TRUE] count
close database
# query ProgressConstants feed filter[.ConstantName = "vExtendStream"]
# ProgressConstants updatedirect[ConstantValue: uExtendStream] consume