open database berlintest
|
|
|
|
delete ProgressConstants
|
|
|
|
let ProgressConstants = [const rel(tuple([
|
|
Algebra: string,
|
|
Operator: string,
|
|
ConstantName: string,
|
|
ConstantValue: real,
|
|
Meaning: string,
|
|
Meaning2: string
|
|
]))
|
|
value ()]
|
|
|
|
# ... csvimport['ProgressConstants.csv', 1, ""] consume
|
|
|
|
# machine-dependent scaling factor, needed as long as not all constants are determined in this way.
|
|
delete MF;
|
|
|
|
let MF = 3.35;
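# Example (illustrative only): a raw measured value of 0.001 ms would be
# stored as 0.001 * 3.35 = 0.00335 ms after applying MF.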
|
|
|
|
# remove old relations
|
|
#delete plzbig;
|
|
#delete plzsmall;
|
|
#delete plzsmallWidth10;
|
|
#delete plzbig_select0
|
|
|
|
# build some relations
|
|
|
|
let fifty = thousand feed filter[.No <= 50] consume
|
|
|
|
let hundred = thousand feed filter[.No <= 100] consume
|
|
|
|
# plzbig has 2000000 tuples
|
|
let plzbig = plz feed head[40000] fifty feed product consume;
|
|
|
|
# plzbig_select0 has 2000000 tuples; the join selectivity between plzbig and plzbig_select0 is 0
|
|
let plzbig_select0 = plzbig feed extend[PLZ2: .PLZ + 100000] remove[PLZ] renameattr[PLZ : PLZ2] consume;
|
|
|
|
# plzsmall has 1000000 tuples
|
|
let plzsmall = plzbig feed head[1000000] renameattr[PLZ1 : PLZ, Ort1 : Ort, No1 : No] consume;
|
|
|
|
# plzsmallWidth10 has 30 attributes and 1000000 tuples
|
|
let plzsmallWidth10 =
|
|
plzsmall feed extend[
|
|
PLZ2: .PLZ1,
|
|
Ort2: .Ort1,
|
|
No2: .No1,
|
|
PLZ3: .PLZ1,
|
|
Ort3: .Ort1,
|
|
No3: .No1,
|
|
PLZ4: .PLZ1,
|
|
Ort4: .Ort1,
|
|
No4: .No1,
|
|
PLZ5: .PLZ1,
|
|
Ort5: .Ort1,
|
|
No5: .No1,
|
|
PLZ6: .PLZ1,
|
|
Ort6: .Ort1,
|
|
No6: .No1,
|
|
PLZ7: .PLZ1,
|
|
Ort7: .Ort1,
|
|
No7: .No1,
|
|
PLZ8: .PLZ1,
|
|
Ort8: .Ort1,
|
|
No8: .No1,
|
|
PLZ9: .PLZ1,
|
|
Ort9: .Ort1,
|
|
No9: .No1,
|
|
PLZ10: .PLZ1,
|
|
Ort10: .Ort1,
|
|
No10: .No1]
|
|
consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uHashJoin;
|
|
delete vHashJoin;
|
|
delete wHashJoin;
|
|
delete t_read;
|
|
delete t_write;
|
|
delete t_probe;
|
|
delete t_hash;
|
|
delete t_result;
|
|
|
|
# gracehashjoin / hybridhashjoin #####################################
|
|
#
|
|
# Cost formula:
|
|
#
|
|
# // calculate total time
|
|
# pRes->Time = p1.Time + p2.Time
|
|
# + p2.Card * vHashJoin // reading stream B into hash table
|
|
# + p1.Card * uHashJoin // probing stream A against hash table
|
|
# + pRes->Card * wHashJoin; // output of result tuples
|
|
#
|
|
# double t2 = p1.Card * ( t_probe + t_hash + t_read + t_write );
|
|
#
|
|
#
|
|
#
|
|
# const double GraceHashJoinProgressLocalInfo::uHashJoin = 0.023;
|
|
# const double GraceHashJoinProgressLocalInfo::vHashJoin = 0.0067;
|
|
# const double GraceHashJoinProgressLocalInfo::wHashJoin = 0.0025;
|
|
# const double GraceHashJoinProgressLocalInfo::t_read = 0.001090;
|
|
# const double GraceHashJoinProgressLocalInfo::t_write = 0.001090;
|
|
# const double GraceHashJoinProgressLocalInfo::t_probe = 0.001557;
|
|
# const double GraceHashJoinProgressLocalInfo::t_hash = 0.004163;
|
|
# const double GraceHashJoinProgressLocalInfo::t_result = 0.0044;
|
|
#
|
|
#
|
|
# The following queries are taken from Sven Jungnickel's
|
|
# master thesis, page 76.
|
|
#
|
|
#####################################################################
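# Illustrative evaluation of the cost formula above, using hypothetical
# cardinalities and the C++ default constants quoted above: for
# p1.Card = p2.Card = pRes->Card = 1000000,
#   1000000 * 0.0067 (vHashJoin) + 1000000 * 0.023 (uHashJoin)
#   + 1000000 * 0.0025 (wHashJoin) = 6700 + 23000 + 2500 = 32200 ms,
# i.e. roughly 32 s on top of p1.Time + p2.Time.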
|
|
|
|
let plz100 = plz feed thousand feed head[100] product
|
|
extend[Ran: randint(50000)] sortby[Ran asc] remove[Ran] consume
|
|
|
|
let plz100Even = plz100 feed extend[PLZE: .PLZ * 2]
|
|
project[PLZE, Ort, No] consume
|
|
|
|
let plz100Odd = plz100 feed extend[PLZO: (.PLZ * 2) + 1]
|
|
project[PLZO, Ort, No] consume
|
|
|
|
##
|
|
# t_result
|
|
##
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Odd feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Odd feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Odd feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Odd feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x3 = plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# x3 is the number of result tuples of the second set of queries
|
|
let tresult = (((x2 - x1) * 1000) / x3) * MF;
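# Illustrative arithmetic with hypothetical timings: for x1 = 10 s, x2 = 16 s
# and x3 = 4126700 result tuples,
#   tresult = ((16 - 10) * 1000 / 4126700) * 3.35, which is about 0.0049 ms,
# the same order of magnitude as the default t_result = 0.0044 quoted above.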
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tresult", tresult, "millisecs per result tuple", ""] consume
|
|
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# t_write
|
|
##
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed
|
|
head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
|
|
|
|
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZE_b, 1000,32,1024*1024,4096] head[1] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# the parameterized queries write 412670 (= 41267 * 10) tuples to disk
|
|
let twrite = (((x2 - x1) * 1000) / 412670) * MF;
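# Illustrative arithmetic with hypothetical timings: for x2 - x1 = 0.2 s,
#   twrite = (0.2 * 1000 / 412670) * 3.35, i.e. about 0.0016 ms per written tuple.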
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "twrite", twrite, "millisecs per written tuple", ""] consume
|
|
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
##
|
|
# t_read
|
|
##
|
|
|
|
|
|
# Assume that twrite = tread
|
|
# See Jungnickel - page 78
|
|
#
|
|
let tread = twrite;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tread", tread, "millisecs per read tuple", ""] consume
|
|
|
|
##
|
|
# t_hash
|
|
##
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*50] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
query plz100Even feed {a} plz100Even feed head[41267*100] {b}
|
|
hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
let thash = ((((x2 - x1) * 1000) / (41267 * 50)) * MF) - twrite;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "thash", thash, "millisecs per hash value written to disk", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# t_probe
|
|
##
|
|
|
|
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b}
|
|
hybridhashjoinParam[PLZE_a, PLZO_b, 1000,32,1024*1024,4096] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
let tprobe = ((((x2 - x1) * 1000) / 412670) * MF) - twrite - thash - tread;
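# Illustrative arithmetic with hypothetical values: for x2 - x1 = 1 s and
# hypothetical twrite = tread = 0.0016 and thash = 0.004,
#   tprobe = ((1 * 1000 / 412670) * 3.35) - 0.0016 - 0.004 - 0.0016,
# which is about 0.0081 - 0.0072 = 0.0009 ms per hash table lookup.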
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tprobe", tprobe, "millisecs per hash table lookup", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# vHashJoin
|
|
#
|
|
# The following constants are determined by using hashjoin
|
|
# See Jungnickel, page 138
|
|
##
|
|
|
|
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
# measure time for processing 2000000 inserts
|
|
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
|
|
|
|
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
|
|
|
|
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# x5 is the number of additional tuples
|
|
let x5 = (plzbig count) - (plzsmall count);
|
|
|
|
# Calculate time to process one hashtable insert
|
|
# x1 - x2 is the time needed to insert x5 additional tuples
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let vHashJoin = (((x1 - x2) * 1000) / x5) * MF;
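# Illustrative arithmetic with hypothetical timings: x5 is 1000000 here, so
# for x1 - x2 = 2.1 s,
#   vHashJoin = (2.1 * 1000 / 1000000) * 3.35 = 0.0021 * 3.35, about 0.0070 ms,
# close to the C++ default vHashJoin = 0.0067 quoted above.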
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "vHashJoin", vHashJoin, "msecs per hash table insert", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
##
|
|
# uHashJoin
|
|
##
|
|
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# measure time for processing 40000 tuples
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 20000 tuples
|
|
query plzsmall feed {r1} plzbig feed head [20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# Calculate time to process one tuple of the right input
|
|
# x1 - x2 is the time needed to process 20000 additional tuples of the right input
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uHashJoin = (((x1 - x2) * 1000) / 20000) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "uHashJoin", uHashJoin, "msecs per right input tuple", ""] consume
|
|
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
##
|
|
# wHashJoin
|
|
##
|
|
|
|
|
|
# measure time for processing 100000 * 100000 tuples with 6 attributes in result
|
|
let x3 = plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 100000 * 100000 tuples with 20 attributes in result
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
|
|
|
|
|
|
# calculate time for processing one attribute
|
|
# (x2 - x1) is the time for processing 10 attributes more on 1000000 tuples
|
|
#
|
|
# x3 is the size of the result relation
|
|
# so (14 * x3) is the number of additionally processed attributes
|
|
#
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let wHashJoin = (((x2 - x1) * 1000) / (14 * x3)) * MF;
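# Illustrative arithmetic with hypothetical values: for x2 - x1 = 1.5 s and
# x3 = 1000000 result tuples,
#   wHashJoin = (1.5 * 1000 / (14 * 1000000)) * 3.35, about 0.00036 ms per
# additional result attribute.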
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "wHashJoin", wHashJoin, "msecs per attr in result relation", ""] consume
|
|
|
|
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uSortBy;
|
|
delete uMergeJoin;
|
|
delete wMergeJoin;
|
|
delete xMergeJoin;
|
|
delete yMergeJoin;
|
|
|
|
|
|
# mergejoin / sortmergejoin_old #####################################
|
|
#
|
|
# Cost formula (mergejoin):
|
|
# pRes->Time = p1.Time + p2.Time +
|
|
# (p1.Card + p2.Card) * uMergeJoin +
|
|
# pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
|
|
#
|
|
#
|
|
# Cost formula (sortmergejoin_old)
|
|
# pRes->Time = p1.Time + p2.Time +
|
|
# p1.Card * p1.Size * uSortBy +
|
|
# p2.Card * p2.Size * uSortBy +
|
|
# (p1.Card * p1.Size + p2.Card * p2.Size) * wMergeJoin +
|
|
# pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
|
|
#
|
|
#
|
|
#
|
|
# const double uSortBy = 0.00043; //millisecs per byte read in sort step
|
|
#
|
|
# const double uMergeJoin = 0.0008077; //millisecs per tuple read
|
|
# //in merge step (merge)
|
|
#
|
|
# const double wMergeJoin = 0.0001738; //millisecs per byte read in
|
|
# //merge step (sortmerge)
|
|
#
|
|
# const double xMergeJoin = 0.0012058; //millisecs per result tuple in
|
|
# //merge step
|
|
#
|
|
# const double yMergeJoin = 0.0001072; //millisecs per result attribute in
|
|
# //merge step
|
|
#
|
|
#
|
|
# The following queries are taken from
|
|
# secondo/Algebras/ExtRelation-C++/ConstantsSortmergejoin.txt
|
|
#
|
|
#####################################################################
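# Illustrative evaluation of the mergejoin cost formula above, using
# hypothetical cardinalities and the default constants quoted above: for
# p1.Card = p2.Card = pRes->Card = 1000000 and pRes->noAttrs = 6,
#   (1000000 + 1000000) * 0.0008077
#   + 1000000 * (0.0012058 + 6 * 0.0001072)
#   = 1615.4 + 1849.0 = 3464.4 ms on top of p1.Time + p2.Time.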
|
|
|
|
let plz10 = plz feed ten feed product extend[Ran: randint(50000)]
|
|
sortby[Ran asc] remove[Ran] consume
|
|
|
|
let plz10Even = plz10 feed extend[PLZE: .PLZ * 2] project[PLZE, Ort, No] consume
|
|
|
|
let plz10Odd = plz10 feed extend[PLZO: (.PLZ * 2) + 1] project[PLZO, Ort, No] consume
|
|
|
|
let FirstEven = plz10Even feed head[80] consume
|
|
|
|
##
|
|
# uSortBy (millisecs per byte read in sort step)
|
|
##
|
|
|
|
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
query plz10Even feed count
|
|
|
|
query plz10Even feed count
|
|
|
|
query plz10Even feed count
|
|
|
|
query plz10Even feed count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x4 = (plz10Even count) * (FirstEven count);
|
|
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uSortBy = (((x1 - x2) * 1000) / x4) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "uSortBy", uSortBy, "millisecs per byte read in sort step", ""] consume
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
|
|
##
|
|
# uMergeJoin (millisecs per tuple read in merge step (merge))
|
|
##
|
|
|
|
let plz50Even = plz10Even feed ten feed filter[.No < 6] {t} product remove[No_t] consume
|
|
|
|
let plz50EvenSortedPLZE = plz50Even feed sortby[PLZE asc] consume
|
|
|
|
let plz10EvenSorted = plz10Even feed sortby[PLZE asc] consume
|
|
|
|
let plz10OddSorted = plz10Odd feed sortby[PLZO asc] consume
|
|
|
|
|
|
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
|
|
|
|
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
|
|
|
|
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
|
|
|
|
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plz50EvenSortedPLZE feed count
|
|
|
|
query plz50EvenSortedPLZE feed count
|
|
|
|
query plz50EvenSortedPLZE feed count
|
|
|
|
query plz50EvenSortedPLZE feed count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plz10OddSorted feed count
|
|
|
|
query plz10OddSorted feed count
|
|
|
|
query plz10OddSorted feed count
|
|
|
|
query plz10OddSorted feed count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x3 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
let x4 = plz50EvenSortedPLZE count;
|
|
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uMergeJoin = (((x1 - (x2 + x3)) * 1000) / x4) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "uMergeJoin", uMergeJoin, "millisecs per tuple read in merge step (merge)", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# wMergeJoin (millisecs per byte read in merge step (sortmerge))
|
|
##
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
let plz10Even15Attrs = plz10Even feed extend[
|
|
A1: 1,
|
|
A2: 1,
|
|
A3: 1,
|
|
A4: 1,
|
|
A5: 1,
|
|
A6: 1,
|
|
A7: 1,
|
|
A8: 1,
|
|
A9: 1,
|
|
A10: 1,
|
|
A11: 1,
|
|
A12: 1]
|
|
consume
|
|
|
|
|
|
let plz10Odd15Attrs = plz10Odd feed extend[
|
|
A1: 1,
|
|
A2: 1,
|
|
A3: 1,
|
|
A4: 1,
|
|
A5: 1,
|
|
A6: 1,
|
|
A7: 1,
|
|
A8: 1,
|
|
A9: 1,
|
|
A10: 1,
|
|
A11: 1,
|
|
A12: 1]
|
|
consume
|
|
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
let x3 = plz10Even15Attrs tuplesize;
|
|
|
|
let x4 = plz10Even tuplesize;
|
|
|
|
let x5 = plz10Even count;
|
|
|
|
|
|
# Count all processed bytes
|
|
let x6 = x5 * (x3 + x4);
|
|
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let wMergeJoin = (((x2 - x1) * 1000) / x6) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "wMergeJoin", wMergeJoin, "millisecs per byte read in merge step (sortmerge)", ""] consume
|
|
|
|
|
|
|
|
##
|
|
# xMergeJoin (millisecs per result tuple in merge step)
|
|
# yMergeJoin (millisecs per result attribute in merge step)
|
|
##
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
# Result 0 tuples
|
|
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# Result Tuples 24879300
|
|
let x3 = plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# Difference: time per result tuple with 6 attributes
|
|
let x4 = ((x2 - x1) / x3);
|
|
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
|
|
|
|
let x5 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
|
|
|
|
let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# Time per tuple
|
|
let xMergeJoin = (((x6 - x5 - x2 - x1) * 1000) / x3) * MF;
|
|
|
|
# Time for additional 12 attributes
|
|
let yMergeJoin = (((x6 - x5 - x2 - x1) * 1000) / (x3 * 12)) * MF;
|
|
|
|
|
|
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "xMergeJoin", xMergeJoin, "millisecs per result tuple in merge step", ""] consume
|
|
|
|
query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "yMergeJoin", yMergeJoin, "millisecs per result attribute in merge step", ""] consume
|
|
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uSymmJoin;
|
|
|
|
|
|
# symmjoin ##########################################################
|
|
#
|
|
# Cost formula:
|
|
# pRes->Time = p1.Time + p2.Time +
|
|
# p1.Card * p2.Card * predCost * uSymmJoin;
|
|
#
|
|
#
|
|
# const double uSymmJoin = 0.2; //millisecs per tuple pair
|
|
#
|
|
#
|
|
#####################################################################
|
|
|
|
##
|
|
# measure time for processing a tuple pair (uSymmJoin)
|
|
##
|
|
|
|
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
# measure time for processing 5000 * 5000 = 25,000,000 tuple pairs
|
|
|
|
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 6000 * 6000 = 36,000,000 tuple pairs
|
|
|
|
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
|
|
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# x5 is the number of additional tuple pairs
|
|
let x5 = 36000000 - 25000000;
|
|
|
|
# x2 - x1 is the time needed to process x5 additional tuple pairs
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uSymmJoin = (((x2 - x1) * 1000) / x5) * MF;
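# Illustrative arithmetic with hypothetical timings: x5 is 11000000 here, so
# for x2 - x1 = 5 s,
#   uSymmJoin = (5 * 1000 / 11000000) * 3.35, about 0.0015 ms per tuple pair.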
|
|
|
|
query ProgressConstants inserttuple["ExtRelationAlgebra", "symmjoin", "uSymmJoin", uSymmJoin, "millisecs per tuple pair", ""] consume
|
|
|
|
|
|
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uItHashJoin;
|
|
delete vItHashJoin;
|
|
delete wItHashJoin;
|
|
delete xItHashJoin;
|
|
delete yItHashJoin;
|
|
|
|
# ithashjoin #######################################################
|
|
#
|
|
# Cost formula:
|
|
# pRes->Time = p1.Time + p2.Time + tuplesInTupleFile * wItHashJoin + (partitions - 1) * xItHashJoin;
|
|
#
|
|
# double uItHashJoin = 0.002; //millisecs per insert in hash table
|
|
# double vItHashJoin = 0.021; //millisecs for processing a tuple in right stream
|
|
# double wItHashJoin = 0.004; //millisecs for writing one byte to tuplefile
|
|
# double xItHashJoin = 0.001; //millisecs for reading one byte from tuplefile
|
|
# double yItHashJoin = 0.001; //millisecs for creating an attr in result
|
|
#
|
|
#####################################################################
|
|
|
|
##
|
|
# measure time for one hash table insert (uItHashJoin)
|
|
##
|
|
|
|
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
# measure time for processing 2000000 inserts
|
|
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
|
|
|
|
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
|
|
|
|
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# x5 is the number of additional tuples
|
|
let x5 = (plzbig count) - (plzsmall count);
|
|
|
|
# Calculate time to process one hashtable insert
|
|
# x1 - x2 is the time needed to insert x5 additional tuples
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uItHashJoin = (((x1 - x2) * 1000) / x5) * MF;
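# Illustrative arithmetic with hypothetical timings: x5 is 1000000 here, so
# for x1 - x2 = 1 s,
#   uItHashJoin = (1 * 1000 / 1000000) * 3.35 = 0.00335 ms per hash table insert.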
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "uItHashJoin", uItHashJoin, "msecs per hash table insert", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
##
|
|
# measure time for processing one tuple in right stream, partition = 1 (vItHashJoin)
|
|
##
|
|
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# measure time for processing 40000 tuples
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 20000 tuples
|
|
query plzsmall feed {r1} plzbig feed head [20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig feed head [20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# Calculate time to process one tuple of the right input
|
|
# x1 - x2 is the time needed to process 20000 additional tuples
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let vItHashJoin = (((x1 - x2) * 1000) / 20000) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "vItHashJoin", vItHashJoin, "msecs per tuple in right input stream (part. 1)", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
##
|
|
# measure time for writing and reading one byte to/from the TupleFile (wItHashJoin)
|
|
#
|
|
# Set memory to 128 MB, so itHashJoin runs with 2 partitions and
|
|
# has to write the tuples from the right stream to disk
|
|
#
|
|
##
|
|
|
|
# we always run 4 times and take the time of the last three, to have a consistent
|
|
# warm state
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x3 = plzbig_select0 count
|
|
|
|
let x4 = plzbig_select0 tuplesize;
|
|
|
|
# Amount of bytes written to Disk
|
|
let x5 = x3 * x4;
|
|
|
|
# Calculate time to read and write one byte to Tuplefile
|
|
# x1 - x2 is the time to write and read x5 bytes to Tuplefile
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let wItHashJoin = (((x1 - x2) * 1000) / x5) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "wItHashJoin", wItHashJoin, "msecs per byte written to TupleFile", ""] consume
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
|
|
##
|
|
# measure time for reading one byte from the TupleFile (xItHashJoin)
|
|
#
|
|
# We run itHashJoin with 2 and 3 partitions
|
|
# the run with 3 partitions has to write the tuples and read them twice (80 MB)
|
|
# the run with 2 partitions writes and reads the tuples only once (128 MB)
|
|
##
|
|
|
|
# we always run 4 times and take the time of the last three, to have a consistent
|
|
# warm state
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
|
|
|
|
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x3 = plzbig_select0 count
|
|
|
|
let x4 = plzbig_select0 tuplesize;
|
|
|
|
# Amount of bytes written to Disk
|
|
let x5 = x3 * x4;
|
|
|
|
# Calculate time to read and write one byte to Tuplefile
|
|
# x1 - x2 is the time to write and read x5 bytes to Tuplefile
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let xItHashJoin = (((x1 - x2) * 1000) / x5) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "xItHashJoin", xItHashJoin, "msecs per byte read from TupleFile", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# measure time for processing one result attr (yItHashJoin)
|
|
##
|
|
|
|
# measure time for processing 100000 * 100000 tuples with 6 attributes in result
|
|
let x3 = plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 100000 * 100000 tuples with 20 attributes in result
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
|
|
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
|
|
|
|
|
|
# calculate time for processing one attribute
|
|
# (x2 - x1) is the time for processing 10 attributes more on 1000000 tuples
|
|
#
|
|
# x3 is the size of the result relation
|
|
# so (14 * x3) is the number of additionally processed attributes
|
|
#
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let yItHashJoin = (((x2 - x1) * 1000) / (14 * x3)) * MF;
|
|
|
|
query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "yItHashJoin", yItHashJoin, "msecs per attr in result relation", ""] consume
|
|
|
|
|
|
#query ProgressConstants feed csvexport['ProgressConstants.csv', FALSE, TRUE] count
|
|
|
|
#close database
|
|
|
|
#quit
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uFeedProject;
|
|
delete vFeedProject;
|
|
delete wFeedProject;
|
|
|
|
# feedproject #######################################################
|
|
#
|
|
# Cost formula:
|
|
#
|
|
# pRes->Time = (fli->total + 1) *
|
|
# (uFeedProject
|
|
# + fli->argTupleSize * vFeedProject
|
|
# + fli->noAttrs * wFeedProject);
|
|
#
|
|
#
|
|
# double uFeedProject = 0.002; //millisecs per tuple
|
|
# double vFeedProject = 0.000036; //millisecs per byte input
|
|
# double wFeedProject = 0.0018; //millisecs per attr
|
|
#
|
|
#####################################################################
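# Illustrative evaluation of the cost formula above, using hypothetical input
# values and the C++ defaults quoted above: for 1000000 input tuples,
# argTupleSize = 50 bytes and noAttrs = 3,
#   per tuple: 0.002 + 50 * 0.000036 + 3 * 0.0018 = 0.002 + 0.0018 + 0.0054
#            = 0.0092 ms,
# i.e. about 9.2 s for the whole stream.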
|
|
|
|
##
|
|
# measure time for processing one tuple (uFeedProject)
|
|
##
|
|
|
|
# measure time for processing 2000000 tuples
|
|
query plzbig feedproject[Ort] count;
|
|
|
|
query plzbig feedproject[Ort] count;
|
|
|
|
query plzbig feedproject[Ort] count;
|
|
|
|
query plzbig feedproject[Ort] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 1000000 tuples
|
|
query plzsmall feedproject[Ort1] count;
|
|
|
|
query plzsmall feedproject[Ort1] count;
|
|
|
|
query plzsmall feedproject[Ort1] count;
|
|
|
|
query plzsmall feedproject[Ort1] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# x5 is the number of additional tuples
|
|
let x5 = (plzbig count) - (plzsmall count);
|
|
|
|
# Calculate time to process one tuple
|
|
# x1 - x2 is the time needed to process x5 additional tuples
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uFeedProject = (((x1 - x2) * 1000) / x5) * MF
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "feedproject", "uFeedProject", uFeedProject, "msecs per tuple", ""] consume
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# measure time for processing one byte in input (vFeedProject)
|
|
#
|
|
# Create two relations, each with 2,000,000 tuples and 2 attributes
|
|
# The attributes have different sizes. So we can measure the processing
|
|
# time per byte.
|
|
#
|
|
#
|
|
##
|
|
delete plzbig1;
|
|
delete plzbig2;
|
|
|
|
# plzbig has 2000000 tuples
|
|
let plzbig1 = plzbig feedproject[Ort, No] consume;
|
|
let plzbig2 = plzbig feedproject[PLZ, No] consume;
|
|
|
|
# measure time to process two attributes (Ort and No) on 2000000 tuples
|
|
query plzbig1 feedproject[Ort, No] count;
|
|
|
|
query plzbig1 feedproject[Ort, No] count;
|
|
|
|
query plzbig1 feedproject[Ort, No] count;
|
|
|
|
query plzbig1 feedproject[Ort, No] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
|
|
|
|
|
|
# measure time to process two attributes (PLZ and No) on 2000000 tuples
|
|
query plzbig2 feedproject[PLZ, No] count;
|
|
|
|
query plzbig2 feedproject[PLZ, No] count;
|
|
|
|
query plzbig2 feedproject[PLZ, No] count;
|
|
|
|
query plzbig2 feedproject[PLZ, No] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
|
|
|
|
let x3 = ((plzbig1 tuplesize) - (plzbig2 tuplesize));
|
|
|
|
let x4 = plzbig2 count;
|
|
|
|
# calculate the time to process one byte in input
|
|
# (x1 - x2) is the time to process a relation with
|
|
# 2000000 tuples and two attributes with a different
|
|
# attribute size
|
|
#
|
|
# x3 is the difference in bytes for the attributes in x1 and x2
|
|
# x4 is the number of tuples
|
|
# so (x3 * x4) is the number of additional bytes processed in x1
|
|
#
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let vFeedProject = (((x1 - x2) * 1000) / (x3 * x4) ) * MF;
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "feedproject", "vFeedProject", vFeedProject, "msecs per byte input", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
|
|
##
|
|
# measure time for processing one attr (wFeedProject)
|
|
##
|
|
|
|
# measure time for processing 1000000 tuples with 20 attributes
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# measure time for processing 1000000 tuples with 10 attributes
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x3 = plzsmallWidth10 count;
|
|
|
|
# calculate time for processing one attribute
|
|
# (x1 - x2) is the time for processing 10 attributes more on 1000000 tuples
|
|
#
|
|
# x3 is the size of the relation
|
|
# so (10 * x3) is the number of additionally processed attributes
|
|
#
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let wFeedProject = (((x1 - x2) * 1000) / (10 * x3)) * MF;
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "feedproject", "wFeedProject", wFeedProject, "msecs per attr", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
# product ###########################################################
|
|
#
|
|
# Cost formula:
|
|
#
|
|
# pRes->Time = p1.Time + p2.Time +
|
|
# p2.Card * p2.Size * uProduct +
|
|
# p1.Card * p2.Card * pRes->Size * vProduct;
|
|
#
|
|
# Note:
|
|
#
|
|
# Product will write the right tuple stream to disk,
|
|
# if the size of the stream exceeds the memory limit
|
|
# of the operator. Otherwise the operator works completely
|
|
# in memory.
|
|
#
|
|
# // millisecs per byte (right input stream) if data is written to disk
|
|
# double uProduct = 0.0003;
|
|
#
|
|
# //millisecs per byte (output stream) if data is read from disk
|
|
# double vProduct = 0.000042;
|
|
#
|
|
#####################################################################
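# Illustrative evaluation of the cost formula above, using hypothetical values
# and the defaults quoted above: for p1.Card = 1000, p2.Card = 20000,
# p2.Size = 50 bytes and pRes->Size = 100 bytes,
#   20000 * 50 * 0.0003 = 300 ms for writing the right stream to disk and
#   1000 * 20000 * 100 * 0.000042 = 84000 ms for producing the output,
# i.e. about 84 s on top of p1.Time + p2.Time.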
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uProduct;
|
|
delete vProduct;
|
|
|
|
##
|
|
# measure time for processing one byte in right stream (uProduct)
|
|
#
|
|
# we use a small left tuple stream and a big right tuple stream.
|
|
# so the right stream must be written to disk
|
|
##
|
|
|
|
# measure time to process 2000000 tuples (= 50 * 40000)
|
|
query fifty feed plz feed head[40000] product count;
|
|
|
|
query fifty feed plz feed head[40000] product count;
|
|
|
|
query fifty feed plz feed head[40000] product count;
|
|
|
|
query fifty feed plz feed head[40000] product count;
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# measure time to process 1000000 tuples (= 50 * 20000)
|
|
query fifty feed plz feed head[20000] product count;
|
|
|
|
query fifty feed plz feed head[20000] product count;
|
|
|
|
query fifty feed plz feed head[20000] product count;
|
|
|
|
query fifty feed plz feed head[20000] product count;
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# calculate size (in bytes) of right tuple stream (x1)
|
|
let sizex1 = ((plz tuplesize) * 40000);
|
|
|
|
# calculate size (in bytes) of right tuple stream (x2)
|
|
let sizex2 = ((plz tuplesize) * 20000);
|
|
|
|
|
|
# calculate the time to process one byte in right input stream
|
|
#
|
|
# (x1 - x2) * 1000 is the time (in msec) to process
|
|
# 20000 tuples more as right input
|
|
#
|
|
# (sizex1 - sizex2) is the amount of bytes for 20000 tuples
|
|
|
|
let uProduct = (((x1 - x2) * 1000) / (sizex1 - sizex2)) * MF;
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "product", "uProduct", uProduct, "msecs per byte (right tuple input stream)", ""] consume
|
|
|
|
|
|
# delete variables used before
|
|
delete sizex1;
|
|
delete sizex2;
|
|
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
##
|
|
# measure time for processing one byte in output stream (vProduct)
|
|
#
|
|
# assume that the right tuple stream is written to disk and
|
|
# must be read for every tuple in the left stream
|
|
##
|
|
|
|
# x5 is the size of the left stream
|
|
let x5 = 1000;
|
|
|
|
# x6 is the size of the right stream
|
|
let x6 = 20000;
|
|
|
|
# measure time to process 20000000 tuples (= 1000 * 20000) with 30 attributes
|
|
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
|
|
|
|
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
|
|
|
|
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
|
|
|
|
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# measure time to process 20000000 tuples (= 1000 * 20000) with 3 attributes
|
|
query plzsmall feed head[x5] plz feed head[x6] product count;
|
|
|
|
query plzsmall feed head[x5] plz feed head[x6] product count;
|
|
|
|
query plzsmall feed head[x5] plz feed head[x6] product count;
|
|
|
|
query plzsmall feed head[x5] plz feed head[x6] product count;
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# calculate the time to process one byte in output stream
|
|
# x1 - x2 is the time to process 27 attributes more on 20000000 tuples
|
|
# multiply by 1000 to get the time in milliseconds
|
|
#
|
|
# additionalSize is the difference in bytes for the attributes in x1 and x2
|
|
# for all tuples
|
|
#
|
|
# (x5 * x6) is the amount of tuples
|
|
|
|
delete additionalSize;
|
|
let additionalSize = ((plzsmallWidth10 tuplesize - plzsmall tuplesize) * (x5 * x6));
|
|
|
|
let vProduct = (((x1 - x2) * 1000) / (additionalSize)) * MF;
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "product", "vProduct", vProduct, "msecs per byte (output stream)", ""] consume
|
|
|
|
# project ###########################################################
|
|
#
|
|
# Cost formula:
|
|
#
|
|
# pRes->Time = p1.Time + p1.Card * (uProject + pli->noAttrs * vProject);
|
|
#
|
|
# double uProject = 0.00073; //millisecs per tuple
|
|
# double vProject = 0.0004; //millisecs per tuple and attribute
|
|
#
|
|
#####################################################################
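# Illustrative evaluation of the cost formula above, using hypothetical values
# and the defaults quoted above: for p1.Card = 1000000 and 10 projected
# attributes,
#   per tuple: 0.00073 + 10 * 0.0004 = 0.00473 ms,
# i.e. about 4.7 s for the whole stream.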
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
delete uProject;
|
|
delete vProject;
|
|
|
|
##
|
|
# measure time for processing one tuple (uProject)
|
|
##
|
|
|
|
# measure time for processing 2000000 tuples
|
|
query plzbig feed project[Ort] count;
|
|
|
|
query plzbig feed project[Ort] count;
|
|
|
|
query plzbig feed project[Ort] count;
|
|
|
|
query plzbig feed project[Ort] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# measure time for processing 1000000 tuples
|
|
query plzsmall feed project[Ort1] count;
|
|
|
|
query plzsmall feed project[Ort1] count;
|
|
|
|
query plzsmall feed project[Ort1] count;
|
|
|
|
query plzsmall feed project[Ort1] count;
|
|
|
|
# calculate avg. time to process these tuples
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# Calculate the time to process one tuple
|
|
# x1 - x2 is the time to process 1000000 tuples
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let uProject = (((x1 - x2) * 1000) / 1000000) * MF
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "project", "uProject", uProject, "msecs per tuple", ""] consume
|
|
|
|
#####
|
|
|
|
# delete old variables
|
|
delete x1;
|
|
delete x2;
|
|
|
|
##
|
|
# measure time for processing one attribute (vProject)
|
|
##
|
|
|
|
# measure time for processing 1000000 tuples with 20 attributes
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# measure time for processing 1000000 tuples with 10 attributes
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x3 = plzsmallWidth10 count;
|
|
|
|
# calculate the time for processing one attribute
|
|
# (x1 - x2) is the time for processing 10 attributes more on 1000000 tuples
|
|
#
|
|
# x3 is the size of the relation plzsmallWidth10
|
|
# so (10 * x3) is the number of additionally processed attributes
|
|
#
|
|
# multiply by 1000 to get the time in milliseconds
|
|
let vProject = (((x1 - x2) * 1000) / (10 * x3)) * MF;
|
|
|
|
query ProgressConstants inserttuple["Relation-C++", "project", "vProject", vProject, "msecs per attribute", ""] consume
|
|
|
|
###
|
|
# Debug
|
|
###
|
|
|
|
#query ProgressConstants;
|
|
|
|
# delete variables used before
|
|
delete x1;
|
|
delete x2;
|
|
delete x3;
|
|
delete x4;
|
|
delete x5;
|
|
delete x6;
|
|
|
|
# extendstream #######################################################
|
|
#
|
|
# Cost formula:
|
|
#
|
|
# pRes->Time = p1.Time +
|
|
# p1.Card * wExtendStream + // time per input tuple without results
|
|
# pRes->Card * (uExtendStream + eli->noAttrs * vExtendStream);
|
|
# // time per output tuple created
|
|
#
|
|
#####################################################################
|
|
|
|
delete uExtendStream;
|
|
delete vExtendStream;
|
|
delete wExtendStream;
|
|
|
|
let Trains100 = Trains feed hundred feed product extend[N: randint(999999)] sortby[N asc] remove[N] consume
|
|
|
|
# has 56200 tuples
|
|
|
|
let plz50 = plz feed fifty feed product extend[N: randint(999999)] sortby[N asc] remove[N] consume
|
|
|
|
# has 2063350 tuples
|
|
|
|
|
|
######### wExtendStream: time per tuple read
|
|
|
|
# measure time for query plz50 feed count
|
|
# we always run 4 times and take the time of the last three, to have a consistent
|
|
# warm state
|
|
|
|
query plz50 feed count
|
|
|
|
query plz50 feed count
|
|
|
|
query plz50 feed count
|
|
|
|
query plz50 feed count
|
|
|
|
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# measure time with extendstream on 2063350 tuples added
|
|
|
|
query plz50 feed extendstream[U: intstream(1, 0)] count
|
|
|
|
query plz50 feed extendstream[U: intstream(1, 0)] count
|
|
|
|
query plz50 feed extendstream[U: intstream(1, 0)] count
|
|
|
|
query plz50 feed extendstream[U: intstream(1, 0)] count
|
|
|
|
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
# get the time difference and divide by 2063350. Yields time in seconds.
|
|
# Multiply by 1000 to get the time in milliseconds
|
|
|
|
let wExtendStream = ((x2 - x1) / 2063350) * 1000.0 * MF
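# Illustrative arithmetic with a hypothetical timing: for x2 - x1 = 2.5 s,
#   wExtendStream = (2.5 / 2063350) * 1000.0 * 3.35, about 0.0041 ms per input tuple.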
|
|
|
|
query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "wExtendStream", wExtendStream, "msecs per input tuple", ""] consume
|
|
|
|
######### vExtendStream: time per attribute returned
|
|
|
|
query Trains100 feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
query Trains100 feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
query Trains100 feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
query Trains100 feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
let x3 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# Result 5154400
|
|
|
|
|
|
# Version of Trains100 with more attributes:
|
|
|
|
let Trains100B = Trains100 feed extend[IdB: .Id, LineB: .Line, UpB: .Up, TripB: .Trip, NoB: .No] consume
|
|
|
|
query Trains100B feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
query Trains100B feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
query Trains100B feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
query Trains100B feed extendstream[UTrip: units(.Trip)] count
|
|
|
|
let x4 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
# the time difference between x3 and x4 is for processing 5 more attributes
|
|
# for 5154400 tuples. Hence we have
|
|
|
|
let vExtendStream = (((x4 -x3) * 1000) / (5 * 5154400)) * MF
|
|
|
|
query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "vExtendStream", vExtendStream, "msecs per attribute", ""] consume
|
|
|
|
########### uExtendStream: time per tuple returned
|
|
|
|
# If we subtract the time for all 10 attributes from x4 (= x4 - 2 * (x4 - x3)) and
|
|
# further subtract the time for the empty query "Trains100 feed count", the
|
|
# remainder must be the time per result tuple.
|
|
|
|
query Trains100 feed count
|
|
|
|
query Trains100 feed count
|
|
|
|
query Trains100 feed count
|
|
|
|
query Trains100 feed count
|
|
|
|
let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
|
|
|
|
|
|
let x5 = (x4 - (2 * (x4 - x3))) - x6
|
|
|
|
let uExtendStream = (MF * (x5 * 1000)) / 5154400
|
|
|
|
query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "uExtendStream", uExtendStream, "msecs per result tuple", ""] consume

#########################################################################


# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;

# feed ##############################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p1.Card * (uFeed + p1.noAttrs * vFeed);
#
#####################################################################
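
# The formula above as a small, self-contained C++ helper (a sketch only; the
# name and signature are illustrative, the real code lives in the progress
# estimation of the feed operator):
#
#   // progress time estimate for feed, in msecs
#   double feedTime(double inputTime, double card, int noAttrs,
#                   double uFeed, double vFeed)
#   {
#     return inputTime + card * (uFeed + noAttrs * vFeed);
#   }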

delete uFeed;
delete vFeed;

##### vFeed: time per attribute in input tuple

let plz50Width10 =
plz50 feed extend[
PLZ2: .PLZ,
Ort2: .Ort,
No2: .No,
PLZ3: .PLZ,
Ort3: .Ort,
No3: .No,
PLZ4: .PLZ,
Ort4: .Ort,
No4: .No,
PLZ5: .PLZ,
Ort5: .Ort,
No5: .No,
PLZ6: .PLZ,
Ort6: .Ort,
No6: .No,
PLZ7: .PLZ,
Ort7: .Ort,
No7: .No,
PLZ8: .PLZ,
Ort8: .Ort,
No8: .No,
PLZ9: .PLZ,
Ort9: .Ort,
No9: .No,
PLZ10: .PLZ,
Ort10: .Ort,
No10: .No]
consume

# plz50 has 3 attributes

# plz50Width10 has 30 attributes

query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x1 = 3.6913163333

query plz50Width10 feed count;
query plz50Width10 feed count;
query plz50Width10 feed count;
query plz50Width10 feed count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x2 = 20.4570833333

let x3 = (30 - 3) * (plz50 count)

# query x3 = 55710450

# hence the time per attribute in milliseconds is

let x4 = ((x2 - x1) * 1000) / x3

# query x4 = 0.0003009447

let vFeed = x4 * MF

# query vFeed = 0.0010081649

query ProgressConstants inserttuple["Relation-C++", "feed", "vFeed", vFeed, "msecs per attribute in input tuple", ""] consume
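
# Sanity check with the values recorded above:
#   x2 - x1 = 20.4570833333 - 3.6913163333 = 16.765767 seconds, spent on
#   x3 = 55710450 additional attribute instances, i.e.
#   16765.767 / 55710450 = 0.0003009447 msecs per attribute;
#   multiplied by MF = 3.35 this gives vFeed = 0.0010081649.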

##### uFeed: time per input tuple

delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;

# plzbig has 2000000 tuples
# plzsmall has 1000000 tuples
query plzbig feed count;
query plzbig feed count;
query plzbig feed count;
query plzbig feed count;
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plzsmall feed count;
query plzsmall feed count;
query plzsmall feed count;
query plzsmall feed count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# Time difference for processing 1000000 more tuples
let x3 = x1 - x2;

# this is the time for processing one tuple, in milliseconds
let x6 = (x3 / 1000000) * 1000

let uFeed = x6 * MF

# query uFeed = 0.002968628

query ProgressConstants inserttuple["Relation-C++", "feed", "uFeed", uFeed, "msecs per input tuple", ""] consume
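
# With the two feed constants measured above, the cost formula from the header
# gives, e.g. for a scan of plzsmall (1000000 tuples, 3 attributes):
#   1000000 * (0.002968628 + 3 * 0.0010081649) msecs =~ 5993 msecs =~ 6.0 s
# This is only an illustration of how the constants are used, not a new
# measurement.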

#####################################################################


# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
delete x10;
delete x11;
delete x12;

# consume ###########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time +
#              p1.Card * (uConsume + p1.SizeExt * vConsume
#                         + (p1.Size - p1.SizeExt) * wConsume);
#
# previous values
# const double uConsume = 0.024;    //millisecs per tuple
# const double vConsume = 0.0003;   //millisecs per byte in
#                                   // root/extension
# const double wConsume = 0.001338; //millisecs per byte in FLOB
#
#####################################################################
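
# The same formula as a compact C++ sketch (names are illustrative; Size and
# SizeExt are the average tuple sizes in bytes with and without FLOBs, as used
# in the measurements below):
#
#   // progress time estimate for consume, in msecs
#   double consumeTime(double inputTime, double card,
#                      double size, double sizeExt,
#                      double uConsume, double vConsume, double wConsume)
#   {
#     return inputTime
#            + card * (uConsume + sizeExt * vConsume
#                      + (size - sizeExt) * wConsume);
#   }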

delete uConsume;
delete vConsume;
delete wConsume;

##### vConsume: time per byte in root/extension

query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x1 = 3.8398513333

query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x2 = 8.2118556667

let x3 = (plz50Width10 tuplesize - plz50 tuplesize) * 200000

# query x3 = 45013347.22659753

# time per byte in msecs

let x4 = ((x2 - x1) / x3) * 1000

# query x4 = 9.71268e-05

let vConsume = x4 * MF

# query vConsume = 0.0003253749

query ProgressConstants inserttuple["Relation-C++", "consume", "vConsume", vConsume, "msecs per byte in root/extension tuple", ""] consume
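
# Sanity check with the values recorded above:
#   x2 - x1 = 8.2118556667 - 3.8398513333 = 4.3720043334 seconds for
#   x3 = 45013347 additional root/extension bytes, i.e.
#   4.3720043334 / 45013347.2266 * 1000 = 9.71268e-05 msecs per byte;
#   multiplied by MF = 3.35 this gives vConsume = 0.0003253749.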

##### uConsume: time per tuple

query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
let x5 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x5 = 3.5346763333

query plz50 feed consume count;
query plz50 feed consume count;
query plz50 feed consume count;
query plz50 feed consume count;
let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x6 = 36.8306773333

# The time difference between x6 and x5 is the time used by consume. From this
# we subtract the time corresponding to the number of bytes written.

let x10 = (plz50 tuplesize * plz50 count) * ((x2 - x1) / x3)

# query x10 = 5.0116528562 secs

let x11 = (((x6 - x5) - x10) / (plz50 count)) * 1000

# query x11 = 0.013707974

let uConsume = x11 * MF

# query uConsume = 0.0459217129

query ProgressConstants inserttuple["Relation-C++", "consume", "uConsume", uConsume, "msecs per tuple", ""] consume
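
# Sanity check with the values recorded above:
#   x11 = ((36.8306773333 - 3.5346763333) - 5.0116528562) / 2063350 * 1000
#       = 28.2843481438 / 2063350 * 1000 = 0.013707974 msecs per tuple;
#   multiplied by MF = 3.35 this gives uConsume = 0.0459217129.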

##### wConsume: time per byte in FLOBs

delete x7;
delete x8;
delete x9;

query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
let x7 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x7 = 1.440998

query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
let x8 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# query x8 = 25.8285243333

# The difference between the two queries is the cost of storing the Trip
# attribute. This attribute has size

# query Trains100 extattrsize[Trip] = 144
# query Trains100 attrsize[Trip] = 13351.0035587189

# We consider the entire time difference as spent on writing FLOB attributes
# (the 144 root/extension bytes are negligible against the 13351 bytes total).
# Hence we have the time per byte in milliseconds for writing FLOBs:

let x9 = ((x8 - x7) / (13351 * 56200)) * 1000

# query x9 = 3.25026e-05

let wConsume = x9 * MF

# query wConsume = 0.0001088836

query ProgressConstants inserttuple["Relation-C++", "consume", "wConsume", wConsume, "msecs per FLOB byte written", ""] consume
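
# Sanity check with the values recorded above:
#   x8 - x7 = 25.8285243333 - 1.440998 = 24.3875263333 seconds for roughly
#   13351 * 56200 = 750326200 FLOB bytes, i.e.
#   24.3875263333 / 750326200 * 1000 = 3.25026e-05 msecs per byte;
#   multiplied by MF = 3.35 this gives wConsume = 0.0001088836.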

#####################################################################

## Adding global constants

query ProgressConstants inserttuple["Global", "TupleFile", "twrite", wItHashJoin, "msecs per byte written to TupleFile", ""] consume

query ProgressConstants inserttuple["Global", "TupleFile", "tread", xItHashJoin, "msecs per byte read from TupleFile", ""] consume

query ProgressConstants inserttuple["Global", "ResultTuple", "attr", yItHashJoin, "msecs per attr in result relation", ""] consume

query ProgressConstants feed csvexport['ProgressConstants.csv', FALSE, TRUE] count

close database


# query ProgressConstants feed filter[.ConstantName = "vExtendStream"]
#   ProgressConstants updatedirect[ConstantValue: uExtendStream] consume