open database berlintest

delete ProgressConstants

let ProgressConstants = [const rel(tuple([
  Algebra: string,
  Operator: string,
  ConstantName: string,
  ConstantValue: real,
  Meaning: string,
  Meaning2: string ])) value ()]

# ... csvimport['ProgressConstants.csv', 1, ""] consume

# machine factor, needed as long as not all constants are determined in this way
delete MF;

let MF = 3.35;

# remove old relations
#delete plzbig;
#delete plzsmall;
#delete plzsmallWidth10;
#delete plzbig_select0

# build some relations
let fifty = thousand feed filter[.No <= 50] consume

let hundred = thousand feed filter[.No <= 100] consume

# plzbig has 2000000 tuples
let plzbig = plz feed head[40000] fifty feed product consume;

# plzbig_select0 has 2000000 tuples; the selectivity between plzbig and plzbig_select0 is 0
let plzbig_select0 = plzbig feed extend[PLZ2: .PLZ + 100000] remove[PLZ] renameattr[PLZ : PLZ2] consume;

# plzsmall has 1000000 tuples
let plzsmall = plzbig feed head[1000000] renameattr[PLZ1 : PLZ, Ort1 : Ort, No1 : No] consume;

# plzsmallWidth10 has 30 attributes and 1000000 tuples
let plzsmallWidth10 = plzsmall feed extend[
  PLZ2: .PLZ1, Ort2: .Ort1, No2: .No1,
  PLZ3: .PLZ1, Ort3: .Ort1, No3: .No1,
  PLZ4: .PLZ1, Ort4: .Ort1, No4: .No1,
  PLZ5: .PLZ1, Ort5: .Ort1, No5: .No1,
  PLZ6: .PLZ1, Ort6: .Ort1, No6: .No1,
  PLZ7: .PLZ1, Ort7: .Ort1, No7: .No1,
  PLZ8: .PLZ1, Ort8: .Ort1, No8: .No1,
  PLZ9: .PLZ1, Ort9: .Ort1, No9: .No1,
  PLZ10: .PLZ1, Ort10: .Ort1, No10: .No1] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete uHashJoin; delete vHashJoin; delete wHashJoin;
delete t_read; delete t_write; delete t_probe; delete t_hash; delete t_result;

# gracehashjoin / hybridhashjoin #####################################
#
# Cost formula:
#
# // calculate total time
# pRes->Time = p1.Time + p2.Time
#   + p2.Card * vHashJoin      // reading stream B into hash table
#   + p1.Card * uHashJoin      // probing stream A against hash table
#   + pRes->Card * wHashJoin;  // output of result tuples
#
# double t2 = p1.Card * ( t_probe + t_hash + t_read + t_write );
#
# const double GraceHashJoinProgressLocalInfo::uHashJoin = 0.023;
# const double GraceHashJoinProgressLocalInfo::vHashJoin = 0.0067;
# const double GraceHashJoinProgressLocalInfo::wHashJoin = 0.0025;
# const double GraceHashJoinProgressLocalInfo::t_read   = 0.001090;
# const double GraceHashJoinProgressLocalInfo::t_write  = 0.001090;
# const double GraceHashJoinProgressLocalInfo::t_probe  = 0.001557;
# const double GraceHashJoinProgressLocalInfo::t_hash   = 0.004163;
# const double GraceHashJoinProgressLocalInfo::t_result = 0.0044;
#
# The following queries are taken from Sven Jungnickel's
# Master's thesis, page 76.
#
#####################################################################
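# As a worked example of the cost formula above (hypothetical cardinalities,
# for illustration only): with p1.Card = p2.Card = 1000000 and
# pRes->Card = 100000, the join adds
#   1000000 * 0.0067 + 1000000 * 0.023 + 100000 * 0.0025
#   = 6700 + 23000 + 250 = 29950 millisecs
# on top of the input stream times p1.Time and p2.Time.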
let plz100 = plz feed thousand feed head[100] product extend[Ran: randint(50000)] sortby[Ran asc] remove[Ran] consume

let plz100Even = plz100 feed extend[PLZE: .PLZ * 2] project[PLZE, Ort, No] consume

let plz100Odd = plz100 feed extend[PLZO: (.PLZ * 2) + 1] project[PLZO, Ort, No] consume

##
# t_result
##

query plz100Even feed head[41267*10] {a} plz100Odd feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Odd feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Odd feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Odd feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZO_b, 1000] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x3 = plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# x3 is the result tuple count of the second set of queries
let tresult = (((x2 - x1) * 1000) / x3) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tresult", tresult, "millisecs per result tuple", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
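# Worked example (made-up timings): if the empty joins average x1 = 30.0 s,
# the result-producing joins average x2 = 34.5 s, and they return
# x3 = 4000000 result tuples, then
#   tresult = ((34.5 - 30.0) * 1000 / 4000000) * 3.35 ≈ 0.0038 millisecs
# per result tuple, in the same order of magnitude as the previously used
# constant t_result = 0.0044.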
##
# t_write
##

query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoinParam[PLZE_a, PLZE_b, 1000, 32, 1024*1024, 4096] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoinParam[PLZE_a, PLZE_b, 1000, 32, 1024*1024, 4096] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoinParam[PLZE_a, PLZE_b, 1000, 32, 1024*1024, 4096] head[1] count
query plz100Even feed head[41267*10] {a} plz100Even feed head[41267*10] {b} hybridhashjoinParam[PLZE_a, PLZE_b, 1000, 32, 1024*1024, 4096] head[1] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# 412670 (= 41267 * 10) is the number of tuples written in the second set of queries
let twrite = (((x2 - x1) * 1000) / 412670) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "twrite", twrite, "millisecs per written tuple", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# t_read
##

# Assume that tread = twrite.
# See Jungnickel, page 78.
#
let tread = twrite;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tread", tread, "millisecs per read tuple", ""] consume

##
# t_hash
##

query plz100Even feed {a} plz100Even feed head[41267*50] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*50] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*50] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*50] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz100Even feed {a} plz100Even feed head[41267*100] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*100] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*100] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count
query plz100Even feed {a} plz100Even feed head[41267*100] {b} hybridhashjoin[PLZE_a, PLZE_b, 1000] head[1] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let thash = ((((x2 - x1) * 1000) / (41267 * 50)) * MF) - twrite;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "thash", thash, "millisecs per hash value written to disk", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
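# Worked example (made-up timings): if x1 = 20.0 s (41267*50 tuples in the
# right stream) and x2 = 27.0 s (41267*100 tuples), the extra 41267*50 = 2063350
# tuples cost 7.0 s, giving
#   ((7.0 * 1000) / 2063350) * 3.35 - twrite ≈ 0.01137 - twrite millisecs
# per hash value. twrite must be subtracted because each extra tuple is
# both hashed and written to disk.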
##
# t_probe
##

query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count
query plz100Even feed head[41267*20] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count
query plz100Even feed head[41267*30] {a} plz100Odd feed head[41267*20] {b} hybridhashjoinParam[PLZE_a, PLZO_b, 1000, 32, 1024*1024, 4096] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let tprobe = ((((x2 - x1) * 1000) / 412670) * MF) - twrite - thash - tread;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "tprobe", tprobe, "millisecs per hash table lookup", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# vHashJoin
#
# The following constants are determined by using hashjoin.
# See Jungnickel, page 138.
##

query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;

# measure time for processing 2000000 inserts
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} hashjoin[PLZ_r1, PLZ_r2] count;

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} hashjoin[PLZ1_r1, PLZ_r2] count;

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# x5 is the number of additional tuples
let x5 = (plzbig count) - (plzsmall count);

# Calculate the time to process one hash table insert.
# x1 - x2 is the time to insert x5 tuples more;
# multiply by 1000 to get the time in milliseconds.
let vHashJoin = (((x1 - x2) * 1000) / x5) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "vHashJoin", vHashJoin, "msecs per hash table insert", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
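# To inspect the constants collected so far, an optional debug aid
# (analogous to the commented-out debug queries further below):
#query ProgressConstants feed filter[.Operator = "gracehashjoin"] consume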
##
# uHashJoin
##

query plzsmall feed {r1} plzbig feed head[40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count

# measure time for processing 40000 tuples
query plzsmall feed {r1} plzbig feed head[40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[40000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for processing 20000 tuples
query plzsmall feed {r1} plzbig feed head[20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[20000] {r2} hashjoin[PLZ1_r1, PLZ_r2] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# Calculate the time to process one tuple of the right input.
# x1 - x2 is the time to process 20000 tuples more;
# multiply by 1000 to get the time in milliseconds.
let uHashJoin = (((x1 - x2) * 1000) / 20000) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "uHashJoin", uHashJoin, "msecs per right input tuple", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# wHashJoin
##

# measure time for joining 100000 x 100000 tuples with 6 attributes in the result
let x3 = plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for joining 100000 x 100000 tuples with 20 attributes in the result
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} hashjoin[PLZ1_r1, PLZ1_r2] count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];

# calculate the time for processing one attribute
# (x2 - x1) is the time for processing the additional attributes
#
# x3 is the size of the result relation,
# so (14 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let wHashJoin = (((x2 - x1) * 1000) / (14 * x3)) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "gracehashjoin", "wHashJoin", wHashJoin, "msecs per attr in result relation", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete uSortBy; delete uMergeJoin; delete wMergeJoin; delete xMergeJoin; delete yMergeJoin;

# mergejoin / sortmergejoin_old #####################################
#
# Cost formula (mergejoin):
#
# pRes->Time = p1.Time + p2.Time +
#   (p1.Card + p2.Card) * uMergeJoin +
#   pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
#
# Cost formula (sortmergejoin_old):
#
# pRes->Time = p1.Time + p2.Time +
#   p1.Card * p1.Size * uSortBy +
#   p2.Card * p2.Size * uSortBy +
#   (p1.Card * p1.Size + p2.Card * p2.Size) * wMergeJoin +
#   pRes->Card * (xMergeJoin + pRes->noAttrs * yMergeJoin);
#
# const double uSortBy    = 0.00043;   // millisecs per byte read in sort step
#
# const double uMergeJoin = 0.0008077; // millisecs per tuple read
#                                      // in merge step (merge)
#
# const double wMergeJoin = 0.0001738; // millisecs per byte read in
#                                      // merge step (sortmerge)
#
# const double xMergeJoin = 0.0012058; // millisecs per result tuple in
#                                      // merge step
#
# const double yMergeJoin = 0.0001072; // millisecs per result attribute in
#                                      // merge step
#
# The following queries are taken from
# secondo/Algebras/ExtRelation-C++/ConstantsSortmergejoin.txt
#
#####################################################################
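# Worked example of the sortmergejoin_old formula (hypothetical cardinalities
# and sizes): with p1.Card = p2.Card = 100000, p1.Size = p2.Size = 100 bytes,
# pRes->Card = 100000 and pRes->noAttrs = 6, the operator adds
#   sort:   2 * 100000 * 100 * 0.00043                = 8600  millisecs
#   merge:  (100000 * 100 + 100000 * 100) * 0.0001738 = 3476  millisecs
#   result: 100000 * (0.0012058 + 6 * 0.0001072)      = 184.9 millisecs
# on top of p1.Time and p2.Time.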
let plz10 = plz feed ten feed product extend[Ran: randint(50000)] sortby[Ran asc] remove[Ran] consume

let plz10Even = plz10 feed extend[PLZE: .PLZ * 2] project[PLZE, Ort, No] consume

let plz10Odd = plz10 feed extend[PLZO: (.PLZ * 2) + 1] project[PLZO, Ort, No] consume

let FirstEven = plz10Even feed head[80] consume

##
# uSortBy (millisecs per byte read in sort step)
##

query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count
query plz10Even feed {p1} FirstEven feed {p2} sortmergejoin[PLZE_p1, PLZE_p2] head[1] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz10Even feed count
query plz10Even feed count
query plz10Even feed count
query plz10Even feed count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x4 = (plz10Even count) * (FirstEven count);

# multiply by 1000 to get the time in milliseconds
let uSortBy = (((x1 - x2) * 1000) / x4) * MF;

query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "uSortBy", uSortBy, "millisecs per byte read in sort step", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
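# Worked example (made-up timings): if x1 = 9.1 s, x2 = 0.7 s and
# x4 = (plz10Even count) * (FirstEven count) = 412670 * 80 = 33013600, then
#   uSortBy = ((9.1 - 0.7) * 1000 / 33013600) * 3.35 ≈ 0.00085 millisecs.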
##
# uMergeJoin (millisecs per tuple read in merge step (merge))
##

let plz50Even = plz10Even feed ten feed filter[.No < 6] {t} product remove[No_t] consume

let plz50EvenSortedPLZE = plz50Even feed sortby[PLZE asc] consume

let plz10EvenSorted = plz10Even feed sortby[PLZE asc] consume

let plz10OddSorted = plz10Odd feed sortby[PLZO asc] consume

query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count
query plz50EvenSortedPLZE feed plz10OddSorted feed renameattr[PLZE: PLZO] concat count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz50EvenSortedPLZE feed count
query plz50EvenSortedPLZE feed count
query plz50EvenSortedPLZE feed count
query plz50EvenSortedPLZE feed count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz10OddSorted feed count
query plz10OddSorted feed count
query plz10OddSorted feed count
query plz10OddSorted feed count

# calculate avg. time to process these tuples
let x3 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x4 = plz50EvenSortedPLZE count;

# multiply by 1000 to get the time in milliseconds
let uMergeJoin = (((x1 - (x2 + x3)) * 1000) / x4) * MF;

query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "uMergeJoin", uMergeJoin, "millisecs per tuple read in merge step (merge)", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# wMergeJoin (millisecs per byte read in merge step (sortmerge))
##

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

let plz10Even15Attrs = plz10Even feed extend[
  A1: 1, A2: 1, A3: 1, A4: 1, A5: 1, A6: 1,
  A7: 1, A8: 1, A9: 1, A10: 1, A11: 1, A12: 1] consume

let plz10Odd15Attrs = plz10Odd feed extend[
  A1: 1, A2: 1, A3: 1, A4: 1, A5: 1, A6: 1,
  A7: 1, A8: 1, A9: 1, A10: 1, A11: 1, A12: 1] consume

query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] head[1] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x3 = plz10Even15Attrs tuplesize;
let x4 = plz10Even tuplesize;
let x5 = plz10Even count;

# count all processed bytes
let x6 = x5 * (x3 + x4);

# multiply by 1000 to get the time in milliseconds
let wMergeJoin = (((x2 - x1) * 1000) / x6) * MF;

query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "wMergeJoin", wMergeJoin, "millisecs per byte read in merge step (sortmerge)", ""] consume
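# Worked example (made-up values): if x2 - x1 = 3.2 s and the two input
# relations have tuple sizes x3 = 120 and x4 = 40 bytes with
# x5 = 412670 tuples each, then x6 = 412670 * 160 = 66027200 bytes and
#   wMergeJoin = ((3.2 * 1000) / 66027200) * 3.35 ≈ 0.000162 millisecs per byte.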
##
# xMergeJoin (millisecs per result tuple in merge step)
# yMergeJoin (millisecs per result attribute in merge step)
##

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

# Result: 0 tuples
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# Result: 24879300 tuples
let x3 = plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# Difference: time for result tuples with 6 attributes
let x4 = ((x2 - x1) / x3);

query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count
query plz10Even15Attrs feed {p1} plz10Odd feed {p2} sortmergejoin_old[PLZE_p1, PLZO_p2] count

let x5 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count
query plz10Even15Attrs feed {p1} plz10Even feed {p2} sortmergejoin_old[PLZE_p1, PLZE_p2] count

let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# time per tuple
let xMergeJoin = (((x6 - x5 - x2 - x1) * 1000) / x3) * MF;

# time for the additional 12 attributes
let yMergeJoin = (((x6 - x5 - x2 - x1) * 1000) / (x3 * 12)) * MF;

query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "xMergeJoin", xMergeJoin, "millisecs per result tuple in merge step", ""] consume

query ProgressConstants inserttuple["ExtRelationAlgebra", "mergejoin", "yMergeJoin", yMergeJoin, "millisecs per result attribute in merge step", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete uSymmJoin;

# symmjoin ##########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p2.Time +
#   p1.Card * p2.Card * predCost * uSymmJoin;
#
# const double uSymmJoin = 0.2; // millisecs per tuple pair
#
#####################################################################
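# Worked example of the formula (hypothetical values): with
# p1.Card = p2.Card = 1000, predCost = 0.01 and uSymmJoin = 0.2, the join
# work amounts to
#   1000 * 1000 * 0.01 * 0.2 = 2000 millisecs.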
##
# measure time for processing a tuple pair (uSymmJoin)
##

query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;

# measure time for processing 5000 * 5000 = 25000000 tuple pairs
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[5000] {r1} plzbig feed head[5000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for processing 6000 * 6000 = 36000000 tuple pairs
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;
query plzbig feed head[6000] {r1} plzbig feed head[6000] {r2} symmjoin[.PLZ_r1 = ..PLZ_r2] count;

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# x5 is the number of additional tuple pairs
let x5 = 36000000 - 25000000;

# x2 - x1 is the time to process x5 tuple pairs more;
# multiply by 1000 to get the time in milliseconds
let uSymmJoin = (((x2 - x1) * 1000) / x5) * MF;

query ProgressConstants inserttuple["ExtRelationAlgebra", "symmjoin", "uSymmJoin", uSymmJoin, "millisecs per tuple pair", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete uItHashJoin; delete vItHashJoin; delete wItHashJoin; delete xItHashJoin; delete yItHashJoin;

# ithashjoin ########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p2.Time + tuplesInTupleFile * wItHashJoin + (partitions - 1) * xItHashJoin;
#
# double uItHashJoin = 0.002; // millisecs per insert in hash table
# double vItHashJoin = 0.021; // millisecs for processing a tuple in the right stream
# double wItHashJoin = 0.004; // millisecs for writing one byte to the tuplefile
# double xItHashJoin = 0.001; // millisecs for reading one byte from the tuplefile
# double yItHashJoin = 0.001; // millisecs for creating an attr in the result
#
#####################################################################

##
# measure time for one hash table insert (uItHashJoin)
##

query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;

# measure time for processing 2000000 inserts
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;
query plzbig feed {r1} plz feed head[0] {r2} itHashJoin[PLZ_r1, PLZ_r2] count;

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;
query plzsmall feed {r1} plz feed head[0] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count;

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# x5 is the number of additional tuples
let x5 = (plzbig count) - (plzsmall count);

# Calculate the time to process one hash table insert.
# x1 - x2 is the time to insert x5 tuples more;
# multiply by 1000 to get the time in milliseconds.
let uItHashJoin = (((x1 - x2) * 1000) / x5) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "uItHashJoin", uItHashJoin, "msecs per hash table insert", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
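# Every constant in this script follows the same difference-of-averages
# pattern: run a large and a small workload, subtract the averaged times and
# divide by the extra work. As a C++-style sketch (hypothetical helper for
# illustration only, not part of SECONDO):
#
# // tBig, tSmall: averaged elapsed times (secs) of the larger and smaller runs
# // extraWork:    extra units of work (tuples, bytes, attrs) in the larger run
# double progressConstant(double tBig, double tSmall, double extraWork) {
#   return ((tBig - tSmall) * 1000.0 / extraWork) * MF; // millisecs per unit
# }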
##
# measure time for processing one tuple in the right stream, partitions = 1 (vItHashJoin)
##

query plzsmall feed {r1} plzbig feed head[40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count

# measure time for processing 40000 tuples
query plzsmall feed {r1} plzbig feed head[40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[40000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for processing 20000 tuples
query plzsmall feed {r1} plzbig feed head[20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig feed head[20000] {r2} itHashJoin[PLZ1_r1, PLZ_r2] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# Calculate the time to process one tuple of the right input.
# x1 - x2 is the time to process 20000 tuples more;
# multiply by 1000 to get the time in milliseconds.
let vItHashJoin = (((x1 - x2) * 1000) / 20000) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "vItHashJoin", vItHashJoin, "msecs per tuple in right input stream (part. 1)", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# measure time for writing and reading one byte of the TupleFile (wItHashJoin)
#
# Set the memory to 128 MB, so itHashJoin runs with 2 partitions and
# has to write the tuples from the right stream to disk.
##

# we always run 4 times and take the time of the last three, to have a consistent
# warm state
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x3 = plzbig_select0 count

let x4 = plzbig_select0 tuplesize;

# number of bytes written to disk
let x5 = x3 * x4;

# Calculate the time to write and read one byte of the TupleFile.
# x1 - x2 is the time to write and read x5 bytes of the TupleFile;
# multiply by 1000 to get the time in milliseconds.
let wItHashJoin = (((x1 - x2) * 1000) / x5) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "wItHashJoin", wItHashJoin, "msecs per byte written to TupleFile", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
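# Worked example (made-up values): with x3 = 2000000 tuples of
# x4 = 48 bytes, x5 = 96000000 bytes are written; if x1 - x2 = 3.0 s, then
#   wItHashJoin = ((3.0 * 1000) / 96000000) * 3.35 ≈ 0.000105 millisecs per byte.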
##
# measure time for reading one byte from the TupleFile (xItHashJoin)
#
# We run itHashJoin with 2 and 3 partitions:
# the run with 3 partitions (80 MB) has to write the tuples and read them twice,
# the run with 2 partitions (128 MB) writes and reads the tuples only once.
##

# we always run 4 times and take the time of the last three, to have a consistent
# warm state
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 80} count

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count
query plzsmall feed {r1} plzbig_select0 feed {r2} itHashJoin[PLZ1_r1, PLZ_r2] {memory 128} count

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x3 = plzbig_select0 count

let x4 = plzbig_select0 tuplesize;

# number of bytes written to disk
let x5 = x3 * x4;

# Calculate the time to read and write one byte of the TupleFile.
# x1 - x2 is the time to write and read x5 bytes of the TupleFile once more;
# multiply by 1000 to get the time in milliseconds.
let xItHashJoin = (((x1 - x2) * 1000) / x5) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "xItHashJoin", xItHashJoin, "msecs per byte read from TupleFile", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# measure time for processing one result attr (yItHashJoin)
##

# measure time for joining 100000 x 100000 tuples with 6 attributes in the result
let x3 = plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count
query plzsmall feed head[100000] {r1} plzsmall feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for joining 100000 x 100000 tuples with 20 attributes in the result
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;
query plzsmallWidth10 feed head[100000] {r1} plzsmallWidth10 feed head[100000] {r2} itHashJoin[PLZ1_r1, PLZ1_r2] count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];

# calculate the time for processing one attribute
# (x2 - x1) is the time for processing the additional attributes
#
# x3 is the size of the result relation,
# so (14 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let yItHashJoin = (((x2 - x1) * 1000) / (14 * x3)) * MF;

query ProgressConstants inserttuple["ExtRelation2Algebra", "itHashJoin", "yItHashJoin", yItHashJoin, "msecs per attr in result relation", ""] consume

#query ProgressConstants feed csvexport['ProgressConstants.csv', FALSE, TRUE] count
#close database
#quit

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete uFeedProject; delete vFeedProject; delete wFeedProject;

# feedproject #######################################################
#
# Cost formula:
#
# pRes->Time = (fli->total + 1) *
#   (uFeedProject
#    + fli->argTupleSize * vFeedProject
#    + fli->noAttrs * wFeedProject);
#
# double uFeedProject = 0.002;    // millisecs per tuple
# double vFeedProject = 0.000036; // millisecs per byte input
# double wFeedProject = 0.0018;   // millisecs per attr
#
#####################################################################
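# Worked example of the formula (hypothetical values): for
# fli->total = 1000000 input tuples of argTupleSize = 100 bytes projected to
# noAttrs = 3 attributes,
#   (1000000 + 1) * (0.002 + 100 * 0.000036 + 3 * 0.0018)
#   = 1000001 * 0.011 ≈ 11000 millisecs.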
##
# measure time for processing one tuple (uFeedProject)
##

# measure time for processing 2000000 tuples
query plzbig feedproject[Ort] count;
query plzbig feedproject[Ort] count;
query plzbig feedproject[Ort] count;
query plzbig feedproject[Ort] count;

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for processing 1000000 tuples
query plzsmall feedproject[Ort1] count;
query plzsmall feedproject[Ort1] count;
query plzsmall feedproject[Ort1] count;
query plzsmall feedproject[Ort1] count;

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# x5 is the number of additional tuples
let x5 = (plzbig count) - (plzsmall count);

# Calculate the time to process one tuple.
# x1 - x2 is the time to process x5 tuples more;
# multiply by 1000 to get the time in milliseconds.
let uFeedProject = (((x1 - x2) * 1000) / x5) * MF

query ProgressConstants inserttuple["Relation-C++", "feedproject", "uFeedProject", uFeedProject, "msecs per tuple", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
##
# measure time for processing one byte in input (vFeedProject)
#
# Create two relations, each with 2000000 tuples and 2 attributes.
# The attributes have different sizes, so we can measure the processing
# time per byte.
##

delete plzbig1;
delete plzbig2;

# plzbig has 2000000 tuples
let plzbig1 = plzbig feedproject[Ort, No] consume;
let plzbig2 = plzbig feedproject[PLZ, No] consume;

# measure time to process two attributes (Ort and No) on 2000000 tuples
query plzbig1 feedproject[Ort, No] count;
query plzbig1 feedproject[Ort, No] count;
query plzbig1 feedproject[Ort, No] count;
query plzbig1 feedproject[Ort, No] count;

# calculate avg. time to process these tuples
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];

# measure time to process two attributes (PLZ and No) on 2000000 tuples
query plzbig2 feedproject[PLZ, No] count;
query plzbig2 feedproject[PLZ, No] count;
query plzbig2 feedproject[PLZ, No] count;
query plzbig2 feedproject[PLZ, No] count;

# calculate avg. time to process these tuples
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];

let x3 = ((plzbig1 tuplesize) - (plzbig2 tuplesize));
let x4 = plzbig2 count;

# calculate the time to process one byte of input
# (x1 - x2) is the time difference for processing a relation with
# 2000000 tuples and two attributes of a different
# attribute size
#
# x3 is the difference in bytes between the attributes in x1 and x2,
# x4 is the number of tuples,
# so (x3 * x4) is the number of additional bytes processed in x1
#
# multiply by 1000 to get the time in milliseconds
let vFeedProject = (((x1 - x2) * 1000) / (x3 * x4)) * MF;

query ProgressConstants inserttuple["Relation-C++", "feedproject", "vFeedProject", vFeedProject, "msecs per byte input", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# measure time for processing one attr (wFeedProject)
##

# measure time for processing 1000000 tuples with 20 attributes
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count;

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time for processing 1000000 tuples with 10 attributes
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;
query plzsmallWidth10 feedproject[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x3 = plzsmallWidth10 count;

# calculate the time for processing one attribute
# (x1 - x2) is the time for processing 10 attributes more on 1000000 tuples
#
# x3 is the size of the relation,
# so (10 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let wFeedProject = (((x1 - x2) * 1000) / (10 * x3)) * MF;

query ProgressConstants inserttuple["Relation-C++", "feedproject", "wFeedProject", wFeedProject, "msecs per attr", ""] consume

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
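# Worked example (made-up timings): if x1 - x2 = 6.0 s for 10 extra
# attributes on x3 = 1000000 tuples, then
#   wFeedProject = ((6.0 * 1000) / (10 * 1000000)) * 3.35 = 0.00201 millisecs
# per attribute.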
# product ###########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p2.Time +
#   p2.Card * p2.Size * uProduct +
#   p1.Card * p2.Card * pRes->Size * vProduct;
#
# Note:
#
# Product will write the right tuple stream to disk
# if the size of the stream exceeds the memory limit
# of the operator. Otherwise the operator works completely
# in memory.
#
# // millisecs per byte (right input stream) if data is written to disk
# double uProduct = 0.0003;
#
# // millisecs per byte (output stream) if data is read from disk
# double vProduct = 0.000042;
#
#####################################################################

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete uProduct; delete vProduct;

##
# measure time for processing one byte in the right stream (uProduct)
#
# we use a small left tuple stream and a big right tuple stream,
# so the right stream must be written to disk
##

# measure time to process 2000000 tuples (= 50 * 40000)
query fifty feed plz feed head[40000] product count;
query fifty feed plz feed head[40000] product count;
query fifty feed plz feed head[40000] product count;
query fifty feed plz feed head[40000] product count;

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time to process 1000000 tuples (= 50 * 20000)
query fifty feed plz feed head[20000] product count;
query fifty feed plz feed head[20000] product count;
query fifty feed plz feed head[20000] product count;
query fifty feed plz feed head[20000] product count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# calculate size (in bytes) of the right tuple stream in x1
let sizex1 = ((plz tuplesize) * 40000);

# calculate size (in bytes) of the right tuple stream in x2
let sizex2 = ((plz tuplesize) * 20000);

# calculate the time to process one byte in the right input stream
#
# (x1 - x2) * 1000 is the time (in msecs) to process
# 20000 more tuples in the right input
#
# (sizex1 - sizex2) is the number of bytes of these 20000 tuples
let uProduct = (((x1 - x2) * 1000) / (sizex1 - sizex2)) * MF;

query ProgressConstants inserttuple["Relation-C++", "product", "uProduct", uProduct, "msecs per byte (right tuple input stream)", ""] consume

# delete variables used before
delete sizex1; delete sizex2;
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

##
# measure time for processing one byte in the output stream (vProduct)
#
# assume that the right tuple stream is written to disk and
# must be read for every tuple in the left stream
##

# x5 is the size of the left stream
let x5 = 1000;

# x6 is the size of the right stream
let x6 = 20000;

# measure time to process 20000000 tuples (= 1000 * 20000) with 30 attributes
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;
query plzsmallWidth10 feed head[x5] plz feed head[x6] product count;

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time to process 20000000 tuples (= 1000 * 20000) with 3 attributes
query plzsmall feed head[x5] plz feed head[x6] product count;
query plzsmall feed head[x5] plz feed head[x6] product count;
query plzsmall feed head[x5] plz feed head[x6] product count;
query plzsmall feed head[x5] plz feed head[x6] product count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# calculate the time to process one byte in the output stream
# x1 - x2 is the time to process 27 attributes more on 20000000 tuples;
# multiply by 1000 to get the time in milliseconds
#
# additionalSize is the difference in bytes between the attributes in x1 and x2,
# taken over all tuples
#
# (x5 * x6) is the number of tuples
delete additionalSize;

let additionalSize = ((plzsmallWidth10 tuplesize - plzsmall tuplesize) * (x5 * x6));

let vProduct = (((x1 - x2) * 1000) / (additionalSize)) * MF;

query ProgressConstants inserttuple["Relation-C++", "product", "vProduct", vProduct, "msecs per byte (output stream)", ""] consume
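# Worked example of the product formula (hypothetical values): with
# p2.Card = 20000 tuples of p2.Size = 48 bytes, p1.Card = 1000 and
# pRes->Size = 100 bytes, product adds
#   20000 * 48 * 0.0003           = 288   millisecs (writing the right stream)
#   1000 * 20000 * 100 * 0.000042 = 84000 millisecs (producing the output)
# on top of p1.Time and p2.Time.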
inserttuple["Relation-C++", "product", "vProduct", vProduct, "msecs per byte (output stream)", ""] consume # project ########################################################### # # Cost formula: # # pRes->Time = p1.Time + p1.Card * (uProject + pli->noAttrs * vProject); # # double uProject = 0.00073; //millisecs per tuple # double vProject = 0.0004; //millisecs per tuple and attribute # ##################################################################### # delete variables used before delete x1; delete x2; delete x3; delete x4; delete x5; delete x6; delete uProject; delete vProject; ## # measure time for processing one tuple (uProject) ## # measure time for processing 2000000 tuples query plzbig feed project[Ort] count; query plzbig feed project[Ort] count; query plzbig feed project[Ort] count; query plzbig feed project[Ort] count; # calculate avg. time to process these tuples let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime] # measure time for processing 1000000 tuples query plzsmall feed project[Ort1] count; query plzsmall feed project[Ort1] count; query plzsmall feed project[Ort1] count; query plzsmall feed project[Ort1] count; # calculate avg. time to process these tuples let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime] # Calculate the time to process one tuple # x1 - x2 is the time to process 1000000 tuples # multiply by 1000 to get the time in milliseconds let uProject = (((x1 - x2) * 1000) / 1000000) * MF query ProgressConstants inserttuple["Relation-C++", "project", "uProject", uProject, "msecs per tuple", ""] consume ##### # delete old variables delete x1; delete x2; ## # measure time for processing one attribute (vProject) ## # measure time for processing 1000000 tuples with 20 attributes query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count; query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count; query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count; query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4, PLZ4, No4, Ort5, PLZ5, No5, Ort6, PLZ6, No6, Ort7, PLZ7] count; let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime] # measure time for processing 1000000 tuples with 10 attributes query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count; query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count; query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count; query plzsmallWidth10 feed project[Ort1, PLZ1, No1, Ort2, PLZ2, No2, Ort3, PLZ3, No3, Ort4] count; let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime] let x3 = plzsmallWidth10 count; # calculate the time for processing one attribute # (x1 - x2) is the time for processing 10 attributes more on 1000000 tuples # # x3 is the size of the relation plzsmallWidth10 # so (10 * x3) is the number of additionally processed attributes # # multiply by 1000 to get the time in milliseconds let vProject = (((x1 - x2) * 1000) / (10 * x3)) * MF; query ProgressConstants inserttuple["Relation-C++", "project", "vProject", vProject, "msecs per attribute", ""] consume ### # Debug ### #query ProgressConstants; # delete variables used before delete x1; delete x2; delete x3; delete x4; delete x5; 
# extendstream ######################################################
#
# Cost formula:
#
# pRes->Time = p1.Time +
#   p1.Card * wExtendStream +   // time per input tuple without results
#   pRes->Card * (uExtendStream + eli->noAttrs * vExtendStream);
#                               // time per output tuple created
#
#####################################################################

delete uExtendStream;
delete vExtendStream;
delete wExtendStream;

# Trains100 has 56200 tuples
let Trains100 = Trains feed hundred feed product extend[N: randint(999999)] sortby[N asc] remove[N] consume

# plz50 has 2063350 tuples
let plz50 = plz feed fifty feed product extend[N: randint(999999)] sortby[N asc] remove[N] consume

######### wExtendStream: time per tuple read

# measure time for query plz50 feed count
# we always run 4 times and take the time of the last three, to have a consistent
# warm state
query plz50 feed count
query plz50 feed count
query plz50 feed count
query plz50 feed count

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# measure time with an extendstream over the 2063350 input tuples that creates no results
query plz50 feed extendstream[U: intstream(1, 0)] count
query plz50 feed extendstream[U: intstream(1, 0)] count
query plz50 feed extendstream[U: intstream(1, 0)] count
query plz50 feed extendstream[U: intstream(1, 0)] count

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# get the time difference and divide by 2063350. Yields the time in seconds;
# multiply by 1000 to get the time in milliseconds
let wExtendStream = ((x2 - x1) / 2063350) * 1000.0 * MF

query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "wExtendStream", wExtendStream, "msecs per input tuple", ""] consume

######### vExtendStream: time per attribute returned

query Trains100 feed extendstream[UTrip: units(.Trip)] count
query Trains100 feed extendstream[UTrip: units(.Trip)] count
query Trains100 feed extendstream[UTrip: units(.Trip)] count
query Trains100 feed extendstream[UTrip: units(.Trip)] count

let x3 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# Result 5154400

# Version of Trains100 with more attributes:
let Trains100B = Trains100 feed extend[IdB: .Id, LineB: .Line, UpB: .Up, TripB: .Trip, NoB: .No] consume

query Trains100B feed extendstream[UTrip: units(.Trip)] count
query Trains100B feed extendstream[UTrip: units(.Trip)] count
query Trains100B feed extendstream[UTrip: units(.Trip)] count
query Trains100B feed extendstream[UTrip: units(.Trip)] count

let x4 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# the time difference between x3 and x4 is for processing 5 more attributes
# for 5154400 tuples. Hence we have
let vExtendStream = (((x4 - x3) * 1000) / (5 * 5154400)) * MF

query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "vExtendStream", vExtendStream, "msecs per attribute", ""] consume
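# Worked example (made-up timings): if x4 - x3 = 2.9 s for 5 additional
# attributes on 5154400 result tuples, then
#   vExtendStream = ((2.9 * 1000) / (5 * 5154400)) * 3.35 ≈ 0.00038 millisecs
# per attribute.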
########### uExtendStream: time per tuple returned

# If we subtract the time for all 10 attributes from x4 (= x4 - 2 * (x4 - x3)) and
# further subtract the time for the empty query "Trains100 feed count", the
# remainder must be the time per result tuple.

query Trains100 feed count
query Trains100 feed count
query Trains100 feed count
query Trains100 feed count

let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

let x5 = (x4 - (2 * (x4 - x3))) - x6

let uExtendStream = (MF * (x5 * 1000)) / 5154400

query ProgressConstants inserttuple["ExtRelation-C++", "extendstream", "uExtendStream", uExtendStream, "msecs per result tuple", ""] consume

#########################################################################

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

# feed ##############################################################
#
# Cost formula:
#
# pRes->Time = p1.Time + p1.Card * (uFeed + p1.noAttrs * vFeed);
#
#####################################################################

delete uFeed;
delete vFeed;

##### vFeed: time per attribute in input tuple

let plz50Width10 = plz50 feed extend[
  PLZ2: .PLZ, Ort2: .Ort, No2: .No,
  PLZ3: .PLZ, Ort3: .Ort, No3: .No,
  PLZ4: .PLZ, Ort4: .Ort, No4: .No,
  PLZ5: .PLZ, Ort5: .Ort, No5: .No,
  PLZ6: .PLZ, Ort6: .Ort, No6: .No,
  PLZ7: .PLZ, Ort7: .Ort, No7: .No,
  PLZ8: .PLZ, Ort8: .Ort, No8: .No,
  PLZ9: .PLZ, Ort9: .Ort, No9: .No,
  PLZ10: .PLZ, Ort10: .Ort, No10: .No] consume

# plz50 has 3 attributes
# plz50Width10 has 30 attributes

query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x1 = 3.6913163333

query plz50Width10 feed count;
query plz50Width10 feed count;
query plz50Width10 feed count;
query plz50Width10 feed count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x2 = 20.4570833333

let x3 = (30 - 3) * (plz50 count)
# query x3 = 55710450

# hence the time per attribute in milliseconds is
let x4 = ((x2 - x1) * 1000) / x3
# query x4 = 0.0003009447

let vFeed = x4 * MF
# query vFeed = 0.0010081649

query ProgressConstants inserttuple["Relation-C++", "feed", "vFeed", vFeed, "msecs per attribute in input tuple", ""] consume

##### uFeed: time per input tuple

delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;

# plzbig has 2000000 tuples
# plzsmall has 1000000 tuples

query plzbig feed count;
query plzbig feed count;
query plzbig feed count;
query plzbig feed count;

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

query plzsmall feed count;
query plzsmall feed count;
query plzsmall feed count;
query plzsmall feed count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]

# time difference for processing 1000000 tuples more
let x3 = x1 - x2;

# this is the time for processing one tuple
let x6 = (x3 / 1000000) * 1000

let uFeed = x6 * MF
# query uFeed = 0.002968628

query ProgressConstants inserttuple["Relation-C++", "feed", "uFeed", uFeed, "msecs per input tuple", ""] consume

#####################################################################

# delete variables used before
delete x1; delete x2; delete x3; delete x4; delete x5; delete x6;
delete x10; delete x11; delete x12;

# consume ###########################################################
#
# Cost formula:
#
# pRes->Time = p1.Time +
#   p1.Card * (uConsume + p1.SizeExt * vConsume
#              + (p1.Size - p1.SizeExt) * wConsume);
#
# previous values
# const double uConsume = 0.024;    // millisecs per tuple
# const double vConsume = 0.0003;   // millisecs per byte in
#                                   // root/extension
# const double wConsume = 0.001338; // millisecs per byte in FLOB
#
#####################################################################

delete uConsume;
delete vConsume;
delete wConsume
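# Worked example of the consume formula (hypothetical values): storing
# p1.Card = 200000 tuples with SizeExt = 100 bytes in root/extension and no
# FLOB data costs about
#   200000 * (0.024 + 100 * 0.0003) = 200000 * 0.054 = 10800 millisecs
# with the previous constants.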
##### vConsume: time per byte in root/extension

query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;
query plz50 feed head[200000] consume count;

let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x1 = 3.8398513333

query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;
query plz50Width10 feed head[200000] consume count;

let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x2 = 8.2118556667

let x3 = (plz50Width10 tuplesize - plz50 tuplesize) * 200000
# query x3 = 45013347.22659753

# time per byte in msecs
let x4 = ((x2 - x1) / x3) * 1000
# query x4 = 9.71268e-05

let vConsume = x4 * MF
# query vConsume = 0.0003253749

query ProgressConstants inserttuple["Relation-C++", "consume", "vConsume", vConsume, "msecs per byte in root/extension tuple", ""] consume

##### uConsume: time per tuple

query plz50 feed count;
query plz50 feed count;
query plz50 feed count;
query plz50 feed count;

let x5 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x5 = 3.5346763333

query plz50 feed consume count;
query plz50 feed consume count;
query plz50 feed consume count;
query plz50 feed consume count;

let x6 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x6 = 36.8306773333

# the time difference between x6 and x5 is the time used for consume. From
# this we subtract the time corresponding to the number of bytes written
let x10 = (plz50 tuplesize * plz50 count) * ((x2 - x1) / x3)
# query x10 = 5.0116528562 secs

let x11 = (((x6 - x5) - x10) / (plz50 count)) * 1000
# query x11 = 0.013707974

let uConsume = x11 * MF
# query uConsume = 0.0459217129

query ProgressConstants inserttuple["Relation-C++", "consume", "uConsume", uConsume, "msecs per tuple", ""] consume
##### wConsume: time per byte in FLOBs

delete x7; delete x8; delete x9;

query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;
query Trains100 feed project[Id, Line, Up] consume count;

let x7 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x7 = 1.440998

query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;
query Trains100 feed project[Id, Line, Up, Trip] consume count;

let x8 = SEC2COMMANDS feed tail[3] avg[ElapsedTime]
# query x8 = 25.8285243333

# The difference between the two queries is storing the Trip attribute.
# This attribute has size
#   query Trains100 extattrsize[Trip] = 144
#   query Trains100 attrsize[Trip]    = 13351.0035587189
# We consider the entire time difference as spent on writing FLOB attributes.
# Hence we have the time per byte in milliseconds for writing FLOBs:
let x9 = ((x8 - x7) / (13351 * 56200)) * 1000
# query x9 = 3.25026e-05

let wConsume = x9 * MF
# query wConsume = 0.0001088836

query ProgressConstants inserttuple["Relation-C++", "consume", "wConsume", wConsume, "msecs per FLOB byte written", ""] consume

#####################################################################
## Adding global constants

query ProgressConstants inserttuple["Global", "TupleFile", "twrite", wItHashJoin, "msecs per byte written to TupleFile", ""] consume

query ProgressConstants inserttuple["Global", "TupleFile", "tread", xItHashJoin, "msecs per byte read from TupleFile", ""] consume

query ProgressConstants inserttuple["Global", "ResultTuple", "attr", yItHashJoin, "msecs per attr in result relation", ""] consume

query ProgressConstants feed csvexport['ProgressConstants.csv', FALSE, TRUE] count

close database

# query ProgressConstants feed filter[.ConstantName = "vExtendStream"]
#   ProgressConstants updatedirect[ConstantValue: uExtendStream] consume