Files
secondo/bin/UpdateProgressConstants-NRW.sec
2026-01-23 17:03:45 +08:00

334 lines
11 KiB
Plaintext

# Open the target database and (re)create the empty relation that collects
# the measured progress constants.
# Every command is terminated explicitly with ';' so that it does not depend
# on a following blank line to be recognised as complete.
open database nrw2;
delete ProgressConstants;
let ProgressConstants = [const rel(tuple([
Algebra: string,
Operator: string,
ConstantName: string,
ConstantValue: real,
Meaning: string,
Meaning2: string
]))
value ()];
# ... csvimport['ProgressConstants.csv', 1, ""] consume
# machine factor, needed as long as not all constants are determined in this way.
delete MF;
let MF = 3.35;
# build some relations
# NOTE(review): the deletes below are disabled, so on a repeated run the
# `let` commands find the objects already present - confirm this is intended.
# delete Roads100000
# delete Roads200000
# delete Roads500000
# delete RoadsWidth20
# Roads100000 has 100.000 tuples
let Roads100000 = Roads feed head[100000] consume;
# Roads200000 has 200.000 tuples
let Roads200000 = Roads feed head[200000] consume;
# Roads500000 has 500.000 tuples
let Roads500000 = Roads feed head[500000] consume;
# RoadsWidth20 is Roads extended by ten copied attributes
# (20 attributes, given that Roads has 10)
let RoadsWidth20 =
Roads feed extend[
Osm_id2: .Osm_id,
Name2: .Name,
Ref2: .Ref,
Type2: .Type,
OneWay2: .Oneway,
Osm_id3: .Osm_id,
Name3: .Name,
Ref3: .Ref,
Type3: .Type,
OneWay3: .Oneway]
consume;
# Remove objects left over from an earlier run, so that the names are free
# for the `let` commands further down.
#
# the five constants that this script determines
delete yItSpatialJoin;
delete xItSpatialJoin;
delete wItSpatialJoin;
delete vItSpatialJoin;
delete uItSpatialJoin;
# temporary helper objects used by the measurements
delete x6;
delete x5;
delete x4;
delete x3;
delete x2;
delete x1;
# itspatialjoin #######################################################
#
# Cost formula:
# pRes->Time = p1.Time + p2.Time + tuplesInTupleFile * wItSpatialJoin + (partitions - 1) * xItSpatialJoin;
#
# double uItSpatialJoin = 0.002; //millisecs per insert in R-Tree
# double vItSpatialJoin = 0.021; //millisecs for processing a tuple in right stream
# double wItSpatialJoin = 0.004; //millisecs for writing and reading one byte into/from relation
# double xItSpatialJoin = 0.001; //millisecs for reading one byte from relation
# double yItSpatialJoin = 0.001; //millisecs for creating a attr in result tuple
#
#####################################################################
##
# measure time for one rtree insert (uItSpatialJoin)
#
# The second input is cut to zero tuples (head[0]), so the two series below
# differ only in the size of the first input.
##
# 200.000 inserts: one warm-up run, then three measured runs
query Roads200000 feed {r1} Roads200000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads200000 feed {r1} Roads200000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads200000 feed {r1} Roads200000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads200000 feed {r1} Roads200000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
# x1: avg. elapsed time of the last three commands
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# 100.000 inserts: one warm-up run, then three measured runs
query Roads100000 feed {r1} Roads100000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[0] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
# x2: avg. elapsed time of the last three commands
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# x5 is the number of additional tuples in the larger series
let x5 = (Roads200000 count) - (Roads100000 count);
# Calculate time to process one rtree insert
# x1 - x2 is the time to insert (x5) tuples more
# multiply by 1000 to get the time in milliseconds
let uItSpatialJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["MMRTreeAlgebra", "itSpatialJoin", "uItSpatialJoin", uItSpatialJoin, "msecs per rtree insert", ""] consume;
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for processing one tuple in right stream, partition = 1 (vItSpatialJoin)
##
# Each series is run 4 times and the average of the last three runs is
# taken, to have a consistent warm state.
#
# 40000 tuples in the right stream
query Roads100000 feed {r1} Roads100000 feed head[40000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[40000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[40000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[40000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
# x1: avg. elapsed time of the last three commands
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# 20000 tuples in the right stream
# (the first run is the warm-up run and is not part of the average)
query Roads100000 feed {r1} Roads100000 feed head[20000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[20000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[20000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads100000 feed {r1} Roads100000 feed head[20000] {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
# x2: avg. elapsed time of the last three commands
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# Calculate time to process one tuple in right stream
# x1 - x2 is the time to process (20000) tuples more
# multiply by 1000 to get the time in milliseconds
let vItSpatialJoin = (((x1 - x2) * 1000) / 20000) * MF;
query ProgressConstants inserttuple["MMRTreeAlgebra", "itSpatialJoin", "vItSpatialJoin", vItSpatialJoin, "msecs per tuple in right input stream (part. 1)", ""] consume;
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for reading and writing one byte into a Relation (wItSpatialJoin)
#
# Set memory to 300 MB, so itSpatialJoin runs with 2 partitions and
# has to write the tuples into the tuplefile
#
##
# we always run 4 times and take the time of the last three, to have consistent
# warm state
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
# x1: avg. elapsed time of the last three commands
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# Reference series with 500 MB
# (presumably a single partition, so no tuplefile is needed - TODO confirm)
# The first run is the warm-up run and is not part of the average.
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 500} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 500} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 500} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 500} count;
# x2: avg. elapsed time of the last three commands
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# x3: number of tuples, x4: root tuple size of Roads
let x3 = Roads count;
let x4 = Roads roottuplesize;
# Amount of bytes written to Disk
let x5 = x3 * x4;
# Calculate time to read and write one byte to Tuplefile
# x1 - x2 is the time to write and read x5 bytes to Tuplefile
# multiply by 1000 to get the time in milliseconds
let wItSpatialJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["MMRTreeAlgebra", "itSpatialJoin", "wItSpatialJoin", wItSpatialJoin, "msecs per byte written and read into/from Relation", ""] consume;
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for reading one byte from Relation (xItSpatialJoin)
#
# We run itSpatialJoin with 2 and 3 partitions
# the run with 3 partitions has to write the tuples and read them twice (140 MB)
# the run with 2 partitions writes and reads the tuples only once (300 MB)
##
# we always run 4 times and take the time of the last three, to have consistent
# warm state
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 140} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 140} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 140} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 140} count;
# x1: avg. elapsed time of the last three commands (3 partitions)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# 2 partitions; the first run is the warm-up run and is not part of the average
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] {memory 300} count;
# x2: avg. elapsed time of the last three commands (2 partitions)
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# x3: number of tuples, x4: root tuple size of Roads
let x3 = Roads count;
let x4 = Roads roottuplesize;
# Amount of bytes in the Tuplefile
let x5 = x3 * x4;
# Calculate time to read one byte from Tuplefile
# x1 - x2 is the time for the one additional read of x5 bytes
# multiply by 1000 to get the time in milliseconds
let xItSpatialJoin = (((x1 - x2) * 1000) / x5) * MF;
query ProgressConstants inserttuple["MMRTreeAlgebra", "itSpatialJoin", "xItSpatialJoin", xItSpatialJoin, "msecs per byte read from Relation", ""] consume;
# delete variables used before
delete x1;
delete x2;
delete x3;
delete x4;
delete x5;
delete x6;
##
# measure time for creating an attr in result tuple (yItSpatialJoin)
# Relation Roads has 10 attributes - so a join has 20 attributes
# Relation RoadsWidth20 has 20 attributes - so a join has 40 attributes
##
# we always run 4 times and take the time of the last three, to have consistent
# warm state
#
# The first run also stores the size of the join result in x3.
let x3 = Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query Roads feed {r1} Roads feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
# x1: avg. elapsed time of the last three commands (20 result attributes)
let x1 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
query RoadsWidth20 feed {r1} RoadsWidth20 feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query RoadsWidth20 feed {r1} RoadsWidth20 feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query RoadsWidth20 feed {r1} RoadsWidth20 feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
query RoadsWidth20 feed {r1} RoadsWidth20 feed {r2} itSpatialJoin[GeoData_r1, GeoData_r2, 4, 8] count;
# x2: avg. elapsed time of the last three commands (40 result attributes)
let x2 = SEC2COMMANDS feed tail[3] avg[ElapsedTime];
# calculate time for processing one attribute
# (x2 - x1) is the time for processing 20 attributes more
#
# x3 is the size of the result relation
# so (20 * x3) is the number of additionally processed attributes
#
# multiply by 1000 to get the time in milliseconds
let yItSpatialJoin = (((x2 - x1) * 1000) / (20 * x3)) * MF;
query ProgressConstants inserttuple["MMRTreeAlgebra", "itSpatialJoin", "yItSpatialJoin", yItSpatialJoin, "msecs per attr in result relation", ""] consume;
# Write the collected constants to a CSV file, then close the database and
# leave the interpreter. Each command is terminated explicitly with ';'.
query ProgressConstants feed csvexport['ProgressConstants-nrw.csv', FALSE, TRUE] count;
close database;
quit;