Files
secondo/bin/testDistAlg.sec
2026-01-23 17:03:45 +08:00

484 lines
11 KiB
Plaintext

# Test file for Distributed Algebra 2
#
# Commands in this script are separated by empty lines; lines starting
# with '#' are comments. Roads, Buildings, Natural, Waterways and
# Workers14 are used but not created in the visible part of this script.
##################################################################
# 6.1 Random Partitioning
#
# Rebuilds RoadsB1 from Roads with dfdistribute3. A previous RoadsB1 is
# dropped first (deleteRemoteObjects, then delete).
# NOTE(review): 50 and TRUE are presumably the number of slots and the
# round-robin switch of dfdistribute3 - confirm against the operator spec.

query deleteRemoteObjects(RoadsB1)

delete RoadsB1

let RoadsB1 = Roads feed dfdistribute3["RoadsB1", 50, TRUE, Workers14]

##################################################################
# 6.2 Hash Partitioning
#
# Rebuilds RoadsB2: Roads is distributed with ddistribute4, using
# hashvalue(.Osm_id, 999997) as the distribution expression.
# A previous RoadsB2 is dropped first.

query deleteRemoteObjects(RoadsB2)

delete RoadsB2

let RoadsB2 = Roads feed
  ddistribute4["RoadsB2", hashvalue(.Osm_id, 999997), 50, Workers14]

##################################################################
# 6.3 Range Partitioning
#
# Step 1: S holds the Name attribute of the Roads tuples selected by
# nth[113, FALSE], restricted to names that do not start with a blank,
# sorted by Name.

delete S

let S = Roads feed filter[not(.Name starts " ")]
  nth[113, FALSE]
  project[Name] sortby[Name] consume

# Step 2: Boundaries is derived from S with nth[101, TRUE]; addcounter
# adds the attribute D. Afterwards the tuple ("", 0) is inserted.

delete Boundaries

let Boundaries = S feedproject[Name] nth[101, TRUE]
  addcounter[D, 1] project[Name, D] consume

query Boundaries inserttuple["", 0] consume

# Step 3: Boundaries is loaded under the name "Boundaries" with
# letmconsume and an AVL tree is created on Name.
# NOTE(review): memclear() presumably empties the main-memory objects
# before loading - confirm.

query memclear()

query Boundaries feed letmconsume["Boundaries"] mcreateAVLtree["Name"]

# Step 4: Roads (same name filter as for S) is distributed with
# ddistribute4. The distribution expression looks up .Name with
# matchbelow on "Boundaries_Name" / "Boundaries" and extracts D.

query deleteRemoteObjects(RoadsB3)

delete RoadsB3

let RoadsB3 = Roads feed filter[not(.Name starts " ")]
  ddistribute4["RoadsB3",
    "Boundaries_Name" "Boundaries" matchbelow[.Name] extract[D],
    50, Workers14]

# Creating the full sorted order
#
# RoadsB3S: dmap applies sortby[Name] to RoadsB3.

query deleteRemoteObjects(RoadsB3S)

delete RoadsB3S

let RoadsB3S = RoadsB3 dmap["RoadsB3S", . feed sortby[Name]]

##################################################################
# 6.4 Spatial Partitioning
#
# grid is a cellgrid2d constant. It is the grid argument of all
# cellnumber and gridintersects calls in this script.

delete grid

let grid = [const cellgrid2d value (7.29 51.37 0.025 0.025 20)]

# RoadsB4: extendstream produces one tuple per value delivered by
# cellnumber for the bounding box of GeoData (attribute Cell); the result
# is distributed by Cell with dfdistribute2.

query deleteRemoteObjects(RoadsB4)

delete RoadsB4

let RoadsB4 = Roads feed
  extendstream[Cell: cellnumber(bbox(.GeoData), grid)]
  dfdistribute2["RoadsB4", Cell, 50, Workers14]

# BuildingsB4: EnlargedBox is the bounding box of GeoData passed through
# enlargeRect(..., 0.01, 0.01). Cell is computed from EnlargedBox.
# Original compares Cell with the value obtained by
# "cellnumber(...) transformstream extract[Elem]".
# NOTE(review): extract[Elem] presumably yields the first cell number, so
# Original marks exactly one copy per building - confirm.

query deleteRemoteObjects(BuildingsB4)

delete BuildingsB4

let BuildingsB4 = Buildings feed
  extend[EnlargedBox: enlargeRect(bbox(.GeoData), 0.01, 0.01)]
  extendstream[Cell: cellnumber(.EnlargedBox, grid)]
  extend[Original: .Cell = cellnumber(.EnlargedBox, grid)
    transformstream extract[Elem]]
  ddistribute2["BuildingsB4", Cell, 50, Workers14]

# ca. 40 min
# BuildingsB1: Buildings distributed with dfdistribute3, using the same
# arguments (50, TRUE, Workers14) as RoadsB1. BuildingsB1 is the input of
# the BuildingsB4a construction.

query deleteRemoteObjects(BuildingsB1)

delete BuildingsB1

let BuildingsB1 = Buildings feed
  dfdistribute3["BuildingsB1", 50, TRUE, Workers14]

# 712 sec = 11:52 min
# BuildingsB4a: the same EnlargedBox / Cell / Original attributes as in
# BuildingsB4, but computed from the already distributed BuildingsB1 with
# partitionF (partitioning expression ..Cell) followed by collect2.
# NOTE(review): the function given to partitionF references grid, while
# share("grid", ...) is issued only later (section 7.1.5). If this
# function is evaluated on the workers, grid must already exist there -
# confirm, or move the share call in front of this command.

query deleteRemoteObjects(BuildingsB4a)

delete BuildingsB4a

let BuildingsB4a = BuildingsB1 partitionF["",
    . feed extend[EnlargedBox: enlargeRect(bbox(.GeoData), 0.01, 0.01)]
      extendstream[Cell: cellnumber(.EnlargedBox, grid)]
      extend[Original: .Cell = cellnumber(.EnlargedBox, grid)
        transformstream extract[Elem] ],
    ..Cell, 0]
  collect2["BuildingsB4a", 1238]

# 4:47 min
##################################################################
# 6.5 Replication
#
# Calls share for the object named "Roads" with Workers14.
# NOTE(review): the second argument (FALSE) is presumably the
# "overwrite existing object" switch - confirm against the operator spec.

query share("Roads", FALSE, Workers14)

##################################################################
# 7 Querying
#
# 7.1 Selection
#
# 7.1.1 By Scanning
#
# Inside dmap, the tuples of RoadsB1 whose Name starts with
# "Universitätsstraße" are counted; getValue and tie[. + ..] add the
# resulting counts.

query RoadsB1 dmap["", . feed
    filter[.Name starts "Universitätsstraße"] count]
  getValue tie[. + ..]

# 5.58 seconds
# result 357
# eichlinghofen: region constant with four corner points (two distinct x
# and two distinct y values). It is the query window of the selections
# below and is passed to share together with Workers14.

delete eichlinghofen

let eichlinghofen = [const region value (
  (
    (
      (7.419515247680575 51.47332155746125)
      (7.394967670776298 51.47332155746125)
      (7.394967670776298 51.48716614802665)
      (7.419515247680575 51.48716614802665)))) ]

query share("eichlinghofen", FALSE, Workers14)

# Scan of BuildingsB4: keeps tuples with Original set whose GeoData
# intersects eichlinghofen; the result is collected with dsummarize and
# counted.

query BuildingsB4 dmap["", . feed filter[.Original]
    filter[.GeoData intersects eichlinghofen] ]
  dsummarize consume count

# 114 seconds
# 2263 objects
##################################################################
# 7.1.2 Creating a Standard Index
#
# RoadsB2_Name: dloop runs ". createbtree[Name]" over RoadsB2.
# A previous RoadsB2_Name is dropped first.

query deleteRemoteObjects(RoadsB2_Name)

delete RoadsB2_Name

let RoadsB2_Name = RoadsB2 dloop["RoadsB2_Name", . createbtree[Name] ]

##################################################################
# 7.1.3 Using a Standard Index
#
# dloop2 combines RoadsB2_Name with RoadsB2 and runs a range search from
# "Universitätsstraße" to "Universitätsstraße"++; the counts are added
# with tie[. + ..]. The recorded result (357) equals that of the scan in
# section 7.1.1.

query RoadsB2_Name RoadsB2
  dloop2["", . .. range["Universitätsstraße", "Universitätsstraße"++]
    count]
  getValue tie[. + ..]

# 4.38 seconds
# result 357
##################################################################
# 7.1.4 Creating a Spatial Index
#
# BuildingsB4_GeoData: inside dloop the tuples of BuildingsB4 get an id
# (addid), are sorted by the temporary attribute Box (EnlargedBox passed
# through scalerect), and are bulk-loaded into an R-tree on EnlargedBox
# after Box has been removed again.
# NOTE(review): dloop is called with the empty name "" here, whereas the
# B-tree definitions in this script pass an explicit name
# ("RoadsB2_Name", "RoadsB3_Name") - confirm this is intended.

query deleteRemoteObjects(BuildingsB4_GeoData)

delete BuildingsB4_GeoData

let BuildingsB4_GeoData = BuildingsB4 dloop["",
  . feed addid extend[Box: scalerect(.EnlargedBox, 1000000.0, 1000000.0)]
    sortby[Box] remove[Box] bulkloadrtree[EnlargedBox] ]

##################################################################
# 7.1.5 Using a Spatial Index
#
# NOTE(review): the selection query of this section does not reference
# grid; the share call appears to serve the queries that use
# cellnumber / gridintersects with grid inside distributed functions -
# confirm its placement.

query share("grid", FALSE, Workers14)

# Index-based version of the eichlinghofen selection: windowintersects on
# BuildingsB4_GeoData / BuildingsB4, then the Original and intersects
# filters. EnlargedBox is removed from the collected result before
# counting.

query BuildingsB4_GeoData BuildingsB4
  dmap2["", . .. windowintersects[eichlinghofen]
    filter[.Original]
    filter[.GeoData intersects eichlinghofen], 1238
  ]
  dsummarize remove[EnlargedBox] consume count

# 10 seconds
# 2263 objects
##################################################################
# 7.2 Join
#
# 7.2.1 Equijoin
#
# (1) Distributed by Join Attribute
#
# NaturalB2: Natural (names not starting with a blank) distributed with
# ddistribute4 by hashvalue(.Name, 999997).

query deleteRemoteObjects(NaturalB2)

delete NaturalB2

let NaturalB2 = Natural feed filter[not(.Name starts " ")]
  ddistribute4["NaturalB2", hashvalue(.Name, 999997), 50, Workers14]

# Self-join on Name inside dmap (renamings n1 / n2). The filter
# Osm_id_n1 < Osm_id_n2 keeps only pairs with ascending Osm_id.

query NaturalB2 dmap["",
    . feed {n1} . feed {n2} itHashJoin[Name_n1, Name_n2]
    filter[.Osm_id_n1 < .Osm_id_n2]]
  dsummarize consume count

# 10.74 seconds
# result 6284
##################################################################
# (2) Arbitrary Distribution
#
# NaturalB1: Natural distributed with dfdistribute3, i.e. not by Name.

query deleteRemoteObjects(NaturalB1)

delete NaturalB1

let NaturalB1 = Natural feed
  dfdistribute3["NaturalB1", 50, TRUE, Workers14]

# The query first repartitions NaturalB1 by hashvalue(..Name, 999997)
# (partitionF followed by collect2) and then runs the same self-join on
# Name as variant (1). The recorded result is the same (6284).

query NaturalB1 partitionF["", . feed filter[not(.Name starts " ")],
    hashvalue(..Name, 999997), 0]
  collect2["", 1238]
  dmap["",
    . feed {n1} . feed {n2} itHashJoin[Name_n1, Name_n2]
    filter[.Osm_id_n1 < .Osm_id_n2]]
  dsummarize consume count

# 43 seconds
# result 6284
##################################################################
# 7.2.2 Spatial Join
#
# (1) Both arguments are distributed by spatial attributes
#
# WaterwaysB4 is built from Waterways with the same extendstream /
# dfdistribute2 expression that is used for RoadsB4.

query deleteRemoteObjects(WaterwaysB4)

delete WaterwaysB4

let WaterwaysB4 = Waterways feed
  extendstream[Cell: cellnumber(bbox(.GeoData), grid)]
  dfdistribute2["WaterwaysB4", Cell, 50, Workers14]

# 16.16 seconds

# Join of RoadsB4 and WaterwaysB4 inside dmap2: itSpatialJoin on GeoData,
# then equal Cell values, the gridintersects test and finally intersects
# on the geometries. The counts are added with tie[. + ..].
# NOTE(review): gridintersects presumably suppresses duplicate results
# for pairs that share several cells - confirm.

query RoadsB4 WaterwaysB4 dmap2["",
    . feed {r} .. feed {w} itSpatialJoin[GeoData_r, GeoData_w]
    filter[.Cell_r = .Cell_w]
    filter[gridintersects(grid, bbox(.GeoData_r),
      bbox(.GeoData_w), .Cell_r)]
    filter[.GeoData_r intersects .GeoData_w] count, 1238 ]
  getValue tie[. + ..]

# 3:59min (239.123sec)
# result 61579
##################################################################
# (2) Not distributed by spatial attributes
#
# WaterwaysB1: Waterways distributed with dfdistribute3.

query deleteRemoteObjects(WaterwaysB1)

delete WaterwaysB1

let WaterwaysB1 = Waterways feed
  dfdistribute3["WaterwaysB1", 50, TRUE, Workers14]

# 3.27 seconds

# Variant with areduce2: RoadsB1 and WaterwaysB1 are each partitioned by
# Cell (partitionF with the cellnumber expression); areduce2 then applies
# the spatial join with the Cell, gridintersects and intersects filters.
# The counts are added with tie[. + ..].

query
  RoadsB1 partitionF["",
    . feed extendstream[Cell: cellnumber(bbox(.GeoData), grid)], ..Cell, 0]
  WaterwaysB1 partitionF["",
    . feed extendstream[Cell: cellnumber(bbox(.GeoData), grid)], ..Cell, 0]
  areduce2["",
    . feed {r} .. feed {w} itSpatialJoin[GeoData_r, GeoData_w]
    filter[.Cell_r = .Cell_w]
    filter[gridintersects(grid, bbox(.GeoData_r), bbox(.GeoData_w), .Cell_r)]
    filter[.GeoData_r intersects .GeoData_w] count, 1238 ]
  getValue tie[. + ..]

# 309 seconds
# result 61579
# Variant with collect2 + dmap2: both partitionF results are collected
# with collect2["", 1238] before dmap2 applies the same join function as
# the areduce2 variant. The recorded result is the same (61579).

query
  RoadsB1 partitionF["",
    . feed extendstream[Cell: cellnumber(bbox(.GeoData), grid)], ..Cell, 0]
  collect2["", 1238]
  WaterwaysB1 partitionF["",
    . feed extendstream[Cell: cellnumber(bbox(.GeoData), grid)], ..Cell, 0]
  collect2["", 1238]
  dmap2["",
    . feed {r} .. feed {w} itSpatialJoin[GeoData_r, GeoData_w]
    filter[.Cell_r = .Cell_w]
    filter[gridintersects(grid, bbox(.GeoData_r),
      bbox(.GeoData_w), .Cell_r)]
    filter[.GeoData_r intersects .GeoData_w] count, 1238 ]
  getValue tie[. + ..]

# 364 seconds
# result 61579
##################################################################
# Expressions in the Select-Clause
#
# Variant A: the spatial join of RoadsB4 and WaterwaysB4 runs inside
# dmap2; projectextend (which adds BridgePosition via crossings and keeps
# both GeoData attributes) is applied after dsummarize.

query RoadsB4 WaterwaysB4 dmap2["",
    . feed {r} .. feed {w} itSpatialJoin[GeoData_r, GeoData_w]
    filter[.Cell_r = .Cell_w]
    filter[gridintersects(grid, bbox(.GeoData_r),
      bbox(.GeoData_w), .Cell_r)]
    filter[.GeoData_r intersects .GeoData_w], 1238 ]
  dsummarize
  projectextend[Osm_id_r, Name_r, GeoData_r, Osm_id_w, Name_w, GeoData_w;
    BridgePosition: crossings(.GeoData_r, .GeoData_w)]
  consume count

# 6:09min (368.765sec)
# result size 61579
# Variant B: the same join, but projectextend is part of the dmap2
# function, i.e. it is applied before dsummarize. The GeoData attributes
# are not kept in the projection here.

query RoadsB4 WaterwaysB4 dmap2["",
    . feed {r} .. feed {w} itSpatialJoin[GeoData_r, GeoData_w]
    filter[.Cell_r = .Cell_w]
    filter[gridintersects(grid, bbox(.GeoData_r),
      bbox(.GeoData_w), .Cell_r)]
    filter[.GeoData_r intersects .GeoData_w]
    projectextend[Osm_id_r, Name_r, Osm_id_w, Name_w; BridgePosition:
      crossings(.GeoData_r, .GeoData_w)], 1238
  ]
  dsummarize
  consume count

# 4:08min (248.387sec)
# result size 61579
##################################################################
# 7.2.3 General Join
#
# Waterways is passed to share (second argument TRUE) so that the dmap
# function can read it by name.

query share("Waterways", TRUE, Workers14)

# Inside dmap: roads of RoadsB1 whose Type contains "pedestrian" (names
# not starting with a blank) are joined by symmjoin with the waterways
# whose Type contains "river", using the predicate
# Name_r contains Name_w.

query RoadsB1 dmap["", . feed filter[not(.Name starts " ")]
    filter[.Type contains "pedestrian"] {r}
    Waterways feed filter[.Type contains "river"] {w}
    symmjoin[.Name_r contains ..Name_w] ]
  dsummarize
  consume count

# 38.54 sec
# result 0
##################################################################
# 7.2.4 Index-Based Equijoin
#
# RoadsB3_Name: dloop runs ". createbtree[Name]" over RoadsB3.

query deleteRemoteObjects(RoadsB3_Name)

delete RoadsB3_Name

let RoadsB3_Name = RoadsB3 dloop["RoadsB3_Name", . createbtree[Name]]

# Self-join of RoadsB3 on Name in two dmap2 steps:
# 1. roads whose Type contains "raceway" (renamed r1) are joined by
#    loopjoin / exactmatchS with the index RoadsB3_Name;
# 2. the result is combined with RoadsB3 via gettuples and filtered with
#    Osm_id_r1 < Osm_id.

query RoadsB3 RoadsB3_Name dmap2["",
    . feed filter[.Type contains "raceway"] {r1}
      loopjoin[.. exactmatchS[.Name_r1]], 1238]
  RoadsB3
  dmap2["", . feed .. gettuples filter[.Osm_id_r1 < .Osm_id], 1238 ]
  dsummarize
  consume count

# 15.7 sec
# result 29
##################################################################
# 7.2.5 Index-Based Spatial Join
#
# Two dmap2 steps:
# 1. roads of RoadsB4 whose Type contains "raceway" (renamed r) are
#    joined by loopjoin / windowintersectsS with the index
#    BuildingsB4_GeoData;
# 2. the result is combined with BuildingsB4 via gettuples2[Id, b] and
#    filtered by equal Cell, gridintersects and
#    distance(gk(GeoData_r), gk(GeoData_b)) < 500.
# The counts are added with tie[. + ..].

query RoadsB4 BuildingsB4_GeoData dmap2["",
    . feed filter[.Type contains "raceway"] {r}
      loopjoin[.. windowintersectsS[.GeoData_r]], 1238]
  BuildingsB4
  dmap2["", . feed .. gettuples2[Id, b] filter[.Cell_r = .Cell_b]
    filter[gridintersects(grid, bbox(.GeoData_r), .EnlargedBox_b, .Cell_r)]
    filter[distance(gk(.GeoData_r), gk(.GeoData_b)) < 500] count, 1238 ]
  getValue tie[. + ..]

# 2:42min (162.014sec)
# result 14859
##################################################################
# 7.3 Aggregation
#
# 7.3.1 Counting
#
# Two-level aggregation: inside dmap the tuples of RoadsB1 are grouped by
# Type and counted (Cnt); after dsummarize the partial results are
# grouped by Type again and their Cnt values are summed.

query RoadsB1 dmap["", . feed sortby[Type]
    groupby[Type; Cnt: group count] ]
  dsummarize sortby[Type] groupby[Type; Cnt: group feed sum[Cnt]]
  consume count

# 9.33 seconds
# result 73 (groups)
# total number of group members is 1505462, correct.
##################################################################
# 7.3 Aggregation
#
# 7.3.2 Sum, Average
#
# Average Width per Type of WaterwaysB1, restricted to Width between 0 and
# 10000. Inside dmap each Type group delivers Cnt and SWidth; after
# dsummarize these are summed per Type and AWidth is computed as
# SumWidth / SumCnt.

query WaterwaysB1 dmap["", . feed filter[.Width between[0, 10000]]
    sortby[Type]
    groupby[Type; Cnt: group count, SWidth: group feed sum[Width]]
  ]
  dsummarize sortby[Type]
  groupby[Type; SumWidth: group feed sum[SWidth],
    SumCnt: group feed sum[Cnt]]
  extend[AWidth: .SumWidth / .SumCnt]
  project[Type, AWidth]
  consume

# using groupby2
#
# Same aggregation, but Cnt and SWidth are accumulated by groupby2
# functions (start value 0) without a preceding sortby, and AWidth is
# computed directly in the final groupby.

query WaterwaysB1 dmap["", . feed filter[.Width between[0, 10000]]
    groupby2[Type; Cnt: fun(t: TUPLE, agg:int) agg + 1::0,
      SWidth: fun(t2: TUPLE, agg2:int) agg2 + attr(t2, Width)::0]
  ]
  dsummarize sortby[Type]
  groupby[Type; AWidth: group feed sum[SWidth] / group feed sum[Cnt]]
  consume

# End of the groupby2 variant.