Files
secondo/bin/Scripts/BalancedSpatialJoin.sec

249 lines
5.6 KiB
Plaintext
Raw Normal View History

2026-01-23 17:03:45 +08:00
# Use BalancedSpatialPartition first.
# (Presumably that script creates the Fields relation used below - confirm.)
#
# Size of the spatial join between Fields (attribute Field) and
# Buildings (attribute GeoData).
query
  Fields feed
  Buildings feed
  itSpatialJoin[Field, GeoData]
  count
# 54.19 seconds, 55.48
# result 6525125
# Same join, grouped by Field: Cnt starts at 0 and is incremented by one
# for every joined tuple of the group. The result is materialized.
query
  Fields feed
  Buildings feed
  itSpatialJoin[Field, GeoData]
  groupby2[Field; Cnt: fun(t: TUPLE, i: int) i + 1::0]
  consume
# 129 seconds
# Balance check: smallest and largest per-Field count (Cnt) over all fields.
# The second groupby has an empty grouping list, so all per-Field rows form
# a single group.
query
  Fields feed
  Buildings feed
  itSpatialJoin[Field, GeoData]
  groupby2[Field; Cnt: fun(t: TUPLE, i: int) i + 1::0]
  groupby[; Min: group feed min[Cnt],
            Max: group feed max[Cnt]]
  consume
# 122 seconds
# result:
#
# Min : 129198
# Max : 139990
# Consistency check: the per-Field counts must add up to the plain join count.
query
  Fields feed
  Buildings feed
  itSpatialJoin[Field, GeoData]
  groupby2[Field; Cnt: fun(t: TUPLE, i: int) i + 1::0]
  sum[Cnt]
# result: 6525125 (correct)
# There are 6525125 - 6516159 = 8966 duplicate buildings (about 0.14%).
# (6516159 is presumably the cardinality of Buildings - confirm.)
# Comparison with grid distribution. We use about 20 * 20 = 400 cells.
let grid2 =
  [const cellgrid2d value (5.27 50.36 0.245 0.11 20)]
# exactly partitions World into 400 cells.
#
# One output tuple per (building, grid cell) pair as delivered by cellnumber
# for the building's bounding box; N folds the cell number into 48 buckets.
query
  Buildings feed
  extendstream[Cell: cellnumber(bbox(.GeoData), grid2)]
  extend[N: .Cell mod 48]
  count
# 191 seconds
# result 6527803
# Histogram of the grid distribution: number of (building, cell) tuples that
# fall into each bucket N = Cell mod 48. Uses grid2 as defined earlier in
# this script.
query Buildings feed extendstream[Cell: cellnumber(bbox(.GeoData), grid2)]
extend[N: .Cell mod 48]
groupby2[N; Cnt: fun(t:TUPLE, i:int) i + 1::0]
consume
# 175 seconds
# partial result (first 11 groups, N = 0 .. 10):
# (The listing is kept as comments so that it is not read as commands.)
#
# N : 0
# Cnt : 111482
# N : 1
# Cnt : 197285
# N : 2
# Cnt : 184358
# N : 3
# Cnt : 144331
# N : 4
# Cnt : 203089
# N : 5
# Cnt : 109011
# N : 6
# Cnt : 123624
# N : 7
# Cnt : 91586
# N : 8
# Cnt : 128755
# N : 9
# Cnt : 77923
# N : 10
# Cnt : 38344
# Local (non-distributed) join of Roads and Buildings; attributes are renamed
# with the suffixes _r and _b. No timing or result was recorded here.
query Roads feed {r} Buildings feed {b} itSpatialJoin[GeoData_r, GeoData_b] count
# Unfinished draft, disabled: an incomplete "let" would swallow or break the
# following command. The complete definition of BuildingsB5 appears further
# down in this script.
# let BuildingsB5 =
#
# Reference run of the Fields/Buildings join.
query Fields feed Buildings feed itSpatialJoin[Field, GeoData] count
# 54.21 seconds, 56.91, 55.35
# result 6525125
# Same join, additionally computing TLClass for every joined tuple, to
# measure the extra cost of topleftmax.
query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
extend[TLClass: topleftmax(.Field, bbox(.GeoData))] count
# 1:51 min, 2:03 min; 2:42 min, 1:29 min, 2:20min
# BuildingsB5: buildings joined with Fields, tagged with TLClass, with the
# Field attribute dropped, then distributed by attribute N into 50 slots over
# Workers14. (N is not created in this pipeline; presumably it comes from the
# Fields relation - confirm.)
let BuildingsB5 =
  Fields feed
  Buildings feed
  itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["BuildingsB5", N, 50, Workers14]
# BuildingsB5rest: per slot, only the tuples with TLClass < 3.
let BuildingsB5rest =
  BuildingsB5
  dloop["BuildingsB5rest", . feed filter[.TLClass < 3] consume]
# RoadsB5: roads joined with Fields, tagged with TLClass, with the Field
# attribute dropped, then distributed by attribute N into 50 slots over
# Workers14. (N presumably comes from the Fields relation - confirm.)
let RoadsB5 =
  Fields feed
  Roads feed
  itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["RoadsB5", N, 50, Workers14]
# 4:16min
# RoadsB5rest: per slot, only the tuples with TLClass < 3.
let RoadsB5rest =
  RoadsB5
  dloop["RoadsB5rest", . feed filter[.TLClass < 3] consume]
# 10.12 seconds
# Local (non-distributed) join of Roads and Waterways; attributes are renamed
# with the suffixes _r and _w. Serves as the reference result for the
# distributed variants.
query
  Roads feed {r}
  Waterways feed {w}
  itSpatialJoin[GeoData_r, GeoData_w]
  count
# 56.94 seconds
# result 1479958
# WaterwaysB5: waterways joined with Fields, tagged with TLClass, with the
# Field attribute dropped, then distributed by attribute N into 50 slots over
# Workers14. (N presumably comes from the Fields relation - confirm.)
let WaterwaysB5 =
  Fields feed
  Waterways feed
  itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["WaterwaysB5", N, 50, Workers14]
# 20.59 seconds
# WaterwaysB5rest: per slot, only the tuples with TLClass < 3.
let WaterwaysB5rest =
  WaterwaysB5
  dloop["WaterwaysB5rest", . feed filter[.TLClass < 3] consume]
# 12.46 seconds
# Distributed Roads x Waterways join, evaluated slot by slot with dmap2 over
# RoadsB5 and WaterwaysB5. Inside the dmap2 function, "." and ".." stand for
# the current slot of the first and second argument (roads and waterways).
#
# Per slot, the tuples with TLClass < 3 of each side are materialized once
# and passed via within2 as roads012 and waterways012. Four joins are then
# concatenated and counted:
#   1. all roads               x  waterways with TLClass = 3
#   2. roads with TLClass = 3  x  waterways012
#   3. roads012 with TLClass = 1  x  waterways012 with TLClass = 2
#   4. roads012 with TLClass = 2  x  waterways012 with TLClass = 1
# getValue tie[. + ..] adds up the per-slot counts.
# NOTE(review): 1238 is presumably the worker port argument of dmap2 -
# confirm. No timing or result was recorded for this variant.
query RoadsB5 WaterwaysB5 dmap2["",
. feed filter[.TLClass < 3] consume
.. feed filter[.TLClass < 3] consume
within2[fun(roads012: ANY, waterways012: ANY2)
. feed {r} .. feed filter[.TLClass = 3] {w}
itSpatialJoin[GeoData_r, GeoData_w]
. feed filter[.TLClass = 3] {r} waterways012 feed {w}
itSpatialJoin[GeoData_r, GeoData_w] concat
roads012 feed filter[.TLClass = 1] {r} waterways012 feed filter[.TLClass = 2] {w}
itSpatialJoin[GeoData_r, GeoData_w] concat
roads012 feed filter[.TLClass = 2] {r} waterways012 feed filter[.TLClass = 1] {w}
itSpatialJoin[GeoData_r, GeoData_w] concat
count
], 1238]
getValue tie[. + ..]
# Second creation run of the two distributed arrays, with new timings.
# NOTE(review): WaterwaysB5 and RoadsB5 are also defined earlier in this
# script; "let" on an existing object name presumably fails, so the old
# objects have to be deleted before re-running - confirm.
let WaterwaysB5 =
  Fields feed
  Waterways feed
  itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["WaterwaysB5", N, 50, Workers14]
# 23.4113sec
let RoadsB5 =
  Fields feed
  Roads feed
  itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["RoadsB5", N, 50, Workers14]
# 3:04min
# Distributed Waterways x Roads join with an explicit dmap2 function; the
# parameters waterways and roads are the current slots of WaterwaysB5 and
# RoadsB5. Four joins are concatenated and counted per slot:
#   1. waterways with TLClass < 3  x  roads with TLClass = 3
#   2. waterways with TLClass = 3  x  all roads
#   3. waterways with TLClass = 1  x  roads with TLClass = 2
#   4. waterways with TLClass = 2  x  roads with TLClass = 1
# Here only getValue is applied, without tie, so the per-slot counts are
# not added up (presumably they are returned as an array - confirm).
# NOTE(review): 1238 is presumably the worker port argument of dmap2 -
# confirm.
query WaterwaysB5 RoadsB5 dmap2["", fun(waterways: DARRAYELEM, roads: DARRAYELEM2)
waterways feed filter[.TLClass < 3] {w} roads feed filter[.TLClass = 3] {r} itSpatialJoin[GeoData_w, GeoData_r]
waterways feed filter[.TLClass = 3] {w} roads feed {r} itSpatialJoin[GeoData_w, GeoData_r] concat
waterways feed filter[.TLClass = 1] {w} roads feed filter[.TLClass = 2] {r} itSpatialJoin[GeoData_w, GeoData_r] concat
waterways feed filter[.TLClass = 2] {w} roads feed filter[.TLClass = 1] {r} itSpatialJoin[GeoData_w, GeoData_r] concat
count, 1238]
getValue
# 20.52 seconds
# Distributed Waterways x Roads join with an explicit dmap2 function; the
# parameters waterways and roads are the current slots of WaterwaysB5 and
# RoadsB5. Four joins are concatenated and counted per slot:
#   1. waterways with TLClass < 3  x  roads with TLClass = 3
#   2. waterways with TLClass = 3  x  all roads
#   3. waterways with TLClass = 1  x  roads with TLClass = 2
#   4. waterways with TLClass = 2  x  roads with TLClass = 1
# getValue tie[. + ..] adds up the per-slot counts into one total. The
# recorded result equals the 1479958 recorded for the local
# Roads x Waterways join elsewhere in this script.
# NOTE(review): 1238 is presumably the worker port argument of dmap2 -
# confirm.
query WaterwaysB5 RoadsB5 dmap2["", fun(waterways: DARRAYELEM, roads: DARRAYELEM2)
waterways feed filter[.TLClass < 3] {w} roads feed filter[.TLClass = 3] {r} itSpatialJoin[GeoData_w, GeoData_r]
waterways feed filter[.TLClass = 3] {w} roads feed {r} itSpatialJoin[GeoData_w, GeoData_r] concat
waterways feed filter[.TLClass = 1] {w} roads feed filter[.TLClass = 2] {r} itSpatialJoin[GeoData_w, GeoData_r] concat
waterways feed filter[.TLClass = 2] {w} roads feed filter[.TLClass = 1] {r} itSpatialJoin[GeoData_w, GeoData_r] concat
count, 1238]
getValue tie[. + ..]
# 22.25 seconds
# result 1479958
# Cardinalities of the original relations ...
query
  Waterways count
# 80904
query
  Roads count
# 1505462
# ... compared with the distributed versions (per-slot counts added up).
# 81979 - 80904 = 1075 additional waterway tuples.
query
  WaterwaysB5
  dmap["", . count]
  getValue tie[. + ..]
# 81979
# 1519837 - 1505462 = 14375 additional road tuples.
# (Presumably objects that were assigned to more than one field - confirm.)
query
  RoadsB5
  dmap["", . count]
  getValue tie[. + ..]
# 1519837
# Local join with Waterways as the first argument, for comparison.
query
  Waterways feed {w}
  Roads feed {r}
  itSpatialJoin[GeoData_w, GeoData_r]
  count
# 2:05min
# 1479958
# Roads x Buildings join reading both inputs with mfeed from objects
# addressed by name ("Roads", "Buildings").
# NOTE(review): mfeed presumably reads main-memory objects that must have
# been loaded beforehand - confirm.
query
  "Roads" mfeed {r}
  "Buildings" mfeed {b}
  itSpatialJoin[GeoData_r, GeoData_b]
  count
# 1713 seconds
# result 14247534