# Experiments on spatial partitioning and distributed spatial joins.
# Each command is terminated by an empty line; the comments directly below a
# command record the measured running time(s) and the result.

# Use BalancedSpatialPartition first.

query Fields feed Buildings feed itSpatialJoin[Field, GeoData] count
# 54.19 seconds, 55.48
# result 6525125

query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
  groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0]
  consume
# 129 seconds

query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
  groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0]
  groupby[; Min: group feed min[Cnt], Max: group feed max[Cnt]]
  consume
# 122 seconds
# result:
#
# Min : 129198
# Max : 139990

query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
  groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0]
  sum[Cnt]
# result: 6525125 (correct)

# There are 6525125 - 6516159 = 8966 duplicate buildings = 0.14%.

# Comparison with grid distribution. We use about 20 * 20 = 400 cells.

let grid2 = [const cellgrid2d value (5.27 50.36 0.245 0.11 20)]
# exactly partitions World into 400 cells.

query Buildings feed
  extendstream[Cell: cellnumber(bbox(.GeoData), grid2)]
  extend[N: .Cell mod 48]
  count
# 191 seconds
# result 6527803

query Buildings feed
  extendstream[Cell: cellnumber(bbox(.GeoData), grid2)]
  extend[N: .Cell mod 48]
  groupby2[N; Cnt: fun(t:TUPLE, i:int) i + 1::0]
  consume
# 175 seconds
# partial result (N = 0 .. 10):
# N : 0   Cnt : 111482
# N : 1   Cnt : 197285
# N : 2   Cnt : 184358
# N : 3   Cnt : 144331
# N : 4   Cnt : 203089
# N : 5   Cnt : 109011
# N : 6   Cnt : 123624
# N : 7   Cnt : 91586
# N : 8   Cnt : 128755
# N : 9   Cnt : 77923
# N : 10  Cnt : 38344

# Sequential reference join Roads x Buildings (no timing recorded here).
query Roads feed {r} Buildings feed {b} itSpatialJoin[GeoData_r, GeoData_b] count

# Creation of BuildingsB5: first the cost of the partitioning join alone, then
# with the TLClass extension, then the actual distribution.

query Fields feed Buildings feed itSpatialJoin[Field, GeoData] count
# 54.21 seconds, 56.91, 55.35
# result 6525125

query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  count
# 1:51 min, 2:03 min; 2:42 min, 1:29 min, 2:20min

let BuildingsB5 = Fields feed Buildings feed itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["BuildingsB5", N, 50, Workers14]

# The ...rest arrays keep, per slot, only the tuples with TLClass < 3.
let BuildingsB5rest = BuildingsB5
  dloop["BuildingsB5rest", . feed filter[.TLClass < 3] consume]

let RoadsB5 = Fields feed Roads feed itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["RoadsB5", N, 50, Workers14]
# 4:16min

let RoadsB5rest = RoadsB5
  dloop["RoadsB5rest", . feed filter[.TLClass < 3] consume]
# 10.12 seconds

query Roads feed {r} Waterways feed {w} itSpatialJoin[GeoData_r, GeoData_w] count
# 56.94 seconds
# result 1479958

let WaterwaysB5 = Fields feed Waterways feed itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["WaterwaysB5", N, 50, Workers14]
# 20.59 seconds

let WaterwaysB5rest = WaterwaysB5
  dloop["WaterwaysB5rest", . feed filter[.TLClass < 3] consume]
# 12.46 seconds

# Distributed join Roads x Waterways. Per slot, the TLClass < 3 parts of both
# arguments are materialized once and handed to within2 as roads012 and
# waterways012; the four partial joins are concatenated and counted, and the
# per-slot counts are summed with tie. No timing was recorded for this variant.
query RoadsB5 WaterwaysB5 dmap2["",
  . feed filter[.TLClass < 3] consume
  .. feed filter[.TLClass < 3] consume
  within2[fun(roads012: ANY, waterways012: ANY2)
    . feed {r}
      .. feed filter[.TLClass = 3] {w}
      itSpatialJoin[GeoData_r, GeoData_w]
    . feed filter[.TLClass = 3] {r}
      waterways012 feed {w}
      itSpatialJoin[GeoData_r, GeoData_w]
      concat
    roads012 feed filter[.TLClass = 1] {r}
      waterways012 feed filter[.TLClass = 2] {w}
      itSpatialJoin[GeoData_r, GeoData_w]
      concat
    roads012 feed filter[.TLClass = 2] {r}
      waterways012 feed filter[.TLClass = 1] {w}
      itSpatialJoin[GeoData_r, GeoData_w]
      concat
    count ],
  1238] getValue tie[. + ..]

# Second run: distribute Waterways and Roads and join them with dmap2.
# NOTE(review): WaterwaysB5 and RoadsB5 are also created earlier in this file.
# As far as I know `let` is rejected for an object name that already exists,
# so these two commands presumably need the old objects deleted first when the
# file is replayed top to bottom - confirm before running.

let WaterwaysB5 = Fields feed Waterways feed itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["WaterwaysB5", N, 50, Workers14]
# 23.4113sec

let RoadsB5 = Fields feed Roads feed itSpatialJoin[Field, GeoData]
  extend[TLClass: topleftmax(.Field, bbox(.GeoData))]
  remove[Field]
  ddistribute2["RoadsB5", N, 50, Workers14]
# 3:04min

# Per-slot join counts. The four partial joins are
#   waterways TLClass < 3  x  roads TLClass = 3
#   waterways TLClass = 3  x  all roads
#   waterways TLClass = 1  x  roads TLClass = 2
#   waterways TLClass = 2  x  roads TLClass = 1
# This first form stops at getValue, i.e. the counts are not summed.
query WaterwaysB5 RoadsB5 dmap2["",
  fun(waterways: DARRAYELEM, roads: DARRAYELEM2)
    waterways feed filter[.TLClass < 3] {w}
      roads feed filter[.TLClass = 3] {r}
      itSpatialJoin[GeoData_w, GeoData_r]
    waterways feed filter[.TLClass = 3] {w}
      roads feed {r}
      itSpatialJoin[GeoData_w, GeoData_r]
      concat
    waterways feed filter[.TLClass = 1] {w}
      roads feed filter[.TLClass = 2] {r}
      itSpatialJoin[GeoData_w, GeoData_r]
      concat
    waterways feed filter[.TLClass = 2] {w}
      roads feed filter[.TLClass = 1] {r}
      itSpatialJoin[GeoData_w, GeoData_r]
      concat
    count,
  1238] getValue
# 20.52 seconds

# Same query, with the per-slot counts summed by tie.
query WaterwaysB5 RoadsB5 dmap2["",
  fun(waterways: DARRAYELEM, roads: DARRAYELEM2)
    waterways feed filter[.TLClass < 3] {w}
      roads feed filter[.TLClass = 3] {r}
      itSpatialJoin[GeoData_w, GeoData_r]
    waterways feed filter[.TLClass = 3] {w}
      roads feed {r}
      itSpatialJoin[GeoData_w, GeoData_r]
      concat
    waterways feed filter[.TLClass = 1] {w}
      roads feed filter[.TLClass = 2] {r}
      itSpatialJoin[GeoData_w, GeoData_r]
      concat
    waterways feed filter[.TLClass = 2] {w}
      roads feed filter[.TLClass = 1] {r}
      itSpatialJoin[GeoData_w, GeoData_r]
      concat
    count,
  1238] getValue tie[. + ..]
# 22.25 seconds
# result 1479958

# Cardinalities of the original relations and of the distributed versions.

query Waterways count
# 80904

query Roads count
# 1505462

query WaterwaysB5 dmap["", . count] getValue tie[. + ..]
# 81979

query RoadsB5 dmap["", . count] getValue tie[. + ..]
# 1519837

# Sequential reference joins.

query Waterways feed {w} Roads feed {r} itSpatialJoin[GeoData_w, GeoData_r] count
# 2:05min
# 1479958

query "Roads" mfeed {r} "Buildings" mfeed {b} itSpatialJoin[GeoData_r, GeoData_b] count
# 1713 seconds
# result 14247534