Files
secondo/bin/Scripts/BalancedSpatialPartition.sec

216 lines
5.5 KiB
Plaintext
Raw Normal View History

2026-01-23 17:03:45 +08:00
# Script for Balanced Partition
#
#
# Precondition: database (nrw) must exist and be opened
# Define a bounding box for all objects to be processed. Result of bbox aggregation over all objects from nrw:
# query Buildings feed projectextend[; Box: bbox(.GeoData)] Landuse feed projectextend[; Box: bbox(.GeoData)] concat Natural feed projectextend[; Box: bbox(.GeoData)] concat Places feed projectextend[; Box: bbox(.GeoData)] concat Points feed projectextend[; Box: bbox(.GeoData)] concat Railways feed projectextend[; Box: bbox(.GeoData)] concat Roads feed projectextend[; Box: bbox(.GeoData)] concat Waterways feed projectextend[; Box: bbox(.GeoData)] concat aggregateB[Box; fun(r1: rect, r2: rect) r1 union r2; [const rect value undef]]
# rect: ( (5.83191,50.1346) - (9.64789,52.5661) )
# World: rectangle covering all objects to be partitioned. The rect value
# lists (xmin xmax ymin ymax); it is the aggregated bounding box quoted
# above, rounded outward to two decimals.
# Every command ends with an explicit ';' so that the script does not depend
# on blank lines to separate commands.
delete World;
let World = [const rect value (5.83 9.65 50.13 52.57)];
# Left / Right: extent of World in dimension 1.
delete Left;
let Left = minD(World, 1);
delete Right;
let Right = maxD(World, 1);
# Bottom / Top: extent of World in dimension 2.
delete Bottom;
let Bottom = minD(World, 2);
delete Top;
let Top = maxD(World, 2);
# Set numbers of rows and columns for the partitioning:
# the world is cut into NRows horizontal stripes, and each stripe is cut
# into NColumns fields.
delete NRows;
let NRows = 6;
delete NColumns;
let NColumns = 8;
# Sample: bounding boxes (attribute Box) of a subset of Buildings; all
# partition boundaries below are derived from this sample only.
# NOTE(review): feedNth[100, FALSE] presumably passes on one tuple out of
# every 100 - confirm the meaning of the FALSE flag in the operator spec.
delete Sample;
let Sample = Buildings feedNth[100, FALSE] projectextend[; Box: bbox(.GeoData)] consume;
# Fraction: step width used to pick the stripe boundaries. Every sample box
# contributes two y-coordinates (lower and upper edge), hence the factor 2.
delete Fraction;
let Fraction = (2 * (Sample count)) div NRows;
# VPartition: the y-coordinates at which the world is cut into stripes.
# The lower and upper edges of all sample boxes are collected in one sorted
# stream; every Fraction-th value is kept and at most NRows - 1 of them are
# used as cuts.
delete VPartition;
let VPartition =
  Sample feed projectextend[; Y: minD(.Box, 2)]
  Sample feed projectextend[; Y: maxD(.Box, 2)]
  concat sort
  nth[Fraction, TRUE] head[NRows - 1]
  consume;
# Stripe1: the lowest stripe, reaching from Bottom up to the first cut.
delete Stripe1;
let Stripe1 = rectangle2(Left, Right, Bottom, VPartition feed extract[Y]);
# Stripes: one tuple (Y, Stripe) per cut. For each cut the stripe spans from
# the previous cut (..Y) to the current one (.Y); the first tuple has no
# predecessor and receives the initial value Stripe1.
delete Stripes;
let Stripes = VPartition feed
  extend_last[Stripe: rectangle2(Left, Right, ..Y, .Y)::Stripe1]
  consume;
# Append the topmost stripe, reaching from the last cut up to Top.
delete LastY;
let LastY = VPartition feed tail[1] extract[Y];
query Stripes inserttuple[LastY, rectangle2(Left, Right, LastY, Top)] consume;
# query Stripes feed Buildings feed itSpatialJoin[Stripe, GeoData] count
# 51.71 seconds, 44.67
# result 6518935
# query Buildings count
# result 6516159
# query Stripes feed Buildings feed itSpatialJoin[Stripe, GeoData]
# groupby2[Stripe; Cnt: fun(t:TUPLE, i:int) i + 1::0]
# consume
# 152 seconds
# query Stripes feed Sample feed itSpatialJoin[Stripe, Box]
# groupby2[Stripe; Cnt: fun(t:TUPLE, i:int) i + 1::0]
# consume
# 0.51 seconds
# Fraction2: step width used to pick the field boundaries inside one stripe.
# As for Fraction, each sample box contributes two x-coordinates.
delete Fraction2;
let Fraction2 = (2 * (Sample count)) div (NRows * NColumns);
# Fields: relation (Field, N) holding the fields of all stripes.
# Steps of the query:
#   1. Pair every stripe with the sample boxes joined to it by
#      itSpatialJoin[Stripe, Box] and nest the boxes per stripe (Boxes).
#   2. Bounds: per stripe, the sorted left and right x-coordinates of its
#      boxes, thinned to every Fraction2-th value, at most NColumns - 1 cuts.
#   3. LeftField: from the stripe's left edge to the first cut.
#      RightField: from the last cut to the stripe's right edge.
#   4. FieldsX: LeftField, then one field between each pair of consecutive
#      cuts, then RightField. extend_last yields the undefined initial value
#      for the first cut; that tuple is removed by the isdefined filter.
#   5. Flatten FieldsX and number the fields with a counter N starting at 0.
delete Fields;
let Fields = Stripes feed Sample feed itSpatialJoin[Stripe, Box]
  sortby[Stripe]
  nest[Stripe; Boxes]
  extend[Bounds: fun(t: TUPLE)
    attr(t, Boxes) afeed projectextend[; X: minD(.Box, 1)]
    attr(t, Boxes) afeed projectextend[; X: maxD(.Box, 1)]
    concat sort
    nth[Fraction2, TRUE] head[NColumns - 1]
    aconsume]
  remove[Boxes]
  extend[LeftField: rectangle2(minD(.Stripe, 1), .Bounds afeed extract[X],
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[RightField: rectangle2(.Bounds afeed tail[1] extract[X], maxD(.Stripe, 1),
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[FieldsX: fun(t2: TUPLE)
    attr(t2, LeftField) feed namedtransformstream[Field]
    attr(t2, Bounds) afeed
    extend_last[Field: rectangle2(..X, .X, minD(attr(t2, Stripe), 2), maxD(attr(t2, Stripe), 2))::
      [const rect value undef]]
    filter[isdefined(.Field)] remove[X] concat
    attr(t2, RightField) feed namedtransformstream[Field] concat
    aconsume]
  project[FieldsX]
  unnest[FieldsX]
  addcounter[N, 0]
  consume;
# 5.27 seconds, 5.25, 5.24
# Create a partition where the last 50 fields are small:
# divide the last stripe (Stripes feed tail[1]) into 50 fields.
# Fraction3: step width that yields 50 fields within one stripe.
delete Fraction3;
let Fraction3 = (2 * (Sample count)) div (NRows * 50);
# Fields2: relation (Field, N) with the fields of the last stripe only.
# The query has the same structure as the one building Fields, but uses
# Fraction3 and 49 cuts (head[49]), i.e. 50 fields.
# Fields2 is deleted first, like every other object of this script, so that
# the script can be run again on a database that already contains it.
delete Fields2;
let Fields2 = Stripes feed tail[1] Sample feed itSpatialJoin[Stripe, Box]
  sortby[Stripe]
  nest[Stripe; Boxes]
  extend[Bounds: fun(t: TUPLE)
    attr(t, Boxes) afeed projectextend[; X: minD(.Box, 1)]
    attr(t, Boxes) afeed projectextend[; X: maxD(.Box, 1)]
    concat sort
    nth[Fraction3, TRUE] head[49]
    aconsume]
  remove[Boxes]
  extend[LeftField: rectangle2(minD(.Stripe, 1), .Bounds afeed extract[X],
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[RightField: rectangle2(.Bounds afeed tail[1] extract[X], maxD(.Stripe, 1),
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[FieldsX: fun(t2: TUPLE)
    attr(t2, LeftField) feed namedtransformstream[Field]
    attr(t2, Bounds) afeed
    extend_last[Field: rectangle2(..X, .X, minD(attr(t2, Stripe), 2), maxD(attr(t2, Stripe), 2))::
      [const rect value undef]]
    filter[isdefined(.Field)] remove[X] concat
    attr(t2, RightField) feed namedtransformstream[Field] concat
    aconsume]
  project[FieldsX]
  unnest[FieldsX]
  addcounter[N, 0]
  consume;
# Fields3: combined partition. It takes the fields of Fields whose counter N
# is below (NRows - 1) * NColumns and appends the fields of Fields2 with
# their counters shifted by the same offset, so that N stays consecutive.
# The offset replaces the literal 40, which is its value for NRows = 6 and
# NColumns = 8.
# NOTE(review): this assumes that sortby[Stripe] in the Fields query places
# the stripe used for Fields2 last, so that exactly its fields are replaced.
# Fields3 is deleted first so that the script can be run repeatedly.
delete Fields3;
let Fields3 =
  Fields feed filter[.N < ((NRows - 1) * NColumns)]
  Fields2 feed projectextend[Field; N: ((NRows - 1) * NColumns) + .N]
  concat consume;
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData] count
# 54.19 seconds, 55.48; 89 seconds (cold)
# result 6525125; new result after repartitioning 6525796
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData] groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0] consume
# 129 seconds
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData] groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0] groupby[; Min: group feed min[Cnt], Max: group feed max[Cnt]] consume
# 122 seconds
# result:
#
# Min : 129198
# Max : 139990
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData] groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0] sum[Cnt]
# result: 6525125 (correct)
# There are 6525125 - 6516159 = 8966 duplicate buildings, i.e. about 0.14% of all buildings (8966 / 6516159).