# Script for Balanced Partition
#
# Builds a grid partition of the data space in which every field holds
# roughly the same number of objects. A sample of the Buildings relation is
# used to pick the split positions: first horizontal stripes (rows), then
# vertical splits inside each stripe (columns).
#
# Precondition: database (nrw) must exist and be opened.
#
# Objects created: World, Left, Right, Bottom, Top, NRows, NColumns, Sample,
# Fraction, VPartition, Stripe1, Stripes, LastY, Fraction2, Fields,
# Fraction3, Fields2, Fields3.
#
# Every "let" is preceded by a "delete" of the same object so that the script
# can be re-run; on the first run the deletes are expected to report errors
# for objects that do not exist yet.
# Every command is terminated by ";" so that command boundaries do not depend
# on blank lines.


# Define a bounding box for all objects to be processed.
# Result of bbox aggregation over all objects from nrw:
#
# query
#   Buildings feed projectextend[; Box: bbox(.GeoData)]
#   Landuse feed projectextend[; Box: bbox(.GeoData)] concat
#   Natural feed projectextend[; Box: bbox(.GeoData)] concat
#   Places feed projectextend[; Box: bbox(.GeoData)] concat
#   Points feed projectextend[; Box: bbox(.GeoData)] concat
#   Railways feed projectextend[; Box: bbox(.GeoData)] concat
#   Roads feed projectextend[; Box: bbox(.GeoData)] concat
#   Waterways feed projectextend[; Box: bbox(.GeoData)] concat
#   aggregateB[Box; fun(r1: rect, r2: rect) r1 union r2; [const rect value undef]]
#
# rect: ( (5.83191,50.1346) - (9.64789,52.5661) )

# World is the aggregated box above, rounded outwards to two decimals.
delete World;

let World = [const rect value (5.83 9.65 50.13 52.57)];

delete Left;

delete Right;

delete Bottom;

delete Top;

# Dimension 1 is used as X and dimension 2 as Y throughout this script.
let Left = minD(World, 1);

let Right = maxD(World, 1);

let Bottom = minD(World, 2);

let Top = maxD(World, 2);


# Set numbers of rows and columns for the partitioning

delete NRows;

delete NColumns;

let NRows = 6;

let NColumns = 8;


# Sample of the bounding boxes of Buildings.
# NOTE(review): feedNth[100, FALSE] presumably selects every 100th tuple
# deterministically - confirm against the operator description.
delete Sample;

let Sample = Buildings feedNth[100, FALSE]
  projectextend[; Box: bbox(.GeoData)]
  consume;


# Each sample box contributes two Y coordinates (bottom and top), hence the
# factor 2. Fraction is the number of coordinates per row.
delete Fraction;

let Fraction = (2 * (Sample count)) div NRows;


# Horizontal split positions: sort all bottom/top Y coordinates of the sample
# and keep NRows - 1 of the values picked by nth[Fraction, TRUE].
# NOTE(review): nth[Fraction, TRUE] presumably returns every Fraction-th
# element of the sorted stream - confirm.
delete VPartition;

let VPartition =
  Sample feed projectextend[; Y: minD(.Box, 2)]
  Sample feed projectextend[; Y: maxD(.Box, 2)]
  concat
  sort
  nth[Fraction, TRUE]
  head[NRows - 1]
  consume;


# Bottom stripe: from the bottom of World up to the first split position.
delete Stripe1;

let Stripe1 = rectangle2(Left, Right, Bottom, VPartition feed extract[Y]);


# One stripe per split position. For each tuple the stripe reaches from the
# previous Y (..Y) to the current Y (.Y); Stripe1 is the value supplied after
# "::" for the first tuple, which has no predecessor.
delete Stripes;

let Stripes = VPartition feed
  extend_last[Stripe: rectangle2(Left, Right, ..Y, .Y)::Stripe1]
  consume;


# Add the top stripe, from the last split position to the top of World.
delete LastY;

let LastY = VPartition feed tail[1] extract[Y];

query Stripes inserttuple[LastY, rectangle2(Left, Right, LastY, Top)] consume;


# Measurements for the stripes (kept for reference, not executed):
#
# query Stripes feed Buildings feed itSpatialJoin[Stripe, GeoData] count
# 51.71 seconds, 44.67
# result 6518935
#
# query Buildings count
# result 6516159
#
# query Stripes feed Buildings feed itSpatialJoin[Stripe, GeoData]
#   groupby2[Stripe; Cnt: fun(t:TUPLE, i:int) i + 1::0]
#   consume
# 152 seconds
#
# query Stripes feed Sample feed itSpatialJoin[Stripe, Box]
#   groupby2[Stripe; Cnt: fun(t:TUPLE, i:int) i + 1::0]
#   consume
# 0.51 seconds


# Number of X coordinates per field: two per sample box, spread over
# NRows * NColumns fields.
delete Fraction2;

let Fraction2 = (2 * (Sample count)) div (NRows * NColumns);


# Split every stripe into NColumns fields.
# Steps: join the sample boxes to the stripes and nest them per stripe
# (Boxes); compute NColumns - 1 vertical split positions per stripe (Bounds)
# from the sorted left/right X coordinates of its boxes; build the leftmost
# field (LeftField), the fields between consecutive split positions, and the
# rightmost field (RightField); finally unnest to one tuple per field and
# number the fields with N starting at 0.
# In the middle part the first bound has no predecessor and gets the undefined
# default rectangle, which filter[isdefined(.Field)] removes again.
delete Fields;

let Fields =
  Stripes feed
  Sample feed
  itSpatialJoin[Stripe, Box]
  sortby[Stripe]
  nest[Stripe; Boxes]
  extend[Bounds: fun(t: TUPLE)
    attr(t, Boxes) afeed projectextend[; X: minD(.Box, 1)]
    attr(t, Boxes) afeed projectextend[; X: maxD(.Box, 1)]
    concat
    sort
    nth[Fraction2, TRUE]
    head[NColumns - 1]
    aconsume]
  remove[Boxes]
  extend[LeftField: rectangle2(minD(.Stripe, 1), .Bounds afeed extract[X],
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[RightField: rectangle2(.Bounds afeed tail[1] extract[X], maxD(.Stripe, 1),
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[FieldsX: fun(t2: TUPLE)
    attr(t2, LeftField) feed namedtransformstream[Field]
    attr(t2, Bounds) afeed
      extend_last[Field: rectangle2(..X, .X,
        minD(attr(t2, Stripe), 2), maxD(attr(t2, Stripe), 2))::
        [const rect value undef]]
      filter[isdefined(.Field)]
      remove[X]
    concat
    attr(t2, RightField) feed namedtransformstream[Field]
    concat
    aconsume]
  project[FieldsX]
  unnest[FieldsX]
  addcounter[N, 0]
  consume;

# 5.27 seconds, 5.25, 5.24


# Create a partition where the last 50 fields are small:
# divide the last stripe into 50 fields.
# The constants 50 (in Fraction3) and 49 = 50 - 1 (in head[49] below) belong
# together and must be changed together.

delete Fraction3;

let Fraction3 = (2 * (Sample count)) div (NRows * 50);


# Same construction as for Fields, restricted to the last tuple of Stripes
# (the top stripe added by inserttuple) and using 49 split positions.
delete Fields2;

let Fields2 =
  Stripes feed tail[1]
  Sample feed
  itSpatialJoin[Stripe, Box]
  sortby[Stripe]
  nest[Stripe; Boxes]
  extend[Bounds: fun(t: TUPLE)
    attr(t, Boxes) afeed projectextend[; X: minD(.Box, 1)]
    attr(t, Boxes) afeed projectextend[; X: maxD(.Box, 1)]
    concat
    sort
    nth[Fraction3, TRUE]
    head[49]
    aconsume]
  remove[Boxes]
  extend[LeftField: rectangle2(minD(.Stripe, 1), .Bounds afeed extract[X],
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[RightField: rectangle2(.Bounds afeed tail[1] extract[X], maxD(.Stripe, 1),
    minD(.Stripe, 2), maxD(.Stripe, 2))]
  extend[FieldsX: fun(t2: TUPLE)
    attr(t2, LeftField) feed namedtransformstream[Field]
    attr(t2, Bounds) afeed
      extend_last[Field: rectangle2(..X, .X,
        minD(attr(t2, Stripe), 2), maxD(attr(t2, Stripe), 2))::
        [const rect value undef]]
      filter[isdefined(.Field)]
      remove[X]
    concat
    attr(t2, RightField) feed namedtransformstream[Field]
    concat
    aconsume]
  project[FieldsX]
  unnest[FieldsX]
  addcounter[N, 0]
  consume;


# Combined partition: the fields of all stripes except the last one are taken
# from Fields, followed by the small fields of Fields2, renumbered so that N
# continues without a gap.
# (NRows - 1) * NColumns is the number of fields in the first NRows - 1
# stripes (40 for the 6 x 8 setting above).
# NOTE(review): this relies on Fields numbering the fields of the last stripe
# last, i.e. on sortby[Stripe] ordering the stripes bottom to top - confirm.
delete Fields3;

let Fields3 =
  Fields feed filter[.N < ((NRows - 1) * NColumns)]
  Fields2 feed projectextend[Field; N: ((NRows - 1) * NColumns) + .N]
  concat
  consume;


# Measurements for the fields (kept for reference, not executed):
#
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData] count
# 54.19 seconds, 55.48; 89 seconds (cold)
# result 6525125; new result after repartitioning 6525796
#
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
#   groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0]
#   consume
# 129 seconds
#
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
#   groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0]
#   groupby[; Min: group feed min[Cnt], Max: group feed max[Cnt]]
#   consume
# 122 seconds
# result:
#
# Min : 129198
# Max : 139990
#
# query Fields feed Buildings feed itSpatialJoin[Field, GeoData]
#   groupby2[Field; Cnt: fun(t:TUPLE, i:int) i + 1::0]
#   sum[Cnt]
# result: 6525125 (correct)
#
# There are 6525125 - 6516159 = 8966 duplicate buildings, i.e. about 0.14 %
# of the buildings are assigned to more than one field.