126 lines
4.3 KiB
Plaintext
126 lines
4.3 KiB
Plaintext
###########################################################################
|
|
#
|
|
# Construction of regions from OpenStreetMap data for two different tag keys:
|
|
# building, landuse
|
|
#
|
|
# Uses NestedRelationAlgebra, HadoopAlgebra, HadoopParallelAlgebra
|
|
#
|
|
# -> adapt the filename of the osm-file to be imported.
|
|
#
|
|
###########################################################################
|
|
|
|
# Database germany
|
|
|
|
# we have 12 data servers and 36 cores
|
|
|
|
# Number of data servers in the cluster (12). Used below as the partition
# count for divide_osm and spreadFiles and as the reduce width of the
# NodeId / NodeRef / WayIdInTag repartitioning steps.
let CLUSTER_SIZE = 12;
|
|
|
|
# Number of cores in the cluster (36). Used below as the number of tasks:
# grid cells are folded onto tasks with "Cell mod PS_SCALE", and the
# node/way join is reduced over PS_SCALE tasks.
let PS_SCALE = 36;
|
|
|
|
#let OSM_FilePath = 'arnsberg-regbez.osm';
|
|
# Path of the osm-file to be imported (text constant); this is the value to
# adapt for another installation. It is the first argument of divide_osm.
let OSM_FilePath = '/mnt/diskb/psec2/backup_OSM/germany-latest.osm';
|
|
|
|
# Q1
|
|
|
|
# Import the data in parallel
|
|
|
|
# Name passed to divide_osm, spreadFiles and (converted with totext) to
# fullosmimport. Presumably the base name of the partial OSM files - confirm
# against the divide_osm documentation.
let SubFileName = "PartCity";
|
|
|
|
# Prefix passed to divide_osm and fullosmimport. All imported objects that
# this script refers to afterwards (CityNodes, CityWays, ...) start with it.
let RelPrefix = "City";
|
|
|
|
# Q1, step 1: divide the osm-file using SubFileName, CLUSTER_SIZE and
# RelPrefix. Presumably this writes one partial file per data server -
# confirm against the divide_osm documentation.
query divide_osm(OSM_FilePath, SubFileName, CLUSTER_SIZE, RelPrefix);
|
|
# 47 secs
|
|
|
|
# Q1, step 2: spread the partial files named by SubFileName over
# CLUSTER_SIZE servers. The second argument is an empty text; presumably it
# selects a default location - TODO confirm.
query spreadFiles(SubFileName, '', CLUSTER_SIZE);
|
|
# 10 secs
|
|
|
|
# Q1, step 3: run fullosmimport on the partial file inside a hadoopMapAll
# job and store the job result as relation MapAllRel. The result carries
# (at least) the attributes Success and Result, which are removed below.
let MapAllRel = hadoopMapAll(fullosmimport(totext(SubFileName), RelPrefix)) consume;
|
|
# 82 secs
|
|
|
|
# The distribution of the result of the map job above
|
|
# MapAllRel without the attributes Success and Result. This relation is
# handed to every createFList call as the description of where the imported
# objects are located.
let CityDistribution = MapAllRel feed remove[Success, Result] consume;
|
|
|
|
# List (kind DLO) over the imported "CityNodes" objects, located via
# CityDistribution. CityNodes_type is not created in this script; presumably
# it is a by-product of the import - confirm.
let CityNodes_Ini_dlo = CityNodes_type createFList["CityNodes", CityDistribution, DLO, TRUE];
|
|
|
|
# List (kind DLO) over the imported "CityNodeTags" objects, located via
# CityDistribution. Not used again in the visible part of this script.
let CityNodeTags_Ini_dlo = CityNodeTags_type createFList["CityNodeTags", CityDistribution, DLO, TRUE];
|
|
|
|
# List (kind DLO) over the imported "CityWays" objects, located via
# CityDistribution. Input of the NodeRef repartitioning in Q3.
let CityWays_Ini_dlo = CityWays_type createFList["CityWays", CityDistribution, DLO, TRUE];
|
|
|
|
# List (kind DLO) over the imported "CityWayTags" objects, located via
# CityDistribution. Input of the WayIdInTag repartitioning in Q3.
let CityWayTags_Ini_dlo = CityWayTags_type createFList["CityWayTags", CityDistribution, DLO, TRUE];
|
|
|
|
# List (kind DLO) over the imported "CityRelations" objects, located via
# CityDistribution. Not used again in the visible part of this script.
let CityRelations_Ini_dlo = CityRelations_type createFList["CityRelations", CityDistribution, DLO, TRUE];
|
|
|
|
# List (kind DLO) over the imported "CityRelationTags" objects, located via
# CityDistribution. Not used again in the visible part of this script.
let CityRelationTags_Ini_dlo = CityRelationTags_type createFList["CityRelationTags", CityDistribution, DLO, TRUE];
|
|
|
|
# Q2 parallel
|
|
|
|
# Use the approach with a regular spatial grid
|
|
|
|
# Repartition the imported nodes by NodeId into CLUSTER_SIZE parts; the
# result is kept as a list of kind DLF. The reduce function only feeds the
# tuples through unchanged.
let CityNodes_NodeId_flist = CityNodes_Ini_dlo hadoopReduce[NodeId, DLF, CLUSTER_SIZE; . feed ];
|
|
|
|
# 63s
|
|
|
|
# Take a sample of the nodes inside a hadoopMap job (sample[100; 0.00000001])
# and collect the samples into one relation. The sample is only used to
# compute the bounding box CityAreaScaled.
# NOTE(review): nodes outside the sampled bounding box may fall outside the
# grid built from it - confirm how cellnumber treats such boxes.
let CityNodesSample = CityNodes_Ini_dlo hadoopMap[DLF, TRUE; . sample[100; 0.00000001]] collect[] consume;
|
|
|
|
# Bounding box of the sampled nodes in scaled coordinates:
#   - Lon and Lat are multiplied by 1000000 (Easting, Northing),
#   - every node becomes a degenerate rectangle (min = max in both dimensions),
#   - aggregateB unites all rectangles; the start value is an undefined rect.
let CityAreaScaled = CityNodesSample feed
  extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
  extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
  aggregateB[Box; fun(r1: rect, r2:rect) r1 union r2; [const rect value undef] ];
|
|
|
|
# we will define a 30 by 30 grid over this area. The resulting 900 cells will
|
|
# be mapped to 36 tasks, so each task gets 25 cells, to obtain an even distribution.
|
|
|
|
# Number of grid columns. The same value also divides the y extent
# (CELL_SIZE_Y), which yields a 30 by 30 grid.
let NO_COLUMNS = 30;
|
|
|
|
# Cell width: extent of CityAreaScaled in dimension 1 divided by NO_COLUMNS.
let CELL_SIZE_X = (maxD(CityAreaScaled, 1) - minD(CityAreaScaled, 1)) / NO_COLUMNS;
|
|
|
|
# Cell height: extent of CityAreaScaled in dimension 2, also divided by
# NO_COLUMNS, so the grid has as many rows as columns.
let CELL_SIZE_Y = (maxD(CityAreaScaled, 2) - minD(CityAreaScaled, 2)) / NO_COLUMNS;
|
|
|
|
# Regular cell grid anchored at the lower bounds of CityAreaScaled in both
# dimensions, with cells of CELL_SIZE_X by CELL_SIZE_Y and NO_COLUMNS columns.
let GRID_CityArea = createCellGrid2D(minD(CityAreaScaled, 1), minD(CityAreaScaled, 2),
  CELL_SIZE_X, CELL_SIZE_Y, NO_COLUMNS);
|
|
|
|
# Q2: build a point (Pos) for every node, distributing the work over the grid.
#   1. hadoopMap: scale Lon/Lat like CityAreaScaled, wrap each node in a
#      degenerate Box, look up its Cell in GRID_CityArea and fold the cell
#      number onto a task with "Cell mod PS_SCALE".
#   2. hadoopReduce on Task over PS_SCALE tasks: keep NodeId and add
#      Pos = makepoint(Lon, Lat); note Pos uses the unscaled coordinates.
#   3. hadoopReduce on NodeId over CLUSTER_SIZE parts: store the result as
#      kind DLO, partitioned by NodeId for the join with the ways in Q3.
let CityNodesNew_NodeId_dlo = CityNodes_NodeId_flist
  hadoopMap[DLF, FALSE; . extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
    extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
    extendstream[Cell: cellnumber(.Box, GRID_CityArea)]
    extend[Task: .Cell mod PS_SCALE] ]
  hadoopReduce[Task, DLF, PS_SCALE; . projectextend[NodeId; Pos: makepoint(.Lon, .Lat) ] ]
  hadoopReduce[NodeId, DLO, CLUSTER_SIZE; . consume];
|
|
|
|
|
|
# Q3 parallel
|
|
|
|
# Repartition the imported ways by NodeRef into CLUSTER_SIZE parts (kind
# DLF), so they can be joined with the nodes on NodeId = NodeRef.
let CityWays_NodeRef_flist = CityWays_Ini_dlo hadoopReduce[NodeRef, DLF, CLUSTER_SIZE; . feed ];
|
|
|
|
# 60s
|
|
|
|
# Repartition the imported way tags by WayIdInTag into CLUSTER_SIZE parts
# (kind DLF), so they can be joined with the ways on WayId = WayIdInTag.
let CityWayTags_WayIdInTag_flist = CityWayTags_Ini_dlo hadoopReduce[WayIdInTag, DLF, CLUSTER_SIZE; . feed ];
|
|
|
|
# 62s
|
|
|
|
# Q3: assemble the ways with their geometry and their tags.
#   1. First hadoopReduce2 (PS_SCALE tasks, kind DLF): join the node
#      positions with the ways on NodeId = NodeRef.
#   2. Second hadoopReduce2 (CLUSTER_SIZE parts, kind DLO), on
#      WayId = WayIdInTag:
#        - left side: sort by WayId and NodeCounter so that the nodes of a
#          way are in sequence, nest them per WayId into NodeList, and
#          build Curve from the Pos values with collect_line,
#        - right side: nest the tags per WayIdInTag into WayInfo,
#        - join both sides and store the result.
let Ways_WayId_dlo = CityNodesNew_NodeId_dlo
  CityWays_NodeRef_flist
  hadoopReduce2[NodeId, NodeRef, DLF, PS_SCALE; . feed .. itHashJoin[NodeId, NodeRef] ]
  CityWayTags_WayIdInTag_flist
  hadoopReduce2[WayId, WayIdInTag, DLO, CLUSTER_SIZE; . sortby[WayId, NodeCounter]
    nest[WayId; NodeList]
    extend[Curve: .NodeList afeed projecttransformstream[Pos] collect_line[TRUE]]
    .. nest[WayIdInTag; WayInfo]
    itHashJoin[WayId, WayIdInTag]
    consume ];
|
|
|
|
# 3:20min (200.108sec)
|
|
|
|
|
|
# Buildings: keep the ways that have at least one tag with key "building",
# turn their Curve into a Region with line2region, and project to
# (Region, OSM_id) where OSM_id is the WayId. Result is kept as kind DLF.
let Buildings_OID_dlf = Ways_WayId_dlo hadoopMap[DLF; . feed filter[.WayInfo afeed filter[.WayTagKey = "building"] count > 0] extend[Region: .Curve line2region] projectextend[Region;OSM_id: .WayId] ];
|
|
|
|
|
|
# Landuse areas: keep the ways that have at least one tag with key
# "landuse", turn their Curve into a Region with line2region, and project to
# (Region, OSM_id) where OSM_id is the WayId. Result is kept as kind DLF.
let Lands_OID_dlf = Ways_WayId_dlo hadoopMap[DLF; . feed filter[.WayInfo afeed filter[.WayTagKey = "landuse"] count > 0] extend[Region: .Curve line2region] projectextend[Region;OSM_id: .WayId] ];
|
|
|
|
# Collect the distributed building regions into the relation Buildings.
let Buildings = Buildings_OID_dlf collect[] consume;
|
|
|
|
# Collect the distributed landuse regions into the relation Lands.
let Lands = Lands_OID_dlf collect[] consume;
|
|
|
|
# Close the database the script ran in.
# NOTE(review): no matching "open database" is visible in this script (only
# the comment "Database germany"); presumably the database is opened by the
# caller before the script is run - confirm.
close database;
|
|
|