###########################################################################
#
# Construction of a Road Network from OpenStreetMap Data in OrderedRelation
# Graph Representation
#
# Uses NestedRelationAlgebra
#
# -> adapt the filename of the osm-file to be imported.
#
# running times and result sizes refer to arnsberg-regbez.osm, using
# GlobalMemory = 2 GB
#
# second set of running times for California (complete)
#
###########################################################################

# Database hombruch
# we have 12 data servers and 36 cores

let CLUSTER_SIZE = 12

let PS_SCALE = 36

# Q1
# Import the raw OSM file; creates the six City* relations listed below.

query fullosmimport('arnsberg-regbez.osm', "City")

# 155 secs, 194
# relation CityNodes with 5008465 tuples stored
# relation CityNodeTags with 389898 tuples stored
# relation CityWays with 6419349 tuples stored
# relation CityWayTags with 2193352 tuples stored
# relation CityRelations with 355347 tuples stored
# relation CityRelationTags with 52448 tuples stored

# file california-latest.osm opened successfully
# relation CityNodes with 44131572 tuples stored
# relation CityNodeTags with 8469252 tuples stored
# relation CityWays with 49706123 tuples stored
# relation CityWayTags with 20363092 tuples stored
# relation CityRelations with 164141 tuples stored
# relation CityRelationTags with 81925 tuples stored
# CA 40:52min (2451.81sec)

# Q2
# Renumber nodes in spatial (Z-)order: scale (Lon, Lat) to integers, build a
# degenerate point rectangle per node, sort by it, and assign the new dense
# id NodeIdNew with a running counter.

let CityNodesNew = CityNodes feed
  extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
  extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
  sortby[Box]
  projectextend[NodeId; Pos: makepoint(.Lon, .Lat)]
  addcounter[NodeIdNew, 1]
  consume

# 213 secs
# CA 64:28min (3867.55sec)

# Q2 parallel
#
# First attempt: derive section boundaries in Z-order from a sample,
# then assign nodes to sections via symmjoin. Abandoned (see note below)
# in favour of the regular spatial grid.
# let ZOrderSections = CityNodes sample[3600; 0.00000001]
#   extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
#   extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
#   sortby[Box] addcounter[No, 0] extend[N: .No div 100]
#   groupby[N; Box: group feed extract[Box]]
#   extend_last[Low: ..Box :: [const rect value undef], High: .Box :: [const rect value undef]]
#   consume
# 4.01 secs (warm)
# let ... =
# query CityNodes feed
#   extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
#   extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
#   ZOrderSections feed {z}
#   symmjoin[(compare(.Box, ..Low_z) >= 0) and (compare(.Box, ..High_z) < 0)]
#   remove[Box_z, Low_z, High_z] count
#
# abandoned for the moment.

# Use the approach with a regular spatial grid

let CityNodesSample = CityNodes sample[1000; 0.00000001] consume

# 21.64 secs (cold), 3.96 secs (warm)

# check the rectangle created by the sample

query CityNodesSample feed
  extend[Box: rectangle2(.Lon, .Lon, .Lat, .Lat)]
  aggregateB[Box; fun(r1: rect, r2:rect) r1 union r2; [const rect value undef] ]

# Bounding box of the sample in scaled integer coordinates; used below to
# lay a regular grid over the covered area.

let CityAreaScaled = CityNodesSample feed
  extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
  extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
  aggregateB[Box; fun(r1: rect, r2:rect) r1 union r2; [const rect value undef] ]

# we will define a 30 by 30 grid over this area. The resulting 900 cells will
# be mapped to 36 tasks, so each task gets 25 cells, to obtain an even distribution.
# A 30 x 30 regular grid over the sampled area; cell numbers are mapped to
# the PS_SCALE tasks by "mod PS_SCALE" below.

let NO_COLUMNS = 30

let CELL_SIZE_X = (maxD(CityAreaScaled, 1) - minD(CityAreaScaled, 1)) / NO_COLUMNS

let CELL_SIZE_Y = (maxD(CityAreaScaled, 2) - minD(CityAreaScaled, 2)) / NO_COLUMNS

let GRID_CityArea = createCellGrid2D(minD(CityAreaScaled, 1), minD(CityAreaScaled, 2),
  CELL_SIZE_X, CELL_SIZE_Y, NO_COLUMNS)

# Sanity check: every node falls into some grid cell
# (sum over cells must equal the node count).

query CityNodes feed
  extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
  extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
  extendstream[Cell: cellnumber(.Box, GRID_CityArea)]
  sortby[Cell]
  groupby[Cell; Cnt: group count]
  sum[Cnt]

# correct number is 5027561
# 5027561
# result is 5027561, correct

# check for even distribution to tasks:

query CityNodes feed
  extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
  extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
  extendstream[Cell: cellnumber(.Box, GRID_CityArea)]
  extend[Task: .Cell mod PS_SCALE]
  sortby[Task]
  groupby[Task; Cnt: group count]
  consume

# sequential query
# NOTE(review): this repeats the Q2 sequential definition of CityNodesNew;
# "let" fails if the object already exists - confirm which run is intended.

let CityNodesNew = CityNodes feed
  extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
  extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
  sortby[Box]
  projectextend[NodeId; Pos: makepoint(.Lon, .Lat)]
  addcounter[NodeIdNew, 1]
  consume

# Distribute CityNodes over the cluster, partitioned by NodeId.

let CityNodes_NodeId_flist = CityNodes feed spread[; NodeId, CLUSTER_SIZE, TRUE;]

# CA
# 3:08min (188.108sec)

query CityNodes_NodeId_flist hadoopMap[DLF; . count feed transformstream] collect[] consume

query CityNodes_NodeId_flist hadoopMap[DLF; . count feed transformstream] collect[] sum[Elem]

# arnsberg
# 5027561, correct
# CA
# 44131572

# Q2 parallel renumbering: assign each node its grid cell and task, sort each
# task's nodes spatially, and build NodeIdNew = Task * 10000000 + local counter
# (unique across tasks); finally redistribute by NodeId as a DLO.

let CityNodesNew_NodeId_dlo = CityNodes_NodeId_flist
  hadoopMap[DLF, FALSE; . extend[Easting: .Lon * 1000000, Northing: .Lat * 1000000]
    extend[Box: rectangle2(.Easting, .Easting, .Northing, .Northing)]
    extendstream[Cell: cellnumber(.Box, GRID_CityArea)]
    extend[Task: .Cell mod PS_SCALE] ]
  hadoopReduce[Task, DLF, PS_SCALE; . sortby[Box] addcounter[No, 1]
    projectextend[NodeId; Pos: makepoint(.Lon, .Lat),
      NodeIdNew: (.Task * 10000000) + .No] ]
  hadoopReduce[NodeId, DLO, CLUSTER_SIZE; . consume]

# CA
# 6:56min (416.263sec)

# NOTE(review): CityNodesNew_NodeId_flist2 is not defined anywhere in this
# script - presumably CityNodesNew_NodeId_dlo is meant; verify before rerun.

query CityNodesNew_NodeId_flist2 hadoopMap[DLF; . count feed transformstream] collect[] consume

# CA
# 44131572 correct

# query nodes(CityNodesNew feed addid bulkloadrtree[Pos]) consume

# Q3
# Build Ways: join renumbered nodes with way members, nest the node list per
# way, build the way geometry (Curve), attach the nested tag info (WayInfo),
# and store the ways sorted by their scaled bounding boxes.

let Ways = CityNodesNew feed
  CityWays feed
  itHashJoin[NodeId, NodeRef]
  sortby[WayId, NodeCounter]
  nest[WayId; NodeList]
  extend[Curve : .NodeList afeed projecttransformstream[Pos] collect_line[TRUE]]
  CityWayTags feed nest[WayIdInTag; WayInfo]
  itHashJoin[WayId, WayIdInTag]
  extend[Box: bbox(.Curve) scale[1000000.0]]
  sortby[Box] remove[Box]
  consume

# 3339 secs

# Q3 parallel

# old version, now obsolete
# let CityNodesNew_NodeId_flist = CityNodesNew feed spread[; NodeId, CLUSTER_SIZE, TRUE;]
# 28 secs
# CA 3:50min (229.644sec)

let CityWays_NodeRef_flist = CityWays feed spread[; NodeRef, CLUSTER_SIZE, TRUE;]

# 32 secs
# CA 4:04min (244.451sec)

let CityWayTags_WayIdInTag_flist = CityWayTags feed spread[; WayIdInTag, CLUSTER_SIZE, TRUE;]

# 28 secs
# CA 4:10min (250.382sec)

# Parallel Q3: first join nodes with way members by node id, then (per WayId)
# nest, build the geometry, and attach the nested tags.

let Ways_WayId_dlo = CityNodesNew_NodeId_dlo
  CityWays_NodeRef_flist
  hadoopReduce2[NodeId, NodeRef, DLF, PS_SCALE; . feed .. itHashJoin[NodeId, NodeRef] ]
  CityWayTags_WayIdInTag_flist
  hadoopReduce2[WayId, WayIdInTag, DLO, CLUSTER_SIZE; . sortby[WayId, NodeCounter]
    nest[WayId; NodeList]
    extend[Curve: .NodeList afeed projecttransformstream[Pos] collect_line[TRUE]]
    .. nest[WayIdInTag; WayInfo]
    itHashJoin[WayId, WayIdInTag]
    extend[Box: bbox(.Curve) scale[1000000.0]]
    sortby[Box] remove[Box]
    consume ]

# 3:20min (200.108sec)
# CA 18:59min (1139.39sec)

# check the result:

query Ways_WayId_dlo hadoopMap[DLF; . feed count feed transformstream] collect[] consume

# 34 secs
# even distribution

query Ways_WayId_dlo hadoopMap[DLF; . feed count feed transformstream] collect[] sum[Elem]

# 39 secs
# 752888 correct

query Ways_WayId_dlo hadoopMap[DLF; . feed project[WayId, Curve] head[500] ] collect[] consume

# 38 secs
# 6000 Ways appear in Javagui

# Q4
# R-tree over the way geometries (sequential).

derive Ways_Curve_rtree = Ways feed addid projectextend[TID; Box: bbox(.Curve)]
  bulkloadrtree[Box]

# 10 secs

# Q4 parallel

let Ways_WayId_Curve_rtree_dlo = Ways_WayId_dlo hadoopMap[DLO; . feed addid
  projectextend[TID; Box: bbox(.Curve)] bulkloadrtree[Box] ]

# 41.93 secs

# query Ways feed addid projectextend[TID; Box: bbox(.Curve)] bulkloadrtree[Box]
# query nodes(Ways feed addid projectextend[TID; Box: bbox(.Curve)] bulkloadrtree[Box]) consume

# Q5
# Restrict to roads: keep only ways carrying a "highway" tag.

let Roads = Ways feed filter[.WayInfo afeed filter[.WayTagKey = "highway"] count > 0] consume

# 113 secs

# Q5 parallel

let Roads_WayId_dlo = Ways_WayId_dlo hadoopMap[DLO; . feed
  filter[.WayInfo afeed filter[.WayTagKey = "highway"] count > 0] consume ]

# 54.72 secs
# CA 1:51min (110.819sec)

# check the result:

query Roads_WayId_dlo hadoopMap[DLF; . feed count feed transformstream] collect[] consume

# ok

query Roads_WayId_dlo hadoopMap[DLF; . feed count feed transformstream] collect[] sum[Elem]

# 242556 correct

# Q6
# R-tree over road geometries (sequential).

derive Roads_Curve_rtree = Roads feed addid projectextend[TID; Box: bbox(.Curve)]
  bulkloadrtree[Box]

# 3.56 secs

# Q6 parallel

let Roads_WayId_Curve_rtree_dlo = Roads_WayId_dlo hadoopMap[DLO; . feed addid
  projectextend[TID; Box: bbox(.Curve)] bulkloadrtree[Box] ]

# 33.52 secs
# CA 32.5971sec

# Q7
# compute Nodes as the union of start points, end points and intersections of roads
# (note: "#" inside filter[...] is the not-equal operator, not a comment)

let Nodes =
  CityWays feed
  CityWays feed {h2}
  itHashJoin[NodeRef, NodeRef_h2]
  filter[.WayId # .WayId_h2]
  CityNodesNew feed
  itHashJoin[NodeRef, NodeId]
  Roads feed {r1} itHashJoin[WayId, WayId_r1]
  Roads feed {r2} itHashJoin[WayId_h2, WayId_r2]
  project[WayId, NodeCounter, NodeIdNew, Pos]
  Roads feed
  projectextend[WayId; Node: .NodeList afeed filter[.NodeCounter = 0] aconsume]
  unnest[Node]
  project[WayId, NodeCounter, NodeIdNew, Pos]
  concat
  Roads feed
  extend[HighNodeNo: (.NodeList afeed count) - 1]
  projectextend[WayId; Node: fun(t: TUPLE) attr(t, NodeList) afeed
    filter[.NodeCounter = attr(t, HighNodeNo)] aconsume]
  unnest[Node]
  project[WayId, NodeCounter, NodeIdNew, Pos]
  concat
  sortby[WayId, NodeCounter]
  rdup
  consume

# 269 secs

# Q7 parallel

let RoadsWayId_WayId_flist = Roads_WayId_dlo hadoopMap[DLF; . feed project[WayId] ]

# 34.78 secs
# CA 32.4967sec

# Parallel Q7: the same union (intersections, start points, end points),
# expressed as a chain of hadoopReduce2/hadoopMap steps over the distributed
# inputs, with a final concat + rdup per WayId partition.

let Nodes_WayId_flist =
  CityWays_NodeRef_flist
  CityWays_NodeRef_flist
  hadoopReduce2[NodeRef, NodeRef, DLF, PS_SCALE; . .. {h2}
    itHashJoin[NodeRef, NodeRef_h2] filter[.WayId # .WayId_h2] ]
  CityNodesNew_NodeId_dlo
  hadoopReduce2[NodeRef, NodeId, DLF, PS_SCALE; . .. feed itHashJoin[NodeRef, NodeId] ]
  RoadsWayId_WayId_flist
  hadoopReduce2[WayId, WayId, DLF, PS_SCALE; . .. {r1} itHashJoin[WayId, WayId_r1] ]
  RoadsWayId_WayId_flist
  hadoopReduce2[WayId_h2, WayId, DLF, PS_SCALE; . .. {r2} itHashJoin[WayId_h2, WayId_r2]
    project[WayId, NodeCounter, NodeIdNew, Pos] ]
  Roads_WayId_dlo
  hadoopMap[DLF, FALSE; . feed
    projectextend[WayId; Node: .NodeList afeed filter[.NodeCounter = 0] aconsume]
    unnest[Node] project[WayId, NodeCounter, NodeIdNew, Pos] ]
  hadoopReduce2[WayId, WayId, DLF, PS_SCALE; . .. concat ]
  Roads_WayId_dlo
  hadoopMap[DLF, FALSE; . feed extend[HighNodeNo: (.NodeList afeed count) - 1]
    projectextend[WayId; Node: fun(t: TUPLE) attr(t, NodeList) afeed
      filter[.NodeCounter = attr(t, HighNodeNo)] aconsume]
    unnest[Node] project[WayId, NodeCounter, NodeIdNew, Pos] ]
  hadoopReduce2[WayId, WayId, DLF, PS_SCALE; . .. concat sortby[WayId, NodeCounter] rdup ]

# 5:45 mins (344 secs)
# 722693 correct
# CA 12:47min (767.436sec)

# query Nodes_WayId_flist hadoopMap[DLF; . count feed transformstream] collect[] sum[Elem]

# Q8
# Edges in way direction: per way, pair consecutive section nodes into
# (Source, Target) edges via extend_last, then attach geometry (Curve) and
# the road attributes name/highway/maxspeed from the nested WayInfo.

let EdgesUp = Nodes feed
  nest[WayId; SectionNodes]
  projectextend[WayId; Sections: .SectionNodes afeed
    extend_last[Source: ..NodeIdNew::0, Target: .NodeIdNew::0,
      SourcePos: ..Pos::[const point value undef],
      TargetPos: .Pos::[const point value undef],
      SourceNodeCounter: ..NodeCounter::0,
      TargetNodeCounter: .NodeCounter::0]
    filter[.Source # 0]
    project[Source, Target, SourcePos, TargetPos, SourceNodeCounter, TargetNodeCounter]
    aconsume]
  Roads feed {r}
  itHashJoin[WayId, WayId_r]
  projectextend[WayId; Sections: fun(t:TUPLE) attr(t, Sections) afeed
    extend[ Curve: fun(u: TUPLE) attr(t, NodeList_r) afeed
        filter[.NodeCounter_r between[attr(u, SourceNodeCounter), attr(u, TargetNodeCounter)] ]
        projecttransformstream[Pos_r] collect_sline[TRUE],
      RoadName: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "name"] extract [WayTagValue_r],
      RoadType: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "highway"] extract [WayTagValue_r],
      MaxSpeed: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "maxspeed"] extract [WayTagValue_r] ]
    aconsume ]
  unnest[Sections]
  consume

# 337 secs

# Q8 parallel
# Split the above query

let Nodes_WayId_dlo = Nodes_WayId_flist hadoopReduce[WayId, DLO, CLUSTER_SIZE; .
  nest[WayId; SectionNodes]
  projectextend[WayId; Sections: .SectionNodes afeed
    extend_last[Source: ..NodeIdNew::0, Target: .NodeIdNew::0,
      SourcePos: ..Pos::[const point value undef],
      TargetPos: .Pos::[const point value undef],
      SourceNodeCounter: ..NodeCounter::0,
      TargetNodeCounter: .NodeCounter::0]
    filter[.Source # 0]
    project[Source, Target, SourcePos, TargetPos, SourceNodeCounter, TargetNodeCounter]
    aconsume]
  consume]

# 1:27min (86.9874sec)
# CA 1:25min (84.723sec)

let EdgesUp_WayId_flist = Nodes_WayId_dlo hadoopMap[DLF, TRUE; . feed
  para(Roads_WayId_dlo) feed {r}
  itHashJoin[WayId, WayId_r]
  projectextend[WayId; Sections: fun(t:TUPLE) attr(t, Sections) afeed
    extend[Curve: fun(u: TUPLE) attr(t, NodeList_r) afeed
        filter[.NodeCounter_r between[attr(u, SourceNodeCounter), attr(u, TargetNodeCounter)] ]
        projecttransformstream[Pos_r] collect_sline[TRUE],
      RoadName: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "name"] extract [WayTagValue_r],
      RoadType: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "highway"] extract [WayTagValue_r],
      MaxSpeed: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "maxspeed"] extract [WayTagValue_r] ]
    aconsume ]
  unnest[Sections] ]

# 49.78 secs
# 480137 correct
# CA 2:60min (179.586sec)

# query EdgesUp_WayId_flist hadoopMap[DLF; . count feed transformstream] collect[] sum[Elem]

# Q9
# Edges against way direction: like Q8, but sections are built on the node
# list sorted descending, and only for roads that are NOT tagged oneway=yes.

let EdgesDown = Nodes feed
  nest[WayId; SectionNodes]
  projectextend[WayId; Sections: .SectionNodes afeed sortby[NodeCounter desc]
    extend_last[Source: ..NodeIdNew::0, Target: .NodeIdNew::0,
      SourcePos: ..Pos::[const point value undef],
      TargetPos: .Pos::[const point value undef],
      SourceNodeCounter: ..NodeCounter::0,
      TargetNodeCounter: .NodeCounter::0]
    filter[.Source # 0]
    project[Source, Target, SourcePos, TargetPos, SourceNodeCounter, TargetNodeCounter]
    aconsume]
  Roads feed
  filter[.WayInfo afeed filter[.WayTagKey = "oneway"] filter[(.WayTagValue = "yes")] count = 0] {r}
  itHashJoin[WayId, WayId_r]
  projectextend[WayId; Sections: fun(t:TUPLE) attr(t, Sections) afeed
    extend[Curve: fun(u: TUPLE) attr(t, NodeList_r) afeed sortby[NodeCounter_r desc]
        filter[.NodeCounter_r between[attr(u, TargetNodeCounter), attr(u, SourceNodeCounter)] ]
        projecttransformstream[Pos_r] collect_sline[TRUE],
      RoadName: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "name"] extract [WayTagValue_r],
      RoadType: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "highway"] extract [WayTagValue_r],
      MaxSpeed: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "maxspeed"] extract [WayTagValue_r] ]
    aconsume ]
  unnest[Sections]
  consume

# 372 secs

# Q9 parallel

let NodesDesc_WayId_dlo = Nodes_WayId_flist hadoopReduce[WayId, DLO, CLUSTER_SIZE; .
  nest[WayId; SectionNodes]
  projectextend[WayId; Sections: .SectionNodes afeed sortby[NodeCounter desc]
    extend_last[Source: ..NodeIdNew::0, Target: .NodeIdNew::0,
      SourcePos: ..Pos::[const point value undef],
      TargetPos: .Pos::[const point value undef],
      SourceNodeCounter: ..NodeCounter::0,
      TargetNodeCounter: .NodeCounter::0]
    filter[.Source # 0]
    project[Source, Target, SourcePos, TargetPos, SourceNodeCounter, TargetNodeCounter]
    aconsume]
  consume]

# 46.9466sec
# CA 1:22min (81.5328sec)

let EdgesDown_WayId_flist = NodesDesc_WayId_dlo hadoopMap[DLF; . feed
  para(Roads_WayId_dlo) feed
  filter[.WayInfo afeed filter[.WayTagKey = "oneway"] filter[(.WayTagValue = "yes")] count = 0] {r}
  itHashJoin[WayId, WayId_r]
  projectextend[WayId; Sections: fun(t:TUPLE) attr(t, Sections) afeed
    extend[Curve: fun(u: TUPLE) attr(t, NodeList_r) afeed sortby[NodeCounter_r desc]
        filter[.NodeCounter_r between[attr(u, TargetNodeCounter), attr(u, SourceNodeCounter)] ]
        projecttransformstream[Pos_r] collect_sline[TRUE],
      RoadName: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "name"] extract [WayTagValue_r],
      RoadType: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "highway"] extract [WayTagValue_r],
      MaxSpeed: attr(t, WayInfo_r) afeed filter[.WayTagKey_r = "maxspeed"] extract [WayTagValue_r] ]
    aconsume ]
  unnest[Sections] ]

# 53.3496sec
# 453944 correct
# CA 3:48min (228.05sec)

# query EdgesDown_WayId_flist hadoopMap[DLF; . count feed transformstream ] collect[] sum[Elem]
# correct

# Q10
# Merge both directions into the ordered relation Edges, keyed by
# (Source, Target).

let Edges = EdgesUp feed EdgesDown feed concat
  projectextend[Source, Target, SourcePos, TargetPos, SourceNodeCounter,
    TargetNodeCounter, Curve, RoadName, RoadType, MaxSpeed; WayId: .WayId]
  oconsume[Source, Target]

# 169 secs

# Q10 parallel
# correct way (CLUSTER_SIZE parameter omitted, is default)

let Edges_WayId_dlo = EdgesUp_WayId_flist EdgesDown_WayId_flist
  hadoopReduce2[WayId, WayId, DLO; . .. concat
    projectextend[Source, Target, SourcePos, TargetPos, SourceNodeCounter,
      TargetNodeCounter, Curve, RoadName, RoadType, MaxSpeed; WayId: .WayId]
    oconsume[Source, Target] ]

# 50.6366sec
# 934081 correct
# CA 1:46min (105.711sec)

# query Edges_WayId_dlo hadoopMap[DLF; . feed count feed transformstream ] collect[] sum[Elem]

# Edge index for spatial access: one tuple per edge with a defined curve
# bounding box, stored sorted by the (scaled) source-position box.

let EdgeIndex = Edges feed
  projectextend[Source, Target, Curve, SourcePos; Box: bbox(.Curve)]
  filter[isdefined(.Box)]
  extend[Box2: bbox(.SourcePos) scale[1000000.0]]
  sortby[Box2]
  project[Source, Target, Curve, Box]
  consume

# 99 secs

# query nodes(EdgeIndex feed addid bulkloadrtree[Box]) consume

derive EdgeIndex_Box_rtree = EdgeIndex feed addid bulkloadrtree[Box]

# 9 secs

# Queries
#
# Get edges from a restricted area:
# let hombruch = [const rect value (7.419 7.457 51.462 51.484)]
# query EdgeIndex_Box_rtree EdgeIndex windowintersects[hombruch] remove[Box]
#   loopsel[Edges orange[.Source, .Target; .Source, .Target]] consume
# 2.17 secs, 3.83, 2.02
# query Ways_Curve_rtree windowintersectsS[hombruch] Ways gettuples remove[NodeList] consume
# 5.08 secs, 4.03, 5.66

# Runtime for Scripts/OrderedRelationGraphFromFullOSMImport.SEC: Times (elapsed / cpu):
# 84:56min (5095.72sec) /2510.84sec = 2.02949

# CA Runtime for Scripts/OrderedRelationGraphFromFullOSMImportParallelOnly.SEC: Times (elapsed /
# cpu): 164:19min (9859sec) /0.06sec = 164317