Files
secondo/bin/Scripts/PregelConnectedComponentsPart1.psec

271 lines
7.3 KiB
Plaintext
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
//paragraph [10] Title: [{\Large \bf ] [}]
//[@] [\verb+@+]
//[%] [\%]
//[&] [\&]
//[ue] [\"{u}]
[10] Computing Strongly Connected Components in Pregel
Part 1
Ralf Hartmut G[ue]ting, March 12, 2019
Run this script with ~SecondoTTYBDB~ and prefix [@][%] or [@][&]. Part 1 builds the graph and defines the computing function. Part 2 computes the strongly connected components and can be called repeatedly.
Data needed are
1 a relation with edges of the graph called ~EdgesA~ as it is created by script ~OrderedRelationGraphFromFullOSMImport.SEC~,
2 a relation ~Workers~ defining workers in the Pregel format. An example tuple is
----
Host : 132.176.69.75
Port : 1783
Config : SecondoConfig.ini
MessageServerPort : 1883
----
1 Create a Graph
*/
# Database must exist and be open
# adapt next line
restore Workers from Workers12Pregel;

# Number of tuples in Workers; used as second argument of hashvalue below.
let WorkerNum = Workers count;

# Partitioning function: maps a node id to hashvalue(id, WorkerNum).
let PartFun = fun (id: int) (hashvalue(id, WorkerNum));

# adapt one of next lines
# restore EdgesA from EdgesHombruch;
# let EdgesA = "EdgesDortmund.bin" ffeed5 consume;
let EdgesA = "EdgesArnsberg.bin" ffeed5 consume;

# Store the edges ordered by Source.
update EdgesA := EdgesA feed sortby[Source] consume;

# add undefined curves (for applications in general)
# Every edge whose Curve is undefined gets a curve created from its two
# end points; the query result is the number of edges updated.
# Each command is terminated explicitly with ';' so that it does not run
# into the following command.
query EdgesA feed filter[not(isdefined(.Curve))]
EdgesA updatedirect[Curve: create_sline(.SourcePos, .TargetPos)]
count;

# extend[R: randint(999997)] sortby[R] remove[R]
# Node relation: the source and target end points of all edges are
# collected and duplicates removed; the nodes are brought into random order
# (sorting by a random number R that is removed again), numbered in field Id
# starting from 1, assigned a partition by PartFun, and initialized with
# Color 0 and Active TRUE.
let NodesPersistent =
EdgesA feed projectextend[; PrelId: .Source, Pos: .SourcePos]
EdgesA feed projectextend[; PrelId: .Target, Pos: .TargetPos]
concat
sort rdup
extend[R: randint(999997)] sortby[R] remove[R]
addcounter[Id, 1]
extend[Partition: PartFun(.Id)]
extend[Color: 0, Active: TRUE]
consume;

# (PrelId int) (Pos point) (Id int) (Partition int) (Color int)
# (Active bool)
# One more than the largest node Id; passed as the size argument to
# createmgraph3 when the main memory graphs are built.
let Size = (NodesPersistent feed max[Id]) + 1;

# Forward edges: each edge gets a Cost computed from its Curve and is joined
# twice with the node relation (once on Source, once on Target) so that the
# original node numbers (PrelId) are replaced by the new Ids and the
# partitions of both end nodes are attached.
let EdgesForward = EdgesA feed extend[Cost: size(gk(.Curve))]
project[Source, Target, Cost]
NodesPersistent feed itHashJoin[Source, PrelId]
NodesPersistent feed {n} itHashJoin[Target, PrelId_n]
projectextend[; Source: .Id, PartitionSource: .Partition, Target: .Id_n,
PartitionTarget: .Partition_n, Cost: .Cost]
consume;

# (Source, PartitionSource, Target, PartitionTarget, Cost)
# Backward edges: the forward edges with source and target (and their
# partitions) exchanged, ordered by the new Source.
let EdgesBackward = EdgesForward feed projectextend[; Source: .Target,
PartitionSource: .PartitionTarget, Target: .Source,
PartitionTarget: .PartitionSource, Cost: .Cost] sortby[Source] consume;

# (Source, PartitionSource, Target, PartitionTarget, Cost)
/*
2 Computing Strongly Connected Components
Nodes and edges have schemas:
----
Node(PrelId: int, Pos: point, Id: int, Partition: int, Color: int,
Active: bool)
Edge(Source: int, PartitionSource: int, Target: int, PartitionTarget: int,
Cost: real)
----
Initially nodes are numbered 1, ..., ~n~ in field ~Id~, ~Color~ = 0, ~Active~ = TRUE. Only active nodes receive messages.
2.1 Phase 1: Remove Singleton Components
----
To all nodes: checkSingle()
on checkSingle():
if count(successors) = 0 or count(predecessors) = 0 then
Color := Id;
Active := false;
send(predecessors, checkSingle());
send(successors, checkSingle());
disconnect;
endif
----
2.2 Phase 2: Spread the Maximum Color
----
To all nodes: resetColor()
on resetColor():
Color := 0;
send(successors, newColor(Id));
on newColor(X):
if X > Color then
Color := X;
send(successors, newColor(X));
endif
----
2.3 Phase 3: Spread Back to Predecessors of Maximum
----
To all nodes: maxBack()
on maxBack():
if Color = Id then
send(predecessors, maxColor(Color));
Active := FALSE
endif
on maxColor(X):
if Color = X then send(predecessors, maxColor(X));
Active := FALSE
endif
----
2.4 Control
----
while count(ActiveNodes) > 0 do
Phase 1;
Phase 2;
Phase 3;
endwhile
----
3 The Compute Function
The node programs are implemented in the following ~Compute~ function.
*/
# Clear main memory objects before the structures used by Compute are built.
query memclear();

# Main memory copy of the node relation.
let Nodes = NodesPersistent feed mconsume;

# Main memory graphs over the forward and the backward edges. Successors in
# Backward are the predecessors of a node in the original graph.
let Forward = EdgesForward feed createmgraph3[Source, Target, Cost, Size];

let Backward = EdgesBackward feed
createmgraph3[Source, Target, Cost, Size];

# One "newColor" message per node, carrying the node's own Id as Value.
let InitialMessages = NodesPersistent feed
projectextend[; NodeId: .Id, NodePartition: .Partition]
extend[Message: "newColor", Value: .NodeId]
consume;

# Empty message stream with the schema of InitialMessages.
let NoMessages = fun() InitialMessages feed head[0];

# Compute: the node program applied to a stream of incoming messages.
# Input:  stream of tuples (NodeId, NodePartition, Message, Value).
# Output: stream of new messages with the same four attributes.
# Messages are grouped by NodeId and Message, keeping the maximum Value as
# Max, and are joined with the active nodes only. The switch dispatches on
# the message name:
#   checkSingle - a node without successors in Forward or in Backward gets
#                 Color = Id and Active = FALSE, sends checkSingle to its
#                 neighbours in both graphs and is disconnected from both
#                 graphs. The filter[FALSE] part is evaluated only for the
#                 disconnect side effect and emits no tuples.
#   resetColor  - sets Color to 0 and sends newColor(Id) to the node itself.
#   newColor    - if Max > Color, Color is set to Max and newColor is sent
#                 to the successors in Forward.
#   maxBack     - a node with Color = Id becomes inactive and sends maxColor
#                 to its successors in Backward.
#   maxColor    - a node with Color = Max becomes inactive and passes
#                 maxColor on to its successors in Backward.
# Any other message name yields the empty stream NoMessages().
let Compute = fun (messages: stream(tuple([NodeId: int, NodePartition: int,
Message: string, Value: int])))
messages
sortby[NodeId] groupby[NodeId, Message; Max: group feed max[Value]]
Nodes mfeed addid filter[.Active] mergejoin[NodeId, Id]
loopsel[fun(t: TUPLE)
attr(t, Message)
switch[
"checkSingle",
t feed filter[(Forward mg3numsuccessors[.Id] = 0) or
(Backward mg3numsuccessors[.Id] = 0)]
Nodes mupdatedirect2[TID; Color: .Id, Active: FALSE]
loopsel[fun(t2: TUPLE)
Backward mg3successors[attr(t2, Id)]
projectextend[; NodeId: .Target,
NodePartition: PartFun(.Target),
Message: "checkSingle", Value: 0]
Forward mg3successors[attr(t2, Id)]
projectextend[; NodeId: .Target,
NodePartition: PartFun(.Target),
Message: "checkSingle", Value: 0]
concat
t2 feed extend[DisconnectF: Forward mg3disconnect[.Id],
DisconnectB: Backward mg3disconnect[.Id]]
filter[FALSE]
projectextend[; NodeId: .Id, NodePartition: PartFun(.Id),
Message: "checkSingle", Value: 0]
concat
]
; "resetColor",
t feed
Nodes mupdatedirect2[TID; Color: 0]
projectextend[; NodeId: .Id,
NodePartition: PartFun(.Id),
Message: "newColor", Value: .Id]
; "newColor",
t feed filter[.Max > .Color]
Nodes mupdatedirect2[TID; Color: .Max]
loopjoin[Forward mg3successors[.Id]]
projectextend[; NodeId: .Target,
NodePartition: PartFun(.Target),
Message: "newColor", Value: .Color]
; "maxBack",
t feed filter[.Id = .Color]
Nodes mupdatedirect2[TID; Active: FALSE]
loopjoin[Backward mg3successors[.Id]]
projectextend[; NodeId: .Target,
NodePartition: PartFun(.Target),
Message: "maxColor", Value: .Color]
; "maxColor",
t feed filter[.Color = .Max]
Nodes mupdatedirect2[TID; Active: FALSE]
loopjoin[Backward mg3successors[.Id]]
projectextend[; NodeId: .Target,
NodePartition: PartFun(.Target),
Message: "maxColor", Value: .Color]
; NoMessages()]
];

# Make the Compute function available on the workers listed in Workers.
query share("Compute", TRUE, Workers);
/*
4 Distribute Data to Workers
*/
# Nodes are distributed by PartFun(Id), edges by PartFun(Source), over
# WorkerNum slots on the workers. Each distributed array is then converted
# with makeSimple, passing the name of the corresponding persistent relation
# (presumably the name under which each worker sees its part - confirm
# against the Distributed2 documentation), and the intermediate array object
# is deleted.
let NodesD = NodesPersistent feed ddistribute4["NodesD", PartFun(.Id),
WorkerNum , Workers];

let NodesSD = NodesD makeSimple[FALSE, "NodesPersistent"];

delete NodesD;

let EdgesDf = EdgesForward feed ddistribute4["EdgesDf",
PartFun(.Source), WorkerNum , Workers];

let EdgesSDf = EdgesDf makeSimple[FALSE, "EdgesForward"];

delete EdgesDf;

let EdgesDb = EdgesBackward feed ddistribute4["EdgesDb",
PartFun(.Source), WorkerNum , Workers];

let EdgesSDb = EdgesDb makeSimple[FALSE, "EdgesBackward"];

delete EdgesDb;

# Share the remaining objects defined above with the workers in Workers:
# WorkerNum, PartFun and NoMessages are referenced by Compute or by each
# other; Size is used when the main memory graphs are created.
# NOTE(review): the second argument TRUE presumably permits overwriting an
# existing object of the same name on a worker - confirm.
query share("WorkerNum", TRUE, Workers);
query share("PartFun", TRUE, Workers);
query share("Size", TRUE, Workers);
query share("InitialMessages", TRUE, Workers);
query share("NoMessages", TRUE, Workers);
# nl: text constant whose content is a single line break.
let nl = '
'