Files
secondo/bin/Scripts/DistributedSimilarityClustering.sec
2026-01-23 17:03:45 +08:00

100 lines
3.2 KiB
Plaintext

# Script for Distributed Similarity Clustering as in Paper on Distributed Arrays - Report 381
# sizeT = size(T) of the distributed array T (presumably its number of
# slots). It is shared so that queries running on the workers can refer to
# it by name.
let sizeT = size(T);
query share("sizeT", TRUE, Workers);

# SS: a sample of T. Every slot contributes some[10000 div sizeT] tuples and
# dsummarize gathers the per-slot samples into one relation on the master.
let SS = T dmap["", . feed some[10000 div sizeT]] dsummarize consume;

# k is not referenced again in this file; presumably it is a parameter read
# by Scripts/SimilarityPartitioning.sec - TODO confirm.
let k = 50;
# Runs the script Scripts/SimilarityPartitioning.sec at this point.
# NOTE(review): the object PC used below is not created in this file;
# presumably the included script creates it (from SS and k) - confirm.
@&Scripts/SimilarityPartitioning.sec;
# n: number of tuples in PC. Below it is passed as the size argument when V
# is partitioned, and it is the multiplier in the CID formula of X.
let n = PC count;
# Parameters handed to dbscanM below. Eps is also the search radius of the
# mdistRange calls (unit not visible here - presumably meters, TODO confirm).
let MinPts = 10;
let Eps = 100.0;
# Geoid object passed to distance and mcreatemtree for the Pos attribute.
let wgs84 = create_geoid("WGS1984");
# Port number passed to collectB, collect2, dmap2 and dmap3 below.
# NOTE: "..." is a placeholder and not valid syntax - replace it with a free
# port number before running the script. The command is terminated
# explicitly so that it cannot absorb the share command that follows.
let myPort = ...;
# Make the master's objects available under the same names on the workers,
# because the worker-side functions and dlet commands below refer to PC,
# MinPts, Eps, wgs84 and n by name.
# NOTE(review): the second argument TRUE presumably allows overwriting an
# existing object of that name on a worker - confirm against the operator
# description of share.
query share("PC", TRUE, Workers);
query share("MinPts", TRUE, Workers);
query share("Eps", TRUE, Workers);
query share("wgs84", TRUE, Workers);
query share("n", TRUE, Workers);
# Clear the main-memory objects on the master and, via dcommand, on the
# workers of T.
query memclear(); query T dcommand['query memclear()'] consume;
# Run meminit(3600) on the workers (presumably the main-memory budget in
# MB - TODO confirm).
query T dcommand['query meminit(3600)'] consume;
# On each worker: PCm is the shared relation PC loaded as a main-memory
# relation, and PCm_Pos_mtree is a main-memory m-tree over its Pos attribute
# built with the wgs84 geoid. Both are used by the distance queries in V.
query T dlet["PCm", 'PC feed mconsume'] consume;
query T dlet["PCm_Pos_mtree", 'PCm mcreatemtree[Pos, wgs84]'] consume;
# V: the tuples of T, repartitioned by partition center.
#  1. For every tuple t, the first loopjoin takes the first result of
#     mdistScan on PCm (head[1], presumably the nearest center) and keeps its
#     number N together with Dist, the distance from t to that center.
#  2. The second loopjoin finds every center within Dist + 2 * Eps of the
#     tuple and emits one copy of the tuple per such center, tagged with that
#     center's number as N2.
#  3. The copies are partitioned by N2 (size argument n) and collected into
#     the distributed array "V".
# Consequently a tuple with N = N2 sits in the partition of the center found
# in step 1, and a tuple with N # N2 is an additional copy in another
# partition.
let V = T
  dmap["", . feed
    loopjoin[fun(t: TUPLE) PCm_Pos_mtree PCm mdistScan[attr(t, Pos)] head[1]
      projectextend[N; Dist: distance(attr(t, Pos), .Pos, wgs84)]]
    loopjoin[fun(u: TUPLE) PCm_Pos_mtree PCm mdistRange[attr(u, Pos),
      attr(u, Dist) + (2 * Eps)] projectextend[; N2: .N]] ]
  partition["", .N2, n]
  collectB["V", myPort];
# X: result of clustering each slot of V locally.
# Pos2 is Pos mapped through gk and is the attribute dbscanM clusters on,
# with parameters Eps and MinPts; the local cluster number is written to
# CID0. CID = CID0 * n + $2 combines the local number with the second
# function argument (presumably the slot number - TODO confirm) so that ids
# from different slots do not collide.
# NOTE(review): the IsCore attribute read later is presumably added by
# dbscanM - confirm.
let X = V
  dmap["X", $1 feed extend[Pos2: gk(.Pos)] dbscanM[Pos2, CID0, Eps, MinPts]
    extend[CID: (.CID0 * n) + $2] consume
  ];
# Run memclear on the workers again before the next main-memory structures
# (Wm, Wm_Pos_mtree) are built; the query returns the number of result
# tuples whose Ok attribute is true.
query T dcommand['query memclear()'] filter[.Ok] count;
# Wm: per slot of X, the tuples with N # N2 (copies whose own center N
# differs from the partition N2 they were placed in), loaded as a
# main-memory relation.
let Wm = X dmap["Wm", . feed filter[.N # .N2] mconsume];
# Wm_Pos_mtree: per slot, a main-memory m-tree over the Pos attribute of Wm.
let Wm_Pos_mtree = Wm dmap["Wm_Pos_mtree", . mcreatemtree[Pos, wgs84]];
# Neighbors: per slot, pairs (P, Q) where P is a tuple with N = N2 and Q is
# a tuple of Wm in the same slot that lies within Eps of P.
# Attributes from the P side: P (Osm_id), PosP, CID0, CIDp (CID), IsCoreP
# and Np (N). Attributes from the Q side: Q (Osm_id) and QPos.
let Neighbors = X Wm_Pos_mtree Wm
  dmap3["Neighbors", $1 feed filter[.N = .N2]
    loopsel[fun(t: TUPLE) $2 $3 mdistRange[attr(t, Pos), Eps]
      projectextend[; P: attr(t, Osm_id), PosP: attr(t, Pos), CID0: attr(t, CID0),
        CIDp: attr(t, CID), IsCoreP: attr(t, IsCore), Np: attr(t, N), Q: .Osm_id,
        QPos: .Pos]]
    , myPort];
# Two redistributions of Neighbors using the same hash function
# hashvalue(., 999997): one keyed on P, one keyed on Q. A tuple of
# NeighborsByQ with Q = x and a tuple of NeighborsByP with P = x therefore
# land in the same slot, which is what the slot-wise joins below rely on.
# NOTE(review): the size argument 0 presumably keeps the slot count of
# Neighbors - confirm against the operator description of partition.
let NeighborsByP = Neighbors partition["", hashvalue(.P, 999997), 0]
collect2["NeighborsByP", myPort];
let NeighborsByQ = Neighbors partition["", hashvalue(.Q, 999997), 0]
collect2["NeighborsByQ", myPort];
# Merge: pairs of cluster ids that have to be merged.
# The join Q_n1 = P_n2 followed by the filter P_n1 = Q_n2 matches a pair
# (P, Q) with its mirror pair (Q, P). Only matches where both P sides are
# core points are kept; the result is the duplicate-free set of
# (CIDp_n1, CIDp_n2) pairs per slot.
let Merge = NeighborsByQ NeighborsByP
  dmap2["Merge", . feed {n1} .. feed {n2} itHashJoin[Q_n1, P_n2]
    filter[.P_n1 = .Q_n2]
    filter[.IsCoreP_n1 and .IsCoreP_n2]
    project[CIDp_n1, CIDp_n2]
    rduph[]
    consume, myPort
  ];
# Assignments: new cluster ids for non-core points next to a core point of
# another partition.
# Same mirror-pair join as in Merge, but keeping matches where the n1 side is
# a core point and the n2 side is not. Each result tuple names the non-core
# object (P: P_n2), its partition (N: Np_n2) and the cluster id of the core
# point (CID: CIDp_n1). krduph[P] keeps one tuple per P. The result is
# repartitioned by N and collected as "Assignments" for the slot-wise update
# of X further down.
let Assignments = NeighborsByQ NeighborsByP
  dmap2["", . feed {n1} .. feed {n2} itHashJoin[Q_n1, P_n2]
    filter[.P_n1 = .Q_n2]
    filter[.IsCoreP_n1 and not(.IsCoreP_n2)]
    projectextend[; P: .P_n2, N: .Np_n2, CID: .CIDp_n1]
    krduph[P]
    consume, myPort
  ]
  partition["", .N, 0]
  collect2["Assignments", myPort];
# MergeM: main-memory graph built on the master from all Merge pairs, with
# one edge CIDp_n1 -> CIDp_n2 per distinct pair; 1.0 is the third argument
# of createmgraph2 (presumably the edge cost - TODO confirm).
let MergeM = Merge dsummarize rduph[] createmgraph2[CIDp_n1, CIDp_n2, 1.0];
# MaxCN: the largest CID over all slots of X (per-slot maximum, then the
# maximum of those values).
let MaxCN = X dmap["", . feed max[CID] feed transformstream] dsummarize max[Elem];
# Renumber: mapping from an old cluster id (CID) to its merged id (CIDnew).
# Every connected component of MergeM becomes one cluster; its new id is
# CompNo + MaxCN, where MaxCN is the largest CID in X (presumably so that
# new ids cannot collide with existing ones - TODO confirm).
let Renumber = MergeM mg2connectedcomponents projectextend[; CID: .CIDp_n1,
  CIDnew: .CompNo + MaxCN] rduph[] consume;
# Shared because the final update query reads Renumber on the workers.
query share("Renumber", TRUE, Workers);
# Apply Assignments to X in place. Per slot, the tuples with N = N2 are
# joined with Assignments on Osm_id = P and their CID is overwritten with
# the assigned one. Each slot returns its count, and tie[. + ..] adds the
# per-slot counts up.
# This runs before the Renumber update below, so the ids written here are
# also subject to that renumbering.
query X Assignments
  dmap2["", $1 feed addid filter[.N = .N2] $2 feed krduph[P] {a}
    itHashJoin[Osm_id, P_a] $1 updatedirect2[TID; CID: .CID_a] count, myPort
  ]
  getValue tie[. + ..];
# Apply Renumber to X in place. Per slot, the tuples with N = N2 whose CID
# occurs in Renumber get their CID replaced by CIDnew. Each slot returns its
# count, and tie[. + ..] adds the per-slot counts up.
query X
  dmap["", $1 feed addid filter[.N = .N2] Renumber feed krduph[CID] {a}
    itHashJoin[CID, CID_a] $1 updatedirect2[TID; CID: .CIDnew_a] count
  ]
  getValue tie[. + ..];