# Files
# 2026-01-23 17:03:45 +08:00
#
# 62 lines
# 1.8 KiB
# Plaintext
#
# Recreate the working database from scratch.
# NOTE(review): `delete database` presumably errors if `pagerank` does not
# exist yet — on a first run this line may need to be skipped; verify.
delete database pagerank;
create database pagerank;
open database pagerank;
#
# Dampening factor
# Standard PageRank damping parameter d = 0.85.
#
let D = 0.85;
#
# Extended worker relation
# Restores the worker configuration (here: a 2x2 setup, per the object name
# TwoTwoWorkers) and records how many workers are available.
#
restore Workers from TwoTwoWorkers;
let WorkerNumber = Workers count;
#
# Partition function
# Maps a vertex id to a worker slot by hashing the id modulo WorkerNumber,
# so vertices are distributed evenly across the restored workers.
#
let PartitionFunction = fun (id: int) (hashvalue(id, WorkerNumber));
#
# Edges of the Graph
# Each tuple (Source, Target) is one directed link; the restored object name
# suggests the Stanford web graph with dangling targets already removed.
#
restore Links from LinksStanfordNoDangling;
#
# Remove dangling Links => Might take a while on big graphs, you may want to check how long an iteration takes first.
# You can also see the rate at which it's converging pretty quickly.
# The Stanford dataset with ~2.3 million edges took 50-ish iterations of 1:30min each to complete
#
# Keeps only links whose Target also appears as a Source somewhere, i.e. drops
# edges pointing at pages with no outgoing links. NOTE(review): the
# sort/rdup/kmergediff pipeline appears to compute (distinct Targets) minus
# (distinct Sources) and subtract those dangling Targets from the sorted link
# stream — confirm against the SECONDO operator manual before relying on it.
let RemoveDanglingLinks = fun (links: rel(tuple([Source: int, Target: int]))) (links feed sortby[Target] links feed project[Target] sort rdup links feed projectextend[;Target: .Source] sort rdup kmergediff kmergediff);
# Fixpoint loop (disabled): removing dangling links can expose new dangling
# links, so repeat until the edge count stops shrinking. NumberLinks starts
# above any realistic count so the loop body runs at least once.
#let NumberLinks = 100000000;
#while Links count < NumberLinks do { update NumberLinks := Links count | update Links := RemoveDanglingLinks(Links) consume | query Links count } endwhile;
#delete NumberLinks;
#
# Vertices of the Graph
# Derives the vertex set from the edge Sources: one tuple (Id, Value) per
# distinct Source. NOTE(review): pages that only ever appear as Targets get no
# tuple here — presumably fine after dangling-link removal; verify.
#
let PagesPersistent = Links feed projectextend[;Id: .Source, Value: 0.0] sort rdup consume;
# Total number of pages, used for the initial rank and the teleport term.
let N = PagesPersistent count;
# Teleport (minimum) rank contribution per page: (1 - d) / N.
let Minimum = ((1 - D) / N);
# Initialize every page with the uniform starting rank 1/N.
update PagesPersistent := PagesPersistent feed projectextend[Id; Value: 1.0 / N] consume;
# Main-memory copy of the vertex relation for fast iteration access.
let Pages = PagesPersistent feed mconsume;
#
# Connect Edge Pages => Leads to WAY! more Links, so use with caution
# For every page that appears as a Target but never as a Source (a sink),
# this appears to fabricate outgoing links from that sink back to every page,
# so rank mass is not lost at sinks. NOTE(review): the loopjoin emits one
# (Source: sink, Target: page.Id) pair per page per sink, i.e. up to
# sinks x N new edges — confirm intent before enabling on large graphs.
#
let ConnectEdgePages = fun (pages: rel(tuple([Id: int, Value: real])), links: rel(tuple([Source: int, Target: int]))) (links feed project[Target] sort links feed projectextend[;Target: .Source] sort mergediff projectextend[; Source: .Target] loopjoin[pages feed projectextend[; Target: .Id]]);
# Disabled by default: appends the fabricated sink edges to Links and re-sorts.
# update Links := Links feed ConnectEdgePages(PagesPersistent, Links) concat sortby[Source] consume;