62 lines
1.8 KiB
Plaintext
62 lines
1.8 KiB
Plaintext
|
|
# Drop a previously created pagerank database so that the create below starts from scratch.
delete database pagerank;
|
||
|
|
# Create the (empty) database that holds all objects defined in this script.
create database pagerank;
|
||
|
|
# Open the database; the let / restore / update commands below operate on it.
open database pagerank;
|
||
|
|
|
||
|
|
#
|
||
|
|
# Damping factor
|
||
|
|
#
|
||
|
|
|
||
|
|
# D enters the per-page constant Minimum = (1 - D) / N defined further down.
let D = 0.85;
|
||
|
|
|
||
|
|
#
|
||
|
|
# Extended worker relation
|
||
|
|
#
|
||
|
|
|
||
|
|
# Load the Workers relation from TwoTwoWorkers.
# NOTE(review): presumably a file resolved relative to the directory SECONDO is started from - confirm the path.
restore Workers from TwoTwoWorkers;
|
||
|
|
# Number of tuples in Workers; used as the second argument of hashvalue in PartitionFunction.
let WorkerNumber = Workers count;
|
||
|
|
|
||
|
|
#
|
||
|
|
# Partition function
|
||
|
|
#
|
||
|
|
|
||
|
|
# Maps an integer id to hashvalue(id, WorkerNumber).
# NOTE(review): presumably this yields a slot number in [0, WorkerNumber) - confirm against the hashvalue operator.
let PartitionFunction = fun (id: int) (hashvalue(id, WorkerNumber));
|
||
|
|
|
||
|
|
#
|
||
|
|
# Edges of the Graph
|
||
|
|
#
|
||
|
|
|
||
|
|
# Load the edge relation Links. The functions below take relations with the schema (Source: int, Target: int).
# NOTE(review): judging by its name, this data set already has dangling links removed - confirm.
restore Links from LinksStanfordNoDangling;
|
||
|
|
|
||
|
|
#
|
||
|
|
# Remove dangling Links => Might take a while on big graphs, you may want to check how long an iteration takes first.
|
||
|
|
# You can also see the rate at which it's converging pretty quickly.
|
||
|
|
# The Stanford dataset with ~2.3 million edges took 50-ish iterations of 1:30min each to complete
|
||
|
|
#
|
||
|
|
|
||
|
|
# Takes a link relation and returns a tuple stream (the commented-out loop below applies consume to the result).
# Reading the postfix expression: the inner kmergediff combines the distinct Targets with the distinct Sources
# (Sources renamed to Target); the outer kmergediff combines all links sorted by Target with that result.
# NOTE(review): presumably this keeps only links whose Target also occurs as a Source - confirm kmergediff semantics.
let RemoveDanglingLinks = fun (links: rel(tuple([Source: int, Target: int]))) (links feed sortby[Target] links feed project[Target] sort rdup links feed projectextend[;Target: .Source] sort rdup kmergediff kmergediff);
|
||
|
|
|
||
|
|
#let NumberLinks = 100000000;
|
||
|
|
#while Links count < NumberLinks do { update NumberLinks := Links count | update Links := RemoveDanglingLinks(Links) consume | query Links count } endwhile;
|
||
|
|
#delete NumberLinks;
|
||
|
|
|
||
|
|
|
||
|
|
#
|
||
|
|
# Vertices of the Graph
|
||
|
|
#
|
||
|
|
|
||
|
|
# One tuple per distinct Source id of Links, with Value set to the placeholder 0.0
# (replaced by 1.0 / N in the update below). Ids that occur only as a Target are not included.
let PagesPersistent = Links feed projectextend[;Id: .Source, Value: 0.0] sort rdup consume;
|
||
|
|
# N = number of tuples in PagesPersistent, i.e. the number of distinct pages.
let N = PagesPersistent count;
|
||
|
|
# Constant (1 - D) / N.
# NOTE(review): presumably the rank share every page receives independent of its in-links - confirm where it is used.
let Minimum = ((1 - D) / N);
|
||
|
|
|
||
|
|
# Keep Id and set every page's Value to the uniform value 1.0 / N.
update PagesPersistent := PagesPersistent feed projectextend[Id; Value: 1.0 / N] consume;
|
||
|
|
|
||
|
|
# Second copy of the page tuples, built with mconsume instead of consume.
# NOTE(review): presumably a main-memory relation - confirm against the mconsume operator.
let Pages = PagesPersistent feed mconsume;
|
||
|
|
|
||
|
|
|
||
|
|
#
|
||
|
|
# Connect Edge Pages => Leads to WAY! more Links, so use with caution
|
||
|
|
#
|
||
|
|
|
||
|
|
# Returns a tuple stream of additional (Source, Target) links (the commented-out usage below concats it to Links).
# The Targets left by mergediff against the Sources are renamed to Source, and loopjoin pairs each of them
# with every Id of pages as Target - hence the large number of generated links.
# NOTE(review): presumably mergediff leaves the Targets that never occur as a Source - confirm.
let ConnectEdgePages = fun (pages: rel(tuple([Id: int, Value: real])), links: rel(tuple([Source: int, Target: int]))) (links feed project[Target] sort links feed projectextend[;Target: .Source] sort mergediff projectextend[; Source: .Target] loopjoin[pages feed projectextend[; Target: .Id]]);
|
||
|
|
|
||
|
|
# update Links := Links feed ConnectEdgePages(PagesPersistent, Links) concat sortby[Source] consume;
|