Files
secondo/bin/Scripts/CreatePregelDblpPGraph2Part1.psec

92 lines
2.5 KiB
Plaintext
Raw Normal View History

2026-01-23 17:03:45 +08:00
/*
Script to create a distributed property graph for the dblp dataset.
Run this script without transactions, i.e. with SecondoTTYNT.
1 Preparations
Get the dblp data.
Within a directory, say /home/ralf/Daten, execute:
----
wget https://dblp.uni-trier.de/xml/dblp.xml.gz
gunzip dblp.xml.gz
----
Start monitors.
2 Set Up a Database
2.1 Import DBLP Data
*/
# Create the working database and open it; all relations defined by this
# script are stored in it.
create database dblppregel0820;
open database dblppregel0820;
# Import the DBLP XML dump into the relation Document_raw2. The second path is
# the argument of dblpimport (a stopword file, judging by its name). Both
# absolute paths are machine-specific and must be adapted to the local setup.
# The command is closed with ';' like the two commands above, so that it does
# not depend on a following empty line to be terminated.
let Document_raw2 = '/home/ralf/Daten/dblp.xml'
  dblpimport['/home/ralf/Daten/Stopwords.txt'];
# Alternative to a fresh import: restore a previously saved copy.
# restore Document_raw2 from Document_raw2
/*
2.2 Create Relations on Master
*/
# Start of the command sequence that derives all node and edge relations
# from Document_raw2.
# DocumentP: the documents of Document_raw2 without the attributes
# AuthorsList and Keywords.
{
let DocumentP =
  Document_raw2 feed
    remove[AuthorsList, Keywords]
  consume
# KeywordP: duplicate-free list of keywords over all documents. The unnested
# attribute Keyword is renamed to Word, and addcounter (start value 1) adds
# the identifier attribute Wordid.
| let KeywordP =
    Document_raw2 feed
      project[Keywords]
      unnest[Keywords]
      rduph[]
      renameAttr[Word: Keyword]
      addcounter[Wordid, 1]
    consume
# HAS_KEYWORDp: edge relation (Docid, Wordid). Each (document, keyword) pair
# is matched against KeywordP on the keyword text to pick up its Wordid.
# The document side is renamed with suffix _d before the join, so Docid_d is
# mapped back to Docid afterwards.
| let HAS_KEYWORDp =
    Document_raw2 feed
      project[Docid, Keywords]
      unnest[Keywords] {d}
    KeywordP feed
    itHashJoin[Keyword_d, Word]
    projectextend[Wordid; Docid: .Docid_d]
    project[Docid, Wordid]
    consume
# AuthorP: duplicate-free list of the entries found in AuthorsList over all
# documents; addcounter (start value 1) adds the identifier attribute Authorid.
| let AuthorP =
    Document_raw2 feed
      project[AuthorsList]
      unnest[AuthorsList]
      rduph[]
      addcounter[Authorid, 1]
    consume
# WROTEp: edge relation between authors and documents. Each (document, author)
# pair is matched against AuthorP on the author name to pick up its Authorid.
# Note that the result attributes are (Authorid, Docid) in this order; there
# is no final project as in the other edge relations.
| let WROTEp =
    Document_raw2 feed
      project[Docid, AuthorsList]
      unnest[AuthorsList] {d}
    AuthorP feed
    itHashJoin[Name_d, Name]
    projectextend[Authorid; Docid: .Docid_d]
    consume
# JournalsP: duplicate-free list of non-empty journal names (attribute Name);
# addcounter (start value 1) adds the identifier attribute Journalid.
| let JournalsP =
    Document_raw2 feed
      project[Journal]
      filter[.Journal # ""]
      renameAttr[Name: Journal]
      rduph[]
      addcounter[Journalid, 1]
    consume
# AT_JOURNALp: edge relation (Docid, Journalid). Only documents of Type
# "article" are considered; they are matched against JournalsP on the
# journal name. The document side carries the suffix _d during the join.
| let AT_JOURNALp =
    Document_raw2 feed
      filter[.Type = "article"]
      project[Docid, Journal] {d}
    JournalsP feed
    itHashJoin[Journal_d, Name]
    renameAttr[Docid: Docid_d]
    project[Docid, Journalid]
    consume
# ConferenceP: duplicate-free list of Booktitle values (attribute Name) taken
# from documents of Type "inproceedings"; addcounter (start value 1) adds the
# identifier attribute Conferenceid.
| let ConferenceP =
    Document_raw2 feed
      filter[.Type = "inproceedings"]
      project[Booktitle]
      renameAttr[Name: Booktitle]
      rduph[]
      addcounter[Conferenceid, 1]
    consume
# AT_CONFp: edge relation (Docid, Conferenceid), built by matching the
# Booktitle of every document against the names in ConferenceP.
# NOTE(review): unlike ConferenceP, no filter on Type is applied here, so any
# document whose Booktitle equals a conference name is linked - confirm that
# this is intended.
| let AT_CONFp =
    Document_raw2 feed
      project[Docid, Booktitle] {d}
    ConferenceP feed
    itHashJoin[Booktitle_d, Name]
    renameAttr[Docid: Docid_d]
    project[Docid, Conferenceid]
    consume
# PublisherP: duplicate-free list of defined Publisher values (attribute
# Name); addcounter (start value 1) adds the identifier attribute Publisherid.
| let PublisherP =
    Document_raw2 feed
      filter[isdefined(.Publisher)]
      project[Publisher]
      renameAttr[Name: Publisher]
      rduph[]
      addcounter[Publisherid, 1]
    consume
# PUBLISHED_BYp: edge relation (Docid, Publisherid), built by matching the
# Publisher of every document against the names in PublisherP. The document
# side carries the suffix _d during the join.
| let PUBLISHED_BYp =
    Document_raw2 feed {d}
      project[Docid_d, Publisher_d]
    PublisherP feed
    itHashJoin[Publisher_d, Name]
    renameAttr[Docid: Docid_d]
    project[Docid, Publisherid]
    consume
# End of the command sequence. It is closed with ';' so that the following
# command is not read as a continuation of it when no empty line separates
# the two.
};
# Close the database opened at the start of the script; terminated with ';'
# like the create/open commands.
close database;