/*
Script to create a distributed property graph for the dblp dataset.

Run this script without transactions, hence with SecondoTTNT.

1 Preparations

Get the dblp data.

Within a directory, say /home/ralf/Daten, execute:

----
wget https://dblp.uni-trier.de/xml/dblp.xml.gz
gunzip dblp.xml.gz
----

The import in Section 2.1 also reads the file /home/ralf/Daten/Stopwords.txt
(presumably a stop word list); make sure it exists before running the script.

Start the monitors.

2 Set Up a Database

2.1 Import DBLP Data

*/

# Create and open the working database for the dblp property graph.
create database dblppregel0820;

open database dblppregel0820;

# Import the DBLP XML dump into the object Document_raw2. The bracketed path is
# the argument handed to dblpimport (presumably the stop word list used when
# keywords are extracted - TODO confirm against the operator description).
let Document_raw2 = '/home/ralf/Daten/dblp.xml'
  dblpimport['/home/ralf/Daten/Stopwords.txt'];

# Disabled alternative to the import above (kept from the original script):
# restore Document_raw2 from Document_raw2

/*
2.2 Create Relations on Master

*/

# Derive the property-graph relations from Document_raw2 in one command
# sequence of the form { cmd | cmd | ... }. A leading "|" separates two
# commands of the sequence; the braces deliberately contain no blank lines.
#
# Node relations (suffix P):
#   DocumentP   - Document_raw2 without the attributes AuthorsList and Keywords
#   KeywordP    - distinct keywords as (Word, Wordid)
#   AuthorP     - distinct unnested AuthorsList entries plus Authorid
#   JournalsP   - distinct non-empty journal names as (Name, Journalid)
#   ConferenceP - distinct Booktitle values of "inproceedings" documents
#                 as (Name, Conferenceid)
#   PublisherP  - distinct defined publishers as (Name, Publisherid)
# The ...id attributes are counters added by addcounter, starting at 1.
#
# Edge relations (suffix p), each a hash join of documents with a node relation:
#   HAS_KEYWORDp(Docid, Wordid)        WROTEp(Authorid, Docid)
#   AT_JOURNALp(Docid, Journalid)      AT_CONFp(Docid, Conferenceid)
#   PUBLISHED_BYp(Docid, Publisherid)
#
# NOTE(review): AT_CONFp joins documents of every Type on Booktitle, whereas
# ConferenceP is built from "inproceedings" documents only - confirm that this
# is intended.
{
let DocumentP = Document_raw2 feed remove[AuthorsList, Keywords] consume
| let KeywordP = Document_raw2 feed project[Keywords] unnest[Keywords] rduph[]
    renameAttr[Word: Keyword]
    addcounter[Wordid, 1] consume
| let HAS_KEYWORDp = Document_raw2 feed project[Docid, Keywords]
    unnest[Keywords] {d}
    KeywordP feed itHashJoin[Keyword_d, Word]
    projectextend[Wordid; Docid: .Docid_d]
    project[Docid, Wordid]
    consume
| let AuthorP = Document_raw2 feed project[AuthorsList]
    unnest[AuthorsList] rduph[]
    addcounter[Authorid, 1] consume
| let WROTEp = Document_raw2 feed project[Docid, AuthorsList]
    unnest[AuthorsList] {d}
    AuthorP feed itHashJoin[Name_d, Name]
    projectextend[Authorid; Docid: .Docid_d]
    consume
| let JournalsP = Document_raw2 feed project[Journal] filter[.Journal # ""]
    renameAttr[Name: Journal]
    rduph[]
    addcounter[Journalid, 1] consume
| let AT_JOURNALp = Document_raw2 feed filter[.Type = "article"] {d}
    project[Docid_d, Journal_d]
    JournalsP feed itHashJoin[Journal_d, Name]
    renameAttr[Docid: Docid_d]
    project[Docid, Journalid]
    consume
| let ConferenceP = Document_raw2 feed filter[.Type = "inproceedings"]
    project[Booktitle]
    renameAttr[Name: Booktitle]
    rduph[]
    addcounter[Conferenceid, 1] consume
| let AT_CONFp = Document_raw2 feed {d} project[Docid_d, Booktitle_d]
    ConferenceP feed itHashJoin[Booktitle_d, Name]
    renameAttr[Docid: Docid_d]
    project[Docid, Conferenceid]
    consume
| let PublisherP = Document_raw2 feed filter[isdefined(.Publisher)]
    project[Publisher]
    renameAttr[Name: Publisher]
    rduph[]
    addcounter[Publisherid, 1] consume
| let PUBLISHED_BYp = Document_raw2 feed {d} project[Docid_d, Publisher_d]
    PublisherP feed itHashJoin[Publisher_d, Name]
    renameAttr[Docid: Docid_d]
    project[Docid, Publisherid]
    consume
}

# All relations are created; close the database.
close database;