# Script to cluster Places from database nrw (distributed DBSCAN)
# Open the database this script works on.
open database nrw;

# Create the worker relation: one tuple (Host, Port, Config) per worker.
# Two workers are listed for each of the six hosts, twelve in total. That
# number has to agree with CntWorkers and with the literal 12 used in the
# distribution and sampling queries of this script.
# Workers is deleted first so that the let below can create it anew.
delete Workers;

let Workers = [const rel(tuple([Host: text, Port: int, Config: text])) value
  (
    ('132.176.69.181' 59801 'SecondoConfig.ini')
    ('132.176.69.181' 59802 'SecondoConfig.ini')
    ('132.176.69.182' 59803 'SecondoConfig.ini')
    ('132.176.69.182' 59804 'SecondoConfig.ini')
    ('132.176.69.183' 59805 'SecondoConfig.ini')
    ('132.176.69.183' 59806 'SecondoConfig.ini')
    ('132.176.69.184' 59807 'SecondoConfig.ini')
    ('132.176.69.184' 59808 'SecondoConfig.ini')
    ('132.176.69.185' 59809 'SecondoConfig.ini')
    ('132.176.69.185' 59810 'SecondoConfig.ini')
    ('132.176.69.186' 59811 'SecondoConfig.ini')
    ('132.176.69.186' 59812 'SecondoConfig.ini')
  )];
# Set some variables.
# CntWorkers bounds the worker loops and the intstream ranges further down.
# It has to agree with the number of tuples in Workers and with the literal
# 12 used in the distribution and sampling queries.
delete CntWorkers;

let CntWorkers = 12;
#distribute data round robin ##################################################
# Streams the Places relation and distributes its tuples over 12 slots that
# are stored on the servers listed in Workers, under the name "DistArray".
# The argument TRUE presumably selects the round robin mode named in the
# heading above - TODO confirm against the dfdistribute3 documentation.
let DistArray = Places feed
  dfdistribute3["DistArray", 12 , TRUE, Workers];
############## SAMPLING #######################################################
# Create the samples on each worker.
#
# Sample size as described by the original author:
#   n   ... count of objects
#   t   ... count of machines
#   m   ... m = n/t, count of objects at each machine
#   rho ... rho = m/ln(n*t), choose each rho-th element
#   s   ... s = m/rho, sample size
#
# Create a single sample per slot: the query takes the first
# floor(ln(Places count * 12)) tuples of each slot via head (logB with base
# 2.718281828 is the natural logarithm) and combines every sampled tuple with
# each WidSamp value 0 .. 11 by product.
# NOTE(review): head takes the first tuples of a slot, not every rho-th one
# as the description above says - confirm that this is intended.
# NOTE(review): Places count and the literal 12 are evaluated inside the dmap
# function - confirm that Places is available wherever that function runs.
let SingSampArr=
  DistArray dmap["SingSampArr", . head[real2int(
    floor(logB(Places count * 12, 2.718281828)))]
    intstream(0,12 - 1) namedtransformstream[WidSamp]
    product];

# Distribute the sample files: partition the sample tuples by WidSamp into
# 12 parts. Because of the product above, every sampled tuple occurs once for
# each WidSamp value.
let SampMatr= SingSampArr
  partition["SampMatr", .WidSamp ,12];

# Collect each part into a relation. 1236 is presumably the port used for the
# data exchange between the workers - TODO confirm.
let SampArray = SampMatr
  areduce["SampArray", . consume, 1236];
############ start distributed Sampling and redistribute data #################
# dmap2 combines each slot of DistArray (.) with the corresponding slot of
# SampArray (..) and runs distsamp on the pair. The result is repartitioned
# by the attribute WID into 12 parts named "DistMat".
# distsamp presumably assigns every tuple a target worker id WID based on the
# samples - TODO confirm against the operator documentation.
# NOTE(review): if 91234 is meant as a TCP port it exceeds the valid maximum
# of 65535 - confirm the intended value.
let DistMat = DistArray SampArray dmap2["", . ..
  distsamp[GeoData, WID, 12] , 91234]
  partition[ "DistMat",.WID, 12 ];
############### reduce with DbDacScan #########################################
# areduce runs dbdacscan on every part of DistMat. The operator clusters on
# the attribute GeoData, writes the cluster number to the new attribute
# ClusterId and is given the path of a NeighborFile.bin below secondoHome().
# 0.09 and 5 are presumably the DBSCAN parameters Eps and MinPts - TODO
# confirm. The same two values are passed to distclmerge in the merge phase
# and have to be kept in sync with it.
# 56874 is presumably the port used by areduce - TODO confirm.
let DbScan = DistMat areduce["DbScan", .
  dbdacscan [GeoData, ClusterId,
    secondoHome() + '/dfarrays/NRW/DbScan/NeighborFile.bin'
    ,0.09,5] , 56874 ];

###############################################################################
# set some auxiliary variables to start merging clusters ######################
# Each variable is deleted first so that the let can create it anew.

# HelpInt: index of the worker that receives files in the merge loop.
delete HelpInt;

let HelpInt = 0;

# AktRound: number of the current merge round, counted from 0.
delete AktRound;

let AktRound = 0;

# LastRound: number of merge rounds, ceil(log2(CntWorkers)); 4 for 12 workers.
delete LastRound;

let LastRound = real2int(ceil(logB(CntWorkers,2)));

# AktWorker: index of the worker currently handled by a loop.
delete AktWorker;

let AktWorker = 0;
########### start workers for under level of distributed Algebra ##############
# Call connect once for every tuple of Workers; the result of each call is
# kept in the attribute Started. The loops below address the connections by
# index 0 .. CntWorkers - 1, presumably in the order of the tuples in Workers
# - TODO confirm.
query Workers feed extend
  [Started: connect(.Host, .Port, .Config)]consume;

# Open databases: send the command 'open database nrw' to every connection
# 0 .. CntWorkers - 1 via prcmd.
query intstream(0,CntWorkers - 1) namedtransformstream[Server] extend[ W :
  'open database nrw'] prcmd[Server,W] consume;
#rename Files - so every Worker has the same Filename
# For each worker index i in 0 .. CntWorkers - 1 the remote query moves
#   secondoHome()/dfarrays/NRW/DbScan/DbScan_<i>.bin
# to
#   secondoHome()/dfarrays/NRW/DbScan/DbScan.bin
# on worker i. The remote query text is concatenated from two text literals
# with num2string(AktWorker) in between. Lines that continue a text literal
# are kept at column 0 so that the literal stays byte-identical.
update AktWorker := 0;

while AktWorker < CntWorkers do
{
query rquery (AktWorker,'query moveFile(secondoHome() +
"/dfarrays/NRW/DbScan/DbScan_" + "'
+ num2string(AktWorker) + '" + ".bin",
secondoHome() + "/dfarrays/NRW/DbScan/DbScan.bin")' ) |
update AktWorker := AktWorker + 1
}endwhile;
############### begin DistMerge ###############################################
# Merges the per-worker results pairwise in LastRound rounds.
# In round r (AktRound = r):
#   - the senders are the workers 2^r, 2^r + 2^(r+1), 2^r + 2*2^(r+1), ...
#     below CntWorkers, the receivers are 0, 2^(r+1), 2*2^(r+1), ...
#   - each sender transfers its DbScan.bin and NeighborFile.bin to its
#     receiver, where they are stored with the suffix _1, and then removes
#     its own two files
#   - every worker whose index is a multiple of 2^(r+1) runs distclmerge on
#     its own files and the received _1 files, writing the result back to
#     DbScan.bin and NeighborFile.bin, and then removes the _1 files.
# 9321 is presumably the port used by transferFile - TODO confirm.
# NOTE(review): CntWorkers = 12 is not a power of two. In the third round
# (AktRound = 2) worker 8 has no sender (it would be worker 12) but still
# runs distclmerge and removeFile without any _1 files - confirm that the
# operators tolerate the missing files.
# Lines that continue the multi-line text literal of distclmerge are kept at
# column 0 so that the literal stays byte-identical.
while AktRound < LastRound do
{
# send file to lower left neighbors ( 0 - n/2-1)
  update HelpInt := 0 |
  update AktWorker := real2int(pow (2,AktRound)) |
  while AktWorker < CntWorkers do
  {
    query transferFile(AktWorker, HelpInt,9321,
      rquery(AktWorker,"query secondoHome()") + '/dfarrays/NRW/DbScan/DbScan.bin',
      rquery(HelpInt,"query secondoHome()") + '/dfarrays/NRW/DbScan/DbScan.bin_1') |
    query transferFile(AktWorker, HelpInt,9321,
      rquery(AktWorker,"query secondoHome()") + '/dfarrays/NRW/DbScan/NeighborFile.bin',
      rquery(HelpInt, "query secondoHome()") + '/dfarrays/NRW/DbScan/NeighborFile.bin_1') |
    query rquery(AktWorker,
      'query removeFile(secondoHome() + "/dfarrays/NRW/DbScan/DbScan.bin")') |
    query rquery(AktWorker,
      'query removeFile(secondoHome() + "/dfarrays/NRW/DbScan/NeighborFile.bin")') |
    update HelpInt := HelpInt + real2int(pow (2,AktRound + 1)) |
    update AktWorker := AktWorker + real2int(pow (2,AktRound + 1))
  }
  endwhile |
################### execute distclmerge #######################################
  query intstream(0,CntWorkers - 1)
    filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
    pquery['query distclmerge(
secondoHome() + "/dfarrays/NRW/DbScan/DbScan.bin" ,
secondoHome() + "/dfarrays/NRW/DbScan/NeighborFile.bin" ,
secondoHome() + "/dfarrays/NRW/DbScan/DbScan.bin_1" ,
secondoHome() + "/dfarrays/NRW/DbScan/NeighborFile.bin_1" ,
GeoData, ClusterId, 0.09,5,
secondoHome() + "/dfarrays/NRW/DbScan/DbScan.bin" ,
secondoHome() + "/dfarrays/NRW/DbScan/NeighborFile.bin"
) count' ,0] consume |
# remove Files
  query intstream(0,CntWorkers - 1)
    filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
    pquery[
      'query removeFile(secondoHome() + "/dfarrays/NRW/DbScan/DbScan.bin_1")'
      ,0] consume |
  query intstream(0,CntWorkers - 1)
    filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
    pquery[
      'query removeFile(secondoHome() + "/dfarrays/NRW/DbScan/NeighborFile.bin_1")'
      ,0] consume |
  update AktRound := AktRound + 1
}
endwhile ;
##################### end DistMerge ###########################################
############################## Results: #######################################
# Runs on worker 0: reads the merged file DbScan.bin with ffeed5, sorts and
# groups the tuples by ClusterId and counts the groups, i.e. the number of
# distinct ClusterId values.
# Lines that continue the text literal are kept at column 0 so that the
# literal stays byte-identical.
query rquery(0,'query ffeed5(secondoHome()
+ "/dfarrays/NRW/DbScan/DbScan.bin") sortby[ClusterId]
groupby[ClusterId ; C : group count] count');
################remove files #######################################
# Remove the merged NeighborFile.bin on worker 0. The merged DbScan.bin on
# worker 0 is not removed here.
# Lines that continue the text literal are kept at column 0 so that the
# literal stays byte-identical.
query rquery(0,'query removeFile(secondoHome()
+ "/dfarrays/NRW/DbScan/NeighborFile.bin")');

# Delete old files: for every distributed object created by this script,
# first delete its remote objects on the workers, then the object itself.
query deleteRemoteObjects(DistArray);

delete DistArray;

query deleteRemoteObjects(SampArray);

delete SampArray;

query deleteRemoteObjects(SampMatr);

delete SampMatr;

query deleteRemoteObjects(SingSampArr);

delete SingSampArr;

query deleteRemoteObjects(DbScan);

delete DbScan;

query deleteRemoteObjects(DistMat);

delete DistMat;