Files
secondo/Algebras/DistributedClustering/Script/00_distributedClustering_Orte2.SEC
2026-01-23 17:03:45 +08:00

177 lines
6.4 KiB
Plaintext

# Script to cluster Orte2 from database berlintest.
# This first section opens the database, (re)creates the Workers relation and
# the CntWorkers counter, and distributes Orte2 over the workers as DistArray.
open database berlintest;
#create workers
# Drop a Workers object left over from an earlier run before recreating it.
delete Workers;
# One tuple (Host, Port, Config) per worker: 12 entries, two ports on each of
# the six hosts 132.176.69.181 - 132.176.69.186.
let Workers = [const rel(tuple([Host: text, Port: int, Config: text])) value
(
('132.176.69.181' 59801 'SecondoConfig.ini')
('132.176.69.181' 59802 'SecondoConfig.ini')
('132.176.69.182' 59803 'SecondoConfig.ini')
('132.176.69.182' 59804 'SecondoConfig.ini')
('132.176.69.183' 59805 'SecondoConfig.ini')
('132.176.69.183' 59806 'SecondoConfig.ini')
('132.176.69.184' 59807 'SecondoConfig.ini')
('132.176.69.184' 59808 'SecondoConfig.ini')
('132.176.69.185' 59809 'SecondoConfig.ini')
('132.176.69.185' 59810 'SecondoConfig.ini')
('132.176.69.186' 59811 'SecondoConfig.ini')
('132.176.69.186' 59812 'SecondoConfig.ini')
)];
#set some variables
# CntWorkers equals the number of tuples in Workers. The literal 12 used in
# the commands below duplicates this value, so both must be changed together
# when the worker list changes.
delete CntWorkers;
let CntWorkers = 12;
#distribute data round robin ##################################################
# Feeds Orte2 into dfdistribute3 with 12 slots on the Workers relation.
# NOTE(review): the comment above says round robin; presumably the TRUE
# argument selects that mode - confirm against the Distributed2 operator spec.
let DistArray = Orte2 feed
dfdistribute3["DistArray", 12 , TRUE, Workers];
############## SAMPLING #######################################################
#create samples on each worker
#create Sample files #############################
# choose each rho-th element, rho=(m/ln(n*t)) ##
# s... s=m/rho SampleSize ##
# t... count of machines ##
# n... count of objects ##
# m... m = n/t count of objects at each machine ##
##################################################
# From the definitions above, s = m/rho = ln(n*t). The code computes this as
# floor(logB(Orte2 count * 12, 2.718281828)), i.e. the logarithm to a base of
# approximately e, with n = Orte2 count and t = 12.
# NOTE(review): the code takes the first s tuples of each slot via head[...]
# rather than every rho-th element as described above - confirm this is
# intended.
#distribute sample files
# create single sample
# Each sampled tuple is combined (product) with every value 0..11 of the new
# attribute WidSamp, so each sample tuple occurs once per WidSamp value.
let SingSampArr=
DistArray dmap["SingSampArr", . head[real2int(
floor(logB(Orte2 count * 12, 2.718281828)))]
intstream(0,12 - 1) namedtransformstream[WidSamp]
product];
# Repartition the replicated samples by WidSamp into 12 slots.
let SampMatr= SingSampArr
partition["SampMatr", .WidSamp ,12];
# Collect every slot of the matrix into SampArray.
# NOTE(review): 1236 is presumably the port used for the transfer - confirm.
let SampArray = SampMatr
areduce["SampArray", . consume, 1236];
############ start distributed Sampling and redistribute data #################
# Combines each slot of DistArray with the matching slot of SampArray through
# distsamp[GeoData, WID, 12], then repartitions the result by the WID
# attribute into 12 slots, stored as DistMat.
# NOTE(review): the name argument of dmap2 is the empty string; presumably the
# system then picks a name for the intermediate array - confirm.
# NOTE(review): if the last dmap2 argument is a TCP port, 91234 is above the
# maximum port number 65535 - confirm and correct if so.
let DistMat = DistArray SampArray dmap2["", . ..
distsamp[GeoData, WID, 12] , 91234]
partition[ "DistMat",.WID, 12 ];
############### reduce with DbDacScan #########################################
# Runs dbdacscan on every slot of DistMat over attribute GeoData, writing the
# cluster number to attribute ClusterId and using NeighborFile.bin below
# secondoHome()/dfarrays/BERLINTEST/DbScan/ as the neighbor file path.
# NOTE(review): 5000.0 and 5 are presumably the DBSCAN parameters eps and
# minPts; the same values are passed to distclmerge further down and have to
# be kept in sync - confirm against the DistributedClustering algebra.
let DbScan = DistMat areduce["DbScan", .
dbdacscan [GeoData, ClusterId,
secondoHome() + '/dfarrays/BERLINTEST/DbScan/NeighborFile.bin'
,5000.0,5] , 56874 ];
###############################################################################
# set some auxiliary variables to start merging clusters ######################
# HelpInt: index of the worker that receives files in the merge loop below.
delete HelpInt;
let HelpInt = 0;
# AktRound: counter of the current merge round, starting at 0.
delete AktRound;
let AktRound = 0;
# LastRound: number of merge rounds, ceil(log2(CntWorkers)); 4 for 12 workers.
delete LastRound;
let LastRound = real2int(ceil(logB(CntWorkers,2)));
# AktWorker: index of the worker currently addressed in the loops below.
delete AktWorker;
let AktWorker = 0;
########### start workers for under level of distributed Algebra ##############
# Opens a connection to every entry of Workers; the result of each connect
# call is stored in attribute Started.
query Workers feed extend
[Started: connect(.Host, .Port, .Config)]consume;
#open databases
# Sends the command 'open database berlintest' to the servers with numbers
# 0 .. CntWorkers - 1.
query intstream(0,CntWorkers - 1) namedtransformstream[Server] extend[ W :
'open database berlintest'] prcmd[Server,W] consume;
#rename Files - so every Worker has the same Filename
# For each worker i the remote query moves
# dfarrays/BERLINTEST/DbScan/DbScan_<i>.bin to DbScan.bin in the same
# directory. The remote command text is assembled by string concatenation
# with num2string(AktWorker).
update AktWorker := 0;
while AktWorker < CntWorkers do
{
query rquery (AktWorker,'query moveFile(secondoHome() +
"/dfarrays/BERLINTEST/DbScan/DbScan_" + "'
+ num2string(AktWorker) + '" + ".bin",
secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin")' ) |
update AktWorker := AktWorker + 1
}endwhile;
############### begin DistMerge ###############################################
# Pairwise merge of the per-worker results in LastRound rounds. In round r
# (r = AktRound) the sending worker AktWorker starts at 2^r and the receiving
# worker HelpInt starts at 0; both advance by 2^(r+1) per step. After the
# transfers, every worker whose number is divisible by 2^(r+1) runs
# distclmerge. Once all rounds are done, the final files are read from
# worker 0 (see the Results section).
# NOTE(review): with CntWorkers = 12, in round 2 only worker 4 sends (to
# worker 0), yet worker 8 also passes the "mod 8 = 0" filter and runs
# distclmerge and removeFile without having received *_1 files. How
# distclmerge behaves with missing input files is not visible here - confirm.
while AktRound < LastRound do
{
# send the files of each sending worker to its lower neighbor (the receiver)
update HelpInt := 0 |
update AktWorker := real2int(pow (2,AktRound)) |
while AktWorker < CntWorkers do
{
# Copy DbScan.bin and NeighborFile.bin from AktWorker to HelpInt, where they
# are stored with the suffix _1, then remove both files on AktWorker.
query transferFile(AktWorker, HelpInt,9321,
rquery(AktWorker,"query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/DbScan.bin',
rquery(HelpInt,"query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/DbScan.bin_1') |
query transferFile(AktWorker, HelpInt,9321,
rquery(AktWorker,"query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/NeighborFile.bin',
rquery(HelpInt, "query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/NeighborFile.bin_1') |
query rquery(AktWorker,
'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin")') |
query rquery(AktWorker,
'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin")') |
update HelpInt := HelpInt + real2int(pow (2,AktRound + 1)) |
update AktWorker := AktWorker + real2int(pow (2,AktRound + 1))
}
endwhile |
################### execute distclmerge #######################################
# Runs on all workers whose number is divisible by 2^(AktRound + 1). The call
# receives the worker's own DbScan.bin / NeighborFile.bin, the received *_1
# files, the attributes GeoData and ClusterId, the values 5000.0 and 5, and
# finally the DbScan.bin / NeighborFile.bin paths again.
# NOTE(review): the last two paths are presumably the output files, i.e. the
# merge result overwrites the worker's own files - confirm.
query intstream(0,CntWorkers - 1)
filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
pquery['query distclmerge(
secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin" ,
secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin" ,
secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin_1" ,
secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin_1" ,
GeoData, ClusterId, 5000.0,5,
secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin" ,
secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin"
) count' ,0] consume |
# remove the received *_1 files on the same set of workers
query intstream(0,CntWorkers - 1)
filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
pquery[
'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin_1")'
,0] consume |
query intstream(0,CntWorkers - 1)
filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
pquery[
'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin_1")'
,0] consume |
update AktRound := AktRound + 1
}
endwhile ;
##################### end DistMerge ###########################################
############################## Results: #######################################
# Both queries read DbScan.bin on worker 0 and group its tuples by ClusterId.
# The first query returns the number of groups (distinct ClusterId values).
query rquery(0,'query ffeed5(secondoHome()
+ "/dfarrays/BERLINTEST/DbScan/DbScan.bin") sortby[ClusterId]
groupby[ClusterId ; C : group count] count');
# The second query returns one tuple per ClusterId with its tuple count C.
query rquery(0,'query ffeed5(secondoHome()
+ "/dfarrays/BERLINTEST/DbScan/DbScan.bin") sortby[ClusterId]
groupby[ClusterId ; C : group count] consume');
################remove files #######################################
# Only NeighborFile.bin is removed on worker 0; DbScan.bin is not removed by
# the commands visible here.
query rquery(0,'query removeFile(secondoHome()
+ "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin")');
# delete old Files
# For every distributed object created above: first delete its remote parts
# on the workers, then delete the local object.
# Workers, CntWorkers, HelpInt, AktRound, LastRound and AktWorker are not
# deleted here; the beginning of the script deletes them before recreating
# them.
query deleteRemoteObjects(DistArray);
delete DistArray;
query deleteRemoteObjects(SampArray);
delete SampArray;
query deleteRemoteObjects(SampMatr);
delete SampMatr;
query deleteRemoteObjects(SingSampArr);
delete SingSampArr;
query deleteRemoteObjects(DbScan);
delete DbScan;
query deleteRemoteObjects(DistMat);
delete DistMat;