# Distributed density-based clustering of relation Orte2 (database berlintest).
#
# Phases, in the order they appear below:
#   1. define the worker relation and distribute Orte2 over the workers
#   2. draw a small sample on every slot and collect all samples in one array
#   3. repartition the data with distsamp, using the collected samples
#   4. cluster every partition with dbdacscan
#   5. merge the per-worker results pairwise with distclmerge until the
#      complete result is stored on worker 0
#   6. print the result and remove the temporary distributed objects
#
# Layout rules kept throughout this file: comment lines start with '#' in
# column 0, multi-line commands contain no blank line, and only the last line
# of a command ends with ';'.

open database berlintest;

# ---------------------------------------------------------------------------
# Phase 1: workers and initial distribution
# ---------------------------------------------------------------------------

# Worker relation: one tuple (Host, Port, Config) per worker, 12 in total.
delete Workers;
let Workers = [const rel(tuple([Host: text, Port: int, Config: text]))
  value (
    ('132.176.69.181' 59801 'SecondoConfig.ini')
    ('132.176.69.181' 59802 'SecondoConfig.ini')
    ('132.176.69.182' 59803 'SecondoConfig.ini')
    ('132.176.69.182' 59804 'SecondoConfig.ini')
    ('132.176.69.183' 59805 'SecondoConfig.ini')
    ('132.176.69.183' 59806 'SecondoConfig.ini')
    ('132.176.69.184' 59807 'SecondoConfig.ini')
    ('132.176.69.184' 59808 'SecondoConfig.ini')
    ('132.176.69.185' 59809 'SecondoConfig.ini')
    ('132.176.69.185' 59810 'SecondoConfig.ini')
    ('132.176.69.186' 59811 'SecondoConfig.ini')
    ('132.176.69.186' 59812 'SecondoConfig.ini')
  )];

# Number of workers. NOTE: the literal 12 used further down (dfdistribute3,
# the sampling step, both partition calls and distsamp) has to be kept equal
# to this value by hand.
delete CntWorkers;
let CntWorkers = 12;

# Spread Orte2 over 12 slots (original comment: round robin distribution).
let DistArray = Orte2 feed dfdistribute3["DistArray", 12 , TRUE, Workers];

# ---------------------------------------------------------------------------
# Phase 2: sampling
# ---------------------------------------------------------------------------
# Sample size as described by the original author:
#   choose each rho-th element, rho = m / ln(n * t)
#   s = m / rho        sample size
#   t                  count of machines
#   n                  count of objects
#   m = n / t          count of objects at each machine
#
# Per slot: keep the first floor(ln(Orte2 count * 12)) tuples and combine
# each of them with every number 0..11 (attribute WidSamp).
let SingSampArr= DistArray dmap["SingSampArr",
  . head[real2int( floor(logB(Orte2 count * 12, 2.718281828)))]
  intstream(0,12 - 1) namedtransformstream[WidSamp] product];

# Regroup the sample tuples by WidSamp into 12 partitions.
let SampMatr= SingSampArr partition["SampMatr", .WidSamp ,12];

# Materialise each partition (1236 is presumably the port used for the data
# transfer - confirm against the areduce documentation).
let SampArray = SampMatr areduce["SampArray", . consume, 1236];

# ---------------------------------------------------------------------------
# Phase 3: distributed sampling and redistribution of the data
# ---------------------------------------------------------------------------
# distsamp is applied to each pair (data slot, sample slot); its output is
# repartitioned by attribute WID into 12 partitions.
let DistMat = DistArray SampArray dmap2["", . .. distsamp[GeoData, WID, 12] , 91234]
  partition[ "DistMat",.WID, 12 ];

# ---------------------------------------------------------------------------
# Phase 4: local clustering with dbdacscan
# ---------------------------------------------------------------------------
# Arguments passed to dbdacscan: attribute GeoData, attribute name ClusterId,
# a NeighborFile.bin path below secondoHome(), and the values 5000.0 and 5
# (presumably eps and minPts - confirm against the operator documentation).
let DbScan = DistMat areduce["DbScan",
  . dbdacscan [GeoData, ClusterId, secondoHome() + '/dfarrays/BERLINTEST/DbScan/NeighborFile.bin' ,5000.0,5] ,
  56874 ];

# ---------------------------------------------------------------------------
# Phase 5: merging the clusters
# ---------------------------------------------------------------------------

# Loop state for the merge. LastRound = ceil(log2(CntWorkers)).
delete AktRound;
let AktRound = 0;
delete LastRound;
let LastRound = real2int(ceil(logB(CntWorkers,2)));
delete AktWorker;
let AktWorker = 0;
delete HelpInt;
let HelpInt = 0;

# Open an explicit connection to every worker; the operators used from here
# on (prcmd, rquery, pquery, transferFile) address workers by number.
query Workers feed extend [Started: connect(.Host, .Port, .Config)]consume;

# Open the database on every connection.
query intstream(0,CntWorkers - 1) namedtransformstream[Server]
  extend[ W : 'open database berlintest']
  prcmd[Server,W] consume;

# Rename DbScan_<i>.bin to DbScan.bin on worker i so that every worker uses
# the same file name.
update AktWorker := 0;
while AktWorker < CntWorkers do
{
  query rquery (AktWorker,'query moveFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan_" + "' + num2string(AktWorker) + '" + ".bin", secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin")' ) |
  update AktWorker := AktWorker + 1
}endwhile;

# Merge rounds. In round r the sender index starts at 2^r, the receiver index
# (HelpInt) starts at 0, and both advance by 2^(r+1).
# NOTE(review): distclmerge is started on every worker whose index is
# divisible by 2^(r+1), whether or not it received files in that round -
# confirm that distclmerge copes with missing *_1 files.
while AktRound < LastRound do
{
# Step 1: send both files to the receiving neighbour (stored there with
# suffix _1), then remove them on the sender.
  update HelpInt := 0 |
  update AktWorker := real2int(pow (2,AktRound)) |
  while AktWorker < CntWorkers do
  {
    query transferFile(AktWorker, HelpInt,9321,
      rquery(AktWorker,"query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/DbScan.bin',
      rquery(HelpInt,"query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/DbScan.bin_1') |
    query transferFile(AktWorker, HelpInt,9321,
      rquery(AktWorker,"query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/NeighborFile.bin',
      rquery(HelpInt, "query secondoHome()") + '/dfarrays/BERLINTEST/DbScan/NeighborFile.bin_1') |
    query rquery(AktWorker, 'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin")') |
    query rquery(AktWorker, 'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin")') |
    update HelpInt := HelpInt + real2int(pow (2,AktRound + 1)) |
    update AktWorker := AktWorker + real2int(pow (2,AktRound + 1))
  } endwhile |
# Step 2: run distclmerge on the receiving workers; its last two arguments
# name the same DbScan.bin / NeighborFile.bin paths as its first two.
  query intstream(0,CntWorkers - 1)
    filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
    pquery['query distclmerge( secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin" , secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin" , secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin_1" , secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin_1" , GeoData, ClusterId, 5000.0,5, secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin" , secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin" ) count' ,0]
    consume |
# Step 3: remove the received *_1 files on the same set of workers.
  query intstream(0,CntWorkers - 1)
    filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
    pquery[ 'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin_1")' ,0]
    consume |
  query intstream(0,CntWorkers - 1)
    filterintstream[(. mod real2int(pow (2,AktRound + 1))) = 0]
    pquery[ 'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin_1")' ,0]
    consume |
  update AktRound := AktRound + 1
} endwhile ;

# ---------------------------------------------------------------------------
# Phase 6: results and cleanup
# ---------------------------------------------------------------------------

# Read the merged file on worker 0: first the number of clusters, then the
# number of members (C) per ClusterId.
query rquery(0,'query ffeed5(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin") sortby[ClusterId] groupby[ClusterId ; C : group count] count');
query rquery(0,'query ffeed5(secondoHome() + "/dfarrays/BERLINTEST/DbScan/DbScan.bin") sortby[ClusterId] groupby[ClusterId ; C : group count] consume');

# Remove the neighbour file on worker 0 (DbScan.bin itself is not removed).
query rquery(0,'query removeFile(secondoHome() + "/dfarrays/BERLINTEST/DbScan/NeighborFile.bin")');

# Drop the distributed objects: remote parts first, then the local object.
query deleteRemoteObjects(DistArray);
delete DistArray;
query deleteRemoteObjects(SingSampArr);
delete SingSampArr;
query deleteRemoteObjects(SampMatr);
delete SampMatr;
query deleteRemoteObjects(SampArray);
delete SampArray;
query deleteRemoteObjects(DistMat);
delete DistMat;
query deleteRemoteObjects(DbScan);
delete DbScan;