Files
secondo/bin/Scripts/DistCost.sec
2026-01-23 17:03:45 +08:00

156 lines
4.6 KiB
Plaintext

/*
----
This file is part of SECONDO.
Copyright (C) 2004-2008, University Hagen, Faculty of Mathematics and
Computer Science, Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//paragraph [10] title: [{\Large \bf ] [}]
//[%] [\%]
[10] Measuring and Displaying Costs in Distributed Computation
This is a Secondo script to be run with @[%]filename.
This script defines in the currently open database a relation
* LastCommand,
and a set of functions
* distCostReset
* distCostSave
* distCostBoxes
* distCostUtil
that can be used to visualize the overall time requirements used in a distributed query.
*/
/*
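Create a distributed array of server numbers. For ~N~ workers it must contain exactly the numbers 0, ..., ~N~ - 1, and it must be named ~ControlWorkers~, because the relation ~LastCommand~ defined below is created from it. The commented-out commands that follow show an example for 14 workers.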
*/
# let ControlWorkers = intstream(0, 14 - 1) transformstream
# ddistribute3["ControlWorkers", 14, TRUE, Workers14]
# dloop["", . feed extract[Elem]]
/*
Use the following functions in order. Example:
Remember the last command number executed on each server.
---- update LastCommand := distCostReset(ControlWorkers)
----
Execute the query of interest
---- query BuildingsB1 dloop["", . feed
filter[.GeoData intersects eichlinghofen] count]
getValue
----
Remember the costs for each server that have occurred after the last command remembered in ~LastCommand~.
---- let Costs1 = distCostSave(ControlWorkers)
----
Visualize the costs for the query of interest.
---- query distCostBoxes(Costs1, 0.0, 5.0)
----
*/
/*
Create a relation which has for every server the number of the last command executed.
*/
# distCostReset: for every server number in Control, take the last tuple of
# that server's SEC2COMMANDS relation and return it as (Server, CmdNr).
# The per-server results are collected into one relation.
let distCostReset = fun(Control: darray(int))
  Control
    dloop["", fun(srv: int)
      SEC2COMMANDS feed
        tail[1]
        extend[Server: srv]
        project[Server, CmdNr]
        consume]
    dsummarize
    consume
# Create the relation LastCommand with the current last command number per
# server. ControlWorkers must already exist in the open database; this script
# only contains a commented-out example of how to create it.
let LastCommand = distCostReset(ControlWorkers)
/*
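Store the commands executed on each server after the last remembered command. Parameter is the distributed array of server numbers (~ControlWorkers~). The number of the last command per server is not passed as a parameter; it is read from the relation ~LastCommand~, which must have been set before by means of ~distCostReset~.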
Note that this command, the query to be evaluated and the cost save query need to be executed by the same Secondo client, whereas the visualization in ~distCostBoxes~ may be done in a separate GUI client.
*/
# distCostSave: fetch the complete SEC2COMMANDS relation of every server in
# Control (tagged with the server number), join it with the relation
# LastCommand on the server number, and keep only commands whose CmdNr is
# greater than the one remembered in LastCommand for that server.
# Result attributes: CmdNr, CmdStr, ElapsedTime, CpuTime, Server.
let distCostSave = fun(Control: darray(int))
  Control
    dloop["", fun(srv: int) SEC2COMMANDS feed extend[Server: srv] consume]
    dsummarize
  LastCommand feed {lc}
  hashjoin[Server, Server_lc]
  filter[.CmdNr > .CmdNr_lc]
  remove[Server_lc, CmdNr_lc]
  project[CmdNr, CmdStr, ElapsedTime, CpuTime, Server]
  consume
/*
Create a visual representation of the costs at each server by stacking vertically the costs per server. Hence the coordinate system is x = server number, y = time elapsed. Parameters are a relation ~Costs~ with the previously saved costs, a minimal time threshold for commands to be considered ~MinTime~, and a width of the column for each server ~ColumnWidth~.
*/
# distCostBoxes: build one rectangle (attribute CostBox) per command in Costs
# whose ElapsedTime exceeds MinTime, stacking the commands of each server on
# top of each other.
#   Costs       - relation of saved costs (schema as produced by distCostSave)
#   MinTime     - commands with ElapsedTime <= MinTime are dropped
#   ColumnWidth - horizontal space reserved per server; each box occupies
#                 0.8 * ColumnWidth of it, starting at Server * ColumnWidth
# The stream is sorted by Server and CmdNr before nesting: nest is expected to
# combine only adjacent tuples with equal Server, so unsorted input could
# yield several overlapping stacks for one server. Sorting by CmdNr as well
# makes the stacking order deterministic (order of execution).
# For each command, Below is the sum of ElapsedTime of all commands that
# precede it in its server's stack; the box spans Below .. Below + ElapsedTime
# (presumably the y range of rectangle2 - confirm argument order).
# Note that the Below computation rescans the server's commands for every
# command, i.e. it is quadratic in the number of commands per server.
let distCostBoxes = fun(
  Costs:
    rel(tuple([CmdNr: int, CmdStr: text, ElapsedTime: real,
      CpuTime: real, Server: int])),
  MinTime: real,
  ColumnWidth: real )
  Costs feed filter[.ElapsedTime > MinTime]
  sortby[Server asc, CmdNr asc]
  nest[Server; CostsA]
  extend[CostsB: fun(t: TUPLE)
    attr(t, CostsA) afeed addcounter[N, 1]
    extend[Below: fun(t2: TUPLE) attr(t, CostsA) afeed addcounter[N2, 1]
      filter[.N2 < attr(t2, N)]
      sum[ElapsedTime]]
    extend[CostBox: rectangle2(attr(t, Server) * ColumnWidth,
      (attr(t, Server) * ColumnWidth) + 0.8 * ColumnWidth,
      .Below, .Below + .ElapsedTime)]
    aconsume]
  remove[CostsA]
  unnest[CostsB]
  consume
/*
Compute the worker utilization defined as the fraction of work done by all workers compared to what they could have done if all workers worked until the end time of the query.
*/
# distCostUtil: worker utilization for the saved costs in Costs, computed as
#   (sum over servers of elapsed time) /
#   (maximal elapsed time of any server * number of servers occurring in Costs)
# Only servers that have at least one tuple in Costs are counted.
# The stream is sorted by Server before grouping: groupby forms groups from
# consecutive tuples only, so on unsorted input one server could be split
# into several groups, which would distort both the maximum and the count.
# NOTE(review): for an empty Costs relation the quotient is presumably
# undefined - confirm if that case matters.
let distCostUtil = fun(
  Costs:
    rel(tuple([CmdNr: int, CmdStr: text, ElapsedTime: real,
      CpuTime: real, Server: int])))
  Costs feed
    sortby[Server asc]
    groupby[Server; Elapsed: group feed sum[ElapsedTime]]
    consume
  within [
    . feed sum[Elapsed] /
    ((. feed max[Elapsed]) * (. count))
  ]