156 lines
4.6 KiB
Plaintext
156 lines
4.6 KiB
Plaintext
/*
|
|
|
|
----
|
|
This file is part of SECONDO.
|
|
|
|
Copyright (C) 2004-2008, University Hagen, Faculty of Mathematics and
|
|
Computer Science, Database Systems for New Applications.
|
|
|
|
SECONDO is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
SECONDO is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with SECONDO; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
----
|
|
|
|
//paragraph [10] title: [{\Large \bf ] [}]
|
|
//[%] [\%]
|
|
|
|
|
|
|
|
[10] Measuring and Displaying Costs in Distributed Computation
|
|
|
|
This is a Secondo script to be run with @[%]filename.
|
|
|
|
This script defines in the currently open database a relation
|
|
|
|
* LastCommand,
|
|
|
|
and a set of functions
|
|
|
|
* distCostReset
|
|
|
|
* distCostSave
|
|
|
|
* distCostBoxes
|
|
|
|
* distCostUtil
|
|
|
|
that can be used to visualize the overall time requirements used in a distributed query.
|
|
|
|
*/
|
|
|
|
/*
|
|
Create a distributed array with server numbers. The numbers must be 0, ..., ~N~ - 1 if you have ~N~ workers, and the array must be called ~ControlWorkers~.
|
|
|
|
*/
|
|
|
|
# let ControlWorkers = intstream(0, 14 - 1) transformstream
|
|
# ddistribute3["ControlWorkers", 14, TRUE, Workers14]
|
|
# dloop["", . feed extract[Elem]]
|
|
|
|
|
|
/*
|
|
Use the following functions in order. Example:
|
|
|
|
Remember the last command number executed on each server.
|
|
|
|
---- update LastCommand := distCostReset(Control50)
|
|
----
|
|
|
|
Execute the query of interest
|
|
|
|
---- query BuildingsB1 dloop["", . feed
|
|
filter[.GeoData intersects eichlinghofen] count]
|
|
getValue
|
|
----
|
|
|
|
Remember the costs that have occurred on each server after the previously remembered last command.
|
|
|
|
---- let Costs1 = distCostSave(Control50)
|
|
----
|
|
|
|
Visualize the costs for the query of interest.
|
|
|
|
---- query distCostBoxes(Costs1, 0.0, 5.0)
|
|
----
|
|
|
|
*/
|
|
|
|
/*
|
|
Create a relation which has for every server the number of the last command executed.
|
|
|
|
*/
|
|
|
|
# distCostReset: for every element of the distributed array Control (one
# server number per worker), read the SEC2COMMANDS relation available at
# that worker, tag each tuple with the server number, and keep only the
# last tuple reduced to (Server, CmdNr). The per-worker results are then
# summarized into a single relation.
let distCostReset = fun(Control: darray(int))
  Control dloop["",
    fun(srv: int)
      SEC2COMMANDS feed
        extend[Server: srv]
        project[Server, CmdNr]
        tail[1]
        consume]
  dsummarize
  consume
|
|
|
|
# Initial snapshot of the last command number per server. This statement
# refers to the object ControlWorkers, so that distributed array must
# already exist in the open database when the script is run.
let LastCommand = distCostReset(ControlWorkers)
|
|
|
|
/*
|
|
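Store the commands executed on each server after the last command. The parameter is the distributed array of server numbers (the same array that is passed to ~distCostReset~); the number of the last command per server is read from the relation ~LastCommand~, which holds the result of ~distCostReset~.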
|
|
|
|
Note that this command, the query to be evaluated and the cost save query need to be executed by the same Secondo client, whereas the visualization in ~distCostBoxes~ may be done in a separate GUI client.
|
|
|
|
*/
|
|
|
|
# distCostSave: collect the SEC2COMMANDS relation of every worker named in
# Control (each tuple tagged with its server number), join it with the
# relation LastCommand on the server number, and keep only the commands
# whose CmdNr is greater than the one remembered in LastCommand.
# Result attributes: CmdNr, CmdStr, ElapsedTime, CpuTime, Server.
# Note: LastCommand is read as a database object, not passed as parameter.
# The renamed join attributes (suffix _lc) are dropped by the final project.
let distCostSave = fun(Control: darray(int))
  Control dloop["",
    fun(srv: int) SEC2COMMANDS feed extend[Server: srv] consume]
  dsummarize
  LastCommand feed {lc} hashjoin[Server, Server_lc]
  filter[.CmdNr > .CmdNr_lc]
  project[CmdNr, CmdStr, ElapsedTime, CpuTime, Server]
  consume
|
|
|
|
/*
|
|
Create a visual representation of the costs at each server by stacking vertically the costs per server. Hence the coordinate system is x = server number, y = time elapsed. Parameters are a relation ~Costs~ with the previously saved costs, a minimal time threshold for commands to be considered ~MinTime~, and a width of the column for each server ~ColumnWidth~.
|
|
|
|
*/
|
|
|
|
# distCostBoxes: turn saved costs into one rectangle per command.
#   Costs       - relation of saved commands (schema below)
#   MinTime     - only commands with ElapsedTime > MinTime are kept
#   ColumnWidth - horizontal space reserved for each server
# Per server the commands are numbered (attribute N). For each command,
# Below is the sum of ElapsedTime over the commands of the same server
# with a smaller number, so every box starts where the previous one ends.
# The box spans Server * ColumnWidth up to that value plus 0.8 * ColumnWidth
# in the first dimension and Below up to Below + ElapsedTime in the second.
# NOTE(review): nest presumably expects the input to arrive grouped by
# Server - confirm against the NestedRelation algebra documentation.
let distCostBoxes = fun(
  Costs: rel(tuple([CmdNr: int, CmdStr: text, ElapsedTime: real,
    CpuTime: real, Server: int])),
  MinTime: real,
  ColumnWidth: real)
  Costs feed
    filter[.ElapsedTime > MinTime]
    nest[Server; CostsA]
    extend[CostsB: fun(grp: TUPLE)
      attr(grp, CostsA) afeed addcounter[N, 1]
        extend[Below: fun(cur: TUPLE)
          attr(grp, CostsA) afeed addcounter[N2, 1]
            filter[.N2 < attr(cur, N)]
            sum[ElapsedTime]]
        extend[CostBox: rectangle2(
          attr(grp, Server) * ColumnWidth,
          (attr(grp, Server) * ColumnWidth) + 0.8 * ColumnWidth,
          .Below,
          .Below + .ElapsedTime)]
        aconsume]
    remove[CostsA]
    unnest[CostsB]
    consume
|
|
|
|
/*
|
|
Compute the worker utilization defined as the fraction of work done by all workers compared to what they could have done if all workers worked until the end time of the query.
|
|
|
|
*/
|
|
|
|
# distCostUtil: worker utilization for a set of saved costs.
#   Costs - relation of saved commands (schema below)
# The elapsed times are summed per server (attribute Elapsed). The result
# is  sum(Elapsed) / (max(Elapsed) * number of servers),  i.e. the work
# actually done divided by the work possible if every server had been busy
# as long as the busiest one.
# The stream is sorted by Server before groupby so that all tuples of one
# server are adjacent; otherwise a Costs relation that is not ordered by
# Server could yield several partial groups for the same server.
let distCostUtil = fun(
  Costs: rel(tuple([CmdNr: int, CmdStr: text, ElapsedTime: real,
    CpuTime: real, Server: int])))
  Costs feed
    sortby[Server]
    groupby[Server; Elapsed: group feed sum[ElapsedTime]]
    consume
  within [
    . feed sum[Elapsed] /
    ((. feed max[Elapsed]) * (. count))
  ]
|