2065 lines
59 KiB
C++
2065 lines
59 KiB
C++
/*
|
|
----
|
|
This file is part of SECONDO.
|
|
|
|
Copyright (C) 2015,
|
|
Faculty of Mathematics and Computer Science,
|
|
Database Systems for New Applications.
|
|
|
|
SECONDO is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
SECONDO is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with SECONDO; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
----
|
|
|
|
|
|
//[$][\$]
|
|
|
|
*/
|
|
|
|
#include "schedule.h"
|
|
#include <unordered_set>
|
|
#include <chrono>
|
|
#include <thread>
|
|
|
|
using namespace std;
|
|
using namespace distributed2;
|
|
|
|
// #define DEBUG_JOB_SELECTION
|
|
// #define REPORT_WORKER_STATS
|
|
// #define REPORT_TOTAL_STATS
|
|
|
|
namespace distributed5
|
|
{
|
|
|
|
/*
|
|
1 schedule Operator
|
|
|
|
The schedule operator is responsible for distributing the tasks from a tuple
|
|
stream to the worker.
|
|
*/
|
|
|
|
/*
|
|
1.1 Type Mapping
|
|
|
|
Type Mapping for the schedule Operator.
|
|
|
|
*/
|
|
|
|
/*
Type mapping for the ~schedule~ operator.

Expected arguments: stream(task(d[f]array)) x int (file transfer port).
On success the inner d[f]array type is returned as the result type.
*/
ListExpr scheduleTM(ListExpr args)
{
    const string err = "stream(task(d[f]array), int) expected";

    // exactly two arguments: the task stream and the port number
    if (!nl->HasLength(args, 2))
        return listutils::typeError(err + " (wrong number of arguments)");

    // first argument must be a stream of tasks
    ListExpr streamType = nl->First(args);
    if (!Stream<Task>::checkType(streamType))
        return listutils::typeError(err + " (tasks expected)");

    // the tasks must carry a darray or dfarray
    ListExpr innerType = Task::innerType(nl->Second(streamType));
    bool isDArray = DArray::checkType(innerType);
    bool isDFArray = DFArray::checkType(innerType);
    if (!isDArray && !isDFArray)
        return listutils::typeError(err + " (d[f]array expected)");

    // second argument must be the port number
    if (!CcInt::checkType(nl->Second(args)))
        return listutils::typeError(err + " (port number expected)");

    return innerType;
}
|
|
|
|
// Hash functor for std::pair, usable as the Hash template argument of
// unordered containers keyed by pairs.
// Originally based on https://stackoverflow.com/a/57635490, which XORed
// the two element hashes. Plain XOR is symmetric (hash(a,b) == hash(b,a))
// and maps pairs of equal elements to 0, causing avoidable collisions;
// this version uses the boost::hash_combine mixing formula instead.
struct pair_hash
{
    template <class T1, class T2>
    std::size_t operator()(const std::pair<T1, T2> &p) const
    {
        std::size_t seed = std::hash<T1>()(p.first);
        // boost::hash_combine mixing step
        seed ^= std::hash<T2>()(p.second) +
                0x9e3779b9 + (seed << 6) + (seed >> 2);
        return seed;
    }
};
|
|
|
|
// Hash functor for a 3-element std::tuple.
// The original XORed the three element hashes, which is symmetric in
// the elements (any permutation collides) and yields 0 for equal
// elements; this version chains the boost::hash_combine mixing formula
// so element order contributes to the hash.
struct tuple3_hash
{
    template <class T1, class T2, class T3>
    std::size_t operator()(const std::tuple<T1, T2, T3> &t) const
    {
        std::size_t seed = std::hash<T1>()(std::get<0>(t));
        seed ^= std::hash<T2>()(std::get<1>(t)) +
                0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= std::hash<T3>()(std::get<2>(t)) +
                0x9e3779b9 + (seed << 6) + (seed >> 2);
        return seed;
    }
};
|
|
|
|
/*
Bookkeeping record the scheduler keeps per task.

All fields below ~mutex~ are protected by ~mutex~.
*/
struct TaskScheduleInfo
{
public:
    // the task this record describes
    Task *task;
    // guards all fields below
    boost::shared_mutex mutex;

    // Successors still waiting for a result of this task.
    // Tuple: (Position of the result, Successor info,
    //         Target Position = argument slot in the successor)
    std::unordered_set<tuple<size_t, TaskScheduleInfo *, size_t>, tuple3_hash>
        successors;

    // argument slots; a null entry means that predecessor result
    // is not available yet
    vector<TaskDataItem *> arguments;
    // set once the task has produced its results
    optional<vector<TaskDataItem *>> results;
    // cheapest reservation so far: (worker, cost) -- see reserveTask
    optional<pair<WorkerLocation, int>> reservation;
    // true once a worker has started executing the task
    bool started = false;
    // true once the task has been added to a local work pool
    bool inPool = false;
    // end of mutex protected
};
|
|
|
|
/*
2. Class OnlyOnceMutexMap

Coordinates one-time initialization per key: the first caller for a
key receives a locked mutex and performs the work; later callers for
the same key block until that work has finished.

*/
|
|
template <typename K>
class OnlyOnceMutexMap
{
public:
    // Returns an engaged optional (holding a locked lock) if the caller
    // is the first one for ~key~; the caller should then perform the
    // one-time work and release the lock by destroying the optional.
    // Otherwise waits until the first caller's lock is released and
    // returns an empty optional.
    std::optional<boost::unique_lock<boost::mutex>> check(K key)
    {
        boost::mutex *mutex;
        {
            boost::lock_guard<boost::mutex> lock(mapMutex);
            auto it = map.find(key);
            if (it == map.end())
            {
                // this creates a new mutex and locks it
                return std::optional<boost::unique_lock<boost::mutex>>(
                    std::in_place,
                    map[key]);
            }
            mutex = &it->second;
        }
        // this waits until the mutex is unlocked
        boost::lock_guard<boost::mutex> wait(*mutex);
        return std::optional<boost::unique_lock<boost::mutex>>();
    }

private:
    // guards map
    boost::mutex mapMutex;
    // std::map keeps element addresses stable, so &it->second stays
    // valid after mapMutex is released
    std::map<K, boost::mutex> map;
};
|
|
|
|
/*
|
|
3. Class Scheduler
|
|
|
|
*/
|
|
class Scheduler;
|
|
|
|
/*
|
|
|
|
4. Class WorkerJob
|
|
|
|
*/
|
|
|
|
// Abstract base class for a unit of work a worker thread performs
// (execute a task, transfer data, ...).
class WorkerJob
{
public:
    WorkerJob(WorkerLocation &location, Scheduler &scheduler)
        : location(location), scheduler(scheduler) {}
    virtual ~WorkerJob() {}

    // human-readable description, used for debugging output
    virtual string toString() const = 0;

    // short job category, used as a statistics key
    virtual string getType() const = 0;

    // executes the job; returning false aborts scheduling
    virtual bool run() = 0;

protected:
    // NOTE(review): stored by reference -- both must outlive the job
    WorkerLocation &location;
    Scheduler &scheduler;
};
|
|
|
|
/*

4. Class WorkPool

A thread-safe pool of tasks used by the schedule operator. Supports
handing out copy-on-write snapshots of the task set so callers can
iterate without holding the lock.

*/
|
|
// Thread-safe set of tasks with copy-on-write snapshots:
// getTasks() publishes the internal set; the next mutation then
// copies it first (makePrivate), so published snapshots are never
// modified and can be iterated without locking.
class WorkPool
{
public:
    WorkPool() : tasks(new std::unordered_set<Task *>()) {}
    void addTask(Task *task)
    {
        boost::lock_guard lock(mutex);
        makePrivate();
        tasks->emplace(task);
    }
    void removeTask(Task *task)
    {
        boost::lock_guard lock(mutex);
        makePrivate();
        tasks->erase(task);
    }
    bool empty()
    {
        boost::lock_guard lock(mutex);
        return tasks->empty();
    }
    size_t size()
    {
        boost::lock_guard lock(mutex);
        return tasks->size();
    }
    // Returns an immutable snapshot of the current task set.
    std::shared_ptr<std::unordered_set<Task *>> getTasks()
    {
        boost::lock_guard lock(mutex);
        isPublic = true;
        return tasks;
    }

private:
    // Copies the set if it was handed out via getTasks, so that the
    // published snapshot stays unchanged. mutex must be held.
    void makePrivate()
    {
        if (!isPublic)
            return;
        std::shared_ptr<std::unordered_set<Task *>> newTasks(
            new std::unordered_set<Task *>(*tasks));
        tasks = newTasks;
        isPublic = false;
    }
    // guards tasks and isPublic
    boost::mutex mutex;
    std::shared_ptr<std::unordered_set<Task *>> tasks;
    // true while the current set is shared with snapshot holders
    bool isPublic = false;
};
|
|
|
|
/*
|
|
5. Class Scheduler
|
|
|
|
Is called from the schedule operator.
|
|
|
|
Distributes the tasks to the worker.
|
|
|
|
*/
|
|
|
|
class Scheduler
|
|
{
|
|
public:
|
|
    // fileTransferPort: port of the staticFileTransferator on the workers
    // resultInObjectForm: keep final results as database objects (true)
    // or as files (false)
    Scheduler(int fileTransferPort, bool resultInObjectForm)
        : fileTransferPort(fileTransferPort),
          resultInObjectForm(resultInObjectForm) {}
|
|
|
|
|
|
//joins all running threads
|
|
    // joins all running threads, then frees all tasks and data items.
    // Must be called exactly once after all tasks have been received.
    void join()
    {
        // ask workers to finish the remaining work, then stop
        stopWorkers(true);
        bool running = true;
        while (running)
        {
            {
                boost::unique_lock<boost::mutex>
                    lock(threadsMutex);
                if (runningThreads > 0)
                {
                    // wake up at least once per second to refresh
                    // the progress output
                    threadSignal.timed_wait(
                        lock, boost::posix_time::seconds(1));
                }
                else
                {
                    running = false;
                }
            }
            printProgress(workPool.size());
        }

        // all threads have finished working
        // join them to wait for cleanup
        for (auto &threadPair : threads)
        {
            auto thread = threadPair.second;
            thread->join();
            delete thread;
        }
        threads.clear();

        // delete all tasks
        for (auto &pair : taskInfo)
        {
            delete pair.first;
        }
        taskInfo.clear();

        // delete all data
        // cout << "Calling Destructor " << dataItems.size() << endl;
        for(pair<const string, TaskDataItem*> item : dataItems) {
            TaskDataItem* taskItem = item.second;
            // remove from dataReferences as well, so the loop below
            // does not double-delete this item
            dataReferences.erase(taskItem);
            delete taskItem;
        }
        dataItems.clear();

        // delete data items that are only referenced, not named
        // cout << "Clearing dataReferences " << dataReferences.size() << endl;
        for (auto &pair : dataReferences)
        {
            TaskDataItem* item = pair.first;
            delete item;
        }
        dataReferences.clear();

        // clear line from progress output
        cout << "\x1b[2K\r" << flush;

#ifdef REPORT_TOTAL_STATS
        cout << "=== Total ===\n"
             << stats.toString();
#endif
    }
|
|
|
|
//When a new task is recieved
|
|
//this methode checks if the task can be started or
|
|
//has to be added to the queue of waiting tasks
|
|
void receiveTask(Task *task)
|
|
{
|
|
// Create a ResultTask for output tasks
|
|
if (task->hasFlag(Output))
|
|
{
|
|
task->clearFlag(Output);
|
|
receiveTask(task);
|
|
|
|
ResultTask *result = new ResultTask(
|
|
task->getPreferredLocation(), *this);
|
|
result->addPredecessorTask(task);
|
|
receiveTask(result);
|
|
|
|
return;
|
|
}
|
|
|
|
// update task schedule info structures
|
|
TaskScheduleInfo *info;
|
|
{
|
|
// need to be a exclusive lock, as task is unknown yet and will
|
|
// be added
|
|
boost::lock_guard<boost::shared_mutex> lock(poolMutex);
|
|
info = &taskInfo[task];
|
|
info->task = task;
|
|
}
|
|
|
|
if (task->hasFlag(RunOnReceive))
|
|
{
|
|
WorkerLocation emptyLocation("", 0, "", -1);
|
|
vector<TaskDataItem *> emptyArgs;
|
|
vector<TaskDataItem *> result = task->run(emptyLocation, emptyArgs);
|
|
setTaskResult(task, result, emptyArgs);
|
|
return;
|
|
}
|
|
|
|
totalNumberOfTasks++;
|
|
|
|
vector<pair<Task *, size_t>> &arguments = task->getArguments();
|
|
vector<pair<TaskScheduleInfo *, size_t>> preInfos;
|
|
{
|
|
boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
|
|
for (auto pair : arguments)
|
|
{
|
|
preInfos.emplace_back(&taskInfo[pair.first], pair.second);
|
|
}
|
|
}
|
|
|
|
optional<WorkerLocation> preferredLocation;
|
|
bool hasResult = false;
|
|
for (auto pair : preInfos)
|
|
{
|
|
auto &preInfo = pair.first;
|
|
size_t pos = pair.second;
|
|
TaskDataItem *result;
|
|
{
|
|
boost::lock_guard<boost::shared_mutex> lock(preInfo->mutex);
|
|
result = preInfo->results ? (*preInfo->results)[pos] : 0;
|
|
|
|
// Here two mutexes are locked, but order of locking is
|
|
// always in direction of result flow
|
|
// So no deadlock can occur
|
|
boost::lock_guard<boost::shared_mutex> lock2(info->mutex);
|
|
if (result == 0)
|
|
{
|
|
preInfo->successors.emplace(
|
|
pos, info, info->arguments.size());
|
|
}
|
|
else
|
|
{
|
|
info->inPool = true;
|
|
}
|
|
info->arguments.push_back(result);
|
|
}
|
|
if (result != 0)
|
|
{
|
|
hasResult = true;
|
|
boost::lock_guard<boost::shared_mutex>
|
|
lock(dataReferencesMutex);
|
|
dataReferences[result]++;
|
|
}
|
|
}
|
|
// Add Task to the global work pool
|
|
workPool.addTask(task);
|
|
|
|
// Add Task to the local work pool
|
|
// if preferred location is already known
|
|
if (hasResult)
|
|
{
|
|
WorkerLocation preferredLocation = task->getPreferredLocation();
|
|
WorkPool *localWorkPool;
|
|
{
|
|
boost::lock_guard<boost::shared_mutex> lock(poolMutex);
|
|
localWorkPool = &localWorkPools[preferredLocation];
|
|
}
|
|
localWorkPool->addTask(task);
|
|
}
|
|
size_t remainingTasks = 0;
|
|
if (totalNumberOfTasks % 100 == 0)
|
|
remainingTasks = workPool.size();
|
|
{
|
|
boost::lock_guard<boost::shared_mutex> lock(poolMutex);
|
|
workGeneration++;
|
|
}
|
|
poolSignal.notify_all();
|
|
if (remainingTasks != 0)
|
|
{
|
|
printProgress(remainingTasks);
|
|
}
|
|
}
|
|
|
|
void printProgress(size_t remainingTasks)
|
|
{
|
|
size_t completedTasks = totalNumberOfTasks - remainingTasks;
|
|
size_t percent = completedTasks * 100 / totalNumberOfTasks;
|
|
cout << " " << percent << "% (" << completedTasks << "/"
|
|
<< totalNumberOfTasks << ")\r" << flush;
|
|
}
|
|
|
|
vector<WorkerLocation> getWorkers()
|
|
{
|
|
boost::lock_guard<boost::mutex> lock(threadsMutex);
|
|
vector<WorkerLocation> workers;
|
|
for (auto &pair : threads)
|
|
{
|
|
workers.push_back(pair.first);
|
|
}
|
|
return workers;
|
|
}
|
|
|
|
void addError(string message)
|
|
{
|
|
boost::lock_guard<boost::mutex> lock(resultMutex);
|
|
isError = true;
|
|
if (!errorMessage.empty())
|
|
errorMessage += "\n\n";
|
|
errorMessage += message;
|
|
}
|
|
|
|
    // Blocks until the pool changes (poolSignal), unless ~data~ is
    // already available at ~nearby~. The location is checked only once
    // before waiting; callers are expected to retry in a loop.
    void waitForPoolUpdateOrLocation(
        TaskDataItem *data, WorkerLocation &nearby)
    {
        boost::shared_lock<boost::shared_mutex> lock(poolMutex);

        if (!data->hasLocation(nearby))
            poolSignal.wait(lock);
    }
|
|
|
|
    // Bumps the work generation and wakes all waiting workers.
    void signalPoolUpdate()
    {
        {
            boost::lock_guard<boost::shared_mutex> lock(poolMutex);
            workGeneration++;
        }
        // notify after releasing the lock
        poolSignal.notify_all();
    }
|
|
|
|
    // Port of the static file transferator used for data transfers.
    int getFileTransferPort()
    {
        return fileTransferPort;
    }
|
|
|
|
    // Returns a copy of the per-server transferrator state:
    // (ready flag, number of active transfers).
    map<string, pair<bool, int>> getActiveTransferrators()
    {
        boost::shared_lock_guard lock(poolMutex);
        return activeTransferrators;
    }
|
|
|
|
    // Atomically marks ~task~ as started. Returns false when another
    // worker already started it. On success the task is removed from
    // the global work pool.
    bool startExecutingTask(Task *task)
    {
        TaskScheduleInfo *info;
        {
            boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
            info = &taskInfo[task];
        }
        {
            boost::lock_guard<boost::shared_mutex> lock(info->mutex);
            if (info->started)
            {
                return false;
            }
            info->started = true;
        }
        workPool.removeTask(task);
        return true;
    }
|
|
|
|
    // Tries to reserve ~task~ for ~location~ at the given preparation
    // ~cost~. Returns true when this worker may keep preparing the
    // task; an existing cheaper (or equally cheap) reservation by a
    // different server wins and makes the reserve fail.
    bool reserveTask(Task *task, WorkerLocation &location, int cost)
    {
        TaskScheduleInfo *info;
        {
            boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
            info = &taskInfo[task];
        }
        boost::lock_guard<boost::shared_mutex> lock(info->mutex);
        // a task that already runs can no longer be reserved
        if (info->started)
            return false;
        auto &reservation = info->reservation;
        if (reservation && reservation->second <= cost)
        {
            // already reserved with lower (or equal) cost
            if (reservation->first.getServer() != location.getServer())
            {
                // by other server
                // reserve fails
                return false;
            }
            else
            {
                // by other worker on same server
                // reserve succeed, but doesn't update reservation
                return true;
            }
        }
        // not reserved or reserved with higher cost
        // update reservation
        // and reserve succeed
        info->reservation =
            make_pair(location, cost);
        return true;
    }
|
|
|
|
    // Removes the reservation for ~task~ if it is held by ~location~.
    // NOTE(review): the ~cost~ parameter is unused here -- presumably
    // kept for symmetry with reserveTask; confirm before removing.
    void unreserveTask(Task *task, WorkerLocation &location, int cost)
    {
        TaskScheduleInfo *info;
        {
            boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
            info = &taskInfo[task];
        }
        boost::lock_guard<boost::shared_mutex> lock(info->mutex);
        auto &reservation = info->reservation;
        if (reservation && reservation->first == location)
        {
            reservation.reset();
        }
    }
|
|
|
|
    // Adjusts the active transfer counter for ~server~ by ~update~
    // (positive when transfers start, negative when they finish).
    void updateActiveFileTransfers(string server, int update)
    {
        boost::lock_guard<boost::shared_mutex> lock(poolMutex);
        activeTransferrators[server].second += update;
    }
|
|
|
|
    // Stores the ~results~ of a finished ~task~, deduplicates them by
    // object name, forwards them to waiting successors, updates the
    // data reference counts, and wakes up the workers.
    // ~args~ are the argument items the task was run with; their
    // reference counts are decreased.
    void setTaskResult(Task *task, vector<TaskDataItem *> results,
                       vector<TaskDataItem *> args)
    {
        // make sure a worker is running for the location of the result
        if (results.size() > 0)
            ensureWorker(results[0]->getFirstLocation().getWorkerLocation());

        for (size_t i = 0; i < results.size(); i++)
        {
            TaskDataItem*& result = results[i];
            string oname = result->getObjectName();
            TaskDataItem* existing = nullptr;
            {
                boost::lock_guard<boost::mutex> lock(dataItemsMutex);

                // when data with the same name has already been referenced
                // merge both data items to one item to keep only a single data
                // item per name
                auto pair = dataItems.emplace(oname, result);
                if (!pair.second)
                {
                    existing = pair.first->second;
                }
            }
            if (existing != nullptr)
            {
                if (existing != result)
                {
                    // keep the registered item, fold the new locations
                    // into it and drop the duplicate
                    existing->merge(result);
                    delete result;
                    result = existing;
                }
            }
        }

        TaskScheduleInfo *info;
        {
            boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
            info = &taskInfo[task];
        }

#ifdef DEBUG_JOB_SELECTION
        cout << "Got result from " << task->getId() << ": "
             << task->toString() << " / " << task->toString() << endl;
#endif

        // number of new references each result gains from successors
        map<TaskDataItem *, size_t> refs;
        // successors that became runnable through this result
        vector<Task *> tasksForLocalWorkPools;
        {
            boost::lock_guard<boost::shared_mutex> lock(info->mutex);

            // store result
            info->results = results;

            // update the arguments of successors
            for (auto tuple : info->successors)
            {
                size_t pos = get<0>(tuple);
                TaskScheduleInfo *succInfo = get<1>(tuple);
                size_t targetPos = get<2>(tuple);
                TaskDataItem *result = results[pos];

                refs[result]++;

                // Here two mutexes are locked, but order of locking is
                // always in direction of result flow
                // So no deadlock can occur
                boost::lock_guard<boost::shared_mutex> lock(succInfo->mutex);
                succInfo->arguments[targetPos] = result;
                if (!succInfo->inPool)
                {
                    succInfo->inPool = true;
                    tasksForLocalWorkPools.push_back(succInfo->task);
                }
            }
        }

        {
            boost::lock_guard<boost::shared_mutex> lock(dataReferencesMutex);

            // each successor keeps a reference to the result
            for (auto pair : refs)
            {
                dataReferences[pair.first] += pair.second;
            }

            // For all arguments, decrease the number of remaining tasks
            for (auto preResult : args)
            {
                dataReferences[preResult]--;
            }
        }

        // resolve the local work pool of each unlocked successor under
        // a shared lock, then add the tasks outside the lock
        vector<pair<WorkPool *, Task *>> tasksForLocalWorkPools2;
        {
            boost::shared_lock_guard lock(poolMutex);
            for (auto task : tasksForLocalWorkPools)
            {
                tasksForLocalWorkPools2.emplace_back(
                    &localWorkPools[task->getPreferredLocation()], task);
            }
        }

        for (auto pair : tasksForLocalWorkPools2)
        {
            pair.first->addTask(pair.second);
        }

        // A new result may unlock other tasks
        // Decreased data references may unlock garbagged collecting
        {
            boost::lock_guard<boost::shared_mutex> lock(poolMutex);
            workGeneration++;
        }

        poolSignal.notify_all();
    }
|
|
|
|
boost::mutex resultMutex;
|
|
vector<DArrayElement> myResult;
|
|
string dArrayName;
|
|
bool isError = false;
|
|
string errorMessage;
|
|
|
|
private:
|
|
static thread_local optional<set<WorkerLocation>>
|
|
ensureWorkerCheckedLocations;
|
|
|
|
    // makes sure a worker thread for this location is running;
    // takes the necessary locks itself. A thread-local cache of
    // already checked locations avoids repeated locking on the
    // hot path.
    void ensureWorker(const WorkerLocation &location)
    {
        if (location.getServer() == "")
            return;
        // lock-free check of the thread local data
        // to fast exit on already checked locations
        if (ensureWorkerCheckedLocations &&
            !ensureWorkerCheckedLocations->emplace(location).second)
            return;
        {
            // fast path: pool (and thus worker) already exists
            boost::shared_lock_guard lock(poolMutex);
            if (localWorkPools.find(location) != localWorkPools.end())
                return;
        }
        WorkPool *localWorkPool;
        {
            // re-acquire exclusively; operator[] creates the pool if
            // no other thread created it in the meantime
            boost::lock_guard lock(poolMutex);
            localWorkPool = &localWorkPools[location];
        }
        {
            boost::lock_guard<boost::mutex> lock(threadsMutex);
            auto &slot = threads[location];
            // another thread already started the worker
            if (slot != 0)
                return;
            runningThreads++;
            slot =
                new boost::thread(
                    boost::bind(
                        &Scheduler::worker,
                        this,
                        WorkerLocation(location),
                        boost::ref(*localWorkPool)));
        }
        {
            boost::lock_guard<boost::shared_mutex> lock(poolMutex);
            workerWorking++;
        }
    }
|
|
|
|
    // Requests all worker threads to stop; takes poolMutex itself.
    // waitForWorkDone = true: finish the remaining work first;
    // false: stop as soon as possible (used on errors).
    void stopWorkers(bool waitForWorkDone)
    {
        boost::lock_guard<boost::shared_mutex> lock(poolMutex);
        if (waitForWorkDone)
            stopWorkersWhenWorkDone = true;
        else
            killWorkers = true;
        workGeneration++;
        poolSignal.notify_all();
    }
|
|
|
|
    // True when workers should shut down once the work pool runs dry.
    bool shouldStopWorkersWhenWorkDone()
    {
        boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
        return stopWorkersWhenWorkDone;
    }
|
|
|
|
    // True when workers should stop immediately (error case).
    bool shouldKillWorkers()
    {
        boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
        return killWorkers;
    }
|
|
|
|
    // True when any job has reported an error via addError.
    bool isErrored()
    {
        boost::lock_guard<boost::mutex> lock(resultMutex);
        return isError;
    }
|
|
|
|
    // Monotonic counter, bumped whenever new work may be available;
    // workers use it to detect changes that happened while they were busy.
    size_t getWorkGeneration()
    {
        boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
        return workGeneration;
    }
|
|
|
|
    // Ensures a staticFileTransferator is running on the server of
    // ~location~. The remote command is issued only once per server,
    // coordinated through the fileTransferrators map.
    void ensureFileTransferrator(WorkerLocation &location)
    {
        {
            auto lock = fileTransferrators.check(location.getServer());
            // empty optional: another thread already did (or finished)
            // the setup for this server
            if (!lock)
                return;
            string cmd = "query staticFileTransferator(" +
                         std::to_string(fileTransferPort) +
                         ",10)";

            ConnectionInfo* ci = location.getWorkerConnection();

            double duration =
                Task::runCommand(ci,
                                 cmd,
                                 "open file transferator",
                                 false,
                                 "");

            ci -> deleteIfAllowed();
            ci = nullptr;

            TaskStatistics::report("remote open file transferator", duration);
        }

        boost::lock_guard<boost::shared_mutex> lock(poolMutex);

        // mark the transferrator as ready and wake up workers that
        // may be waiting for it
        auto &transferrator = activeTransferrators[location.getServer()];
        if (!transferrator.first)
        {
            transferrator.first = true;
            workGeneration++;
            poolSignal.notify_all();
        }
    }
|
|
|
|
    // Returns a snapshot of the task's current argument items
    // (null entries = predecessor results not available yet).
    // Takes the necessary locks itself.
    vector<TaskDataItem *> getTaskArguments(Task *task)
    {
        TaskScheduleInfo *info;
        {
            boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
            info = &taskInfo[task];
        }
        boost::shared_lock_guard<boost::shared_mutex> lock(info->mutex);
        vector<TaskDataItem *> vec = info->arguments;
        return vec;
    }
|
|
|
|
    // the worker thread
    // Main loop of the worker thread for ~location~: repeatedly select
    // a job (preferring the local pool over the global one), execute
    // it, and sleep on poolSignal when no work is available. Exits on
    // kill, or when it is the last active worker, a stop was
    // requested, and the pool is empty.
    void worker(WorkerLocation location, WorkPool &localWorkPool)
    {
        // enable ensureWorker caching for this worker
        ensureWorkerCheckedLocations.emplace();

        // Connect to the worker
        ConnectionInfo* ci = location.getWorkerConnection();

        // Ensure file transferrator is open
        ensureFileTransferrator(location);

        while (true)
        {
            // remember the generation to detect work arriving
            // while this iteration runs
            size_t startWorkGeneration = getWorkGeneration();
            if (shouldKillWorkers())
                break;
            auto start = std::chrono::high_resolution_clock::now();
            WorkerJob *job = selectJob(location, localWorkPool);
            auto duration =
                std::chrono::duration_cast<std::chrono::microseconds>(
                    std::chrono::high_resolution_clock::now() - start);
            TaskStatistics::report("selecting local job",
                                   ((double)duration.count()) / 1000000);


            // Select new job, if not already selected
            if (job == nullptr)
            {
                start = std::chrono::high_resolution_clock::now();
                job = selectJob(location, workPool);
                duration =
                    std::chrono::duration_cast<std::chrono::microseconds>(
                        std::chrono::high_resolution_clock::now() - start);
                TaskStatistics::report("selecting global job",
                                       ((double)duration.count()) / 1000000);
            }

            // Execute job
            if(job != nullptr)
            {
                auto start = std::chrono::high_resolution_clock::now();
                if (!job->run())
                {
                    // the job failed: kill all workers
                    stopWorkers(false);
                    break;
                }
                auto duration =
                    std::chrono::duration_cast<std::chrono::microseconds>(
                        std::chrono::high_resolution_clock::now() - start);
                TaskStatistics::report("run job " + job->getType(),
                                       ((double)duration.count()) / 1000000);

                delete job;
                job = nullptr;

                continue;
            }

            if (shouldStopWorkersWhenWorkDone() && !isErrored())
            {
                // nothing productive to do
                // collect some garbage
                collectGarbagge(location);
            }

            if (startWorkGeneration == getWorkGeneration())
            {
                // Go into sleep mode
                auto start = std::chrono::high_resolution_clock::now();

                {
                    boost::unique_lock<boost::shared_mutex> lock(poolMutex);

                    // re-check under the lock; new work may have arrived
                    if (startWorkGeneration != workGeneration)
                        continue;

                    // check for end of work
                    if (workerWorking == 1)
                    {
                        // this is the last active worker; shut down
                        // when a stop was requested and all work is done
                        if (stopWorkersWhenWorkDone && workPool.empty())
                        {
                            workerWorking--;
                            poolSignal.notify_all();
                            break;
                        }
                    }

                    workerWorking--;

                    poolSignal.wait(lock);

                    workerWorking++;
                }

                auto duration =
                    std::chrono::duration_cast<std::chrono::microseconds>(
                        std::chrono::high_resolution_clock::now() - start);
                TaskStatistics::report("worker idle",
                                       ((double)duration.count()) / 1000000);
            }
        }

#ifdef REPORT_WORKER_STATS
        cout << "=== " << location.toString() << " ===\n"
             << TaskStatistics::getThreadLocal().toString();
#endif

#ifdef REPORT_TOTAL_STATS
        {
            boost::lock_guard<boost::mutex> lock(statsMutex);
            stats.merge(TaskStatistics::getThreadLocal());
        }
#endif

        {
            boost::lock_guard<boost::mutex> lock(threadsMutex);
            runningThreads--;
            ci -> deleteIfAllowed();
            ci = nullptr;
        }
        // wake up join(), which waits for runningThreads to reach zero
        threadSignal.notify_all();
    }
|
|
|
|
// mutex must already be locked
|
|
int computeTaskCost(Task *task, vector<TaskDataItem *> args,
|
|
WorkerLocation &location)
|
|
{
|
|
int cost = 0;
|
|
int i = 0;
|
|
for (auto arg : args)
|
|
{
|
|
if (arg == 0)
|
|
{
|
|
cost += CostMissingArgument;
|
|
continue;
|
|
}
|
|
cost += arg->getDistance(location);
|
|
i++;
|
|
}
|
|
if (task->hasFlag(PreferSlotServer))
|
|
{
|
|
if (location.getServer() !=
|
|
task->getPreferredLocation().getServer())
|
|
{
|
|
cost += CostNotPreferredServer;
|
|
}
|
|
}
|
|
if (task->hasFlag(PreferSlotWorker))
|
|
{
|
|
if (location != task->getPreferredLocation())
|
|
{
|
|
cost += CostNotPreferredWorker;
|
|
}
|
|
}
|
|
return cost;
|
|
}
|
|
|
|
    // Returns true when a candidate with ~cost~ beats the current
    // ~best~ job; in that case the previously stored job is deleted
    // and ~best~ is cleared so the caller can store the new candidate.
    // Equal cost keeps the existing best (strict comparison).
    static bool checkBetter(optional<pair<WorkerJob *, int>> &best, int cost)
    {
        if (!best)
            return true;
        if (best->second > cost)
        {
            // the stored job is owned here; free it before replacing
            delete best->first;
            best.reset();
            return true;
        }
        return false;
    }
|
|
|
|
// mutex must already be locked
|
|
void valueJobsForTask(WorkerLocation &location, Task *task,
|
|
optional<pair<WorkerJob *, int>> &best,
|
|
unordered_map<TaskDataItem *, bool>
|
|
&hasUpcomingLocationCache);
|
|
WorkerJob *selectJob(WorkerLocation &location, WorkPool &workPool);
|
|
void collectGarbagge(WorkerLocation &location);
|
|
|
|
    // Synthetic final task appended after each output task. It
    // persists the result at the task's preferred location (as object
    // or file, depending on resultInObjectForm) and records the
    // DArrayElement for the operator's result array.
    class ResultTask : public Task
    {
    public:
        ResultTask(WorkerLocation preferredLocation, Scheduler &scheduler)
            : Task(preferredLocation,
                   CopyArguments | ConvertArguments |
                       (scheduler.resultInObjectForm
                            ? RunOnPreferedWorker | PrimaryArgumentAsObject
                            : RunOnPreferedServer | PrimaryArgumentAsFile)),
              scheduler(scheduler) {}

        virtual std::string getTaskType() const { return "result"; }

        // a result task produces no further data items
        virtual size_t getNumberOfResults() const { return 0; }

        virtual vector<TaskDataItem *> run(
            WorkerLocation &location,
            std::vector<TaskDataItem *> args)
        {
            // the primary argument is the data item to persist
            TaskDataItem *result = args.front();

            TaskDataLocation storedLocation =
                result->findLocation(
                    location,
                    scheduler.resultInObjectForm ? Object : File);
            WorkerLocation preferredLocation = getPreferredLocation();

            if (scheduler.resultInObjectForm)
            {
                result->persistLocation(storedLocation);
            }
            else
            {
                if (storedLocation.getWorkerLocation() != preferredLocation)
                {
                    ConnectionInfo *ci = location.getWorkerConnection();

                    // copy file into worker
                    Task::runCommand(
                        ci,
                        string("query createDirectory(") +
                            "'" + preferredLocation.getFileDirectory(result) +
                            "', TRUE)",
                        "create directory",
                        false, "");
                    Task::runCommand(
                        ci,
                        string("query copyFile(") +
                            "'" + storedLocation.getFilePath(result) + "', " +
                            "'" + preferredLocation.getFilePath(result) + "')",
                        "copy file to correct worker");
                    result->addLocation(
                        TaskDataLocation(preferredLocation, File, false));

                    ci -> deleteIfAllowed();
                    ci = nullptr;
                }
                else
                {
                    result->persistLocation(storedLocation);
                }
            }

            boost::lock_guard<boost::mutex> lock(scheduler.resultMutex);
            scheduler.dArrayName = result->getName();
            // grow the result vector up to the slot index
            while (scheduler.myResult.size() <= result->getSlot())
            {
                scheduler.myResult.push_back(DArrayElement("", 0, 0, ""));
            }
            // TODO Error when already set (multiple leaf tasks)
            DArrayElement &resultItem = scheduler.myResult[result->getSlot()];
            if (resultItem.getHost() != "")
            {
                cout << "Multiple leaf tasks for slot " << result->getSlot()
                     << endl;
            }
#ifdef TASK_VERIFY_COUNTS
            auto loc = result->findLocation(preferredLocation);
            cout << result->getSlot() << " = " << loc.getValue(result) << endl;
#endif
            resultItem = preferredLocation.getDArrayElement();

            return vector<TaskDataItem *>();
        }

    private:
        Scheduler &scheduler;
    };
|
|
|
|
int fileTransferPort;
|
|
bool resultInObjectForm;
|
|
|
|
size_t totalNumberOfTasks = 0;
|
|
|
|
boost::mutex threadsMutex;
|
|
std::map<WorkerLocation, boost::thread *> threads;
|
|
size_t runningThreads = 0;
|
|
boost::condition_variable threadSignal;
|
|
// end of threadsMutex protected
|
|
|
|
boost::shared_mutex poolMutex;
|
|
size_t workGeneration = 0;
|
|
WorkPool workPool;
|
|
std::map<WorkerLocation, WorkPool> localWorkPools;
|
|
size_t workerWorking = 0;
|
|
bool stopWorkersWhenWorkDone = false;
|
|
bool killWorkers = false;
|
|
boost::condition_variable_any poolSignal;
|
|
|
|
std::unordered_map<Task *, TaskScheduleInfo> taskInfo;
|
|
|
|
std::map<string, pair<bool, int>> activeTransferrators;
|
|
// end of poolMutex protected
|
|
|
|
boost::mutex dataItemsMutex;
|
|
std::unordered_map<std::string, TaskDataItem *> dataItems;
|
|
// end of dataReferencesMutex protected
|
|
|
|
boost::shared_mutex dataReferencesMutex;
|
|
std::unordered_map<TaskDataItem *, int> dataReferences;
|
|
// end of dataReferencesMutex protected
|
|
|
|
// thread-safe
|
|
OnlyOnceMutexMap<std::string> fileTransferrators;
|
|
|
|
#ifdef REPORT_TOTAL_STATS
|
|
boost::mutex statsMutex;
|
|
TaskStatistics stats;
|
|
#endif
|
|
};
|
|
|
|
/*
|
|
|
|
6. Class ExecuteWorkerJob
|
|
|
|
Executes the operator job on the worker
|
|
|
|
*/
|
|
|
|
// Worker job that executes a task on the worker with its (already
// available) argument items and publishes the results to the
// scheduler.
class ExecuteWorkerJob : public WorkerJob
{
public:
    ExecuteWorkerJob(WorkerLocation &location,
                     Scheduler &scheduler,
                     Task *task,
                     vector<TaskDataItem *> args)
        : WorkerJob(location, scheduler),
          task(task), args(args) {}
    virtual ~ExecuteWorkerJob() {}

    virtual string getType() const { return "execute"; }

    virtual string toString() const
    {
        string message = string("execute ") +
                         std::to_string(task->getId()) + " " + task->toString();
        int i = 0;
        for (auto arg : args)
        {
            message += string("\narg ") + std::to_string(i++) +
                       " = " + arg->toString();
        }
        return message;
    }

    // Returns false on error (aborts scheduling); true otherwise,
    // including the case that another worker already started the task.
    virtual bool run()
    {
        // only one worker may execute the task
        if (!scheduler.startExecutingTask(task))
        {
            TaskStatistics::report("execute invalid", 0);
            return true;
        }

        // Execute
        vector<TaskDataItem *> result;
        try
        {
            result = task->run(location, args);
        }
        catch (exception &e)
        {
            // report the error together with the full argument context
            string message = string(e.what()) + "\n" +
                             "while running " + task->toString() +
                             " on " + location.toString();
            int i = 0;
            for (auto arg : args)
            {
                message += string("\narg ") + std::to_string(i++) +
                           " = " + arg->toString();
            }
            scheduler.addError(message);
            return false;
        }

        scheduler.setTaskResult(task, result, args);
        return true;
    }

private:
    Task *task;
    vector<TaskDataItem *> args;
};
|
|
|
|
/*
|
|
|
|
8. Class TransferDataWorkerJob
|
|
|
|
Transfers the input data to the worker
|
|
|
|
*/
|
|
class TransferDataWorkerJob : public WorkerJob
|
|
{
|
|
public:
|
|
    // task: the task the transferred data is destined for
    // taskCost: cost used when reserving the task (see run)
    // dataItems: items that should be transferred to ~location~
    TransferDataWorkerJob(WorkerLocation &location,
                          Scheduler &scheduler,
                          Task *task,
                          int taskCost,
                          vector<TaskDataItem *> dataItems)
        : WorkerJob(location, scheduler),
          task(task), taskCost(taskCost),
          dataItems(dataItems) {}
    virtual ~TransferDataWorkerJob() {}

    virtual string getType() const { return "transfer"; }

    virtual string toString() const
    {
        return "transfer " + std::to_string(dataItems.size()) +
               " data items from " + task->toString();
    }
|
|
|
|
virtual bool run()
|
|
{
|
|
// set reservation
|
|
if (!scheduler.reserveTask(task, location, taskCost))
|
|
return true;
|
|
|
|
TaskDataLocation loc(location, File, true);
|
|
|
|
auto activeTransferrators = scheduler.getActiveTransferrators();
|
|
|
|
string tuples = "";
|
|
int count = 0;
|
|
|
|
map<string, int> transfersPerServer;
|
|
vector<TaskDataItem *> usedDataItems;
|
|
|
|
for (auto data : dataItems)
|
|
{
|
|
try
|
|
{
|
|
auto pair =
|
|
data->findTransferSourceLocation(activeTransferrators);
|
|
// set upcoming location
|
|
if (data->addUpcomingLocation(loc))
|
|
{
|
|
auto sourceLocation = pair.first;
|
|
string sourceServer = sourceLocation.getServer();
|
|
|
|
// set active transfer
|
|
transfersPerServer[sourceServer]++;
|
|
activeTransferrators[sourceServer].second++;
|
|
|
|
usedDataItems.push_back(data);
|
|
|
|
tuples += "('" + sourceLocation.getFilePath(data) + "' " +
|
|
"'" + sourceServer + "' " +
|
|
"'" + location.getFilePath(data) + "') ";
|
|
count++;
|
|
}
|
|
}
|
|
catch (NoSourceLocationException &)
|
|
{
|
|
// this can happen when file transferrator is
|
|
// not ready yet, or data is in object form,
|
|
// skip this task for now,
|
|
// as it can't be transferred
|
|
}
|
|
}
|
|
|
|
for (auto pair : transfersPerServer)
|
|
{
|
|
scheduler.updateActiveFileTransfers(pair.first, pair.second);
|
|
}
|
|
|
|
if(tuples.size() == 0) {
|
|
|
|
#ifdef DEBUG_JOB_SELECTION
|
|
cerr << "ERROR: Got empty file transfer task. Dataitem count "
|
|
<< dataItems.size() << endl;
|
|
#endif
|
|
// TODO: Replace by blocking until the job can be executed
|
|
chrono::seconds duration(5);
|
|
this_thread::sleep_for(duration);
|
|
} else {
|
|
string relation =
|
|
string("[const rel(tuple([P: text, S: text, T: text])) ") +
|
|
"value (" + tuples + ")]";
|
|
string port = std::to_string(scheduler.getFileTransferPort());
|
|
string cmd = "query " + relation + " feed extend[ " +
|
|
"OK: getFileTCP(.P, .S, " + port + ", TRUE, .T) ] " +
|
|
"count + 1";
|
|
|
|
double duration = 0;
|
|
|
|
try
|
|
{
|
|
ConnectionInfo* targetCI = location.getWorkerConnection();
|
|
|
|
duration = Task::runCommand(
|
|
targetCI, cmd, "transfer file", false,
|
|
"(int " + std::to_string(count + 1) + ")");
|
|
|
|
targetCI -> deleteIfAllowed();
|
|
targetCI = nullptr;
|
|
}
|
|
catch (exception &e)
|
|
{
|
|
string list = "";
|
|
for (auto data : usedDataItems)
|
|
{
|
|
list += " - " + data->toString() + "\n";
|
|
}
|
|
scheduler.addError(string(e.what()) + "\n" +
|
|
"while copying\n" + list +
|
|
"to " + location.toString());
|
|
return false;
|
|
}
|
|
|
|
TaskStatistics::report("remote transfer files", duration);
|
|
}
|
|
|
|
for (auto pair : transfersPerServer)
|
|
{
|
|
scheduler.updateActiveFileTransfers(pair.first, -pair.second);
|
|
}
|
|
|
|
for (auto data : usedDataItems)
|
|
{
|
|
data->addLocation(loc);
|
|
}
|
|
|
|
// unreserve when still reserved by this worker
|
|
scheduler.unreserveTask(task, location, taskCost);
|
|
|
|
scheduler.signalPoolUpdate();
|
|
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
Task *task;
|
|
int taskCost;
|
|
vector<TaskDataItem *> dataItems;
|
|
};
|
|
|
|
/*
|
|
|
|
9. Class ConvertToObjectWorkerJob
|
|
|
|
Converts the input data to objects
|
|
|
|
*/
|
|
|
|
// Converts a data item that exists as a file on this worker into a
// database object on the same worker (used when the consuming task needs
// the argument in object form).
class ConvertToObjectWorkerJob : public WorkerJob
{
public:
    // task/taskCost identify and price the reservation made for the task
    // that needs the converted argument; data is the item to convert.
    ConvertToObjectWorkerJob(WorkerLocation &location,
                             Scheduler &scheduler,
                             Task *task,
                             int taskCost,
                             TaskDataItem *data)
        : WorkerJob(location, scheduler),
          task(task), taskCost(taskCost),
          data(data) {}
    virtual ~ConvertToObjectWorkerJob() {}

    virtual string getType() const { return "convert to object"; }

    virtual string toString() const
    {
        return "convert to object " + data->toString();
    }

    // Runs "(let <object> = <file value>)" on the worker.
    // Returns false (recording the error) when the remote command fails;
    // returns true both on success and when the reservation was lost.
    virtual bool run()
    {
        // set reservation; give up silently when it cannot be obtained
        if (!scheduler.reserveTask(task, location, taskCost))
            return true;

        // source: the file form of the data on this worker
        TaskDataLocation sourceLocation =
            data->findLocation(location, File);

        // set active transfer and upcoming location
        // (announce the object location before it is created)
        TaskDataLocation loc(location, Object, true);
        data->addUpcomingLocation(loc);

        double duration = 0;

        try
        {
            ConnectionInfo* ci = location.getWorkerConnection();
            string cmd;
            string description;

            // relations must be consumed from a feed; plain values can be
            // assigned directly
            if (data->isObjectRelation() || data->isFileRelation())
            {
                cmd = "(let " + data->getObjectName() +
                      " = (consume (feed" +
                      sourceLocation.getValueArgument(data) + ")))";
                description = "store file relation as object";
            }
            else
            {
                cmd = "(let " + data->getObjectName() + " = " +
                      sourceLocation.getValueArgument(data) + ")";
                description = "store file value as object";
            }

            try
            {
                duration += Task::runCommand(
                    ci,
                    cmd,
                    description,
                    true,
                    "()");
            }
            catch (RemoteException &e)
            {
                // Failed, maybe variable already exists
                // delete variable and retry
                duration += Task::runCommand(
                    ci,
                    "(delete " + data->getObjectName() + ")",
                    "delete existing object",
                    true, "", true);

                duration += Task::runCommand(
                    ci,
                    cmd,
                    description,
                    true,
                    "()");
            }

            ci->deleteIfAllowed();
            ci = nullptr;
        }
        catch (exception &e)
        {
            scheduler.addError(string(e.what()) + "\n" +
                               "while converting " + data->toString() +
                               " to object form on " + location.toString());
            return false;
        }

        TaskStatistics::report("remote convert to object", duration);

        // the object form is now available on this worker
        data->addLocation(loc);

        // unreserve when still reserved by this worker
        scheduler.unreserveTask(task, location, taskCost);

        scheduler.signalPoolUpdate();

        return true;
    }

private:
    Task *task;
    int taskCost;
    TaskDataItem *data;
};
|
|
|
|
/*
|
|
|
|
11. Class ConvertToFileWorkerJob
|
|
|
|
Converts the input data to file
|
|
|
|
*/
|
|
|
|
class ConvertToFileWorkerJob : public WorkerJob
|
|
{
|
|
public:
|
|
ConvertToFileWorkerJob(WorkerLocation &location,
|
|
Scheduler &scheduler,
|
|
TaskDataItem *data)
|
|
: WorkerJob(location, scheduler),
|
|
data(data) {}
|
|
virtual ~ConvertToFileWorkerJob() {}
|
|
|
|
virtual string getType() const { return "convert to file"; }
|
|
|
|
virtual string toString() const
|
|
{
|
|
return "convert to file " + data->toString();
|
|
}
|
|
|
|
virtual bool run()
|
|
{
|
|
// set active transfer and upcoming location
|
|
TaskDataLocation loc(location, File, true);
|
|
data->addUpcomingLocation(loc);
|
|
|
|
double duration = 0;
|
|
|
|
try
|
|
{
|
|
ConnectionInfo* ci = location.getWorkerConnection();
|
|
|
|
// Save Object in a file
|
|
string oname = data->getObjectName();
|
|
string fname = location.getFilePath(data);
|
|
string cmd = "query " + oname +
|
|
" saveObjectToFile['" + fname + "']";
|
|
duration = Task::runCommand(ci, cmd, "save object to file",
|
|
false, "");
|
|
|
|
ci -> deleteIfAllowed();
|
|
ci = nullptr;
|
|
}
|
|
catch (exception &e)
|
|
{
|
|
scheduler.addError(string(e.what()) + "\n" +
|
|
"while converting " + data->toString() +
|
|
" to file form on " + location.toString());
|
|
return false;
|
|
}
|
|
|
|
TaskStatistics::report("remote convert to file", duration);
|
|
|
|
data->addLocation(loc);
|
|
|
|
scheduler.signalPoolUpdate();
|
|
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
TaskDataItem *data;
|
|
};
|
|
|
|
/*
|
|
|
|
12. Class WaitForTransferCompletedWorkerJob
|
|
|
|
Waits until another job is completed.
|
|
|
|
*/
|
|
|
|
class WaitForTransferCompletedWorkerJob : public WorkerJob
|
|
{
|
|
public:
|
|
WaitForTransferCompletedWorkerJob(WorkerLocation &location,
|
|
Scheduler &scheduler,
|
|
TaskDataItem *data)
|
|
: WorkerJob(location, scheduler),
|
|
data(data) {}
|
|
virtual ~WaitForTransferCompletedWorkerJob() {}
|
|
|
|
virtual string getType() const { return "wait for transfer"; }
|
|
|
|
virtual string toString() const
|
|
{
|
|
return "wait for tranfer completed";
|
|
}
|
|
|
|
virtual bool run()
|
|
{
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
scheduler.waitForPoolUpdateOrLocation(data, location);
|
|
auto duration =
|
|
std::chrono::duration_cast<std::chrono::microseconds>(
|
|
std::chrono::high_resolution_clock::now() - start);
|
|
TaskStatistics::report("wait for transfer",
|
|
((double)duration.count()) / 1000000);
|
|
return true;
|
|
}
|
|
|
|
private:
|
|
TaskDataItem *data;
|
|
};
|
|
|
|
// Evaluates every job that ~location~ could run for ~task~ — execute it,
// transfer missing arguments here, convert arguments between file/object
// form, or wait for a running transfer — and stores the cheapest candidate
// (job pointer + cost) in ~best~. Returns early when the task is already
// started, or when the running cost already exceeds the current best.
// hasUpcomingLocationCache memoizes data->hasUpcomingLocation(...) across
// the tasks evaluated within one selectJob call.
void Scheduler::valueJobsForTask(WorkerLocation &location, Task *task,
                                 optional<pair<WorkerJob *, int>> &best,
                                 unordered_map<TaskDataItem *, bool>
                                     &hasUpcomingLocationCache)
{
    vector<TaskDataItem *> args = getTaskArguments(task);

    int cost = computeTaskCost(task, args, location);

    // Skip early when there is already a better job
    if (best && cost > best->second)
        return;

    // Flags restrict where the task may run; "valid worker" means this
    // exact worker may execute it, "valid server" means this server may
    // at least hold/prepare its data.
    bool preferredWorker = task->getPreferredLocation() == location;
    bool preferredServer = task->getPreferredLocation().getServer() ==
                           location.getServer();
    bool validWorker =
        (!task->hasFlag(RunOnPreferedWorker) || preferredWorker) &&
        (!task->hasFlag(RunOnPreferedServer) || preferredServer);
    bool validServer =
        (!task->hasFlag(RunOnPreferedWorker) &&
         !task->hasFlag(RunOnPreferedServer)) ||
        preferredServer;

    bool argumentsAvailable = true;

    if (validServer)
    {
        TaskScheduleInfo *info;
        {
            // short shared lock only to look up the task's info record
            boost::shared_lock_guard<boost::shared_mutex> lock(poolMutex);
            info = &taskInfo[task];
        }
        // check if there is already a reservation for this task
        optional<pair<WorkerLocation, int>> reservation;
        {
            boost::shared_lock_guard<boost::shared_mutex> lock(info->mutex);
            if (info->started)
                return;
            reservation = info->reservation;
        }
        if (reservation)
        {
            int resCost = reservation->second;
            // it's only allowed to seal a reservation
            // when the cost is smaller
            cost += CostReservation;

            // Skip early when there is already a better job
            if (best && cost > best->second)
                return;

            if (cost >= resCost)
            {
                // When by this server reserved
                // We can help to copy files
                string server = reservation->first.getServer();
                if (server != location.getServer())
                {
                    validServer = false;
                }
                validWorker = false;
            }
        }
    }

    vector<TaskDataItem *> needTransfer;
    // NOTE(review): only the last argument with a pending transfer is
    // remembered here; earlier pending ones are overwritten — confirm
    // that waiting on any single pending transfer is sufficient.
    TaskDataItem *needWait = 0;

    int i = 0;
    for (auto data : args)
    {
        // the first argument may carry different file/object flags
        bool primaryArgument = i == 0;
        i++;

        if (data == 0)
        {
            // When arguments are missing
            // this task can't be executed yet
            argumentsAvailable = false;
            continue;
        }

        // where (in which form) the argument already exists here
        bool hasFile = data->hasLocation(location,
                                         DataStorageType::File);
        bool hasObject = data->hasLocation(location,
                                           DataStorageType::Object);

        // in which form the task requires the argument
        bool forceFile =
            task->hasFlag(primaryArgument
                              ? PrimaryArgumentAsFile
                              : SecondaryArgumentsAsFile);
        bool forceObject =
            task->hasFlag(primaryArgument
                              ? PrimaryArgumentAsObject
                              : SecondaryArgumentsAsObject);

        // relations default to the form they naturally have
        if (!forceFile && data->isObjectRelation())
        {
            forceObject = true;
        }
        if (!forceObject && data->isFileRelation())
        {
            forceFile = true;
        }

        if ((!hasFile || forceObject) && (!hasObject || forceFile))
        {
            // Data is not at the correct location
            // this can't be executed
            argumentsAvailable = false;
        }

        // Is Copy allowed?
        if (task->hasFlag(CopyArguments))
        {
            // This is a valid server for execution
            // and the data is not yet available
            if (validServer &&
                !hasFile && !hasObject)
            {
                // Check if data is already being transferred here
                bool hasUpcomingLocation = false;
                auto cacheEntry = hasUpcomingLocationCache.find(data);
                if (cacheEntry == hasUpcomingLocationCache.end())
                {
                    hasUpcomingLocation =
                        data->hasUpcomingLocation(location, File);
                    hasUpcomingLocationCache[data] = hasUpcomingLocation;
                }
                else
                {
                    hasUpcomingLocation = cacheEntry->second;
                }
                if (hasUpcomingLocation)
                {
                    needWait = data;
                }
                else
                {
                    needTransfer.push_back(data);
                }
            }
        }

        // Is Convert allowed?
        if (task->hasFlag(TaskFlag::ConvertArguments))
        {
            // Convert from file to object when data need to be in
            // object form
            if (validWorker &&
                forceObject &&
                !hasObject && hasFile)
            {
                int convertCost = cost + CostConvertToObject;
                if (checkBetter(best, convertCost))
                {
                    best = make_pair(
                        new ConvertToObjectWorkerJob(
                            location, *this,
                            task, cost, data),
                        convertCost);
                }
            }
            // Convert from object to file when data need to be in
            // file form
            if (validServer &&
                forceFile &&
                !hasFile && hasObject)
            {
                int convertCost = cost + CostConvertToFile;
                if (checkBetter(best, convertCost))
                {
                    best = make_pair(
                        new ConvertToFileWorkerJob(
                            location, *this, data),
                        convertCost);
                }
            }
            // Convert from object to file so other worker can copy it
            // This helps other workers to execute the task
            // So we reverse task cost logic in a way that far away
            // tasks will get their data converted first, as their as
            // at least likely to be executed by this worker
            if (hasObject && !hasFile)
            {
                int convertCost = INT_MAX - cost;
                if (checkBetter(best, convertCost))
                {
                    best = make_pair(
                        new ConvertToFileWorkerJob(
                            location, *this, data),
                        convertCost);
                }
            }
        }
    }

    if (argumentsAvailable && validWorker)
    {
        // Execute the task
        if (checkBetter(best, cost))
        {
            best = make_pair(
                new ExecuteWorkerJob(location, *this,
                                     task, args),
                cost);
        }
    }
    else if (!needTransfer.empty())
    {
        // Transfer the data to the server
        int transferCost =
            cost +
            CostTransfer;
        if (checkBetter(best, transferCost))
        {
            best = make_pair(
                new TransferDataWorkerJob(
                    location, *this,
                    task, cost, needTransfer),
                transferCost);
        }
    }
    else if (validWorker && needWait != 0)
    {
        // The correct worker can wait for the data transfer
        int waitingCost = cost + CostWaitingOnTransfer;
        if (checkBetter(best, waitingCost))
        {
            best = make_pair(
                new WaitForTransferCompletedWorkerJob(
                    location, *this,
                    needWait),
                waitingCost);
        }
    }
}
|
|
|
|
// Picks the cheapest job that ~location~ can perform for any task in the
// pool, or returns 0 when the pool is empty or no job applies. Iteration
// starts at a random position so concurrent workers do not all evaluate
// the tasks in the same order.
WorkerJob *Scheduler::selectJob(WorkerLocation &location, WorkPool &workPool)
{
    std::shared_ptr<unordered_set<Task *>> tasks = workPool.getTasks();

    if (tasks->empty())
        return 0;

    // memoizes upcoming-location lookups across all evaluated tasks
    unordered_map<TaskDataItem *, bool> hasUpcomingLocationCache;

    optional<pair<WorkerJob *, int>> best;

    // Evaluate every task exactly once, beginning at a random offset.
    size_t offset = rand() % tasks->size();
    auto cursor = tasks->begin();
    std::advance(cursor, offset);
    while (cursor != tasks->end())
    {
        valueJobsForTask(location, *cursor, best, hasUpcomingLocationCache);
        ++cursor;
    }
    // Wrap around: now handle the tasks before the random start position.
    cursor = tasks->begin();
    for (size_t remaining = offset; remaining > 0; --remaining)
    {
        valueJobsForTask(location, *cursor, best, hasUpcomingLocationCache);
        ++cursor;
    }

    if (!best)
        return 0;

#ifdef DEBUG_JOB_SELECTION
    cout << location.toString() << " (cost: " << best->second << ")"
         << " -> " << best->first->toString() << endl;
#endif
    return best->first;
}
|
|
|
|
// Removes temporary data locations on ~location~ that belong to data
// items whose reference count has dropped to zero. Files are deleted in
// one remote bulk query, objects one by one; deletion runs in batches of
// at most 500 locations per round (larger batches crash the remote
// server, per the original comment).
void Scheduler::collectGarbagge(WorkerLocation &location)
{
    vector<TaskDataItem *> notReferencedData;

    {
        // shared lock only while scanning the reference counters
        boost::shared_lock_guard<boost::shared_mutex> lock(dataReferencesMutex);

        for (auto pair : dataReferences)
        {
            if (pair.second == 0)
            {
                notReferencedData.push_back(pair.first);
            }
        }
    }

    if (notReferencedData.size() == 0)
        return;

    // repeat until no batch was cut short at the 500-item limit
    bool hasMore = true;
    while (hasMore)
    {
        vector<pair<TaskDataItem *, TaskDataLocation>> garbagge;
        hasMore = false;
        // collect this round's temporary locations on this worker
        for (auto data : notReferencedData)
        {
            for (auto loc : data->getLocations())
            {
                if (loc.isTemporary() &&
                    loc.getWorkerLocation() == location)
                {
                    garbagge.emplace_back(data, loc);
                }
            }
            // Limit the number of items removed in a batch
            // otherwise remote server crashes
            if (garbagge.size() >= 500)
            {
                hasMore = true;
                break;
            }
        }

        if (garbagge.size() == 0)
            return;

        // split the batch into object names and a file-path relation;
        // bookkeeping is updated before the remote deletion runs
        vector<string> objectsList;
        string filesList = "";
        for (auto pair : garbagge)
        {
            auto data = pair.first;
            auto &loc = pair.second;
            switch (loc.getStorageType())
            {
            case Object:
                objectsList.push_back(data->getObjectName());
                break;
            case File:
                filesList += "('" + loc.getFilePath(data) + "') ";
                break;
            }
            data->removeLocation(loc);
        }

        try
        {
            if (!filesList.empty())
            {
                ConnectionInfo* ci = location.getWorkerConnection();

                // remove all files of this batch with a single query
                string removeQuery =
                    "query [const rel(tuple([X: text])) value (" +
                    filesList +
                    ")] feed extend[ OK: removeFile(.X) ] " +
                    "count + 1";

                double duration = Task::runCommand(
                    ci,
                    removeQuery,
                    "remove temporary files",
                    false, "");

                ci->deleteIfAllowed();
                ci = nullptr;

                TaskStatistics::report("remote remove files", duration);
            }

            if (objectsList.size() > 0)
            {
                ConnectionInfo* ci = location.getWorkerConnection();

                // database objects must be deleted individually
                for (auto objName : objectsList)
                {
                    string removeQuery = "delete " + objName;
                    double duration = Task::runCommand(
                        ci,
                        removeQuery,
                        "remove temporary object",
                        false, "");
                    TaskStatistics::report("remote remove object", duration);
                }

                ci->deleteIfAllowed();
                ci = nullptr;
            }
        }
        catch (exception &e)
        {
            addError(string(e.what()) + "\n" +
                     "while collecting garbagge" +
                     " on " + location.toString());
            return;
        }
    }
}
|
|
|
|
// Per-thread set of worker locations this thread has already checked;
// empty optional until first use, so no locking is needed.
// NOTE(review): the consuming code is outside this chunk — presumably an
// ensureWorker-style routine uses it to avoid repeated checks; confirm.
thread_local optional<set<WorkerLocation>>
    Scheduler::ensureWorkerCheckedLocations;
|
|
|
|
/*
|
|
|
|
1.2 Value Mapping
|
|
|
|
Value Mapping for schedule Operator.
|
|
|
|
*/
|
|
|
|
// Value mapping of the ~schedule~ operator: consumes the task stream
// (args[0]), hands every task to a Scheduler (file transfer port from
// args[1]), waits for completion, and on success writes the slot->worker
// mapping and the worker list into the result d[f]array.
int scheduleVM(Word *args, Word &result, int message,
               Word &local, Supplier s)
{
    result = qp->ResultStorage(s);
    Stream<Task> stream(args[0]);
    stream.open();
    // port used for file transfer between the workers
    int port = ((CcInt *)args[1].addr)->GetValue();
    Task *task;
    DArrayBase *res = (DArrayBase *)result.addr;
    // result is in object form when the stream's inner type is a DArray
    // (as opposed to a DFArray)
    bool resultInObjectForm =
        DArray::checkType(
            Task::innerType(nl->Second(qp->GetType(qp->GetSon(s, 0)))));
    Scheduler scheduler(port, resultInObjectForm);

    // feed all tasks to the scheduler; stop early on a scheduler error
    // NOTE(review): when isError turns true mid-stream, the task just
    // requested is neither forwarded nor released here — confirm who
    // owns a requested-but-undelivered Task.
    while ((task = stream.request()) != 0 && !scheduler.isError)
    {
        scheduler.receiveTask(task);
    }

    // block until all scheduled work is finished
    scheduler.join();
    stream.close();
    if (scheduler.isError)
    {
        cout << "schedule failed: " << scheduler.errorMessage << endl;
        // TODO report error
    }
    else
    {
        // Build the result array: start with the scheduler's known
        // workers, then map every result element to a worker index,
        // appending previously unknown workers on first use.
        vector<uint32_t> mapping;
        vector<DArrayElement> workers;
        map<DArrayElement, optional<uint32_t>> workersMap;
        for (auto &worker : scheduler.getWorkers())
        {
            auto element = worker.getDArrayElement();
            workersMap.emplace(element, workers.size());
            workers.push_back(element);
        }
        for (auto &element : scheduler.myResult)
        {
            // operator[] default-inserts an empty optional for new
            // elements, which the branch below then fills
            auto &entry = workersMap[element];
            if (!entry)
            {
                entry.emplace(workers.size());
                workers.push_back(element);
            }
            mapping.push_back(entry.value());
        }
        res->set(mapping, scheduler.dArrayName, workers);
    }
    return 0;
}
|
|
|
|
// Operator specification (signature, syntax pattern, meaning, example)
// of the ~schedule~ operator, as shown to the user.
OperatorSpec scheduleSpec(
    "tasks(darray(X), int) -> darray(X)",
    "_ schedule[_]",
    "Computes the result of the query.",
    "");
|
|
|
|
// Operator instance registered with the algebra: ties together the
// specification, the value mapping and the type mapping defined above.
Operator scheduleOp(
    "schedule",
    scheduleSpec.getStr(),
    scheduleVM,
    Operator::SimpleSelect,
    scheduleTM);
|
|
} // namespace distributed5
|