/*
---- This file is part of SECONDO.

Copyright (C) since 2019, University in Hagen, Faculty of Mathematics and
Computer Science, Database Systems for New Applications.

SECONDO is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

SECONDO is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along
with SECONDO; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
*/

#ifdef USE_MULTIHREADED_QUERY_PROCESSING

#include "ParallelQueryOptimizer.h"
#include "Operator.h"
#include "QueryProcessor.h"
#include "Profiles.h"
#include "Algebras/ParThread/ExecutionContext/ExecutionContext.h"

#include "tbb/task_scheduler_init.h"
#include "boost/algorithm/string.hpp"
#include "boost/filesystem.hpp"

// NOTE(review): the six standard-library header names below were
// reconstructed — the original directives had been reduced to bare
// "#include" tokens. They cover the std:: facilities used in this
// translation unit (std::min/max, assert, map, set, stringstream,
// string, vector) — confirm against the original file if available.
#include <algorithm>
#include <cassert>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>

namespace fs = boost::filesystem;

namespace parthread
{

const char *ParallelQueryOptimizer::ParOperatorName = "par";

/*
Bookkeeping record for one (potential) execution context, i.e. the section
of the operator tree that is delimited by a par-operator. Instances form a
chain towards the tree root via ~ParentContext~.
*/
struct ContextInformation
{
  ContextInformation(OpNode *node)
      : InsertPosition(node), SonIndex(0), ParentContext(NULL),
        MaxDegreeOfDataParallelism(1), NumberOfOperators(0), IsValid(true),
        IsHashPartitioned(false), PartitonAttributeIndex(-1){};

  OpNode *InsertPosition;         // node of (or insertion node for) the
                                  // par-operator; NULL for the root context
  int SonIndex;                   // son slot used when inserting the operator
  ContextInformation *ParentContext;
  int MaxDegreeOfDataParallelism; // number of parallel instances (>= 1)
  int NumberOfOperators;          // operators counted within this context
  bool IsValid;                   // false: no context is created here
  bool IsHashPartitioned;
  int PartitonAttributeIndex;     // attribute index for hash partitioning,
                                  // -1 if none (member name kept as-is,
                                  // including original misspelling)
  // element type reconstructed: nodes of memory-using operators
  std::vector<OpNode *> MemoryOperators;

  // Walks up the parent chain and returns the first context that is still
  // valid, or NULL if no valid ancestor exists.
  ContextInformation *GetNextValidParent()
  {
    if (ParentContext == NULL)
    {
      return NULL;
    }
    else if (ParentContext->IsValid)
    {
      return ParentContext;
    }
    else
    {
      return ParentContext->GetNextValidParent();
    }
  }
};
class ParallelQueryOptimizer::ParallelQueryOptimizerImpl { public: //methods ParallelQueryOptimizerImpl() : m_initialized(false), m_taskScheduler(tbb::task_scheduler_init::deferred) { } void WriteDebugOutput(const std::string message) { if (m_settings.Logger != NULL) { m_settings.Logger->WriteDebugOutput(message); } } void InitializeIfNecessary() { if (m_initialized) { return; } //read settings from secondo-config long maxNumberOfConcurrentThreads = SmiProfile::GetParameter("MultithreadedQueryProcessing", "MaxNumberOfConcurrentThreads", 1, std::string(getenv("SECONDO_CONFIG"))); long totalBufferSizeInByte = SmiProfile::GetParameter("MultithreadedQueryProcessing", "TotalBufferSizeInByte", 0, std::string(getenv("SECONDO_CONFIG"))); long maxDegreeOfDataParallelism = SmiProfile::GetParameter("MultithreadedQueryProcessing", "MaxDegreeOfDataParallelism", 1, std::string(getenv("SECONDO_CONFIG"))); long maxNumberOfTuples = SmiProfile::GetParameter("MultithreadedQueryProcessing", "MaxNumberOfTuplesPerBlock", 1000, std::string(getenv("SECONDO_CONFIG"))); long timeoutPerTask = SmiProfile::GetParameter("MultithreadedQueryProcessing", "TimeoutPerTaskInMicroseconds", 0, std::string(getenv("SECONDO_CONFIG"))); long queueCapacityThreshold = SmiProfile::GetParameter("MultithreadedQueryProcessing", "QueueCapacityThreshold", 100, std::string(getenv("SECONDO_CONFIG"))); long usePipelineParallelism = SmiProfile::GetParameter("MultithreadedQueryProcessing", "UsePipelineParallelism", 1, std::string(getenv("SECONDO_CONFIG"))); long loggerMode = SmiProfile::GetParameter("MultithreadedQueryProcessing", "LoggerMode", 0, std::string(getenv("SECONDO_CONFIG"))); std::string debugOutputPath = SmiProfile::GetParameter("MultithreadedQueryProcessing", "DebugOutputPath", "", std::string(getenv("SECONDO_CONFIG"))); long useOptimization = SmiProfile::GetParameter("MultithreadedQueryProcessing", "UseStaticParallelOptimization", 0, std::string(getenv("SECONDO_CONFIG"))); std::string 
operatorsSupportingDataParallelismStr = SmiProfile::GetParameter("MultithreadedQueryProcessing", "OperatorsSupportingDataParallelism", "", std::string(getenv("SECONDO_CONFIG"))); std::string operatorsRetainingOrderStr = SmiProfile::GetParameter("MultithreadedQueryProcessing", "OperatorsRetainingOrder", "", std::string(getenv("SECONDO_CONFIG"))); std::string operatorsRequiringHashPartitioningStr = SmiProfile::GetParameter("MultithreadedQueryProcessing", "OperatorsRequiringHashPartitioning", "", std::string(getenv("SECONDO_CONFIG"))); if (maxNumberOfConcurrentThreads < 1 && tbb::task_scheduler_init::default_num_threads() > 1) { //if the number of threads is not explicite configured //let tbb decide how many cores are used for parallel execution. m_settings.MaxNumberOfConcurrentThreads = tbb::task_scheduler_init::default_num_threads(); } else if (maxNumberOfConcurrentThreads > 1) { m_settings.MaxNumberOfConcurrentThreads = maxNumberOfConcurrentThreads; } else { //at least two threads are needed for parallelized query processing. 
//Fall back to sequential query processing m_settings.MaxNumberOfConcurrentThreads = 1; } m_settings.QueueCapacityThreshold = std::max(queueCapacityThreshold, 1L); m_settings.MaxDegreeOfDataParallelism = std::max((int)maxDegreeOfDataParallelism, 1); m_settings.TotalBufferSizeInBytes = std::max(totalBufferSizeInByte, 0L); m_settings.MaxNumberOfTuplesPerBlock = std::max(maxNumberOfTuples, 1L); m_settings.TimeoutPerTaskInMicroSeconds = (clock_t)std::max(timeoutPerTask, 0L); m_settings.UsePipelineParallelism = usePipelineParallelism == 1; m_settings.UseOptimization = useOptimization == 1; if (!debugOutputPath.empty() && !fs::is_directory(debugOutputPath)) { debugOutputPath = std::string(); } ExecutionContextLogger::LoggerModes mode = (ExecutionContextLogger::LoggerModes) std::min((int)loggerMode, (int)ExecutionContextLogger::LoggerModes::FullOutput); m_settings.Logger.reset( new ExecutionContextLogger(mode, debugOutputPath)); m_settings.OperatorsSupportingDataParallelism = ParseSettingString(operatorsSupportingDataParallelismStr); m_settings.OperatorsRetainingOrder = ParseSettingString(operatorsRetainingOrderStr); m_settings.OperatorsRequiringHashPartitioning = ParseSettingString(operatorsRequiringHashPartitioningStr); //initialize a new scheduler to manage the number of threads //given as argument. 
if (m_settings.MaxNumberOfConcurrentThreads > 1) { m_taskScheduler.initialize(m_settings.MaxNumberOfConcurrentThreads); assert(m_taskScheduler.is_active()); std::stringstream debugText; debugText << "Initialized task scheduler with " << m_settings.MaxNumberOfConcurrentThreads << " threads"; m_settings.Logger->WriteDebugOutput(debugText.str()); } else { std::stringstream debugText; debugText << "Use serial execution"; m_settings.Logger->WriteDebugOutput(debugText.str()); } m_initialized = true; }; void ParallelizeQueryPlan(QueryProcessor *queryProcessor, void *queryPlan, size_t memorySpent, int noMemoryOperators) { InitializeIfNecessary(); m_queryProcessor = queryProcessor; m_settings.Logger->ResetTimer(); m_settings.Logger->WriteDebugOutput("Start parallelizing query plan"); //in case of incorrect settings or disabled concurrency fall back to //regular serial execution if (m_settings.MaxNumberOfConcurrentThreads < 2) { return; } //create an initial context from root node to first par- operator before //analyzing the operator tree recursive depth first OpTree rootNode = static_cast(queryPlan); std::vector analyzedContexts; ContextInformation *beforeFirstParOpContext = new ContextInformation(NULL); beforeFirstParOpContext->IsValid = false; analyzedContexts.push_back(beforeFirstParOpContext); bool needsAnalysis = true; if (m_settings.UseOptimization) { //Inserts new par operators in the query plan, returns a list of //analyzed contexts //find first insert position of par-operator CreateInsertionPoints(rootNode, 0, NULL, analyzedContexts, analyzedContexts[0]); if (analyzedContexts.size() > 1) { //first context contains nodes from root to first stream operator //no possibility for paralllelism, so set to invalid analyzedContexts[0]->IsValid = false; // create par operators for each context for (ContextInformation *contextInfo : analyzedContexts) { if (contextInfo->InsertPosition != NULL && contextInfo->IsValid) { contextInfo->InsertPosition = static_cast( 
m_queryProcessor->InsertParOperatorInTree( contextInfo->InsertPosition, contextInfo->SonIndex, contextInfo->MaxDegreeOfDataParallelism, contextInfo->PartitonAttributeIndex)); } } if (m_settings.Logger->DebugMode()) { m_settings.Logger->WriteDebugOutput( "Finished parallelizing query plan"); fs::path outputPath(m_settings.Logger->LoggerDebugPath()); outputPath /= "parallelOptimizedTree.gv"; m_queryProcessor->OutputToDotFile(rootNode, outputPath.c_str()); } needsAnalysis = false; } } if (needsAnalysis) { //Checks the existing par operators inserted by the user for validity //and returns a list of analyzed contexts AnalyzeQueryPlanRecursive(queryProcessor, rootNode, analyzedContexts, beforeFirstParOpContext); } std::map createdContexts; int contextId = 0; for (ContextInformation *contextInfo : analyzedContexts) { //leave ivalid par operators in the tree. Without an execution context //the operator does not use parallel execution. if (contextInfo->IsValid) { //set local2 with new context object contextId++; ExecutionContext *context = new ExecutionContext(contextId, queryProcessor, contextInfo->InsertPosition, m_settings, contextInfo->MaxDegreeOfDataParallelism, contextInfo->PartitonAttributeIndex); createdContexts[contextInfo->InsertPosition] = context; ParNodeInfo *local2 = new ParNodeInfo(context); ContextInformation *parent = contextInfo->GetNextValidParent(); if (parent == NULL) { //the first par node close by the root has no managed entities, //create a single dummy entity to store a tuple reader ExecutionContextEntity *entity = new ExecutionContextEntity( 0, (OpTree)queryPlan); local2->CurrentEntity(entity); } else { ExecutionContext *parentContext = createdContexts[parent->InsertPosition]; parentContext->AddChildContexts(context); context->ParentContext(parentContext); } //Remove the connection to the sons of the par operator, //to avoid forwarding following commands to subtrees. 
assert(queryProcessor->GetNoSons(contextInfo->InsertPosition) > 0); queryProcessor->RemoveSon(contextInfo->InsertPosition, 0); queryProcessor->GetLocal2(contextInfo->InsertPosition) .setAddr(local2); std::stringstream debugText; debugText << "Created Context " << context->ContextId() << " with max. entities " << contextInfo->MaxDegreeOfDataParallelism; if (contextInfo->IsHashPartitioned) { debugText << " and hash partitioned"; } m_settings.Logger->WriteDebugOutput(debugText.str()); } } RedistributeMemory(analyzedContexts, contextId, memorySpent, noMemoryOperators); DeleteContextInformation(analyzedContexts); analyzedContexts.clear(); if (m_settings.Logger->DebugMode()) { m_settings.Logger->WriteDebugOutput( "Finished parallelizing query plan"); fs::path outputPath(m_settings.Logger->LoggerDebugPath()); outputPath /= "partialTree_ContextEntity_0_0.gv"; m_queryProcessor->OutputToDotFile(rootNode, outputPath.c_str()); } }; std::set ParseSettingString(const std::string &setting, const char *separator = " ") { std::string trimmedSetting(setting); std::set splittedSettings; boost::trim(trimmedSetting); boost::split(splittedSettings, trimmedSetting, boost::is_any_of(separator), boost::token_compress_on); return splittedSettings; } ContextInformation *AnalyzeQueryPlanRecursive( QueryProcessor *queryProcessor, OpTree rootNode, std::vector &analyzedContexts, ContextInformation *infCurrentContext) { Operator *op = queryProcessor->GetOperator(rootNode); if (op == NULL) { //this happens when the node is no operator or a function object // in this case skip investigation of the subtree return infCurrentContext; } std::string opName = op->GetName(); bool isParOperator = opName == ParOperatorName; if (!isParOperator && op->UsesMemory()) { infCurrentContext->MemoryOperators.push_back(rootNode); } if (isParOperator) { ContextInformation *context = new ContextInformation(rootNode); context->MaxDegreeOfDataParallelism = queryProcessor->GetParOperatorsNumberOfInstances(rootNode); 
context->PartitonAttributeIndex = queryProcessor->GetParOperatorsPartitoningAttrIdx(rootNode); analyzedContexts.push_back(context); context->ParentContext = infCurrentContext; infCurrentContext = context; } else //regular operator { //analyze the operators of the context to setup the context with // correct parameters if (infCurrentContext->InsertPosition == NULL || OperatorRetainsOrder(opName)) { //if operators are found which rely on an order of the tuple stream, //then invalidate all par-operators to the root node ContextInformation *lastAnalyzedContext = infCurrentContext; while (lastAnalyzedContext != NULL) { lastAnalyzedContext->IsValid = false; lastAnalyzedContext = lastAnalyzedContext->ParentContext; } } if (!OperatorSupportsDataparallelism(opName)) { infCurrentContext->MaxDegreeOfDataParallelism = 1; } else { infCurrentContext->MaxDegreeOfDataParallelism = std::min((int)m_settings.MaxDegreeOfDataParallelism, infCurrentContext->MaxDegreeOfDataParallelism); } if (OperatorRequiresHashPartitioning(opName) && infCurrentContext->MaxDegreeOfDataParallelism > 1) { infCurrentContext->IsHashPartitioned = true; } } //for each son call analysis recursive int numSons = qp->GetNoSons(rootNode); for (int i = 0; i < numSons; i++) { OpNode *son = static_cast(qp->GetSon(rootNode, i)); ContextInformation *sonContext = AnalyzeQueryPlanRecursive( queryProcessor, son, analyzedContexts, infCurrentContext); //check if the son context has a partition attribute in case of //partitioned contexts if (infCurrentContext->IsHashPartitioned && sonContext->InsertPosition != infCurrentContext->InsertPosition && sonContext->PartitonAttributeIndex < 0) { infCurrentContext->MaxDegreeOfDataParallelism = 1; } } //return the contextInfo of the last reached par-operator return infCurrentContext; } ContextInformation *CreateInsertionPoints( OpTree currentNode, int sonIndex, OpTree previousNode, std::vector &analyzedContexts, ContextInformation *ctx, bool foundFirstStreamOperator = false, bool 
prevNodeIsBranchNode = false) { bool insertionPoint = false; Operator *op = m_queryProcessor->GetOperator(currentNode); if (op == NULL) { //this happens when the node is no operator or a function object // in this case skip investigation of the subtree return ctx; } std::string opName = op->GetName(); if (opName == ParOperatorName) { //cancel the optimization if the user inserted par-operators DeleteContextInformation(analyzedContexts); ContextInformation *beforeFirstParOpContext = new ContextInformation(NULL); analyzedContexts.push_back(beforeFirstParOpContext); return NULL; } if (OperatorRetainsOrder(opName)) { //found an operator relying on order of the datasets //in this case invalidate all found insertion points before ContextInformation *lastAnalyzedContext = ctx; do { ctx = lastAnalyzedContext; lastAnalyzedContext = ctx; lastAnalyzedContext->MaxDegreeOfDataParallelism = 1; lastAnalyzedContext->IsHashPartitioned = false; lastAnalyzedContext->IsValid = false; lastAnalyzedContext = lastAnalyzedContext->ParentContext; } while (lastAnalyzedContext != NULL); } else { if (m_queryProcessor->IsTupleStreamOperator(currentNode)) { foundFirstStreamOperator = true; //if the degree of dataparallelism changes, add another //insertion point bool contextSupportsDataparallelism = ctx->MaxDegreeOfDataParallelism > 1; insertionPoint = contextSupportsDataparallelism ^ OperatorSupportsDataparallelism(opName); //set an insertion point if the operator requires hash partitioning //and the context doesn't support hash partitioning yet insertionPoint = insertionPoint || (!ctx->IsHashPartitioned && OperatorRequiresHashPartitioning(opName)); //the last node branched more than one datastream insertionPoint = insertionPoint || prevNodeIsBranchNode; } else if (foundFirstStreamOperator) { //other type of operator, stop analysis of sub trees return ctx; } } if (insertionPoint) { ContextInformation *newCtx = new ContextInformation(previousNode); newCtx->SonIndex = sonIndex; 
newCtx->ParentContext = ctx; newCtx->MaxDegreeOfDataParallelism = 1; if (OperatorSupportsDataparallelism(opName)) { newCtx->MaxDegreeOfDataParallelism = m_settings.MaxDegreeOfDataParallelism; } newCtx->IsHashPartitioned = OperatorRequiresHashPartitioning(opName); if (prevNodeIsBranchNode && newCtx->ParentContext && newCtx->ParentContext->IsHashPartitioned) { newCtx->PartitonAttributeIndex = AttributeIndexOfNode(previousNode, sonIndex); } analyzedContexts.push_back(newCtx); ctx = newCtx; } if (op->UsesMemory()) { ctx->MemoryOperators.push_back(currentNode); } ctx->NumberOfOperators++; //for each son call analysis recursive int numSons = qp->GetNoSons(currentNode); bool isBranchNode = numSons > 1 && IsBranchNode(numSons, currentNode); for (int i = 0; i < numSons; i++) { OpNode *son = static_cast(qp->GetSon(currentNode, i)); //ContextInformation *sonContext = CreateInsertionPoints(son, i, currentNode, analyzedContexts, ctx, foundFirstStreamOperator, isBranchNode); } //return the contextInfo of the last reached par-operator return ctx; } int AttributeIndexOfNode(OpNode *node, int sonIndex) { Operator *op = m_queryProcessor->GetOperator(node); if (op->GetName() == "hashjoin" && (sonIndex == 0 || sonIndex == 1)) { Supplier son = m_queryProcessor->GetSon(node, sonIndex+5); return m_queryProcessor->GetConstNodeIntValue(son)-1; } return -1; } bool IsBranchNode(int numSons, OpNode *node) { int numStreamSons = 0; for (int i = 0; i < numSons; i++) { OpNode *son = static_cast(qp->GetSon(node, i)); if (m_queryProcessor->IsTupleStreamOperator(son)) { numStreamSons++; } } return numStreamSons > 1; } bool OperatorSupportsDataparallelism(const std::string &opName) { return m_settings.OperatorsSupportingDataParallelism.find(opName) != m_settings.OperatorsSupportingDataParallelism.end(); } bool OperatorRetainsOrder(const std::string &opName) { return m_settings.OperatorsRetainingOrder.find(opName) != m_settings.OperatorsRetainingOrder.end(); } bool 
OperatorRequiresHashPartitioning(const std::string &opName) { return m_settings.OperatorsRequiringHashPartitioning.find(opName) != m_settings.OperatorsRequiringHashPartitioning.end(); } void DeleteContextInformation( std::vector &analyzedContexts) { for (ContextInformation *contextInfo : analyzedContexts) { delete contextInfo; } analyzedContexts.clear(); } void OptimizeQueryPlanRecursive( QueryProcessor *queryProcessor, OpTree rootNode, std::vector &analyzedContexts, ContextInformation *infoOfCurrentContext) { /* TODO: implement adding par-operator in operator tree */ } void RedistributeMemory(std::vector analyzedContexts, int numberOfValidContexts, size_t memorySpent, int numberMemoryOperators) { if (numberOfValidContexts == 0) { return; } size_t globalMemory = m_queryProcessor->GetGlobalMemory(); size_t memoryDistributionFactor = numberMemoryOperators > 0 ? numberMemoryOperators : 1; size_t prevMemoryPerOperator = (globalMemory - memorySpent) / memoryDistributionFactor; //calculate the distributed memory considering the numberOfValidContexts size_t memoryPerOperator = (globalMemory - memorySpent) / (numberMemoryOperators + numberOfValidContexts); size_t memPerTupleBuffer = m_settings.TotalBufferSizeInBytes / numberOfValidContexts; if (m_settings.TotalBufferSizeInBytes > 0 && memoryPerOperator > memPerTupleBuffer) { //if a total buffer size is given and the value is less than the //distributed memory, then take the total size ... memorySpent += m_settings.TotalBufferSizeInBytes; memoryPerOperator = (globalMemory - memorySpent) / memoryDistributionFactor; } else { //... otherwise distribute the same amount of memory to the par-Operator // as for all other memory operators. 
memPerTupleBuffer = memoryPerOperator; } for (ContextInformation *contextInfo : analyzedContexts) { if (!contextInfo->IsValid) { continue; } //set the memory of the par-operators for the contained tuple buffers m_queryProcessor->SetMemorySize(contextInfo->InsertPosition, memPerTupleBuffer); //reduce memory of the parallelized operators depending on the number //of instances for (OpNode *memOp : contextInfo->MemoryOperators) { size_t mem = m_queryProcessor->GetMemorySize(memOp); size_t memoryPerOpAndInstance = memoryPerOperator / contextInfo->MaxDegreeOfDataParallelism; if (prevMemoryPerOperator != mem) { //the operators memory was probably not distributed, so just //divide the original memory size through the number of instances memoryPerOpAndInstance = mem / contextInfo->MaxDegreeOfDataParallelism; } m_queryProcessor->SetMemorySize(memOp, memoryPerOpAndInstance); } } std::stringstream debugText; debugText << "Redistributed memory for " << numberOfValidContexts << " contexts with " << memPerTupleBuffer << "MB each and " << numberMemoryOperators << " memory operators with " << memoryPerOperator << "MB distributed over the instances " << "of the operators execution context"; m_settings.Logger->WriteDebugOutput(debugText.str()); } private: //member bool m_initialized; QueryProcessor *m_queryProcessor; tbb::task_scheduler_init m_taskScheduler; ExecutionContextSetting m_settings; }; // ParallelQueryOptimizerImpl ParallelQueryOptimizer::ParallelQueryOptimizer() : m_pImpl(new ParallelQueryOptimizerImpl()) { } ParallelQueryOptimizer::~ParallelQueryOptimizer() = default; void ParallelQueryOptimizer::ParallelizeQueryPlan( QueryProcessor *queryProcessor, void *queryPlan, size_t memorySpent, int noMemoryOperators) { return m_pImpl->ParallelizeQueryPlan(queryProcessor, queryPlan, memorySpent, noMemoryOperators); } } // namespace parthread #endif // USE_MULTIHREADED_QUERY_PROCESSING