From 75fb5487b5072d1c7db0ab8719f3c68f05f6c2f3 Mon Sep 17 00:00:00 2001 From: laiyongqiang Date: Wed, 3 Feb 2021 16:50:04 +0800 Subject: [PATCH] somas solver optimization --- .../optimizer/somas/somas_solver_alg.cc | 61 ++++---- .../optimizer/somas/somas_solver_alg.h | 2 - .../optimizer/somas/somas_solver_core.cc | 137 +++++++----------- .../optimizer/somas/somas_solver_core.h | 1 - 4 files changed, 86 insertions(+), 115 deletions(-) diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc index 4430a819a0..f39389d71a 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.cc @@ -117,25 +117,6 @@ void FootPrint::Merge(vector *interval_v, stack *s) { return; } -void FootPrint::ConstrainedBLocks(const std::vector *constraints, const BlockTensor &b1, - const BlockTensor &b2, vector *oInterval) { - MS_EXCEPTION_IF_NULL(oInterval); - // propagate - size_t acum = m_offset_; - - for (SomasSolverTensorDescPtr p1 = b1.m_start_tensor_; NULL != p1; p1 = p1->right_) { - for (SomasSolverTensorDescPtr p2 = b2.m_start_tensor_; NULL != p2; p2 = p2->right_) { - if ((*constraints)[p1->index_].IsBitTrue(p2->index_) == false) { - Interval a = Interval(acum, acum + p1->size_); - Interval b = Interval(p2); - if (a.lb() < b.ub()) { - (*oInterval).emplace_back(b); - } - } - } - acum += p1->size_; - } -} bool FootPrint::findOffset(const std::vector *constraints, const BlockTensor &block, size_t *offset) { MS_EXCEPTION_IF_NULL(offset); bool bretval = true; @@ -148,16 +129,42 @@ bool FootPrint::findOffset(const std::vector *constraints, const bretval = true; // transform constrained tensors in non eligible intervals - for (size_t i = 0; i < m_starts_.size(); i++) { - if (block.Alone() && m_starts_[i]->Alone() && - (*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[i]->m_start_tensor_->index_) == false) { - if (m_algorithm_ != 1 && i == 0) return false; - Interval It = Interval(m_starts_[i]->m_start_tensor_); - l_interval.emplace_back(It); - } else { - ConstrainedBLocks(constraints, block, *m_starts_[i], &l_interval); // solve multiple tensor blocks + if (block.Alone()) { + if (m_algorithm_ != kSingleObject && m_starts_.size() > 0 && m_starts_[0]->Alone() && + (*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[0]->m_start_tensor_->index_) == false) { + return false; + } + for (size_t i = 0; i < m_starts_.size(); i++) { + auto allocated_tensor = m_starts_[i]->m_start_tensor_; + while (allocated_tensor != NULL) { + if ((*constraints)[block.m_start_tensor_->index_].IsBitTrue(allocated_tensor->index_) == false) { + l_interval.emplace_back(Interval(allocated_tensor)); + } + allocated_tensor = allocated_tensor->right_; + } + } + } else { + int64_t start_offset = static_cast(m_offset_); + for (size_t i = 0; i < m_starts_.size(); i++) { + auto allocated_tensor = m_starts_[i]->m_start_tensor_; + while (allocated_tensor != NULL) { + int64_t allocated_offset = static_cast(allocated_tensor->offset_); + int64_t allocated_size = static_cast(allocated_tensor->size_); + int64_t accumulator = 0; + for (auto block_tensor = block.m_start_tensor_; block_tensor != NULL; block_tensor = block_tensor->right_) { + int64_t end_placement = allocated_offset + allocated_size - accumulator; + if ((*constraints)[block_tensor->index_].IsBitTrue(allocated_tensor->index_) == false && + end_placement > start_offset) { + l_interval.emplace_back(Interval(allocated_tensor)); + break; + } + accumulator += block_tensor->size_; + } + allocated_tensor = allocated_tensor->right_; + } } } + // merge non-eligible intervals and find a slot to allocate the tensor block if (!l_interval.empty()) { stack l_mergedIntervals; diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.h b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.h index 9ba3dd1c3c..b970cc3f8f 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.h +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_alg.h @@ -145,8 +145,6 @@ class FootPrint : public std::enable_shared_from_this { const size_t getOffset() { return m_offset_; } void setOffset(const size_t &offset) { m_offset_ = offset; } bool findOffset(const std::vector *constraints, const BlockTensor &block, size_t *offset); - void ConstrainedBLocks(const std::vector *constraints, const BlockTensor &b1, const BlockTensor &b2, - vector *oInterval_l); void Merge(vector *l_interval, stack *l_merged); bool findFirst(stack *merged, const BlockTensor &block, size_t *offset); size_t Result(); diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.cc b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.cc index 6b6388a2ef..71ea343eee 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.cc +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.cc @@ -56,14 +56,13 @@ Status SomasSolverCore::MemoryAllocationSolver() { branching_strategy_ = static_cast(branching_strategy); Clean(); MS_LOG(DEBUG) << "Timing Start " << tensors_.size() << " Tensors"; - start = std::chrono::system_clock::now(); + auto start_upper = std::chrono::system_clock::now(); upperbound_ = FindSolutions(); - MS_LOG(DEBUG) - << "\nElapsed time of upper bound testing: " - << std::chrono::duration_cast(std::chrono::system_clock::now() - start).count() - << " ms"; - start = std::chrono::system_clock::now(); - + MS_LOG(DEBUG) << "Elapsed time of upper bound testing: " + << std::chrono::duration_cast(std::chrono::system_clock::now() - + start_upper) + .count() + << " ms"; if (upperbound_ > worst) { worst = upperbound_; } @@ -130,30 +129,6 @@ Status SomasSolverCore::Verify() { return retval; } -Status SomasSolverCore::Verify(unordered_map *pTensor_map) { - Status retval = SUCCESS; - if (NULL == pTensor_map) return retval; - MS_LOG(INFO) << "Verifying HQ Solution.."; - MS_LOG(INFO) << "Checking tensors id, sizes.."; - - for (auto ptensor : *pTensor_map) { - if (tensors_.count(ptensor.first) == 0) { - MS_LOG(WARNING) << "HQ Tensor id " << ptensor.first << " does not exists"; - } else if (tensors_[ptensor.first]->size_ != ptensor.second->size_) { - size_t HQ_index = ptensor.first; - size_t HQ_size = ptensor.second->size_; - size_t index = ptensor.first; - size_t size = tensors_[ptensor.first]->size_; - MS_LOG(WARNING) << "HQ Tensor Id: " << HQ_index << " with size: " << HQ_size - << " is different from Tensor Id: " << index << " size: " << size; - } - } - - MS_LOG(INFO) << "Checking HQ Solution.."; - tensors_ = *pTensor_map; - retval = Verify(upperbound_) == 0 ? FAILED : SUCCESS; - return retval; -} bool SomasSolverCore::Verify(const size_t &upperbound) { auto start = std::chrono::system_clock::now(); bool retval = true; @@ -252,64 +227,56 @@ void SomasSolverCore::Clean() { } upperbound_ = SIZE_MAX; } + +static bool GreaterSizeSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) { + return t1.m_size_ > t2.m_size_ || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_); +} +#ifdef SOMAS_DEBUG +static bool GreaterSizeGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) { + return t1.m_size_ > t2.m_size_ || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_); +} +static bool GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) { + return t1.m_size_ > t2.m_size_ || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ && + t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_); +} +static bool GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) { + return t1.m_size_ > t2.m_size_ || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ && + t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_); +} +static bool GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) { + return t1.m_size_ > t2.m_size_ || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ && + t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_); +} +static bool GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) { + return t1.m_size_ > t2.m_size_ || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) || + (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ && + t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_); +} +#endif + void SomasSolverCore::SortTensors() { // need to sort the tensors for Fast Heuristic MS_LOG(DEBUG) << "Sorting Blocks of tensor, strategy: " << sorting_[sort_strategy_].c_str(); - switch (sort_strategy_) { - case kGreaterSizeSmallerIndex: { // size(>), index(<) - sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) { - return t1.m_size_ > t2.m_size_ || - (t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_); - }); - break; - } + typedef bool (*SortingFunction)(const BlockTensor &, const BlockTensor &); + std::unordered_map sort_map; + sort_map[kGreaterSizeSmallerIndex] = &GreaterSizeSmallerIndex; #ifdef SOMAS_DEBUG - case kGreaterSizeGreaterIndex: { // size(>), index(>) - sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) { - return t1.m_size > t2.m_size || - (t1.m_size == t2.m_size && t1.m_pStartTensor->index_ > t2.m_pStartTensor->index_); - }); - break; - } - case kGreaterSizeSmallerConstraintsSmallerIndex: { // size(>), constraints(<), index(<) - sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) { - return t1.m_size > t2.m_size || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ < t2.m_pStartTensor->constraints_) || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ && - t1.m_pStartTensor->index_ < t2.m_pStartTensor->index_); - }); - break; - } - case kGreaterSizeSmallerConstraintsGreaterIndex: { // size(>), constraints(<), index(>) - sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) { - return t1.m_size > t2.m_size || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ < t2.m_pStartTensor->constraints_) || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ && - t1.m_pStartTensor->index_ > t2.m_pStartTensor->index_); - }); - break; - } - case kGreaterSizeGreaterConstraintsSmallerIndex: { // size(>), constraints(>), index(<) - sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) { - return t1.m_size > t2.m_size || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ > t2.m_pStartTensor->constraints_) || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ && - t1.m_pStartTensor->index_ < t2.m_pStartTensor->index_); - }); - break; - } - case kGreaterSizeGreaterConstraintsGreaterIndex: { // // size(>), constraints(>), index(>) - sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) { - return t1.m_size > t2.m_size || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ > t2.m_pStartTensor->constraints_) || - (t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ && - t1.m_pStartTensor->index_ > t2.m_pStartTensor->index_); - }); - break; - } + sort_map[kGreaterSizeGreaterIndex] = &GreaterSizeGreaterIndex; + sort_map[kGreaterSizeSmallerConstraintsSmallerIndex] = &GreaterSizeSmallerConstraintsSmallerIndex; + sort_map[kGreaterSizeSmallerConstraintsGreaterIndex] = &GreaterSizeSmallerConstraintsGreaterIndex; + sort_map[kGreaterSizeGreaterConstraintsSmallerIndex] = &GreaterSizeGreaterConstraintsSmallerIndex; + sort_map[kGreaterSizeGreaterConstraintsGreaterIndex] = &GreaterSizeGreaterConstraintsGreaterIndex; #endif - case kNumSortingTypes: { // no sorting case - break; - } + if (sort_strategy_ < kNumSortingTypes) { + sort(block_tensors_.begin(), block_tensors_.end(), *(sort_map[sort_strategy_])); } // log for debug purposes for (auto &block : block_tensors_) block.log(); diff --git a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.h b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.h index 0caffd01e7..c1d236d2b8 100644 --- a/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.h +++ b/mindspore/ccsrc/backend/optimizer/somas/somas_solver_core.h @@ -53,7 +53,6 @@ class SomasSolverCore { Status MemoryAllocationSolver(); Status Verify(); bool Verify(const size_t &); - Status Verify(unordered_map *); void VerifySolution(const bool verify) { verify_ = verify; } void SortTensors(); void BuildBlocks();