!12052 somas solver optimization

From: @laiyongqiang
Reviewed-by: @zhoufeng54, @chujinjin
Signed-off-by: @chujinjin
pull/12052/MERGE
Committed by mindspore-ci-bot via Gitee, 4 years ago
commit 2847a1f3e3

@@ -117,25 +117,6 @@ void FootPrint::Merge(vector<Interval> *interval_v, stack<Interval> *s) {
return;
}
void FootPrint::ConstrainedBLocks(const std::vector<DynamicBitSet> *constraints, const BlockTensor &b1,
const BlockTensor &b2, vector<Interval> *oInterval) {
MS_EXCEPTION_IF_NULL(oInterval);
// propagate
size_t acum = m_offset_;
for (SomasSolverTensorDescPtr p1 = b1.m_start_tensor_; NULL != p1; p1 = p1->right_) {
for (SomasSolverTensorDescPtr p2 = b2.m_start_tensor_; NULL != p2; p2 = p2->right_) {
if ((*constraints)[p1->index_].IsBitTrue(p2->index_) == false) {
Interval a = Interval(acum, acum + p1->size_);
Interval b = Interval(p2);
if (a.lb() < b.ub()) {
(*oInterval).emplace_back(b);
}
}
}
acum += p1->size_;
}
}
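Reviewer note: the ConstrainedBLocks() routine removed above turned every conflicting tensor pair into a non-eligible interval for the placement search. For readers unfamiliar with the interval logic, a minimal standalone sketch of the half-open overlap test it relies on (the Interval/lb/ub names mirror the solver's; the rest is illustrative, not the actual implementation):

#include <cstddef>
#include <iostream>

// Illustrative stand-in for the solver's Interval: a half-open address range [lb, ub).
struct Interval {
  size_t lb_;
  size_t ub_;
  size_t lb() const { return lb_; }
  size_t ub() const { return ub_; }
};

// Two half-open ranges overlap iff each one starts before the other ends.
bool Overlaps(const Interval &a, const Interval &b) {
  return a.lb() < b.ub() && b.lb() < a.ub();
}

int main() {
  Interval a{0, 512};    // candidate placement of a block tensor
  Interval b{256, 768};  // span of an already-allocated, conflicting tensor
  std::cout << (Overlaps(a, b) ? "conflict: exclude b's span" : "eligible") << '\n';
  return 0;
}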
bool FootPrint::findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset) {
MS_EXCEPTION_IF_NULL(offset);
bool bretval = true;
@@ -148,16 +129,42 @@ bool FootPrint::findOffset(const std::vector<DynamicBitSet> *constraints, const
bretval = true;
// transform constrained tensors in non eligible intervals
for (size_t i = 0; i < m_starts_.size(); i++) {
if (block.Alone() && m_starts_[i]->Alone() &&
(*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[i]->m_start_tensor_->index_) == false) {
if (m_algorithm_ != 1 && i == 0) return false;
Interval It = Interval(m_starts_[i]->m_start_tensor_);
l_interval.emplace_back(It);
} else {
ConstrainedBLocks(constraints, block, *m_starts_[i], &l_interval); // solve multiple tensor blocks
if (block.Alone()) {
if (m_algorithm_ != kSingleObject && m_starts_.size() > 0 && m_starts_[0]->Alone() &&
(*constraints)[block.m_start_tensor_->index_].IsBitTrue(m_starts_[0]->m_start_tensor_->index_) == false) {
return false;
}
for (size_t i = 0; i < m_starts_.size(); i++) {
auto allocated_tensor = m_starts_[i]->m_start_tensor_;
while (allocated_tensor != NULL) {
if ((*constraints)[block.m_start_tensor_->index_].IsBitTrue(allocated_tensor->index_) == false) {
l_interval.emplace_back(Interval(allocated_tensor));
}
allocated_tensor = allocated_tensor->right_;
}
}
} else {
int64_t start_offset = static_cast<int64_t>(m_offset_);
for (size_t i = 0; i < m_starts_.size(); i++) {
auto allocated_tensor = m_starts_[i]->m_start_tensor_;
while (allocated_tensor != NULL) {
int64_t allocated_offset = static_cast<int64_t>(allocated_tensor->offset_);
int64_t allocated_size = static_cast<int64_t>(allocated_tensor->size_);
int64_t accumulator = 0;
for (auto block_tensor = block.m_start_tensor_; block_tensor != NULL; block_tensor = block_tensor->right_) {
int64_t end_placement = allocated_offset + allocated_size - accumulator;
if ((*constraints)[block_tensor->index_].IsBitTrue(allocated_tensor->index_) == false &&
end_placement > start_offset) {
l_interval.emplace_back(Interval(allocated_tensor));
break;
}
accumulator += block_tensor->size_;
}
allocated_tensor = allocated_tensor->right_;
}
}
}
// merge non-eligible intervals and find a slot to allocate the tensor block
if (!l_interval.empty()) {
stack<Interval> l_mergedIntervals;

@@ -145,8 +145,6 @@ class FootPrint : public std::enable_shared_from_this<FootPrint> {
const size_t getOffset() { return m_offset_; }
void setOffset(const size_t &offset) { m_offset_ = offset; }
bool findOffset(const std::vector<DynamicBitSet> *constraints, const BlockTensor &block, size_t *offset);
void ConstrainedBLocks(const std::vector<DynamicBitSet> *constraints, const BlockTensor &b1, const BlockTensor &b2,
vector<Interval> *oInterval_l);
void Merge(vector<Interval> *l_interval, stack<Interval> *l_merged);
bool findFirst(stack<Interval> *merged, const BlockTensor &block, size_t *offset);
size_t Result();
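Reviewer note: with ConstrainedBLocks() dropped from the header, the remaining placement pipeline is findOffset() -> Merge() -> findFirst(). Merge() collapses the collected non-eligible intervals; a hedged sketch of the classic stack-based interval merge (toy types and signatures, not the solver's exact implementation):

#include <algorithm>
#include <cstddef>
#include <stack>
#include <vector>

struct Interval {
  size_t lb;
  size_t ub;  // half-open range [lb, ub)
};

// Sort by lower bound, then extend the stack top while successive ranges touch or overlap.
std::stack<Interval> MergeIntervals(std::vector<Interval> v) {
  std::sort(v.begin(), v.end(), [](const Interval &a, const Interval &b) { return a.lb < b.lb; });
  std::stack<Interval> merged;
  for (const auto &it : v) {
    if (!merged.empty() && it.lb <= merged.top().ub) {
      merged.top().ub = std::max(merged.top().ub, it.ub);  // overlap: grow the current range
    } else {
      merged.push(it);  // gap: start a new merged range
    }
  }
  return merged;  // gaps between the merged ranges are the candidate slots
}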

@@ -56,14 +56,13 @@ Status SomasSolverCore::MemoryAllocationSolver() {
branching_strategy_ = static_cast<FittingType>(branching_strategy);
Clean();
MS_LOG(DEBUG) << "Timing Start " << tensors_.size() << " Tensors";
start = std::chrono::system_clock::now();
auto start_upper = std::chrono::system_clock::now();
upperbound_ = FindSolutions();
MS_LOG(DEBUG)
<< "\nElapsed time of upper bound testing: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() - start).count()
<< " ms";
start = std::chrono::system_clock::now();
MS_LOG(DEBUG) << "Elapsed time of upper bound testing: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::system_clock::now() -
start_upper)
.count()
<< " ms";
if (upperbound_ > worst) {
worst = upperbound_;
}
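Reviewer note: the dedicated start_upper variable avoids silently reusing one start timestamp across consecutive measurements. The underlying chrono pattern, as a self-contained sketch (the sleep stands in for FindSolutions(); steady_clock would arguably be the more robust choice for interval timing, though the solver uses system_clock):

#include <chrono>
#include <iostream>
#include <thread>

int main() {
  auto start_upper = std::chrono::system_clock::now();
  std::this_thread::sleep_for(std::chrono::milliseconds(50));  // stand-in for the solver run
  auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                      std::chrono::system_clock::now() - start_upper)
                      .count();
  std::cout << "Elapsed time of upper bound testing: " << elapsed_ms << " ms\n";
  return 0;
}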
@@ -130,30 +129,6 @@ Status SomasSolverCore::Verify() {
return retval;
}
Status SomasSolverCore::Verify(unordered_map<size_t, SomasSolverTensorDescPtr> *pTensor_map) {
Status retval = SUCCESS;
if (NULL == pTensor_map) return retval;
MS_LOG(INFO) << "Verifying HQ Solution..";
MS_LOG(INFO) << "Checking tensors id, sizes..";
for (auto ptensor : *pTensor_map) {
if (tensors_.count(ptensor.first) == 0) {
MS_LOG(WARNING) << "HQ Tensor id " << ptensor.first << " does not exists";
} else if (tensors_[ptensor.first]->size_ != ptensor.second->size_) {
size_t HQ_index = ptensor.first;
size_t HQ_size = ptensor.second->size_;
size_t index = ptensor.first;
size_t size = tensors_[ptensor.first]->size_;
MS_LOG(WARNING) << "HQ Tensor Id: " << HQ_index << " with size: " << HQ_size
<< " is different from Tensor Id: " << index << " size: " << size;
}
}
MS_LOG(INFO) << "Checking HQ Solution..";
tensors_ = *pTensor_map;
retval = Verify(upperbound_) == 0 ? FAILED : SUCCESS;
return retval;
}
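Reviewer note: the deleted overload cross-checked an externally supplied tensor map against the solver's own tensors (ids and sizes) before adopting it and re-verifying the bound. The map-comparison pattern it used, as a minimal hedged sketch with a toy descriptor type:

#include <cstddef>
#include <iostream>
#include <unordered_map>

struct TensorDesc {
  size_t size_;
};

// Warn about ids that are missing from `ours` or carry a different size in `theirs`.
void CrossCheck(const std::unordered_map<size_t, TensorDesc> &ours,
                const std::unordered_map<size_t, TensorDesc> &theirs) {
  for (const auto &kv : theirs) {
    auto it = ours.find(kv.first);
    if (it == ours.end()) {
      std::cout << "Tensor id " << kv.first << " does not exist\n";
    } else if (it->second.size_ != kv.second.size_) {
      std::cout << "Tensor id " << kv.first << " size mismatch: " << kv.second.size_ << " vs "
                << it->second.size_ << '\n';
    }
  }
}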
bool SomasSolverCore::Verify(const size_t &upperbound) {
auto start = std::chrono::system_clock::now();
bool retval = true;
@@ -252,64 +227,56 @@ void SomasSolverCore::Clean() {
}
upperbound_ = SIZE_MAX;
}
static bool GreaterSizeSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
}
#ifdef SOMAS_DEBUG
static bool GreaterSizeGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
}
static bool GreaterSizeSmallerConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
}
static bool GreaterSizeSmallerConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ < t2.m_start_tensor_->constraints_) ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
}
static bool GreaterSizeGreaterConstraintsSmallerIndex(const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
}
static bool GreaterSizeGreaterConstraintsGreaterIndex(const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ > t2.m_start_tensor_->constraints_) ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->constraints_ == t2.m_start_tensor_->constraints_ &&
t1.m_start_tensor_->index_ > t2.m_start_tensor_->index_);
}
#endif
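Reviewer note: hoisting each ordering into a named comparator and dispatching through a lookup table replaces the repetitive switch below. A standalone sketch of the same function-pointer map technique (toy Block type; only the pattern matches the solver):

#include <algorithm>
#include <cstddef>
#include <unordered_map>
#include <vector>

struct Block {
  size_t size;
  size_t index;
};
enum SortingType { kGreaterSizeSmallerIndex, kGreaterSizeGreaterIndex, kNumSortingTypes };

static bool BySizeThenSmallerIndex(const Block &a, const Block &b) {
  return a.size > b.size || (a.size == b.size && a.index < b.index);
}
static bool BySizeThenGreaterIndex(const Block &a, const Block &b) {
  return a.size > b.size || (a.size == b.size && a.index > b.index);
}

void SortBlocks(std::vector<Block> *blocks, SortingType strategy) {
  typedef bool (*SortingFunction)(const Block &, const Block &);
  std::unordered_map<SortingType, SortingFunction> sort_map = {
    {kGreaterSizeSmallerIndex, &BySizeThenSmallerIndex},
    {kGreaterSizeGreaterIndex, &BySizeThenGreaterIndex},
  };
  // Each comparator is a strict weak ordering (index breaks size ties), as std::sort requires.
  if (strategy < kNumSortingTypes) std::sort(blocks->begin(), blocks->end(), sort_map[strategy]);
}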
void SomasSolverCore::SortTensors() { // need to sort the tensors for Fast Heuristic
MS_LOG(DEBUG) << "Sorting Blocks of tensor, strategy: " << sorting_[sort_strategy_].c_str();
switch (sort_strategy_) {
case kGreaterSizeSmallerIndex: { // size(>), index(<)
sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size_ > t2.m_size_ ||
(t1.m_size_ == t2.m_size_ && t1.m_start_tensor_->index_ < t2.m_start_tensor_->index_);
});
break;
}
typedef bool (*SortingFunction)(const BlockTensor &, const BlockTensor &);
std::unordered_map<SortingType, SortingFunction> sort_map;
sort_map[kGreaterSizeSmallerIndex] = &GreaterSizeSmallerIndex;
#ifdef SOMAS_DEBUG
case kGreaterSizeGreaterIndex: { // size(>), index(>)
sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size > t2.m_size ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->index_ > t2.m_pStartTensor->index_);
});
break;
}
case kGreaterSizeSmallerConstraintsSmallerIndex: { // size(>), constraints(<), index(<)
sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size > t2.m_size ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ < t2.m_pStartTensor->constraints_) ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ &&
t1.m_pStartTensor->index_ < t2.m_pStartTensor->index_);
});
break;
}
case kGreaterSizeSmallerConstraintsGreaterIndex: { // size(>), constraints(<), index(>)
sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size > t2.m_size ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ < t2.m_pStartTensor->constraints_) ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ &&
t1.m_pStartTensor->index_ > t2.m_pStartTensor->index_);
});
break;
}
case kGreaterSizeGreaterConstraintsSmallerIndex: { // size(>), constraints(>), index(<)
sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size > t2.m_size ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ > t2.m_pStartTensor->constraints_) ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ &&
t1.m_pStartTensor->index_ < t2.m_pStartTensor->index_);
});
break;
}
case kGreaterSizeGreaterConstraintsGreaterIndex: { // size(>), constraints(>), index(>)
sort(block_tensors_.begin(), block_tensors_.end(), [](const BlockTensor &t1, const BlockTensor &t2) {
return t1.m_size > t2.m_size ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ > t2.m_pStartTensor->constraints_) ||
(t1.m_size == t2.m_size && t1.m_pStartTensor->constraints_ == t2.m_pStartTensor->constraints_ &&
t1.m_pStartTensor->index_ > t2.m_pStartTensor->index_);
});
break;
}
sort_map[kGreaterSizeGreaterIndex] = &GreaterSizeGreaterIndex;
sort_map[kGreaterSizeSmallerConstraintsSmallerIndex] = &GreaterSizeSmallerConstraintsSmallerIndex;
sort_map[kGreaterSizeSmallerConstraintsGreaterIndex] = &GreaterSizeSmallerConstraintsGreaterIndex;
sort_map[kGreaterSizeGreaterConstraintsSmallerIndex] = &GreaterSizeGreaterConstraintsSmallerIndex;
sort_map[kGreaterSizeGreaterConstraintsGreaterIndex] = &GreaterSizeGreaterConstraintsGreaterIndex;
#endif
case kNumSortingTypes: { // no sorting case
break;
}
if (sort_strategy_ < kNumSortingTypes) {
sort(block_tensors_.begin(), block_tensors_.end(), *(sort_map[sort_strategy_]));
}
// log for debug purposes
for (auto &block : block_tensors_) block.log();

@@ -53,7 +53,6 @@ class SomasSolverCore {
Status MemoryAllocationSolver();
Status Verify();
bool Verify(const size_t &);
Status Verify(unordered_map<size_t, SomasSolverTensorDescPtr> *);
void VerifySolution(const bool verify) { verify_ = verify; }
void SortTensors();
void BuildBlocks();
