!2762 Adjust the thread number used to compute optimizers in the CPU kernels

Merge pull request !2762 from YuJianfeng/master
pull/2762/MERGE
Committed by mindspore-ci-bot via Gitee, 5 years ago
commit 71f28e5b7e

@ -876,12 +876,13 @@ bool IsWeightBoundary(const AnfNodePtr &node) {
return false;
}
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t thread_num,
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
size_t total_compute_size) {
const size_t kThreadNum = 24;
std::vector<std::thread> threads;
threads.reserve(thread_num);
threads.reserve(kThreadNum);
size_t start = 0;
size_t once_compute_size = (total_compute_size + thread_num - 1) / thread_num;
size_t once_compute_size = (total_compute_size + kThreadNum - 1) / kThreadNum;
while (start < total_compute_size) {
size_t end = (start + once_compute_size) > total_compute_size ? total_compute_size : (start + once_compute_size);
threads.emplace_back(std::thread(func, params, start, end));

@ -128,7 +128,7 @@ void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr>
bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
bool IsWeightBoundary(const AnfNodePtr &node);
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t thread_num,
void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
size_t total_compute_size);
} // namespace kernel
} // namespace mindspore

@ -155,15 +155,14 @@ bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
input_params.v_ = v;
input_params.beta1_ = beta1;
input_params.beta2_ = beta2;
const size_t kThreadNum = 16;
MultiThreadCompute(ComputeMomentum, &input_params, kThreadNum, total_dim_size);
MultiThreadCompute(ComputeMomentum, &input_params, total_dim_size);
input_params.m_t_ = m_t;
input_params.use_nesterov_ = use_nesterov_;
input_params.sparse_grad_ = unique_sparse_grad;
input_params.var_first_dim_size_ = var_first_dim_size_;
input_params.var_outer_dim_size_ = var_outer_dim_size_;
MultiThreadCompute(ComputeAdam, &input_params, kThreadNum, unique_sparse_grad.indices_size_);
MultiThreadCompute(ComputeAdam, &input_params, unique_sparse_grad.indices_size_);
if (use_nesterov_) {
input_params.m_ = input_params.m_t_;
@ -171,7 +170,7 @@ bool SparseApplyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
input_params.var_ = var;
input_params.lr_ = lr;
input_params.epsilon_ = epsilon;
MultiThreadCompute(ComputeWeight, &input_params, kThreadNum, total_dim_size);
MultiThreadCompute(ComputeWeight, &input_params, total_dim_size);
return true;
}
} // namespace kernel

@ -145,8 +145,7 @@ bool SparseApplyFtrlCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inp
input_params.sparse_grad_ = unique_sparse_grad;
input_params.var_first_dim_size_ = var_first_dim_size_;
input_params.var_outer_dim_size_ = var_outer_dim_size_;
const size_t kThreadNum = 16;
MultiThreadCompute(ComputeFtrl, &input_params, kThreadNum, unique_sparse_grad.indices_size_);
MultiThreadCompute(ComputeFtrl, &input_params, unique_sparse_grad.indices_size_);
return true;
}
} // namespace kernel

@ -139,8 +139,7 @@ bool SparseApplyLazyAdamCPUKernel::Launch(const std::vector<kernel::AddressPtr>
input_params.sparse_grad_ = unique_sparse_grad;
input_params.var_first_dim_size_ = var_first_dim_size_;
input_params.var_outer_dim_size_ = var_outer_dim_size_;
const size_t kThreadNum = 16;
MultiThreadCompute(ComputeLazyAdam, &input_params, kThreadNum, unique_sparse_grad.indices_size_);
MultiThreadCompute(ComputeLazyAdam, &input_params, unique_sparse_grad.indices_size_);
return true;
}
} // namespace kernel

@ -132,8 +132,7 @@ bool SparseApplyProximalAdagradCPUKernel::Launch(const std::vector<kernel::Addre
input_params.sparse_grad_ = unique_sparse_grad;
input_params.var_first_dim_size_ = var_first_dim_size_;
input_params.var_outer_dim_size_ = var_outer_dim_size_;
const size_t kThreadNum = 16;
MultiThreadCompute(ComputeProximalAdagrad, &input_params, kThreadNum, unique_sparse_grad.indices_size_);
MultiThreadCompute(ComputeProximalAdagrad, &input_params, unique_sparse_grad.indices_size_);
return true;
}
} // namespace kernel

Loading…
Cancel
Save