From 853b3a3f30b75c900db5a959921c9eb8980d4b70 Mon Sep 17 00:00:00 2001 From: kswang Date: Sat, 12 Dec 2020 17:20:43 +0800 Subject: [PATCH] modify some cpu kernel thread num --- .../kernel_compiler/cpu/adam_delta_cpu_kernel.cc | 4 ++++ .../cpu/embedding_look_up_cpu_kernel.cc | 10 +++++++--- .../cpu/scatter_nd_update_cpu_kernel.cc | 8 ++++++-- .../cpu/sparse_optimizer_cpu_kernel.h | 12 ++++++++---- .../backend/kernel_compiler/cpu/unique_cpu_kernel.cc | 6 +++++- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc index 657cec2781..f694b9987a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_delta_cpu_kernel.cc @@ -24,7 +24,11 @@ namespace mindspore { namespace kernel { constexpr size_t kAdamDeltaInputSize = 9; +#ifdef ENABLE_D constexpr size_t kUsedThreadNum = 23; +#else +constexpr size_t kUsedThreadNum = 8; +#endif namespace { struct ComputeParam { float *delta_{nullptr}; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc index 8a0c9ed812..71cb8c1a11 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/embedding_look_up_cpu_kernel.cc @@ -22,6 +22,11 @@ namespace mindspore { namespace kernel { namespace { +#ifdef ENABLE_D +constexpr size_t kUsedThreadNum = 23; +#else +constexpr size_t kUsedThreadNum = 8; +#endif template void LookUpTableTask(const float *input_addr, const T *indices_addr, float *output_addr, size_t indices_lens, size_t outer_dim_size, T offset, size_t first_dim_size) { @@ -92,10 +97,9 @@ void EmbeddingLookUpCPUKernel::LaunchKernel(const std::vector(inputs[0]->addr); auto indices_addr = 
reinterpret_cast(inputs[1]->addr); auto output_addr = reinterpret_cast(outputs[0]->addr); - const size_t kMaxThreadNum = 16; size_t thread_num = indices_lens_ / 10000 + 1; - thread_num = thread_num > kMaxThreadNum ? kMaxThreadNum : thread_num; - std::thread threads[kMaxThreadNum]; + thread_num = thread_num > kUsedThreadNum ? kUsedThreadNum : thread_num; + std::thread threads[kUsedThreadNum]; size_t task_proc_lens = (indices_lens_ + thread_num - 1) / thread_num; size_t i; size_t task_offset = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc index 4f36be4efe..a21bcd9968 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_update_cpu_kernel.cc @@ -22,6 +22,11 @@ namespace mindspore { namespace kernel { namespace { +#ifdef ENABLE_D +constexpr size_t kUsedThreadNum = 23; +#else +constexpr size_t kUsedThreadNum = 8; +#endif template void Compute(const ComputeParams *params, const size_t start, const size_t end) { MS_EXCEPTION_IF_NULL(params); @@ -115,10 +120,9 @@ void ScatterNdUpdateCPUKernel::LaunchKernel(const std::vector &input params.indices_unit_rank_ = indices_unit_rank_; params.out_strides_ = &out_strides_; - const size_t thread_num = 24; std::vector tasks; size_t start = 0; - size_t once_compute_size = (num_units_ + thread_num - 1) / thread_num; + size_t once_compute_size = (num_units_ + kUsedThreadNum - 1) / kUsedThreadNum; while (start < num_units_) { size_t end = (start + once_compute_size) > num_units_ ? 
num_units_ : (start + once_compute_size); auto task = [&params, start, end]() -> int { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h index 060218b2d7..455ebab18c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sparse_optimizer_cpu_kernel.h @@ -27,6 +27,11 @@ namespace mindspore { namespace kernel { +#ifdef ENABLE_D +constexpr size_t kUsedThreadNum = 23; +#else +constexpr size_t kUsedThreadNum = 8; +#endif template struct SparseGradient { float *value_{nullptr}; @@ -95,7 +100,7 @@ class SparseOptimizerCPUKernel : public CPUKernel { static void BucketReduceSparseGradient(const ReduceSparseGradientParam &param) { MS_LOG(DEBUG) << "Start"; MS_EXCEPTION_IF_NULL(param.input_grad_); - size_t thread_num = 23; + size_t thread_num = kUsedThreadNum; if (param.input_grad_->indices_size_ < thread_num) { thread_num = param.input_grad_->indices_size_; } @@ -120,11 +125,10 @@ class SparseOptimizerCPUKernel : public CPUKernel { template void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params, size_t total_compute_size) const { - const size_t kThreadNum = 24; std::vector threads; - threads.reserve(kThreadNum); + threads.reserve(kUsedThreadNum); size_t start = 0; - size_t once_compute_size = (total_compute_size + kThreadNum - 1) / kThreadNum; + size_t once_compute_size = (total_compute_size + kUsedThreadNum - 1) / kUsedThreadNum; while (start < total_compute_size) { size_t end = (start + once_compute_size) > total_compute_size ? 
total_compute_size : (start + once_compute_size); threads.emplace_back(std::thread(func, params, start, end)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc index cd030c7180..9266522092 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc @@ -20,7 +20,11 @@ namespace mindspore { namespace kernel { const size_t kUseBucketUniqueSize = 100000; -const size_t kUniqueThreadNum = 23; +#ifdef ENABLE_D +constexpr size_t kUniqueThreadNum = 23; +#else +constexpr size_t kUniqueThreadNum = 8; +#endif void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) { node_ = kernel_node; CheckParam(kernel_node);