|
|
|
@ -186,9 +186,17 @@ void ArithmeticCPUKernel::LaunchLess(const std::vector<AddressPtr> &inputs, cons
|
|
|
|
|
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
|
|
|
|
|
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
|
|
|
|
|
std::vector<std::thread> threads;
|
|
|
|
|
if (thread_num < 1) {
|
|
|
|
|
MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
threads.reserve(thread_num);
|
|
|
|
|
size_t start = 0;
|
|
|
|
|
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
|
|
|
|
|
if (once_compute_size < 1) {
|
|
|
|
|
MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
while (start < lens) {
|
|
|
|
|
size_t end = (start + once_compute_size) > lens ? lens : (start + once_compute_size);
|
|
|
|
|
threads.emplace_back(std::thread(&ArithmeticCPUKernel::Less<T>, this, input1, input2, output, start, end));
|
|
|
|
@ -214,11 +222,15 @@ void ArithmeticCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, co
|
|
|
|
|
size_t thread_num = lens < 128 * max_thread_num ? std::ceil(lens / 128.0) : max_thread_num;
|
|
|
|
|
MS_LOG(INFO) << "Lens=" << lens << "; use thread_num=" << thread_num << "; max_thread_num: " << max_thread_num;
|
|
|
|
|
std::vector<std::thread> threads;
|
|
|
|
|
if (thread_num < 1) {
|
|
|
|
|
MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
threads.reserve(thread_num);
|
|
|
|
|
size_t start = 0;
|
|
|
|
|
size_t once_compute_size = (lens + thread_num - 1) / thread_num;
|
|
|
|
|
if (thread_num < 1 || once_compute_size < 1) {
|
|
|
|
|
MS_LOG(ERROR) << "Invalid value: thread_num " << thread_num << "; once_compute_size " << once_compute_size;
|
|
|
|
|
if (once_compute_size < 1) {
|
|
|
|
|
MS_LOG(ERROR) << "Invalid value: once_compute_size " << once_compute_size;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
while (start < lens) {
|
|
|
|
|