From 618c05d45490c7398ce3a14cf2bdb216eb47a704 Mon Sep 17 00:00:00 2001 From: kswang Date: Thu, 29 Oct 2020 21:02:39 +0800 Subject: [PATCH] optimize cpu unique --- .../kernel_compiler/cpu/unique_cpu_kernel.cc | 68 ++-- .../kernel_compiler/cpu/unique_cpu_kernel.h | 322 +++++++++++++++++- .../cpu/unique_with_pad_cpu_kernel.cc | 44 +-- .../cpu/unique_with_pad_cpu_kernel.h | 27 +- tests/st/ops/cpu/test_unique_op.py | 30 +- .../cpp/kernel/cpu/unique_cpu_kernel_test.cc | 19 +- .../cpu/unique_with_pad_cpu_kernel_test.cc | 22 +- 7 files changed, 440 insertions(+), 92 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc index 05e90f6a92..e367bbba3d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc @@ -19,45 +19,67 @@ namespace mindspore { namespace kernel { +const size_t kUseBucketUniqueSize = 100000; +const size_t kUniqueThreadNum = 23; void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) { CheckParam(kernel_node); auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - n_ = input_shape[0]; + input_size_ = input_shape[0]; dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); } +void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { + CPUKernel::InitInputOutputSize(kernel_node); + workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t)); + workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t)); + workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t)); +} + bool UniqueCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspace*/, + const std::vector &workspace, const std::vector &outputs) { if (dtype_ == kNumberTypeInt32) { - LaunchKernel(inputs, outputs); - } else if (dtype_ == kNumberTypeFloat32) { - LaunchKernel(inputs, outputs); + LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeInt64) { - LaunchKernel(inputs, outputs); + LaunchKernel(inputs, workspace, outputs); + } else if (dtype_ == kNumberTypeFloat32) { + LaunchKernel(inputs, workspace, outputs); } return true; } -template -void UniqueCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &outputs) { - auto x_addr = reinterpret_cast(inputs[0]->addr); - auto y_addr = reinterpret_cast(outputs[0]->addr); - auto idx_addr = reinterpret_cast(outputs[1]->addr); - - std::unordered_map uniq; - int n = SizeToInt(n_); - uniq.reserve(n * 2); - for (int i = 0, j = 0; i < n; ++i) { - auto it = uniq.emplace(x_addr[i], j); - idx_addr[i] = it.first->second; - if (it.second) { - ++j; - } +template +void UniqueCPUKernel::LaunchKernel(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) { + if (input_size_ == 0) { + return; + } + if (inputs.size() < 1) { + MS_LOG(EXCEPTION) << "Input size should be large than 0"; + } + if (workspace.size() < 3) { + MS_LOG(EXCEPTION) << "workspace size should be large than 2"; + } + if (outputs.size() < 2) { + MS_LOG(EXCEPTION) << "Output size should be large than 1"; } - for (const auto &it : uniq) { - y_addr[it.second] = it.first; + auto params = std::make_shared>(); + params->input_ = reinterpret_cast(inputs[0]->addr); + params->input_idx_ = reinterpret_cast(workspace[0]->addr); + params->workspace_ = reinterpret_cast(workspace[1]->addr); + params->workspace_idx_ = reinterpret_cast(workspace[2]->addr); + params->output_ = reinterpret_cast(outputs[0]->addr); + params->inverse_idx_ = reinterpret_cast(outputs[1]->addr); + params->input_size_ = input_size_; + params->output_size_ = 0; + params->need_sort_ = true; + params->thread_num_ = kUniqueThreadNum; + if (input_size_ < kUseBucketUniqueSize) { + Unique(params); + } else { + BucketUnique(params); } + output_size_ = params->output_size_; } void UniqueCPUKernel::CheckParam(const CNodePtr &kernel_node) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h index 1fb5b299aa..339c87131b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h @@ -16,31 +16,339 @@ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_CPU_KERNEL_H_ -#include +#include #include +#include #include +#include #include "backend/kernel_compiler/cpu/cpu_kernel.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" namespace mindspore { namespace kernel { +template +struct UniqueParam { + DataType *input_{nullptr}; + IndexType *input_idx_{nullptr}; + DataType *output_{nullptr}; + IndexType *inverse_idx_{nullptr}; + DataType *workspace_{nullptr}; + IndexType *workspace_idx_{nullptr}; + IndexType input_size_{0}; + IndexType output_size_{0}; + size_t thread_num_{0}; + bool need_sort_{true}; +}; + class UniqueCPUKernel : public CPUKernel { public: UniqueCPUKernel() = default; ~UniqueCPUKernel() override = default; void InitKernel(const CNodePtr &kernel_node) override; - + void InitInputOutputSize(const CNodePtr &kernel_node) override; bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - template - void LaunchKernel(const std::vector &inputs, const std::vector &outputs); + template + void LaunchKernel(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs); - private: - void CheckParam(const CNodePtr &kernel_node); - size_t n_{0}; + protected: + virtual void CheckParam(const CNodePtr &kernel_node); + size_t input_size_{0}; TypeId dtype_{kTypeUnknown}; + size_t output_size_{0}; + + template + static size_t BucketId(DataType data, size_t bucket_num) { + return static_cast(data) % bucket_num; + } + + template + static void CalculateEachBucketSize(const std::shared_ptr> ¶ms, + std::vector *each_bucket_size) { + MS_EXCEPTION_IF_NULL(params); + MS_EXCEPTION_IF_NULL(params->input_); + MS_EXCEPTION_IF_NULL(each_bucket_size); + size_t bucket_num = each_bucket_size->size(); + for (IndexType i = 0; i < params->input_size_; ++i) { + auto bucket_id = BucketId(params->input_[i], bucket_num); + each_bucket_size->at(bucket_id)++; + } + } + + template + static void SplitAndCalculateBucketSize( + const std::shared_ptr> ¶ms, + std::vector>> *segments_ptr, + std::vector>> *segment_bucket_sizes_ptr) { + MS_EXCEPTION_IF_NULL(params); + MS_EXCEPTION_IF_NULL(params->input_); + MS_EXCEPTION_IF_NULL(segments_ptr); + MS_EXCEPTION_IF_NULL(segment_bucket_sizes_ptr); + auto &segments = *segments_ptr; + auto &segment_bucket_sizes = *segment_bucket_sizes_ptr; + + IndexType input_size = params->input_size_; + size_t thread_num = params->thread_num_; + if (thread_num < 1) { + MS_LOG(EXCEPTION) << "Thread num must > 0 !"; + } + IndexType thread_data_size = input_size / thread_num; + size_t left_data_size = input_size % thread_num; + std::vector threads; + threads.reserve(thread_num); + segments.reserve(thread_num); + segment_bucket_sizes.reserve(thread_num); + IndexType current_offset = 0; + for (size_t i = 0; i < thread_num; ++i) { + segment_bucket_sizes.emplace_back(std::make_shared>(thread_num, 0)); + IndexType data_size = thread_data_size; + if (i < left_data_size) { + data_size += 1; + } + segments.emplace_back(std::make_shared>()); + segments[i]->input_ = params->input_ + current_offset; + segments[i]->input_size_ = data_size; + segments[i]->thread_num_ = thread_num; + threads.emplace_back( + std::thread(CalculateEachBucketSize, segments[i], segment_bucket_sizes[i].get())); + current_offset += data_size; + } + for (size_t i = 0; i < params->thread_num_; ++i) { + threads[i].join(); + } + } + + template + static void SegmentToBuckets(const std::shared_ptr> &segment, + IndexType segment_offset, + const std::vector>> &buckets) { + MS_LOG(DEBUG) << "Start"; + MS_EXCEPTION_IF_NULL(segment); + MS_EXCEPTION_IF_NULL(segment->input_); + std::vector bucket_data_num(segment->thread_num_, 0); + auto bucket_size = buckets.size(); + for (IndexType i = 0; i < segment->input_size_; ++i) { + DataType data = segment->input_[i]; + auto bucket_id = BucketId(data, segment->thread_num_); + auto bucket_index = bucket_data_num[bucket_id]; + if (bucket_id >= bucket_size) { + MS_LOG(ERROR) << "Error bucket id!"; + continue; + } + auto &bucket = buckets[bucket_id]; + MS_EXCEPTION_IF_NULL(bucket); + if (bucket_index >= bucket->input_size_) { + MS_LOG(ERROR) << "Error bucket index!"; + continue; + } + bucket->input_[bucket_index] = data; + bucket->workspace_idx_[bucket_index] = segment_offset + i; + bucket_data_num[bucket_id]++; + } + MS_LOG(DEBUG) << "End"; + } + + template + static void GatherSegmentsToBuckets(const std::shared_ptr> ¶ms, + std::vector>> *segments_ptr, + std::vector>> *segment_bucket_sizes_ptr, + std::vector>> *buckets_ptr) { + MS_LOG(DEBUG) << "Start"; + MS_EXCEPTION_IF_NULL(params); + MS_EXCEPTION_IF_NULL(params->workspace_); + MS_EXCEPTION_IF_NULL(params->inverse_idx_); + MS_EXCEPTION_IF_NULL(params->workspace_idx_); + MS_EXCEPTION_IF_NULL(params->output_); + MS_EXCEPTION_IF_NULL(params->input_idx_); + MS_EXCEPTION_IF_NULL(segments_ptr); + MS_EXCEPTION_IF_NULL(segment_bucket_sizes_ptr); + MS_EXCEPTION_IF_NULL(buckets_ptr); + auto &segments = *segments_ptr; + auto &segment_bucket_sizes = *segment_bucket_sizes_ptr; + auto &buckets = *buckets_ptr; + auto thread_num = segments.size(); + buckets.reserve(thread_num); + std::vector bucket_data_size(thread_num, 0); + for (size_t i = 0; i < thread_num; ++i) { + for (size_t j = 0; j < thread_num; ++j) { + bucket_data_size[j] += segment_bucket_sizes[i]->at(j); + } + } + + IndexType current_offset = 0; + for (size_t i = 0; i < thread_num; ++i) { + auto bucket = std::make_shared>(); + bucket->input_ = params->output_ + current_offset; + bucket->input_idx_ = params->inverse_idx_ + current_offset; + bucket->workspace_idx_ = params->workspace_idx_ + current_offset; + bucket->output_ = params->workspace_ + current_offset; + bucket->inverse_idx_ = params->input_idx_ + current_offset; + bucket->input_size_ = bucket_data_size[i]; + current_offset += bucket_data_size[i]; + buckets.emplace_back(bucket); + } + std::vector tmp_bucket_data_size(thread_num, 0); + std::vector>>> thread_buckets; + for (size_t i = 0; i < thread_num; ++i) { + std::vector>> local_buckets; + for (size_t j = 0; j < thread_num; ++j) { + auto bucket = std::make_shared>(); + bucket->input_ = buckets[j]->input_ + tmp_bucket_data_size[j]; + bucket->input_size_ = buckets[j]->input_size_ - tmp_bucket_data_size[j]; + bucket->workspace_idx_ = buckets[j]->workspace_idx_ + tmp_bucket_data_size[j]; + local_buckets.emplace_back(bucket); + tmp_bucket_data_size[j] += segment_bucket_sizes[i]->at(j); + } + thread_buckets.emplace_back(local_buckets); + } + std::vector threads; + threads.reserve(thread_num); + current_offset = 0; + for (size_t i = 0; i < thread_num; ++i) { + MS_EXCEPTION_IF_NULL(segments[i]); + threads.emplace_back( + std::thread(SegmentToBuckets, segments[i], current_offset, thread_buckets[i])); + current_offset += segments[i]->input_size_; + } + for (size_t i = 0; i < thread_num; ++i) { + threads[i].join(); + } + MS_LOG(DEBUG) << "End"; + } + + template + static void Unique(const std::shared_ptr> ¶ms) { + MS_LOG(DEBUG) << "Start"; + MS_EXCEPTION_IF_NULL(params); + DataType *input = params->input_; + IndexType *input_idx = params->input_idx_; + DataType *output = params->output_; + IndexType *inverse_idx = params->inverse_idx_; + MS_EXCEPTION_IF_NULL(input); + MS_EXCEPTION_IF_NULL(input_idx); + MS_EXCEPTION_IF_NULL(output); + MS_EXCEPTION_IF_NULL(inverse_idx); + IndexType j = 0; + if (params->need_sort_) { + for (IndexType i = 0; i < params->input_size_; ++i) { + input_idx[i] = i; + } + std::sort(input_idx, input_idx + params->input_size_, + [&](IndexType left, IndexType right) { return input[left] < input[right]; }); + DataType last = input[0]; + for (IndexType i = 0; i < params->input_size_; ++i) { + auto curr = input[input_idx[i]]; + if (i == 0 || curr != last) { + if (i != 0) { + j++; + } + output[j] = curr; + inverse_idx[input_idx[i]] = j; + last = curr; + } else { + inverse_idx[input_idx[i]] = j; + } + } + params->output_size_ = j + 1; + } else { + std::unordered_map uniq; + uniq.reserve(params->input_size_); + for (IndexType i = 0; i < params->input_size_; ++i) { + auto it = uniq.emplace(input[i], j); + inverse_idx[i] = it.first->second; + if (it.second) { + ++j; + } + } + for (const auto &it : uniq) { + output[it.second] = it.first; + } + params->output_size_ = j; + } + MS_LOG(DEBUG) << "End"; + } + + template + static void UniqueEachBucket(const std::vector>> &buckets) { + MS_LOG(DEBUG) << "Start"; + size_t thread_num = buckets.size(); + std::vector threads; + threads.reserve(thread_num); + for (size_t i = 0; i < thread_num; ++i) { + threads.emplace_back(std::thread(Unique, buckets[i])); + } + for (size_t i = 0; i < thread_num; ++i) { + threads[i].join(); + } + MS_LOG(DEBUG) << "End"; + } + + template + static void TransformBucketReverseIndices(const std::shared_ptr> &bucket, + const std::shared_ptr> &result, + IndexType offset) { + MS_EXCEPTION_IF_NULL(bucket); + MS_EXCEPTION_IF_NULL(bucket->inverse_idx_); + MS_EXCEPTION_IF_NULL(bucket->workspace_idx_); + MS_EXCEPTION_IF_NULL(result); + MS_EXCEPTION_IF_NULL(result->inverse_idx_); + for (IndexType i = 0; i < bucket->input_size_; ++i) { + auto origin_idx = bucket->workspace_idx_[i]; + if (origin_idx >= 0 && origin_idx < result->input_size_) { + result->inverse_idx_[origin_idx] = bucket->inverse_idx_[i] + offset; + } + } + } + + template + static void MergeBuckets(const std::vector>> &buckets, + const std::shared_ptr> &result) { + MS_LOG(DEBUG) << "Start"; + MS_EXCEPTION_IF_NULL(result); + MS_EXCEPTION_IF_NULL(result->output_); + size_t thread_num = buckets.size(); + std::vector bucket_offsets(thread_num); + IndexType current_size = 0; + for (size_t i = 0; i < thread_num; ++i) { + auto bucket = buckets[i]; + MS_EXCEPTION_IF_NULL(bucket); + MS_EXCEPTION_IF_NULL(bucket->output_); + bucket_offsets[i] = current_size; + auto ret_code = memcpy_s(result->output_ + current_size, (result->input_size_ - current_size) * sizeof(DataType), + bucket->output_, bucket->output_size_ * sizeof(DataType)); + if (ret_code != EOK) { + MS_LOG(EXCEPTION) << "Failed to copy data!"; + } + current_size += bucket->output_size_; + } + result->output_size_ = current_size; + + std::vector threads; + threads.reserve(thread_num); + for (size_t i = 0; i < thread_num; ++i) { + threads.emplace_back( + std::thread(TransformBucketReverseIndices, buckets[i], result, bucket_offsets[i])); + } + for (size_t i = 0; i < thread_num; ++i) { + threads[i].join(); + } + MS_LOG(DEBUG) << "End"; + } + + template + static void BucketUnique(const std::shared_ptr> ¶ms) { + MS_EXCEPTION_IF_NULL(params); + std::vector>> segments; + std::vector>> buckets; + std::vector>> segment_bucket_sizes; + SplitAndCalculateBucketSize(params, &segments, &segment_bucket_sizes); + GatherSegmentsToBuckets(params, &segments, &segment_bucket_sizes, &buckets); + UniqueEachBucket(buckets); + MergeBuckets(buckets, params); + } }; MS_REG_CPU_KERNEL( diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc index 31e1e3195a..dbefe6ed90 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc @@ -19,49 +19,33 @@ namespace mindspore { namespace kernel { -void UniqueWithPadCPUKernel::InitKernel(const CNodePtr &kernel_node) { - CheckParam(kernel_node); - auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); - n_ = SizeToLong(input_shape[0]); - dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); -} - bool UniqueWithPadCPUKernel::Launch(const std::vector &inputs, - const std::vector & /*workspace*/, + const std::vector &workspace, const std::vector &outputs) { + UniqueCPUKernel::Launch(inputs, workspace, outputs); if (dtype_ == kNumberTypeInt32) { - LaunchKernel(inputs, outputs); + PadOutput(inputs, outputs); } else if (dtype_ == kNumberTypeInt64) { - LaunchKernel(inputs, outputs); - } else { - MS_LOG(EXCEPTION) << "Only unsupported int32 or int64 dtype"; + PadOutput(inputs, outputs); + } else if (dtype_ == kNumberTypeFloat32) { + PadOutput(inputs, outputs); } return true; } template -void UniqueWithPadCPUKernel::LaunchKernel(const std::vector &inputs, - const std::vector &outputs) { - T *a = reinterpret_cast(inputs[0]->addr); +void UniqueWithPadCPUKernel::PadOutput(const std::vector &inputs, const std::vector &outputs) { + if (inputs.size() < 2) { + MS_LOG(EXCEPTION) << "Input size should be large than 1"; + } + if (outputs.size() < 1) { + MS_LOG(EXCEPTION) << "Output size should be large than 0"; + } T pad_num = *reinterpret_cast(inputs[1]->addr); T *out = reinterpret_cast(outputs[0]->addr); - T *idx_vec = reinterpret_cast(outputs[1]->addr); - - for (int64_t i = 0; i < n_; ++i) { + for (size_t i = output_size_; i < input_size_; ++i) { out[i] = pad_num; } - std::unordered_map uniq; - uniq.reserve(n_); - for (int64_t i = 0, j = 0; i < n_; ++i) { - auto it = uniq.emplace(a[i], j); - idx_vec[i] = it.first->second; - if (it.second) { - ++j; - } - } - for (const auto &it : uniq) { - out[it.second] = it.first; - } } void UniqueWithPadCPUKernel::CheckParam(const CNodePtr &kernel_node) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h index 23683a9c4d..759ef8b4df 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h @@ -16,31 +16,26 @@ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_WITH_PAD_CPU_KERNEL_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_WITH_PAD_CPU_KERNEL_H_ -#include #include #include +#include #include "backend/kernel_compiler/cpu/cpu_kernel.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" +#include "backend/kernel_compiler/cpu/unique_cpu_kernel.h" namespace mindspore { namespace kernel { -class UniqueWithPadCPUKernel : public CPUKernel { +class UniqueWithPadCPUKernel : public UniqueCPUKernel { public: UniqueWithPadCPUKernel() = default; ~UniqueWithPadCPUKernel() override = default; - - void InitKernel(const CNodePtr &kernel_node) override; - bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) override; - template - void LaunchKernel(const std::vector &inputs, const std::vector &outputs); + void PadOutput(const std::vector &inputs, const std::vector &outputs); - private: - void CheckParam(const CNodePtr &kernel_node); - int64_t n_{0}; - TypeId dtype_{kTypeUnknown}; + protected: + void CheckParam(const CNodePtr &kernel_node) override; }; MS_REG_CPU_KERNEL(UniqueWithPad, @@ -56,7 +51,15 @@ MS_REG_CPU_KERNEL(UniqueWithPad, .AddInputAttr(kNumberTypeInt64) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeInt64) - .AddOutputAttr(kNumberTypeInt64), + .AddOutputAttr(kNumberTypeInt32), + UniqueWithPadCPUKernel); + +MS_REG_CPU_KERNEL(UniqueWithPad, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeInt32), UniqueWithPadCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/tests/st/ops/cpu/test_unique_op.py b/tests/st/ops/cpu/test_unique_op.py index 0ad55b7808..4bf888bb11 100644 --- a/tests/st/ops/cpu/test_unique_op.py +++ b/tests/st/ops/cpu/test_unique_op.py @@ -33,7 +33,7 @@ class Net(nn.Cell): return self.uniq(x) -def test_net(): +def test_net_fp32(): x = Tensor(np.array([1, 2, 5, 2]), mstype.float32) uniq = Net() output = uniq(x) @@ -45,3 +45,31 @@ def test_net(): assert (output[0].asnumpy() == expect_y_result).all() assert (output[1].asnumpy() == expect_idx_result).all() + + +def test_net_int32(): + x = Tensor(np.array([1, 2, 5, 2]), mstype.int32) + uniq = Net() + output = uniq(x) + print("x:\n", x) + print("y:\n", output[0]) + print("idx:\n", output[1]) + expect_y_result = [1, 2, 5] + expect_idx_result = [0, 1, 2, 1] + + assert (output[0].asnumpy() == expect_y_result).all() + assert (output[1].asnumpy() == expect_idx_result).all() + + +def test_net_int64(): + x = Tensor(np.array([1, 2, 5, 2]), mstype.int64) + uniq = Net() + output = uniq(x) + print("x:\n", x) + print("y:\n", output[0]) + print("idx:\n", output[1]) + expect_y_result = [1, 2, 5] + expect_idx_result = [0, 1, 2, 1] + + assert (output[0].asnumpy() == expect_y_result).all() + assert (output[1].asnumpy() == expect_idx_result).all() diff --git a/tests/ut/cpp/kernel/cpu/unique_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/unique_cpu_kernel_test.cc index 943a1ace96..e835b24a80 100644 --- a/tests/ut/cpp/kernel/cpu/unique_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/unique_cpu_kernel_test.cc @@ -29,7 +29,7 @@ class UniqueCpuKernelTest : public UT::Common { UniqueCpuKernelTest() : unique_(std::make_shared()) {} void SetUp() override { - unique_->n_ = 9; + unique_->input_size_ = 9; unique_->dtype_ = kNumberTypeFloat32; inputs_.clear(); workspace_.clear(); @@ -42,16 +42,19 @@ class UniqueCpuKernelTest : public UT::Common { return kernel_addr; } - void CreateInputAddress() { inputs_.push_back(CreateKernelAddress(x_.data())); } - - void CreateOutputAddress() { + void CreateAddress() { + inputs_.push_back(CreateKernelAddress(x_.data())); outputs_.push_back(CreateKernelAddress(y_.data())); outputs_.push_back(CreateKernelAddress(idx_.data())); + workspace_.push_back(CreateKernelAddress(workspace_idx_.data())); + workspace_.push_back(CreateKernelAddress(workspace_idx_.data())); + workspace_.push_back(CreateKernelAddress(workspace_idx_.data())); } std::vector x_; std::vector y_; - std::vector idx_; + std::vector idx_; + std::vector workspace_idx_; std::vector inputs_; std::vector workspace_; std::vector outputs_; @@ -62,13 +65,13 @@ TEST_F(UniqueCpuKernelTest, compute_test) { x_ = {1, 1, 2, 4, 4, 4, 7, 8, 8}; y_ = {1, 1, 1, 1, 1}; idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1}; - CreateInputAddress(); - CreateOutputAddress(); + workspace_idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + CreateAddress(); unique_->Launch(inputs_, workspace_, outputs_); // check compute result std::vector expect_y{1, 2, 4, 7, 8}; - std::vector expect_idx{0, 0, 1, 2, 2, 2, 3, 4, 4}; + std::vector expect_idx{0, 0, 1, 2, 2, 2, 3, 4, 4}; EXPECT_TRUE(y_ == expect_y); EXPECT_TRUE(idx_ == expect_idx); } diff --git a/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc index c9d1177f57..25d5727cf2 100644 --- a/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc @@ -29,7 +29,7 @@ class UniqueWithPadCpuKernelTest : public UT::Common { UniqueWithPadCpuKernelTest() : unique_with_pad_(std::make_shared()) {} void SetUp() override { - unique_with_pad_->n_ = 10; + unique_with_pad_->input_size_ = 10; unique_with_pad_->dtype_ = kNumberTypeInt64; inputs_.clear(); workspace_.clear(); @@ -42,21 +42,21 @@ class UniqueWithPadCpuKernelTest : public UT::Common { return kernel_addr; } - void CreateInputAddress() { + void CreateAddress() { inputs_.push_back(CreateKernelAddress(x_.data())); inputs_.push_back(CreateKernelAddress(&pad_dim_)); - ; - } - - void CreateOutputAddress() { outputs_.push_back(CreateKernelAddress(out_.data())); outputs_.push_back(CreateKernelAddress(idx_.data())); + workspace_.push_back(CreateKernelAddress(workspace_idx_.data())); + workspace_.push_back(CreateKernelAddress(workspace_idx_.data())); + workspace_.push_back(CreateKernelAddress(workspace_idx_.data())); } std::vector x_; int64_t pad_dim_; std::vector out_; - std::vector idx_; + std::vector idx_; + std::vector workspace_idx_; std::vector inputs_; std::vector workspace_; std::vector outputs_; @@ -68,13 +68,13 @@ TEST_F(UniqueWithPadCpuKernelTest, compute_test) { pad_dim_ = 8; out_ = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - CreateInputAddress(); - CreateOutputAddress(); + workspace_idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + CreateAddress(); unique_with_pad_->Launch(inputs_, workspace_, outputs_); // check compute result - std::vector expect_out{1, 5, 4, 3, 2, 8, 8, 8, 8, 8}; - std::vector expect_idx{0, 0, 1, 1, 2, 2, 3, 3, 4, 4}; + std::vector expect_out{1, 2, 3, 4, 5, 8, 8, 8, 8, 8}; + std::vector expect_idx{0, 0, 4, 4, 3, 3, 2, 2, 1, 1}; EXPECT_TRUE(out_ == expect_out); EXPECT_TRUE(idx_ == expect_idx); }