optimize cpu unique

pull/7999/head
kswang 5 years ago
parent 7d250f2218
commit 618c05d454

@ -19,45 +19,67 @@
namespace mindspore {
namespace kernel {
// Input-length threshold checked in UniqueCPUKernel::LaunchKernel: inputs shorter than this are
// handled by Unique(), all others by BucketUnique().
const size_t kUseBucketUniqueSize = 100000;
// Value copied into the UniqueParam thread_num_ field before the unique routine is called.
const size_t kUniqueThreadNum = 23;
// Calls CheckParam on the node, then caches the leading dimension of input 0's inferred shape
// and that input's inferred data type for later use by Launch.
void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
// NOTE(review): input_shape[0] is read without a visible emptiness check; presumably CheckParam
// guarantees at least one dimension -- confirm.
// NOTE(review): n_ and input_size_ receive the same value. This listing interleaves pre- and
// post-change lines of the commit, so presumably only one of the two assignments exists in the
// real file -- confirm against the repository.
n_ = input_shape[0];
input_size_ = input_shape[0];
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}
// Delegates to CPUKernel::InitInputOutputSize, then appends three workspace entries, each sized
// for input_size_ elements of sizeof(int64_t) bytes.
void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  constexpr size_t kWorkspaceCount = 3;
  for (size_t slot = 0; slot < kWorkspaceCount; ++slot) {
    workspace_size_list_.emplace_back(input_size_ * sizeof(int64_t));
  }
}
// Selects a LaunchKernel instantiation by comparing dtype_ with kNumberTypeInt32,
// kNumberTypeInt64 and kNumberTypeFloat32, then returns true. No branch handles any other
// dtype_ value, so such a value falls through to `return true` without launching anything.
// NOTE(review): this listing interleaves the old and new text of the function (two workspace
// parameter lines; LaunchKernel calls with one and with two template arguments), so it does not
// compile as shown -- consult the repository for the final version.
bool UniqueCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs);
LaunchKernel<int, int>(inputs, workspace, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
LaunchKernel<int64_t, int>(inputs, workspace, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float, int>(inputs, workspace, outputs);
}
return true;
}
// NOTE(review): lines of a hash-map based implementation (template <typename T>) and of a
// UniqueParam based implementation (template <typename DataType, typename IndexType>) are
// interleaved below; braces do not balance as shown. Consult the repository for the final text.
template <typename T>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
auto x_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto y_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto idx_addr = reinterpret_cast<int64_t *>(outputs[1]->addr);
std::unordered_map<T, int64_t> uniq;
int n = SizeToInt(n_);
uniq.reserve(n * 2);
// Each value is mapped to the running counter j on first sight; idx_addr[i] receives the mapped
// slot, and j advances only when the emplace actually inserted.
for (int i = 0, j = 0; i < n; ++i) {
auto it = uniq.emplace(x_addr[i], j);
idx_addr[i] = it.first->second;
if (it.second) {
++j;
}
// UniqueParam based variant: returns immediately when input_size_ is 0; otherwise reports via
// MS_LOG(EXCEPTION) when fewer than 1 input, 3 workspace or 2 output buffers are supplied, fills
// a UniqueParam with the buffer addresses and sizes, and calls Unique() when input_size_ is below
// kUseBucketUniqueSize or BucketUnique() otherwise. params->output_size_ is then copied into
// output_size_.
template <typename DataType, typename IndexType>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
// The empty-input return comes before the buffer-count checks, so those checks are skipped when
// input_size_ is 0.
if (input_size_ == 0) {
return;
}
if (inputs.size() < 1) {
MS_LOG(EXCEPTION) << "Input size should be large than 0";
}
if (workspace.size() < 3) {
MS_LOG(EXCEPTION) << "workspace size should be large than 2";
}
if (outputs.size() < 2) {
MS_LOG(EXCEPTION) << "Output size should be large than 1";
}
// Hash-map variant: writes every distinct value into y_addr at its assigned slot.
for (const auto &it : uniq) {
y_addr[it.second] = it.first;
auto params = std::make_shared<UniqueParam<DataType, IndexType>>();
params->input_ = reinterpret_cast<DataType *>(inputs[0]->addr);
// workspace[0] and workspace[2] are viewed as IndexType, workspace[1] as DataType.
params->input_idx_ = reinterpret_cast<IndexType *>(workspace[0]->addr);
params->workspace_ = reinterpret_cast<DataType *>(workspace[1]->addr);
params->workspace_idx_ = reinterpret_cast<IndexType *>(workspace[2]->addr);
params->output_ = reinterpret_cast<DataType *>(outputs[0]->addr);
params->inverse_idx_ = reinterpret_cast<IndexType *>(outputs[1]->addr);
params->input_size_ = input_size_;
params->output_size_ = 0;
params->need_sort_ = true;
params->thread_num_ = kUniqueThreadNum;
if (input_size_ < kUseBucketUniqueSize) {
Unique(params);
} else {
BucketUnique(params);
}
output_size_ = params->output_size_;
}
void UniqueCPUKernel::CheckParam(const CNodePtr &kernel_node) {

@ -19,49 +19,33 @@
namespace mindspore {
namespace kernel {
// Calls CheckParam on the node, then stores input 0's leading inferred dimension (converted with
// SizeToLong) in n_ and its inferred data type in dtype_.
// NOTE(review): this listing is a diff view without +/- markers; it is unclear whether this
// override still exists after the commit -- confirm against the repository.
void UniqueWithPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
n_ = SizeToLong(input_shape[0]);
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}
// NOTE(review): old and new lines of this function are interleaved in this listing, so it does
// not compile as shown. In the lines that use PadOutput, UniqueCPUKernel::Launch is called first
// and PadOutput<T> is then chosen by dtype_ (int32, int64 or float32); the function returns true.
bool UniqueWithPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
UniqueCPUKernel::Launch(inputs, workspace, outputs);
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
PadOutput<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only unsupported int32 or int64 dtype";
PadOutput<int64_t>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
PadOutput<float>(inputs, outputs);
}
return true;
}
// NOTE(review): this listing mixes the body of a LaunchKernel<T> implementation (hash-map based)
// with the body of PadOutput<T>; two function heads appear and the text does not compile as shown.
template <typename T>
void UniqueWithPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
T *a = reinterpret_cast<T *>(inputs[0]->addr);
void UniqueWithPadCPUKernel::PadOutput(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
// inputs[1] (the pad value) and outputs[0] are read below; too few buffers are reported through
// MS_LOG(EXCEPTION).
if (inputs.size() < 2) {
MS_LOG(EXCEPTION) << "Input size should be large than 1";
}
if (outputs.size() < 1) {
MS_LOG(EXCEPTION) << "Output size should be large than 0";
}
T pad_num = *reinterpret_cast<T *>(inputs[1]->addr);
T *out = reinterpret_cast<T *>(outputs[0]->addr);
T *idx_vec = reinterpret_cast<T *>(outputs[1]->addr);
// Two loop heads follow: one fills out[0, n_) with pad_num, the other fills only
// out[output_size_, input_size_).
for (int64_t i = 0; i < n_; ++i) {
for (size_t i = output_size_; i < input_size_; ++i) {
out[i] = pad_num;
}
// Hash-map pass: each value is mapped to the running counter j on first sight, idx_vec[i] gets
// the mapped slot, and every distinct value is finally written to out at its slot.
std::unordered_map<T, int> uniq;
uniq.reserve(n_);
for (int64_t i = 0, j = 0; i < n_; ++i) {
auto it = uniq.emplace(a[i], j);
idx_vec[i] = it.first->second;
if (it.second) {
++j;
}
}
for (const auto &it : uniq) {
out[it.second] = it.first;
}
}
void UniqueWithPadCPUKernel::CheckParam(const CNodePtr &kernel_node) {

@ -16,31 +16,26 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_WITH_PAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_WITH_PAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <unordered_map>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/unique_cpu_kernel.h"
namespace mindspore {
namespace kernel {
// CPU kernel class that is registered for the UniqueWithPad operator via MS_REG_CPU_KERNEL.
// NOTE(review): this listing is a diff view without +/- markers. Two class heads (deriving from
// CPUKernel and from UniqueCPUKernel) and two CheckParam declarations appear because old and new
// lines are interleaved -- consult the repository for the final declaration.
class UniqueWithPadCPUKernel : public CPUKernel {
class UniqueWithPadCPUKernel : public UniqueCPUKernel {
public:
UniqueWithPadCPUKernel() = default;
~UniqueWithPadCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
// Both member templates below share the single `template <typename T>` line in this listing.
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void PadOutput(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
private:
void CheckParam(const CNodePtr &kernel_node);
int64_t n_{0};
TypeId dtype_{kTypeUnknown};
protected:
void CheckParam(const CNodePtr &kernel_node) override;
};
MS_REG_CPU_KERNEL(UniqueWithPad,
@ -56,7 +51,15 @@ MS_REG_CPU_KERNEL(UniqueWithPad,
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
.AddOutputAttr(kNumberTypeInt32),
UniqueWithPadCPUKernel);
// Registers UniqueWithPadCPUKernel for UniqueWithPad with two float32 inputs, a float32 output
// and an int32 output.
MS_REG_CPU_KERNEL(UniqueWithPad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeInt32),
UniqueWithPadCPUKernel);
} // namespace kernel
} // namespace mindspore

@ -33,7 +33,7 @@ class Net(nn.Cell):
return self.uniq(x)
def test_net():
def test_net_fp32():
x = Tensor(np.array([1, 2, 5, 2]), mstype.float32)
uniq = Net()
output = uniq(x)
@ -45,3 +45,31 @@ def test_net():
assert (output[0].asnumpy() == expect_y_result).all()
assert (output[1].asnumpy() == expect_idx_result).all()
def test_net_int32():
    """Run Net on the int32 tensor [1, 2, 5, 2] and check y == [1, 2, 5], idx == [0, 1, 2, 1]."""
    data = Tensor(np.array([1, 2, 5, 2]), mstype.int32)
    result = Net()(data)
    print("x:\n", data)
    print("y:\n", result[0])
    print("idx:\n", result[1])
    # Compare the values output first, then the inverse-index output, as in the expected lists.
    for position, expected in enumerate(([1, 2, 5], [0, 1, 2, 1])):
        assert (result[position].asnumpy() == expected).all()
def test_net_int64():
    """Run Net on the int64 tensor [1, 2, 5, 2] and check y == [1, 2, 5], idx == [0, 1, 2, 1]."""
    data = Tensor(np.array([1, 2, 5, 2]), mstype.int64)
    result = Net()(data)
    print("x:\n", data)
    print("y:\n", result[0])
    print("idx:\n", result[1])
    # Compare the values output first, then the inverse-index output, as in the expected lists.
    for position, expected in enumerate(([1, 2, 5], [0, 1, 2, 1])):
        assert (result[position].asnumpy() == expected).all()

@ -29,7 +29,7 @@ class UniqueCpuKernelTest : public UT::Common {
UniqueCpuKernelTest() : unique_(std::make_shared<UniqueCPUKernel>()) {}
void SetUp() override {
unique_->n_ = 9;
unique_->input_size_ = 9;
unique_->dtype_ = kNumberTypeFloat32;
inputs_.clear();
workspace_.clear();
@ -42,16 +42,19 @@ class UniqueCpuKernelTest : public UT::Common {
return kernel_addr;
}
// NOTE(review): heads of CreateInputAddress, CreateOutputAddress and CreateAddress are
// interleaved in this diff listing. The visible body registers x_ as the input, y_ and idx_ as
// the two outputs, and three workspace entries.
void CreateInputAddress() { inputs_.push_back(CreateKernelAddress(x_.data())); }
void CreateOutputAddress() {
void CreateAddress() {
inputs_.push_back(CreateKernelAddress(x_.data()));
outputs_.push_back(CreateKernelAddress(y_.data()));
outputs_.push_back(CreateKernelAddress(idx_.data()));
// NOTE(review): all three workspace entries are built from the same workspace_idx_ buffer, while
// UniqueCPUKernel::LaunchKernel uses workspace[0..2] as three separate arrays -- confirm that
// sharing one buffer is intended.
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
}
std::vector<float> x_;
std::vector<float> y_;
std::vector<int64_t> idx_;
std::vector<int> idx_;
std::vector<int64_t> workspace_idx_;
std::vector<AddressPtr> inputs_;
std::vector<AddressPtr> workspace_;
std::vector<AddressPtr> outputs_;
@ -62,13 +65,13 @@ TEST_F(UniqueCpuKernelTest, compute_test) {
// Feeds nine float values with five distinct entries through Launch and compares the outputs
// with the expected distinct values and per-element indices.
// NOTE(review): old and new lines are interleaved in this listing (two address-setup variants,
// two expect_idx declarations); it does not compile as shown.
x_ = {1, 1, 2, 4, 4, 4, 7, 8, 8};
y_ = {1, 1, 1, 1, 1};
idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateInputAddress();
CreateOutputAddress();
workspace_idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateAddress();
unique_->Launch(inputs_, workspace_, outputs_);
// check compute result
std::vector<float> expect_y{1, 2, 4, 7, 8};
std::vector<int64_t> expect_idx{0, 0, 1, 2, 2, 2, 3, 4, 4};
std::vector<int> expect_idx{0, 0, 1, 2, 2, 2, 3, 4, 4};
EXPECT_TRUE(y_ == expect_y);
EXPECT_TRUE(idx_ == expect_idx);
}

@ -29,7 +29,7 @@ class UniqueWithPadCpuKernelTest : public UT::Common {
UniqueWithPadCpuKernelTest() : unique_with_pad_(std::make_shared<UniqueWithPadCPUKernel>()) {}
void SetUp() override {
unique_with_pad_->n_ = 10;
unique_with_pad_->input_size_ = 10;
unique_with_pad_->dtype_ = kNumberTypeInt64;
inputs_.clear();
workspace_.clear();
@ -42,21 +42,21 @@ class UniqueWithPadCpuKernelTest : public UT::Common {
return kernel_addr;
}
// NOTE(review): heads of CreateInputAddress, CreateOutputAddress and CreateAddress are
// interleaved in this diff listing. The visible body registers x_ and the address of pad_dim_ as
// the two inputs, out_ and idx_ as the two outputs, and three workspace entries.
void CreateInputAddress() {
void CreateAddress() {
inputs_.push_back(CreateKernelAddress(x_.data()));
inputs_.push_back(CreateKernelAddress(&pad_dim_));
;
}
void CreateOutputAddress() {
outputs_.push_back(CreateKernelAddress(out_.data()));
outputs_.push_back(CreateKernelAddress(idx_.data()));
// NOTE(review): all three workspace entries are built from the same workspace_idx_ buffer, while
// UniqueCPUKernel::LaunchKernel uses workspace[0..2] as three separate arrays -- confirm that
// sharing one buffer is intended.
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
}
std::vector<int64_t> x_;
int64_t pad_dim_;
std::vector<int64_t> out_;
std::vector<int64_t> idx_;
std::vector<int> idx_;
std::vector<int64_t> workspace_idx_;
std::vector<AddressPtr> inputs_;
std::vector<AddressPtr> workspace_;
std::vector<AddressPtr> outputs_;
@ -68,13 +68,13 @@ TEST_F(UniqueWithPadCpuKernelTest, compute_test) {
// Runs Launch with pad value 8 and compares out_/idx_ with the expected padded values and
// per-element indices.
// NOTE(review): old and new lines are interleaved in this listing (two address-setup variants,
// two expect_out and two expect_idx declarations); it does not compile as shown.
pad_dim_ = 8;
out_ = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateInputAddress();
CreateOutputAddress();
// NOTE(review): workspace_idx_ gets 9 elements here, but SetUp sets input_size_ to 10 and
// UniqueCPUKernel::InitInputOutputSize sizes each workspace as input_size_ * sizeof(int64_t).
// The buffer looks one element short -- confirm and extend to 10 entries if so.
workspace_idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateAddress();
unique_with_pad_->Launch(inputs_, workspace_, outputs_);
// check compute result
std::vector<int64_t> expect_out{1, 5, 4, 3, 2, 8, 8, 8, 8, 8};
std::vector<int64_t> expect_idx{0, 0, 1, 1, 2, 2, 3, 3, 4, 4};
std::vector<int64_t> expect_out{1, 2, 3, 4, 5, 8, 8, 8, 8, 8};
std::vector<int> expect_idx{0, 0, 4, 4, 3, 3, 2, 2, 1, 1};
EXPECT_TRUE(out_ == expect_out);
EXPECT_TRUE(idx_ == expect_idx);
}

Loading…
Cancel
Save