Add an int64 variant of the GPU Unique operator, with tests.

Summary of the three file changes in this patch:

  * unique_gpu_kernel.cc: adds one more MS_REG_GPU_KERNEL_TWO registration for
    Unique, with a kNumberTypeInt64 input and two kNumberTypeInt64 outputs,
    bound to UniqueGpuKernel with int64_t, int64_t.
  * unique_impl.cu: adds an explicit CalUnique instantiation whose input,
    input_index, sorted_index, output and index pointers are all int64_t.
    num_elements and the return type remain int.
  * test_unique_op.py: adds four tests that run NetUnique in GRAPH_MODE on the
    GPU device target with int64 tensors and compare both results against
    NumPy arrays: test_unique_1d_int64 (unsorted input),
    test_unique_1d_sorted_int64 (already sorted input),
    test_unique_zeros_int64 (1000 zeros) and test_unique_large_int64
    (the range 0..299 repeated three times).

NOTE(review): the line breaks, indentation and continuation-line alignment
below were reconstructed from a whitespace-collapsed copy of this patch. The
hunk line counts match the reconstruction, but the exact whitespace of the
context lines is an assumption; confirm it against the target tree (or apply
with whitespace-tolerant options) before relying on this patch.

diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.cc
index c141f18ba1..dc55d66e42 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.cc
@@ -29,5 +29,8 @@ MS_REG_GPU_KERNEL_TWO(
 MS_REG_GPU_KERNEL_TWO(
   Unique, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
   UniqueGpuKernel, int, int)
+MS_REG_GPU_KERNEL_TWO(
+  Unique, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
+  UniqueGpuKernel, int64_t, int64_t)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unique_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unique_impl.cu
index 9b08d55204..e7a151e8f3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unique_impl.cu
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/unique_impl.cu
@@ -72,3 +72,5 @@ template int CalUnique(const half *input, int num_elements, int *inpu
                        half *output, int *index, cudaStream_t cuda_stream);
 template int CalUnique(const int *input, int num_elements, int *input_index,
                        int *sorted_index, int *output, int *index, cudaStream_t cuda_stream);
+template int CalUnique(const int64_t *input, int num_elements, int64_t *input_index,
+                       int64_t *sorted_index, int64_t *output, int64_t *index, cudaStream_t cuda_stream);
diff --git a/tests/st/ops/gpu/test_unique_op.py b/tests/st/ops/gpu/test_unique_op.py
index a5070ac93f..1ab835cd9a 100644
--- a/tests/st/ops/gpu/test_unique_op.py
+++ b/tests/st/ops/gpu/test_unique_op.py
@@ -267,3 +267,65 @@ def test_unique_dynamic():
     assert (x_idx2.asnumpy() == expt_index2).all()
     for i, out in enumerate(x_split2):
         assert (out.asnumpy() == expt_split2[i]).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_unique_1d_int64():
+    x = Tensor(np.array([4, 5, 1, 2, 3, 3, 4, 5]).astype(np.int64))
+    exp_output = np.array([1, 2, 3, 4, 5]).astype(np.int64)
+    exp_idx = np.array([3, 4, 0, 1, 2, 2, 3, 4]).astype(np.int64)
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    net = NetUnique()
+    x_unique, x_idx = net(x)
+    print(x_unique)
+    print(x_idx)
+    assert (x_unique.asnumpy() == exp_output).all()
+    assert (x_idx.asnumpy() == exp_idx).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_unique_1d_sorted_int64():
+    x = Tensor(np.array([1, 1, 2, 4, 4, 4, 7, 8, 8]).astype(np.int64))
+    exp_output = np.array([1, 2, 4, 7, 8]).astype(np.int64)
+    exp_idx = np.array([0, 0, 1, 2, 2, 2, 3, 4, 4]).astype(np.int64)
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    net = NetUnique()
+    x_unique, x_idx = net(x)
+    assert (x_unique.asnumpy() == exp_output).all()
+    assert (x_idx.asnumpy() == exp_idx).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_unique_zeros_int64():
+    x = Tensor(np.zeros(1000).astype(np.int64))
+    exp_output = np.zeros(1).astype(np.int64)
+    exp_idx = np.zeros(1000).astype(np.int64)
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    net = NetUnique()
+    x_unique, x_idx = net(x)
+    assert (x_unique.asnumpy() == exp_output).all()
+    assert (x_idx.asnumpy() == exp_idx).all()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_unique_large_int64():
+    x_np1 = np.arange(100)
+    x_np2 = np.arange(100, 200)
+    x_np3 = np.arange(200, 300)
+    x_np = np.concatenate((x_np1, x_np2, x_np3, x_np1, x_np2, x_np3, x_np1, x_np2, x_np3))
+    x = Tensor(x_np.astype(np.int64))
+    exp_output = np.arange(300).astype(np.int64)
+    exp_idx = np.concatenate((x_np1, x_np2, x_np3, x_np1, x_np2, x_np3, x_np1, x_np2, x_np3)).astype(np.int64)
+    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+    net = NetUnique()
+    x_unique, x_idx = net(x)
+    assert (x_unique.asnumpy() == exp_output).all()
+    assert (x_idx.asnumpy() == exp_idx).all()