add unique kernel and op (#17557)

6 years ago · 206c44e2a8
parent 71af72b1c2
commit 206c44e2a8
5 changed files with 257 additions and 0 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -202,6 +202,7 @@ paddle.fluid.layers.stack (ArgSpec(args=['x', 'axis'], varargs=None, keywords=No
 paddle.fluid.layers.pad2d (ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None)), ('document', '3f3abdb795a5c2aad8c2312249551ce5'))
 paddle.fluid.layers.unstack (ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b0c4ca08d4eb295189e1b107c920d093'))
 paddle.fluid.layers.sequence_enumerate (ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)), ('document', 'b870fed41abd2aecf929ece65f555fa1'))
 paddle.fluid.layers.unique (ArgSpec(args=['x', 'dtype'], varargs=None, keywords=None, defaults=('int32',)), ('document', 'cab0b06e5683875f12f0efc62fa230a9'))
 paddle.fluid.layers.expand (ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '33bc4f6010282ffe044d77be7ba7c275'))
 paddle.fluid.layers.sequence_concat (ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b992616c1afbd6b0c2a897ac23036381'))
 paddle.fluid.layers.scale (ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)), ('document', '463e4713806e5adaa4d20a41e2218453'))
--- a/paddle/fluid/operators/unique_op.cc
+++ b/paddle/fluid/operators/unique_op.cc
@ -0,0 +1,61 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/unique_op.h"
 namespace paddle {
 namespace operators {
 class UniqueOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of UniqueOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of UniqueOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Index"),
                   "Output(Index) of UniqueOp should not be null.");
    auto in_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE(in_dims.size() == 1, "Input(X) should be a vector.");
    ctx->SetOutputDim("Out", {-1});
    ctx->SetOutputDim("Index", in_dims);
  }
 };
 class UniqueOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "Input tensor. It should be a 1-D tensor.");
    AddAttr<int>("dtype", "data type for output index");
    AddOutput("Out", "A unique subsequence for input tensor.");
    AddOutput("Index",
              "An index tensor pointing to unique subsequence, which has "
              "identical shape with input tensor and int64 dtype.");
    AddComment(R"DOC(
    Return a unique subsequence for 1-D input tensor, and an index tensor pointing to this unique subsequence
 )DOC");
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 REGISTER_OP_WITHOUT_GRADIENT(unique, ops::UniqueOp, ops::UniqueOpMaker);
 REGISTER_OP_CPU_KERNEL(unique, ops::UniqueKernel<float>,
                       ops::UniqueKernel<double>, ops::UniqueKernel<int32_t>,
                       ops::UniqueKernel<int64_t>);
--- a/paddle/fluid/operators/unique_op.h
+++ b/paddle/fluid/operators/unique_op.h
@ -0,0 +1,83 @@
 /* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <cmath>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
 namespace paddle {
 namespace operators {
 template <typename InT>
 struct UniqueOpFunctor {
  framework::Tensor* out_;
  framework::Tensor* index_;
  const framework::Tensor* in_;
  UniqueOpFunctor(framework::Tensor* out, framework::Tensor* index,
                  const framework::Tensor* in)
      : out_(out), index_(index), in_(in) {}
  template <typename IndexT>
  void apply() const {
    auto* in_data = in_->data<InT>();
    auto* index_data = index_->mutable_data<IndexT>(platform::CPUPlace());
    int64_t j = 0;
    // TODO(fangzeyang): Should optimize performance here.
    std::unordered_map<InT, int64_t> dict;
    std::vector<InT> uniq;
    PADDLE_ENFORCE(in_->numel() < pow(2, 31),
                   "numel of Unique op input should less than INT_MAX");
    for (auto i = 0; i < in_->numel(); i++) {
      auto it = dict.find(in_data[i]);
      if (it == dict.end()) {
        dict.insert(std::make_pair(in_data[i], j));
        uniq.push_back(in_data[i]);
        index_data[i] = static_cast<IndexT>(j);
        j++;
      } else {
        index_data[i] = static_cast<IndexT>(it->second);
      }
    }
    out_->Resize(framework::make_ddim({static_cast<int64_t>(uniq.size())}));
    auto out_data = out_->mutable_data<InT>(platform::CPUPlace());
    std::memcpy(out_data, uniq.data(), uniq.size() * sizeof(InT));
  }
 };
 template <typename T>
 class UniqueKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto data_type = static_cast<framework::proto::VarType::Type>(
        context.Attr<int>("dtype"));
    auto* x = context.Input<framework::Tensor>("X");
    auto* out = context.Output<framework::Tensor>("Out");
    auto* index = context.Output<framework::Tensor>("Index");
    framework::VisitDataType(data_type, UniqueOpFunctor<T>(out, index, x));
  }
 };
 }  // namespace operators
 }  // namespace paddle
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@ -145,6 +145,7 @@ __all__ = [
    'pad2d',
    'unstack',
    'sequence_enumerate',
    'unique',
    'expand',
    'sequence_concat',
    'scale',
@ -12068,6 +12069,45 @@ def sign(x):
    return out
 def unique(x, dtype='int32'):
    """
    **unique** 
    Return a unique tensor for `x` and an index tensor pointing to this unique tensor.
    Args:
        x(Variable): A 1-D input tensor.
        dtype(np.dtype|core.VarDesc.VarType|str): The type of index tensor: int32, int64.
    Returns:
        tuple: (out, index). `out` is the unique tensor for `x`, with identical dtype to `x`, and \
            `index` is an index tensor pointing to `out`, by which user can recover the original `x` tensor.
    Examples:
        .. code-block:: python
             import numpy as np
             import paddle.fluid as fluid
             x = fluid.assign(np.array([2, 3, 3, 1, 5, 3], dtype='int32'))
             out, index = fluid.layers.unique(x) # out is [2, 3, 1, 5]; index is [0, 1, 1, 2, 3, 1]
    """
    helper = LayerHelper("unique", **locals())
    out = helper.create_variable_for_type_inference(dtype=x.dtype)
    index = helper.create_variable_for_type_inference(dtype)
    helper.append_op(
        type='unique',
        inputs={'X': x},
        attrs={'dtype': convert_np_dtype_to_dtype_(dtype)},
        outputs={'Out': [out],
                 'Index': [index]})
    return out, index
 def deformable_conv(input,
                    offset,
                    mask,
--- a/python/paddle/fluid/tests/unittests/test_unique.py
+++ b/python/paddle/fluid/tests/unittests/test_unique.py
@ -0,0 +1,72 @@
 #   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
 import paddle.fluid.core as core
 from paddle.fluid.op import Operator
 class TestUniqueOp(OpTest):
    def setUp(self):
        self.op_type = "unique"
        self.init_config()
    def test_check_output(self):
        self.check_output()
    def init_config(self):
        self.inputs = {'X': np.array([2, 3, 3, 1, 5, 3], dtype='int64'), }
        self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)}
        self.outputs = {
            'Out': np.array(
                [2, 3, 1, 5], dtype='int64'),
            'Index': np.array(
                [0, 1, 1, 2, 3, 1], dtype='int32')
        }
 class TestOne(TestUniqueOp):
    def init_config(self):
        self.inputs = {'X': np.array([2], dtype='int64'), }
        self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)}
        self.outputs = {
            'Out': np.array(
                [2], dtype='int64'),
            'Index': np.array(
                [0], dtype='int32')
        }
 class TestRandom(TestUniqueOp):
    def init_config(self):
        self.inputs = {'X': np.random.randint(0, 100, (150, ), dtype='int64')}
        self.attrs = {'dtype': int(core.VarDesc.VarType.INT64)}
        np_unique, np_index, reverse_index = np.unique(self.inputs['X'], True,
                                                       True)
        np_tuple = [(np_unique[i], np_index[i]) for i in range(len(np_unique))]
        np_tuple.sort(key=lambda x: x[1])
        target_out = np.array([i[0] for i in np_tuple], dtype='int64')
        target_index = np.array(
            [list(target_out).index(i) for i in self.inputs['X']],
            dtype='int64')
        self.outputs = {'Out': target_out, 'Index': target_index}
 if __name__ == "__main__":
    unittest.main()