Add topk op (#3760)

* init add * add topk op * someupdate * fix style check * add test py file * update top k cuda kernel * follow comments * remove debug print * fix casting error * fix casting error * fix casting error * fix rename bug... * fix travis
8 years ago · 3fbb692d4b
parent 2f40da0923
commit 3fbb692d4b
7 changed files with 516 additions and 1 deletions
--- a/paddle/operators/top_k_op.cc
+++ b/paddle/operators/top_k_op.cc
@ -0,0 +1,67 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/top_k_op.h"
+
+namespace paddle {
+namespace operators {
+
+class TopkOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"),
+                            "Input of TopkOP must be initialized.");
+    auto *input = ctx.Input<framework::Tensor>("X");
+    const int k = static_cast<int>(ctx.Attr<int>("k"));
+
+    PADDLE_ENFORCE_GE(k, 1, "k must >= 1");
+    PADDLE_ENFORCE_GE(input->dims().size(), 1, "input must have >= 1d shape");
+    PADDLE_ENFORCE_GE(input->dims()[input->dims().size() - 1], k,
+                      "input must have >= k columns");
+
+    framework::DDim dims = input->dims();
+    dims[dims.size() - 1] = k;
+    ctx.Output<Tensor>("Out")->Resize(dims);
+    ctx.Output<Tensor>("Indices")->Resize(dims);
+  }
+};
+
+class TopkOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  TopkOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The input of Topk op");
+    AddOutput("Out", "The output tensor of Topk op");
+    AddOutput("Indices", "The indices of Topk elements of input");
+    AddComment(
+        R"DOC(If the input is a vector (1d tensor), finds the k largest entries in the vector and outputs their values and indices as vectors. Thus values[j] is the j-th largest entry in input, and its index is indices[j].
+
+    For matrices, computes the top k entries in each row. )DOC");
+    AddAttr<int>("k",
+                 "Number of top elements to look for along the last "
+                 "dimension (along each row for matrices).")
+        .SetDefault(1);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(top_k, ops::TopkOp, ops::TopkOpMaker);
+REGISTER_OP_CPU_KERNEL(top_k,
+                       ops::TopkKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/top_k_op.cu
+++ b/paddle/operators/top_k_op.cu
--- a/paddle/operators/top_k_op.h
+++ b/paddle/operators/top_k_op.h
@ -0,0 +1,76 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <algorithm>
+#include <iostream>
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
+
+template <typename Place, typename T>
+class TopkKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    // Get the top k elements of each row of input tensor
+    // FIXME: only deal with matrix(2d tensor).
+    auto* input = ctx.Input<Tensor>("X");
+    auto* output = ctx.Output<Tensor>("Out");
+    auto* indices = ctx.Output<Tensor>("Indices");
+    // k is determined by Attr
+    const size_t k = static_cast<int>(ctx.Attr<int>("k"));
+
+    T* output_data = output->mutable_data<T>(ctx.GetPlace());
+    T* indices_data = indices->mutable_data<T>(ctx.GetPlace());
+
+    auto eg_input = EigenMatrix<T>::From(*input);
+
+    // reshape input to a flattern matrix(like flat_inner_dims)
+    framework::DDim inputdims = input->dims();
+    const size_t row = framework::product(
+        framework::slice_ddim(inputdims, 0, inputdims.size() - 1));
+    const size_t col = inputdims[inputdims.size() - 1];
+    Eigen::DSizes<int, 2> flat2dims(row, col);
+    // NOTE: eigen shape doesn't affect paddle tensor.
+    eg_input.reshape(flat2dims);
+
+    for (size_t i = 0; i < row; i++) {
+      std::vector<std::pair<T, size_t>> vec;
+      for (size_t j = 0; j < col; j++) {
+        vec.push_back(std::pair<T, size_t>(eg_input(i, j), j));
+      }
+
+      std::partial_sort(
+          vec.begin(), vec.begin() + k, vec.end(),
+          [](const std::pair<T, size_t>& l, const std::pair<T, size_t>& r) {
+            return l.first > r.first;
+          });
+      for (size_t j = 0; j < k; j++) {
+        output_data[i * k + j] = vec[j].first;
+        indices_data[i * k + j] = vec[j].second;
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@ -49,6 +49,7 @@ USE_OP(minus);
 USE_OP(cos_sim);
 USE_CPU_ONLY_OP(gather);
 USE_CPU_ONLY_OP(scatter);
+USE_OP(top_k);
 USE_OP(squared_l2_distance);

 namespace paddle {
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@ -37,7 +37,7 @@ Configuring cmake in /paddle/build ...
      -DWITH_PYTHON=${WITH_PYTHON:-ON}
      -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON}
      -DCUDNN_ROOT=/usr/
-      -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
+      -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON}
      -DWITH_TESTING=${WITH_TESTING:-ON}
      -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
 ========================================
--- a/python/paddle/v2/framework/tests/CMakeLists.txt
+++ b/python/paddle/v2/framework/tests/CMakeLists.txt
@ -17,6 +17,7 @@ py_test(test_cross_entropy_op SRCS test_cross_entropy_op.py)
 py_test(test_gather_op SRCS test_gather_op.py)
 py_test(test_scatter_op SRCS test_scatter_op.py)
 py_test(test_fill_zeros_like_op SRCS test_fill_zeros_like_op.py)
+py_test(test_top_k_op SRCS test_top_k_op.py)

 py_test(gradient_checker SRCS gradient_checker.py)

--- a/python/paddle/v2/framework/tests/test_top_k_op.py
+++ b/python/paddle/v2/framework/tests/test_top_k_op.py
@ -0,0 +1,52 @@
+import unittest
+import numpy as np
+from gradient_checker import GradientChecker, create_op
+from op_test_util import OpTestMeta
+
+
+class TestTopkOp(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "top_k"
+        k = 1
+        input = np.random.random((32, 84)).astype("float32")
+        output = np.ndarray((32, k))
+        indices = np.ndarray((32, k))
+
+        self.inputs = {'X': input}
+        self.attrs = {'k': k}
+
+        for rowid in xrange(32):
+            row = input[rowid]
+            output[rowid] = np.sort(row)[-k:]
+            indices[rowid] = row.argsort()[-k:]
+
+        self.outputs = {'Out': output, 'Indices': indices}
+
+
+class TestTopkOp3d(unittest.TestCase):
+    __metaclass__ = OpTestMeta
+
+    def setUp(self):
+        self.type = "top_k"
+        k = 1
+        input = np.random.random((32, 2, 84)).astype("float32")
+        input_flat_2d = input.reshape(64, 84)
+        output = np.ndarray((64, k))
+        indices = np.ndarray((64, k)).astype("int")
+
+        # FIXME: should use 'X': input for a 3d input
+        self.inputs = {'X': input_flat_2d}
+        self.attrs = {'k': k}
+
+        for rowid in xrange(64):
+            row = input_flat_2d[rowid]
+            output[rowid] = np.sort(row)[-k:]
+            indices[rowid] = row.argsort()[-k:]
+
+        self.outputs = {'Out': output, 'Indices': indices}
+
+
+if __name__ == '__main__':
+    unittest.main()