parent
6665c49299
commit
ca535d18ab
@ -0,0 +1,91 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/detection_output_op.h"
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// Proto maker for the detection_output operator (SSD-style post-processing).
// NOTE: the previous descriptions were copy-pasted from a spatial pyramid
// pooling op and did not describe this operator at all; they are rewritten
// here to match what the kernel actually does.
class Detection_output_OpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  Detection_output_OpMaker(framework::OpProto* proto,
                           framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("Loc",
             "(Tensor) Predicted bounding-box location (regression) values, "
             "4 values per prior box.");
    AddInput("Conf",
             "(Tensor) Per-class confidence predictions for each prior box; "
             "softmax over the class dimension is applied inside the kernel.");
    AddInput("PriorBox",
             "(Tensor) Prior (default) boxes with their variances; each "
             "prior occupies 8 values (4 box coordinates + 4 variances).");
    AddOutput("Out",
              "(Tensor) Detection results with shape [num_kept, 7]; one row "
              "per detection kept after confidence filtering and NMS. "
              "Empty (0 x 0) when nothing is kept.");
    AddAttr<int>("background_label_id",
                 "(int) Label id of the background class; boxes of this "
                 "class are skipped during detection.");
    AddAttr<int>("num_classes",
                 "(int) Number of classes predicted per prior box.");
    AddAttr<float>("nms_threshold",
                   "(float) IoU overlap threshold used by non-maximum "
                   "suppression.");
    AddAttr<float>("confidence_threshold",
                   "(float) Detections scoring below this confidence are "
                   "discarded before NMS.");
    AddAttr<int>("top_k",
                 "(int) Maximum number of detections kept per image after "
                 "NMS.");
    AddAttr<int>("nms_top_k",
                 "(int) Number of highest-scoring boxes considered by NMS.");
    AddComment(R"DOC(
detection_output operator.

Post-processing step of SSD-style detectors: decodes the predicted location
offsets (Loc) against the prior boxes and their variances (PriorBox), applies
softmax to the class confidences (Conf), filters detections below
confidence_threshold, and runs per-class non-maximum suppression.

Output is a tensor of shape $(num\_kept, 7)$, one row per kept detection.
)DOC");
  }
};
|
||||
|
||||
class Detection_output_Op : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||
PADDLE_ENFORCE(ctx->HasInput("X"),
|
||||
"Input(X) of Detection_output_Op"
|
||||
"should not be null.");
|
||||
PADDLE_ENFORCE(ctx->HasOutput("Out"),
|
||||
"Output(Out) of Detection_output_Op should not be null.");
|
||||
auto in_x_dims = ctx->GetInputDim("X");
|
||||
int pyramid_height = ctx->Attrs().Get<int>("pyramid_height");
|
||||
PADDLE_ENFORCE(in_x_dims.size() == 4,
|
||||
"Detection_output_ing intput must be of 4-dimensional.");
|
||||
int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1];
|
||||
std::vector<int64_t> output_shape({in_x_dims[0], outlen});
|
||||
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
|
||||
}
|
||||
};
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
// detection_output is a pure post-processing step, so no gradient op is
// registered. CPU kernels are provided for float and double element types.
REGISTER_OP_WITHOUT_GRADIENT(detection_output, ops::Detection_output_Op,
                             ops::Detection_output_OpMaker);
REGISTER_OP_CPU_KERNEL(
    detection_output,
    ops::Detection_output_Kernel<paddle::platform::CPUPlace, float>,
    ops::Detection_output_Kernel<paddle::platform::CPUPlace, double>);
|
@ -0,0 +1,21 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/detection_output_op.h"
|
||||
|
||||
namespace ops = paddle::operators;
// GPU kernel registration for detection_output (float and double).
// The shared kernel template is instantiated for GPUPlace here.
REGISTER_OP_GPU_KERNEL(
    detection_output,
    ops::Detection_output_Kernel<paddle::platform::GPUPlace, float>,
    ops::Detection_output_Kernel<paddle::platform::GPUPlace, double>);
|
@ -0,0 +1,114 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/framework/tensor.h"
|
||||
#include "paddle/operators/math/detection_util.h"
|
||||
#include "paddle/operators/math/math_function.h"
|
||||
#include "paddle/operators/math/softmax.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
template <typename Place, typename T>
class Detection_output_Kernel : public framework::OpKernel<T> {
 public:
  // SSD-style detection post-processing:
  //  1) flatten/permute the Loc and Conf predictions into work buffers,
  //  2) softmax over the class confidences,
  //  3) decode each prior box (with its variance) against the location
  //     predictions,
  //  4) select detection indices (confidence filtering + per-class NMS),
  //  5) write the kept detections into Out as a [num_kept, 7] tensor
  //     (or resize Out to 0 x 0 when nothing is kept).
  void Compute(const framework::ExecutionContext& context) const override {
    const framework::Tensor* in_loc = context.Input<framework::Tensor>("Loc");
    const framework::Tensor* in_conf = context.Input<framework::Tensor>("Conf");
    const framework::Tensor* in_priorbox =
        context.Input<framework::Tensor>("PriorBox");
    auto* out = context.Output<framework::Tensor>("Out");
    int num_classes = context.template Attr<int>("num_classes");
    int top_k = context.template Attr<int>("top_k");
    int nms_top_k = context.template Attr<int>("nms_top_k");
    int background_label_id = context.template Attr<int>("background_label_id");
    float nms_threshold = context.template Attr<float>("nms_threshold");
    float confidence_threshold =
        context.template Attr<float>("confidence_threshold");

    // Assumes Loc dims are [input_num, batch_size, ...] -- TODO confirm
    // against the layer that produces Loc.
    int input_num = in_loc->dims()[0];
    int batch_size = in_loc->dims()[1];
    int loc_sum_size = in_loc->numel();
    int conf_sum_size = in_conf->numel();
    // Work buffers: all locations flattened into one row; confidences
    // reshaped to [total_priors, num_classes] rows for the softmax below.
    std::vector<int64_t> loc_shape_vec({1, loc_sum_size});
    std::vector<int64_t> conf_shape_vec(
        {conf_sum_size / num_classes, num_classes});
    framework::DDim loc_shape(framework::make_ddim(loc_shape_vec));
    framework::DDim conf_shape(framework::make_ddim(conf_shape_vec));
    framework::Tensor loc_tensor;
    framework::Tensor conf_tensor;
    loc_tensor.mutable_data<T>(loc_shape, context.GetPlace());
    conf_tensor.mutable_data<T>(conf_shape, context.GetPlace());

    // KNCHW ==> NHWC
    // NOTE(review): every iteration passes the full input tensors with
    // identical arguments; presumably appendWithPermute advances an internal
    // offset, otherwise this should index by i — verify against
    // math/detection_util.h.
    for (int i = 0; i < input_num; ++i) {
      math::appendWithPermute<T>(*in_loc, &loc_tensor);
      math::appendWithPermute<T>(*in_conf, &conf_tensor);
    }
    // softmax (conf_tensor is used as both input and output, i.e. in place)
    math::SoftmaxFunctor<Place, T>()(context.device_context(), &conf_tensor,
                                     &conf_tensor);
    // get decode bboxes: each prior occupies 8 values
    // (4 box coordinates followed by 4 variances).
    size_t num_priors = in_priorbox->numel() / 8;
    std::vector<std::vector<operators::math::BBox<T>>> all_decoded_bboxes;
    // NOTE(review): batch_size is int; comparing with size_t n mixes
    // signed/unsigned.
    for (size_t n = 0; n < batch_size; ++n) {
      std::vector<operators::math::BBox<T>> decoded_bboxes;
      for (size_t i = 0; i < num_priors; ++i) {
        // 8 prior values per box; 4 location predictions per box per image.
        size_t prior_offset = i * 8;
        size_t loc_pred_offset = n * num_priors * 4 + i * 4;
        std::vector<math::BBox<T>> prior_bbox_vec;
        math::getBBoxFromPriorData<T>(in_priorbox->data<T>() + prior_offset, 1,
                                      prior_bbox_vec);
        std::vector<std::vector<T>> prior_bbox_var;
        math::getBBoxVarFromPriorData<T>(in_priorbox->data<T>() + prior_offset,
                                         1, prior_bbox_var);
        std::vector<T> loc_pred_data;
        for (size_t j = 0; j < 4; ++j)
          loc_pred_data.push_back(
              *(loc_tensor.data<T>() + loc_pred_offset + j));
        math::BBox<T> bbox = math::decodeBBoxWithVar<T>(
            prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data);
        decoded_bboxes.push_back(bbox);
      }
      all_decoded_bboxes.push_back(decoded_bboxes);
    }

    // Per image: map of class id -> indices of boxes kept after confidence
    // filtering and NMS; num_kept is the total across the batch.
    std::vector<std::map<size_t, std::vector<size_t>>> all_indices;
    int num_kept = math::getDetectionIndices<T>(
        conf_tensor.data<T>(), num_priors, num_classes, background_label_id,
        batch_size, confidence_threshold, nms_top_k, nms_threshold, top_k,
        all_decoded_bboxes, &all_indices);

    framework::Tensor out_tmp;
    if (num_kept <= 0) {
      // Nothing survived: publish an empty 0 x 0 output and return early.
      std::vector<int64_t> out_shape_vec({0, 0});
      framework::DDim out_shape(framework::make_ddim(out_shape_vec));
      out->Resize(out_shape);
      return;
    }
    // One row of 7 values per kept detection.
    std::vector<int64_t> out_shape_vec({num_kept, 7});
    framework::DDim out_shape(framework::make_ddim(out_shape_vec));
    out_tmp.mutable_data<T>(out_shape, context.GetPlace());

    T* out_data = out_tmp.data<T>();
    math::getDetectionOutput<T>(conf_tensor.data<T>(), num_kept, num_priors,
                                num_classes, batch_size, all_indices,
                                all_decoded_bboxes, out_data);
    // Allocate Out at the final shape and share the computed buffer with it.
    out->mutable_data<T>(out_shape, context.GetPlace());
    out->ShareDataWith(out_tmp);
  }
};
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue