Merge pull request #15356 from jerrywgz/add_clip_op

Add box clip op
6 years ago · 1743d1a58f
parent 43a67a2662 4f18a9b87b
commit 1743d1a58f
9 changed files with 368 additions and 0 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -325,6 +325,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None
 paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0))
 paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None))
 paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
 paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@ -31,6 +31,7 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
 polygon_box_transform_op.cu)
 detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
 detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
 # box_clip_op lists two sources: box_clip_op.cc and box_clip_op.cu.
 detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
 detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc)
 if(WITH_GPU)
--- a/paddle/fluid/operators/detection/bbox_util.h
+++ b/paddle/fluid/operators/detection/bbox_util.h
@ -99,5 +99,29 @@ void BboxOverlaps(const framework::Tensor& r_boxes,
  }
 }
 // Clamps every value of `input_boxes` into [0, bound] and writes it to `out`.
 //
 // The bound alternates with the flat element index: even indices are limited
 // by (im_w - 1) and odd indices by (im_h - 1), where
 //   im_w = round(im_info[1] / im_info[2])
 //   im_h = round(im_info[0] / im_info[2]).
 // `out` is allocated on ctx's place before it is written.
 // NOTE(review): the even/odd split matches [xmin, ymin, xmax, ymax] only if
 // the flat data is a sequence of 4-value boxes -- confirm against callers.
 template <class T>
 void ClipTiledBoxes(const platform::DeviceContext& ctx,
                    const framework::Tensor& im_info,
                    const framework::Tensor& input_boxes,
                    framework::Tensor* out) {
  T* clipped = out->mutable_data<T>(ctx.GetPlace());
  const T* info = im_info.data<T>();
  const T* boxes = input_boxes.data<T>();
  const T lower(0);
  const T width = round(info[1] / info[2]);
  const T height = round(info[0] / info[2]);
  for (int64_t i = 0; i < input_boxes.numel(); ++i) {
    // Offsets 0 and 2 inside a box are x coordinates, 1 and 3 are y.
    const T upper = (i % 2 == 0) ? (width - 1) : (height - 1);
    clipped[i] = std::max(std::min(boxes[i], upper), lower);
  }
 }
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/fluid/operators/detection/box_clip_op.cc
+++ b/paddle/fluid/operators/detection/box_clip_op.cc
@ -0,0 +1,86 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/detection/box_clip_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 namespace paddle {
 namespace operators {
 // Operator definition for box_clip: checks that both inputs are present,
 // validates their shapes at runtime and makes Output share the dims and LoD
 // of Input.
 class BoxClipOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
  // Shape inference. The dimension checks only run when ctx->IsRuntime() is
  // true; Output always shares Input's dims and LoD.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(Input) of BoxClipOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("ImInfo"),
                   "Input(ImInfo) of BoxClipOp should not be null.");
    auto input_box_dims = ctx->GetInputDim("Input");
    auto im_info_dims = ctx->GetInputDim("ImInfo");
    if (ctx->IsRuntime()) {
      auto input_box_size = input_box_dims.size();
      PADDLE_ENFORCE_EQ(input_box_dims[input_box_size - 1], 4,
                        "The last dimension of Input must be 4");
      // This check is about ImInfo, so the message must name ImInfo (it used
      // to say Input(Input), which pointed users at the wrong tensor).
      PADDLE_ENFORCE_EQ(im_info_dims.size(), 2,
                        "The rank of Input(ImInfo) in BoxClipOp must be 2");
      PADDLE_ENFORCE_EQ(im_info_dims[1], 3,
                        "The last dimension of ImInfo must be 3");
    }
    ctx->ShareDim("Input", /*->*/ "Output");
    ctx->ShareLoD("Input", /*->*/ "Output");
  }
 };
 // Declares the inputs, the output and the user-facing documentation of the
 // box_clip operator.
 class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Adjacent string literals are concatenated verbatim, so every fragment
    // that continues a sentence has to end with its own separating space.
    AddInput("Input",
             "(LoDTensor) "
             "Input is a LoDTensor with shape [..., 4] holds 4 points "
             "in last dimension in format [xmin, ymin, xmax, ymax]");
    AddInput("ImInfo",
             "(Tensor) Information for image reshape is in shape (N, 3), "
             "in format (height, width, im_scale)");
    AddOutput("Output",
              "(LoDTensor) "
              "Output is a LoDTensor with the same shape as Input "
              "and it is the result after clip");
    AddComment(R"DOC(
 This operator clips input boxes to original input images.
 For each input box, The formula is given as follows:
       $$xmin = \max(\min(xmin, im_w - 1), 0)$$
       $$ymin = \max(\min(ymin, im_h - 1), 0)$$     
       $$xmax = \max(\min(xmax, im_w - 1), 0)$$
       $$ymax = \max(\min(ymax, im_h - 1), 0)$$
 where im_w and im_h are computed from ImInfo, the formula is given as follows:
       $$im_w = \round(width / im_scale)$$
       $$im_h = \round(height / im_scale)$$ 
 )DOC");
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 // Register the box_clip operator with its definition class and proto maker.
 // EmptyGradOpMaker is passed as the grad-op maker (presumably meaning the
 // operator has no gradient -- confirm against the framework).
 REGISTER_OPERATOR(box_clip, ops::BoxClipOp, ops::BoxClipOpMaker,
                  paddle::framework::EmptyGradOpMaker);
 // CPU kernels are instantiated for float and double.
 REGISTER_OP_CPU_KERNEL(
    box_clip, ops::BoxClipKernel<paddle::platform::CPUDeviceContext, float>,
    ops::BoxClipKernel<paddle::platform::CPUDeviceContext, double>);
--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
@ -0,0 +1,74 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #include <algorithm>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detection/box_clip_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/cuda_primitives.h"
 #include "paddle/fluid/platform/hostdevice.h"
 namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
 // Was misspelled `LoDTenso`; the kernel below refers to `LoDTensor`.
 using LoDTensor = framework::LoDTensor;
 // Number of values stored per image in the im_info array read by GPUBoxClip.
 static constexpr int ImInfoSize = 3;
 // CUDA kernel that clamps box coordinates into [0, im_size - 1].
 //
 // Block `blockIdx.x` processes the elements whose flat indices lie in
 // [lod[blockIdx.x] * width, lod[blockIdx.x + 1] * width) and reads its image
 // size from im_info[blockIdx.x * ImInfoSize ...]. Threads start at
 // threadIdx.x and advance by BlockSize.
 // NOTE(review): assumes the launch uses exactly BlockSize threads per block
 // and that `width` is even, so even flat indices are x and odd ones are y.
 //
 //   input   flat box coordinates
 //   lod     absolute row offsets, one more entry than there are blocks
 //   width   number of values per row of `input`
 //   im_info ImInfoSize values per block: [0] and [1] are divided by [2]
 //   output  destination buffer, same layout as `input`
 template <typename T, int BlockSize>
 static __global__ void GPUBoxClip(const T *input, const size_t *lod,
                                   const size_t width, const T *im_info,
                                   T *output) {
  T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] /
                 im_info[blockIdx.x * ImInfoSize + 2]);
  T im_h = round(im_info[blockIdx.x * ImInfoSize] /
                 im_info[blockIdx.x * ImInfoSize + 2]);
  // Read the sequence bounds once instead of on every loop iteration, and keep
  // all offsets in size_t so large tensors are not truncated to 32-bit int.
  const size_t seq_begin = lod[blockIdx.x] * width;
  const size_t seq_len = (lod[blockIdx.x + 1] - lod[blockIdx.x]) * width;
  for (size_t i = threadIdx.x; i < seq_len; i += BlockSize) {
    const size_t idx = seq_begin + i;
    T im_size = (idx % 2 == 0) ? im_w : im_h;
    output[idx] = max(min(input[idx], im_size - 1), T(0.));
  }
 }
 // GPU implementation of box_clip: launches GPUBoxClip with one CUDA block per
 // LoD sequence of Input, each block using the matching row of ImInfo.
 template <typename DeviceContext, typename T>
 class GPUBoxClipKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
                   "This kernel only runs on GPU device.");
    auto *input = context.Input<LoDTensor>("Input");
    auto *im_info = context.Input<Tensor>("ImInfo");
    auto *output = context.Output<LoDTensor>("Output");
    auto lod = input->lod();
    // Same restriction as the CPU kernel. It also guarantees that lod.back()
    // and abs_offset_lod[0] below exist and refer to the same level.
    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support 1 level of LoD.");
    // NOTE(review): divides by the first dimension; assumes Input has at least
    // one row -- confirm how empty inputs should be handled.
    const int64_t num = input->dims()[0];
    const int64_t bbox_width = input->numel() / num;
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
    auto &dev_ctx = context.template device_context<DeviceContext>();
    auto stream = dev_ctx.stream();
    // One block per sequence: a level with k + 1 offsets describes k sequences.
    const size_t batch_size = lod.back().size() - 1;
    T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
    GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
        input->data<T>(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()),
        bbox_width, im_info->data<T>(), output_data);
  }
 };
 }  // namespace operators
 }  // namespace paddle
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
    box_clip, ops::GPUBoxClipKernel<paddle::platform::CUDADeviceContext, float>,
    ops::GPUBoxClipKernel<paddle::platform::CUDADeviceContext, double>);
--- a/paddle/fluid/operators/detection/box_clip_op.h
+++ b/paddle/fluid/operators/detection/box_clip_op.h
@ -0,0 +1,50 @@
 /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <string>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
 #include "paddle/fluid/operators/math/math_function.h"
 namespace paddle {
 namespace operators {
 using Tensor = framework::Tensor;
 using LoDTensor = framework::LoDTensor;
 // CPU implementation of box_clip: for every LoD sequence i of Input, clips
 // the rows [lod[i], lod[i + 1]) with row i of ImInfo via ClipTiledBoxes.
 template <typename DeviceContext, typename T>
 class BoxClipKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* input_box = context.Input<LoDTensor>("Input");
    auto* im_info = context.Input<LoDTensor>("ImInfo");
    auto* output_box = context.Output<LoDTensor>("Output");
    auto& dev_ctx =
        context.template device_context<platform::CPUDeviceContext>();
    output_box->mutable_data<T>(context.GetPlace());
    // Checked unconditionally: the previous `if (lod().size())` guard skipped
    // the check for an empty LoD and then called back() on an empty container.
    PADDLE_ENFORCE_EQ(input_box->lod().size(), 1UL,
                      "Only support 1 level of LoD.");
    auto box_lod = input_box->lod().back();
    // A level with k + 1 offsets describes k sequences.
    int64_t n = static_cast<int64_t>(box_lod.size() - 1);
    for (int64_t i = 0; i < n; ++i) {
      Tensor im_info_slice = im_info->Slice(i, i + 1);
      Tensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]);
      // Clipping through the slice is what fills output_box; this presumably
      // relies on Slice sharing storage with its source -- confirm.
      Tensor output_slice = output_box->Slice(box_lod[i], box_lod[i + 1]);
      ClipTiledBoxes<T>(dev_ctx, im_info_slice, box_slice, &output_slice);
    }
  }
 };
 }  // namespace operators
 }  // namespace paddle
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@ -49,6 +49,7 @@ __all__ = [
    'box_coder',
    'polygon_box_transform',
    'yolov3_loss',
    'box_clip',
    'multiclass_nms',
 ]
@ -2055,6 +2056,54 @@ def generate_proposals(scores,
    return rpn_rois, rpn_roi_probs
 def box_clip(input, im_info, name=None):
    """
    Clip the boxes to the image size derived from im_info.

    For each input box, the formula is given as follows:

    .. code-block:: text

        xmin = max(min(xmin, im_w - 1), 0)
        ymin = max(min(ymin, im_h - 1), 0)
        xmax = max(min(xmax, im_w - 1), 0)
        ymax = max(min(ymax, im_h - 1), 0)

    where im_w and im_h are computed from im_info:

    .. code-block:: text

        im_h = round(height / scale)
        im_w = round(width / scale)

    Args:
        input(variable): The input box, the last dimension is 4.
        im_info(variable): The information of image with shape [N, 3] with
                            layout (height, width, scale). height and width
                            is the input size and scale is the ratio of input
                            size and original size.
        name (str): The name of this layer. It is optional.

    Returns:
        Variable: The clipped tensor variable.

    Examples:
        .. code-block:: python

            boxes = fluid.layers.data(
                name='data', shape=[8, 4], dtype='float32', lod_level=1)
            im_info = fluid.layers.data(name='im_info', shape=[3])
            out = fluid.layers.box_clip(input=boxes, im_info=im_info)
    """
    # **locals() forwards every local name to LayerHelper; at this point the
    # locals are exactly the arguments (input, im_info, name), so no other
    # local variable may be introduced above this line.
    helper = LayerHelper("box_clip", **locals())
    output = helper.create_variable_for_type_inference(dtype=input.dtype)
    inputs = {"Input": input, "ImInfo": im_info}
    helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output})
    return output
 def multiclass_nms(bboxes,
                   scores,
                   score_threshold,
@ -2132,9 +2181,11 @@ def multiclass_nms(bboxes,
             (After version 1.3, when no boxes detected, the lod is changed 
             from {0} to {1}) 
    Examples:
        .. code-block:: python
            boxes = fluid.layers.data(name='bboxes', shape=[81, 4],
                                      dtype='float32', lod_level=1)
            scores = fluid.layers.data(name='scores', shape=[81],
--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@ -482,6 +482,17 @@ class TestYoloDetection(unittest.TestCase):
            self.assertIsNotNone(loss)
 class TestBoxClip(unittest.TestCase):
    # Smoke test: building box_clip inside a fresh Program must yield a
    # non-None output variable.
    def test_box_clip(self):
        prog = Program()
        with program_guard(prog):
            boxes = layers.data(
                name='input_box', shape=[7, 4], dtype='float32', lod_level=1)
            image_info = layers.data(
                name='im_info', shape=[3], dtype='float32')
            clipped = layers.box_clip(boxes, image_info)
            self.assertIsNotNone(clipped)
 class TestMulticlassNMS(unittest.TestCase):
    def test_multiclass_nms(self):
        program = Program()
--- a/python/paddle/fluid/tests/unittests/test_box_clip_op.py
+++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py
@ -0,0 +1,70 @@
 #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import unittest
 import numpy as np
 import sys
 import math
 from op_test import OpTest
 import copy
 def box_clip(input_box, im_info, output_box):
    """Reference clip for the boxes of one image; writes into output_box.

    im_info is (height, width, scale). Coordinates 0 and 2 of the last axis
    are clamped to [0, round(width / scale) - 1], coordinates 1 and 3 to
    [0, round(height / scale) - 1].
    """
    scale = im_info[2]
    max_x = round(im_info[1] / scale) - 1
    max_y = round(im_info[0] / scale) - 1
    # Last-axis layout is [xmin, ymin, xmax, ymax].
    for coord, upper in enumerate((max_x, max_y, max_x, max_y)):
        capped = np.minimum(input_box[:, :, coord], upper)
        output_box[:, :, coord] = np.maximum(capped, 0)
 def batch_box_clip(input_boxes, im_info, lod):
    """Clip a batch of boxes image by image and return a float32 array.

    lod[i] is the number of leading-axis rows of input_boxes that belong to
    image i; those rows are clipped with im_info[i].
    """
    num_rows, num_cols = input_boxes.shape[0], input_boxes.shape[1]
    output_boxes = np.zeros((num_rows, num_cols, 4), dtype=np.float32)
    start = 0
    for img_idx, count in enumerate(lod):
        stop = start + count
        box_clip(input_boxes[start:stop, :, :], im_info[img_idx, :],
                 output_boxes[start:stop, :, :])
        start = stop
    return output_boxes
 class TestBoxClipOp(OpTest):
    # Compares the box_clip operator against the numpy reference
    # batch_box_clip on random boxes split into three sequences.
    def setUp(self):
        self.op_type = "box_clip"
        seq_lens = [[1, 2, 3]]
        boxes = np.random.random((6, 10, 4)) * 5
        image_info = np.array([[5, 8, 1.], [6, 6, 1.], [7, 5, 1.]])
        expected = batch_box_clip(boxes, image_info, seq_lens[0])
        self.inputs = {
            'Input': (boxes.astype('float32'), seq_lens),
            'ImInfo': image_info.astype('float32'),
        }
        self.outputs = {'Output': expected}

    def test_check_output(self):
        self.check_output()
 # Run the unittest runner when this file is executed as a script.
 if __name__ == '__main__':
    unittest.main()