From 481d8bce2fa10c5c729b146c6925e46d434d22d6 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 16 Jan 2019 06:42:31 +0000 Subject: [PATCH 01/15] add box clip op --- paddle/fluid/API.spec | 2 + .../fluid/operators/detection/CMakeLists.txt | 1 + paddle/fluid/operators/detection/bbox_util.h | 24 ++++++ .../fluid/operators/detection/box_clip_op.cc | 74 +++++++++++++++++++ .../fluid/operators/detection/box_clip_op.h | 50 +++++++++++++ python/paddle/fluid/layers/detection.py | 66 ++++++++++++----- python/paddle/fluid/tests/test_detection.py | 14 +++- .../fluid/tests/unittests/test_box_clip_op.py | 70 ++++++++++++++++++ 8 files changed, 282 insertions(+), 19 deletions(-) create mode 100644 paddle/fluid/operators/detection/box_clip_op.cc create mode 100644 paddle/fluid/operators/detection/box_clip_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_box_clip_op.py diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 50ffef72ba..cfde0fdf0c 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -318,6 +318,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) +paddle.fluid.layers.box_clip ArgSpec(args=['input_box', 'im_info', 'inplace', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) @@ -494,6 +495,7 @@ paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=N paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None) paddle.reader.chain ArgSpec(args=[], varargs='readers', keywords=None, defaults=None) paddle.reader.shuffle ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None) +paddle.reader.ComposeNotAligned.__init__ paddle.reader.firstn ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None) paddle.reader.xmap_readers ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)) paddle.reader.PipeReader.__init__ ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')) diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 6c85f1577e..b0f023935d 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -31,6 +31,7 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc 
polygon_box_transform_op.cu) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) +detection_library(box_clip_op SRCS box_clip_op.cc) if(WITH_GPU) detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub) diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index 6abeca1da4..ba16c9565f 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -93,5 +93,29 @@ void BboxOverlaps(const framework::Tensor& r_boxes, } } +template +void ClipTiledBoxes(const platform::DeviceContext& ctx, + const framework::Tensor& im_info, + const framework::Tensor& input_boxes, + framework::Tensor* out) { + T* out_data = out->mutable_data(ctx.GetPlace()); + const T* im_info_data = im_info.data(); + const T* input_boxes_data = input_boxes.data(); + T zero(0); + T im_w = round(im_info_data[1] / im_info_data[2]); + T im_h = round(im_info_data[0] / im_info_data[2]); + for (int64_t i = 0; i < input_boxes.numel(); ++i) { + if (i % 4 == 0) { + out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero); + } else if (i % 4 == 1) { + out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero); + } else if (i % 4 == 2) { + out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero); + } else { + out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero); + } + } +} + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc new file mode 100644 index 0000000000..b185f12796 --- /dev/null +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -0,0 +1,74 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/detection/box_clip_op.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +class BoxClipOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("InputBox"), + "Input(InputBox) of BoxClipOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("ImInfo"), + "Input(ImInfo) of BoxClipOp should not be null."); + + auto input_box_dims = ctx->GetInputDim("InputBox"); + auto im_info_dims = ctx->GetInputDim("ImInfo"); + + if (ctx->IsRuntime()) { + auto input_box_size = input_box_dims.size(); + PADDLE_ENFORCE_EQ(input_box_dims[input_box_size - 1], 4, + "The last dimension of InputBox must be 4"); + PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, + "The rank of Input(InputBox) in BoxClipOp must be 2"); + PADDLE_ENFORCE_EQ(im_info_dims[1], 2, + "The last dimension of ImInfo must be 2"); + } + ctx->ShareDim("InputBox", /*->*/ "OutputBox"); + ctx->ShareLoD("InputBox", /*->*/ "OutputBox"); + } +}; + +class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("InputBox", + "(LoDTensor) " + "InputBox is a LoDTensor with shape [..., 4] holds 4 points" + "in last dimension in format [xmin, ymin, xmax, ymax]"); + AddInput("ImInfo", + "(Tensor) Information for image reshape is in shape (N, 2), " + "in format (height, width)"); + AddOutput("OutputBox", + "(LoDTensor) " + "OutputBox is a LoDTensor with the same shape as InputBox" + "and it is the result after clip"); + AddComment(R"DOC( + This operator clips input boxes to original input images. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(box_clip, ops::BoxClipOp, ops::BoxClipOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + box_clip, ops::BoxClipKernel, + ops::BoxClipKernel); diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h new file mode 100644 index 0000000000..88d35d2a88 --- /dev/null +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detection/bbox_util.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; + +template +class BoxClipKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* input_box = context.Input("InputBox"); + auto* im_info = context.Input("ImInfo"); + auto* output_box = context.Output("OutputBox"); + auto& dev_ctx = + context.template device_context(); + output_box->mutable_data(context.GetPlace()); + if (input_box->lod().size()) { + PADDLE_ENFORCE_EQ(input_box->lod().size(), 1UL, + "Only support 1 level of LoD."); + } + auto box_lod = input_box->lod().back(); + int64_t n = static_cast(box_lod.size() - 1); + for (int i = 0; i < n; ++i) { + Tensor im_info_slice = im_info->Slice(i, i + 1); + Tensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]); + Tensor output_slice = output_box->Slice(box_lod[i], box_lod[i + 1]); + ClipTiledBoxes(dev_ctx, im_info_slice, box_slice, &output_slice); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 8aed97dc59..daeb10c1d6 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -31,23 +31,11 @@ import numpy from functools import reduce __all__ = [ - 'prior_box', - 'density_prior_box', - 'multi_box_head', - 'bipartite_match', - 'target_assign', - 'detection_output', - 'ssd_loss', - 'detection_map', - 'rpn_target_assign', - 'anchor_generator', - 'roi_perspective_transform', - 'generate_proposal_labels', - 'generate_proposals', - 'iou_similarity', - 'box_coder', - 'polygon_box_transform', - 'yolov3_loss', + 'prior_box', 'density_prior_box', 'multi_box_head', 'bipartite_match', + 'target_assign', 'detection_output', 'ssd_loss', 'detection_map', + 'rpn_target_assign', 'anchor_generator', 'roi_perspective_transform', + 'generate_proposal_labels', 'generate_proposals', 'iou_similarity', + 'box_coder', 'polygon_box_transform', 'yolov3_loss', 'box_clip' ] @@ -1810,3 +1798,47 @@ def generate_proposals(scores, rpn_roi_probs.stop_gradient = True return rpn_rois, rpn_roi_probs + + +def box_clip(input_box, im_info, inplace=False, name=None): + """ + Clip the box into the size given by im_info + + Args: + input_box(variable): The input box, the last dimension is 4. + im_info(variable): The information of image with shape [N, 3]. + inplace(bool): Must use :attr:`False` if :attr:`input_box` is used in + multiple operators. If this flag is set :attr:`True`, + reuse input :attr:`input_box` to clip, which will + change the value of tensor variable :attr:`input_box` + and might cause errors when :attr:`input_box` is used + in multiple operators. If :attr:`False`, preserve the + value pf :attr:`input_box` and create a new output + tensor variable whose data is copied from input x but + cliped. + name (str): The name of this layer. It is optional. + + Returns: + Variable: The cliped tensor variable. + + Examples: + .. 
code-block:: python + + boxes = fluid.layers.data( + name='data', shape=[8, 4], dtype='float32', lod_level=1) + im_info = fluid.layers.data(name='im_info', shape=[3]) + out = fluid.layers.box_clip( + input_box=boxes, im_info=im_info, inplace=True) + """ + + inputs = {"InputBox": input_box, "ImInfo": im_info} + + helper = LayerHelper("box_clip", **locals()) + output = helper.create_variable_for_type_inference(dtype=input_box.dtype) + helper.append_op( + type="box_clip", + inputs=inputs, + attrs={"inplace:": inplace}, + outputs={"OutputBox": output}) + + return output diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index d99eaa0634..bbc372da1a 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -354,8 +354,7 @@ class TestGenerateProposals(unittest.TestCase): data_shape = [20, 64, 64] images = fluid.layers.data( name='images', shape=data_shape, dtype='float32') - im_info = fluid.layers.data( - name='im_info', shape=[1, 3], dtype='float32') + im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32') anchors, variances = fluid.layers.anchor_generator( name='anchor_generator', input=images, @@ -401,5 +400,16 @@ class TestYoloDetection(unittest.TestCase): self.assertIsNotNone(loss) +class TestBoxClip(unittest.TestCase): + def test_box_clip(self): + program = Program() + with program_guard(program): + input_box = layers.data( + name='input_box', shape=[7, 4], dtype='float32', lod_level=1) + im_info = layers.data(name='im_info', shape=[3], dtype='float32') + out = layers.box_clip(input_box, im_info) + self.assertIsNotNone(out) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_box_clip_op.py b/python/paddle/fluid/tests/unittests/test_box_clip_op.py new file mode 100644 index 0000000000..6cd3f21a6e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py @@ -0,0 +1,70 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
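+
+# Note: the reference helpers below compute the expected output of the
+# box_clip op. im_info holds (height, width, scale) for each image; the
+# effective size is im_w = round(width / scale), im_h = round(height / scale),
+# and each x-coordinate is clamped to [0, im_w - 1], each y-coordinate to
+# [0, im_h - 1]. batch_box_clip applies this per LoD segment, using the
+# corresponding row of im_info.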
+ +from __future__ import print_function + +import unittest +import numpy as np +import sys +import math +from op_test import OpTest +import copy + + +def box_clip(input_box, im_info, output_box): + im_w = round(im_info[1] / im_info[2]) + im_h = round(im_info[0] / im_info[2]) + output_box[:, :, 0] = np.maximum( + np.minimum(input_box[:, :, 0], im_w - 1), 0) + output_box[:, :, 1] = np.maximum( + np.minimum(input_box[:, :, 1], im_h - 1), 0) + output_box[:, :, 2] = np.maximum( + np.minimum(input_box[:, :, 2], im_w - 1), 0) + output_box[:, :, 3] = np.maximum( + np.minimum(input_box[:, :, 3], im_h - 1), 0) + + +def batch_box_clip(input_boxes, im_info, lod): + n = input_boxes.shape[0] + m = input_boxes.shape[1] + output_boxes = np.zeros((n, m, 4), dtype=np.float32) + cur_offset = 0 + for i in range(len(lod)): + box_clip(input_boxes[cur_offset:(cur_offset + lod[i]), :, :], + im_info[i, :], + output_boxes[cur_offset:(cur_offset + lod[i]), :, :]) + cur_offset += lod[i] + return output_boxes + + +class TestBoxClipOp(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_clip" + lod = [[1, 2, 3]] + input_boxes = np.random.random((6, 10, 4)) * 5 + im_info = np.array([[5, 8, 1.], [6, 6, 1.], [7, 5, 1.]]) + output_boxes = batch_box_clip(input_boxes, im_info, lod[0]) + + self.inputs = { + 'InputBox': (input_boxes.astype('float32'), lod), + 'ImInfo': im_info.astype('float32'), + } + self.outputs = {'OutputBox': output_boxes} + + +if __name__ == '__main__': + unittest.main() From d30aa89fa50c3f431cb5c9351a478c28176c7c5c Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 16 Jan 2019 06:46:22 +0000 Subject: [PATCH 02/15] test=develop --- python/paddle/fluid/layers/detection.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index daeb10c1d6..477ae67d0b 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1831,10 +1831,9 @@ def box_clip(input_box, im_info, inplace=False, name=None): input_box=boxes, im_info=im_info, inplace=True) """ - inputs = {"InputBox": input_box, "ImInfo": im_info} - helper = LayerHelper("box_clip", **locals()) output = helper.create_variable_for_type_inference(dtype=input_box.dtype) + inputs = {"InputBox": input_box, "ImInfo": im_info} helper.append_op( type="box_clip", inputs=inputs, From af448373c723ecea6a958d5ee831b0ff8860b715 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 16 Jan 2019 09:50:36 +0000 Subject: [PATCH 03/15] test=develop --- paddle/fluid/operators/detection/box_clip_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index b185f12796..1e6ad7cbb3 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -36,7 +36,7 @@ class BoxClipOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, "The rank of Input(InputBox) in BoxClipOp must be 2"); PADDLE_ENFORCE_EQ(im_info_dims[1], 2, - "The last dimension of ImInfo must be 2"); + "The last dimension of ImInfo must be 3"); } ctx->ShareDim("InputBox", /*->*/ "OutputBox"); ctx->ShareLoD("InputBox", /*->*/ "OutputBox"); @@ -51,8 +51,8 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { "InputBox is a LoDTensor with shape [..., 4] holds 4 points" "in last dimension in format [xmin, ymin, xmax, ymax]"); 
AddInput("ImInfo", - "(Tensor) Information for image reshape is in shape (N, 2), " - "in format (height, width)"); + "(Tensor) Information for image reshape is in shape (N, 3), " + "in format (height, width, im_scale)"); AddOutput("OutputBox", "(LoDTensor) " "OutputBox is a LoDTensor with the same shape as InputBox" From e2044c09e9bc4c078e2b9c66a193078313562c9c Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 16 Jan 2019 11:04:40 +0000 Subject: [PATCH 04/15] test=develop --- paddle/fluid/API.spec | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index cfde0fdf0c..eff8defaf7 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -495,7 +495,6 @@ paddle.reader.buffered ArgSpec(args=['reader', 'size'], varargs=None, keywords=N paddle.reader.compose ArgSpec(args=[], varargs='readers', keywords='kwargs', defaults=None) paddle.reader.chain ArgSpec(args=[], varargs='readers', keywords=None, defaults=None) paddle.reader.shuffle ArgSpec(args=['reader', 'buf_size'], varargs=None, keywords=None, defaults=None) -paddle.reader.ComposeNotAligned.__init__ paddle.reader.firstn ArgSpec(args=['reader', 'n'], varargs=None, keywords=None, defaults=None) paddle.reader.xmap_readers ArgSpec(args=['mapper', 'reader', 'process_num', 'buffer_size', 'order'], varargs=None, keywords=None, defaults=(False,)) paddle.reader.PipeReader.__init__ ArgSpec(args=['self', 'command', 'bufsize', 'file_type'], varargs=None, keywords=None, defaults=(8192, 'plain')) From 5fb2856584d0d0fcde54f86d249c5fc9adab41e5 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 16 Jan 2019 13:13:55 +0000 Subject: [PATCH 05/15] test_develop --- paddle/fluid/operators/detection/box_clip_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 1e6ad7cbb3..609bd5606b 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -35,7 +35,7 @@ class BoxClipOp : public framework::OperatorWithKernel { "The last dimension of InputBox must be 4"); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, "The rank of Input(InputBox) in BoxClipOp must be 2"); - PADDLE_ENFORCE_EQ(im_info_dims[1], 2, + PADDLE_ENFORCE_EQ(im_info_dims[1], 3, "The last dimension of ImInfo must be 3"); } ctx->ShareDim("InputBox", /*->*/ "OutputBox"); From b10d84bc5aaee83c2f25e077c4f38461aafe3928 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Mon, 21 Jan 2019 03:05:53 +0000 Subject: [PATCH 06/15] fix bug when run on GPU, test=develop --- paddle/fluid/operators/detection/box_clip_op.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 609bd5606b..fb94d0fbc6 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -20,7 +20,7 @@ class BoxClipOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(framework::InferShapeContext *ctx) const override { + void InferShape(framework::InferShapeContext* ctx) const override { PADDLE_ENFORCE(ctx->HasInput("InputBox"), "Input(InputBox) of BoxClipOp should not be null."); PADDLE_ENFORCE(ctx->HasInput("ImInfo"), @@ -41,6 +41,13 @@ class BoxClipOp : public framework::OperatorWithKernel { ctx->ShareDim("InputBox", /*->*/ "OutputBox"); ctx->ShareLoD("InputBox", /*->*/ 
"OutputBox"); } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("RpnRois")); + return framework::OpKernelType(data_type, platform::CPUPlace()); + } }; class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { From 5246285e3431c4e8dfc0f2193dac038649ced9c9 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Mon, 21 Jan 2019 03:11:49 +0000 Subject: [PATCH 07/15] test=develop --- paddle/fluid/operators/detection/box_clip_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index fb94d0fbc6..e47027d98c 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -45,7 +45,7 @@ class BoxClipOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("RpnRois")); + auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("InputBox")); return framework::OpKernelType(data_type, platform::CPUPlace()); } }; From 1c558ad388aa8b9d256e90d6640b82f5170e3a18 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Tue, 22 Jan 2019 12:26:12 +0000 Subject: [PATCH 08/15] add gpu kernel for box clip, test=develop --- .../fluid/operators/detection/CMakeLists.txt | 2 +- .../fluid/operators/detection/box_clip_op.cc | 45 +++++++++++-------- .../fluid/operators/detection/box_clip_op.h | 4 +- python/paddle/fluid/layers/detection.py | 42 ++++++++++++----- .../fluid/tests/unittests/test_box_clip_op.py | 4 +- 5 files changed, 63 insertions(+), 34 deletions(-) diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index b0f023935d..1c9e8a454c 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -31,7 +31,7 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc polygon_box_transform_op.cu) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) -detection_library(box_clip_op SRCS box_clip_op.cc) +detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) if(WITH_GPU) detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub) diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index e47027d98c..15adcdedae 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -21,51 +21,58 @@ class BoxClipOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("InputBox"), - "Input(InputBox) of BoxClipOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of BoxClipOp should not be null."); PADDLE_ENFORCE(ctx->HasInput("ImInfo"), "Input(ImInfo) of BoxClipOp should not be null."); - auto input_box_dims = ctx->GetInputDim("InputBox"); + auto input_box_dims = ctx->GetInputDim("Input"); auto im_info_dims = ctx->GetInputDim("ImInfo"); if (ctx->IsRuntime()) { auto input_box_size = input_box_dims.size(); PADDLE_ENFORCE_EQ(input_box_dims[input_box_size - 
1], 4, - "The last dimension of InputBox must be 4"); + "The last dimension of Input must be 4"); PADDLE_ENFORCE_EQ(im_info_dims.size(), 2, - "The rank of Input(InputBox) in BoxClipOp must be 2"); + "The rank of Input(Input) in BoxClipOp must be 2"); PADDLE_ENFORCE_EQ(im_info_dims[1], 3, "The last dimension of ImInfo must be 3"); } - ctx->ShareDim("InputBox", /*->*/ "OutputBox"); - ctx->ShareLoD("InputBox", /*->*/ "OutputBox"); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("InputBox")); - return framework::OpKernelType(data_type, platform::CPUPlace()); + ctx->ShareDim("Input", /*->*/ "Output"); + ctx->ShareLoD("Input", /*->*/ "Output"); } + /* + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Input")); + return framework::OpKernelType(data_type, platform::CPUPlace()); + } + */ }; class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("InputBox", + AddInput("Input", "(LoDTensor) " - "InputBox is a LoDTensor with shape [..., 4] holds 4 points" + "Input is a LoDTensor with shape [..., 4] holds 4 points" "in last dimension in format [xmin, ymin, xmax, ymax]"); AddInput("ImInfo", "(Tensor) Information for image reshape is in shape (N, 3), " "in format (height, width, im_scale)"); - AddOutput("OutputBox", + AddOutput("Output", "(LoDTensor) " - "OutputBox is a LoDTensor with the same shape as InputBox" + "Output is a LoDTensor with the same shape as Input" "and it is the result after clip"); AddComment(R"DOC( - This operator clips input boxes to original input images. +This operator clips input boxes to original input images. 
+ +The formula is given as follows: + + $$height_out = \max(\min(height_loc, im_h), 0)$$ + $$width_out = \max(\min(width_loc, im_w), 0)$$ + )DOC"); } }; diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index 88d35d2a88..74e1f88f8d 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -25,9 +25,9 @@ template class BoxClipKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input_box = context.Input("InputBox"); + auto* input_box = context.Input("Input"); auto* im_info = context.Input("ImInfo"); - auto* output_box = context.Output("OutputBox"); + auto* output_box = context.Output("Output"); auto& dev_ctx = context.template device_context(); output_box->mutable_data(context.GetPlace()); diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 477ae67d0b..3e2882ea3c 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -31,11 +31,24 @@ import numpy from functools import reduce __all__ = [ - 'prior_box', 'density_prior_box', 'multi_box_head', 'bipartite_match', - 'target_assign', 'detection_output', 'ssd_loss', 'detection_map', - 'rpn_target_assign', 'anchor_generator', 'roi_perspective_transform', - 'generate_proposal_labels', 'generate_proposals', 'iou_similarity', - 'box_coder', 'polygon_box_transform', 'yolov3_loss', 'box_clip' + 'prior_box', + 'density_prior_box', + 'multi_box_head', + 'bipartite_match', + 'target_assign', + 'detection_output', + 'ssd_loss', + 'detection_map', + 'rpn_target_assign', + 'anchor_generator', + 'roi_perspective_transform', + 'generate_proposal_labels', + 'generate_proposals', + 'iou_similarity', + 'box_coder', + 'polygon_box_transform', + 'yolov3_loss', + 'box_clip', ] @@ -1800,13 +1813,22 @@ def generate_proposals(scores, return rpn_rois, rpn_roi_probs -def box_clip(input_box, im_info, inplace=False, name=None): +def box_clip(input, im_info, inplace=False, name=None): """ Clip the box into the size given by im_info + The formula is given as follows: + + .. code-block:: text + + height_out = max(min(height_loc, im_h), 0) + width_out = max(min(width_loc, im_w), 0) Args: input_box(variable): The input box, the last dimension is 4. - im_info(variable): The information of image with shape [N, 3]. + im_info(variable): The information of image with shape [N, 3] with + layout (height, width, scale). height and width + is the input size and scale is the ratio of input + size and original size. inplace(bool): Must use :attr:`False` if :attr:`input_box` is used in multiple operators. 
If this flag is set :attr:`True`, reuse input :attr:`input_box` to clip, which will @@ -1832,12 +1854,12 @@ def box_clip(input_box, im_info, inplace=False, name=None): """ helper = LayerHelper("box_clip", **locals()) - output = helper.create_variable_for_type_inference(dtype=input_box.dtype) - inputs = {"InputBox": input_box, "ImInfo": im_info} + output = helper.create_variable_for_type_inference(dtype=input.dtype) + inputs = {"Input": input, "ImInfo": im_info} helper.append_op( type="box_clip", inputs=inputs, attrs={"inplace:": inplace}, - outputs={"OutputBox": output}) + outputs={"Output": output}) return output diff --git a/python/paddle/fluid/tests/unittests/test_box_clip_op.py b/python/paddle/fluid/tests/unittests/test_box_clip_op.py index 6cd3f21a6e..b2b0598f31 100644 --- a/python/paddle/fluid/tests/unittests/test_box_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py @@ -60,10 +60,10 @@ class TestBoxClipOp(OpTest): output_boxes = batch_box_clip(input_boxes, im_info, lod[0]) self.inputs = { - 'InputBox': (input_boxes.astype('float32'), lod), + 'Input': (input_boxes.astype('float32'), lod), 'ImInfo': im_info.astype('float32'), } - self.outputs = {'OutputBox': output_boxes} + self.outputs = {'Output': output_boxes} if __name__ == '__main__': From b449f8ff2fb31714c998ddfe5978a36d24222105 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 23 Jan 2019 02:16:25 +0000 Subject: [PATCH 09/15] revised API spec, test=develop --- paddle/fluid/API.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index eff8defaf7..078021616b 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -318,7 +318,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) -paddle.fluid.layers.box_clip ArgSpec(args=['input_box', 'im_info', 'inplace', 'name'], varargs=None, keywords=None, defaults=(False, None)) +paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'inplace', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) From 57e5f61ec8b6822bd897df15478c646cf347097b Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 23 Jan 2019 05:50:09 +0000 Subject: [PATCH 10/15] add gpu kernel, test=develop --- .../fluid/operators/detection/box_clip_op.cu | 74 +++++++++++++++++++ python/paddle/fluid/tests/test_detection.py | 3 +- 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 
paddle/fluid/operators/detection/box_clip_op.cu diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu new file mode 100644 index 0000000000..f10c92366d --- /dev/null +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -0,0 +1,74 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detection/box_clip_op.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/hostdevice.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTenso = framework::LoDTensor; + +static constexpr int ImInfoSize = 3; + +template +static __global__ void GPUBoxClip(const T *input, const size_t *lod, + const size_t width, const T *im_info, + T *output) { + for (int i = threadIdx.x; i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * width; + i += BlockSize) { + int idx = lod[blockIdx.x] * width + i; + T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] / + im_info[blockIdx.x * ImInfoSize + 2]); + T im_h = round(im_info[blockIdx.x * ImInfoSize] / + im_info[blockIdx.x * ImInfoSize + 2]); + T im_size = (idx % 2 == 0) ? 
im_w : im_h; + output[idx] = max(min(input[idx], im_size - 1), T(0.)); + } +} + +template +class GPUBoxClipKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), + "This kernel only runs on GPU device."); + auto *input = context.Input("Input"); + auto *im_info = context.Input("ImInfo"); + auto *output = context.Output("Output"); + const int64_t num = input->dims()[0]; + const int64_t bbox_width = input->numel() / num; + auto lod = input->lod(); + framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); + auto &dev_ctx = context.template device_context(); + auto stream = dev_ctx.stream(); + const size_t num_lod = lod.back().size() - 1; + T *output_data = output->mutable_data(dev_ctx.GetPlace()); + GPUBoxClip<<>>( + input->data(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()), + bbox_width, im_info->data(), output_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + box_clip, ops::GPUBoxClipKernel, + ops::GPUBoxClipKernel); diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index bbc372da1a..4d8f2b1db1 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -354,7 +354,8 @@ class TestGenerateProposals(unittest.TestCase): data_shape = [20, 64, 64] images = fluid.layers.data( name='images', shape=data_shape, dtype='float32') - im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32') + im_info = fluid.layers.data( + name='im_info', shape=[1, 3], dtype='float32') anchors, variances = fluid.layers.anchor_generator( name='anchor_generator', input=images, From 11f1baa4061af460d60f31aa1ca9863695b24227 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 23 Jan 2019 09:13:48 +0000 Subject: [PATCH 11/15] refine code, test=develop --- .../fluid/operators/detection/box_clip_op.cc | 20 +++++----- .../fluid/operators/detection/box_clip_op.cu | 12 +++--- python/paddle/fluid/layers/detection.py | 38 +++++++++++-------- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 15adcdedae..3aa766559a 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -41,14 +41,6 @@ class BoxClipOp : public framework::OperatorWithKernel { ctx->ShareDim("Input", /*->*/ "Output"); ctx->ShareLoD("Input", /*->*/ "Output"); } - /* - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Input")); - return framework::OpKernelType(data_type, platform::CPUPlace()); - } - */ }; class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { @@ -68,11 +60,17 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( This operator clips input boxes to original input images. 
-The formula is given as follows: +For each input box, The formula is given as follows: - $$height_out = \max(\min(height_loc, im_h), 0)$$ - $$width_out = \max(\min(width_loc, im_w), 0)$$ + $$xmin = \max(\min(xmin, im_w - 1), 0)$$ + $$ymin = \max(\min(ymin, im_h - 1), 0)$$ + $$xmax = \max(\min(xmax, im_w - 1), 0)$$ + $$ymax = \max(\min(ymax, im_h - 1), 0)$$ +where im_w and im_h are computed from ImInfo, the formula is given as follows: + + $$im_w = \round(width / im_scale)$$ + $$im_h = \round(height / im_scale)$$ )DOC"); } }; diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index f10c92366d..b727da5f7b 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -30,13 +30,13 @@ template static __global__ void GPUBoxClip(const T *input, const size_t *lod, const size_t width, const T *im_info, T *output) { + T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] / + im_info[blockIdx.x * ImInfoSize + 2]); + T im_h = round(im_info[blockIdx.x * ImInfoSize] / + im_info[blockIdx.x * ImInfoSize + 2]); for (int i = threadIdx.x; i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * width; i += BlockSize) { int idx = lod[blockIdx.x] * width + i; - T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] / - im_info[blockIdx.x * ImInfoSize + 2]); - T im_h = round(im_info[blockIdx.x * ImInfoSize] / - im_info[blockIdx.x * ImInfoSize + 2]); T im_size = (idx % 2 == 0) ? im_w : im_h; output[idx] = max(min(input[idx], im_size - 1), T(0.)); } @@ -57,9 +57,9 @@ class GPUBoxClipKernel : public framework::OpKernel { framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); auto &dev_ctx = context.template device_context(); auto stream = dev_ctx.stream(); - const size_t num_lod = lod.back().size() - 1; + const size_t batch_size = lod.back().size() - 1; T *output_data = output->mutable_data(dev_ctx.GetPlace()); - GPUBoxClip<<>>( + GPUBoxClip<<>>( input->data(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()), bbox_width, im_info->data(), output_data); } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 3e2882ea3c..9fc23da70e 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1816,26 +1816,35 @@ def generate_proposals(scores, def box_clip(input, im_info, inplace=False, name=None): """ Clip the box into the size given by im_info - The formula is given as follows: + For each input box, The formula is given as follows: .. code-block:: text - height_out = max(min(height_loc, im_h), 0) - width_out = max(min(width_loc, im_w), 0) + xmin = max(min(xmin, im_w - 1), 0) + ymin = max(min(ymin, im_h - 1), 0) + xmax = max(min(xmax, im_w - 1), 0) + ymax = max(min(ymax, im_h - 1), 0) + + where im_w and im_h are computed from im_info: + + .. code-block:: text + + im_h = round(height / scale) + im_w = round(weight / scale) Args: - input_box(variable): The input box, the last dimension is 4. + input(variable): The input box, the last dimension is 4. im_info(variable): The information of image with shape [N, 3] with layout (height, width, scale). height and width is the input size and scale is the ratio of input size and original size. - inplace(bool): Must use :attr:`False` if :attr:`input_box` is used in + inplace(bool): Must use :attr:`False` if :attr:`input` is used in multiple operators. 
If this flag is set :attr:`True`, - reuse input :attr:`input_box` to clip, which will - change the value of tensor variable :attr:`input_box` - and might cause errors when :attr:`input_box` is used + reuse input :attr:`input` to clip, which will + change the value of tensor variable :attr:`input` + and might cause errors when :attr:`input` is used in multiple operators. If :attr:`False`, preserve the - value pf :attr:`input_box` and create a new output + value pf :attr:`input` and create a new output tensor variable whose data is copied from input x but cliped. name (str): The name of this layer. It is optional. @@ -1850,16 +1859,13 @@ def box_clip(input, im_info, inplace=False, name=None): name='data', shape=[8, 4], dtype='float32', lod_level=1) im_info = fluid.layers.data(name='im_info', shape=[3]) out = fluid.layers.box_clip( - input_box=boxes, im_info=im_info, inplace=True) + input=boxes, im_info=im_info, inplace=True) """ helper = LayerHelper("box_clip", **locals()) - output = helper.create_variable_for_type_inference(dtype=input.dtype) + output = x if inplace else helper.create_variable_for_type_inference(\ + dtype=input.dtype) inputs = {"Input": input, "ImInfo": im_info} - helper.append_op( - type="box_clip", - inputs=inputs, - attrs={"inplace:": inplace}, - outputs={"Output": output}) + helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output}) return output From aaf756272f4d590e3f33eafd262e0fca2e0e6109 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Mon, 28 Jan 2019 06:11:04 +0000 Subject: [PATCH 12/15] remove inplace arg, test=develop --- paddle/fluid/API.spec | 2 +- python/paddle/fluid/layers/detection.py | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index b24c844b4b..799fbb0f75 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -324,7 +324,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) -paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'inplace', 'name'], varargs=None, keywords=None, defaults=(False, None)) +paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 4fd7e5739c..fe2baa108c 100644 --- a/python/paddle/fluid/layers/detection.py +++ 
b/python/paddle/fluid/layers/detection.py @@ -1963,7 +1963,7 @@ def generate_proposals(scores, return rpn_rois, rpn_roi_probs -def box_clip(input, im_info, inplace=False, name=None): +def box_clip(input, im_info, name=None): """ Clip the box into the size given by im_info For each input box, The formula is given as follows: @@ -1988,15 +1988,6 @@ def box_clip(input, im_info, inplace=False, name=None): layout (height, width, scale). height and width is the input size and scale is the ratio of input size and original size. - inplace(bool): Must use :attr:`False` if :attr:`input` is used in - multiple operators. If this flag is set :attr:`True`, - reuse input :attr:`input` to clip, which will - change the value of tensor variable :attr:`input` - and might cause errors when :attr:`input` is used - in multiple operators. If :attr:`False`, preserve the - value pf :attr:`input` and create a new output - tensor variable whose data is copied from input x but - cliped. name (str): The name of this layer. It is optional. Returns: @@ -2013,8 +2004,7 @@ def box_clip(input, im_info, inplace=False, name=None): """ helper = LayerHelper("box_clip", **locals()) - output = x if inplace else helper.create_variable_for_type_inference(\ - dtype=input.dtype) + output = helper.create_variable_for_type_inference(dtype=input.dtype) inputs = {"Input": input, "ImInfo": im_info} helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output}) From 334f697da9e7f21a961001a4c4171ec1e6d3186d Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 30 Jan 2019 03:11:13 +0000 Subject: [PATCH 13/15] test=develop --- python/paddle/fluid/layers/detection.py | 3 ++- python/paddle/fluid/tests/test_detection.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index b629f54d51..63d8bd4dc7 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -2010,9 +2010,10 @@ def box_clip(input, im_info, name=None): output = helper.create_variable_for_type_inference(dtype=input.dtype) inputs = {"Input": input, "ImInfo": im_info} helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output}) - + return output + def multiclass_nms(bboxes, scores, score_threshold, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 3eab9b99e2..5e21dda967 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -479,6 +479,7 @@ class TestBoxClip(unittest.TestCase): out = layers.box_clip(input_box, im_info) self.assertIsNotNone(out) + class TestMulticlassNMS(unittest.TestCase): def test_multiclass_nms(self): program = Program() From e402c0ec7d813264d76841fc4972ebc631f7696e Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Wed, 30 Jan 2019 03:14:19 +0000 Subject: [PATCH 14/15] test=develop --- paddle/fluid/API.spec | 6 +- paddle/fluid/operators/interpolate_op.cc | 70 ++++++ paddle/fluid/operators/interpolate_op.cu | 104 ++++++--- paddle/fluid/operators/interpolate_op.h | 111 +++++++--- python/paddle/fluid/layers/nn.py | 202 ++++++++++++++++-- .../unittests/test_bilinear_interp_op.py | 102 +++++++-- .../tests/unittests/test_nearest_interp_op.py | 63 ++++-- 7 files changed, 551 insertions(+), 107 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 690218b874..ad759c2eda 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -142,10 +142,10 @@ 
paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon', paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)) paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)) paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)) -paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None)) +paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)) paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)) -paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape'], varargs=None, keywords=None, defaults=(None, None, None, None)) -paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape'], varargs=None, keywords=None, defaults=(None, None, None, None)) +paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)) +paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)) paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)) diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 93dd3f794f..de91ba6270 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -82,6 +82,18 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { "bilinear interpolation and \"nearest\" for nearest " "neighbor interpolation.") .SetDefault("bilinear"); + AddAttr( + "align_corners", + "an optinal bool. Defaults to True. " + "If True, the centers of 4 corner pixels of the input and output " + "tensors are aligned, preserving the values at the corner pixels, " + "if Flase, are not aligned") + .SetDefault(true); + AddAttr("align_mode", + "(int, default \'1\'), optional for bilinear interpolation" + "can be \'0\' for src_idx = scale*(dst_indx+0.5)-0.5 , " + "can be \'1\' for src_idx = scale*dst_index .") + .SetDefault(1); AddComment(R"DOC( This operator samples input X to given output shape by using specified interpolation method, the interpolation methods can be \"nearest\" @@ -98,6 +110,64 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker { to perform linear interpolation first in one direction, and then again in the other direction. 
+ Align_corners and align_mode are optinal parameters,the calculation method + of interpolation can be selected by them. + + Example: + + For scale: + + if align_corners = True and out_{size}>1 : + + scale_{factor} = (in_{size}-1.0)/(out_{size}-1.0) + + else: + + scale_{factor} = float(in_{size}/out_{size}) + + + Nearest neighbor interpolation: + + if: + align_corners = False + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor + W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + + else: + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) + + Bilinear interpolation: + + if: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + + + else: + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} + + + For details of nearest neighbor interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 99ac725f73..b887878ea2 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -23,7 +23,8 @@ __global__ void KeNearestNeighborInterpFw( const T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w) { + const size_t num_channels, const float ratio_h, const float ratio_w, + const bool align_corners) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -35,10 +36,14 @@ __global__ void KeNearestNeighborInterpFw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = static_cast(ratio_h * out_img_idy + 0.5); + int in_img_idy = (align_corners) + ? static_cast(ratio_h * out_img_idy + 0.5) + : static_cast(ratio_h * out_img_idy); int out_img_idx = tid % out_img_w; - int in_img_idx = static_cast(ratio_w * out_img_idx + 0.5); + int in_img_idx = (align_corners) + ? static_cast(ratio_w * out_img_idx + 0.5) + : static_cast(ratio_w * out_img_idx); out[tid] = in[out_id_h * input_w + channel_id * in_img_size + in_img_idy * in_img_w + in_img_idx]; @@ -50,7 +55,8 @@ __global__ void KeNearestNeighborInterpBw( T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, const T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w) { + const size_t num_channels, const float ratio_h, const float ratio_w, + const bool align_corners) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; @@ -62,10 +68,14 @@ __global__ void KeNearestNeighborInterpBw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = static_cast(ratio_h * out_img_idy + 0.5); + int in_img_idy = (align_corners) + ? 
static_cast(ratio_h * out_img_idy + 0.5) + : static_cast(ratio_h * out_img_idy); int out_img_idx = tid % out_img_w; - int in_img_idx = static_cast(ratio_w * out_img_idx + 0.5); + int in_img_idx = (align_corners) + ? static_cast(ratio_w * out_img_idx + 0.5) + : static_cast(ratio_w * out_img_idx); T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + in_img_idy * in_img_w + in_img_idx]; @@ -79,10 +89,12 @@ __global__ void KeBilinearInterpFw( const T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const float ratio_h, const float ratio_w) { + const size_t num_channels, const float ratio_h, const float ratio_w, + const bool align_corners, const int align_mode) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; + bool align_flag = (align_mode == 0 && !align_corners); for (; tid < nthreads; tid += stride) { int out_id_h = tid / output_w; int out_id_w = tid % output_w; @@ -91,15 +103,23 @@ __global__ void KeBilinearInterpFw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = ratio_h * out_img_idy; + int in_img_idy = align_flag + ? static_cast(ratio_h * (out_img_idy + 0.5) - 0.5) + : static_cast(ratio_h * out_img_idy); + in_img_idy = (in_img_idy > 0) ? in_img_idy : 0; int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0; - T h1lambda = ratio_h * out_img_idy - in_img_idy; + T h1lambda = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy + : ratio_h * out_img_idy - in_img_idy; T h2lambda = 1.f - h1lambda; int out_img_idx = tid % out_img_w; - int in_img_idx = ratio_w * out_img_idx; + int in_img_idx = align_flag + ? static_cast(ratio_w * (out_img_idx + 0.5) - 0.5) + : static_cast(ratio_w * out_img_idx); + in_img_idx = (in_img_idx > 0) ? in_img_idx : 0; int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0; - T w1lambda = ratio_w * out_img_idx - in_img_idx; + T w1lambda = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx + : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + @@ -118,10 +138,12 @@ __global__ void KeBilinearInterpBw( T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h, const size_t input_w, const T* out, const size_t out_img_h, const size_t out_img_w, const size_t output_h, const size_t output_w, - const size_t num_channels, const T ratio_h, const T ratio_w) { + const size_t num_channels, const T ratio_h, const T ratio_w, + const bool align_corners, const int align_mode) { int nthreads = output_h * output_w; int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; + bool align_flag = (align_mode == 0 && !align_corners); for (; tid < nthreads; tid += stride) { int out_id_h = tid / output_w; int out_id_w = tid % output_w; @@ -130,15 +152,22 @@ __global__ void KeBilinearInterpBw( int channel_id = out_id_w / out_img_size; int out_img_idy = (out_id_w % out_img_size) / out_img_w; - int in_img_idy = ratio_h * out_img_idy; + int in_img_idy = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 + : ratio_h * out_img_idy; + in_img_idy = (in_img_idy > 0) ? in_img_idy : 0; int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0; - T h1lambda = ratio_h * out_img_idy - in_img_idy; + T h1lambda = align_flag ? 
ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy + : ratio_h * out_img_idy - in_img_idy; + T h2lambda = 1.f - h1lambda; int out_img_idx = tid % out_img_w; - int in_img_idx = ratio_w * out_img_idx; + int in_img_idx = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 + : ratio_w * out_img_idx; + in_img_idx = (in_img_idx > 0) ? in_img_idx : 0; int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0; - T w1lambda = ratio_w * out_img_idx - in_img_idx; + T w1lambda = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx + : ratio_w * out_img_idx - in_img_idx; T w2lambda = 1.f - w1lambda; T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size + @@ -175,6 +204,9 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { out_w = size_data[1]; } + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); + int n = input->dims()[0]; int c = input->dims()[1]; int in_h = input->dims()[2]; @@ -188,10 +220,16 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { int in_chw = c * in_hw; int out_chw = c * out_hw; - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = 0.f; + float ratio_w = 0.f; + if (out_h > 1) { + ratio_h = (align_corners) ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + } + if (out_w > 1) { + ratio_w = (align_corners) ? static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; + } if (in_h == out_h && in_w == out_w) { framework::TensorCopy(*input, ctx.GetPlace(), output); @@ -206,12 +244,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { KeNearestNeighborInterpFw< T><<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w); + out_chw, c, ratio_h, ratio_w, align_corners); } else if ("bilinear" == interp_method) { KeBilinearInterpFw< T><<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w); + out_chw, c, ratio_h, ratio_w, align_corners, align_mode); } } }; @@ -234,6 +272,10 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); auto out_size = ctx.Input("OutSize"); + + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); + if (out_size != nullptr) { Tensor sizes; framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes); @@ -252,10 +294,16 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { int in_chw = c * in_hw; int out_chw = c * out_hw; - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = 0.f; + float ratio_w = 0.f; + if (out_h > 1) { + ratio_h = (align_corners) ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + } + if (out_w > 1) { + ratio_w = (align_corners) ? 
static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; + } if (in_h == out_h && in_w == out_w) { framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad); @@ -270,12 +318,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { KeNearestNeighborInterpBw< T><<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, - out_w, n, out_chw, c, ratio_h, ratio_w); + out_w, n, out_chw, c, ratio_h, ratio_w, align_corners); } else if ("bilinear" == interp_method) { KeBilinearInterpBw< T><<>>( input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, - out_w, n, out_chw, c, ratio_h, ratio_w); + out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode); } } }; diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 7fdb3e1f5a..c631ad1dd1 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -26,14 +26,17 @@ template static void NearestNeighborInterpolate(const Tensor& input, Tensor* output, const float ratio_h, const float ratio_w, const int n, const int c, - const int out_h, const int out_w) { + const int out_h, const int out_w, + const bool align_corners) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); for (int k = 0; k < out_h; k++) { // loop for images - int in_k = static_cast(ratio_h * k + 0.5); + int in_k = (align_corners) ? static_cast(ratio_h * k + 0.5) + : static_cast(ratio_h * k); for (int l = 0; l < out_w; l++) { - int in_l = static_cast(ratio_w * l + 0.5); + int in_l = (align_corners) ? static_cast(ratio_w * l + 0.5) + : static_cast(ratio_w * l); for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels @@ -48,20 +51,29 @@ template static void BilinearInterpolation(const Tensor& input, Tensor* output, const float ratio_h, const float ratio_w, const int in_h, const int in_w, const int n, - const int c, const int out_h, - const int out_w) { + const int c, const int out_h, const int out_w, + const bool align_corners, + const bool align_mode) { auto input_t = EigenTensor::From(input); auto output_t = EigenTensor::From(*output); + bool align_flag = (align_mode == 0 && !align_corners); for (int k = 0; k < out_h; k++) { // loop for images - int y_n = static_cast(ratio_h * k); + int y_n = align_flag ? static_cast(ratio_h * (k + 0.5) - 0.5) + : static_cast(ratio_h * k); + y_n = (y_n > 0) ? y_n : 0; int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1); - float d_n = ratio_h * k - y_n; + float d_n = + align_flag ? ratio_h * (k + 0.5) - 0.5 - y_n : ratio_h * k - y_n; float d_s = 1.f - d_n; for (int l = 0; l < out_w; l++) { - int x_w = static_cast(ratio_w * l); + int x_w = (align_mode == 0 && !align_corners) + ? static_cast(ratio_w * (l + 0.5) - 0.5) + : static_cast(ratio_w * l); + x_w = (x_w > 0) ? x_w : 0; int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1); - float d_w = ratio_w * l - x_w; + float d_w = + align_flag ? 
ratio_w * (l + 0.5) - 0.5 - x_w : ratio_w * l - x_w; float d_e = 1.f - d_w; for (int i = 0; i < n; i++) { // loop for batches @@ -78,19 +90,20 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output, } template -static void NearestNeighborInterpolateGrad(const Tensor& output_grad, - Tensor* input_grad, - const float ratio_h, - const float ratio_w, const int n, - const int c, const int out_h, - const int out_w) { +static void NearestNeighborInterpolateGrad( + const Tensor& output_grad, Tensor* input_grad, const float ratio_h, + const float ratio_w, const int n, const int c, const int out_h, + const int out_w, const bool align_corners) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); + for (int k = 0; k < out_h; k++) { // loop for images - int in_k = static_cast(ratio_h * k + 0.5); + int in_k = (align_corners) ? static_cast(ratio_h * k + 0.5) + : static_cast(ratio_h * k); for (int l = 0; l < out_w; l++) { - int in_l = static_cast(ratio_w * l + 0.5); + int in_l = (align_corners) ? static_cast(ratio_w * l + 0.5) + : static_cast(ratio_w * l); for (int i = 0; i < n; i++) { // loop for batches for (int j = 0; j < c; j++) { // loop for channels @@ -106,19 +119,28 @@ static void BilinearInterpolationGrad(const Tensor& output_grad, Tensor* input_grad, const float ratio_h, const float ratio_w, const int in_h, const int in_w, const int n, const int c, - const int out_h, const int out_w) { + const int out_h, const int out_w, + const bool align_corners, + const int align_mode) { auto input_grad_t = EigenTensor::From(*input_grad); auto output_grad_t = EigenTensor::From(output_grad); + bool align_flag = (align_mode == 0 && !align_corners); for (int k = 0; k < out_h; k++) { // loop for images - int y_n = static_cast(ratio_h * k); + int y_n = align_flag ? static_cast(ratio_h * (k + 0.5) - 0.5) + : static_cast(ratio_h * k); + y_n = (y_n > 0) ? y_n : 0; int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1); - float d_n = ratio_h * k - y_n; + float d_n = + align_flag ? ratio_h * (k + 0.5) - 0.5 - y_n : ratio_h * k - y_n; float d_s = 1.f - d_n; for (int l = 0; l < out_w; l++) { - int x_w = static_cast(ratio_w * l); + int x_w = align_flag ? static_cast(ratio_w * (l + 0.5) - 0.5) + : static_cast(ratio_w * l); + x_w = (x_w > 0) ? x_w : 0; int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1); - float d_w = ratio_w * l - x_w; + float d_w = + align_flag ? ratio_w * (l + 0.5) - 0.5 - x_w : ratio_w * l - x_w; float d_e = 1.f - d_w; for (int i = 0; i < n; i++) { // loop for batches @@ -134,7 +156,6 @@ static void BilinearInterpolationGrad(const Tensor& output_grad, } } } - template class InterpolateKernel : public framework::OpKernel { public: @@ -151,6 +172,8 @@ class InterpolateKernel : public framework::OpKernel { out_h = out_size_data[0]; out_w = out_size_data[1]; } + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); const int n = input->dims()[0]; const int c = input->dims()[1]; @@ -168,17 +191,24 @@ class InterpolateKernel : public framework::OpKernel { return; } - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = 0.f; + float ratio_w = 0.f; + + if (out_h > 1) { + ratio_h = (align_corners) ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + } + if (out_w > 1) { + ratio_w = (align_corners) ? 
static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; + } if ("bilinear" == interp_method) { BilinearInterpolation(*input, output, ratio_h, ratio_w, in_h, in_w, n, - c, out_h, out_w); + c, out_h, out_w, align_corners, align_mode); } else if ("nearest" == interp_method) { NearestNeighborInterpolate(*input, output, ratio_h, ratio_w, n, c, - out_h, out_w); + out_h, out_w, align_corners); } } }; @@ -200,6 +230,8 @@ class InterpolateGradKernel : public framework::OpKernel { out_h = out_size_data[0]; out_w = out_size_data[1]; } + bool align_corners = ctx.Attr("align_corners"); + int align_mode = ctx.Attr("align_mode"); const int n = input->dims()[0]; const int c = input->dims()[1]; @@ -217,17 +249,26 @@ class InterpolateGradKernel : public framework::OpKernel { return; } - float ratio_h = - (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - float ratio_w = - (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = 0.f; + float ratio_w = 0.f; + + if (out_h > 1) { + ratio_h = (align_corners) ? static_cast(in_h - 1) / (out_h - 1) + : static_cast(in_h) / out_h; + } + if (out_w > 1) { + ratio_w = (align_corners) ? static_cast(in_w - 1) / (out_w - 1) + : static_cast(in_w) / out_w; + } if ("bilinear" == interp_method) { BilinearInterpolationGrad(*output_grad, input_grad, ratio_h, ratio_w, - in_h, in_w, n, c, out_h, out_w); + in_h, in_w, n, c, out_h, out_w, + align_corners, align_mode); } else if ("nearest" == interp_method) { NearestNeighborInterpolateGrad(*output_grad, input_grad, ratio_h, - ratio_w, n, c, out_h, out_w); + ratio_w, n, c, out_h, out_w, + align_corners); } } }; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index beb5e31211..0dbcf442a3 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -932,7 +932,7 @@ def dynamic_gru(input, create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. Default: None. bias_attr (ParamAttr|bool|None): The parameter attribute for the bias - of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates + of GRU.Note that the bias with :math:`(1 \\times 3D)` concatenates the bias in the update gate, reset gate and candidate calculations. If it is set to False, no bias will be applied to the update gate, reset gate and candidate calculations. If it is set to None or one @@ -1073,7 +1073,7 @@ def gru_unit(input, create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with Xavier. Default: None. bias_attr (ParamAttr|bool|None): The parameter attribute for the bias - of GRU. Note that the bias with :math:`(1 \\times 3D)` concatenates + of GRU.Note that the bias with :math:`(1 \\times 3D)` concatenates the bias in the update gate, reset gate and candidate calculations. If it is set to False, no bias will be applied to the update gate, reset gate and candidate calculations. If it is set to None or one @@ -5403,7 +5403,7 @@ def transpose(x, perm, name=None): Examples: .. code-block:: python - # use append_batch_size=False to avoid prepending extra + # use append_batch_size=False to avoid prepending extra # batch size in shape x = fluid.layers.data(name='x', shape=[5, 10, 15], dtype='float32', append_batch_size=False) @@ -5920,7 +5920,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): than :attr:`shape`. act (str): The non-linear activation to be applied to the reshaped tensor variable. 
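The CPU and CUDA kernels above all derive the source coordinate from the same ratio and index rules. A minimal Python sketch of that index math (the helper names are illustrative and not part of the patch):

.. code-block:: python

    def src_ratio(in_size, out_size, align_corners):
        # ratio_h / ratio_w as computed in the kernels above
        if out_size <= 1:
            return 0.0
        return (in_size - 1.0) / (out_size - 1.0) if align_corners \
            else float(in_size) / out_size

    def nearest_src_index(dst_idx, ratio, align_corners):
        # align_corners rounds to the nearest source pixel;
        # otherwise the scaled index is simply truncated
        return int(ratio * dst_idx + 0.5) if align_corners \
            else int(ratio * dst_idx)

    def bilinear_src_index(dst_idx, ratio, align_corners, align_mode):
        # align_mode == 0 with align_corners == False shifts the sample
        # point by half a pixel: src = scale * (dst + 0.5) - 0.5
        if align_mode == 0 and not align_corners:
            src = ratio * (dst_idx + 0.5) - 0.5
        else:
            src = ratio * dst_idx
        idx = max(int(src), 0)
        return idx, src - idx  # left index and lambda weight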
- inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple + inplace(bool): Must use :attr:`False` if :attr:`x` is used in multiple operators. If this flag is set :attr:`True`, reuse input :attr:`x` to reshape, which will change the shape of tensor variable :attr:`x` and might cause errors when @@ -6581,7 +6581,9 @@ def image_resize(input, scale=None, name=None, resample='BILINEAR', - actual_shape=None): + actual_shape=None, + align_corners=True, + align_mode=1): """ **Resize a Batch of Images** @@ -6594,6 +6596,80 @@ def image_resize(input, 'NEAREST' : Nearest neighbor interpolation + Nearest neighbor interpolation is to perform nearest neighbor interpolation + in both the 3rd dimention(in height direction) and the 4th dimention(in width + direction) on input tensor. + + Bilinear interpolation is an extension of linear interpolation for + interpolating functions of two variables (e.g. H-direction and + W-direction in this op) on a rectilinear 2D grid. The key idea is + to perform linear interpolation first in one direction, and then + again in the other direction. + + Align_corners and align_mode are optinal parameters,the calculation method + of interpolation can be selected by them. + + Example: + + For scale: + + if align_corners = True && out_size > 1 : + + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + + Nearest neighbor interpolation: + + if: + align_corners = False + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor + W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + + else: + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) + + Bilinear interpolation: + + if: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + + + else: + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} + + For details of nearest neighbor interpolation, please refer to Wikipedia: + https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation. + + For details of bilinear interpolation, please refer to Wikipedia: + https://en.wikipedia.org/wiki/Bilinear_interpolation. + + + Args: input (Variable): The input tensor of image resize layer, This is a 4-D tensor of the shape @@ -6623,6 +6699,13 @@ def image_resize(input, set, otherwise errors would be occured in graph constructing stage. Default: None + align_corners(bool) : An optional bool, If True, the centers of the 4 corner pixels of the + input and output tensors are aligned, preserving the values at the + corner pixels. + Default: True + align_mode(int) : An optional for bilinear interpolation. can be \'0\' + for src_idx = scale*(dst_indx+0.5)-0.5 , can be \'1\' for + src_idx = scale*dst_index . Returns: Variable: The output is a 4-D tensor of the shape @@ -6635,6 +6718,8 @@ def image_resize(input, or 'NEAREST' currently. ValueError: One of out_shape and scale must not be None. ValueError: out_shape length should be 2. + TypeError: align_corners shoule be a bool value + ValueError: align_mode can only be '0' or '1' Examples: .. 
code-block:: python @@ -6650,6 +6735,12 @@ def image_resize(input, "The 'resample' of image_resize can only be 'BILINEAR' or 'NEAREST' currently." ) resample_type = resample_methods[resample] + + if not isinstance(align_corners, bool): + raise TypeError("Attr align_corners should be a bool value") + if align_mode != 0 and align_mode != 1: + raise ValueError("align_mode can only be 0 or 1") + if out_shape is None and scale is None: raise ValueError("One of out_shape and scale must not be None.") helper = LayerHelper('{}_interp'.format(resample_type), **locals()) @@ -6689,9 +6780,13 @@ def image_resize(input, type='{}_interp'.format(resample_type), inputs=inputs, outputs={"Out": out}, - attrs={"out_h": out_h, - "out_w": out_w, - "interp_method": resample_type}) + attrs={ + "out_h": out_h, + "out_w": out_w, + "interp_method": resample_type, + "align_corners": align_corners, + "align_mode": align_mode + }) return out @@ -6700,7 +6795,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None, - actual_shape=None): + actual_shape=None, + align_corners=True, + align_mode=1): """ Resize input by performing bilinear interpolation based on given output shape which specified by actual_shape, out_shape and scale @@ -6715,6 +6812,47 @@ def resize_bilinear(input, For details of bilinear interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Bilinear_interpolation + Align_corners and align_mode are optinal parameters,the calculation + method of interpolation can be selected by them. + + + Align_corners and align_mode are optinal parameters,the calculation method + of interpolation can be selected by them. + + Example: + + For scale: + + if align_corners = True && out_size > 1 : + + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + Bilinear interpolation: + + if: + align_corners = False , align_mode = 0 + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = (H_{in}+0.5) * scale_{factor} - 0.5 + W_out = (W_{in}+0.5) * scale_{factor} - 0.5 + + + else: + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = H_{in} * scale_{factor} + W_out = W_{in} * scale_{factor} + + + Args: input(${x_type}): ${x_comment}. @@ -6738,6 +6876,8 @@ def resize_bilinear(input, set, otherwise errors would be occured in graph constructing stage. Default: None + align_corners(bool): ${align_corners_comment} + align_mode(bool): ${align_mode_comment} Returns: ${out_comment}. @@ -6748,7 +6888,8 @@ def resize_bilinear(input, out = fluid.layers.resize_bilinear(input, out_shape=[12, 12]) """ - return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape) + return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape, + align_corners, align_mode) @templatedoc(op_type="nearest_interp") @@ -6756,13 +6897,48 @@ def resize_nearest(input, out_shape=None, scale=None, name=None, - actual_shape=None): + actual_shape=None, + align_corners=True): """ Resize input by performing nearest neighbor interpolation in both the 3rd dimention(in height direction) and the 4th dimention(in width direction) based on given output shape which specified by actual_shape, out_shape and scale in priority order. 
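For instance, the new attributes can be exercised from the Python API as follows (the layer name and shapes are illustrative):

.. code-block:: python

    import paddle.fluid as fluid

    data = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
    # src_idx = scale * (dst_idx + 0.5) - 0.5, i.e. align_corners=False, align_mode=0
    out_bilinear = fluid.layers.resize_bilinear(
        data, out_shape=[64, 64], align_corners=False, align_mode=0)
    # nearest neighbor without corner alignment: src_idx = floor(scale * dst_idx)
    out_nearest = fluid.layers.resize_nearest(
        data, out_shape=[64, 64], align_corners=False)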
+ Example: + + For scale: + + if align_corners = True && out_size > 1 : + + scale_factor = (in_size-1.0)/(out_size-1.0) + + else: + + scale_factor = float(in_size/out_size) + + + Nearest neighbor interpolation: + + if: + align_corners = False + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = \left \lfloor {H_{in} * scale_{}factor}} \right \rfloor + W_out = \left \lfloor {W_{in} * scale_{}factor}} \right \rfloor + + else: + align_corners = True + + input : (N,C,H_in,W_in) + output: (N,C,H_out,W_out) where: + + H_out = round(H_{in} * scale_{factor}) + W_out = round(W_{in} * scale_{factor}) + + For details of nearest neighbor interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation @@ -6789,6 +6965,7 @@ def resize_nearest(input, set, otherwise errors would be occured in graph constructing stage. Default: None + align_corners(bool): ${align_corners_comment} Returns: ${out_comment}. @@ -6799,7 +6976,8 @@ def resize_nearest(input, out = fluid.layers.resize_nearest(input, out_shape=[12, 12]) """ - return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape) + return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape, + align_corners) def image_resize_short(input, out_short_len, resample='BILINEAR'): diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index c8a7063dc1..f60ed1d79a 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -20,7 +20,13 @@ from op_test import OpTest import paddle.fluid.core as core -def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None): +def bilinear_interp_np(input, + out_h, + out_w, + out_size=None, + actual_shape=None, + align_corners=True, + align_mode=0): """bilinear interpolation implement in shape [N, C, H, W]""" if out_size is not None: out_h = out_size[0] @@ -29,25 +35,45 @@ def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None): out_h = actual_shape[0] out_w = actual_shape[1] batch_size, channel, in_h, in_w = input.shape + + ratio_h = ratio_w = 0.0 if out_h > 1: - ratio_h = (in_h - 1.0) / (out_h - 1.0) - else: - ratio_h = 0.0 + if (align_corners): + ratio_h = (in_h - 1.0) / (out_h - 1.0) + else: + ratio_h = 1.0 * in_h / out_h if out_w > 1: - ratio_w = (in_w - 1.0) / (out_w - 1.0) - else: - ratio_w = 0.0 + if (align_corners): + ratio_w = (in_w - 1.0) / (out_w - 1.0) + else: + ratio_w = 1.0 * in_w / out_w out = np.zeros((batch_size, channel, out_h, out_w)) + for i in range(out_h): - h = int(ratio_h * i) + if (align_mode == 0 and not align_corners): + h = int(ratio_h * (i + 0.5) - 0.5) + else: + h = int(ratio_h * i) + + h = max(0, h) hid = 1 if h < in_h - 1 else 0 - h1lambda = ratio_h * i - h + if (align_mode == 0 and not align_corners): + h1lambda = ratio_h * (i + 0.5) - 0.5 - h + else: + h1lambda = ratio_h * i - h h2lambda = 1.0 - h1lambda for j in range(out_w): - w = int(ratio_w * j) + if (align_mode == 0 and not align_corners): + w = int(ratio_w * (j + 0.5) - 0.5) + else: + w = int(ratio_w * j) + w = max(0, w) wid = 1 if w < in_w - 1 else 0 - w1lambda = ratio_w * j - w + if (align_mode == 0 and not align_corners): + w1lambda = ratio_w * (j + 0.5) - 0.5 - w + else: + w1lambda = ratio_w * j - w w2lambda = 1.0 - w1lambda out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] + @@ -66,7 +92,8 @@ class TestBilinearInterpOp(OpTest): 
input_np = np.random.random(self.input_shape).astype("float32") output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners, self.align_mode) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -75,7 +102,9 @@ class TestBilinearInterpOp(OpTest): self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners, + 'align_mode': self.align_mode } self.outputs = {'Out': output_np} @@ -91,6 +120,8 @@ class TestBilinearInterpOp(OpTest): self.out_h = 2 self.out_w = 2 self.out_size = np.array([3, 3]).astype("int32") + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase1(TestBilinearInterpOp): @@ -99,6 +130,8 @@ class TestBilinearInterpCase1(TestBilinearInterpOp): self.input_shape = [4, 1, 7, 8] self.out_h = 1 self.out_w = 1 + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase2(TestBilinearInterpOp): @@ -107,6 +140,8 @@ class TestBilinearInterpCase2(TestBilinearInterpOp): self.input_shape = [3, 3, 9, 6] self.out_h = 12 self.out_w = 12 + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase3(TestBilinearInterpOp): @@ -115,6 +150,8 @@ class TestBilinearInterpCase3(TestBilinearInterpOp): self.input_shape = [1, 1, 128, 64] self.out_h = 64 self.out_w = 128 + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase4(TestBilinearInterpOp): @@ -124,6 +161,8 @@ class TestBilinearInterpCase4(TestBilinearInterpOp): self.out_h = 1 self.out_w = 1 self.out_size = np.array([2, 2]).astype("int32") + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase5(TestBilinearInterpOp): @@ -133,6 +172,8 @@ class TestBilinearInterpCase5(TestBilinearInterpOp): self.out_h = 12 self.out_w = 12 self.out_size = np.array([11, 11]).astype("int32") + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase6(TestBilinearInterpOp): @@ -142,6 +183,8 @@ class TestBilinearInterpCase6(TestBilinearInterpOp): self.out_h = 64 self.out_w = 128 self.out_size = np.array([65, 129]).astype("int32") + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpActualShape(TestBilinearInterpOp): @@ -151,6 +194,8 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp): self.out_h = 64 self.out_w = 32 self.out_size = np.array([66, 40]).astype("int32") + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpOpUint8(OpTest): @@ -162,14 +207,17 @@ class TestBilinearInterpOpUint8(OpTest): input_np = np.random.randint( low=0, high=256, size=self.input_shape).astype("uint8") output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners, self.align_mode) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners, + 'align_mode': self.align_mode } self.outputs = {'Out': output_np} @@ -181,6 +229,8 @@ class TestBilinearInterpOpUint8(OpTest): self.input_shape = [1, 3, 9, 6] self.out_h = 10 self.out_w = 9 + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): @@ -189,6 +239,8 @@ class 
TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8): self.input_shape = [2, 3, 128, 64] self.out_h = 120 self.out_w = 50 + self.align_corners = True + self.align_mode = 1 class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): @@ -198,6 +250,26 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8): self.out_h = 5 self.out_w = 13 self.out_size = np.array([6, 15]).astype("int32") + self.align_corners = True + self.align_mode = 1 + + +class TestBilinearInterpOtherMethod1(TestBilinearInterpOp): + def set_align_mode(self): + self.align_corners = False + self.align_mode = 1 + + +class TestBilinearInterpWithMethod2(TestBilinearInterpOp): + def set_align_mode(self): + self.align_corners = False + self.align_mode = 0 + + +class TestBilinearInterpWithMethod3(TestBilinearInterpOp): + def set_align_mode(self): + self.align_corners = True + self.align_mode = 0 if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py index 242709425f..5bb2260ef7 100644 --- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py @@ -24,7 +24,8 @@ def nearest_neighbor_interp_np(X, out_h, out_w, out_size=None, - actual_shape=None): + actual_shape=None, + align_corners=True): """nearest neighbor interpolation implement in shape [N, C, H, W]""" if out_size is not None: out_h = out_size[0] @@ -35,17 +36,31 @@ def nearest_neighbor_interp_np(X, n, c, in_h, in_w = X.shape ratio_h = ratio_w = 0.0 - if out_h > 1: - ratio_h = (in_h - 1.0) / (out_h - 1.0) - if out_w > 1: - ratio_w = (in_w - 1.0) / (out_w - 1.0) + if (out_h > 1): + if (align_corners): + ratio_h = (in_h - 1.0) / (out_h - 1.0) + else: + ratio_h = 1.0 * in_h / out_h + if (out_w > 1): + if (align_corners): + ratio_w = (in_w - 1.0) / (out_w - 1.0) + else: + ratio_w = 1.0 * in_w / out_w out = np.zeros((n, c, out_h, out_w)) - for i in range(out_h): - in_i = int(ratio_h * i + 0.5) - for j in range(out_w): - in_j = int(ratio_w * j + 0.5) - out[:, :, i, j] = X[:, :, in_i, in_j] + + if align_corners: + for i in range(out_h): + in_i = int(ratio_h * i + 0.5) + for j in range(out_w): + in_j = int(ratio_w * j + 0.5) + out[:, :, i, j] = X[:, :, in_i, in_j] + else: + for i in range(out_h): + in_i = int(ratio_h * i) + for j in range(out_w): + in_j = int(ratio_w * j) + out[:, :, i, j] = X[:, :, in_i, in_j] return out.astype(X.dtype) @@ -59,7 +74,8 @@ class TestNearestInterpOp(OpTest): input_np = np.random.random(self.input_shape).astype("float32") output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size @@ -68,7 +84,8 @@ class TestNearestInterpOp(OpTest): self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners, } self.outputs = {'Out': output_np} @@ -84,6 +101,7 @@ class TestNearestInterpOp(OpTest): self.out_h = 2 self.out_w = 2 self.out_size = np.array([3, 3]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpCase1(TestNearestInterpOp): @@ -92,6 +110,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp): self.input_shape = [4, 1, 7, 8] self.out_h = 1 self.out_w = 1 + self.align_corners = True class 
TestNearestNeighborInterpCase2(TestNearestInterpOp): @@ -100,6 +119,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp): self.input_shape = [3, 3, 9, 6] self.out_h = 12 self.out_w = 12 + self.align_corners = True class TestNearestNeighborInterpCase3(TestNearestInterpOp): @@ -108,6 +128,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp): self.input_shape = [1, 1, 128, 64] self.out_h = 64 self.out_w = 128 + self.align_corners = True class TestNearestNeighborInterpCase4(TestNearestInterpOp): @@ -117,6 +138,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp): self.out_h = 1 self.out_w = 1 self.out_size = np.array([2, 2]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpCase5(TestNearestInterpOp): @@ -126,6 +148,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp): self.out_h = 12 self.out_w = 12 self.out_size = np.array([11, 11]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpCase6(TestNearestInterpOp): @@ -135,6 +158,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp): self.out_h = 64 self.out_w = 128 self.out_size = np.array([65, 129]).astype("int32") + self.align_corners = True class TestNearestNeighborInterpActualShape(TestNearestInterpOp): @@ -144,6 +168,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp): self.out_h = 64 self.out_w = 32 self.out_size = np.array([66, 40]).astype("int32") + self.align_corners = True class TestNearestInterpOpUint8(OpTest): @@ -155,14 +180,16 @@ class TestNearestInterpOpUint8(OpTest): input_np = np.random.randint( low=0, high=256, size=self.input_shape).astype("uint8") output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w, - self.out_size, self.actual_shape) + self.out_size, self.actual_shape, + self.align_corners) self.inputs = {'X': input_np} if self.out_size is not None: self.inputs['OutSize'] = self.out_size self.attrs = { 'out_h': self.out_h, 'out_w': self.out_w, - 'interp_method': self.interp_method + 'interp_method': self.interp_method, + 'align_corners': self.align_corners } self.outputs = {'Out': output_np} @@ -174,6 +201,7 @@ class TestNearestInterpOpUint8(OpTest): self.input_shape = [1, 3, 9, 6] self.out_h = 10 self.out_w = 9 + self.align_corners = True class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): @@ -182,6 +210,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8): self.input_shape = [2, 3, 128, 64] self.out_h = 120 self.out_w = 50 + self.align_corners = True class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): @@ -191,6 +220,12 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8): self.out_h = 5 self.out_w = 13 self.out_size = np.array([6, 15]).astype("int32") + self.align_corners = True + + +class TestNearestInterpWithoutCorners(TestNearestInterpOp): + def set_align_corners(self): + self.align_corners = False if __name__ == "__main__": From 4f18a9b87be1a13742bd07f43030659b7404b21f Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Thu, 31 Jan 2019 02:04:11 +0000 Subject: [PATCH 15/15] test=develop --- paddle/fluid/API.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 041e5d95eb..f50a38842a 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -324,8 +324,8 @@ paddle.fluid.layers.generate_mask_labels ArgSpec(args=['im_info', 'gt_classes', paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, 
defaults=(None,)) paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
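The reordered API.spec entry above records the final signature, box_clip(input, im_info, name=None). A minimal usage sketch of that layer (tensor names and shapes are illustrative):

.. code-block:: python

    import paddle.fluid as fluid

    boxes = fluid.layers.data(
        name='boxes', shape=[8, 4], dtype='float32', lod_level=1)
    im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32')
    clipped = fluid.layers.box_clip(input=boxes, im_info=im_info)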