add align_corners and align_mode for image_resize

test=develop
7 years ago · 48cc484643
parent e07900d317
commit 48cc484643
6 changed files with 529 additions and 100 deletions
--- a/paddle/fluid/operators/interpolate_op.cc
+++ b/paddle/fluid/operators/interpolate_op.cc
@ -82,6 +82,18 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
                         "bilinear interpolation and \"nearest\" for nearest "
                         "neighbor interpolation.")
        .SetDefault("bilinear");
+    AddAttr<bool>(
+        "align_corners",
+        "an optional bool. Defaults to True. "
+        "If True, the centers of 4 corner pixels of the input and output "
+        "tensors are aligned, preserving the values at the corner pixels, "
+        "if False, are not aligned")
+        .SetDefault(true);
+    AddAttr<int>("align_mode",
+                 "(int, default \'0\'), align_corners mode , can be \'0\' "
+                 "for pytorch calculation method, can be \'1\' for "
+                 "tensorflow calculation method.")
+        .SetDefault(0);
    AddComment(R"DOC(
          This operator samples input X to given output shape by using specified
          interpolation method, the interpolation methods can be \"nearest\"
@ -98,6 +110,67 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
          to perform linear interpolation first in one direction, and then 
          again in the other direction.

+          Align_corners and align_mode are optional parameters. The calculation method
+          of interpolation can be selected by them.
+          
+          Example:
+
+          for scale:
+          
+            if align_corners = True and out_{size}>1 :
+
+              scale_{factor} = (in_{size}-1.0)/(out_{size}-1.0)
+            
+            else:
+              
+              scale_{factor} = float(in_{size}/out_{size})
+            
+          
+          Nearest neighbor interpolation:
+          
+          case 1:
+              align_corners = False
+
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
+
+              H_out = \left \lfloor {H_{in} * scale_{factor}} \right \rfloor
+              W_out = \left \lfloor {W_{in} * scale_{factor}} \right \rfloor
+
+          case 2:
+              align_corners = True
+
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
+
+              H_out = round(H_{in} * scale_{factor})
+              W_out = round(W_{in} * scale_{factor})
+
+          Bilinear interpolation:
+
+          case 1:
+              align_corners = False , align_mode = 0
+              
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
+              
+              H_out = (H_{in}+0.5) * scale_{factor} - 0.5
+              W_out = (W_{in}+0.5) * scale_{factor} - 0.5
+
+
+          case 2:
+              align_corners = False , align_mode = 1
+              or
+              align_corners = True
+           
+              input : (N,C,H_in,W_in)
+              output: (N,C,H_out,W_out) where:
+
+              H_out = H_{in} * scale_{factor}
+              W_out = W_{in} * scale_{factor}
+
+          
+
          For details of nearest neighbor interpolation, please refer to Wikipedia: 
          https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation

--- a/paddle/fluid/operators/interpolate_op.cu
+++ b/paddle/fluid/operators/interpolate_op.cu
@ -23,7 +23,8 @@ __global__ void KeNearestNeighborInterpFw(
    const T* in, const size_t in_img_h, const size_t in_img_w,
    const size_t input_h, const size_t input_w, T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const float ratio_h, const float ratio_w) {
+    const size_t num_channels, const float ratio_h, const float ratio_w,
+    const bool align_corners) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
@ -35,10 +36,14 @@ __global__ void KeNearestNeighborInterpFw(
    int channel_id = out_id_w / out_img_size;

    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = static_cast<int>(ratio_h * out_img_idy + 0.5);
+    int in_img_idy = (align_corners)
+                         ? static_cast<int>(ratio_h * out_img_idy + 0.5)
+                         : static_cast<int>(ratio_h * out_img_idy);

    int out_img_idx = tid % out_img_w;
-    int in_img_idx = static_cast<int>(ratio_w * out_img_idx + 0.5);
+    int in_img_idx = (align_corners)
+                         ? static_cast<int>(ratio_w * out_img_idx + 0.5)
+                         : static_cast<int>(ratio_w * out_img_idx);

    out[tid] = in[out_id_h * input_w + channel_id * in_img_size +
                  in_img_idy * in_img_w + in_img_idx];
@ -50,7 +55,8 @@ __global__ void KeNearestNeighborInterpBw(
    T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h,
    const size_t input_w, const T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const float ratio_h, const float ratio_w) {
+    const size_t num_channels, const float ratio_h, const float ratio_w,
+    const bool align_corners) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
@ -62,10 +68,14 @@ __global__ void KeNearestNeighborInterpBw(
    int channel_id = out_id_w / out_img_size;

    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = static_cast<int>(ratio_h * out_img_idy + 0.5);
+    int in_img_idy = (align_corners)
+                         ? static_cast<int>(ratio_h * out_img_idy + 0.5)
+                         : static_cast<int>(ratio_h * out_img_idy);

    int out_img_idx = tid % out_img_w;
-    int in_img_idx = static_cast<int>(ratio_w * out_img_idx + 0.5);
+    int in_img_idx = (align_corners)
+                         ? static_cast<int>(ratio_w * out_img_idx + 0.5)
+                         : static_cast<int>(ratio_w * out_img_idx);

    T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
                    in_img_idy * in_img_w + in_img_idx];
@ -79,7 +89,8 @@ __global__ void KeBilinearInterpFw(
    const T* in, const size_t in_img_h, const size_t in_img_w,
    const size_t input_h, const size_t input_w, T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const float ratio_h, const float ratio_w) {
+    const size_t num_channels, const float ratio_h, const float ratio_w,
+    const bool align_corners, const int align_mode) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
@ -91,15 +102,23 @@ __global__ void KeBilinearInterpFw(
    int channel_id = out_id_w / out_img_size;

    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = ratio_h * out_img_idy;
+    int in_img_idy = (align_mode == 0 && !align_corners)
+                         ? static_cast<int>(ratio_h * (out_img_idy + 0.5) - 0.5)
+                         : static_cast<int>(ratio_h * out_img_idy);
    int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
-    T h1lambda = ratio_h * out_img_idy - in_img_idy;
+    T h1lambda = (align_mode == 0 && !align_corners)
+                     ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
+                     : ratio_h * out_img_idy - in_img_idy;
    T h2lambda = 1.f - h1lambda;

    int out_img_idx = tid % out_img_w;
-    int in_img_idx = ratio_w * out_img_idx;
+    int in_img_idx = (align_mode == 0 && !align_corners)
+                         ? static_cast<int>(ratio_w * (out_img_idx + 0.5) - 0.5)
+                         : static_cast<int>(ratio_w * out_img_idx);
    int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
-    T w1lambda = ratio_w * out_img_idx - in_img_idx;
+    T w1lambda = (align_mode == 0 && !align_corners)
+                     ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
+                     : ratio_w * out_img_idx - in_img_idx;
    T w2lambda = 1.f - w1lambda;

    const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
@ -118,7 +137,8 @@ __global__ void KeBilinearInterpBw(
    T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h,
    const size_t input_w, const T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const T ratio_h, const T ratio_w) {
+    const size_t num_channels, const T ratio_h, const T ratio_w,
+    const bool align_corners, const int align_mode) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
@ -130,15 +150,24 @@ __global__ void KeBilinearInterpBw(
    int channel_id = out_id_w / out_img_size;

    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = ratio_h * out_img_idy;
+    int in_img_idy = (align_mode == 0 && !align_corners)
+                         ? ratio_h * (out_img_idy + 0.5) - 0.5
+                         : ratio_h * out_img_idy;
    int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
-    T h1lambda = ratio_h * out_img_idy - in_img_idy;
+    T h1lambda = (align_mode == 0 && !align_corners)
+                     ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
+                     : ratio_h * out_img_idy - in_img_idy;
+
    T h2lambda = 1.f - h1lambda;

    int out_img_idx = tid % out_img_w;
-    int in_img_idx = ratio_w * out_img_idx;
+    int in_img_idx = (align_mode == 0 && !align_corners)
+                         ? ratio_w * (out_img_idx + 0.5) - 0.5
+                         : ratio_w * out_img_idx;
    int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
-    T w1lambda = ratio_w * out_img_idx - in_img_idx;
+    T w1lambda = (align_mode == 0 && !align_corners)
+                     ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
+                     : ratio_w * out_img_idx - in_img_idx;
    T w2lambda = 1.f - w1lambda;

    T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
@ -175,6 +204,9 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
      out_w = size_data[1];
    }

+    bool align_corners = ctx.Attr<bool>("align_corners");
+    int align_mode = ctx.Attr<int>("align_mode");
+
    int n = input->dims()[0];
    int c = input->dims()[1];
    int in_h = input->dims()[2];
@ -188,10 +220,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
    int in_chw = c * in_hw;
    int out_chw = c * out_hw;

-    float ratio_h =
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
-    float ratio_w =
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h = (align_corners && out_h > 1)
+                        ? static_cast<float>(in_h - 1) / (out_h - 1)
+                        : static_cast<float>(in_h) / out_h;
+    float ratio_w = (align_corners && out_w > 1)
+                        ? static_cast<float>(in_w - 1) / (out_w - 1)
+                        : static_cast<float>(in_w) / out_w;

    if (in_h == out_h && in_w == out_w) {
      framework::TensorCopy(*input, ctx.GetPlace(), output);
@ -206,12 +240,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
      KeNearestNeighborInterpFw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n,
-          out_chw, c, ratio_h, ratio_w);
+          out_chw, c, ratio_h, ratio_w, align_corners);
    } else if ("bilinear" == interp_method) {
      KeBilinearInterpFw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n,
-          out_chw, c, ratio_h, ratio_w);
+          out_chw, c, ratio_h, ratio_w, align_corners, align_mode);
    }
  }
 };
@ -234,6 +268,10 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
    int out_h = ctx.Attr<int>("out_h");
    int out_w = ctx.Attr<int>("out_w");
    auto out_size = ctx.Input<Tensor>("OutSize");
+
+    bool align_corners = ctx.Attr<bool>("align_corners");
+    int align_mode = ctx.Attr<int>("align_mode");
+
    if (out_size != nullptr) {
      Tensor sizes;
      framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes);
@ -252,10 +290,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
    int in_chw = c * in_hw;
    int out_chw = c * out_hw;

-    float ratio_h =
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
-    float ratio_w =
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h = (align_corners && out_h > 1)
+                        ? static_cast<float>(in_h - 1) / (out_h - 1)
+                        : static_cast<float>(in_h) / out_h;
+    float ratio_w = (align_corners && out_w > 1)
+                        ? static_cast<float>(in_w - 1) / (out_w - 1)
+                        : static_cast<float>(in_w) / out_w;

    if (in_h == out_h && in_w == out_w) {
      framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
@ -270,12 +310,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
      KeNearestNeighborInterpBw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h,
-          out_w, n, out_chw, c, ratio_h, ratio_w);
+          out_w, n, out_chw, c, ratio_h, ratio_w, align_corners);
    } else if ("bilinear" == interp_method) {
      KeBilinearInterpBw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h,
-          out_w, n, out_chw, c, ratio_h, ratio_w);
+          out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode);
    }
  }
 };
--- a/paddle/fluid/operators/interpolate_op.h
+++ b/paddle/fluid/operators/interpolate_op.h
@ -26,14 +26,17 @@ template <typename T>
 static void NearestNeighborInterpolate(const Tensor& input, Tensor* output,
                                       const float ratio_h, const float ratio_w,
                                       const int n, const int c,
-                                       const int out_h, const int out_w) {
+                                       const int out_h, const int out_w,
+                                       const bool align_corners) {
  auto input_t = EigenTensor<T, 4>::From(input);
  auto output_t = EigenTensor<T, 4>::From(*output);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int in_k = static_cast<int>(ratio_h * k + 0.5);
+    int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
+                               : static_cast<int>(ratio_h * k);

    for (int l = 0; l < out_w; l++) {
-      int in_l = static_cast<int>(ratio_w * l + 0.5);
+      int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
+                                 : static_cast<int>(ratio_w * l);

      for (int i = 0; i < n; i++) {    // loop for batches
        for (int j = 0; j < c; j++) {  // loop for channels
@ -48,20 +51,29 @@ template <typename T>
 static void BilinearInterpolation(const Tensor& input, Tensor* output,
                                  const float ratio_h, const float ratio_w,
                                  const int in_h, const int in_w, const int n,
-                                  const int c, const int out_h,
-                                  const int out_w) {
+                                  const int c, const int out_h, const int out_w,
+                                  const bool align_corners,
+                                  const bool align_mode) {
  auto input_t = EigenTensor<T, 4>::From(input);
  auto output_t = EigenTensor<T, 4>::From(*output);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int y_n = static_cast<int>(ratio_h * k);
+    int y_n = (align_mode == 0 && !align_corners)
+                  ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
+                  : static_cast<int>(ratio_h * k);
    int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float d_n = ratio_h * k - y_n;
+    float d_n = (align_mode == 0 && !align_corners)
+                    ? ratio_h * (k + 0.5) - 0.5 - y_n
+                    : ratio_h * k - y_n;
    float d_s = 1.f - d_n;

    for (int l = 0; l < out_w; l++) {
-      int x_w = static_cast<int>(ratio_w * l);
+      int x_w = (align_mode == 0 && !align_corners)
+                    ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
+                    : static_cast<int>(ratio_w * l);
      int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-      float d_w = ratio_w * l - x_w;
+      float d_w = (align_mode == 0 && !align_corners)
+                      ? ratio_w * (l + 0.5) - 0.5 - x_w
+                      : ratio_w * l - x_w;
      float d_e = 1.f - d_w;

      for (int i = 0; i < n; i++) {    // loop for batches
@ -78,19 +90,20 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output,
 }

 template <typename T>
-static void NearestNeighborInterpolateGrad(const Tensor& output_grad,
-                                           Tensor* input_grad,
-                                           const float ratio_h,
-                                           const float ratio_w, const int n,
-                                           const int c, const int out_h,
-                                           const int out_w) {
+static void NearestNeighborInterpolateGrad(
+    const Tensor& output_grad, Tensor* input_grad, const float ratio_h,
+    const float ratio_w, const int n, const int c, const int out_h,
+    const int out_w, const bool align_corners) {
  auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
+
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int in_k = static_cast<int>(ratio_h * k + 0.5);
+    int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
+                               : static_cast<int>(ratio_h * k);

    for (int l = 0; l < out_w; l++) {
-      int in_l = static_cast<int>(ratio_w * l + 0.5);
+      int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
+                                 : static_cast<int>(ratio_w * l);

      for (int i = 0; i < n; i++) {    // loop for batches
        for (int j = 0; j < c; j++) {  // loop for channels
@ -106,19 +119,29 @@ static void BilinearInterpolationGrad(const Tensor& output_grad,
                                      Tensor* input_grad, const float ratio_h,
                                      const float ratio_w, const int in_h,
                                      const int in_w, const int n, const int c,
-                                      const int out_h, const int out_w) {
+                                      const int out_h, const int out_w,
+                                      const bool align_corners,
+                                      const int align_mode) {
  auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int y_n = static_cast<int>(ratio_h * k);
+    int y_n = (align_mode == 0 && !align_corners)
+                  ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
+                  : static_cast<int>(ratio_h * k);
    int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float d_n = ratio_h * k - y_n;
+    float d_n = (align_mode == 0 && !align_corners)
+                    ? ratio_h * (k + 0.5) - 0.5 - y_n
+                    : ratio_h * k - y_n;
    float d_s = 1.f - d_n;

    for (int l = 0; l < out_w; l++) {
-      int x_w = static_cast<int>(ratio_w * l);
+      int x_w = (align_mode == 0 && !align_corners)
+                    ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
+                    : static_cast<int>(ratio_w * l);
      int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-      float d_w = ratio_w * l - x_w;
+      float d_w = (align_mode == 0 && !align_corners)
+                      ? ratio_w * (l + 0.5) - 0.5 - x_w
+                      : ratio_w * l - x_w;
      float d_e = 1.f - d_w;

      for (int i = 0; i < n; i++) {    // loop for batches
@ -134,7 +157,6 @@ static void BilinearInterpolationGrad(const Tensor& output_grad,
    }
  }
 }
-
 template <typename T>
 class InterpolateKernel : public framework::OpKernel<T> {
 public:
@ -151,6 +173,8 @@ class InterpolateKernel : public framework::OpKernel<T> {
      out_h = out_size_data[0];
      out_w = out_size_data[1];
    }
+    bool align_corners = ctx.Attr<bool>("align_corners");
+    int align_mode = ctx.Attr<int>("align_mode");

    const int n = input->dims()[0];
    const int c = input->dims()[1];
@ -168,17 +192,19 @@ class InterpolateKernel : public framework::OpKernel<T> {
      return;
    }

-    float ratio_h =
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
-    float ratio_w =
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h = (align_corners && out_h > 1)
+                        ? static_cast<float>(in_h - 1) / (out_h - 1)
+                        : static_cast<float>(in_h) / out_h;
+    float ratio_w = (align_corners && out_w > 1)
+                        ? static_cast<float>(in_w - 1) / (out_w - 1)
+                        : static_cast<float>(in_w) / out_w;

    if ("bilinear" == interp_method) {
      BilinearInterpolation<T>(*input, output, ratio_h, ratio_w, in_h, in_w, n,
-                               c, out_h, out_w);
+                               c, out_h, out_w, align_corners, align_mode);
    } else if ("nearest" == interp_method) {
      NearestNeighborInterpolate<T>(*input, output, ratio_h, ratio_w, n, c,
-                                    out_h, out_w);
+                                    out_h, out_w, align_corners);
    }
  }
 };
@ -200,6 +226,8 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
      out_h = out_size_data[0];
      out_w = out_size_data[1];
    }
+    bool align_corners = ctx.Attr<bool>("align_corners");
+    int align_mode = ctx.Attr<int>("align_mode");

    const int n = input->dims()[0];
    const int c = input->dims()[1];
@ -217,17 +245,21 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
      return;
    }

-    float ratio_h =
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
-    float ratio_w =
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h = (align_corners && out_h > 1)
+                        ? static_cast<float>(in_h - 1) / (out_h - 1)
+                        : static_cast<float>(in_h) / out_h;
+    float ratio_w = (align_corners && out_w > 1)
+                        ? static_cast<float>(in_w - 1) / (out_w - 1)
+                        : static_cast<float>(in_w) / out_w;

    if ("bilinear" == interp_method) {
      BilinearInterpolationGrad<T>(*output_grad, input_grad, ratio_h, ratio_w,
-                                   in_h, in_w, n, c, out_h, out_w);
+                                   in_h, in_w, n, c, out_h, out_w,
+                                   align_corners, align_mode);
    } else if ("nearest" == interp_method) {
      NearestNeighborInterpolateGrad<T>(*output_grad, input_grad, ratio_h,
-                                        ratio_w, n, c, out_h, out_w);
+                                        ratio_w, n, c, out_h, out_w,
+                                        align_corners);
    }
  }
 };
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
--- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
@ -20,7 +20,13 @@ from op_test import OpTest
 import paddle.fluid.core as core


-def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None):
+def bilinear_interp_np(input,
+                       out_h,
+                       out_w,
+                       out_size=None,
+                       actual_shape=None,
+                       align_corners=True,
+                       align_mode=0):
    """bilinear interpolation implement in shape [N, C, H, W]"""
    if out_size is not None:
        out_h = out_size[0]
@ -29,25 +35,41 @@ def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None):
        out_h = actual_shape[0]
        out_w = actual_shape[1]
    batch_size, channel, in_h, in_w = input.shape
-    if out_h > 1:
+
+    ratio_h = ratio_w = 0.0
+    if (align_corners and out_h > 1):
        ratio_h = (in_h - 1.0) / (out_h - 1.0)
    else:
-        ratio_h = 0.0
-    if out_w > 1:
+        ratio_h = 1.0 * in_h / out_h
+    if (align_corners and out_w > 1):
        ratio_w = (in_w - 1.0) / (out_w - 1.0)
    else:
-        ratio_w = 0.0
+        ratio_w = 1.0 * in_w / out_w

    out = np.zeros((batch_size, channel, out_h, out_w))
+
    for i in range(out_h):
-        h = int(ratio_h * i)
+        if (align_mode == 0 and not align_corners):
+            h = int(ratio_h * (i + 0.5) - 0.5)
+        else:
+            h = int(ratio_h * i)
+
        hid = 1 if h < in_h - 1 else 0
-        h1lambda = ratio_h * i - h
+        if (align_mode == 0 and not align_corners):
+            h1lambda = ratio_h * (i + 0.5) - 0.5 - h
+        else:
+            h1lambda = ratio_h * i - h
        h2lambda = 1.0 - h1lambda
        for j in range(out_w):
-            w = int(ratio_w * j)
+            if (align_mode == 0 and not align_corners):
+                w = int(ratio_w * (j + 0.5) - 0.5)
+            else:
+                w = int(ratio_w * j)
            wid = 1 if w < in_w - 1 else 0
-            w1lambda = ratio_w * j - w
+            if (align_mode == 0 and not align_corners):
+                w1lambda = ratio_w * (j + 0.5) - 0.5 - w
+            else:
+                w1lambda = ratio_w * j - w
            w2lambda = 1.0 - w1lambda

            out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] +
@ -66,7 +88,8 @@ class TestBilinearInterpOp(OpTest):
        input_np = np.random.random(self.input_shape).astype("float32")

        output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
-                                       self.out_size, self.actual_shape)
+                                       self.out_size, self.actual_shape,
+                                       self.align_corners, self.align_mode)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
@ -75,7 +98,9 @@ class TestBilinearInterpOp(OpTest):
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
+            'align_corners': self.align_corners,
+            'align_mode': self.align_mode
        }
        self.outputs = {'Out': output_np}

@ -91,6 +116,8 @@ class TestBilinearInterpOp(OpTest):
        self.out_h = 2
        self.out_w = 2
        self.out_size = np.array([3, 3]).astype("int32")
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase1(TestBilinearInterpOp):
@ -99,6 +126,8 @@ class TestBilinearInterpCase1(TestBilinearInterpOp):
        self.input_shape = [4, 1, 7, 8]
        self.out_h = 1
        self.out_w = 1
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase2(TestBilinearInterpOp):
@ -107,6 +136,8 @@ class TestBilinearInterpCase2(TestBilinearInterpOp):
        self.input_shape = [3, 3, 9, 6]
        self.out_h = 12
        self.out_w = 12
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase3(TestBilinearInterpOp):
@ -115,6 +146,8 @@ class TestBilinearInterpCase3(TestBilinearInterpOp):
        self.input_shape = [1, 1, 128, 64]
        self.out_h = 64
        self.out_w = 128
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase4(TestBilinearInterpOp):
@ -124,6 +157,8 @@ class TestBilinearInterpCase4(TestBilinearInterpOp):
        self.out_h = 1
        self.out_w = 1
        self.out_size = np.array([2, 2]).astype("int32")
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase5(TestBilinearInterpOp):
@ -133,6 +168,8 @@ class TestBilinearInterpCase5(TestBilinearInterpOp):
        self.out_h = 12
        self.out_w = 12
        self.out_size = np.array([11, 11]).astype("int32")
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase6(TestBilinearInterpOp):
@ -142,6 +179,8 @@ class TestBilinearInterpCase6(TestBilinearInterpOp):
        self.out_h = 64
        self.out_w = 128
        self.out_size = np.array([65, 129]).astype("int32")
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpActualShape(TestBilinearInterpOp):
@ -151,6 +190,8 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp):
        self.out_h = 64
        self.out_w = 32
        self.out_size = np.array([66, 40]).astype("int32")
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpOpUint8(OpTest):
@ -162,14 +203,17 @@ class TestBilinearInterpOpUint8(OpTest):
        input_np = np.random.randint(
            low=0, high=256, size=self.input_shape).astype("uint8")
        output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
-                                       self.out_size, self.actual_shape)
+                                       self.out_size, self.actual_shape,
+                                       self.align_corners, self.align_mode)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
+            'align_corners': self.align_corners,
+            'align_mode': self.align_mode
        }
        self.outputs = {'Out': output_np}

@ -181,6 +225,8 @@ class TestBilinearInterpOpUint8(OpTest):
        self.input_shape = [1, 3, 9, 6]
        self.out_h = 10
        self.out_w = 9
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
@ -189,6 +235,8 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
        self.input_shape = [2, 3, 128, 64]
        self.out_h = 120
        self.out_w = 50
+        self.align_corners = False
+        self.align_mode = 0


 class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
@ -198,6 +246,26 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
        self.out_h = 5
        self.out_w = 13
        self.out_size = np.array([6, 15]).astype("int32")
+        self.align_corners = False
+        self.align_mode = 0
+
+
+class TestBilinearInterpOtherMethod1(TestBilinearInterpOp):
+    def set_align_mode(self):
+        self.align_mode = 1
+        self.align_corners = False
+
+
+class TestBilinearInterpWithMethod2(TestBilinearInterpOp):
+    def set_align_mode(self):
+        self.align_corners = True
+        self.align_mode = 1
+
+
+class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
+    def set_align_mode(self):
+        self.align_corners = True
+        self.align_mode = 0


 if __name__ == "__main__":
--- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py
+++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py
@ -24,7 +24,8 @@ def nearest_neighbor_interp_np(X,
                               out_h,
                               out_w,
                               out_size=None,
-                               actual_shape=None):
+                               actual_shape=None,
+                               align_corners=True):
    """nearest neighbor interpolation implement in shape [N, C, H, W]"""
    if out_size is not None:
        out_h = out_size[0]
@ -35,17 +36,29 @@ def nearest_neighbor_interp_np(X,
    n, c, in_h, in_w = X.shape

    ratio_h = ratio_w = 0.0
-    if out_h > 1:
+    if (align_corners and out_h > 1):
        ratio_h = (in_h - 1.0) / (out_h - 1.0)
-    if out_w > 1:
+    else:
+        ratio_h = 1.0 * in_h / out_h
+    if (align_corners and out_w > 1):
        ratio_w = (in_w - 1.0) / (out_w - 1.0)
+    else:
+        ratio_w = 1.0 * in_w / out_w

    out = np.zeros((n, c, out_h, out_w))
-    for i in range(out_h):
-        in_i = int(ratio_h * i + 0.5)
-        for j in range(out_w):
-            in_j = int(ratio_w * j + 0.5)
-            out[:, :, i, j] = X[:, :, in_i, in_j]
+
+    if align_corners:
+        for i in range(out_h):
+            in_i = int(ratio_h * i + 0.5)
+            for j in range(out_w):
+                in_j = int(ratio_w * j + 0.5)
+                out[:, :, i, j] = X[:, :, in_i, in_j]
+    else:
+        for i in range(out_h):
+            in_i = int(ratio_h * i)
+            for j in range(out_w):
+                in_j = int(ratio_w * j)
+                out[:, :, i, j] = X[:, :, in_i, in_j]

    return out.astype(X.dtype)

@ -59,7 +72,8 @@ class TestNearestInterpOp(OpTest):
        input_np = np.random.random(self.input_shape).astype("float32")

        output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
-                                               self.out_size, self.actual_shape)
+                                               self.out_size, self.actual_shape,
+                                               self.align_corners)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
@ -68,7 +82,8 @@ class TestNearestInterpOp(OpTest):
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
+            'align_corners': self.align_corners,
        }
        self.outputs = {'Out': output_np}

@ -84,6 +99,7 @@ class TestNearestInterpOp(OpTest):
        self.out_h = 2
        self.out_w = 2
        self.out_size = np.array([3, 3]).astype("int32")
+        self.align_corners = True


 class TestNearestNeighborInterpCase1(TestNearestInterpOp):
@ -92,6 +108,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp):
        self.input_shape = [4, 1, 7, 8]
        self.out_h = 1
        self.out_w = 1
+        self.align_corners = False


 class TestNearestNeighborInterpCase2(TestNearestInterpOp):
@ -100,6 +117,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp):
        self.input_shape = [3, 3, 9, 6]
        self.out_h = 12
        self.out_w = 12
+        self.align_corners = True


 class TestNearestNeighborInterpCase3(TestNearestInterpOp):
@ -108,6 +126,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp):
        self.input_shape = [1, 1, 128, 64]
        self.out_h = 64
        self.out_w = 128
+        self.align_corners = True


 class TestNearestNeighborInterpCase4(TestNearestInterpOp):
@ -117,6 +136,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp):
        self.out_h = 1
        self.out_w = 1
        self.out_size = np.array([2, 2]).astype("int32")
+        self.align_corners = True


 class TestNearestNeighborInterpCase5(TestNearestInterpOp):
@ -126,6 +146,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp):
        self.out_h = 12
        self.out_w = 12
        self.out_size = np.array([11, 11]).astype("int32")
+        self.align_corners = True


 class TestNearestNeighborInterpCase6(TestNearestInterpOp):
@ -135,6 +156,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp):
        self.out_h = 64
        self.out_w = 128
        self.out_size = np.array([65, 129]).astype("int32")
+        self.align_corners = True


 class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
@ -144,6 +166,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
        self.out_h = 64
        self.out_w = 32
        self.out_size = np.array([66, 40]).astype("int32")
+        self.align_corners = True


 class TestNearestInterpOpUint8(OpTest):
@ -155,14 +178,16 @@ class TestNearestInterpOpUint8(OpTest):
        input_np = np.random.randint(
            low=0, high=256, size=self.input_shape).astype("uint8")
        output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
-                                               self.out_size, self.actual_shape)
+                                               self.out_size, self.actual_shape,
+                                               self.align_corners)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
+            'align_corners': self.align_corners
        }
        self.outputs = {'Out': output_np}

@ -174,6 +199,7 @@ class TestNearestInterpOpUint8(OpTest):
        self.input_shape = [1, 3, 9, 6]
        self.out_h = 10
        self.out_w = 9
+        self.align_corners = True


 class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
@ -182,6 +208,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
        self.input_shape = [2, 3, 128, 64]
        self.out_h = 120
        self.out_w = 50
+        self.align_corners = False


 class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
@ -191,6 +218,12 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
        self.out_h = 5
        self.out_w = 13
        self.out_size = np.array([6, 15]).astype("int32")
+        self.align_corners = True
+
+
+# Nearest-neighbour case intended to run with align_corners switched off.
+# NOTE(review): nothing visible in this excerpt calls set_align_corners();
+# the other cases in this file assign self.align_corners directly next to
+# out_h/out_w instead. Confirm the base setUp invokes this hook -- if it does
+# not, the override is dead and the test runs with the base class's
+# align_corners value.
+class TestNearestInterpWithoutCorners(TestNearestInterpOp):
+    def set_align_corners(self):
+        self.align_corners = False


 if __name__ == "__main__":