test=develop

7 years ago · e402c0ec7d
parent 334f697da9
commit e402c0ec7d
7 changed files with 551 additions and 107 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -142,10 +142,10 @@ paddle.fluid.layers.label_smooth ArgSpec(args=['label', 'prior_dist', 'epsilon',
 paddle.fluid.layers.roi_pool ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0))
 paddle.fluid.layers.roi_align ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None))
 paddle.fluid.layers.dice_loss ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,))
-paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None))
+paddle.fluid.layers.image_resize ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1))
 paddle.fluid.layers.image_resize_short ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',))
-paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.layers.resize_bilinear ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1))
-paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.layers.resize_nearest ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True))
 paddle.fluid.layers.gather ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_scatter ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,))
--- a/paddle/fluid/operators/interpolate_op.cc
+++ b/paddle/fluid/operators/interpolate_op.cc
@ -82,6 +82,18 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
                         "bilinear interpolation and \"nearest\" for nearest "
                         "neighbor interpolation.")
        .SetDefault("bilinear");
    AddAttr<bool>(
        "align_corners",
        "an optional bool. Defaults to True. "
        "If True, the centers of 4 corner pixels of the input and output "
        "tensors are aligned, preserving the values at the corner pixels, "
        "if False, are not aligned")
        .SetDefault(true);
    AddAttr<int>("align_mode",
                 "(int, default \'1\'), optional for bilinear interpolation, "
                 "can be \'0\' for src_idx = scale*(dst_index+0.5)-0.5 , "
                 "can be \'1\' for src_idx = scale*dst_index .")
        .SetDefault(1);
    AddComment(R"DOC(
          This operator samples input X to given output shape by using specified
          interpolation method, the interpolation methods can be \"nearest\"
@ -98,6 +110,64 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
          to perform linear interpolation first in one direction, and then 
          again in the other direction.
          Align_corners and align_mode are optional parameters; the calculation method 
          of interpolation can be selected by them.
          Example:
          For scale:
            if align_corners = True and out_{size}>1 :
              scale_{factor} = (in_{size}-1.0)/(out_{size}-1.0)
            else:
              scale_{factor} = float(in_{size}/out_{size})
          Nearest neighbor interpolation:
          if:
              align_corners = False
              input : (N,C,H_in,W_in)
              output: (N,C,H_out,W_out) where:
              H_out = \left \lfloor {H_{in} * scale_{factor}} \right \rfloor
              W_out = \left \lfloor {W_{in} * scale_{factor}} \right \rfloor
          else:
              align_corners = True
              input : (N,C,H_in,W_in)
              output: (N,C,H_out,W_out) where:
              H_out = round(H_{in} * scale_{factor})
              W_out = round(W_{in} * scale_{factor})
          Bilinear interpolation:
          if:
              align_corners = False , align_mode = 0
              input : (N,C,H_in,W_in)
              output: (N,C,H_out,W_out) where:
              H_out = (H_{in}+0.5) * scale_{factor} - 0.5
              W_out = (W_{in}+0.5) * scale_{factor} - 0.5
          else:
              input : (N,C,H_in,W_in)
              output: (N,C,H_out,W_out) where:
              H_out = H_{in} * scale_{factor}
              W_out = W_{in} * scale_{factor}
          For details of nearest neighbor interpolation, please refer to Wikipedia: 
          https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation
--- a/paddle/fluid/operators/interpolate_op.cu
+++ b/paddle/fluid/operators/interpolate_op.cu
@ -23,7 +23,8 @@ __global__ void KeNearestNeighborInterpFw(
    const T* in, const size_t in_img_h, const size_t in_img_w,
    const size_t input_h, const size_t input_w, T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const float ratio_h, const float ratio_w) {
+    const size_t num_channels, const float ratio_h, const float ratio_w,
    const bool align_corners) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
@ -35,10 +36,14 @@ __global__ void KeNearestNeighborInterpFw(
    int channel_id = out_id_w / out_img_size;
    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = static_cast<int>(ratio_h * out_img_idy + 0.5);
+    int in_img_idy = (align_corners)
                         ? static_cast<int>(ratio_h * out_img_idy + 0.5)
                         : static_cast<int>(ratio_h * out_img_idy);
    int out_img_idx = tid % out_img_w;
-    int in_img_idx = static_cast<int>(ratio_w * out_img_idx + 0.5);
+    int in_img_idx = (align_corners)
                         ? static_cast<int>(ratio_w * out_img_idx + 0.5)
                         : static_cast<int>(ratio_w * out_img_idx);
    out[tid] = in[out_id_h * input_w + channel_id * in_img_size +
                  in_img_idy * in_img_w + in_img_idx];
@ -50,7 +55,8 @@ __global__ void KeNearestNeighborInterpBw(
    T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h,
    const size_t input_w, const T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const float ratio_h, const float ratio_w) {
+    const size_t num_channels, const float ratio_h, const float ratio_w,
    const bool align_corners) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
@ -62,10 +68,14 @@ __global__ void KeNearestNeighborInterpBw(
    int channel_id = out_id_w / out_img_size;
    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = static_cast<int>(ratio_h * out_img_idy + 0.5);
+    int in_img_idy = (align_corners)
                         ? static_cast<int>(ratio_h * out_img_idy + 0.5)
                         : static_cast<int>(ratio_h * out_img_idy);
    int out_img_idx = tid % out_img_w;
-    int in_img_idx = static_cast<int>(ratio_w * out_img_idx + 0.5);
+    int in_img_idx = (align_corners)
                         ? static_cast<int>(ratio_w * out_img_idx + 0.5)
                         : static_cast<int>(ratio_w * out_img_idx);
    T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
                    in_img_idy * in_img_w + in_img_idx];
@ -79,10 +89,12 @@ __global__ void KeBilinearInterpFw(
    const T* in, const size_t in_img_h, const size_t in_img_w,
    const size_t input_h, const size_t input_w, T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const float ratio_h, const float ratio_w) {
+    const size_t num_channels, const float ratio_h, const float ratio_w,
    const bool align_corners, const int align_mode) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  bool align_flag = (align_mode == 0 && !align_corners);
  for (; tid < nthreads; tid += stride) {
    int out_id_h = tid / output_w;
    int out_id_w = tid % output_w;
@ -91,15 +103,23 @@ __global__ void KeBilinearInterpFw(
    int channel_id = out_id_w / out_img_size;
    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = ratio_h * out_img_idy;
+    int in_img_idy = align_flag
                         ? static_cast<int>(ratio_h * (out_img_idy + 0.5) - 0.5)
                         : static_cast<int>(ratio_h * out_img_idy);
    in_img_idy = (in_img_idy > 0) ? in_img_idy : 0;
    int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
-    T h1lambda = ratio_h * out_img_idy - in_img_idy;
+    T h1lambda = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
                            : ratio_h * out_img_idy - in_img_idy;
    T h2lambda = 1.f - h1lambda;
    int out_img_idx = tid % out_img_w;
-    int in_img_idx = ratio_w * out_img_idx;
+    int in_img_idx = align_flag
                         ? static_cast<int>(ratio_w * (out_img_idx + 0.5) - 0.5)
                         : static_cast<int>(ratio_w * out_img_idx);
    in_img_idx = (in_img_idx > 0) ? in_img_idx : 0;
    int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
-    T w1lambda = ratio_w * out_img_idx - in_img_idx;
+    T w1lambda = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
                            : ratio_w * out_img_idx - in_img_idx;
    T w2lambda = 1.f - w1lambda;
    const T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
@ -118,10 +138,12 @@ __global__ void KeBilinearInterpBw(
    T* in, const size_t in_img_h, const size_t in_img_w, const size_t input_h,
    const size_t input_w, const T* out, const size_t out_img_h,
    const size_t out_img_w, const size_t output_h, const size_t output_w,
-    const size_t num_channels, const T ratio_h, const T ratio_w) {
+    const size_t num_channels, const T ratio_h, const T ratio_w,
    const bool align_corners, const int align_mode) {
  int nthreads = output_h * output_w;
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = blockDim.x * gridDim.x;
  bool align_flag = (align_mode == 0 && !align_corners);
  for (; tid < nthreads; tid += stride) {
    int out_id_h = tid / output_w;
    int out_id_w = tid % output_w;
@ -130,15 +152,22 @@ __global__ void KeBilinearInterpBw(
    int channel_id = out_id_w / out_img_size;
    int out_img_idy = (out_id_w % out_img_size) / out_img_w;
-    int in_img_idy = ratio_h * out_img_idy;
+    int in_img_idy = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5
                                : ratio_h * out_img_idy;
    in_img_idy = (in_img_idy > 0) ? in_img_idy : 0;
    int h_id = (in_img_idy < in_img_h - 1) ? 1 : 0;
-    T h1lambda = ratio_h * out_img_idy - in_img_idy;
+    T h1lambda = align_flag ? ratio_h * (out_img_idy + 0.5) - 0.5 - in_img_idy
                            : ratio_h * out_img_idy - in_img_idy;
    T h2lambda = 1.f - h1lambda;
    int out_img_idx = tid % out_img_w;
-    int in_img_idx = ratio_w * out_img_idx;
+    int in_img_idx = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5
                                : ratio_w * out_img_idx;
    in_img_idx = (in_img_idx > 0) ? in_img_idx : 0;
    int w_id = (in_img_idx < in_img_w - 1) ? 1 : 0;
-    T w1lambda = ratio_w * out_img_idx - in_img_idx;
+    T w1lambda = align_flag ? ratio_w * (out_img_idx + 0.5) - 0.5 - in_img_idx
                            : ratio_w * out_img_idx - in_img_idx;
    T w2lambda = 1.f - w1lambda;
    T* in_pos = &in[out_id_h * input_w + channel_id * in_img_size +
@ -175,6 +204,9 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
      out_w = size_data[1];
    }
    bool align_corners = ctx.Attr<bool>("align_corners");
    int align_mode = ctx.Attr<int>("align_mode");
    int n = input->dims()[0];
    int c = input->dims()[1];
    int in_h = input->dims()[2];
@ -188,10 +220,16 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
    int in_chw = c * in_hw;
    int out_chw = c * out_hw;
-    float ratio_h =
+    float ratio_h = 0.f;
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w = 0.f;
-    float ratio_w =
+    if (out_h > 1) {
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+      ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
                                : static_cast<float>(in_h) / out_h;
    }
    if (out_w > 1) {
      ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
                                : static_cast<float>(in_w) / out_w;
    }
    if (in_h == out_h && in_w == out_w) {
      framework::TensorCopy(*input, ctx.GetPlace(), output);
@ -206,12 +244,12 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
      KeNearestNeighborInterpFw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n,
-          out_chw, c, ratio_h, ratio_w);
+          out_chw, c, ratio_h, ratio_w, align_corners);
    } else if ("bilinear" == interp_method) {
      KeBilinearInterpFw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n,
-          out_chw, c, ratio_h, ratio_w);
+          out_chw, c, ratio_h, ratio_w, align_corners, align_mode);
    }
  }
 };
@ -234,6 +272,10 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
    int out_h = ctx.Attr<int>("out_h");
    int out_w = ctx.Attr<int>("out_w");
    auto out_size = ctx.Input<Tensor>("OutSize");
    bool align_corners = ctx.Attr<bool>("align_corners");
    int align_mode = ctx.Attr<int>("align_mode");
    if (out_size != nullptr) {
      Tensor sizes;
      framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes);
@ -252,10 +294,16 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
    int in_chw = c * in_hw;
    int out_chw = c * out_hw;
-    float ratio_h =
+    float ratio_h = 0.f;
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w = 0.f;
-    float ratio_w =
+    if (out_h > 1) {
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+      ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
                                : static_cast<float>(in_h) / out_h;
    }
    if (out_w > 1) {
      ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
                                : static_cast<float>(in_w) / out_w;
    }
    if (in_h == out_h && in_w == out_w) {
      framework::TensorCopy(*output_grad, ctx.GetPlace(), input_grad);
@ -270,12 +318,12 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
      KeNearestNeighborInterpBw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h,
-          out_w, n, out_chw, c, ratio_h, ratio_w);
+          out_w, n, out_chw, c, ratio_h, ratio_w, align_corners);
    } else if ("bilinear" == interp_method) {
      KeBilinearInterpBw<
          T><<<grid_dim, 512, 0, ctx.cuda_device_context().stream()>>>(
          input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h,
-          out_w, n, out_chw, c, ratio_h, ratio_w);
+          out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode);
    }
  }
 };
--- a/paddle/fluid/operators/interpolate_op.h
+++ b/paddle/fluid/operators/interpolate_op.h
@ -26,14 +26,17 @@ template <typename T>
 static void NearestNeighborInterpolate(const Tensor& input, Tensor* output,
                                       const float ratio_h, const float ratio_w,
                                       const int n, const int c,
-                                       const int out_h, const int out_w) {
+                                       const int out_h, const int out_w,
                                       const bool align_corners) {
  auto input_t = EigenTensor<T, 4>::From(input);
  auto output_t = EigenTensor<T, 4>::From(*output);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int in_k = static_cast<int>(ratio_h * k + 0.5);
+    int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
                               : static_cast<int>(ratio_h * k);
    for (int l = 0; l < out_w; l++) {
-      int in_l = static_cast<int>(ratio_w * l + 0.5);
+      int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
                                 : static_cast<int>(ratio_w * l);
      for (int i = 0; i < n; i++) {    // loop for batches
        for (int j = 0; j < c; j++) {  // loop for channels
@ -48,20 +51,29 @@ template <typename T>
 static void BilinearInterpolation(const Tensor& input, Tensor* output,
                                  const float ratio_h, const float ratio_w,
                                  const int in_h, const int in_w, const int n,
-                                  const int c, const int out_h,
+                                  const int c, const int out_h, const int out_w,
-                                  const int out_w) {
+                                  const bool align_corners,
                                  const bool align_mode) {
  auto input_t = EigenTensor<T, 4>::From(input);
  auto output_t = EigenTensor<T, 4>::From(*output);
  bool align_flag = (align_mode == 0 && !align_corners);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int y_n = static_cast<int>(ratio_h * k);
+    int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
                         : static_cast<int>(ratio_h * k);
    y_n = (y_n > 0) ? y_n : 0;
    int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float d_n = ratio_h * k - y_n;
+    float d_n =
        align_flag ? ratio_h * (k + 0.5) - 0.5 - y_n : ratio_h * k - y_n;
    float d_s = 1.f - d_n;
    for (int l = 0; l < out_w; l++) {
-      int x_w = static_cast<int>(ratio_w * l);
+      int x_w = (align_mode == 0 && !align_corners)
                    ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
                    : static_cast<int>(ratio_w * l);
      x_w = (x_w > 0) ? x_w : 0;
      int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-      float d_w = ratio_w * l - x_w;
+      float d_w =
          align_flag ? ratio_w * (l + 0.5) - 0.5 - x_w : ratio_w * l - x_w;
      float d_e = 1.f - d_w;
      for (int i = 0; i < n; i++) {    // loop for batches
@ -78,19 +90,20 @@ static void BilinearInterpolation(const Tensor& input, Tensor* output,
 }
 template <typename T>
-static void NearestNeighborInterpolateGrad(const Tensor& output_grad,
+static void NearestNeighborInterpolateGrad(
-                                           Tensor* input_grad,
+    const Tensor& output_grad, Tensor* input_grad, const float ratio_h,
-                                           const float ratio_h,
+    const float ratio_w, const int n, const int c, const int out_h,
-                                           const float ratio_w, const int n,
+    const int out_w, const bool align_corners) {
                                           const int c, const int out_h,
                                           const int out_w) {
  auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int in_k = static_cast<int>(ratio_h * k + 0.5);
+    int in_k = (align_corners) ? static_cast<int>(ratio_h * k + 0.5)
                               : static_cast<int>(ratio_h * k);
    for (int l = 0; l < out_w; l++) {
-      int in_l = static_cast<int>(ratio_w * l + 0.5);
+      int in_l = (align_corners) ? static_cast<int>(ratio_w * l + 0.5)
                                 : static_cast<int>(ratio_w * l);
      for (int i = 0; i < n; i++) {    // loop for batches
        for (int j = 0; j < c; j++) {  // loop for channels
@ -106,19 +119,28 @@ static void BilinearInterpolationGrad(const Tensor& output_grad,
                                      Tensor* input_grad, const float ratio_h,
                                      const float ratio_w, const int in_h,
                                      const int in_w, const int n, const int c,
-                                      const int out_h, const int out_w) {
+                                      const int out_h, const int out_w,
                                      const bool align_corners,
                                      const int align_mode) {
  auto input_grad_t = EigenTensor<T, 4>::From(*input_grad);
  auto output_grad_t = EigenTensor<T, 4>::From(output_grad);
  bool align_flag = (align_mode == 0 && !align_corners);
  for (int k = 0; k < out_h; k++) {  // loop for images
-    int y_n = static_cast<int>(ratio_h * k);
+    int y_n = align_flag ? static_cast<int>(ratio_h * (k + 0.5) - 0.5)
                         : static_cast<int>(ratio_h * k);
    y_n = (y_n > 0) ? y_n : 0;
    int y_s = (y_n + 1) < (in_h - 1) ? (y_n + 1) : (in_h - 1);
-    float d_n = ratio_h * k - y_n;
+    float d_n =
        align_flag ? ratio_h * (k + 0.5) - 0.5 - y_n : ratio_h * k - y_n;
    float d_s = 1.f - d_n;
    for (int l = 0; l < out_w; l++) {
-      int x_w = static_cast<int>(ratio_w * l);
+      int x_w = align_flag ? static_cast<int>(ratio_w * (l + 0.5) - 0.5)
                           : static_cast<int>(ratio_w * l);
      x_w = (x_w > 0) ? x_w : 0;
      int x_e = (x_w + 1) < (in_w - 1) ? (x_w + 1) : (in_w - 1);
-      float d_w = ratio_w * l - x_w;
+      float d_w =
          align_flag ? ratio_w * (l + 0.5) - 0.5 - x_w : ratio_w * l - x_w;
      float d_e = 1.f - d_w;
      for (int i = 0; i < n; i++) {    // loop for batches
@ -134,7 +156,6 @@ static void BilinearInterpolationGrad(const Tensor& output_grad,
    }
  }
 }
 template <typename T>
 class InterpolateKernel : public framework::OpKernel<T> {
 public:
@ -151,6 +172,8 @@ class InterpolateKernel : public framework::OpKernel<T> {
      out_h = out_size_data[0];
      out_w = out_size_data[1];
    }
    bool align_corners = ctx.Attr<bool>("align_corners");
    int align_mode = ctx.Attr<int>("align_mode");
    const int n = input->dims()[0];
    const int c = input->dims()[1];
@ -168,17 +191,24 @@ class InterpolateKernel : public framework::OpKernel<T> {
      return;
    }
-    float ratio_h =
+    float ratio_h = 0.f;
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w = 0.f;
-    float ratio_w =
+
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+    if (out_h > 1) {
      ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
                                : static_cast<float>(in_h) / out_h;
    }
    if (out_w > 1) {
      ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
                                : static_cast<float>(in_w) / out_w;
    }
    if ("bilinear" == interp_method) {
      BilinearInterpolation<T>(*input, output, ratio_h, ratio_w, in_h, in_w, n,
-                               c, out_h, out_w);
+                               c, out_h, out_w, align_corners, align_mode);
    } else if ("nearest" == interp_method) {
      NearestNeighborInterpolate<T>(*input, output, ratio_h, ratio_w, n, c,
-                                    out_h, out_w);
+                                    out_h, out_w, align_corners);
    }
  }
 };
@ -200,6 +230,8 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
      out_h = out_size_data[0];
      out_w = out_size_data[1];
    }
    bool align_corners = ctx.Attr<bool>("align_corners");
    int align_mode = ctx.Attr<int>("align_mode");
    const int n = input->dims()[0];
    const int c = input->dims()[1];
@ -217,17 +249,26 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
      return;
    }
-    float ratio_h =
+    float ratio_h = 0.f;
-        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w = 0.f;
-    float ratio_w =
+
-        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;
+    if (out_h > 1) {
      ratio_h = (align_corners) ? static_cast<float>(in_h - 1) / (out_h - 1)
                                : static_cast<float>(in_h) / out_h;
    }
    if (out_w > 1) {
      ratio_w = (align_corners) ? static_cast<float>(in_w - 1) / (out_w - 1)
                                : static_cast<float>(in_w) / out_w;
    }
    if ("bilinear" == interp_method) {
      BilinearInterpolationGrad<T>(*output_grad, input_grad, ratio_h, ratio_w,
-                                   in_h, in_w, n, c, out_h, out_w);
+                                   in_h, in_w, n, c, out_h, out_w,
                                   align_corners, align_mode);
    } else if ("nearest" == interp_method) {
      NearestNeighborInterpolateGrad<T>(*output_grad, input_grad, ratio_h,
-                                        ratio_w, n, c, out_h, out_w);
+                                        ratio_w, n, c, out_h, out_w,
                                        align_corners);
    }
  }
 };
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
--- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
@ -20,7 +20,13 @@ from op_test import OpTest
 import paddle.fluid.core as core
-def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None):
+def bilinear_interp_np(input,
                       out_h,
                       out_w,
                       out_size=None,
                       actual_shape=None,
                       align_corners=True,
                       align_mode=0):
    """bilinear interpolation implement in shape [N, C, H, W]"""
    if out_size is not None:
        out_h = out_size[0]
@ -29,25 +35,45 @@ def bilinear_interp_np(input, out_h, out_w, out_size=None, actual_shape=None):
        out_h = actual_shape[0]
        out_w = actual_shape[1]
    batch_size, channel, in_h, in_w = input.shape
    ratio_h = ratio_w = 0.0
    if out_h > 1:
-        ratio_h = (in_h - 1.0) / (out_h - 1.0)
+        if (align_corners):
-    else:
+            ratio_h = (in_h - 1.0) / (out_h - 1.0)
-        ratio_h = 0.0
+        else:
            ratio_h = 1.0 * in_h / out_h
    if out_w > 1:
-        ratio_w = (in_w - 1.0) / (out_w - 1.0)
+        if (align_corners):
-    else:
+            ratio_w = (in_w - 1.0) / (out_w - 1.0)
-        ratio_w = 0.0
+        else:
            ratio_w = 1.0 * in_w / out_w
    out = np.zeros((batch_size, channel, out_h, out_w))
    for i in range(out_h):
-        h = int(ratio_h * i)
+        if (align_mode == 0 and not align_corners):
            h = int(ratio_h * (i + 0.5) - 0.5)
        else:
            h = int(ratio_h * i)
        h = max(0, h)
        hid = 1 if h < in_h - 1 else 0
-        h1lambda = ratio_h * i - h
+        if (align_mode == 0 and not align_corners):
            h1lambda = ratio_h * (i + 0.5) - 0.5 - h
        else:
            h1lambda = ratio_h * i - h
        h2lambda = 1.0 - h1lambda
        for j in range(out_w):
-            w = int(ratio_w * j)
+            if (align_mode == 0 and not align_corners):
                w = int(ratio_w * (j + 0.5) - 0.5)
            else:
                w = int(ratio_w * j)
            w = max(0, w)
            wid = 1 if w < in_w - 1 else 0
-            w1lambda = ratio_w * j - w
+            if (align_mode == 0 and not align_corners):
                w1lambda = ratio_w * (j + 0.5) - 0.5 - w
            else:
                w1lambda = ratio_w * j - w
            w2lambda = 1.0 - w1lambda
            out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] +
@ -66,7 +92,8 @@ class TestBilinearInterpOp(OpTest):
        input_np = np.random.random(self.input_shape).astype("float32")
        output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
-                                       self.out_size, self.actual_shape)
+                                       self.out_size, self.actual_shape,
                                       self.align_corners, self.align_mode)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
@ -75,7 +102,9 @@ class TestBilinearInterpOp(OpTest):
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
            'align_corners': self.align_corners,
            'align_mode': self.align_mode
        }
        self.outputs = {'Out': output_np}
@ -91,6 +120,8 @@ class TestBilinearInterpOp(OpTest):
        self.out_h = 2
        self.out_w = 2
        self.out_size = np.array([3, 3]).astype("int32")
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase1(TestBilinearInterpOp):
@ -99,6 +130,8 @@ class TestBilinearInterpCase1(TestBilinearInterpOp):
        self.input_shape = [4, 1, 7, 8]
        self.out_h = 1
        self.out_w = 1
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase2(TestBilinearInterpOp):
@ -107,6 +140,8 @@ class TestBilinearInterpCase2(TestBilinearInterpOp):
        self.input_shape = [3, 3, 9, 6]
        self.out_h = 12
        self.out_w = 12
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase3(TestBilinearInterpOp):
@ -115,6 +150,8 @@ class TestBilinearInterpCase3(TestBilinearInterpOp):
        self.input_shape = [1, 1, 128, 64]
        self.out_h = 64
        self.out_w = 128
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase4(TestBilinearInterpOp):
@ -124,6 +161,8 @@ class TestBilinearInterpCase4(TestBilinearInterpOp):
        self.out_h = 1
        self.out_w = 1
        self.out_size = np.array([2, 2]).astype("int32")
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase5(TestBilinearInterpOp):
@ -133,6 +172,8 @@ class TestBilinearInterpCase5(TestBilinearInterpOp):
        self.out_h = 12
        self.out_w = 12
        self.out_size = np.array([11, 11]).astype("int32")
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase6(TestBilinearInterpOp):
@ -142,6 +183,8 @@ class TestBilinearInterpCase6(TestBilinearInterpOp):
        self.out_h = 64
        self.out_w = 128
        self.out_size = np.array([65, 129]).astype("int32")
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpActualShape(TestBilinearInterpOp):
@ -151,6 +194,8 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp):
        self.out_h = 64
        self.out_w = 32
        self.out_size = np.array([66, 40]).astype("int32")
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpOpUint8(OpTest):
@ -162,14 +207,17 @@ class TestBilinearInterpOpUint8(OpTest):
        input_np = np.random.randint(
            low=0, high=256, size=self.input_shape).astype("uint8")
        output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
-                                       self.out_size, self.actual_shape)
+                                       self.out_size, self.actual_shape,
                                       self.align_corners, self.align_mode)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
            'align_corners': self.align_corners,
            'align_mode': self.align_mode
        }
        self.outputs = {'Out': output_np}
@ -181,6 +229,8 @@ class TestBilinearInterpOpUint8(OpTest):
        self.input_shape = [1, 3, 9, 6]
        self.out_h = 10
        self.out_w = 9
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
@ -189,6 +239,8 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
        self.input_shape = [2, 3, 128, 64]
        self.out_h = 120
        self.out_w = 50
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
@ -198,6 +250,26 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
        self.out_h = 5
        self.out_w = 13
        self.out_size = np.array([6, 15]).astype("int32")
        self.align_corners = True
        self.align_mode = 1
 class TestBilinearInterpOtherMethod1(TestBilinearInterpOp):
    """Bilinear-interp variant configured with align_corners=False, align_mode=1.

    Only the ``set_align_mode`` hook is overridden; everything else is
    inherited from ``TestBilinearInterpOp``.

    NOTE(review): the caller of ``set_align_mode`` is not visible in this
    chunk -- confirm the base class actually invokes it, otherwise this
    override never takes effect.
    """

    def set_align_mode(self):
        """Store the alignment settings for this variant on the instance."""
        # The two attributes are independent of each other.
        self.align_mode = 1
        self.align_corners = False
 class TestBilinearInterpWithMethod2(TestBilinearInterpOp):
    """Bilinear-interp variant configured with align_corners=False, align_mode=0.

    Only the ``set_align_mode`` hook is overridden; everything else is
    inherited from ``TestBilinearInterpOp``.

    NOTE(review): the caller of ``set_align_mode`` is not visible in this
    chunk -- confirm the base class actually invokes it, otherwise this
    override never takes effect.
    """

    def set_align_mode(self):
        """Store the alignment settings for this variant on the instance."""
        # The two attributes are independent of each other.
        self.align_mode = 0
        self.align_corners = False
 class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
    """Bilinear-interp variant configured with align_corners=True, align_mode=0.

    Only the ``set_align_mode`` hook is overridden; everything else is
    inherited from ``TestBilinearInterpOp``.

    NOTE(review): the caller of ``set_align_mode`` is not visible in this
    chunk -- confirm the base class actually invokes it, otherwise this
    override never takes effect.
    """

    def set_align_mode(self):
        """Store the alignment settings for this variant on the instance."""
        # The two attributes are independent of each other.
        self.align_mode = 0
        self.align_corners = True
 if __name__ == "__main__":
--- a/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py
+++ b/python/paddle/fluid/tests/unittests/test_nearest_interp_op.py
@ -24,7 +24,8 @@ def nearest_neighbor_interp_np(X,
                               out_h,
                               out_w,
                               out_size=None,
-                               actual_shape=None):
+                               actual_shape=None,
                               align_corners=True):
    """nearest neighbor interpolation implement in shape [N, C, H, W]"""
    if out_size is not None:
        out_h = out_size[0]
@ -35,17 +36,31 @@ def nearest_neighbor_interp_np(X,
    n, c, in_h, in_w = X.shape
    ratio_h = ratio_w = 0.0
-    if out_h > 1:
+    if (out_h > 1):
-        ratio_h = (in_h - 1.0) / (out_h - 1.0)
+        if (align_corners):
-    if out_w > 1:
+            ratio_h = (in_h - 1.0) / (out_h - 1.0)
-        ratio_w = (in_w - 1.0) / (out_w - 1.0)
+        else:
            ratio_h = 1.0 * in_h / out_h
    if (out_w > 1):
        if (align_corners):
            ratio_w = (in_w - 1.0) / (out_w - 1.0)
        else:
            ratio_w = 1.0 * in_w / out_w
    out = np.zeros((n, c, out_h, out_w))
-    for i in range(out_h):
+
-        in_i = int(ratio_h * i + 0.5)
+    if align_corners:
-        for j in range(out_w):
+        for i in range(out_h):
-            in_j = int(ratio_w * j + 0.5)
+            in_i = int(ratio_h * i + 0.5)
-            out[:, :, i, j] = X[:, :, in_i, in_j]
+            for j in range(out_w):
                in_j = int(ratio_w * j + 0.5)
                out[:, :, i, j] = X[:, :, in_i, in_j]
    else:
        for i in range(out_h):
            in_i = int(ratio_h * i)
            for j in range(out_w):
                in_j = int(ratio_w * j)
                out[:, :, i, j] = X[:, :, in_i, in_j]
    return out.astype(X.dtype)
@ -59,7 +74,8 @@ class TestNearestInterpOp(OpTest):
        input_np = np.random.random(self.input_shape).astype("float32")
        output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
-                                               self.out_size, self.actual_shape)
+                                               self.out_size, self.actual_shape,
                                               self.align_corners)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
@ -68,7 +84,8 @@ class TestNearestInterpOp(OpTest):
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
            'align_corners': self.align_corners,
        }
        self.outputs = {'Out': output_np}
@ -84,6 +101,7 @@ class TestNearestInterpOp(OpTest):
        self.out_h = 2
        self.out_w = 2
        self.out_size = np.array([3, 3]).astype("int32")
        self.align_corners = True
 class TestNearestNeighborInterpCase1(TestNearestInterpOp):
@ -92,6 +110,7 @@ class TestNearestNeighborInterpCase1(TestNearestInterpOp):
        self.input_shape = [4, 1, 7, 8]
        self.out_h = 1
        self.out_w = 1
        self.align_corners = True
 class TestNearestNeighborInterpCase2(TestNearestInterpOp):
@ -100,6 +119,7 @@ class TestNearestNeighborInterpCase2(TestNearestInterpOp):
        self.input_shape = [3, 3, 9, 6]
        self.out_h = 12
        self.out_w = 12
        self.align_corners = True
 class TestNearestNeighborInterpCase3(TestNearestInterpOp):
@ -108,6 +128,7 @@ class TestNearestNeighborInterpCase3(TestNearestInterpOp):
        self.input_shape = [1, 1, 128, 64]
        self.out_h = 64
        self.out_w = 128
        self.align_corners = True
 class TestNearestNeighborInterpCase4(TestNearestInterpOp):
@ -117,6 +138,7 @@ class TestNearestNeighborInterpCase4(TestNearestInterpOp):
        self.out_h = 1
        self.out_w = 1
        self.out_size = np.array([2, 2]).astype("int32")
        self.align_corners = True
 class TestNearestNeighborInterpCase5(TestNearestInterpOp):
@ -126,6 +148,7 @@ class TestNearestNeighborInterpCase5(TestNearestInterpOp):
        self.out_h = 12
        self.out_w = 12
        self.out_size = np.array([11, 11]).astype("int32")
        self.align_corners = True
 class TestNearestNeighborInterpCase6(TestNearestInterpOp):
@ -135,6 +158,7 @@ class TestNearestNeighborInterpCase6(TestNearestInterpOp):
        self.out_h = 64
        self.out_w = 128
        self.out_size = np.array([65, 129]).astype("int32")
        self.align_corners = True
 class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
@ -144,6 +168,7 @@ class TestNearestNeighborInterpActualShape(TestNearestInterpOp):
        self.out_h = 64
        self.out_w = 32
        self.out_size = np.array([66, 40]).astype("int32")
        self.align_corners = True
 class TestNearestInterpOpUint8(OpTest):
@ -155,14 +180,16 @@ class TestNearestInterpOpUint8(OpTest):
        input_np = np.random.randint(
            low=0, high=256, size=self.input_shape).astype("uint8")
        output_np = nearest_neighbor_interp_np(input_np, self.out_h, self.out_w,
-                                               self.out_size, self.actual_shape)
+                                               self.out_size, self.actual_shape,
                                               self.align_corners)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
        self.attrs = {
            'out_h': self.out_h,
            'out_w': self.out_w,
-            'interp_method': self.interp_method
+            'interp_method': self.interp_method,
            'align_corners': self.align_corners
        }
        self.outputs = {'Out': output_np}
@ -174,6 +201,7 @@ class TestNearestInterpOpUint8(OpTest):
        self.input_shape = [1, 3, 9, 6]
        self.out_h = 10
        self.out_w = 9
        self.align_corners = True
 class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
@ -182,6 +210,7 @@ class TestNearestNeighborInterpCase1Uint8(TestNearestInterpOpUint8):
        self.input_shape = [2, 3, 128, 64]
        self.out_h = 120
        self.out_w = 50
        self.align_corners = True
 class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
@ -191,6 +220,12 @@ class TestNearestNeighborInterpCase2Uint8(TestNearestInterpOpUint8):
        self.out_h = 5
        self.out_w = 13
        self.out_size = np.array([6, 15]).astype("int32")
        self.align_corners = True
 class TestNearestInterpWithoutCorners(TestNearestInterpOp):
    # Nearest-interp variant that overrides the set_align_corners hook to
    # turn align_corners off; everything else comes from TestNearestInterpOp.
    #
    # NOTE(review): the caller of set_align_corners is not visible in this
    # chunk -- confirm the base class actually invokes it, otherwise this
    # override never takes effect.
    def set_align_corners(self):
        # Store the setting on the instance.
        self.align_corners = False
 if __name__ == "__main__":