fix some inappropriate expressions in api doc for grid_sampler. test=develop

7 years ago · ff6329bd5f
parent 593e1b18d7
commit ff6329bd5f
8 changed files with 436 additions and 409 deletions
--- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc
@ -22,107 +22,111 @@ using framework::Tensor;
 using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
 using DataLayout = platform::DataLayout;
 using ScopedSpatialTransformerDescriptor =
-  platform::ScopedSpatialTransformerDescriptor;
+    platform::ScopedSpatialTransformerDescriptor;
 template <typename T>
 using CudnnDataType = platform::CudnnDataType<T>;

 template <typename T>
 class CUDNNGridSampleOpKernel : public framework::OpKernel<T> {
-  public:
-    void Compute(const framework::ExecutionContext& ctx) const override {
-      PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-                    "It must use CUDAPlace");
-      auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-      auto handle = dev_ctx.cudnn_handle();
-      auto* input = ctx.Input<Tensor>("X");
-      auto* grid = ctx.Input<Tensor>("Grid");
-      auto* output = ctx.Output<Tensor>("Output");
-
-      int n = input->dims()[0];
-      int c = input->dims()[1];
-      int h = input->dims()[2];
-      int w = input->dims()[3];
-      const int size[4] = {n, c, h, w};
-
-      const T* input_data = input->data<T>();
-      const T* grid_data = grid->data<T>();
-      T* output_data = output->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
-
-      ScopedSpatialTransformerDescriptor st_desc;
-      cudnnSpatialTransformerDescriptor_t cudnn_st_desc = 
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                   "It must use CUDAPlace");
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    auto handle = dev_ctx.cudnn_handle();
+    auto* input = ctx.Input<Tensor>("X");
+    auto* grid = ctx.Input<Tensor>("Grid");
+    auto* output = ctx.Output<Tensor>("Output");
+
+    int n = input->dims()[0];
+    int c = input->dims()[1];
+    int h = input->dims()[2];
+    int w = input->dims()[3];
+    const int size[4] = {n, c, h, w};
+
+    const T* input_data = input->data<T>();
+    const T* grid_data = grid->data<T>();
+    T* output_data = output->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
+
+    ScopedSpatialTransformerDescriptor st_desc;
+    cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
        st_desc.descriptor<T>(4, size);

-      ScopedTensorDescriptor input_desc;
-      ScopedTensorDescriptor output_desc;
-      cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-          DataLayout::kNCHW, framework::vectorize2int(input->dims()));
-      cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
-          DataLayout::kNCHW, framework::vectorize2int(output->dims()));
-
-      CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerForward(
-            handle, cudnn_st_desc, CudnnDataType<T>::kOne(), cudnn_input_desc, input_data,
-            grid_data, CudnnDataType<T>::kZero(), cudnn_output_desc, output_data));
-    }
-
+    ScopedTensorDescriptor input_desc;
+    ScopedTensorDescriptor output_desc;
+    cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
+        DataLayout::kNCHW, framework::vectorize2int(input->dims()));
+    cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
+        DataLayout::kNCHW, framework::vectorize2int(output->dims()));
+
+    CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerForward(
+        handle, cudnn_st_desc, CudnnDataType<T>::kOne(), cudnn_input_desc,
+        input_data, grid_data, CudnnDataType<T>::kZero(), cudnn_output_desc,
+        output_data));
+  }
 };

 template <typename T>
 class CUDNNGridSampleGradOpKernel : public framework::OpKernel<T> {
-  public:
-    void Compute(const framework::ExecutionContext& ctx) const override {
-      PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-                    "It must use CUDAPlace");
-      auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-      auto handle = dev_ctx.cudnn_handle();
-      auto* input = ctx.Input<Tensor>("X");
-      auto* grid = ctx.Input<Tensor>("Grid");
-      auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
-      auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
-      auto* grid_grad = ctx.Output<Tensor>(framework::GradVarName("Grid"));
-
-      auto output_grad_dims = output_grad->dims();
-      const int n = output_grad_dims[0];
-      const int c = output_grad_dims[1];
-      const int h = output_grad_dims[2];
-      const int w = output_grad_dims[3];
-      const int size[4] = {n, c, h, w};
-      
-      ScopedSpatialTransformerDescriptor st_dest;
-      cudnnSpatialTransformerDescriptor_t cudnn_st_dest = 
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
+                   "It must use CUDAPlace");
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    auto handle = dev_ctx.cudnn_handle();
+    auto* input = ctx.Input<Tensor>("X");
+    auto* grid = ctx.Input<Tensor>("Grid");
+    auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
+    auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* grid_grad = ctx.Output<Tensor>(framework::GradVarName("Grid"));
+
+    auto output_grad_dims = output_grad->dims();
+    const int n = output_grad_dims[0];
+    const int c = output_grad_dims[1];
+    const int h = output_grad_dims[2];
+    const int w = output_grad_dims[3];
+    const int size[4] = {n, c, h, w};
+
+    ScopedSpatialTransformerDescriptor st_dest;
+    cudnnSpatialTransformerDescriptor_t cudnn_st_dest =
        st_dest.descriptor<T>(4, size);

-      const T* input_data = input->data<T>();
-      const T* grid_data = grid->data<T>();
-      const T* output_grad_data = output_grad->data<T>();
-      T* input_grad_data = input_grad->mutable_data<T>(output_grad_dims, ctx.GetPlace());
-      T* grid_grad_data = grid_grad->mutable_data<T>({n, h, w, 2}, ctx.GetPlace());
-
-      ScopedTensorDescriptor input_desc;
-      ScopedTensorDescriptor input_grad_desc;
-      ScopedTensorDescriptor output_grad_desc;
-      cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
-          DataLayout::kNCHW, framework::vectorize2int(input->dims()));
-      cudnnTensorDescriptor_t cudnn_input_grad_desc = input_grad_desc.descriptor<T>(
-          DataLayout::kNCHW, framework::vectorize2int(input_grad->dims()));
-      cudnnTensorDescriptor_t cudnn_output_grad_desc = output_grad_desc.descriptor<T>(
-          DataLayout::kNCHW, framework::vectorize2int(output_grad->dims()));
-
-      CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerBackward(
-            handle, cudnn_st_dest, CudnnDataType<T>::kOne(),
-            cudnn_input_desc, input_data, CudnnDataType<T>::kZero(),
-            cudnn_input_grad_desc, input_grad_data, CudnnDataType<T>::kOne(),
-            cudnn_output_grad_desc, output_grad_data, grid_data,
-            CudnnDataType<T>::kZero(), grid_grad_data));
-    }
+    const T* input_data = input->data<T>();
+    const T* grid_data = grid->data<T>();
+    const T* output_grad_data = output_grad->data<T>();
+    T* input_grad_data =
+        input_grad->mutable_data<T>(output_grad_dims, ctx.GetPlace());
+    T* grid_grad_data =
+        grid_grad->mutable_data<T>({n, h, w, 2}, ctx.GetPlace());
+
+    ScopedTensorDescriptor input_desc;
+    ScopedTensorDescriptor input_grad_desc;
+    ScopedTensorDescriptor output_grad_desc;
+    cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
+        DataLayout::kNCHW, framework::vectorize2int(input->dims()));
+    cudnnTensorDescriptor_t cudnn_input_grad_desc =
+        input_grad_desc.descriptor<T>(
+            DataLayout::kNCHW, framework::vectorize2int(input_grad->dims()));
+    cudnnTensorDescriptor_t cudnn_output_grad_desc =
+        output_grad_desc.descriptor<T>(
+            DataLayout::kNCHW, framework::vectorize2int(output_grad->dims()));
+
+    CUDNN_ENFORCE(platform::dynload::cudnnSpatialTfSamplerBackward(
+        handle, cudnn_st_dest, CudnnDataType<T>::kOne(), cudnn_input_desc,
+        input_data, CudnnDataType<T>::kZero(), cudnn_input_grad_desc,
+        input_grad_data, CudnnDataType<T>::kOne(), cudnn_output_grad_desc,
+        output_grad_data, grid_data, CudnnDataType<T>::kZero(),
+        grid_grad_data));
+  }
 };

 }  // namespace operators
 }  // namespace paddle

 namespace plat = paddle::platform;
-REGISTER_OP_KERNEL(grid_sampler, CUDNN,  plat::CUDAPlace,
-                    paddle::operators::CUDNNGridSampleOpKernel<float>,
-                    paddle::operators::CUDNNGridSampleOpKernel<double>);
+REGISTER_OP_KERNEL(grid_sampler, CUDNN, plat::CUDAPlace,
+                   paddle::operators::CUDNNGridSampleOpKernel<float>,
+                   paddle::operators::CUDNNGridSampleOpKernel<double>);
 REGISTER_OP_KERNEL(grid_sampler_grad, CUDNN, plat::CUDAPlace,
-                    paddle::operators::CUDNNGridSampleGradOpKernel<float>,
-                    paddle::operators::CUDNNGridSampleGradOpKernel<double>);
+                   paddle::operators::CUDNNGridSampleGradOpKernel<float>,
+                   paddle::operators::CUDNNGridSampleGradOpKernel<double>);
--- a/paddle/fluid/operators/grid_sampler_op.cc
+++ b/paddle/fluid/operators/grid_sampler_op.cc
@ -24,70 +24,76 @@ namespace operators {
 using Tensor = framework::Tensor;

 class GridSampleOp : public framework::OperatorWithKernel {
-  public:
-    using framework::OperatorWithKernel::OperatorWithKernel;
-    void InferShape(framework::InferShapeContext* ctx) const override {
-      PADDLE_ENFORCE(ctx->HasInput("X"),
-                    "Input(X) of GridSampleOp should not be null.");
-      PADDLE_ENFORCE(ctx->HasInput("Grid"),
-                    "Input(Grid) of GridSampleOp should not be null.");
-      PADDLE_ENFORCE(ctx->HasOutput("Output"),
-                    "Output(Output) of GridSampleOp should not be null.");
-      
-      auto x_dims = ctx->GetInputDim("X");
-      auto grid_dims = ctx->GetInputDim("Grid");
-      PADDLE_ENFORCE(x_dims.size() == 4, "Input(X) of GridSampleOp should be 4-D Tensor.");
-      PADDLE_ENFORCE(grid_dims.size() == 4, "Input(Grid) of GridSampleOp should be 4-D Tensor.");
-      PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2.");
-      PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0], "Input(X) and Input(Grid) dims[0] should be equal.");
-      PADDLE_ENFORCE_EQ(grid_dims[1], x_dims[2], "Input(X) dims[2] and Input(Grid) dims[1] should be equal.");
-      PADDLE_ENFORCE_EQ(grid_dims[2], x_dims[3], "Input(X) dims[3] and Input(Grid) dims[2] should be equal.");
-
-      ctx->SetOutputDim("Output", x_dims);
-      ctx->ShareLoD("X", "Output");
-    }
-  
-  protected:
-    framework::OpKernelType GetExpectedKernelType(
-        const framework::ExecutionContext& ctx) const override {
-      framework::LibraryType library_{framework::LibraryType::kPlain};
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of GridSampleOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grid"),
+                   "Input(Grid) of GridSampleOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Output"),
+                   "Output(Output) of GridSampleOp should not be null.");
+
+    auto x_dims = ctx->GetInputDim("X");
+    auto grid_dims = ctx->GetInputDim("Grid");
+    PADDLE_ENFORCE(x_dims.size() == 4,
+                   "Input(X) of GridSampleOp should be 4-D Tensor.");
+    PADDLE_ENFORCE(grid_dims.size() == 4,
+                   "Input(Grid) of GridSampleOp should be 4-D Tensor.");
+    PADDLE_ENFORCE(grid_dims[3] == 2, "Input(Grid) dims[3] should be 2.");
+    PADDLE_ENFORCE_EQ(grid_dims[0], x_dims[0],
+                      "Input(X) and Input(Grid) dims[0] should be equal.");
+    PADDLE_ENFORCE_EQ(
+        grid_dims[1], x_dims[2],
+        "Input(X) dims[2] and Input(Grid) dims[1] should be equal.");
+    PADDLE_ENFORCE_EQ(
+        grid_dims[2], x_dims[3],
+        "Input(X) dims[3] and Input(Grid) dims[2] should be equal.");
+
+    ctx->SetOutputDim("Output", x_dims);
+    ctx->ShareLoD("X", "Output");
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library_{framework::LibraryType::kPlain};
 #ifdef PADDLE_WITH_CUDA
-      if (platform::CanCUDNNBeUsed(ctx)) {
-        library_ = framework::LibraryType::kCUDNN;
-      }
-#endif    
-      return framework::OpKernelType(
-          framework::ToDataType(ctx.Input<Tensor>("X")->type()),
-          ctx.GetPlace(), framework::DataLayout::kAnyLayout, library_);
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kCUDNN;
    }
+#endif
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
+        framework::DataLayout::kAnyLayout, library_);
+  }
 };

 class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
-  public:
-    void Make() override {
-      AddInput(
-          "X",
-          "(Tensor) The input data of GridSampleOp, "
-          "This is a 4-D tensor with shape of [N, C, H, W]");
-      AddInput(
-          "Grid",
-          "(Tensor) The input grid of GridSampleOp generated by AffineGridOp, "
-          "This is a 4-D tensor with shape of [N, H, W, 2] is the concatenation "
-          "of x and y coordinates with shape [N, H, W] in last dimention");
-      AddOutput(
-          "Output",
-          "(Tensor) Output tensor with shape [N, C, H, W]");
-      AddAttr<bool>(
-          "use_cudnn",
-          "(bool, default true) Only used in cudnn kernel, need install cudnn")
-          .SetDefault(true);
-
-      AddComment(R"DOC(
-      It sample input X by grid gennerate by AffineGridOp. The grid of shape
-      [N, H, W, 2] is the concatenation of (x, y) coordinates with shape 
-      [N, H, W] each, with x indexing the 4th-D(W) of input feature map and y to 
-      indexng the 3rd-D(H), finally results is the bilinear interpolation value
-      of 4 nearest corner points.
+ public:
+  void Make() override {
+    AddInput("X",
+             "(Tensor) The input data of GridSampleOp, "
+             "This is a 4-D tensor with shape of [N, C, H, W]");
+    AddInput(
+        "Grid",
+        "(Tensor) The input grid of GridSampleOp generated by AffineGridOp, "
+        "This is a 4-D tensor with shape of [N, H, W, 2] is the concatenation "
+        "of x and y coordinates with shape [N, H, W] in last dimension");
+    AddOutput("Output", "(Tensor) Output tensor with shape [N, C, H, W]");
+    AddAttr<bool>(
+        "use_cudnn",
+        "(bool, default true) Only used in cudnn kernel, need install cudnn")
+        .SetDefault(true);
+
+    AddComment(R"DOC(
+      This operation samples input X by using bilinear interpolation based on
+      flow field grid, which is usually generated by affine_grid. The grid of
+      shape [N, H, W, 2] is the concatenation of (grid_x, grid_y) coordinates
+      with shape [N, H, W] each, where grid_x indexes the 4th dimension
+      (width) of input data x and grid_y indexes the 3rd dimension
+      (height). The final result is the bilinear interpolation value of the
+      4 nearest corner points.

      Step 1:
        Get (x, y) grid coordinates and scale to [0, H-1/W-1].
@ -127,11 +133,11 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
        output = wn * d_e * d_s + en * d_w * d_s
               + ws * d_e * d_n + es * d_w * d_n
        )DOC");
-    }
+  }
 };

 class GridSampleOpGrad : public framework::OperatorWithKernel {
-  public:
+ public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
    auto input_dims = ctx->GetInputDim("X");
@ -144,43 +150,43 @@ class GridSampleOpGrad : public framework::OperatorWithKernel {
    }
  }

-  protected:
-    framework::OpKernelType GetExpectedKernelType(
-        const framework::ExecutionContext& ctx) const override {
-      framework::LibraryType library_{framework::LibraryType::kPlain};
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    framework::LibraryType library_{framework::LibraryType::kPlain};
 #ifdef PADDLE_WITH_CUDA
-      if (platform::CanCUDNNBeUsed(ctx)) {
-        library_ = framework::LibraryType::kCUDNN;
-      }
-#endif    
-      return framework::OpKernelType(
-          framework::ToDataType(ctx.Input<Tensor>("X")->type()),
-          ctx.GetPlace(), framework::DataLayout::kAnyLayout, library_);
+    if (platform::CanCUDNNBeUsed(ctx)) {
+      library_ = framework::LibraryType::kCUDNN;
    }
+#endif
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
+        framework::DataLayout::kAnyLayout, library_);
+  }
 };

 class GridSampleGradMaker : public framework::SingleGradOpDescMaker {
-  public:
-    using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
-
-  protected:
-    std::unique_ptr<framework::OpDesc> Apply() const override {
-      auto* op = new framework::OpDesc();
-      op->SetType("grid_sampler_grad");
-      op->SetInput("X", Input("X"));
-      op->SetInput("Grid", Input("Grid"));
-      op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
-
-      op->SetAttrMap(Attrs());
-
-      op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
-      op->SetOutput(framework::GradVarName("Grid"), InputGrad("Grid"));
-      return std::unique_ptr<framework::OpDesc>(op);
-    }
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op = new framework::OpDesc();
+    op->SetType("grid_sampler_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("Grid", Input("Grid"));
+    op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));
+
+    op->SetAttrMap(Attrs());
+
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    op->SetOutput(framework::GradVarName("Grid"), InputGrad("Grid"));
+    return std::unique_ptr<framework::OpDesc>(op);
+  }
 };

-} // namespace operators
-} // namespace paddle
+}  // namespace operators
+}  // namespace paddle

 namespace ops = paddle::operators;
 REGISTER_OPERATOR(grid_sampler, ops::GridSampleOp, ops::GridSampleOpMaker,
--- a/paddle/fluid/operators/grid_sampler_op.h
+++ b/paddle/fluid/operators/grid_sampler_op.h
--- a/paddle/fluid/platform/cudnn_helper.h
+++ b/paddle/fluid/platform/cudnn_helper.h
@ -342,7 +342,7 @@ class ScopedPoolingDescriptor {
 };

 class ScopedSpatialTransformerDescriptor {
-  public:
+ public:
  ScopedSpatialTransformerDescriptor() {
    PADDLE_ENFORCE(dynload::cudnnCreateSpatialTransformerDescriptor(&desc_));
  }
@ -354,13 +354,13 @@ class ScopedSpatialTransformerDescriptor {
  inline cudnnSpatialTransformerDescriptor_t descriptor(const int nbDims,
                                                        const int dimA[]) {
    PADDLE_ENFORCE(dynload::cudnnSetSpatialTransformerNdDescriptor(
-          desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
+        desc_, CUDNN_SAMPLER_BILINEAR, CudnnDataType<T>::type, nbDims, dimA));
    return desc_;
  }

-   private:
-    cudnnSpatialTransformerDescriptor_t desc_;
-    DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
+ private:
+  cudnnSpatialTransformerDescriptor_t desc_;
+  DISABLE_COPY_AND_ASSIGN(ScopedSpatialTransformerDescriptor);
 };

 inline bool CanCUDNNBeUsed(const framework::ExecutionContext& ctx) {
--- a/paddle/fluid/platform/dynload/cudnn.h
+++ b/paddle/fluid/platform/dynload/cudnn.h
@ -65,51 +65,51 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
 * include all needed cudnn functions in HPPL
 * different cudnn version has different interfaces
 **/
-#define CUDNN_DNN_ROUTINE_EACH(__macro)             \
-  __macro(cudnnSetTensor4dDescriptor);              \
-  __macro(cudnnSetTensor4dDescriptorEx);            \
-  __macro(cudnnSetTensorNdDescriptor);              \
-  __macro(cudnnGetTensorNdDescriptor);              \
-  __macro(cudnnGetConvolutionNdForwardOutputDim);   \
-  __macro(cudnnGetConvolutionForwardAlgorithm);     \
-  __macro(cudnnCreateTensorDescriptor);             \
-  __macro(cudnnDestroyTensorDescriptor);            \
-  __macro(cudnnCreateFilterDescriptor);             \
-  __macro(cudnnSetFilter4dDescriptor);              \
-  __macro(cudnnSetFilterNdDescriptor);              \
-  __macro(cudnnGetFilterNdDescriptor);              \
-  __macro(cudnnSetPooling2dDescriptor);             \
-  __macro(cudnnSetPoolingNdDescriptor);             \
-  __macro(cudnnGetPoolingNdDescriptor);             \
-  __macro(cudnnDestroyFilterDescriptor);            \
-  __macro(cudnnCreateConvolutionDescriptor);        \
-  __macro(cudnnCreatePoolingDescriptor);            \
-  __macro(cudnnDestroyPoolingDescriptor);           \
-  __macro(cudnnSetConvolution2dDescriptor);         \
-  __macro(cudnnDestroyConvolutionDescriptor);       \
-  __macro(cudnnSetConvolutionNdDescriptor);         \
-  __macro(cudnnGetConvolutionNdDescriptor);         \
-  __macro(cudnnDeriveBNTensorDescriptor);           \
-  __macro(cudnnCreateSpatialTransformerDescriptor); \
-  __macro(cudnnSetSpatialTransformerNdDescriptor);  \
-  __macro(cudnnDestroySpatialTransformerDescriptor);\
-  __macro(cudnnSpatialTfGridGeneratorForward);      \
-  __macro(cudnnSpatialTfGridGeneratorBackward);     \
-  __macro(cudnnSpatialTfSamplerForward);            \
-  __macro(cudnnSpatialTfSamplerBackward);           \
-  __macro(cudnnCreate);                             \
-  __macro(cudnnDestroy);                            \
-  __macro(cudnnSetStream);                          \
-  __macro(cudnnActivationForward);                  \
-  __macro(cudnnConvolutionForward);                 \
-  __macro(cudnnConvolutionBackwardBias);            \
-  __macro(cudnnGetConvolutionForwardWorkspaceSize); \
-  __macro(cudnnTransformTensor);                    \
-  __macro(cudnnPoolingForward);                     \
-  __macro(cudnnPoolingBackward);                    \
-  __macro(cudnnSoftmaxBackward);                    \
-  __macro(cudnnSoftmaxForward);                     \
-  __macro(cudnnGetVersion);                         \
+#define CUDNN_DNN_ROUTINE_EACH(__macro)              \
+  __macro(cudnnSetTensor4dDescriptor);               \
+  __macro(cudnnSetTensor4dDescriptorEx);             \
+  __macro(cudnnSetTensorNdDescriptor);               \
+  __macro(cudnnGetTensorNdDescriptor);               \
+  __macro(cudnnGetConvolutionNdForwardOutputDim);    \
+  __macro(cudnnGetConvolutionForwardAlgorithm);      \
+  __macro(cudnnCreateTensorDescriptor);              \
+  __macro(cudnnDestroyTensorDescriptor);             \
+  __macro(cudnnCreateFilterDescriptor);              \
+  __macro(cudnnSetFilter4dDescriptor);               \
+  __macro(cudnnSetFilterNdDescriptor);               \
+  __macro(cudnnGetFilterNdDescriptor);               \
+  __macro(cudnnSetPooling2dDescriptor);              \
+  __macro(cudnnSetPoolingNdDescriptor);              \
+  __macro(cudnnGetPoolingNdDescriptor);              \
+  __macro(cudnnDestroyFilterDescriptor);             \
+  __macro(cudnnCreateConvolutionDescriptor);         \
+  __macro(cudnnCreatePoolingDescriptor);             \
+  __macro(cudnnDestroyPoolingDescriptor);            \
+  __macro(cudnnSetConvolution2dDescriptor);          \
+  __macro(cudnnDestroyConvolutionDescriptor);        \
+  __macro(cudnnSetConvolutionNdDescriptor);          \
+  __macro(cudnnGetConvolutionNdDescriptor);          \
+  __macro(cudnnDeriveBNTensorDescriptor);            \
+  __macro(cudnnCreateSpatialTransformerDescriptor);  \
+  __macro(cudnnSetSpatialTransformerNdDescriptor);   \
+  __macro(cudnnDestroySpatialTransformerDescriptor); \
+  __macro(cudnnSpatialTfGridGeneratorForward);       \
+  __macro(cudnnSpatialTfGridGeneratorBackward);      \
+  __macro(cudnnSpatialTfSamplerForward);             \
+  __macro(cudnnSpatialTfSamplerBackward);            \
+  __macro(cudnnCreate);                              \
+  __macro(cudnnDestroy);                             \
+  __macro(cudnnSetStream);                           \
+  __macro(cudnnActivationForward);                   \
+  __macro(cudnnConvolutionForward);                  \
+  __macro(cudnnConvolutionBackwardBias);             \
+  __macro(cudnnGetConvolutionForwardWorkspaceSize);  \
+  __macro(cudnnTransformTensor);                     \
+  __macro(cudnnPoolingForward);                      \
+  __macro(cudnnPoolingBackward);                     \
+  __macro(cudnnSoftmaxBackward);                     \
+  __macro(cudnnSoftmaxForward);                      \
+  __macro(cudnnGetVersion);                          \
  __macro(cudnnGetErrorString);
 CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@ -7586,11 +7586,13 @@ def hash(input, hash_size, num_hash=1, name=None):
@templatedoc()
 def grid_sampler(x, grid, name=None):
    """
-    It sample input X by grid gennerate by AffineGridOp. The grid of shape
-    [N, H, W, 2] is the concatenation of (x, y) coordinates with shape 
-    [N, H, W] each, with x indexing the 4th-D(W) of input feature map and y to 
-    indexng the 3rd-D(H), finally results is the bilinear interpolation value
-    of 4 nearest corner points.
+    This operation samples input X by using bilinear interpolation based on
+    flow field grid, which is usually generated by affine_grid. The grid of
+    shape [N, H, W, 2] is the concatenation of (grid_x, grid_y) coordinates
+    with shape [N, H, W] each, where grid_x indexes the 4th dimension
+    (width) of input data x and grid_y indexes the 3rd dimension
+    (height). The final result is the bilinear interpolation value of the
+    4 nearest corner points.

    Step 1:
    Get (x, y) grid coordinates and scale to [0, H-1/W-1].
@ -7636,7 +7638,16 @@ def grid_sampler(x, grid, name=None):
        name (str, default None): The name of this layer.

    Returns:
-        out(Variable): Output data indices by grid from x of shape [N, C, H, W].
+        out(Variable): Output of shape [N, C, H, W], obtained by sampling
+        input X using bilinear interpolation based on the input grid.
+
+    Examples:
+    .. code-block:: python
+
+        x = fluid.layers.data(name='x', shape=[3, 10, 32, 32], dtype='float32')
+        theta = fluid.layers.data(name='theta', shape=[3, 2, 3], dtype='float32')
+        grid = fluid.layers.affine_grid(theta=theta, out_shape=[3, 10, 32, 32])
+        out = fluid.layers.grid_sampler(x=x, grid=grid)
    """
    helper = LayerHelper("grid_sampler", **locals())

@ -7649,10 +7660,6 @@ def grid_sampler(x, grid, name=None):
    out = helper.create_tmp_variable(x.dtype)
    ipts = {'X': x, 'Grid': grid}

-    helper.apppend_op(
-            type='grid_sampler',
-            inputs=ipts,
-            outputs={'Output', out})
+    helper.apppend_op(type='grid_sampler', inputs=ipts, outputs={'Output', out})

    return out
-
--- a/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py
+++ b/python/paddle/fluid/tests/unittests/test_grid_sampler_op.py
@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-
 import unittest
 import numpy as np
 from op_test import OpTest
@ -23,11 +22,11 @@ def AffineGrid(theta, size):
    h = size[2]
    w = size[3]
    h_idx = np.repeat(
-            np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis]
+        np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis]
    w_idx = np.repeat(
-            np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis]
+        np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis]
    grid = np.concatenate(
-            [w_idx, h_idx, np.ones([h, w, 1])], axis=2)  # h * w * 3
+        [w_idx, h_idx, np.ones([h, w, 1])], axis=2)  # h * w * 3
    grid = np.repeat(grid[np.newaxis, :], size[0], axis=0)  # n * h * w *3

    ret = np.zeros([n, h * w, 2])
@ -37,6 +36,7 @@ def AffineGrid(theta, size):

    return ret.reshape([n, h, w, 2]).astype("float32")

+
 def getGridPointValue(data, x, y):
    data_shape = data.shape
    N = data_shape[0]
@ -47,13 +47,15 @@ def getGridPointValue(data, x, y):
    for i in range(N):
        for j in range(H):
            for k in range(W):
-                if y[i, j, k] < 0 or y[i, j, k] > H - 1 or x[i, j, k] < 0 or x[i, j, k] > W - 1:
+                if y[i, j, k] < 0 or y[i, j, k] > H - 1 or x[i, j, k] < 0 or x[
+                        i, j, k] > W - 1:
                    out[i, :, j, k] = 0
                else:
                    out[i, :, j, k] = data[i, :, y[i, j, k], x[i, j, k]]

    return out

+
 def GridSampler(data, grid):
    dims = data.shape
    N = dims[0]
@ -71,7 +73,7 @@ def GridSampler(data, grid):

    x0 = np.floor(x).astype('int32')
    x1 = x0 + 1
-    y0 = np.floor(y).astype('int32') 
+    y0 = np.floor(y).astype('int32')
    y1 = y0 + 1

    wa = np.tile(((x1 - x) * (y1 - y)).reshape((N, 1, H, W)), (1, C, 1, 1))
@ -87,6 +89,7 @@ def GridSampler(data, grid):
    out = (wa * va + wb * vb + wc * vc + wd * vd).astype('float32')
    return out

+
 class TestGridSamplerOp(OpTest):
    def setUp(self):
        self.initTestCase()
@ -115,5 +118,6 @@ class TestGridSamplerOp(OpTest):
        self.grid_shape = (2, 7, 3, 2)
        self.theta_shape = (2, 2, 3)

+
 if __name__ == "__main__":
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@ -868,13 +868,12 @@ class TestBook(unittest.TestCase):
    def test_affine_grid_gen(self):
        program = Program()
        with program_guard(program):
-            x = layers.data(name='x', shape=[2, 5, 7, 3 ], dtype='float32')
-            grid = layers.data(name='grid', shape=[2, 5, 7, 2], dtype='float32' )
+            x = layers.data(name='x', shape=[2, 5, 7, 3], dtype='float32')
+            grid = layers.data(name='grid', shape=[2, 5, 7, 2], dtype='float32')
            out = layers.grid_sampler(x, grid)
            self.assertIsNotNone(out)
        print(str(program))


-
 if __name__ == '__main__':
    unittest.main()