Paddle/paddle/fluid/operators/pool_cudnn_op.cu.cc

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/cudnn_helper.h"

namespace paddle {
namespace operators {

// Local shorthands for the framework tensor type and the cuDNN helper types
// from the platform namespace that the kernels in this file rely on.
using Tensor = framework::Tensor;
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor;
using DataLayout = platform::DataLayout;
using PoolingMode = platform::PoolingMode;
// Type of the alpha/beta scaling factors handed to cuDNN for element type T,
// as defined by platform::CudnnDataType<T>.
template <typename T>
using ScalingParamType = typename platform::CudnnDataType<T>::ScalingParamType;

// Forward pooling kernel that hands the computation over to cuDNN.
// It is registered in this file for both the pool2d and pool3d operators.
template <typename T>
class PoolCUDNNOpKernel : public framework::OpKernel<T> {
 public:
  // Pools input "X" into output "Out" as described by the op attributes
  // pooling_type, exclusive, ksize, strides, paddings and global_pooling.
  void Compute(const framework::ExecutionContext &ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use CUDAPlace.");

    const Tensor *x = ctx.Input<Tensor>("X");
    Tensor *out = ctx.Output<Tensor>("Out");

    const T *x_ptr = x->data<T>();
    T *out_ptr = out->mutable_data<T>(ctx.GetPlace());

    const std::string pool_kind = ctx.Attr<std::string>("pooling_type");
    const bool exclusive = ctx.Attr<bool>("exclusive");
    std::vector<int> window = ctx.Attr<std::vector<int>>("ksize");
    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> pads = ctx.Attr<std::vector<int>>("paddings");

    if (ctx.Attr<bool>("global_pooling")) {
      // Global pooling: the window takes the size of every dimension of X
      // after the first two, and no padding is applied.
      const auto &x_dims = x->dims();
      for (size_t d = 0; d < window.size(); ++d) {
        pads[d] = 0;
        window[d] = static_cast<int>(x_dims[d + 2]);
      }
    }

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor x_desc_holder;
    ScopedTensorDescriptor out_desc_holder;
    ScopedPoolingDescriptor pool_desc_holder;

    // Two strides select the NCHW layout; anything else selects NCDHW.
    const DataLayout layout =
        (strides.size() == 2U) ? DataLayout::kNCHW : DataLayout::kNCDHW;

    cudnnTensorDescriptor_t x_desc = x_desc_holder.descriptor<T>(
        layout, framework::vectorize2int(x->dims()));
    cudnnTensorDescriptor_t out_desc = out_desc_holder.descriptor<T>(
        layout, framework::vectorize2int(out->dims()));

    // Every pooling_type other than "max" is handled as average pooling.
    const PoolingMode mode =
        (pool_kind == "max")
            ? PoolingMode::kMaximum
            : (exclusive ? PoolingMode::kAverageExclusive
                         : PoolingMode::kAverageInclusive);

    cudnnPoolingDescriptor_t pooling_desc =
        pool_desc_holder.descriptor(mode, window, pads, strides);

    // ------------------- cudnn pool algorithm ---------------------
    auto cudnn_handle = ctx.cuda_device_context().cudnn_handle();
    const ScalingParamType<T> alpha = 1.0f;
    const ScalingParamType<T> beta = 0.0f;
    CUDNN_ENFORCE(platform::dynload::cudnnPoolingForward(
        cudnn_handle, pooling_desc, &alpha, x_desc, x_ptr, &beta, out_desc,
        out_ptr));
  }
};

// Backward pooling kernel that hands the computation over to cuDNN.
// It is registered in this file for the pool2d_grad and pool3d_grad operators.
template <typename T>
class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
 public:
  // Computes the gradient of "X" from "X", "Out" and the gradient of "Out".
  // Nothing is written when the gradient output for "X" is absent.
  void Compute(const framework::ExecutionContext &ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use CUDAPlace.");

    const Tensor *x = ctx.Input<Tensor>("X");
    const Tensor *out = ctx.Input<Tensor>("Out");
    const Tensor *out_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
    Tensor *x_grad = ctx.Output<Tensor>(framework::GradVarName("X"));

    const std::string pool_kind = ctx.Attr<std::string>("pooling_type");
    const bool exclusive = ctx.Attr<bool>("exclusive");
    std::vector<int> window = ctx.Attr<std::vector<int>>("ksize");
    const std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
    std::vector<int> pads = ctx.Attr<std::vector<int>>("paddings");

    if (ctx.Attr<bool>("global_pooling")) {
      // Global pooling: the window takes the size of every dimension of X
      // after the first two, and no padding is applied.
      const auto &x_dims = x->dims();
      for (size_t d = 0; d < window.size(); ++d) {
        pads[d] = 0;
        window[d] = static_cast<int>(x_dims[d + 2]);
      }
    }

    const T *x_ptr = x->data<T>();
    const T *out_ptr = out->data<T>();
    const T *out_grad_ptr = out_grad->data<T>();

    // ------------------- cudnn descriptors ---------------------
    ScopedTensorDescriptor x_desc_holder;
    ScopedTensorDescriptor out_desc_holder;
    ScopedPoolingDescriptor pool_desc_holder;

    // Two strides select the NCHW layout; anything else selects NCDHW.
    const DataLayout layout =
        (strides.size() == 2U) ? DataLayout::kNCHW : DataLayout::kNCDHW;

    cudnnTensorDescriptor_t x_desc = x_desc_holder.descriptor<T>(
        layout, framework::vectorize2int(x->dims()));
    cudnnTensorDescriptor_t out_desc = out_desc_holder.descriptor<T>(
        layout, framework::vectorize2int(out->dims()));

    // Every pooling_type other than "max" is handled as average pooling; for
    // "max" the cudnn_deterministic flag picks the deterministic variant.
    PoolingMode mode;
    if (pool_kind != "max") {
      mode = exclusive ? PoolingMode::kAverageExclusive
                       : PoolingMode::kAverageInclusive;
    } else if (FLAGS_cudnn_deterministic) {
      mode = PoolingMode::kMaximumDeterministic;
    } else {
      mode = PoolingMode::kMaximum;
    }

    cudnnPoolingDescriptor_t pooling_desc =
        pool_desc_holder.descriptor(mode, window, pads, strides);

    // ------------------- cudnn pool algorithm ---------------------
    auto cudnn_handle = ctx.cuda_device_context().cudnn_handle();
    const ScalingParamType<T> alpha = 1.0f;
    const ScalingParamType<T> beta = 0.0f;

    if (x_grad == nullptr) {
      return;
    }

    T *x_grad_ptr = x_grad->mutable_data<T>(ctx.GetPlace());
    // Because beta is zero, it is unnecessary to reset input_grad.
    CUDNN_ENFORCE(platform::dynload::cudnnPoolingBackward(
        cudnn_handle, pooling_desc, &alpha, out_desc, out_ptr, out_desc,
        out_grad_ptr, x_desc, x_ptr, &beta, x_desc, x_grad_ptr));
  }
};

}  // namespace operators
}  // namespace paddle

// Short namespace aliases used by the kernel registrations in this file.
namespace ops = paddle::operators;
namespace plat = paddle::platform;

// pool2d forward and backward: cuDNN kernels on CUDAPlace, instantiated for
// float, double and float16.
REGISTER_OP_KERNEL(pool2d, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNOpKernel<float>,
                   ops::PoolCUDNNOpKernel<double>,
                   ops::PoolCUDNNOpKernel<plat::float16>);
REGISTER_OP_KERNEL(pool2d_grad, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNGradOpKernel<float>,
                   ops::PoolCUDNNGradOpKernel<double>,
                   ops::PoolCUDNNGradOpKernel<plat::float16>);

// pool3d forward and backward: cuDNN kernels on CUDAPlace, instantiated for
// float, double and float16. The backward registration lists the same element
// types as the forward one so that a float16 forward pass has a matching
// gradient kernel.
REGISTER_OP_KERNEL(pool3d, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNOpKernel<float>,
                   ops::PoolCUDNNOpKernel<double>,
                   ops::PoolCUDNNOpKernel<plat::float16>);
REGISTER_OP_KERNEL(pool3d_grad, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNGradOpKernel<float>,
                   ops::PoolCUDNNGradOpKernel<double>,
                   ops::PoolCUDNNGradOpKernel<plat::float16>);
Fix the grammar in copyright. (#8403) 7 years ago			`/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.`
Add pool2d cudnn 7 years ago
			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License. */`

Correct #include path 7 years ago			`#include "paddle/fluid/framework/op_registry.h"`
			`#include "paddle/fluid/operators/pool_op.h"`
			`#include "paddle/fluid/platform/cudnn_helper.h"`
Add pool2d cudnn 7 years ago
			`namespace paddle {`
			`namespace operators {`

			`using Tensor = framework::Tensor;`
			`using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;`
			`using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor;`
			`using DataLayout = platform::DataLayout;`
			`using PoolingMode = platform::PoolingMode;`
update 7 years ago			`template <typename T>`
			`using ScalingParamType = typename platform::CudnnDataType<T>::ScalingParamType;`
Add pool2d cudnn 7 years ago
			`template <typename T>`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago			`class PoolCUDNNOpKernel : public framework::OpKernel<T> {`
Add pool2d cudnn 7 years ago			`public:`
			`void Compute(const framework::ExecutionContext &ctx) const override {`
			`PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),`
GPUPlace to CUDAPlace (#6960) 7 years ago			`"It must use CUDAPlace.");`
Add pool2d cudnn 7 years ago
			`const Tensor *input = ctx.Input<Tensor>("X");`
			`Tensor *output = ctx.Output<Tensor>("Out");`

			`const T *input_data = input->data<T>();`
			`T *output_data = output->mutable_data<T>(ctx.GetPlace());`

fix attr name 7 years ago			`std::string pooling_type = ctx.Attr<std::string>("pooling_type");`
add inclusive/exclusive mode in PoolOp avg pool type 6 years ago			`bool exclusive = ctx.Attr<bool>("exclusive");`
Add pool2d cudnn 7 years ago			`std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");`
			`std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");`
			`std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");`
fix attr name 7 years ago			`if (ctx.Attr<bool>("global_pooling")) {`
Add pool2d cudnn 7 years ago			`for (size_t i = 0; i < ksize.size(); ++i) {`
fix bug 7 years ago			`paddings[i] = 0;`
Add pool2d cudnn 7 years ago			`ksize[i] = static_cast<int>(input->dims()[i + 2]);`
			`}`
			`}`

			`// ------------------- cudnn descriptors ---------------------`
			`ScopedTensorDescriptor input_desc;`
			`ScopedTensorDescriptor output_desc;`
			`ScopedPoolingDescriptor pool_desc;`
fix data layout 7 years ago			`DataLayout layout;`

			`if (strides.size() == 2U) {`
			`layout = DataLayout::kNCHW;`
			`} else {`
			`layout = DataLayout::kNCDHW;`
			`}`
Add pool2d cudnn 7 years ago
follow comments 7 years ago			`cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(`
			`layout, framework::vectorize2int(input->dims()));`
			`cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(`
			`layout, framework::vectorize2int(output->dims()));`
Add pool2d cudnn 7 years ago
			`PoolingMode pooling_mode;`
			`if (pooling_type == "max") {`
			`pooling_mode = PoolingMode::kMaximum;`
			`} else {`
move param exclusive to the last in pool2d/pool3d for forward compatibility:. test=develop 6 years ago			`pooling_mode = exclusive ? PoolingMode::kAverageExclusive`
			`: PoolingMode::kAverageInclusive;`
Add pool2d cudnn 7 years ago			`}`

			`cudnnPoolingDescriptor_t cudnn_pool_desc =`
			`pool_desc.descriptor(pooling_mode, ksize, paddings, strides);`

			`// ------------------- cudnn pool algorithm ---------------------`
			`auto handle = ctx.cuda_device_context().cudnn_handle();`
update 7 years ago			`ScalingParamType<T> alpha = 1.0f, beta = 0.0f;`
refine conv cudnn enforce (#12353) * refine conv cudnn enforce * update * update all cudnn ops * fix 7 years ago			`CUDNN_ENFORCE(platform::dynload::cudnnPoolingForward(`
Add pool2d cudnn 7 years ago			`handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta,`
			`cudnn_output_desc, output_data));`
			`}`
			`};`

			`template <typename T>`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago			`class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {`
Add pool2d cudnn 7 years ago			`public:`
			`void Compute(const framework::ExecutionContext &ctx) const override {`
			`PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),`
GPUPlace to CUDAPlace (#6960) 7 years ago			`"It must use CUDAPlace.");`
Add pool2d cudnn 7 years ago
			`const Tensor *input = ctx.Input<Tensor>("X");`
			`const Tensor *output = ctx.Input<Tensor>("Out");`
			`const Tensor *output_grad =`
			`ctx.Input<Tensor>(framework::GradVarName("Out"));`
			`Tensor *input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));`

fix attr name 7 years ago			`std::string pooling_type = ctx.Attr<std::string>("pooling_type");`
add inclusive/exclusive mode in PoolOp avg pool type 6 years ago			`bool exclusive = ctx.Attr<bool>("exclusive");`
Add pool2d cudnn 7 years ago			`std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");`
			`std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");`
			`std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");`

fix attr name 7 years ago			`if (ctx.Attr<bool>("global_pooling")) {`
fix bug 7 years ago			`for (size_t i = 0; i < ksize.size(); ++i) {`
			`paddings[i] = 0;`
Add pool2d cudnn 7 years ago			`ksize[i] = static_cast<int>(input->dims()[i + 2]);`
fix bug 7 years ago			`}`
Add pool2d cudnn 7 years ago			`}`

			`const T *input_data = input->data<T>();`
			`const T *output_data = output->data<T>();`
			`const T *output_grad_data = output_grad->data<T>();`

			`// ------------------- cudnn descriptors ---------------------`
			`ScopedTensorDescriptor input_desc;`
			`ScopedTensorDescriptor output_desc;`
			`ScopedPoolingDescriptor pool_desc;`
fix data layout 7 years ago			`DataLayout layout;`

			`if (strides.size() == 2U) {`
			`layout = DataLayout::kNCHW;`
			`} else {`
			`layout = DataLayout::kNCDHW;`
			`}`
Add pool2d cudnn 7 years ago
follow comments 7 years ago			`cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(`
			`layout, framework::vectorize2int(input->dims()));`
			`cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(`
			`layout, framework::vectorize2int(output->dims()));`
Add pool2d cudnn 7 years ago
			`PoolingMode pooling_mode;`
			`if (pooling_type == "max") {`
Feature/deterministic (#11205) * "fix deterministic" * "fix ci" * "fix init" 7 years ago			`if (FLAGS_cudnn_deterministic) {`
			`pooling_mode = PoolingMode::kMaximumDeterministic;`
			`} else {`
			`pooling_mode = PoolingMode::kMaximum;`
			`}`
Add pool2d cudnn 7 years ago			`} else {`
move param exclusive to the last in pool2d/pool3d for forward compatibility:. test=develop 6 years ago			`pooling_mode = exclusive ? PoolingMode::kAverageExclusive`
			`: PoolingMode::kAverageInclusive;`
Add pool2d cudnn 7 years ago			`}`

			`cudnnPoolingDescriptor_t cudnn_pool_desc =`
			`pool_desc.descriptor(pooling_mode, ksize, paddings, strides);`

			`// ------------------- cudnn pool algorithm ---------------------`
			`auto handle = ctx.cuda_device_context().cudnn_handle();`
update 7 years ago			`ScalingParamType<T> alpha = 1.0f, beta = 0.0f;`
Add pool2d cudnn 7 years ago			`if (input_grad) {`
			`T *input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());`
remove conflict 7 years ago			`// Because beta is zero, it is unnecessary to reset input_grad.`
Add pool2d cudnn 7 years ago
refine conv cudnn enforce (#12353) * refine conv cudnn enforce * update * update all cudnn ops * fix 7 years ago			`CUDNN_ENFORCE(platform::dynload::cudnnPoolingBackward(`
Add pool2d cudnn 7 years ago			`handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data,`
fix clear zero method and remove useless code 7 years ago			`cudnn_output_desc, output_grad_data, cudnn_input_desc, input_data,`
			`&beta, cudnn_input_desc, input_grad_data));`
Add pool2d cudnn 7 years ago			`}`
			`}`
			`};`

			`} // namespace operators`
			`} // namespace paddle`

			`namespace ops = paddle::operators;`
add fp16 pool2d support 7 years ago			`namespace plat = paddle::platform;`
Add pool2d cudnn 7 years ago
add fp16 pool2d support 7 years ago			`REGISTER_OP_KERNEL(pool2d, CUDNN, plat::CUDAPlace,`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago			`ops::PoolCUDNNOpKernel<float>,`
add fp16 pool2d support 7 years ago			`ops::PoolCUDNNOpKernel<double>,`
			`ops::PoolCUDNNOpKernel<plat::float16>);`
			`REGISTER_OP_KERNEL(pool2d_grad, CUDNN, plat::CUDAPlace,`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago			`ops::PoolCUDNNGradOpKernel<float>,`
Add fp16 backward support (#14202) * add fp16 backward support test=develop * add sum_op fp16 test * disable test_dist_save_load test=develop * add check_grad for sum * add unit test for softmax_grad fp16 test=develop * add scale_op unit test * add mul_grad_op unit test for fp16 * add cross_entropy_grad and eman_grad unit test for fp16 test=develop * fix cross_entropy unit test * add pool2d fp16 unit test * refine conv2d fp16 unit test test=develop * refine activation unit test test=develop * fix ci test=develop * follow zhihong's comment, copy from https://github.com/PaddlePaddle/Paddle/pull/12796 test=develop 6 years ago			`ops::PoolCUDNNGradOpKernel<double>,`
			`ops::PoolCUDNNGradOpKernel<plat::float16>);`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago
add fp16 pool2d support 7 years ago			`REGISTER_OP_KERNEL(pool3d, CUDNN, plat::CUDAPlace,`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago			`ops::PoolCUDNNOpKernel<float>,`
add float16 support to pool3d 7 years ago			`ops::PoolCUDNNOpKernel<double>,`
			`ops::PoolCUDNNOpKernel<plat::float16>);`
add fp16 pool2d support 7 years ago			`REGISTER_OP_KERNEL(pool3d_grad, CUDNN, plat::CUDAPlace,`
"cudnn operators change to cudnn kernel" (#6660) * "unified operators" * "add CUDNN register" * "add use cudnn attribute" * "add attribute" * "test conv tranpose op" * "remove duplicated attr" * "fix op test" * "add attribute to set cudnn" * "add more log" * "need layout op register support" * "add more log" * "change GetExpectedKernelType " * "fix Get attr in conv_op" * "fix CI" * "fix tests" * "removed kernel priority fallback" * "fix CI" * "fix stack pointer bug" * "refine buggy interface" * "add const cast to save life" * "fix get_output_with_grad" * "fix op test with dataformat" * ""fix pooling * "fix pooling test" * "fix CI" * "fix with_gpu error" * "add transform needed functional check" * "fix unpack list error" * "comment out parallel.do temporary" * "fix CI" * "fix compile doc error" * "make threshold larger" 7 years ago			`ops::PoolCUDNNGradOpKernel<float>,`
Revert ""cherry picked operators changes" (#12184)" (#12747) This reverts commit bf3c34960f2a59a2616957f8fb4107b2ac7aa02b. 7 years ago			`ops::PoolCUDNNGradOpKernel<double>);`