Merge pull request #9167 from kexinzhao/pool2d_fp16

Add float16 support for pool 2d operator
7 years ago · df99b16a16
parent 3f5705c346 dfec1df14f
commit df99b16a16
6 changed files with 152 additions and 102 deletions
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -28,6 +28,8 @@ using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
 using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
 using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
 using DataLayout = platform::DataLayout;
+template <typename T>
+using ScalingParamType = typename platform::CudnnDataType<T>::ScalingParamType;

 static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
    static_cast<size_t>(1024) * 1024 * 1024;
@@ -134,8 +136,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
    platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
    // ------------------- cudnn conv forward ---------------------
-    typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
-                                                          beta = 0.0f;
+    ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
    for (int i = 0; i < groups; i++) {
      PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
          handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
@@ -282,8 +283,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
    platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
    cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
    // ------------------- cudnn conv backward data ---------------------
-    typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
-                                                          beta = 0.0f;
+    ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
    if (input_grad) {
      T* input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
      // Because beta is zero, it is unnecessary to reset input_grad.
--- a/paddle/fluid/operators/pool_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/pool_cudnn_op.cu.cc
@@ -24,6 +24,8 @@ using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
 using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor;
 using DataLayout = platform::DataLayout;
 using PoolingMode = platform::PoolingMode;
+template <typename T>
+using ScalingParamType = typename platform::CudnnDataType<T>::ScalingParamType;

 template <typename T>
 class PoolCUDNNOpKernel : public framework::OpKernel<T> {
@@ -78,8 +80,7 @@ class PoolCUDNNOpKernel : public framework::OpKernel<T> {

    // ------------------- cudnn pool algorithm ---------------------
    auto handle = ctx.cuda_device_context().cudnn_handle();
-    T alpha = 1.0f, beta = 0.0f;
-
+    ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
    PADDLE_ENFORCE(platform::dynload::cudnnPoolingForward(
        handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta,
        cudnn_output_desc, output_data));
@@ -144,8 +145,7 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {

    // ------------------- cudnn pool algorithm ---------------------
    auto handle = ctx.cuda_device_context().cudnn_handle();
-    T alpha = 1.0f, beta = 0.0f;
-
+    ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
    if (input_grad) {
      T *input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
      // Because beta is zero, it is unnecessary to reset input_grad.
@@ -162,17 +162,19 @@ class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
 }  // namespace paddle

 namespace ops = paddle::operators;
+namespace plat = paddle::platform;

-REGISTER_OP_KERNEL(pool2d, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(pool2d, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNOpKernel<float>,
-                   ops::PoolCUDNNOpKernel<double>);
-REGISTER_OP_KERNEL(pool2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNOpKernel<double>,
+                   ops::PoolCUDNNOpKernel<plat::float16>);
+REGISTER_OP_KERNEL(pool2d_grad, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNGradOpKernel<float>,
                   ops::PoolCUDNNGradOpKernel<double>);

-REGISTER_OP_KERNEL(pool3d, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(pool3d, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNOpKernel<float>,
                   ops::PoolCUDNNOpKernel<double>);
-REGISTER_OP_KERNEL(pool3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(pool3d_grad, CUDNN, plat::CUDAPlace,
                   ops::PoolCUDNNGradOpKernel<float>,
                   ops::PoolCUDNNGradOpKernel<double>);
--- a/paddle/fluid/operators/pool_op.cc
+++ b/paddle/fluid/operators/pool_op.cc
@@ -124,11 +124,15 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
  }
 #endif

+  auto input_data_type = framework::ToDataType(ctx.Input<Tensor>("X")->type());
+  if (input_data_type == framework::proto::VarType::FP16) {
+    PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
+                      "float16 can only be used when CUDNN is used");
+  }
  std::string data_format = ctx.Attr<std::string>("data_format");
  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
-  return framework::OpKernelType(
-      framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
-      layout_, library_);
+  return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
+                                 library_);
 }

 Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -483,9 +483,9 @@ class OpTest(unittest.TestCase):
            input: input numpy array

        Returns:
-            input: if the dtype of input is np.float16, its dtype will be
-                changed to np.uint16 so that the internal memory will be 
-                reinterpreted input as of dtype np.uint16. 
+            input: The dtype of input will be changed to np.uint16 if 
+                it is originally np.float16, such that the internal memory
+                of input will be reinterpreted as of dtype np.uint16. 
        """
        if input.dtype == np.float16:
            input.dtype = np.uint16
--- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py
@@ -63,12 +63,13 @@ def conv2d_forward_naive(input, filter, group, conv_param):

 class TestConv2dOp(OpTest):
    def setUp(self):
+        self.op_type = "conv2d"
        self.use_cudnn = False
        self.use_mkldnn = False
-        self.init_op_type()
+        self.dtype = np.float32
+        self.init_kernel_type()
        self.init_group()
        self.init_dilation()
-        self.init_data_type()
        self.init_test_case()

        conv2d_param = {
@@ -159,17 +160,14 @@ class TestConv2dOp(OpTest):
        f_c = self.input_size[1] / self.groups
        self.filter_size = [6, f_c, 3, 3]

-    def init_data_type(self):
-        self.dtype = np.float32
-
    def init_dilation(self):
        self.dilations = [1, 1]

    def init_group(self):
        self.groups = 1

-    def init_op_type(self):
-        self.op_type = "conv2d"
+    def init_kernel_type(self):
+        pass


 class TestWithPad(TestConv2dOp):
@@ -241,13 +239,13 @@ class TestWithInput1x1Filter1x1(TestConv2dOp):

 #----------------Conv2dCUDNN----------------
 class TestCUDNN(TestConv2dOp):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv2d"


-class TestFP16CUDNN(TestCUDNN):
-    def init_data_type(self):
+class TestFP16CUDNN(TestConv2dOp):
+    def init_kernel_type(self):
+        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
@@ -258,13 +256,13 @@ class TestFP16CUDNN(TestCUDNN):


 class TestCUDNNWithPad(TestWithPad):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv2d"


-class TestFP16CUDNNWithPad(TestCUDNNWithPad):
-    def init_data_type(self):
+class TestFP16CUDNNWithPad(TestWithPad):
+    def init_kernel_type(self):
+        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
@@ -275,13 +273,13 @@ class TestFP16CUDNNWithPad(TestCUDNNWithPad):


 class TestCUDNNWithStride(TestWithStride):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv2d"


-class TestFP16CUDNNWithStride(TestCUDNNWithStride):
-    def init_data_type(self):
+class TestFP16CUDNNWithStride(TestWithStride):
+    def init_kernel_type(self):
+        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
@@ -292,13 +290,13 @@ class TestFP16CUDNNWithStride(TestCUDNNWithStride):


 class TestCUDNNWithGroup(TestWithGroup):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv2d"


-class TestFP16CUDNNWithGroup(TestCUDNNWithGroup):
-    def init_data_type(self):
+class TestFP16CUDNNWithGroup(TestWithGroup):
+    def init_kernel_type(self):
+        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
@@ -309,13 +307,13 @@ class TestFP16CUDNNWithGroup(TestCUDNNWithGroup):


 class TestCUDNNWith1x1(TestWith1x1):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv2d"


-class TestFP16CUDNNWith1x1(TestCUDNNWith1x1):
-    def init_data_type(self):
+class TestFP16CUDNNWith1x1(TestWith1x1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
@@ -326,13 +324,13 @@ class TestFP16CUDNNWith1x1(TestCUDNNWith1x1):


 class TestCUDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv2d"


-class TestFP16CUDNNWithInput1x1Filter1x1(TestCUDNNWithInput1x1Filter1x1):
-    def init_data_type(self):
+class TestFP16CUDNNWithInput1x1Filter1x1(TestWithInput1x1Filter1x1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
        self.dtype = np.float16

    def test_check_output(self):
@@ -375,21 +373,18 @@ class TestDepthwiseConv2(TestConv2dOp):

 #----------------Conv2dMKLDNN----------------
 class TestMKLDNN(TestConv2dOp):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_mkldnn = True
-        self.op_type = "conv2d"


 class TestMKLDNNWithPad(TestWithPad):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_mkldnn = True
-        self.op_type = "conv2d"


 class TestMKLDNNWithStride(TestWithStride):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_mkldnn = True
-        self.op_type = "conv2d"


 if __name__ == '__main__':
--- a/python/paddle/fluid/tests/unittests/test_pool2d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_pool2d_op.py