add fp16 support to conv3d

scopeFix
Kexin Zhao 7 years ago committed by Kexin Zhao
parent fd1971caa0
commit 8b16927230

@ -366,7 +366,8 @@ REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,
REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<double>);
paddle::operators::CUDNNConvOpKernel<double>,
paddle::operators::CUDNNConvOpKernel<plat::float16>);
REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvGradOpKernel<float>,
paddle::operators::CUDNNConvGradOpKernel<double>);

@ -70,9 +70,11 @@ def conv3d_forward_naive(input, filter, group, conv_param):
class TestConv3dOp(OpTest):
def setUp(self):
self.op_type = "conv3d"
self.use_cudnn = False
self.dtype = np.float32
self.init_kernel_type()
self.init_group()
self.init_op_type()
self.init_dilation()
self.init_test_case()
@ -80,20 +82,24 @@ class TestConv3dOp(OpTest):
'stride': self.stride,
'pad': self.pad,
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
input = np.random.random(self.input_size).astype("float32")
filter = np.random.random(self.filter_size).astype("float32")
input = np.random.random(self.input_size).astype(self.dtype)
filter = np.random.random(self.filter_size).astype(self.dtype)
output = conv3d_forward_naive(input, filter, self.groups,
conv3d_param).astype("float32")
conv3d_param).astype(self.dtype)
self.inputs = {'Input': input, 'Filter': filter}
self.inputs = {
'Input': OpTest.np_dtype_to_fluid_dtype(input),
'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations
'dilations': self.dilations,
'use_cudnn': self.use_cudnn
}
self.outputs = {'Output': output}
@ -108,6 +114,8 @@ class TestConv3dOp(OpTest):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
if self.testcudnn():
place = core.CUDAPlace(0)
self.check_grad_with_place(
@ -120,6 +128,8 @@ class TestConv3dOp(OpTest):
set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
return
if self.testcudnn():
place = core.CUDAPlace(0)
self.check_grad_with_place(
@ -135,6 +145,8 @@ class TestConv3dOp(OpTest):
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
if self.dtype == np.float16:
return
if self.testcudnn():
place = core.CUDAPlace(0)
self.check_grad_with_place(
@ -163,8 +175,8 @@ class TestConv3dOp(OpTest):
def init_group(self):
self.groups = 1
def init_op_type(self):
self.op_type = "conv3d"
def init_kernel_type(self):
pass
class TestCase1(TestConv3dOp):
@ -235,34 +247,90 @@ class TestWithDilation(TestConv3dOp):
self.groups = 3
#----------------Conv3dCUDNN----------------
class TestCUDNN(TestConv3dOp):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16CUDNN(TestConv3dOp):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=2e-2)
class TestWithGroup1CUDNN(TestWithGroup1):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16WithGroup1CUDNN(TestWithGroup1):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=2e-2)
class TestWithGroup2CUDNN(TestWithGroup2):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16WithGroup2CUDNN(TestWithGroup2):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=2e-2)
class TestWith1x1CUDNN(TestWith1x1):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16With1x1CUDNN(TestWith1x1):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=2e-2)
class TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16WithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16
def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=2e-2)
# FIXME(typhoonzero): find a way to determine if

Loading…
Cancel
Save