diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc
index 51d2d80206..c6c1af0e36 100644
--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -109,14 +109,30 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
const int offset = (data_layout != DataLayout::kNHWC ? 2 : 1);
for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
- auto infer_shape = (in_dims[i + offset] - 1) * strides[i] -
- paddings[2 * i] - paddings[2 * i + 1] + filter_extent;
+ auto infer_shape = (ctx->IsRuntime() || in_dims[i + offset] > 0)
+ ? (in_dims[i + offset] - 1) * strides[i] -
+ paddings[2 * i] - paddings[2 * i + 1] +
+ filter_extent
+ : -1;
if (output_size.size()) {
- PADDLE_ENFORCE_EQ((output_size[i] >= infer_shape &&
- output_size[i] < infer_shape + strides[i]),
- true,
- "output_size of Op(ConvTransposeOp) should be "
- "in appropriate range.");
+ if (ctx->IsRuntime()) {
+ PADDLE_ENFORCE_GE(
+ output_size[i], infer_shape,
+ platform::errors::InvalidArgument(
+ "output_size of Op(ConvTransposeOp) should not be "
+                "less than the inferred output size. But received output_size = "
+                "[%s], whose dim %d is less than the inferred output size [%s]",
+ framework::make_ddim(output_size), i, infer_shape));
+ PADDLE_ENFORCE_LT(
+ output_size[i], infer_shape + strides[i],
+ platform::errors::InvalidArgument(
+ "output_size of Op(ConvTransposeOp) should be less "
+                "than the inferred size + stride. But received output_size = [%s], "
+                "whose dim %d is not less than the inferred output size (%d) + "
+ "stride (%d) = %d",
+ framework::make_ddim(output_size), i, infer_shape, strides[i],
+ infer_shape + strides[i]));
+ }
output_shape.push_back(output_size[i]);
} else {
output_shape.push_back(infer_shape);
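
For reference, the checks added above implement the standard transposed-convolution size relation; the following is a small standalone sketch (plain Python, not Paddle code) of the accepted range, using illustrative numbers:

```python
# Sketch of the inferred-size check added above.
# For each spatial dim: infer = (in - 1) * stride - pad_begin - pad_end
#                               + dilation * (kernel - 1) + 1
# A user-supplied output_size[i] is accepted iff
#   infer <= output_size[i] < infer + stride.
def valid_output_size(in_size, stride, pad_begin, pad_end, dilation, kernel,
                      output_size):
    filter_extent = dilation * (kernel - 1) + 1
    infer = (in_size - 1) * stride - pad_begin - pad_end + filter_extent
    return infer <= output_size < infer + stride

# e.g. in=16, stride=2, no padding, dilation=1, kernel=3 -> infer = 33,
# so an output_size of 33 or 34 is accepted while 35 is rejected.
assert valid_output_size(16, 2, 0, 0, 1, 3, 33)
assert valid_output_size(16, 2, 0, 0, 1, 3, 34)
assert not valid_output_size(16, 2, 0, 0, 1, 3, 35)
```

Note that at compile time (`!ctx->IsRuntime()`), unknown input extents are propagated as -1 and the range check is deferred to runtime.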
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 3f85f89a52..3a84184f80 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -3857,10 +3857,10 @@ def conv2d_transpose(input,
if output_size is None:
output_size = []
- elif isinstance(output_size, list) or isinstance(output_size, int):
+ elif isinstance(output_size, (list, tuple, int)):
output_size = utils.convert_to_list(output_size, 2, 'output_size')
else:
- raise ValueError("output_size should be list or int")
+ raise ValueError("output_size should be int, list[int] or tuple[int]")
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters // groups] + filter_size
@@ -4129,7 +4129,7 @@ def conv3d_transpose(input,
if output_size is None:
raise ValueError("output_size must be set when filter_size is None")
if isinstance(output_size, int):
- output_size = [output_size, output_size]
+ output_size = [output_size, output_size, output_size]
d_in = input.shape[2] if data_format == 'NCDHW' else input.shape[1]
h_in = input.shape[3] if data_format == 'NCDHW' else input.shape[2]
@@ -4149,6 +4149,13 @@ def conv3d_transpose(input,
if len(padding) == 6 and utils._is_symmetric_padding(padding, 3):
padding = [padding[0], padding[2], padding[4]]
+ if output_size is None:
+ output_size = []
+ elif isinstance(output_size, (list, tuple, int)):
+ output_size = utils.convert_to_list(output_size, 3, 'output_size')
+ else:
+ raise ValueError("output_size should be int, list[int] or tuple[int]")
+
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters // groups] + filter_size
img_filter = helper.create_parameter(
@@ -4166,6 +4173,7 @@ def conv3d_transpose(input,
'Filter': [img_filter]},
outputs={'Output': pre_bias},
attrs={
+ 'output_size': output_size,
'strides': stride,
'paddings': padding,
'padding_algorithm': padding_algorithm,
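
The conv3d_transpose fix above matters when output_size is given as a single int: previously only two values were produced for a 3-D op, and the attribute was never forwarded to the operator. A usage sketch (shapes here are illustrative, not taken from the patch):

```python
import paddle.fluid as fluid

x = fluid.data("x", (-1, 4, 8, 8, 8), dtype="float32")  # NCDHW layout
# With stride=2, kernel=3 and zero padding, the inferred depth/height/width
# is (8 - 1) * 2 + 3 = 17, so an int output_size of 17 (expanded to
# [17, 17, 17] by the fix) falls in the valid range [17, 19).
y = fluid.layers.conv3d_transpose(
    x, num_filters=6, filter_size=3, stride=2, output_size=17)
```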
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py
new file mode 100644
index 0000000000..c43454eaae
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py
@@ -0,0 +1,462 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv2D(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv2d(
+ x,
+ self.out_channels,
+ self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv2d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv2d(
+ x,
+ weight,
+ bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv2DError(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv2d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv2DCase2(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase3(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 3, 1]
+ self.stride = 2
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase4(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2]
+ self.stride = 1
+ self.dilation = 2
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase5(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 1], [2, 2], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase6(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase7(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 8
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase8(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 12
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 6
+ self.no_bias = True
+ self.act = None
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [3, 4], [5, 6]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 3
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = -5
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [3, 2], [1, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase10(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NHWC"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py
new file mode 100644
index 0000000000..21986f1b98
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py
@@ -0,0 +1,530 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv2D(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv2d_transpose(
+ x,
+ self.out_channels,
+ output_size=self.output_size,
+ filter_size=self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv2d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv2d_transpose(
+ x,
+ weight,
+ bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv2DError(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv2d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv2DCase2(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase3(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = True
+ self.act = None
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase4(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase5(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase6(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2)
+ self.dilation = (2, 1)
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase7(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2)
+ self.dilation = 1
+ self.groups = 4
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase8(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.output_size = [18, 34]
+ self.stride = (1, 2)
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase9(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [2, 1], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase10(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase11(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase12(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 2]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 2, 1, 3]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 2], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [0, 0], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase5(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = -2
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.output_size = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py
new file mode 100644
index 0000000000..195e3812f9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py
@@ -0,0 +1,462 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv3D(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NDHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv3d(
+ x,
+ self.out_channels,
+ self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv3d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv3d(
+ x,
+ weight,
+ bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv3DError(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NDHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv3d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv3DCase2(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase3(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 3, 1, 2, 3]
+ self.stride = 2
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase4(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2, 3, 3]
+ self.stride = 1
+ self.dilation = 2
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase5(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 1], [2, 2], [1, 1], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase6(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [2, 2], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DCase7(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 8
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DCase8(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 12
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 6
+ self.no_bias = True
+ self.act = None
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 1], [1, 2], [3, 4], [5, 6]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 3
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase6(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = -5
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [3, 2], [1, 2], [1, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NDHWC"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py
new file mode 100644
index 0000000000..f8e7818315
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py
@@ -0,0 +1,523 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv3DTranspose(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NDHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv3d_transpose(
+ x,
+ self.out_channels,
+ output_size=self.output_size,
+ filter_size=self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv3d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv3d_transpose(
+ x,
+ weight,
+ bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv3DTransposeError(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NDHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv3d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = True
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2, 1)
+ self.dilation = (2, 1, 1)
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2, 1)
+ self.dilation = 1
+ self.groups = 4
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.output_size = (10, 17, 10)
+ self.stride = (1, 2, 1)
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [1, 2], [2, 1], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [1, 1], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2, 1, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase2(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 2, 1, 3]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeErrorCase3(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [1, 2], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeErrorCase4(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [1, 1], [0, 0], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase5(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = -2
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase6(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase7(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.output_size = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase8(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv3DTransposeErrorCase9(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 4e6bfded78..3fd7da1ec1 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# TODO: import all neural network related api under this directory,
+# TODO: import all neural network related api under this directory,
# including layers, linear, conv, rnn etc.
__all__ = []
@@ -85,10 +85,10 @@ from .layer import loss #DEFINE_ALIAS
# from .layer.common import Embedding #DEFINE_ALIAS
# from .layer.common import Linear #DEFINE_ALIAS
# from .layer.common import UpSample #DEFINE_ALIAS
-# from .functional.conv import conv2d #DEFINE_ALIAS
-# from .functional.conv import conv2d_transpose #DEFINE_ALIAS
-# from .functional.conv import conv3d #DEFINE_ALIAS
-# from .functional.conv import conv3d_transpose #DEFINE_ALIAS
+from .functional.conv import conv2d #DEFINE_ALIAS
+from .functional.conv import conv2d_transpose #DEFINE_ALIAS
+from .functional.conv import conv3d #DEFINE_ALIAS
+from .functional.conv import conv3d_transpose #DEFINE_ALIAS
# from .functional.loss import bpr_loss #DEFINE_ALIAS
# from .functional.loss import center_loss #DEFINE_ALIAS
# from .functional.loss import cross_entropy #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index a3b3411333..9e517726eb 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# TODO: import all neural network related api under this directory,
+# TODO: import all neural network related api under this directory,
# including layers, linear, conv, rnn etc.
# __all__ = [ ]
# TODO: define alias in functional directory
-# from .conv import conv2d #DEFINE_ALIAS
-# from .conv import conv2d_transpose #DEFINE_ALIAS
-# from .conv import conv3d #DEFINE_ALIAS
-# from .conv import conv3d_transpose #DEFINE_ALIAS
+from .conv import conv2d #DEFINE_ALIAS
+from .conv import conv2d_transpose #DEFINE_ALIAS
+from .conv import conv3d #DEFINE_ALIAS
+from .conv import conv3d_transpose #DEFINE_ALIAS
# from .loss import bpr_loss #DEFINE_ALIAS
# from .loss import center_loss #DEFINE_ALIAS
# from .loss import cross_entropy #DEFINE_ALIAS
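
With these aliases in place, the functional conv ops become directly importable. A minimal dygraph sketch mirroring the new tests (shapes are illustrative):

```python
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.nn.functional as F

with dg.guard(fluid.CPUPlace()):
    x = dg.to_variable(np.random.randn(4, 3, 16, 16).astype("float32"))
    # weight layout is (out_channels, in_channels // groups, kh, kw)
    w = dg.to_variable(np.random.randn(5, 3, 3, 3).astype("float32"))
    y = F.conv2d(x, w, padding=1)  # defaults to NCHW, as in the new conv.py
    print(y.numpy().shape)  # (4, 5, 16, 16)
```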
diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py
index 199a72aa0a..6b37c1c68b 100644
--- a/python/paddle/nn/functional/conv.py
+++ b/python/paddle/nn/functional/conv.py
@@ -11,9 +11,1005 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function
+__all__ = ['conv2d', 'conv2d_transpose', 'conv3d', 'conv3d_transpose']
-# TODO: define functions of convolutional neural network
-# __all__ = ['conv2d',
-# 'conv2d_transpose',
-# 'conv3d',
-# 'conv3d_transpose']
+import numpy as np
+from ...fluid.framework import Variable, in_dygraph_mode
+from ...fluid import core, dygraph_utils
+from ...fluid.layers import nn, utils
+from ...fluid.data_feeder import check_variable_and_dtype
+from ...fluid.param_attr import ParamAttr
+from ...fluid.layer_helper import LayerHelper
+
+
+def _is_list_or_tuple(input):
+ return isinstance(input, (list, tuple))
+
+
+def _zero_padding_in_batch_and_channel(padding, channel_last):
+ if channel_last:
+ return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0]
+ else:
+ return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0]
+
+
+def _exclude_padding_in_batch_and_channel(padding, channel_last):
+ padding_ = padding[1:-1] if channel_last else padding[2:]
+ padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim]
+ return padding_
+
+
+def _update_padding_nd(padding, channel_last, num_dims):
+ if isinstance(padding, str):
+ padding = padding.upper()
+ if padding not in ["SAME", "VALID"]:
+ raise ValueError(
+ "Unknown padding: '{}'. It can only be 'SAME' or 'VALID'.".
+ format(padding))
+ if padding == "VALID":
+ padding_algorithm = "VALID"
+ padding = [0] * num_dims
+ else:
+ padding_algorithm = "SAME"
+ padding = [0] * num_dims
+ elif _is_list_or_tuple(padding):
+ # for padding like
+ # [(pad_before, pad_after), (pad_before, pad_after), ...]
+ # padding for batch_dim and channel_dim included
+ if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]):
+ if not _zero_padding_in_batch_and_channel(padding, channel_last):
+ raise ValueError(
+ "Non-zero padding({}) in the batch or channel dimensions "
+ "is not supported.".format(padding))
+ padding_algorithm = "EXPLICIT"
+ padding = _exclude_padding_in_batch_and_channel(padding,
+ channel_last)
+ if utils._is_symmetric_padding(padding, num_dims):
+ padding = padding[0::2]
+ # for padding like [pad_before, pad_after, pad_before, pad_after, ...]
+ elif len(padding) == 2 * num_dims and isinstance(padding[0], int):
+ padding_algorithm = "EXPLICIT"
+ padding = utils.convert_to_list(padding, 2 * num_dims, 'padding')
+ if utils._is_symmetric_padding(padding, num_dims):
+ padding = padding[0::2]
+ # for padding like [pad_d1, pad_d2, ...]
+ elif len(padding) == num_dims and isinstance(padding[0], int):
+ padding_algorithm = "EXPLICIT"
+ padding = utils.convert_to_list(padding, num_dims, 'padding')
+ else:
+ raise ValueError("In valid padding: {}".format(padding))
+ # for integer padding
+ else:
+ padding_algorithm = "EXPLICIT"
+ padding = utils.convert_to_list(padding, num_dims, 'padding')
+ return padding, padding_algorithm
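+
+# An added sketch (for illustration only, not part of the API):
+# _update_padding_nd normalizes every accepted padding form into a
+# (padding, padding_algorithm) pair. For num_dims=2 and channel-first data:
+#   _update_padding_nd("same", False, 2)        -> ([0, 0], "SAME")
+#   _update_padding_nd(1, False, 2)             -> ([1, 1], "EXPLICIT")
+#   _update_padding_nd([1, 1, 2, 2], False, 2)  -> ([1, 2], "EXPLICIT")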
+
+
+def conv2d(input,
+ weight,
+ bias=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format="NCHW",
+ name=None):
+ """
+ The convolution2D layer calculates the output based on the input, filter
+ and strides, paddings, dilations, groups parameters. Input and
+ Output are in NCHW or NHWC format, where N is batch size, C is the number of
+ channels, H is the height of the feature, and W is the width of the feature.
+ Filter is in MCHW format, where M is the number of output image channels,
+ C is the number of input image channels, H is the height of the filter,
+    and W is the width of the filter. If groups is greater than 1,
+    C will equal the number of input image channels divided by groups.
+    Please refer to UFLDL's `convolution
+    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
+    for more details.
+    If bias and activation type are provided, bias is added to the
+    output of the convolution, and the corresponding activation function is
+    applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ Where:
+
+ * :math:`X`: Input value, a tensor with NCHW or NHWC format.
+ * :math:`W`: Filter value, a tensor with MCHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
+ W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
+
+ Args:
+ input (Variable): The input is 4-D Tensor with shape [N, C, H, W], the data type
+ of input is float16 or float32 or float64.
+ weight (Variable): The convolution kernel with shape [M, C/g, kH, kW], where M is
+ the number of output channels, g is the number of groups, kH is the filter's
+ height, kW is the filter's width.
+ bias (Variable, optional): The bias with shape [M,].
+        padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
+            on both sides for each dimension. If `padding` is a string, either 'VALID' or
+            'SAME' which is the padding algorithm. If padding size is a tuple or list,
+            it could be in three forms: `[pad_height, pad_width]` or
+            `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
+            `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
+            [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+            when `data_format` is `"NHWC"`, `padding` can be in the form
+            `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+            Default: padding = 0.
+ stride (int|tuple): The stride size. It means the stride in convolution.
+ If stride is a tuple, it must contain two integers, (stride_height, stride_width).
+ Otherwise, stride_height = stride_width = stride. Default: stride = 1.
+ dilation (int|tuple): The dilation size. It means the spacing between the kernel
+ points. If dilation is a tuple, it must contain two integers, (dilation_height,
+ dilation_width). Otherwise, dilation_height = dilation_width = dilation.
+ Default: dilation = 1.
+ groups (int): The groups number of the Conv2d Layer. According to grouped
+ convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+ the first half of the filters is only connected to the first half
+ of the input channels, while the second half of the filters is only
+ connected to the second half of the input channels. Default: groups=1.
+ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str): Activation type, if it is set to None, activation is not appended.
+ Default: None
+ data_format (str, optional): Specify the data format of the input, and the data format of the output
+ will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
+ The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
+ `[batch_size, input_channels, input_height, input_width]`.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv2d result, whose data type is
+        the same as the input. If act is None, the tensor variable storing the convolution
+ result, and if act is not None, the tensor variable storing convolution
+ and non-linearity activation result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCHW" or "NHWC".
+        ValueError: If the channel dimension of the input is less than or equal to zero.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+            or the element corresponding to the input's channel is not 0.
+        ShapeError: If the input is not 4-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+        ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+        ShapeError: If the number of input channels is not equal to filter's channels * groups.
+        ShapeError: If the number of output channels is not divisible by groups.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8).astype(np.float32)
+ w = np.random.randn(6, 3, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv2d(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 6, 6)
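+
+            # a minimal sketch (added for illustration): with 'SAME' padding
+            # and the default stride 1, the spatial size is kept
+            y_same = F.conv2d(x_var, w_var, padding="SAME")
+            print(y_same.numpy().shape)
+            # (2, 6, 8, 8)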
+ """
+ # entry checks
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. "
+ "Received Attr(use_cudnn): {}.".format(use_cudnn))
+ if data_format not in ["NCHW", "NHWC"]:
+ raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'. "
+ "Received Attr(data_format): {}.".format(data_format))
+
+ channel_last = (data_format == "NHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ num_filters = weight.shape[0]
+ if num_channels < 0:
+ raise ValueError("The channel dimmention of the input({}) "
+ "should be defined. Received: {}.".format(
+ input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "the channel of input must be divisible by groups,"
+ "received: the channel of input is {}, the shape of input is {}"
+ ", the groups is {}".format(num_channels, input.shape, groups))
+ if num_filters % groups != 0:
+ raise ValueError(
+ "the number of filters must be divisible by groups,"
+ "received: the number of filters is {}, the shape of weight is {}"
+ ", the groups is {}".format(num_filters, weight.shape, groups))
+
+ # update attrs
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
+ stride = utils.convert_to_list(stride, 2, 'stride')
+ dilation = utils.convert_to_list(dilation, 2, 'dilation')
+
+ l_type = "conv2d"
+ if (num_channels == groups and num_filters % num_channels == 0 and
+ not use_cudnn):
+ l_type = 'depthwise_conv2d'
+
+ if in_dygraph_mode():
+ attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
+ 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
+ 'fuse_relu_before_depthwise_conv', False, "padding_algorithm",
+ padding_algorithm, "data_format", data_format)
+ pre_bias = getattr(core.ops, l_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'strides': stride,
+ 'paddings': padding,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ 'use_mkldnn': False,
+ 'fuse_relu_before_depthwise_conv': False,
+ "padding_algorithm": padding_algorithm,
+ "data_format": data_format
+ }
+ check_variable_and_dtype(input, 'input',
+ ['float16', 'float32', 'float64'], 'conv2d')
+ helper = LayerHelper(l_type, **locals())
+ dtype = helper.input_dtype()
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+ helper.append_op(
+ type=l_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+ return out
+
+
+def conv2d_transpose(input,
+ weight,
+ bias=None,
+ output_size=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format='NCHW',
+ name=None):
+ """
+ The convolution2D transpose layer calculates the output based on the input,
+ filter, and dilations, strides, paddings. Input(Input) and output(Output)
+ are in NCHW or NHWC format. Where N is batch size, C is the number of channels,
+ H is the height of the feature, and W is the width of the feature.
+    Parameters (dilations, strides, paddings) contain two elements, which
+    represent height and width, respectively. For details of the convolution
+    transpose layer, please refer to the following explanation and references
+    `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
+    If bias and activation type are provided, bias is added to
+    the output of the convolution, and the corresponding activation function
+    is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ Where:
+
+ * :math:`X`: Input value, a 4-D Tensor with NCHW or NHWC format.
+ * :math:`W`: Filter value, a 4-D Tensor with MCHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, a 4-D Tensor with data format 'NCHW' or 'NHWC', the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ H^\prime_{out} &= (H_{in} - 1) * strides[0] - pad_height_top - pad_height_bottom + dilations[0] * (H_f - 1) + 1 \\\\
+ W^\prime_{out} &= (W_{in} - 1) * strides[1] - pad_width_left - pad_width_right + dilations[1] * (W_f - 1) + 1 \\\\
+ H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ] \\\\
+ W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ]
+
+    Note:
+        The conv2d_transpose can be seen as the backward of the conv2d. For conv2d,
+        when stride > 1, conv2d maps multiple input shapes to the same output shape,
+        so for conv2d_transpose, when stride > 1, one input shape maps to multiple
+        output shapes. If output_size is None, :math:`H_{out} = H^\prime_{out},
+        W_{out} = W^\prime_{out}`; else, the :math:`H_{out}` of the output size must be
+        between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[0]`, and the
+        :math:`W_{out}` of the output size must be between :math:`W^\prime_{out}`
+        and :math:`W^\prime_{out} + strides[1]`.
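+        For example, with :math:`H_{in} = 8`, :math:`strides[0] = 2`, :math:`H_f = 3`
+        and zero padding, :math:`H^\prime_{out} = (8 - 1) * 2 + 3 = 17`, so a valid
+        :math:`H_{out}` is 17 or 18.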
+
+ Args:
+ input(Variable): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format,
+ whose data type is float32 or float64.
+ weight(Variable): The convolution kernel, a Tensor with shape [C, M/g, kH, kW],
+ where M is the number of output channels(filters), g is the number of groups,
+ kH is the height of the kernel, and kW is the width of the kernel.
+ bias(Variable, optional): The bias, a Tensor with shape [M, ].
+        output_size(int|tuple|list, optional): The output image size. If output size is a
+            tuple, it must contain two integers, (image_height, image_width). If it is
+            None, the output size is computed from the input size, padding, and stride.
+            If output_size is specified, it and the weight's shape should follow the
+            formula above. Default: None.
+ padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
+ `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
+ string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
+ If `padding` is a tuple or list, it could be in three forms:
+ `[pad_height, pad_width]` or
+ `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and
+ when `data_format` is `'NCHW'`,
+ `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+ when `data_format` is `'NHWC'`, `padding` can be in the form
+ `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+ Default: padding = 0.
+ stride(int|tuple, optional): The stride size. It means the stride in transposed convolution.
+ If stride is a tuple, it must contain two integers, (stride_height, stride_width).
+ Otherwise, stride_height = stride_width = stride. Default: stride = 1.
+ dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points.
+ If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
+ Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
+ groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
+ grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+ when group=2, the first half of the filters is only connected to the
+ first half of the input channels, while the second half of the
+ filters is only connected to the second half of the input channels.
+ Default: groups = 1.
+ use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True.
+ act (str, optional): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+ data_format (str, optional): Specify the data format of the input, and the data format of the output
+ will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
+ The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
+ `[batch_size, input_channels, input_height, input_width]`.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv2d_transpose, whose
+        data type is the same as the input and whose shape is (num_batches, channels, out_h,
+ out_w) or (num_batches, out_h, out_w, channels). If act is None, the tensor variable
+ storing the transposed convolution result, and if act is not None, the
+ tensor variable storing transposed convolution and non-linearity activation
+ result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCHW" or "NHWC".
+ ValueError: If `padding` is a string, but not "SAME" or "VALID".
+ ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+ or the element corresponding to the input's channel is not 0.
+ ValueError: If `output_size` and filter_size are None at the same time.
+ ShapeError: If the input is not 4-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+ ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+ ShapeError: If the number of input channels is not equal to filter's channels.
+ ShapeError: If the size of `output_size` is not equal to that of `stride`.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8).astype(np.float32)
+ w = np.random.randn(3, 6, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv2d_transpose(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 10, 10)
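+
+            # a minimal sketch (added for illustration): output_size may pick
+            # any size in [infer_shape, infer_shape + stride); here with
+            # stride 2, infer_shape = (8 - 1) * 2 + 3 = 17
+            y_var = F.conv2d_transpose(
+                x_var, w_var, output_size=(17, 17), stride=2)
+            print(y_var.numpy().shape)
+            # (2, 6, 17, 17)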
+ """
+
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. "
+ "Received Attr(use_cudnn): {}.".format(use_cudnn))
+ if data_format not in ['NCHW', 'NHWC']:
+ raise ValueError(
+ "Attr(data_format) of conv2d_transpose got wrong value: "
+ "received {}, but only 'NCHW' or 'NHWC' are supported.".format(
+ data_format))
+ channel_last = (data_format == "NHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ if num_channels < 0:
+ raise ValueError("The channel dimmention of the input({}) "
+ "should be defined. Received: {}.".format(
+ input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "the channel of input must be divisible by groups,"
+ "received: the channel of input is {}, the shape of input is {}"
+ ", the groups is {}".format(num_channels, input.shape, groups))
+
+ # update attrs
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
+ stride = utils.convert_to_list(stride, 2, 'stride')
+ dilation = utils.convert_to_list(dilation, 2, 'dilation')
+ if output_size is None:
+ output_size = []
+ elif isinstance(output_size, (list, tuple, int)):
+ output_size = utils.convert_to_list(output_size, 2, 'output_size')
+ else:
+ raise ValueError("output_size should be int, or list, tuple of ints")
+
+ op_type = 'conv2d_transpose'
+ num_filters = weight.shape[1]
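+    # a depthwise transposed convolution (one group per input channel,
+    # channel multiplier 1) has a specialized non-cuDNN kernel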
+ if (num_channels == groups and num_filters == 1 and not use_cudnn):
+ op_type = 'depthwise_conv2d_transpose'
+
+ if in_dygraph_mode():
+ attrs = ('output_size', output_size, 'strides', stride, 'paddings',
+ padding, 'padding_algorithm', padding_algorithm, 'dilations',
+ dilation, 'groups', groups, 'use_cudnn', use_cudnn,
+ 'data_format', data_format)
+ pre_bias = getattr(core.ops, op_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'output_size': output_size,
+ 'strides': stride,
+ 'paddings': padding,
+ 'padding_algorithm': padding_algorithm,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ 'data_format': data_format
+ }
+ check_variable_and_dtype(input, 'input',
+ ['float16', 'float32', 'float64'],
+ 'conv2d_transpose')
+ helper = LayerHelper(op_type, **locals())
+ dtype = helper.input_dtype()
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+ helper.append_op(
+ type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+ return out
+
+
+def conv3d(input,
+ weight,
+ bias=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format="NCDHW",
+ name=None):
+ """
+    The convolution3D layer calculates the output based on the input, filter
+    and strides, paddings, dilations, groups parameters. Input(Input) and
+    Output(Output) are in NCDHW or NDHWC format, where N is batch size, C is the number of
+    channels, D is the depth of the feature, H is the height of the feature,
+    and W is the width of the feature. Convolution3D is similar to Convolution2D
+    but adds one dimension (depth). If bias and activation type are
+    provided, bias is added to the output of the convolution, and the
+    corresponding activation function is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ In the above equation:
+
+ * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
+ * :math:`W`: Filter value, a tensor with MCDHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
+
+    - Output:
+
+        Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
+ H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
+ W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
+
+ Args:
+ input (Variable): The input is 5-D Tensor with shape [N, C, D, H, W], the data
+ type of input is float16 or float32 or float64.
+ weight (Variable): The convolution kernel, a Tensor with shape [M, C/g, kD, kH, kW],
+ where M is the number of filters(output channels), g is the number of groups,
+ kD, kH, kW are the filter's depth, height and width respectively.
+ bias (Variable, optional): The bias, a Tensor of shape [M, ].
+ padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
+ on both sides for each dimension. If `padding` is a string, either 'VALID' or
+ 'SAME' which is the padding algorithm. If padding size is a tuple or list,
+ it could be in three forms: `[pad_depth, pad_height, pad_width]` or
+ `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
+ and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
+ `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+ when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
+ `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+ Default: padding = 0.
+ stride (int|tuple): The stride size. It means the stride in convolution. If stride is a
+ tuple, it must contain three integers, (stride_depth, stride_height, stride_width).
+ Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1.
+ dilation (int|tuple): The dilation size. It means the spacing between the kernel points.
+ If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
+ dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
+ Default: dilation = 1.
+ groups (int): The groups number of the Conv3d Layer. According to grouped
+ convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+ the first half of the filters is only connected to the first half
+ of the input channels, while the second half of the filters is only
+ connected to the second half of the input channels. Default: groups=1
+ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+        data_format (str, optional): Specify the data format of the input, and the data format of the output
+            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
+        name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv3d, whose data type is
+        the same as the input. If act is None, the tensor variable storing the
+ convolution result, and if act is not None, the tensor variable storing
+ convolution and non-linearity activation result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCDHW" or "NDHWC".
+        ValueError: If the channel dimension of the input is less than or equal to zero.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+            or the element corresponding to the input's channel is not 0.
+        ShapeError: If the input is not 5-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+        ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+        ShapeError: If the number of input channels is not equal to filter's channels * groups.
+        ShapeError: If the number of output channels is not divisible by groups.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
+ w = np.random.randn(6, 3, 3, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv3d(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 6, 6, 6)
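+
+            # a minimal sketch (added for illustration): integer padding pads
+            # every spatial side, so padding=1 with a 3x3x3 kernel and the
+            # default stride 1 keeps the spatial size
+            y_pad = F.conv3d(x_var, w_var, padding=1)
+            print(y_pad.numpy().shape)
+            # (2, 6, 8, 8, 8)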
+ """
+ # entry check
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. Received "
+ "Attr(use_cudnn): {}. ".format(use_cudnn))
+
+ if data_format not in ["NCDHW", "NDHWC"]:
+ raise ValueError(
+ "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
+ "Attr(data_format): {}.".format(data_format))
+
+ channel_last = (data_format == "NDHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ num_filters = weight.shape[0]
+ if num_channels < 0:
+ raise ValueError(
+ "The channel dimmention of the input({}) should be defined. "
+ "Received: {}.".format(input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "The number of input channels must be divisible by Attr(groups). "
+ "Received: number of channels({}), groups({}).".format(num_channels,
+ groups))
+ if num_filters % groups != 0:
+ raise ValueError(
+ "The number of filters must be divisible by Attr(groups). "
+ "Received: number of filters({}), groups({}).".format(num_filters,
+ groups))
+
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
+ stride = utils.convert_to_list(stride, 3, 'stride')
+ dilation = utils.convert_to_list(dilation, 3, 'dilation')
+ op_type = "conv3d"
+
+ if in_dygraph_mode():
+ attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
+ 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
+ "padding_algorithm", padding_algorithm, "data_format",
+ data_format)
+ pre_bias = getattr(core.ops, op_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'strides': stride,
+ 'paddings': padding,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ 'use_mkldnn': False,
+ "padding_algorithm": padding_algorithm,
+ "data_format": data_format
+ }
+ helper = LayerHelper(op_type, **locals())
+ dtype = helper.input_dtype()
+ check_variable_and_dtype(input, 'input',
+ ['float16', 'float32', 'float64'], 'conv3d')
+
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+
+ helper.append_op(
+ type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+
+ return out
+
+
+def conv3d_transpose(input,
+ weight,
+ bias=None,
+ output_size=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format='NCDHW',
+ name=None):
+ """
+ The convolution3D transpose layer calculates the output based on the input,
+ filter, and dilations, strides, paddings. Input(Input) and output(Output)
+ are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels,
+ D is the depth of the feature, H is the height of the feature, and W
+    is the width of the feature. Parameters (dilations, strides, paddings) contain
+    three elements, which represent depth, height and width, respectively. For
+    details of the convolution transpose layer, please refer to the following
+    explanation and references `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
+    If bias and activation type are provided, bias is added to
+    the output of the convolution, and the corresponding activation function
+    is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ In the above equation:
+
+ * :math:`X`: Input value, a Tensor with NCDHW or NDHWC format.
+ * :math:`W`: Filter value, a Tensor with MCDHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
+ H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
+ W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\\\
+ D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\\\
+ H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\\\
+ W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[2] ]
+
+    Note:
+        The conv3d_transpose can be seen as the backward of the conv3d. For conv3d,
+        when stride > 1, conv3d maps multiple input shapes to the same output shape,
+        so for conv3d_transpose, when stride > 1, one input shape maps to multiple
+        output shapes. If output_size is None, :math:`D_{out} = D^\prime_{out},
+        H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`; else, the :math:`D_{out}`
+        of the output size must be between :math:`D^\prime_{out}` and
+        :math:`D^\prime_{out} + strides[0]`, the :math:`H_{out}` must be between
+        :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[1]`, and the
+        :math:`W_{out}` must be between :math:`W^\prime_{out}` and
+        :math:`W^\prime_{out} + strides[2]`.
+
+ Args:
+ input(Variable): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type
+ of input is float32 or float64.
+ weight (Variable): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW],
+ where M is the number of filters(output channels), g is the number of groups,
+ kD, kH, kW are the filter's depth, height and width respectively.
+ bias (Variable, optional): The bias, a Tensor of shape [M, ].
+        output_size(int|tuple, optional): The output image size. If output size is a
+            tuple, it must contain three integers, (image_depth, image_height, image_width).
+            If it is None, the output size is computed from the input size, padding, and
+            stride; otherwise, it should follow the formula above. Default: None.
+ padding(int|list|str|tuple, optional): The padding size. The padding argument effectively
+ adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string,
+ either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding`
+ is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or
+ `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
+ and when `data_format` is `'NCDHW'`, `padding` can be in the form
+ `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+ when `data_format` is `'NDHWC'`, `padding` can be in the form
+ `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+ Default: padding = 0.
+ stride(int|tuple, optional): The stride size. It means the stride in transposed convolution.
+ If stride is a tuple, it must contain three integers, (stride_depth, stride_height,
+ stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
+ Default: stride = 1.
+ dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points.
+ If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
+ dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
+ Default: dilation = 1.
+ groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by
+ grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+ when group=2, the first half of the filters is only connected to the
+ first half of the input channels, while the second half of the
+ filters is only connected to the second half of the input channels.
+ Default: groups=1
+ use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str, optional): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+        data_format (str, optional): Specify the data format of the input, and the data format of the output
+            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv3d_transpose, whose data
+        type is the same as the input and whose shape is (num_batches, channels, out_d, out_h,
+ out_w) or (num_batches, out_d, out_h, out_w, channels). If act is None, the tensor
+ variable storing the transposed convolution result, and if act is not None, the tensor
+ variable storing transposed convolution and non-linearity activation result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCDHW" or "NDHWC".
+ ValueError: If `padding` is a string, but not "SAME" or "VALID".
+ ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+ or the element corresponding to the input's channel is not 0.
+ ValueError: If `output_size` and filter_size are None at the same time.
+ ShapeError: If the input is not 5-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+ ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+ ShapeError: If the number of input channels is not equal to filter's channels.
+ ShapeError: If the size of `output_size` is not equal to that of `stride`.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
+ w = np.random.randn(3, 6, 3, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv3d_transpose(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 10, 10, 10)
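+
+            # a minimal sketch (added for illustration): with stride 2 and a
+            # cubic kernel of 3, infer_shape = (8 - 1) * 2 + 3 = 17, so any
+            # output_size in [17, 19) per dimension is valid
+            y_var = F.conv3d_transpose(
+                x_var, w_var, output_size=17, stride=2)
+            print(y_var.numpy().shape)
+            # (2, 6, 17, 17, 17)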
+ """
+ # entry checks
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. "
+ "Received Attr(use_cudnn): {}.".format(use_cudnn))
+ if data_format not in ["NCDHW", "NDHWC"]:
+ raise ValueError(
+ "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
+ "Attr(data_format): {}.".format(data_format))
+
+ channel_last = (data_format == "NDHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ num_filters = weight.shape[1]
+ if num_channels < 0:
+ raise ValueError(
+ "The channel dimmention of the input({}) should be defined. "
+ "Received: {}.".format(input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "The number of input channels must be divisible by Attr(groups). "
+ "Received: number of channels({}), groups({}).".format(num_channels,
+ groups))
+
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
+ stride = utils.convert_to_list(stride, 3, 'stride')
+ dilation = utils.convert_to_list(dilation, 3, 'dilation')
+ if output_size is None:
+ output_size = []
+ elif isinstance(output_size, (list, tuple, int)):
+ output_size = utils.convert_to_list(output_size, 3, 'output_size')
+ else:
+ raise ValueError("output_size should be int, or list, tuple of ints")
+
+ op_type = 'conv3d_transpose'
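+    # the conv_transpose op appears to distinguish layouts only as
+    # channel-first vs channel-last, hence the 2-D style layout string below
+    # (an inference from this mapping, not a documented contract)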
+ data_format_ = "NHWC" if channel_last else "NCHW"
+
+ if in_dygraph_mode():
+ attrs = ('output_size', output_size, 'paddings', padding,
+ "padding_algorithm", padding_algorithm, 'strides', stride,
+ 'dilations', dilation, 'groups', groups, 'use_cudnn',
+ use_cudnn, "data_format", data_format_)
+ pre_bias = getattr(core.ops, op_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'output_size': output_size,
+ 'paddings': padding,
+ "padding_algorithm": padding_algorithm,
+ 'strides': stride,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ "data_format": data_format_
+ }
+ helper = LayerHelper(op_type, **locals())
+ dtype = helper.input_dtype()
+        check_variable_and_dtype(input, 'input',
+                                 ['float16', 'float32', 'float64'],
+                                 'conv3d_transpose')
+
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+
+ helper.append_op(
+ type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+
+ return out
diff --git a/python/setup.py.in b/python/setup.py.in
index cdecd1189d..d70e93dc15 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -146,7 +146,10 @@ packages=['paddle',
'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler',
'paddle.fluid.incubate.fleet.parameter_server.pslib',
'paddle.fluid.incubate.fleet.collective',
- 'paddle.fluid.incubate.fleet.utils']
+ 'paddle.fluid.incubate.fleet.utils',
+ 'paddle.nn',
+ 'paddle.nn.functional',
+ 'paddle.nn.layer']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
setup_requires = f.read().splitlines()