diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc
index 51d2d80206..c6c1af0e36 100644
--- a/paddle/fluid/operators/conv_transpose_op.cc
+++ b/paddle/fluid/operators/conv_transpose_op.cc
@@ -109,14 +109,30 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
const int offset = (data_layout != DataLayout::kNHWC ? 2 : 1);
for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
- auto infer_shape = (in_dims[i + offset] - 1) * strides[i] -
- paddings[2 * i] - paddings[2 * i + 1] + filter_extent;
+ auto infer_shape = (ctx->IsRuntime() || in_dims[i + offset] > 0)
+ ? (in_dims[i + offset] - 1) * strides[i] -
+ paddings[2 * i] - paddings[2 * i + 1] +
+ filter_extent
+ : -1;
if (output_size.size()) {
- PADDLE_ENFORCE_EQ((output_size[i] >= infer_shape &&
- output_size[i] < infer_shape + strides[i]),
- true,
- "output_size of Op(ConvTransposeOp) should be "
- "in appropriate range.");
+ if (ctx->IsRuntime()) {
+ PADDLE_ENFORCE_GE(
+ output_size[i], infer_shape,
+ platform::errors::InvalidArgument(
+ "output_size of Op(ConvTransposeOp) should not be "
+                "less than the inferred output size. But received output_size = "
+                "[%s], whose dim %d is less than the inferred output size [%s]",
+ framework::make_ddim(output_size), i, infer_shape));
+ PADDLE_ENFORCE_LT(
+ output_size[i], infer_shape + strides[i],
+ platform::errors::InvalidArgument(
+ "output_size of Op(ConvTransposeOp) should be less "
+                "than the inferred size + stride. But received output_size = [%s], "
+                "whose dim %d is not less than the inferred output size (%d) + "
+ "stride (%d) = %d",
+ framework::make_ddim(output_size), i, infer_shape, strides[i],
+ infer_shape + strides[i]));
+ }
output_shape.push_back(output_size[i]);
} else {
output_shape.push_back(infer_shape);
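
For reference, the checks added above implement the standard transposed-convolution size relation; the following is a small standalone sketch (plain Python, not Paddle code) of the accepted range, using illustrative numbers:

```python
# Sketch of the inferred-size check added above.
# For each spatial dim: infer = (in - 1) * stride - pad_begin - pad_end
#                               + dilation * (kernel - 1) + 1
# A user-supplied output_size[i] is accepted iff
#   infer <= output_size[i] < infer + stride.
def valid_output_size(in_size, stride, pad_begin, pad_end, dilation, kernel,
                      output_size):
    filter_extent = dilation * (kernel - 1) + 1
    infer = (in_size - 1) * stride - pad_begin - pad_end + filter_extent
    return infer <= output_size < infer + stride

# e.g. in=16, stride=2, no padding, dilation=1, kernel=3 -> infer = 33,
# so an output_size of 33 or 34 is accepted while 35 is rejected.
assert valid_output_size(16, 2, 0, 0, 1, 3, 33)
assert valid_output_size(16, 2, 0, 0, 1, 3, 34)
assert not valid_output_size(16, 2, 0, 0, 1, 3, 35)
```

Note that at compile time (`!ctx->IsRuntime()`), unknown input extents are propagated as -1 and the range check is deferred to runtime.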
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 3f85f89a52..3a84184f80 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -3857,10 +3857,10 @@ def conv2d_transpose(input,
if output_size is None:
output_size = []
- elif isinstance(output_size, list) or isinstance(output_size, int):
+ elif isinstance(output_size, (list, tuple, int)):
output_size = utils.convert_to_list(output_size, 2, 'output_size')
else:
- raise ValueError("output_size should be list or int")
+ raise ValueError("output_size should be int, list[int] or tuple[int]")
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters // groups] + filter_size
@@ -4129,7 +4129,7 @@ def conv3d_transpose(input,
if output_size is None:
raise ValueError("output_size must be set when filter_size is None")
if isinstance(output_size, int):
- output_size = [output_size, output_size]
+ output_size = [output_size, output_size, output_size]
d_in = input.shape[2] if data_format == 'NCDHW' else input.shape[1]
h_in = input.shape[3] if data_format == 'NCDHW' else input.shape[2]
@@ -4149,6 +4149,13 @@ def conv3d_transpose(input,
if len(padding) == 6 and utils._is_symmetric_padding(padding, 3):
padding = [padding[0], padding[2], padding[4]]
+ if output_size is None:
+ output_size = []
+ elif isinstance(output_size, (list, tuple, int)):
+ output_size = utils.convert_to_list(output_size, 3, 'output_size')
+ else:
+ raise ValueError("output_size should be int, list[int] or tuple[int]")
+
groups = 1 if groups is None else groups
filter_shape = [input_channel, num_filters // groups] + filter_size
img_filter = helper.create_parameter(
@@ -4166,6 +4173,7 @@ def conv3d_transpose(input,
'Filter': [img_filter]},
outputs={'Output': pre_bias},
attrs={
+ 'output_size': output_size,
'strides': stride,
'paddings': padding,
'padding_algorithm': padding_algorithm,
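
The conv3d_transpose fix above matters when output_size is given as a single int: previously only two values were produced for a 3-D op, and the attribute was never forwarded to the operator. A usage sketch (shapes here are illustrative, not taken from the patch):

```python
import paddle.fluid as fluid

x = fluid.data("x", (-1, 4, 8, 8, 8), dtype="float32")  # NCDHW layout
# With stride=2, kernel=3 and zero padding, the inferred depth/height/width
# is (8 - 1) * 2 + 3 = 17, so an int output_size of 17 (expanded to
# [17, 17, 17] by the fix) falls in the valid range [17, 19).
y = fluid.layers.conv3d_transpose(
    x, num_filters=6, filter_size=3, stride=2, output_size=17)
```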
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py
new file mode 100644
index 0000000000..c43454eaae
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py
@@ -0,0 +1,462 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv2D(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv2d(
+ x,
+ self.out_channels,
+ self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv2d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv2d(
+ x,
+ weight,
+ bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv2DError(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv2d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv2DCase2(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase3(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 3, 1]
+ self.stride = 2
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase4(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2]
+ self.stride = 1
+ self.dilation = 2
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase5(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 1], [2, 2], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase6(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase7(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 8
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase8(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 12
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 6
+ self.no_bias = True
+ self.act = None
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [3, 4], [5, 6]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 3
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = -5
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [3, 2], [1, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase10(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NHWC"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py
new file mode 100644
index 0000000000..21986f1b98
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py
@@ -0,0 +1,530 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv2D(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv2d_transpose(
+ x,
+ self.out_channels,
+ output_size=self.output_size,
+ filter_size=self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv2d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv2d_transpose(
+ x,
+ weight,
+ bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv2DError(TestCase):
+ batch_size = 4
+ spatial_shape = (16, 16)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 2
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv2d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv2DCase2(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase3(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = True
+ self.act = None
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase4(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase5(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase6(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2)
+ self.dilation = (2, 1)
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase7(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2)
+ self.dilation = 1
+ self.groups = 4
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase8(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.output_size = [18, 34]
+ self.stride = (1, 2)
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase9(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [2, 1], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DCase10(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase11(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DCase12(TestFunctionalConv2D):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 2]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase2(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 2, 1, 3]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DErrorCase3(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 2], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NHWC"
+
+
+class TestFunctionalConv2DErrorCase4(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [0, 0], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase5(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = -2
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase6(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase7(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.output_size = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+class TestFunctionalConv2DErrorCase8(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv2DErrorCase9(TestFunctionalConv2DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCHW"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py
new file mode 100644
index 0000000000..195e3812f9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py
@@ -0,0 +1,462 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv3D(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NDHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv3d(
+ x,
+ self.out_channels,
+ self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv3d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv3d(
+ x,
+ weight,
+ bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv3DError(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.out_channels, self.in_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NDHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv3d(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv3DCase2(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase3(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 3, 1, 2, 3]
+ self.stride = 2
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase4(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2, 3, 3]
+ self.stride = 1
+ self.dilation = 2
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase5(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 1], [2, 2], [1, 1], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DCase6(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [2, 2], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DCase7(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 8
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DCase8(TestFunctionalConv3D):
+ def setUp(self):
+ self.in_channels = 6
+ self.out_channels = 12
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 6
+ self.no_bias = True
+ self.act = None
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase2(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 1], [1, 2], [3, 4], [5, 6]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase3(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv3DErrorCase4(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 3
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase6(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase7(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv3DErrorCase8(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase9(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = -5
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [3, 2], [1, 2], [1, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DErrorCase10(TestFunctionalConv3DError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NDHWC"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py
new file mode 100644
index 0000000000..f8e7818315
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py
@@ -0,0 +1,523 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import paddle.nn.functional as F
+from paddle import fluid
+import paddle.fluid.dygraph as dg
+import paddle.fluid.initializer as I
+import numpy as np
+import unittest
+from unittest import TestCase
+
+
+class TestFunctionalConv3DTranspose(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+
+ self.weight = np.random.uniform(
+ -1, 1, (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape).astype(self.dtype)
+ if not self.no_bias:
+ self.bias = np.random.uniform(-1, 1, (
+ self.out_channels, )).astype(self.dtype)
+
+ self.channel_last = (self.data_format == "NDHWC")
+ if self.channel_last:
+ self.input_shape = (self.batch_size, ) + self.spatial_shape + (
+ self.in_channels, )
+ else:
+ self.input_shape = (self.batch_size, self.in_channels
+ ) + self.spatial_shape
+
+ self.input = np.random.uniform(-1, 1,
+ self.input_shape).astype(self.dtype)
+
+ def static_graph_case_1(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+ x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ y = fluid.layers.conv3d_transpose(
+ x,
+ self.out_channels,
+ output_size=self.output_size,
+ filter_size=self.filter_shape,
+ stride=self.stride,
+ padding=self.padding,
+ dilation=self.dilation,
+ groups=self.groups,
+ param_attr=I.NumpyArrayInitializer(self.weight),
+ bias_attr=False
+ if self.no_bias else I.NumpyArrayInitializer(self.bias),
+ use_cudnn=self.use_cudnn,
+ act=self.act,
+ data_format=self.data_format)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ out, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
+ return out
+
+ def static_graph_case_2(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight.shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias.shape, dtype=self.dtype)
+ y = F.conv3d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ exe = fluid.Executor(self.place)
+ exe.run(start)
+ feed_dict = {"input": self.input, "weight": self.weight}
+ if not self.no_bias:
+ feed_dict["bias"] = self.bias
+ out, = exe.run(main, feed=feed_dict, fetch_list=[y])
+ return out
+
+ def dygraph_case(self):
+ with dg.guard(self.place):
+ x = dg.to_variable(self.input)
+ weight = dg.to_variable(self.weight)
+ bias = None if self.no_bias else dg.to_variable(self.bias)
+ y = F.conv3d_transpose(
+ x,
+ weight,
+ bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ act=self.act,
+ groups=self.groups,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+ out = y.numpy()
+ return out
+
+ def _test_identity(self):
+ self.prepare()
+ out1 = self.static_graph_case_1()
+ out2 = self.static_graph_case_2()
+ out3 = self.dygraph_case()
+ np.testing.assert_array_almost_equal(out1, out2)
+ np.testing.assert_array_almost_equal(out2, out3)
+
+ def test_identity_cpu(self):
+ self.place = fluid.CPUPlace()
+ self._test_identity()
+
+ @unittest.skipIf(not fluid.core.is_compiled_with_cuda(),
+ "core is not compiled with CUDA")
+ def test_identity_gpu(self):
+ self.place = fluid.CUDAPlace(0)
+ self._test_identity()
+
+
+class TestFunctionalConv3DTransposeError(TestCase):
+ batch_size = 4
+ spatial_shape = (8, 8, 8)
+ dtype = "float32"
+ output_size = None
+
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+ def test_exception(self):
+ self.prepare()
+ with self.assertRaises(ValueError):
+ self.static_graph_case()
+
+ def prepare(self):
+ if isinstance(self.filter_shape, int):
+ filter_shape = (self.filter_shape, ) * 3
+ else:
+ filter_shape = tuple(self.filter_shape)
+ self.weight_shape = (self.in_channels, self.out_channels // self.groups
+ ) + filter_shape
+ self.bias_shape = (self.out_channels, )
+
+ def static_graph_case(self):
+ main = fluid.Program()
+ start = fluid.Program()
+ with fluid.unique_name.guard():
+ with fluid.program_guard(main, start):
+ self.channel_last = self.data_format == "NDHWC"
+ if self.channel_last:
+                    x = fluid.data(
+ "input", (-1, -1, -1, -1, self.in_channels),
+ dtype=self.dtype)
+ else:
+ x = fluid.data(
+ "input", (-1, self.in_channels, -1, -1, -1),
+ dtype=self.dtype)
+ weight = fluid.data(
+ "weight", self.weight_shape, dtype=self.dtype)
+ if not self.no_bias:
+ bias = fluid.data("bias", self.bias_shape, dtype=self.dtype)
+ y = F.conv3d_transpose(
+ x,
+ weight,
+ None if self.no_bias else bias,
+ output_size=self.output_size,
+ padding=self.padding,
+ stride=self.stride,
+ dilation=self.dilation,
+ groups=self.groups,
+ act=self.act,
+ data_format=self.data_format,
+ use_cudnn=self.use_cudnn)
+
+
+class TestFunctionalConv3DTransposeCase2(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase3(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase4(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "same"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = True
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase5(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2, 1)
+ self.dilation = (2, 1, 1)
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase6(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.stride = (1, 2, 1)
+ self.dilation = 1
+ self.groups = 4
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = False
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase7(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = "valid"
+ self.output_size = (10, 17, 10)
+ self.stride = (1, 2, 1)
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase8(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [1, 2], [2, 1], [0, 0]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeCase9(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [1, 1], [2, 2]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase10(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 1, 2, 2, 1, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeCase11(TestFunctionalConv3DTranspose):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 6
+ self.filter_shape = 3
+ self.padding = [1, 2, 1]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase2(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [1, 2, 2, 1, 3]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeErrorCase3(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [0, 0], [1, 1], [1, 2], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NDHWC"
+
+
+class TestFunctionalConv3DTransposeErrorCase4(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = [[0, 0], [1, 2], [1, 1], [0, 0], [2, 1]]
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase5(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = -2
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase6(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = "not_valid"
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase7(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.output_size = "not_valid"
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+class TestFunctionalConv3DTransposeErrorCase8(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 4
+ self.out_channels = 5
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 1
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "not_valid"
+
+
+class TestFunctionalConv3DTransposeErrorCase9(
+ TestFunctionalConv3DTransposeError):
+ def setUp(self):
+ self.in_channels = 3
+ self.out_channels = 4
+ self.filter_shape = 3
+ self.padding = 0
+ self.stride = 1
+ self.dilation = 1
+ self.groups = 2
+ self.no_bias = False
+ self.act = "sigmoid"
+ self.use_cudnn = True
+ self.data_format = "NCDHW"
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index 4e6bfded78..3fd7da1ec1 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# TODO: import all neural network related api under this directory,
+# TODO: import all neural network related api under this directory,
# including layers, linear, conv, rnn etc.
__all__ = []
@@ -85,10 +85,10 @@ from .layer import loss #DEFINE_ALIAS
# from .layer.common import Embedding #DEFINE_ALIAS
# from .layer.common import Linear #DEFINE_ALIAS
# from .layer.common import UpSample #DEFINE_ALIAS
-# from .functional.conv import conv2d #DEFINE_ALIAS
-# from .functional.conv import conv2d_transpose #DEFINE_ALIAS
-# from .functional.conv import conv3d #DEFINE_ALIAS
-# from .functional.conv import conv3d_transpose #DEFINE_ALIAS
+from .functional.conv import conv2d #DEFINE_ALIAS
+from .functional.conv import conv2d_transpose #DEFINE_ALIAS
+from .functional.conv import conv3d #DEFINE_ALIAS
+from .functional.conv import conv3d_transpose #DEFINE_ALIAS
# from .functional.loss import bpr_loss #DEFINE_ALIAS
# from .functional.loss import center_loss #DEFINE_ALIAS
# from .functional.loss import cross_entropy #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index a3b3411333..9e517726eb 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# TODO: import all neural network related api under this directory,
+# TODO: import all neural network related api under this directory,
# including layers, linear, conv, rnn etc.
# __all__ = [ ]
# TODO: define alias in functional directory
-# from .conv import conv2d #DEFINE_ALIAS
-# from .conv import conv2d_transpose #DEFINE_ALIAS
-# from .conv import conv3d #DEFINE_ALIAS
-# from .conv import conv3d_transpose #DEFINE_ALIAS
+from .conv import conv2d #DEFINE_ALIAS
+from .conv import conv2d_transpose #DEFINE_ALIAS
+from .conv import conv3d #DEFINE_ALIAS
+from .conv import conv3d_transpose #DEFINE_ALIAS
# from .loss import bpr_loss #DEFINE_ALIAS
# from .loss import center_loss #DEFINE_ALIAS
# from .loss import cross_entropy #DEFINE_ALIAS
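
With these aliases in place, the functional conv ops become directly importable. A minimal dygraph sketch mirroring the new tests (shapes are illustrative):

```python
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.nn.functional as F

with dg.guard(fluid.CPUPlace()):
    x = dg.to_variable(np.random.randn(4, 3, 16, 16).astype("float32"))
    # weight layout is (out_channels, in_channels // groups, kh, kw)
    w = dg.to_variable(np.random.randn(5, 3, 3, 3).astype("float32"))
    y = F.conv2d(x, w, padding=1)  # defaults to NCHW, as in the new conv.py
    print(y.numpy().shape)  # (4, 5, 16, 16)
```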
diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py
index 199a72aa0a..6b37c1c68b 100644
--- a/python/paddle/nn/functional/conv.py
+++ b/python/paddle/nn/functional/conv.py
@@ -11,9 +11,1005 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+from __future__ import print_function
+__all__ = ['conv2d', 'conv2d_transpose', 'conv3d', 'conv3d_transpose']
-# TODO: define functions of convolutional neural network
-# __all__ = ['conv2d',
-# 'conv2d_transpose',
-# 'conv3d',
-# 'conv3d_transpose']
+import numpy as np
+from ...fluid.framework import Variable, in_dygraph_mode
+from ...fluid import core, dygraph_utils
+from ...fluid.layers import nn, utils
+from ...fluid.data_feeder import check_variable_and_dtype
+from ...fluid.param_attr import ParamAttr
+from ...fluid.layer_helper import LayerHelper
+
+
+def _is_list_or_tuple(input):
+ return isinstance(input, (list, tuple))
+
+
+def _zero_padding_in_batch_and_channel(padding, channel_last):
+ if channel_last:
+ return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0]
+ else:
+ return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0]
+
+
+def _exclude_padding_in_batch_and_channel(padding, channel_last):
+ padding_ = padding[1:-1] if channel_last else padding[2:]
+ padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim]
+ return padding_
+
+
+def _update_padding_nd(padding, channel_last, num_dims):
+ if isinstance(padding, str):
+ padding = padding.upper()
+ if padding not in ["SAME", "VALID"]:
+ raise ValueError(
+ "Unknown padding: '{}'. It can only be 'SAME' or 'VALID'.".
+ format(padding))
+ if padding == "VALID":
+ padding_algorithm = "VALID"
+ padding = [0] * num_dims
+ else:
+ padding_algorithm = "SAME"
+ padding = [0] * num_dims
+ elif _is_list_or_tuple(padding):
+ # for padding like
+ # [(pad_before, pad_after), (pad_before, pad_after), ...]
+ # padding for batch_dim and channel_dim included
+ if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]):
+ if not _zero_padding_in_batch_and_channel(padding, channel_last):
+ raise ValueError(
+ "Non-zero padding({}) in the batch or channel dimensions "
+ "is not supported.".format(padding))
+ padding_algorithm = "EXPLICIT"
+ padding = _exclude_padding_in_batch_and_channel(padding,
+ channel_last)
+ if utils._is_symmetric_padding(padding, num_dims):
+ padding = padding[0::2]
+ # for padding like [pad_before, pad_after, pad_before, pad_after, ...]
+ elif len(padding) == 2 * num_dims and isinstance(padding[0], int):
+ padding_algorithm = "EXPLICIT"
+ padding = utils.convert_to_list(padding, 2 * num_dims, 'padding')
+ if utils._is_symmetric_padding(padding, num_dims):
+ padding = padding[0::2]
+ # for padding like [pad_d1, pad_d2, ...]
+ elif len(padding) == num_dims and isinstance(padding[0], int):
+ padding_algorithm = "EXPLICIT"
+ padding = utils.convert_to_list(padding, num_dims, 'padding')
+ else:
+ raise ValueError("In valid padding: {}".format(padding))
+ # for integer padding
+ else:
+ padding_algorithm = "EXPLICIT"
+ padding = utils.convert_to_list(padding, num_dims, 'padding')
+ return padding, padding_algorithm
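+
+# An added sketch (for illustration only, not part of the API):
+# _update_padding_nd normalizes every accepted padding form into a
+# (padding, padding_algorithm) pair. For num_dims=2 and channel-first data:
+#   _update_padding_nd("same", False, 2)        -> ([0, 0], "SAME")
+#   _update_padding_nd(1, False, 2)             -> ([1, 1], "EXPLICIT")
+#   _update_padding_nd([1, 1, 2, 2], False, 2)  -> ([1, 2], "EXPLICIT")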
+
+
+def conv2d(input,
+ weight,
+ bias=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format="NCHW",
+ name=None):
+ """
+ The convolution2D layer calculates the output based on the input, filter
+ and strides, paddings, dilations, groups parameters. Input and
+ Output are in NCHW or NHWC format, where N is batch size, C is the number of
+ channels, H is the height of the feature, and W is the width of the feature.
+ Filter is in MCHW format, where M is the number of output image channels,
+ C is the number of input image channels, H is the height of the filter,
+    and W is the width of the filter. If groups is greater than 1,
+    C will equal the number of input image channels divided by groups.
+    Please refer to UFLDL's `convolution
+    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
+    for more details.
+    If bias and activation type are provided, bias is added to the
+    output of the convolution, and the corresponding activation function is
+    applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ Where:
+
+ * :math:`X`: Input value, a tensor with NCHW or NHWC format.
+ * :math:`W`: Filter value, a tensor with MCHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
+ W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
+
+ Args:
+ input (Variable): The input is 4-D Tensor with shape [N, C, H, W], the data type
+ of input is float16 or float32 or float64.
+ weight (Variable): The convolution kernel with shape [M, C/g, kH, kW], where M is
+ the number of output channels, g is the number of groups, kH is the filter's
+ height, kW is the filter's width.
+ bias (Variable, optional): The bias with shape [M,].
+        padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
+            on both sides for each dimension. If `padding` is a string, either 'VALID' or
+            'SAME' which is the padding algorithm. If padding size is a tuple or list,
+            it could be in three forms: `[pad_height, pad_width]` or
+            `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
+            `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
+            [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+            when `data_format` is `"NHWC"`, `padding` can be in the form
+            `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+            Default: padding = 0.
+ stride (int|tuple): The stride size. It means the stride in convolution.
+ If stride is a tuple, it must contain two integers, (stride_height, stride_width).
+ Otherwise, stride_height = stride_width = stride. Default: stride = 1.
+ dilation (int|tuple): The dilation size. It means the spacing between the kernel
+ points. If dilation is a tuple, it must contain two integers, (dilation_height,
+ dilation_width). Otherwise, dilation_height = dilation_width = dilation.
+ Default: dilation = 1.
+ groups (int): The groups number of the Conv2d Layer. According to grouped
+ convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+ the first half of the filters is only connected to the first half
+ of the input channels, while the second half of the filters is only
+ connected to the second half of the input channels. Default: groups=1.
+ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str): Activation type, if it is set to None, activation is not appended.
+ Default: None
+ data_format (str, optional): Specify the data format of the input, and the data format of the output
+ will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
+ The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
+ `[batch_size, input_channels, input_height, input_width]`.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv2d result, whose data type is
+        the same as the input. If act is None, the tensor variable storing the convolution
+ result, and if act is not None, the tensor variable storing convolution
+ and non-linearity activation result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCHW" or "NHWC".
+        ValueError: If the channel dimension of the input is less than or equal to zero.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+            or the element corresponding to the input's channel is not 0.
+        ShapeError: If the input is not 4-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+        ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+        ShapeError: If the number of input channels is not equal to filter's channels * groups.
+        ShapeError: If the number of output channels is not divisible by groups.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8).astype(np.float32)
+ w = np.random.randn(6, 3, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv2d(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 6, 6)
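+
+            # a minimal sketch (added for illustration): with 'SAME' padding
+            # and the default stride 1, the spatial size is kept
+            y_same = F.conv2d(x_var, w_var, padding="SAME")
+            print(y_same.numpy().shape)
+            # (2, 6, 8, 8)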
+ """
+ # entry checks
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. "
+ "Received Attr(use_cudnn): {}.".format(use_cudnn))
+ if data_format not in ["NCHW", "NHWC"]:
+ raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'. "
+ "Received Attr(data_format): {}.".format(data_format))
+
+ channel_last = (data_format == "NHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ num_filters = weight.shape[0]
+ if num_channels < 0:
+ raise ValueError("The channel dimmention of the input({}) "
+ "should be defined. Received: {}.".format(
+ input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "the channel of input must be divisible by groups,"
+ "received: the channel of input is {}, the shape of input is {}"
+ ", the groups is {}".format(num_channels, input.shape, groups))
+ if num_filters % groups != 0:
+ raise ValueError(
+ "the number of filters must be divisible by groups,"
+ "received: the number of filters is {}, the shape of weight is {}"
+ ", the groups is {}".format(num_filters, weight.shape, groups))
+
+ # update attrs
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
+ stride = utils.convert_to_list(stride, 2, 'stride')
+ dilation = utils.convert_to_list(dilation, 2, 'dilation')
+
+ l_type = "conv2d"
+ if (num_channels == groups and num_filters % num_channels == 0 and
+ not use_cudnn):
+ l_type = 'depthwise_conv2d'
+
+ if in_dygraph_mode():
+ attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
+ 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
+ 'fuse_relu_before_depthwise_conv', False, "padding_algorithm",
+ padding_algorithm, "data_format", data_format)
+ pre_bias = getattr(core.ops, l_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'strides': stride,
+ 'paddings': padding,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ 'use_mkldnn': False,
+ 'fuse_relu_before_depthwise_conv': False,
+ "padding_algorithm": padding_algorithm,
+ "data_format": data_format
+ }
+ check_variable_and_dtype(input, 'input',
+ ['float16', 'float32', 'float64'], 'conv2d')
+ helper = LayerHelper(l_type, **locals())
+ dtype = helper.input_dtype()
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+ helper.append_op(
+ type=l_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+ return out
+
+
+def conv2d_transpose(input,
+ weight,
+ bias=None,
+ output_size=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format='NCHW',
+ name=None):
+ """
+ The convolution2D transpose layer calculates the output based on the input,
+ filter, and dilations, strides, paddings. Input(Input) and output(Output)
+ are in NCHW or NHWC format. Where N is batch size, C is the number of channels,
+ H is the height of the feature, and W is the width of the feature.
+    Parameters (dilations, strides, paddings) contain two elements, which
+    represent height and width, respectively. For details of the convolution
+    transpose layer, please refer to the following explanation and references
+    `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
+    If bias and activation type are provided, bias is added to
+    the output of the convolution, and the corresponding activation function
+    is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ Where:
+
+ * :math:`X`: Input value, a 4-D Tensor with NCHW or NHWC format.
+ * :math:`W`: Filter value, a 4-D Tensor with MCHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, a 4-D Tensor with data format 'NCHW' or 'NHWC', the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ H^\prime_{out} &= (H_{in} - 1) * strides[0] - pad_height_top - pad_height_bottom + dilations[0] * (H_f - 1) + 1 \\\\
+ W^\prime_{out} &= (W_{in} - 1) * strides[1] - pad_width_left - pad_width_right + dilations[1] * (W_f - 1) + 1 \\\\
+ H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ] \\\\
+ W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ]
+
+    Note:
+        The conv2d_transpose can be seen as the backward of the conv2d. For conv2d,
+        when stride > 1, conv2d maps multiple input shapes to the same output shape,
+        so for conv2d_transpose, when stride > 1, one input shape maps to multiple
+        output shapes. If output_size is None, :math:`H_{out} = H^\prime_{out},
+        W_{out} = W^\prime_{out}`; else, the :math:`H_{out}` of the output size must be
+        between :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[0]`, and the
+        :math:`W_{out}` of the output size must be between :math:`W^\prime_{out}`
+        and :math:`W^\prime_{out} + strides[1]`.
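+        For example, with :math:`H_{in} = 8`, :math:`strides[0] = 2`, :math:`H_f = 3`
+        and zero padding, :math:`H^\prime_{out} = (8 - 1) * 2 + 3 = 17`, so a valid
+        :math:`H_{out}` is 17 or 18.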
+
+ Args:
+ input(Variable): 4-D Tensor with [N, C, H, W] or [N, H, W, C] format,
+ whose data type is float32 or float64.
+ weight(Variable): The convolution kernel, a Tensor with shape [C, M/g, kH, kW],
+ where M is the number of output channels(filters), g is the number of groups,
+ kH is the height of the kernel, and kW is the width of the kernel.
+ bias(Variable, optional): The bias, a Tensor with shape [M, ].
+        output_size(int|tuple|list, optional): The output image size. If output size is a
+            tuple, it must contain two integers, (image_height, image_width). If it is
+            None, the output size is computed from the input size, padding, and stride.
+            If output_size is specified, it and the weight's shape should follow the
+            formula above. Default: None.
+ padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
+ `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
+ string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
+ If `padding` is a tuple or list, it could be in three forms:
+ `[pad_height, pad_width]` or
+ `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and
+ when `data_format` is `'NCHW'`,
+ `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+ when `data_format` is `'NHWC'`, `padding` can be in the form
+ `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+ Default: padding = 0.
+ stride(int|tuple, optional): The stride size. It means the stride in transposed convolution.
+ If stride is a tuple, it must contain two integers, (stride_height, stride_width).
+ Otherwise, stride_height = stride_width = stride. Default: stride = 1.
+ dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points.
+ If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
+ Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
+ groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
+ grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+ when group=2, the first half of the filters is only connected to the
+ first half of the input channels, while the second half of the
+ filters is only connected to the second half of the input channels.
+ Default: groups = 1.
+ use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True.
+ act (str, optional): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+ data_format (str, optional): Specify the data format of the input, and the data format of the output
+ will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
+ The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
+ `[batch_size, input_channels, input_height, input_width]`.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv2d_transpose, whose
+        data type is the same as the input and whose shape is (num_batches, channels, out_h,
+ out_w) or (num_batches, out_h, out_w, channels). If act is None, the tensor variable
+ storing the transposed convolution result, and if act is not None, the
+ tensor variable storing transposed convolution and non-linearity activation
+ result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCHW" or "NHWC".
+ ValueError: If `padding` is a string, but not "SAME" or "VALID".
+ ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+ or the element corresponding to the input's channel is not 0.
+ ValueError: If `output_size` and filter_size are None at the same time.
+ ShapeError: If the input is not 4-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+ ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+ ShapeError: If the number of input channels is not equal to filter's channels.
+ ShapeError: If the size of `output_size` is not equal to that of `stride`.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8).astype(np.float32)
+ w = np.random.randn(3, 6, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv2d_transpose(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 10, 10)
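+
+            # a minimal sketch (added for illustration): output_size may pick
+            # any size in [infer_shape, infer_shape + stride); here with
+            # stride 2, infer_shape = (8 - 1) * 2 + 3 = 17
+            y_var = F.conv2d_transpose(
+                x_var, w_var, output_size=(17, 17), stride=2)
+            print(y_var.numpy().shape)
+            # (2, 6, 17, 17)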
+ """
+
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. "
+ "Received Attr(use_cudnn): {}.".format(use_cudnn))
+ if data_format not in ['NCHW', 'NHWC']:
+ raise ValueError(
+ "Attr(data_format) of conv2d_transpose got wrong value: "
+ "received {}, but only 'NCHW' or 'NHWC' are supported.".format(
+ data_format))
+ channel_last = (data_format == "NHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ if num_channels < 0:
+ raise ValueError("The channel dimmention of the input({}) "
+ "should be defined. Received: {}.".format(
+ input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "the channel of input must be divisible by groups,"
+ "received: the channel of input is {}, the shape of input is {}"
+ ", the groups is {}".format(num_channels, input.shape, groups))
+
+ # update attrs
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 2)
+ stride = utils.convert_to_list(stride, 2, 'stride')
+ dilation = utils.convert_to_list(dilation, 2, 'dilation')
+ if output_size is None:
+ output_size = []
+ elif isinstance(output_size, (list, tuple, int)):
+ output_size = utils.convert_to_list(output_size, 2, 'output_size')
+ else:
+ raise ValueError("output_size should be int, or list, tuple of ints")
+
+ op_type = 'conv2d_transpose'
+ num_filters = weight.shape[1]
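+    # a depthwise transposed convolution (one group per input channel,
+    # channel multiplier 1) has a specialized non-cuDNN kernel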
+ if (num_channels == groups and num_filters == 1 and not use_cudnn):
+ op_type = 'depthwise_conv2d_transpose'
+
+ if in_dygraph_mode():
+ attrs = ('output_size', output_size, 'strides', stride, 'paddings',
+ padding, 'padding_algorithm', padding_algorithm, 'dilations',
+ dilation, 'groups', groups, 'use_cudnn', use_cudnn,
+ 'data_format', data_format)
+ pre_bias = getattr(core.ops, op_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'output_size': output_size,
+ 'strides': stride,
+ 'paddings': padding,
+ 'padding_algorithm': padding_algorithm,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ 'data_format': data_format
+ }
+ check_variable_and_dtype(input, 'input',
+ ['float16', 'float32', 'float64'],
+ 'conv2d_transpose')
+ helper = LayerHelper(op_type, **locals())
+ dtype = helper.input_dtype()
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+ helper.append_op(
+ type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+ return out
+
+
+def conv3d(input,
+ weight,
+ bias=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format="NCDHW",
+ name=None):
+ """
+    The convolution3D layer calculates the output based on the input, filter
+    and strides, paddings, dilations, groups parameters. Input(Input) and
+    Output(Output) are in NCDHW or NDHWC format, where N is batch size, C is the number of
+    channels, D is the depth of the feature, H is the height of the feature,
+    and W is the width of the feature. Convolution3D is similar to Convolution2D
+    but adds one dimension (depth). If bias and activation type are
+    provided, bias is added to the output of the convolution, and the
+    corresponding activation function is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ In the above equation:
+
+ * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
+ * :math:`W`: Filter value, a tensor with MCDHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
+
+    - Output:
+
+        Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
+ H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
+ W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
+
+ Args:
+ input (Variable): The input is 5-D Tensor with shape [N, C, D, H, W], the data
+ type of input is float16 or float32 or float64.
+ weight (Variable): The convolution kernel, a Tensor with shape [M, C/g, kD, kH, kW],
+ where M is the number of filters(output channels), g is the number of groups,
+ kD, kH, kW are the filter's depth, height and width respectively.
+ bias (Variable, optional): The bias, a Tensor of shape [M, ].
+ padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
+ on both sides for each dimension. If `padding` is a string, either 'VALID' or
+ 'SAME' which is the padding algorithm. If padding size is a tuple or list,
+ it could be in three forms: `[pad_depth, pad_height, pad_width]` or
+ `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
+ and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
+ `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+ when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
+ `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+ Default: padding = 0.
+ stride (int|tuple): The stride size. It means the stride in convolution. If stride is a
+ tuple, it must contain three integers, (stride_depth, stride_height, stride_width).
+ Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1.
+ dilation (int|tuple): The dilation size. It means the spacing between the kernel points.
+ If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
+ dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
+ Default: dilation = 1.
+ groups (int): The groups number of the Conv3d Layer. According to grouped
+ convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+ the first half of the filters is only connected to the first half
+ of the input channels, while the second half of the filters is only
+ connected to the second half of the input channels. Default: groups=1
+ use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+        data_format (str, optional): Specify the data format of the input, and the data format of the output
+            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
+        name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv3d, whose data type is
+        the same as the input. If act is None, the tensor variable storing the
+ convolution result, and if act is not None, the tensor variable storing
+ convolution and non-linearity activation result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCDHW" or "NDHWC".
+        ValueError: If the channel dimension of the input is less than or equal to zero.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+            or the element corresponding to the input's channel is not 0.
+        ShapeError: If the input is not 5-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+        ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+        ShapeError: If the number of input channels is not equal to filter's channels * groups.
+        ShapeError: If the number of output channels is not divisible by groups.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
+ w = np.random.randn(6, 3, 3, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv3d(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 6, 6, 6)
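+
+            # a minimal sketch (added for illustration): integer padding pads
+            # every spatial side, so padding=1 with a 3x3x3 kernel and the
+            # default stride 1 keeps the spatial size
+            y_pad = F.conv3d(x_var, w_var, padding=1)
+            print(y_pad.numpy().shape)
+            # (2, 6, 8, 8, 8)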
+ """
+ # entry check
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. Received "
+ "Attr(use_cudnn): {}. ".format(use_cudnn))
+
+ if data_format not in ["NCDHW", "NDHWC"]:
+ raise ValueError(
+ "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
+ "Attr(data_format): {}.".format(data_format))
+
+ channel_last = (data_format == "NDHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ num_filters = weight.shape[0]
+ if num_channels < 0:
+ raise ValueError(
+ "The channel dimmention of the input({}) should be defined. "
+ "Received: {}.".format(input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "The number of input channels must be divisible by Attr(groups). "
+ "Received: number of channels({}), groups({}).".format(num_channels,
+ groups))
+ if num_filters % groups != 0:
+ raise ValueError(
+ "The number of filters must be divisible by Attr(groups). "
+ "Received: number of filters({}), groups({}).".format(num_filters,
+ groups))
+
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
+ stride = utils.convert_to_list(stride, 3, 'stride')
+ dilation = utils.convert_to_list(dilation, 3, 'dilation')
+ op_type = "conv3d"
+
+ if in_dygraph_mode():
+ attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
+ 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
+ "padding_algorithm", padding_algorithm, "data_format",
+ data_format)
+ pre_bias = getattr(core.ops, op_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'strides': stride,
+ 'paddings': padding,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ 'use_mkldnn': False,
+ "padding_algorithm": padding_algorithm,
+ "data_format": data_format
+ }
+ helper = LayerHelper(op_type, **locals())
+ dtype = helper.input_dtype()
+ check_variable_and_dtype(input, 'input',
+ ['float16', 'float32', 'float64'], 'conv3d')
+
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+
+ helper.append_op(
+ type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+
+ return out
+
+
+def conv3d_transpose(input,
+ weight,
+ bias=None,
+ output_size=None,
+ padding=0,
+ stride=1,
+ dilation=1,
+ groups=1,
+ use_cudnn=True,
+ act=None,
+ data_format='NCDHW',
+ name=None):
+ """
+ The convolution3D transpose layer calculates the output based on the input,
+ filter, and dilations, strides, paddings. Input(Input) and output(Output)
+ are in NCDHW or NDHWC format. Where N is batch size, C is the number of channels,
+ D is the depth of the feature, H is the height of the feature, and W
+    is the width of the feature. Parameters (dilations, strides, paddings) contain
+    three elements, which represent depth, height and width, respectively. For
+    details of the convolution transpose layer, please refer to the following
+    explanation and references `therein <https://arxiv.org/pdf/1603.07285.pdf>`_.
+    If bias and activation type are provided, bias is added to
+    the output of the convolution, and the corresponding activation function
+    is applied to the final result.
+
+ For each input :math:`X`, the equation is:
+
+ .. math::
+
+ Out = \sigma (W \\ast X + b)
+
+ In the above equation:
+
+ * :math:`X`: Input value, a Tensor with NCDHW or NDHWC format.
+ * :math:`W`: Filter value, a Tensor with MCDHW format.
+ * :math:`\\ast`: Convolution operation.
+ * :math:`b`: Bias value, a 2-D Tensor with shape [M, 1].
+ * :math:`\\sigma`: Activation function.
+ * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+ Example:
+
+ - Input:
+
+ Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+ Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)`
+
+ - Output:
+
+ Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+ Where
+
+ .. math::
+
+ D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (D_f - 1) + 1 \\\\
+ H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (H_f - 1) + 1 \\\\
+ W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (W_f - 1) + 1 \\\\
+ D_{out} &\in [ D^\prime_{out}, D^\prime_{out} + strides[0] ] \\\\
+ H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[1] ] \\\\
+ W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[2] ]
+
+    Note:
+        The conv3d_transpose can be seen as the backward of the conv3d. For conv3d,
+        when stride > 1, conv3d maps multiple input shapes to the same output shape,
+        so for conv3d_transpose, when stride > 1, one input shape maps to multiple
+        output shapes. If output_size is None, :math:`D_{out} = D^\prime_{out},
+        H_{out} = H^\prime_{out}, W_{out} = W^\prime_{out}`; else, the :math:`D_{out}`
+        of the output size must be between :math:`D^\prime_{out}` and
+        :math:`D^\prime_{out} + strides[0]`, the :math:`H_{out}` must be between
+        :math:`H^\prime_{out}` and :math:`H^\prime_{out} + strides[1]`, and the
+        :math:`W_{out}` must be between :math:`W^\prime_{out}` and
+        :math:`W^\prime_{out} + strides[2]`.
+
+ Args:
+ input(Variable): The input is 5-D Tensor with shape [N, C, D, H, W] or [N, D, H, W, C], the data type
+ of input is float32 or float64.
+ weight (Variable): The convolution kernel, a Tensor with shape [C, M/g, kD, kH, kW],
+ where M is the number of filters(output channels), g is the number of groups,
+ kD, kH, kW are the filter's depth, height and width respectively.
+ bias (Variable, optional): The bias, a Tensor of shape [M, ].
+        output_size(int|tuple, optional): The output image size. If output size is a
+            tuple, it must contain three integers, (image_depth, image_height, image_width).
+            If it is None, the output size is computed from the input size, padding, and
+            stride; otherwise, it should follow the formula above. Default: None.
+ padding(int|list|str|tuple, optional): The padding size. The padding argument effectively
+ adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string,
+ either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding`
+ is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or
+ `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
+ and when `data_format` is `'NCDHW'`, `padding` can be in the form
+ `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
+ when `data_format` is `'NDHWC'`, `padding` can be in the form
+ `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+ Default: padding = 0.
+ stride(int|tuple, optional): The stride size. It means the stride in transposed convolution.
+ If stride is a tuple, it must contain three integers, (stride_depth, stride_height,
+ stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
+ Default: stride = 1.
+ dilation(int|tuple, optional): The dilation size. It means the spacing between the kernel points.
+ If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
+ dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
+ Default: dilation = 1.
+ groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by
+ grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
+ when group=2, the first half of the filters is only connected to the
+ first half of the input channels, while the second half of the
+ filters is only connected to the second half of the input channels.
+ Default: groups=1
+ use_cudnn(bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
+ library is installed. Default: True
+ act (str, optional): Activation type, if it is set to None, activation is not appended.
+ Default: None.
+        data_format (str, optional): Specify the data format of the input, and the data format of the output
+            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
+ name(str, optional): For detailed information, please refer
+ to :ref:`api_guide_Name`. Usually name is no need to set and
+ None by default.
+
+ Returns:
+        A Variable holding the Tensor representing the conv3d_transpose, whose data
+        type is the same as the input and whose shape is (num_batches, channels, out_d, out_h,
+ out_w) or (num_batches, out_d, out_h, out_w, channels). If act is None, the tensor
+ variable storing the transposed convolution result, and if act is not None, the tensor
+ variable storing transposed convolution and non-linearity activation result.
+
+ Raises:
+ ValueError: If the type of `use_cudnn` is not bool.
+ ValueError: If `data_format` is not "NCDHW" or "NDHWC".
+ ValueError: If `padding` is a string, but not "SAME" or "VALID".
+ ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+ or the element corresponding to the input's channel is not 0.
+ ValueError: If `output_size` and filter_size are None at the same time.
+ ShapeError: If the input is not 5-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size are not equal.
+ ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+ ShapeError: If the number of input channels is not equal to filter's channels.
+ ShapeError: If the size of `output_size` is not equal to that of `stride`.
+
+ Examples:
+ .. code-block:: python
+
+ from paddle import fluid
+ import paddle.nn.functional as F
+ import paddle.fluid.dygraph as dg
+ import numpy as np
+
+ x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
+ w = np.random.randn(3, 6, 3, 3, 3).astype(np.float32)
+
+ place = fluid.CPUPlace()
+ with dg.guard(place):
+ x_var = dg.to_variable(x)
+ w_var = dg.to_variable(w)
+ y_var = F.conv3d_transpose(x_var, w_var, act="relu")
+ y_np = y_var.numpy()
+ print(y_np.shape)
+
+ # (2, 6, 10, 10, 10)
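+
+            # a minimal sketch (added for illustration): with stride 2 and a
+            # cubic kernel of 3, infer_shape = (8 - 1) * 2 + 3 = 17, so any
+            # output_size in [17, 19) per dimension is valid
+            y_var = F.conv3d_transpose(
+                x_var, w_var, output_size=17, stride=2)
+            print(y_var.numpy().shape)
+            # (2, 6, 17, 17, 17)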
+ """
+ # entry checks
+ if not isinstance(use_cudnn, bool):
+ raise ValueError("Attr(use_cudnn) should be True or False. "
+ "Received Attr(use_cudnn): {}.".format(use_cudnn))
+ if data_format not in ["NCDHW", "NDHWC"]:
+ raise ValueError(
+ "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
+ "Attr(data_format): {}.".format(data_format))
+
+ channel_last = (data_format == "NDHWC")
+ channel_dim = -1 if channel_last else 1
+ num_channels = input.shape[channel_dim]
+ num_filters = weight.shape[1]
+ if num_channels < 0:
+ raise ValueError(
+ "The channel dimmention of the input({}) should be defined. "
+ "Received: {}.".format(input.shape, num_channels))
+ if num_channels % groups != 0:
+ raise ValueError(
+ "The number of input channels must be divisible by Attr(groups). "
+ "Received: number of channels({}), groups({}).".format(num_channels,
+ groups))
+
+ padding, padding_algorithm = _update_padding_nd(padding, channel_last, 3)
+ stride = utils.convert_to_list(stride, 3, 'stride')
+ dilation = utils.convert_to_list(dilation, 3, 'dilation')
+ if output_size is None:
+ output_size = []
+ elif isinstance(output_size, (list, tuple, int)):
+ output_size = utils.convert_to_list(output_size, 3, 'output_size')
+ else:
+ raise ValueError("output_size should be int, or list, tuple of ints")
+
+ op_type = 'conv3d_transpose'
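+    # the conv_transpose op appears to distinguish layouts only as
+    # channel-first vs channel-last, hence the 2-D style layout string below
+    # (an inference from this mapping, not a documented contract)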
+ data_format_ = "NHWC" if channel_last else "NCHW"
+
+ if in_dygraph_mode():
+ attrs = ('output_size', output_size, 'paddings', padding,
+ "padding_algorithm", padding_algorithm, 'strides', stride,
+ 'dilations', dilation, 'groups', groups, 'use_cudnn',
+ use_cudnn, "data_format", data_format_)
+ pre_bias = getattr(core.ops, op_type)(input, weight, *attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = dygraph_utils._append_activation_in_dygraph(
+ pre_act, act, use_cudnn=use_cudnn)
+ else:
+ inputs = {'Input': [input], 'Filter': [weight]}
+ attrs = {
+ 'output_size': output_size,
+ 'paddings': padding,
+ "padding_algorithm": padding_algorithm,
+ 'strides': stride,
+ 'dilations': dilation,
+ 'groups': groups,
+ 'use_cudnn': use_cudnn,
+ "data_format": data_format_
+ }
+ helper = LayerHelper(op_type, **locals())
+ dtype = helper.input_dtype()
+        check_variable_and_dtype(input, 'input',
+                                 ['float16', 'float32', 'float64'],
+                                 'conv3d_transpose')
+
+ pre_bias = helper.create_variable_for_type_inference(dtype)
+ outputs = {"Output": [pre_bias]}
+
+ helper.append_op(
+ type=op_type, inputs=inputs, outputs=outputs, attrs=attrs)
+ if bias is not None:
+ pre_act = nn.elementwise_add(pre_bias, bias, axis=channel_dim)
+ else:
+ pre_act = pre_bias
+ out = helper.append_activation(pre_act)
+
+ return out
diff --git a/python/setup.py.in b/python/setup.py.in
index cdecd1189d..d70e93dc15 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -146,7 +146,10 @@ packages=['paddle',
'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler',
'paddle.fluid.incubate.fleet.parameter_server.pslib',
'paddle.fluid.incubate.fleet.collective',
- 'paddle.fluid.incubate.fleet.utils']
+ 'paddle.fluid.incubate.fleet.utils',
+ 'paddle.nn',
+ 'paddle.nn.functional',
+ 'paddle.nn.layer']
with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
setup_requires = f.read().splitlines()