|
|
|
@ -1305,8 +1305,6 @@ def conv2d(input,
|
|
|
|
|
conv2d = fluid.layers.conv2d(
|
|
|
|
|
input=data, num_filters=2, filter_size=3, act="relu")
|
|
|
|
|
"""
|
|
|
|
|
if stride is None:
|
|
|
|
|
stride = [1, 1]
|
|
|
|
|
|
|
|
|
|
num_channels = input.shape[1]
|
|
|
|
|
|
|
|
|
@ -1369,6 +1367,172 @@ def conv2d(input,
|
|
|
|
|
return helper.append_activation(pre_act)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def conv3d(input,
           num_filters,
           filter_size,
           stride=1,
           padding=0,
           dilation=1,
           groups=None,
           param_attr=None,
           bias_attr=None,
           use_cudnn=True,
           use_mkldnn=False,
           act=None,
           name=None):
    """
    **Convolution3D Layer**

    The convolution3D layer calculates the output based on the input, filter
    and strides, paddings, dilations, groups parameters. Input(Input) and
    Output(Output) are in NCDHW format. Where N is batch size, C is the number
    of channels, D is the depth of the feature, H is the height of the feature,
    and W is the width of the feature.
    The details of convolution layer, please refer UFLDL's `convolution,
    <http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_ .
    If bias attribution and activation type are provided, bias is added to the
    output of the convolution, and the corresponding activation function is
    applied to the final result.

    For each input :math:`X`, the equation is:

    .. math::

        Out = \\sigma (W \\ast X + b)

    In the above equation:

    * :math:`X`: Input value, a tensor with NCDHW format.
    * :math:`W`: Filter value, a tensor with MCDHW format.
    * :math:`\\ast`: Convolution operation.
    * :math:`b`: Bias value.
    * :math:`\\sigma`: Activation function.
    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be
                   different.

    Example:

        - Input:

          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`

          Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`

        - Output:
          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`

        Where

        .. math::

            D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
            H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
            W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1

    Args:
        input (Variable): The input image with [N, C, D, H, W] format.
        num_filters(int): The number of filters. It is the same as the output
            image channel.
        filter_size (int|tuple): The filter size. If filter_size is a tuple,
            it must contain three integers, (filter_size_D, filter_size_H,
            filter_size_W). Otherwise, the filter will be a cube.
        stride (int|tuple): The stride size. If stride is a tuple, it must
            contain three integers, (stride_D, stride_H, stride_W). Otherwise,
            the stride_D = stride_H = stride_W = stride. Default: stride = 1.
        padding (int|tuple): The padding size. If padding is a tuple, it must
            contain three integers, (padding_D, padding_H, padding_W).
            Otherwise, the padding_D = padding_H = padding_W = padding.
            Default: padding = 0.
        dilation (int|tuple): The dilation size. If dilation is a tuple, it
            must contain three integers, (dilation_D, dilation_H, dilation_W).
            Otherwise, the dilation_D = dilation_H = dilation_W = dilation.
            Default: dilation = 1.
        groups (int|None): The groups number of the Conv3d Layer. According to
            grouped convolution in Alex Krizhevsky's Deep CNN paper: when
            group=2, the first half of the filters is only connected to the
            first half of the input channels, while the second half of the
            filters is only connected to the second half of the input
            channels. Default: None, in which case every filter spans all
            input channels.
        param_attr (ParamAttr): The parameters to the Conv3d Layer. Default: None
        bias_attr (ParamAttr): Bias parameter for the Conv3d layer. Default: None
        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
            library is installed. Default: True
        use_mkldnn (bool): Use mkldnn kernels or not. Default: False
        act (str): Activation type. Default: None
        name (str|None): A name for this layer(optional). If set None, the layer
            will be named automatically.

    Returns:
        Variable: The tensor variable storing the convolution and \
                  non-linearity activation result.

    Raises:
        ValueError: If the number of input channels is not divisible by
                    groups, or if use_cudnn is not a bool.

    Examples:
        .. code-block:: python

          data = fluid.layers.data(
              name='data', shape=[3, 12, 32, 32], dtype='float32')
          conv3d = fluid.layers.conv3d(
              input=data, num_filters=2, filter_size=3, act="relu")
    """

    l_type = 'conv3d'

    # NOTE: locals() is forwarded to LayerHelper, so no additional local names
    # may be introduced above this line.
    helper = LayerHelper(l_type, **locals())
    dtype = helper.input_dtype()

    # Channel axis is index 1 of the input shape.
    num_channels = input.shape[1]

    if groups is None:
        num_filter_channels = num_channels
    else:
        if num_channels % groups != 0:
            raise ValueError("num_channels must be divisible by groups.")
        # Floor division keeps the filter shape integral on Python 3; the
        # divisibility check above guarantees the result is exact.
        num_filter_channels = num_channels // groups

    # Normalize scalar-or-sequence arguments to 3-element lists (D, H, W).
    filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
    stride = utils.convert_to_list(stride, 3, 'stride')
    padding = utils.convert_to_list(padding, 3, 'padding')
    dilation = utils.convert_to_list(dilation, 3, 'dilation')

    if not isinstance(use_cudnn, bool):
        raise ValueError("use_cudnn should be True or False")

    filter_shape = [num_filters, num_filter_channels] + filter_size

    def _get_default_param_initializer():
        # std = sqrt(2 / (D_f * H_f * W_f * C_in)); use the real kernel volume
        # so that non-cubic filters get the correct scale.
        filter_elem_num = (filter_size[0] * filter_size[1] * filter_size[2] *
                           num_channels)
        std = (2.0 / filter_elem_num)**0.5
        return Normal(0.0, std, 0)

    filter_param = helper.create_parameter(
        attr=helper.param_attr,
        shape=filter_shape,
        dtype=dtype,
        default_initializer=_get_default_param_initializer())

    pre_bias = helper.create_tmp_variable(dtype)

    helper.append_op(
        type=l_type,
        inputs={
            'Input': input,
            'Filter': filter_param,
        },
        outputs={"Output": pre_bias},
        attrs={
            'strides': stride,
            'paddings': padding,
            'dilations': dilation,
            'groups': groups,
            'use_cudnn': use_cudnn,
            'use_mkldnn': use_mkldnn
        })

    # NOTE(review): dim_end=3 presumably makes the bias span the channel and
    # depth axes rather than the channel axis only -- confirm against
    # LayerHelper.append_bias_op whether dim_end=2 was intended.
    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=3)

    return helper.append_activation(pre_act)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sequence_pool(input, pool_type):
|
|
|
|
|
"""
|
|
|
|
|
This function add the operator for sequence pooling.
|
|
|
|
|