add doc for BilinearInitializer MSRAInitializer

Branch: revert-11610-move_hooks
Author: qiaolongfei, 7 years ago
Parent: 86092a9704
Commit: 323a048348

@@ -19,10 +19,10 @@ from framework import convert_np_dtype_to_dtype_
 from core import VarDesc
 
 __all__ = [
-    'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
-    'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
-    'NormalInitializer', 'XavierInitializer', 'BilinearInitializer',
-    'MSRAInitializer'
+    'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
+    'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
+    'UniformInitializer', 'NormalInitializer', 'XavierInitializer',
+    'BilinearInitializer', 'MSRAInitializer'
 ]
 
 _force_init_on_cpu_ = False
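
The reordered `__all__` above newly exports the short name `MSRA`. A minimal sketch of what that enables, assuming the module's existing `MSRA = MSRAInitializer` alias (the alias definition itself is not part of this hunk):

    import paddle.fluid as fluid

    # 'MSRA' is a short alias for 'MSRAInitializer'; exporting it lets
    # users write the compact form used in the new docstring below.
    assert fluid.initializer.MSRA is fluid.initializer.MSRAInitializer

    w_init = fluid.initializer.MSRA(uniform=True, seed=0)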
@@ -353,30 +353,42 @@ class MSRAInitializer(Initializer):
     """Implements the MSRA initializer a.k.a. Kaiming Initializer
 
     This class implements the weight initialization from the paper
-    Delving Deep into Rectifiers: Surpassing Human-Level Performance on
-    ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
-    and Jian Sun. This is a robust initialization method that particularly
-    considers the rectifier nonlinearities. In case of Uniform distribution,
-    the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
-    distribution, the mean is 0 and the standard deviation
-    is sqrt(2/ fan_in).
-
-    References:
-        [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance
-            on ImageNet Classification
-            (https://arxiv.org/abs/1502.01852)
-    """
-
-    def __init__(self, uniform=True, fan_in=None, seed=0):
-        """Constructor for MSRAInitializer
-
-        Args:
-            uniform: whether to use uniform or normal distribution
-            fan_in: fan_in for MSRAInitializer. If None, it is
-                    inferred from the variable.
-            seed: random seed
-
-        Note: It is recommended to set fan_in to None for most cases.
+    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
+    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
+    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
+    robust initialization method that particularly considers the rectifier
+    nonlinearities. In case of Uniform distribution, the range is [-x, x], where
+
+    .. math::
+
+        x = \sqrt{\\frac{6.0}{fan\_in}}
+
+    In case of Normal distribution, the mean is 0 and the standard deviation
+    is
+
+    .. math::
+
+        \sqrt{\\frac{2.0}{fan\_in}}
+
+    Args:
+        uniform (bool): whether to use uniform or normal distribution
+        fan_in (float): fan_in for MSRAInitializer. If None, it is\
+        inferred from the variable.
+        seed (int): random seed
+
+    Note:
+        It is recommended to set fan_in to None for most cases.
+
+    Examples:
+        .. code-block:: python
+
+            fc = fluid.layers.fc(
+                input=queries, size=10,
+                param_attr=fluid.initializer.MSRA(uniform=False))
+    """
+
+    def __init__(self, uniform=True, fan_in=None, seed=0):
+        """Constructor for MSRAInitializer
         """
         assert uniform is not None
         assert seed is not None
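
The two formulas in the new docstring map directly to code. A quick sketch in plain Python, with a hypothetical fan_in value (normally inferred from the variable's shape):

    import math

    fan_in = 1024  # hypothetical; the initializer infers this when fan_in=None

    # Uniform case: weights are drawn from [-x, x], x = sqrt(6 / fan_in)
    x = math.sqrt(6.0 / fan_in)      # ~0.0765

    # Normal case: mean 0, standard deviation sqrt(2 / fan_in)
    sigma = math.sqrt(2.0 / fan_in)  # ~0.0442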
@@ -436,34 +448,37 @@ class MSRAInitializer(Initializer):
 class BilinearInitializer(Initializer):
-    """Implements the bilinear initializer.
+    """
     This initializer can be used in transposed convolution operator to
     act as upsampling. Users can upsample a feature map with shape of
     (B, C, H, W) by any integer factor. The usage is:
 
-    >>> factor = 2
-    >>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
-    >>>                    initializer=Bilinear())
-    >>> conv_up = fluid.layers.conv2d_transpose(
-    >>>     input,
-    >>>     num_filters=C,
-    >>>     output_size=None,
-    >>>     filter_size=2 * factor - factor % 2,
-    >>>     padding=ceil((factor - 1) / 2.),
-    >>>     stride=factor,
-    >>>     groups=C,
-    >>>     param_attr=w_attr,
-    >>>     bias_attr=False)
-
-    Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed
+    Examples:
+
+        .. code-block:: python
+
+            factor = 2
+            w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
+                               initializer=Bilinear())
+            conv_up = fluid.layers.conv2d_transpose(
+                input,
+                num_filters=C,
+                output_size=None,
+                filter_size=2 * factor - factor % 2,
+                padding=ceil((factor - 1) / 2.),
+                stride=factor,
+                groups=C,
+                param_attr=w_attr,
+                bias_attr=False)
+
+    Where `num_filters=C` and `groups=C` means this is channel-wise transposed
     convolution. The filter shape will be (C, 1, K, K) where K is `filter_size`.
     This initializer will set a (K, K) interpolation kernel for every channel
     of the filter identically. The resulting shape of the output feature map
     will be (B, C, factor * H, factor * W). Note that the learning rate and the
     weight decay are set to 0 in order to keep coefficient values of bilinear
     interpolation unchanged during training.
     """
 
     def __init__(self):
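
The (K, K) interpolation kernel described in the last paragraph can be sketched in plain NumPy. This follows the standard FCN-style bilinear kernel construction and is an illustration, not the code this initializer ships:

    import numpy as np

    def bilinear_kernel(k):
        """Build a (k, k) bilinear interpolation kernel; the initializer
        places a copy of it in every channel of the (C, 1, k, k) filter."""
        factor = (k + 1) // 2
        # Kernel center; offset by 0.5 when k is even.
        center = factor - 1 if k % 2 == 1 else factor - 0.5
        og = np.ogrid[:k, :k]
        return ((1 - abs(og[0] - center) / factor) *
                (1 - abs(og[1] - center) / factor))

    # For factor = 2: filter_size = 2 * factor - factor % 2 = 4
    print(bilinear_kernel(4))  # rows/cols weighted 0.25, 0.75, 0.75, 0.25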
@@ -480,7 +495,7 @@ class BilinearInitializer(Initializer):
             be added.
 
         Returns:
-            the initialization op
+            Operator: the initialization op
 
         Raises:
             ValueError: If type of `var` and `block` is not right.
