add paddle.nn.initializer API, including: Normal, TruncatedNormal, Uniform, XavierNormal, XavierUniform, Assign (#27769)
parent
a4f850748a
commit
8e70b18e6c
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,100 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid import framework
|
||||||
|
from ...fluid import core
|
||||||
|
from ...fluid import unique_name
|
||||||
|
from ...fluid.core import VarDesc
|
||||||
|
from ...fluid.data_feeder import check_type
|
||||||
|
from ...fluid.initializer import NumpyArrayInitializer
|
||||||
|
|
||||||
|
__all__ = ['Assign']
|
||||||
|
|
||||||
|
|
||||||
|
class Assign(NumpyArrayInitializer):
    """Init a parameter with a numpy array, list, or tensor.

    Args:
        value (Tensor|numpy.ndarray|list): numpy array, list, or tensor to initialize the parameter.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by the input numpy array, list, or tensor.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            # numpy array
            data_1 = paddle.ones(shape=[1, 2], dtype='float32')
            weight_attr_1 = paddle.framework.ParamAttr(
                name="linear_weight_1",
                initializer=paddle.nn.initializer.Assign(np.array([2, 2])))
            bias_attr_1 = paddle.framework.ParamAttr(
                name="linear_bias_1",
                initializer=paddle.nn.initializer.Assign(np.array([2])))
            linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1)
            # linear_1.weight: [2. 2.]
            # linear_1.bias: [2.]

            res_1 = linear_1(data_1)
            # res_1: [6.]

            # python list
            data_2 = paddle.ones(shape=[1, 2], dtype='float32')
            weight_attr_2 = paddle.framework.ParamAttr(
                name="linear_weight_2",
                initializer=paddle.nn.initializer.Assign([2, 2]))
            bias_attr_2 = paddle.framework.ParamAttr(
                name="linear_bias_2",
                initializer=paddle.nn.initializer.Assign([2]))
            linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2)
            # linear_2.weight: [2. 2.]
            # linear_2.bias: [2.]

            res_2 = linear_2(data_2)
            # res_2: [6.]

            # tensor
            data_3 = paddle.ones(shape=[1, 2], dtype='float32')
            weight_attr_3 = paddle.framework.ParamAttr(
                name="linear_weight_3",
                initializer=paddle.nn.initializer.Assign(paddle.full([2], 2)))
            bias_attr_3 = paddle.framework.ParamAttr(
                name="linear_bias_3",
                initializer=paddle.nn.initializer.Assign(paddle.full([1], 2)))
            linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3)
            # linear_3.weight: [2. 2.]
            # linear_3.bias: [2.]

            res_3 = linear_3(data_3)
            # res_3: [6.]
    """

    def __init__(self, value, name=None):
        import numpy

        # Only numpy arrays, python lists, and framework Variables (tensors)
        # are accepted; anything else raises a TypeError via check_type.
        check_type(value, 'value', (numpy.ndarray, list, framework.Variable),
                   'Assign')

        if isinstance(value, list):
            value = numpy.array(value)

        # TODO: if value is already a tensor, for efficiency it may not be
        # necessary to convert it to numpy data before initialization.
        if isinstance(value, framework.Variable):
            value = value.numpy()

        super(Assign, self).__init__(value)
|
@ -0,0 +1,100 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid.initializer import NormalInitializer
|
||||||
|
from ...fluid.initializer import TruncatedNormalInitializer
|
||||||
|
|
||||||
|
__all__ = ['Normal', 'TruncatedNormal']
|
||||||
|
|
||||||
|
|
||||||
|
class Normal(NormalInitializer):
    """The Random Normal (Gaussian) distribution initializer.

    Fills the parameter with samples drawn from a normal distribution
    with the given mean and standard deviation.

    Args:
        mean (float, optional): mean of the normal distribution. The default value is 0.0.
        std (float, optional): standard deviation of the normal distribution. The default value is 1.0.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Random Normal (Gaussian) distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[ 2.1973135 -2.2697184]
            #                 [-1.9104223 -1.0541488]]
            # linear.bias: [ 0.7885926 -0.74719954]

            res = linear(data)
            # res: [[[ 1.0754838 -4.071067 ]]
            #       [[ 1.0754838 -4.071067 ]]
            #       [[ 1.0754838 -4.071067 ]]]
    """

    def __init__(self, mean=0.0, std=1.0, name=None):
        # Guard against callers explicitly passing None for either moment.
        assert mean is not None, 'mean should not be None'
        assert std is not None, 'std should not be None'
        # seed=0 delegates seeding to the global random seed.
        super(Normal, self).__init__(loc=mean, scale=std, seed=0)
|
||||||
|
|
||||||
|
|
||||||
|
class TruncatedNormal(TruncatedNormalInitializer):
    """The Random TruncatedNormal (Gaussian) distribution initializer.

    Args:
        mean (float, optional): mean of the normal distribution. The default value is 0.0.
        std (float, optional): standard deviation of the normal distribution. The default value is 1.0.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Random TruncatedNormal (Gaussian) distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[-1.0981836  1.4140984]
            #                 [ 3.1390522 -2.8266568]]
            # linear.bias: [-2.1546738 -1.6570673]

            res = linear(data)
            # res: [[[-0.11380529 -3.0696259 ]]
            #       [[-0.11380529 -3.0696259 ]]
            #       [[-0.11380529 -3.0696259 ]]]
    """

    def __init__(self, mean=0.0, std=1.0, name=None):
        # Guard against callers explicitly passing None for either moment.
        assert mean is not None, 'mean should not be None'
        assert std is not None, 'std should not be None'
        # seed=0 delegates seeding to the global random seed.
        super(TruncatedNormal, self).__init__(loc=mean, scale=std, seed=0)
|
@ -0,0 +1,60 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid.initializer import UniformInitializer
|
||||||
|
|
||||||
|
__all__ = ['Uniform']
|
||||||
|
|
||||||
|
|
||||||
|
class Uniform(UniformInitializer):
    """The random uniform distribution initializer.

    Fills the parameter with samples drawn uniformly from [low, high].

    Args:
        low (float, optional): lower boundary of the uniform distribution. The default value is -1.0.
        high (float, optional): upper boundary of the uniform distribution. The default value is 1.0.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by random uniform distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[-0.46245047  0.05260676]
            #                 [ 0.38054508  0.29169726]]
            # linear.bias: [-0.2734719  0.23939109]

            res = linear(data)
            # res: [[[-0.3553773  0.5836951]]
            #       [[-0.3553773  0.5836951]]
            #       [[-0.3553773  0.5836951]]]
    """

    def __init__(self, low=-1.0, high=1.0, name=None):
        # Reject explicit None and an inverted interval before delegating.
        assert low is not None, 'low should not be None'
        assert high is not None, 'high should not be None'
        assert high >= low, 'high should greater or equal than low'
        # seed=0 defers to the global seed; the diag_* arguments disable the
        # diagonal-initialization feature of the underlying initializer.
        super(Uniform, self).__init__(
            low=low, high=high, seed=0, diag_num=0, diag_step=0, diag_val=1.0)
|
@ -0,0 +1,124 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid.initializer import XavierInitializer
|
||||||
|
|
||||||
|
__all__ = ['XavierNormal', 'XavierUniform']
|
||||||
|
|
||||||
|
|
||||||
|
class XavierNormal(XavierInitializer):
    r"""
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio, using a normal distribution.

    The mean is 0 and the standard deviation is

    .. math::

        \sqrt{\frac{2.0}{fan\_in + fan\_out}}

    Args:
        fan_in (float, optional): fan_in for Xavier initialization, It is
            inferred from the tensor. The default value is None.
        fan_out (float, optional): fan_out for Xavier initialization, it is
            inferred from the tensor. The default value is None.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Xavier weight, using a normal distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.XavierNormal())
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.XavierNormal())
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[ 0.06910077 -0.18103665]
            #                 [-0.02546741 -1.0402188 ]]
            # linear.bias: [-0.5012929  0.12418364]

            res = linear(data)
            # res: [[[-0.4576595 -1.0970719]]
            #       [[-0.4576595 -1.0970719]]
            #       [[-0.4576595 -1.0970719]]]
    """

    def __init__(self, fan_in=None, fan_out=None, name=None):
        # uniform=False selects the normal-distribution variant;
        # seed=0 defers to the global random seed.
        super(XavierNormal, self).__init__(
            uniform=False, fan_in=fan_in, fan_out=fan_out, seed=0)
|
||||||
|
|
||||||
|
|
||||||
|
class XavierUniform(XavierInitializer):
    r"""
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is [-x, x], where

    .. math::

        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}

    Args:
        fan_in (float, optional): fan_in for Xavier initialization, it is
            inferred from the tensor. The default value is None.
        fan_out (float, optional): fan_out for Xavier initialization, it is
            inferred from the tensor. The default value is None.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Xavier weight, using a uniform distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.XavierUniform())
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.XavierUniform())
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[-0.04229349 -1.1248565 ]
            #                 [-0.10789523 -0.5938053 ]]
            # linear.bias: [ 1.1983747 -0.40201235]

            res = linear(data)
            # res: [[[ 1.0481861 -2.1206741]]
            #       [[ 1.0481861 -2.1206741]]
            #       [[ 1.0481861 -2.1206741]]]
    """

    def __init__(self, fan_in=None, fan_out=None, name=None):
        # uniform=True selects the uniform-distribution variant;
        # seed=0 defers to the global random seed.
        super(XavierUniform, self).__init__(
            uniform=True, fan_in=fan_in, fan_out=fan_out, seed=0)
|
Loading…
Reference in new issue