add paddle.nn.initializer API, including: Normal, TruncatedNormal, Uniform, XavierNormal, XavierUniform, Assign (#27769)
parent
a4f850748a
commit
8e70b18e6c
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,100 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid import framework
|
||||||
|
from ...fluid import core
|
||||||
|
from ...fluid import unique_name
|
||||||
|
from ...fluid.core import VarDesc
|
||||||
|
from ...fluid.data_feeder import check_type
|
||||||
|
from ...fluid.initializer import NumpyArrayInitializer
|
||||||
|
|
||||||
|
__all__ = ['Assign']
|
||||||
|
|
||||||
|
|
||||||
|
class Assign(NumpyArrayInitializer):
    """Init a parameter with a numpy array, list, or tensor.

    Args:
        value (Tensor|numpy.ndarray|list): numpy array, list, or tensor to initialize the parameter.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by the input numpy array, list, or tensor.

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            # numpy array
            data_1 = paddle.ones(shape=[1, 2], dtype='float32')
            weight_attr_1 = paddle.framework.ParamAttr(
                name="linear_weight_1",
                initializer=paddle.nn.initializer.Assign(np.array([2, 2])))
            bias_attr_1 = paddle.framework.ParamAttr(
                name="linear_bias_1",
                initializer=paddle.nn.initializer.Assign(np.array([2])))
            linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1)
            # linear_1.weight: [2. 2.]
            # linear_1.bias: [2.]

            res_1 = linear_1(data_1)
            # res_1: [6.]

            # python list
            data_2 = paddle.ones(shape=[1, 2], dtype='float32')
            weight_attr_2 = paddle.framework.ParamAttr(
                name="linear_weight_2",
                initializer=paddle.nn.initializer.Assign([2, 2]))
            bias_attr_2 = paddle.framework.ParamAttr(
                name="linear_bias_2",
                initializer=paddle.nn.initializer.Assign([2]))
            linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2)
            # linear_2.weight: [2. 2.]
            # linear_2.bias: [2.]

            res_2 = linear_2(data_2)
            # res_2: [6.]

            # tensor
            data_3 = paddle.ones(shape=[1, 2], dtype='float32')
            weight_attr_3 = paddle.framework.ParamAttr(
                name="linear_weight_3",
                initializer=paddle.nn.initializer.Assign(paddle.full([2], 2)))
            bias_attr_3 = paddle.framework.ParamAttr(
                name="linear_bias_3",
                initializer=paddle.nn.initializer.Assign(paddle.full([1], 2)))
            linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3)
            # linear_3.weight: [2. 2.]
            # linear_3.bias: [2.]

            res_3 = linear_3(data_3)
            # res_3: [6.]
    """

    def __init__(self, value, name=None):
        import numpy

        # Only numpy arrays, python lists, and framework Variables (tensors)
        # are accepted; anything else raises a TypeError via check_type.
        check_type(value, 'value', (numpy.ndarray, list, framework.Variable),
                   'Assign')

        if isinstance(value, list):
            value = numpy.array(value)

        # TODO: if value is already a tensor, for efficiency it may not be
        # necessary to convert it to numpy data before initialization.
        if isinstance(value, framework.Variable):
            value = value.numpy()

        super(Assign, self).__init__(value)
|
@ -0,0 +1,100 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid.initializer import NormalInitializer
|
||||||
|
from ...fluid.initializer import TruncatedNormalInitializer
|
||||||
|
|
||||||
|
__all__ = ['Normal', 'TruncatedNormal']
|
||||||
|
|
||||||
|
|
||||||
|
class Normal(NormalInitializer):
    """The Random Normal (Gaussian) distribution initializer.

    Fills the parameter with samples drawn from a normal distribution
    with the given mean and standard deviation.

    Args:
        mean (float, optional): mean of the normal distribution. The default value is 0.0.
        std (float, optional): standard deviation of the normal distribution. The default value is 1.0.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Random Normal (Gaussian) distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.Normal(mean=0.0, std=2.0))
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[ 2.1973135 -2.2697184]
            #                 [-1.9104223 -1.0541488]]
            # linear.bias: [ 0.7885926 -0.74719954]

            res = linear(data)
            # res: [[[ 1.0754838 -4.071067 ]]
            #       [[ 1.0754838 -4.071067 ]]
            #       [[ 1.0754838 -4.071067 ]]]
    """

    def __init__(self, mean=0.0, std=1.0, name=None):
        # Guard against callers explicitly passing None for either moment.
        assert mean is not None, 'mean should not be None'
        assert std is not None, 'std should not be None'
        # seed=0 delegates seeding to the global random seed.
        super(Normal, self).__init__(loc=mean, scale=std, seed=0)
|
||||||
|
|
||||||
|
|
||||||
|
class TruncatedNormal(TruncatedNormalInitializer):
    """The Random TruncatedNormal (Gaussian) distribution initializer.

    Args:
        mean (float, optional): mean of the normal distribution. The default value is 0.0.
        std (float, optional): standard deviation of the normal distribution. The default value is 1.0.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Random TruncatedNormal (Gaussian) distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=2.0))
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[-1.0981836  1.4140984]
            #                 [ 3.1390522 -2.8266568]]
            # linear.bias: [-2.1546738 -1.6570673]

            res = linear(data)
            # res: [[[-0.11380529 -3.0696259 ]]
            #       [[-0.11380529 -3.0696259 ]]
            #       [[-0.11380529 -3.0696259 ]]]
    """

    def __init__(self, mean=0.0, std=1.0, name=None):
        # Guard against callers explicitly passing None for either moment.
        assert mean is not None, 'mean should not be None'
        assert std is not None, 'std should not be None'
        # seed=0 delegates seeding to the global random seed.
        super(TruncatedNormal, self).__init__(loc=mean, scale=std, seed=0)
|
@ -0,0 +1,60 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid.initializer import UniformInitializer
|
||||||
|
|
||||||
|
__all__ = ['Uniform']
|
||||||
|
|
||||||
|
|
||||||
|
class Uniform(UniformInitializer):
    """The random uniform distribution initializer.

    Fills the parameter with samples drawn uniformly from [low, high].

    Args:
        low (float, optional): lower boundary of the uniform distribution. The default value is -1.0.
        high (float, optional): upper boundary of the uniform distribution. The default value is 1.0.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by random uniform distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.Uniform(low=-0.5, high=0.5))
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[-0.46245047  0.05260676]
            #                 [ 0.38054508  0.29169726]]
            # linear.bias: [-0.2734719  0.23939109]

            res = linear(data)
            # res: [[[-0.3553773  0.5836951]]
            #       [[-0.3553773  0.5836951]]
            #       [[-0.3553773  0.5836951]]]
    """

    def __init__(self, low=-1.0, high=1.0, name=None):
        # Reject explicit None and an inverted interval before delegating.
        assert low is not None, 'low should not be None'
        assert high is not None, 'high should not be None'
        assert high >= low, 'high should greater or equal than low'
        # seed=0 defers to the global seed; the diag_* arguments disable the
        # diagonal-initialization feature of the underlying initializer.
        super(Uniform, self).__init__(
            low=low, high=high, seed=0, diag_num=0, diag_step=0, diag_val=1.0)
|
@ -0,0 +1,124 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from ...fluid.initializer import XavierInitializer
|
||||||
|
|
||||||
|
__all__ = ['XavierNormal', 'XavierUniform']
|
||||||
|
|
||||||
|
|
||||||
|
class XavierNormal(XavierInitializer):
    r"""
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio, using a normal distribution.

    The mean is 0 and the standard deviation is

    .. math::

        \sqrt{\frac{2.0}{fan\_in + fan\_out}}

    Args:
        fan_in (float, optional): fan_in for Xavier initialization, It is
            inferred from the tensor. The default value is None.
        fan_out (float, optional): fan_out for Xavier initialization, it is
            inferred from the tensor. The default value is None.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Xavier weight, using a normal distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.XavierNormal())
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.XavierNormal())
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[ 0.06910077 -0.18103665]
            #                 [-0.02546741 -1.0402188 ]]
            # linear.bias: [-0.5012929  0.12418364]

            res = linear(data)
            # res: [[[-0.4576595 -1.0970719]]
            #       [[-0.4576595 -1.0970719]]
            #       [[-0.4576595 -1.0970719]]]
    """

    def __init__(self, fan_in=None, fan_out=None, name=None):
        # uniform=False selects the normal-distribution variant;
        # seed=0 defers to the global random seed.
        super(XavierNormal, self).__init__(
            uniform=False, fan_in=fan_in, fan_out=fan_out, seed=0)
|
||||||
|
|
||||||
|
|
||||||
|
class XavierUniform(XavierInitializer):
    r"""
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is [-x, x], where

    .. math::

        x = \sqrt{\frac{6.0}{fan\_in + fan\_out}}

    Args:
        fan_in (float, optional): fan_in for Xavier initialization, it is
            inferred from the tensor. The default value is None.
        fan_out (float, optional): fan_out for Xavier initialization, it is
            inferred from the tensor. The default value is None.
        name(str, optional): The default value is None. Normally there is no need for user to set this
            property. For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        A parameter initialized by Xavier weight, using a uniform distribution.

    Examples:
        .. code-block:: python

            import paddle

            data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            weight_attr = paddle.framework.ParamAttr(
                name="linear_weight",
                initializer=paddle.nn.initializer.XavierUniform())
            bias_attr = paddle.framework.ParamAttr(
                name="linear_bias",
                initializer=paddle.nn.initializer.XavierUniform())
            linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            # linear.weight: [[-0.04229349 -1.1248565 ]
            #                 [-0.10789523 -0.5938053 ]]
            # linear.bias: [ 1.1983747 -0.40201235]

            res = linear(data)
            # res: [[[ 1.0481861 -2.1206741]]
            #       [[ 1.0481861 -2.1206741]]
            #       [[ 1.0481861 -2.1206741]]]
    """

    def __init__(self, fan_in=None, fan_out=None, name=None):
        # uniform=True selects the uniform-distribution variant;
        # seed=0 defers to the global random seed.
        super(XavierUniform, self).__init__(
            uniform=True, fan_in=fan_in, fan_out=fan_out, seed=0)
|
Loading…
Reference in new issue