Add truncated gaussian initializer. (#13000)

* Add truncated gaussian initializer.

* Fix unittest.

* Update API.spec

* Fix code style and fix bug.

* Fix code style.

* Small fix.
whs committed cf128231c6 (parent 642cf6ca2f)

@@ -32,6 +32,15 @@ Normal
    :members:
    :noindex:

.. _api_fluid_initializer_TruncatedNormal:

TruncatedNormal
---------------

.. autoclass:: paddle.fluid.initializer.TruncatedNormal
    :members:
    :noindex:

.. _api_fluid_initializer_Xavier:

Xavier

@@ -79,6 +79,7 @@ paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program
paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
paddle.fluid.initializer.TruncatedNormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
paddle.fluid.initializer.XavierInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'fan_out', 'seed'], varargs=None, keywords=None, defaults=(True, None, None, 0))
paddle.fluid.initializer.BilinearInitializer.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.initializer.MSRAInitializer.__init__ ArgSpec(args=['self', 'uniform', 'fan_in', 'seed'], varargs=None, keywords=None, defaults=(True, None, 0))
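As a quick sanity check that the implementation matches the ArgSpec line added above (a sketch, assuming a built paddle.fluid; inspect.getargspec is the Python 2-era API in use at the time):

import inspect
import paddle.fluid as fluid

# The constructor signature should agree with the spec entry for
# TruncatedNormalInitializer.__init__.
spec = inspect.getargspec(fluid.initializer.TruncatedNormalInitializer.__init__)
print(spec.args)      # ['self', 'loc', 'scale', 'seed']
print(spec.defaults)  # (0.0, 1.0, 0)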

File diff suppressed because it is too large.

@@ -0,0 +1,76 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <thrust/random.h>
#include <thrust/transform.h>
#include <limits>
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"

namespace paddle {
namespace operators {

template <typename T>
struct TruncatedNormal {
  T mean, std;
  T a_normal_cdf;
  T b_normal_cdf;
  unsigned int seed;
  T numeric_min;

  __host__ __device__ TruncatedNormal(T mean, T std, T numeric_min, int seed)
      : mean(mean), std(std), seed(seed), numeric_min(numeric_min) {
    // CDF of the standard normal at the truncation bounds -2 and +2.
    a_normal_cdf = (1.0 + erff(-2.0 / sqrtf(2.0))) / 2.0;
    b_normal_cdf = (1.0 + erff(2.0 / sqrtf(2.0))) / 2.0;
  }

  __host__ __device__ T operator()(const unsigned int n) const {
    // One independent uniform sample per output element, obtained by
    // discarding the first n values of the seeded sequence.
    thrust::minstd_rand rng;
    rng.seed(seed);
    thrust::uniform_real_distribution<T> dist(numeric_min, 1);
    rng.discard(n);
    T value = dist(rng);
    // Map the uniform sample into [cdf(-2), cdf(2)], invert the standard
    // normal CDF, then scale by std and shift by mean.
    auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value;
    return std::sqrt(2.0) * std * erfinvf(2 * p - 1) + mean;
  }
};

template <typename T>
class GPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* tensor = context.Output<framework::Tensor>("Out");
    T* data = tensor->mutable_data<T>(context.GetPlace());
    unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
    if (seed == 0) {
      // seed == 0 means "non-deterministic": draw a fresh seed per run.
      std::random_device rd;
      seed = rd();
    }
    T mean = static_cast<T>(context.Attr<float>("mean"));
    T std = static_cast<T>(context.Attr<float>("std"));
    thrust::counting_iterator<unsigned int> index_sequence_begin(0);
    int64_t size = tensor->numel();
    thrust::transform(
        index_sequence_begin, index_sequence_begin + size,
        thrust::device_ptr<T>(data),
        TruncatedNormal<T>(mean, std, std::numeric_limits<T>::min(), seed));
  }
};

}  // namespace operators
}  // namespace paddle

REGISTER_OP_CUDA_KERNEL(
    truncated_gaussian_random,
    paddle::operators::GPUTruncatedGaussianRandomKernel<float>);
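For reference, the inverse-CDF transform the functor above implements can be sketched in NumPy (assuming SciPy for erf/erfinv; this mirrors the kernel's math, not its Thrust RNG, and the function name is illustrative):

import numpy as np
from scipy.special import erf, erfinv

def truncated_normal(shape, mean=0.0, std=1.0):
    # CDF of the standard normal at the truncation bounds -2 and +2.
    a_cdf = (1.0 + erf(-2.0 / np.sqrt(2.0))) / 2.0
    b_cdf = (1.0 + erf(2.0 / np.sqrt(2.0))) / 2.0
    # Uniform samples mapped into [cdf(-2), cdf(2)], then pushed through
    # the inverse standard-normal CDF, scaled and shifted.
    u = np.random.uniform(np.finfo(np.float32).tiny, 1.0, size=shape)
    p = a_cdf + (b_cdf - a_cdf) * u
    return np.sqrt(2.0) * std * erfinv(2.0 * p - 1.0) + mean

samples = truncated_normal([10000])
print(samples.min(), samples.max())  # all within mean +/- 2 * std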

@@ -20,10 +20,10 @@ import contextlib
from .core import VarDesc
__all__ = [
-    'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
-    'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
-    'UniformInitializer', 'NormalInitializer', 'XavierInitializer',
-    'BilinearInitializer', 'MSRAInitializer'
+    'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
+    'MSRA', 'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
+    'UniformInitializer', 'NormalInitializer', 'TruncatedNormalInitializer',
+    'XavierInitializer', 'BilinearInitializer', 'MSRAInitializer'
]
_force_init_on_cpu_ = False
@@ -33,6 +33,8 @@ def force_init_on_cpu():
    """
    The flag of whether to force variable initialization on CPU.

    Returns:
        bool: the current state of the flag.

    Examples:
        .. code-block:: python
@@ -272,6 +274,60 @@ class NormalInitializer(Initializer):
return op
class TruncatedNormalInitializer(Initializer):
    """Implements the random truncated normal (Gaussian) distribution initializer

    Args:
        loc (float): mean of the normal distribution
        scale (float): standard deviation of the normal distribution
        seed (int): random seed

    Examples:
        .. code-block:: python

            fc = fluid.layers.fc(input=x, size=10,
                param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
    """

    def __init__(self, loc=0.0, scale=1.0, seed=0):
        assert loc is not None
        assert scale is not None
        assert seed is not None
        super(TruncatedNormalInitializer, self).__init__()
        self._mean = loc
        self._std_dev = scale
        self._seed = seed

    def __call__(self, var, block):
        """Add truncated normal distribution initialization ops for a variable

        Args:
            var: Variable that needs to be initialized
            block: The block in which initialization ops
                   should be added

        Returns:
            the initialization op
        """
        assert isinstance(var, framework.Variable)
        assert isinstance(block, framework.Block)
        # Initialization Ops should be prepended and not appended
        if self._seed == 0:
            self._seed = block.program.random_seed
        op = block._prepend_op(
            type="truncated_gaussian_random",
            outputs={"Out": var},
            attrs={
                "shape": var.shape,
                "dtype": int(var.dtype),
                "mean": self._mean,
                "std": self._std_dev,
                "seed": self._seed
            })
        var.op = op
        return op
class XavierInitializer(Initializer):
"""
This class implements the Xavier weight initializer from the paper
@@ -583,6 +639,7 @@ class BilinearInitializer(Initializer):
Constant = ConstantInitializer
Uniform = UniformInitializer
Normal = NormalInitializer
TruncatedNormal = TruncatedNormalInitializer
Xavier = XavierInitializer
MSRA = MSRAInitializer
Bilinear = BilinearInitializer
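With the shorthand aliases above, the new initializer can be passed directly as a param_attr, mirroring the class docstring's example (a minimal sketch; the `x` data layer is illustrative):

import paddle.fluid as fluid

# Illustrative input layer, just to give the fc layer something to consume.
x = fluid.layers.data(name='x', shape=[32], dtype='float32')

# Weights drawn from N(0.0, 2.0^2), truncated at two standard deviations.
fc = fluid.layers.fc(input=x,
                     size=10,
                     param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))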

@@ -0,0 +1,69 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor
class TestTruncatedGaussianRandomOp(unittest.TestCase):
    def setUp(self):
        self.op_type = "truncated_gaussian_random"
        self.inputs = {}
        self.attrs = {
            "shape": [10000],
            "mean": .0,
            "std": 1.,
            "seed": 10,
        }
        self.outputs = ["Out"]

    def test_cpu(self):
        self.gaussian_random_test(place=fluid.CPUPlace())

    def test_gpu(self):
        if core.is_compiled_with_cuda():
            self.gaussian_random_test(place=fluid.CUDAPlace(0))

    def gaussian_random_test(self, place):
        program = fluid.Program()
        block = program.global_block()
        vout = block.create_var(name="Out")
        op = block.append_op(
            type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)

        op.desc.infer_var_type(block.desc)
        op.desc.infer_shape(block.desc)

        fetch_list = []
        for var_name in self.outputs:
            fetch_list.append(block.var(var_name))

        exe = Executor(place)
        outs = exe.run(program, fetch_list=fetch_list)
        tensor = outs[0]
        self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
        # Variance of a standard normal truncated to [-2, 2] is ~0.7737.
        self.assertAlmostEqual(numpy.var(tensor), 0.773, delta=0.1)


if __name__ == "__main__":
    unittest.main()
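The 0.773 constant asserted above follows from the closed-form variance of a truncated normal; a quick SciPy check (illustrative, not part of the PR):

from scipy.stats import norm

# Variance of a standard normal truncated to [a, b]:
#   Var = 1 + (a*phi(a) - b*phi(b))/Z - ((phi(a) - phi(b))/Z)**2,
# where phi is the standard normal pdf and Z = Phi(b) - Phi(a).
a, b = -2.0, 2.0
Z = norm.cdf(b) - norm.cdf(a)
var = 1.0 + (a * norm.pdf(a) - b * norm.pdf(b)) / Z - \
      ((norm.pdf(a) - norm.pdf(b)) / Z) ** 2
print(var)  # ~0.7737, matching the asserted 0.773 within the test's delta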