commit 61dbb1b17c (parent fb2f888ec8)
@@ -0,0 +1,31 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Bayesian Layer.

The high-level components (Cells) used to construct the Bayesian neural network.
"""
from . import conv_variational, dense_variational, layer_distribution, bnn_cell_wrapper
from .conv_variational import ConvReparam
from .dense_variational import DenseReparam
from .layer_distribution import NormalPrior, NormalPosterior
from .bnn_cell_wrapper import WithBNNLossCell

__all__ = []
__all__.extend(conv_variational.__all__)
__all__.extend(dense_variational.__all__)
__all__.extend(layer_distribution.__all__)
__all__.extend(bnn_cell_wrapper.__all__)
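Taken together, these exports make the layers available at package level. A minimal import sketch; the absolute package path below is inferred from the relative imports (three levels under mindspore.nn) and is an assumption, not something this diff confirms:

    # Assumed package path; inferred from the relative imports above.
    from mindspore.nn.probability.bnn_layers import (
        DenseReparam, ConvReparam, NormalPrior, NormalPosterior, WithBNNLossCell)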
@@ -0,0 +1,92 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Generate WithLossCell suitable for BNN."""
from .conv_variational import _ConvVariational
from .dense_variational import _DenseVariational
from ..transforms.bnn_loss.generate_kl_loss import gain_bnn_with_loss

__all__ = ['WithBNNLossCell']

class ClassWrap:
    """Decorator of WithBNNLossCell."""

    def __init__(self, cls):
        self._cls = cls
        self.bnn_loss_file = None

    def __call__(self, backbone, loss_fn, dnn_factor=1, bnn_factor=1):
        obj = self._cls(backbone, loss_fn, dnn_factor, bnn_factor)
        bnn_with_loss = obj()
        self.bnn_loss_file = obj.bnn_loss_file
        return bnn_with_loss
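
# Note: with @ClassWrap applied below, "WithBNNLossCell(...)" builds the
# wrapper and returns the generated loss cell directly, not the wrapper
# object; see the runnable sketch of this pattern after the file's diff.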
@ClassWrap
class WithBNNLossCell:
    r"""
    Generate WithLossCell suitable for BNN.

    Args:
        backbone (Cell): The target network.
        loss_fn (Cell): The loss function used to compute loss.
        dnn_factor (int, float): The coefficient of the backbone's loss, which is computed by the loss
            function. Default: 1.
        bnn_factor (int, float): The coefficient of the KL loss, which is the KL divergence of the
            Bayesian layers. Default: 1.

    Inputs:
        - **data** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
        - **label** (Tensor) - Tensor of shape :math:`(N, \ldots)`.

    Outputs:
        Tensor, a scalar tensor with shape :math:`()`.

    Examples:
        >>> net = Net()
        >>> loss_fn = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
        >>> net_with_criterion = WithBNNLossCell(net, loss_fn)
        >>>
        >>> batch_size = 2
        >>> data = Tensor(np.ones([batch_size, 3, 64, 64]).astype(np.float32) * 0.01)
        >>> label = Tensor(np.ones([batch_size, 1, 1, 1]).astype(np.int32))
        >>>
        >>> net_with_criterion(data, label)
    """

    def __init__(self, backbone, loss_fn, dnn_factor=1, bnn_factor=1):
        self.backbone = backbone
        self.loss_fn = loss_fn
        self.dnn_factor = dnn_factor
        self.bnn_factor = bnn_factor
        self.bnn_loss_file = None

    def _generate_loss_cell(self):
        """Generate WithBNNLossCell by ast."""
        layer_count = self._kl_loss_count(self.backbone)
        bnn_with_loss, self.bnn_loss_file = gain_bnn_with_loss(layer_count, self.backbone, self.loss_fn,
                                                               self.dnn_factor, self.bnn_factor)
        return bnn_with_loss

    def _kl_loss_count(self, net):
        """Count the Bayesian layers, recursing into sub-cells."""
        count = 0
        for (_, layer) in net.name_cells().items():
            if isinstance(layer, (_DenseVariational, _ConvVariational)):
                count += 1
            else:
                count += self._kl_loss_count(layer)
        return count

    def __call__(self):
        return self._generate_loss_cell()
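Because of the @ClassWrap decorator, WithBNNLossCell behaves as a factory: "instantiating" it returns the generated loss cell rather than a wrapper object. A self-contained sketch of the same pattern in plain Python (Greeter and its names are hypothetical, for illustration only):

    class ClassWrap:
        # Replace a class with a factory: calling the name builds an instance
        # and immediately returns whatever its __call__ produces.
        def __init__(self, cls):
            self._cls = cls

        def __call__(self, *args, **kwargs):
            obj = self._cls(*args, **kwargs)
            return obj()  # hand back the product, not the wrapper object


    @ClassWrap
    class Greeter:
        def __init__(self, name):
            self.name = name

        def __call__(self):
            return "hello, " + self.name


    print(Greeter("world"))  # prints "hello, world", not a Greeter instance

The returned loss cell can then be fed to the usual MindSpore training wrappers (for example nn.TrainOneStepCell together with an optimizer), like any other WithLossCell.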
File diff suppressed because it is too large
@@ -0,0 +1,188 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Dense variational layers."""
from mindspore.ops import operations as P
from mindspore._checkparam import check_int_positive, check_bool
from ...cell import Cell
from ...layer.activation import get_activation
from .layer_distribution import NormalPrior, NormalPosterior

__all__ = ['DenseReparam']

class _DenseVariational(Cell):
    """
    Base class for all dense variational layers.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            activation=None,
            has_bias=True,
            weight_prior_fn=NormalPrior,
            weight_posterior_fn=lambda name, shape: NormalPosterior(name=name, shape=shape),
            bias_prior_fn=NormalPrior,
            bias_posterior_fn=lambda name, shape: NormalPosterior(name=name, shape=shape)):
        super(_DenseVariational, self).__init__()
        self.in_channels = check_int_positive(in_channels)
        self.out_channels = check_int_positive(out_channels)
        self.has_bias = check_bool(has_bias)

        if isinstance(weight_prior_fn, Cell):
            self.weight_prior = weight_prior_fn
        else:
            self.weight_prior = weight_prior_fn()

        self.weight_posterior = weight_posterior_fn(shape=[self.out_channels, self.in_channels], name='bnn_weight')

        if self.has_bias:
            if isinstance(bias_prior_fn, Cell):
                self.bias_prior = bias_prior_fn
            else:
                self.bias_prior = bias_prior_fn()

            self.bias_posterior = bias_posterior_fn(shape=[self.out_channels], name='bnn_bias')

        self.activation = activation
        if isinstance(self.activation, str):
            self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()
        self.sum = P.ReduceSum()
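
    # Note: a prior supplied as a callable is invoked with no arguments, so a
    # non-default prior can be injected per layer, e.g. (illustrative sketch):
    #
    #     DenseReparam(3, 4, weight_prior_fn=lambda: NormalPrior(mean=0, std=1.0))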
    def construct(self, x):
        outputs = self._apply_variational_weight(x)
        if self.has_bias:
            outputs = self._apply_variational_bias(outputs)
        if self.activation_flag:
            outputs = self.activation(outputs)
        return outputs

    def extend_repr(self):
        str_info = 'in_channels={}, out_channels={}, weight_mean={}, weight_std={}, has_bias={}' \
            .format(self.in_channels, self.out_channels, self.weight_posterior.mean,
                    self.weight_posterior.untransformed_std, self.has_bias)
        if self.has_bias:
            str_info = str_info + ', bias_mean={}, bias_std={}' \
                .format(self.bias_posterior.mean, self.bias_posterior.untransformed_std)

        if self.activation_flag:
            str_info = str_info + ', activation={}'.format(self.activation)
        return str_info
    def _apply_variational_bias(self, inputs):
        bias_posterior_tensor = self.bias_posterior("sample")
        return self.bias_add(inputs, bias_posterior_tensor)

    def compute_kl_loss(self):
        """Compute the KL loss of the weight (and bias) posteriors against their priors."""
        weight_post_mean = self.weight_posterior("mean")
        weight_post_sd = self.weight_posterior("sd")

        kl = self.weight_prior("kl_loss", "Normal", weight_post_mean, weight_post_sd)
        kl_loss = self.sum(kl)
        if self.has_bias:
            bias_post_mean = self.bias_posterior("mean")
            bias_post_sd = self.bias_posterior("sd")

            kl = self.bias_prior("kl_loss", "Normal", bias_post_mean, bias_post_sd)
            kl = self.sum(kl)
            kl_loss += kl
        return kl_loss
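
# Note: for a Normal prior and posterior, the "kl_loss" evaluated in
# compute_kl_loss has the standard closed form (summed elementwise here):
#
#     KL(q || p) = log(sigma_p / sigma_q)
#                  + (sigma_q ** 2 + (mu_q - mu_p) ** 2) / (2 * sigma_p ** 2)
#                  - 1 / 2
#
# where q = N(mu_q, sigma_q) is the posterior and p = N(mu_p, sigma_p) the prior.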
class DenseReparam(_DenseVariational):
    r"""
    Dense variational layers with reparameterization.

    See more details in the paper `Auto-Encoding Variational Bayes
    <https://arxiv.org/abs/1312.6114>`_.

    Applies a dense-connected layer to the input. This layer implements the operation as:

    .. math::
        \text{outputs} = \text{activation}(\text{inputs} * \text{weight} + \text{bias}),

    where :math:`\text{activation}` is the activation function passed as the activation
    argument (if passed in), :math:`\text{weight}` is a weight matrix with the same
    data type as the inputs, sampled from the posterior distribution of the weight,
    and :math:`\text{bias}` is a bias vector with the same data type as the inputs
    (only if has_bias is True), sampled from the posterior distribution of
    :math:`\text{bias}`.

    Args:
        in_channels (int): The number of input channels.
        out_channels (int): The number of output channels.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: True.
        activation (str): Activation function applied to the output of the layer, e.g. 'relu'. Default: None.
        weight_prior_fn: Prior distribution for the weight.
            It should return a mindspore distribution instance.
            Default: NormalPrior (which creates an instance of a standard
            normal distribution).
        weight_posterior_fn: Posterior distribution for sampling the weight.
            It should be a function handle which returns a mindspore
            distribution instance.
            Default: NormalPosterior.
        bias_prior_fn: Prior distribution for the bias vector. It should return
            a mindspore distribution instance.
            Default: NormalPrior (which creates an instance of a standard
            normal distribution).
        bias_posterior_fn: Posterior distribution for sampling the bias vector.
            It should be a function handle which returns a mindspore
            distribution instance.
            Default: NormalPosterior.

    Inputs:
        - **input** (Tensor) - Tensor of shape :math:`(N, in\_channels)`.

    Outputs:
        Tensor of shape :math:`(N, out\_channels)`.

    Examples:
        >>> net = DenseReparam(3, 4)
        >>> input = Tensor(np.random.randint(0, 255, [2, 3]), mindspore.float32)
        >>> net(input)
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            activation=None,
            has_bias=True,
            weight_prior_fn=NormalPrior,
            weight_posterior_fn=lambda name, shape: NormalPosterior(name=name, shape=shape),
            bias_prior_fn=NormalPrior,
            bias_posterior_fn=lambda name, shape: NormalPosterior(name=name, shape=shape)):
        super(DenseReparam, self).__init__(
            in_channels,
            out_channels,
            activation=activation,
            has_bias=has_bias,
            weight_prior_fn=weight_prior_fn,
            weight_posterior_fn=weight_posterior_fn,
            bias_prior_fn=bias_prior_fn,
            bias_posterior_fn=bias_posterior_fn
        )

    def _apply_variational_weight(self, inputs):
        weight_posterior_tensor = self.weight_posterior("sample")
        outputs = self.matmul(inputs, weight_posterior_tensor)
        return outputs
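For intuition, the sampling performed by self.weight_posterior("sample") is the reparameterization trick from the paper cited above. A NumPy-only sketch under assumed shapes (an illustration, not the MindSpore implementation):

    import numpy as np

    rng = np.random.default_rng(0)
    out_channels, in_channels, batch = 4, 3, 2

    mu = np.zeros((out_channels, in_channels))        # trainable posterior mean
    rho = np.full((out_channels, in_channels), -5.0)  # trainable untransformed std
    sigma = 1e-6 + np.log1p(np.exp(rho))              # softplus keeps sigma > 0

    eps = rng.standard_normal((out_channels, in_channels))  # the only randomness
    weight = mu + sigma * eps                         # differentiable in mu and rho

    x = rng.standard_normal((batch, in_channels))
    y = x @ weight.T                                  # matches MatMul(transpose_b=True)
    print(y.shape)                                    # (2, 4)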
@@ -0,0 +1,96 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Initialize normal distributions."""
import numpy as np
import mindspore.common.dtype as mstype
from mindspore.common.tensor import Tensor
from mindspore.common.parameter import Parameter
from mindspore.ops import operations as P
from ...cell import Cell
from ..distribution.normal import Normal

__all__ = ['NormalPrior', 'NormalPosterior']

class NormalPrior(Cell):
    r"""
    Initialize a normal distribution with a mean of 0 and a standard deviation of 0.1.

    Args:
        dtype (class `mindspore.dtype`): The data type of the output tensor.
            Default: mindspore.float32.
        mean (int, float): Mean of the normal distribution. Default: 0.
        std (int, float): Standard deviation of the normal distribution. Default: 0.1.

    Returns:
        Cell, a normal distribution.
    """

    def __init__(self, dtype=mstype.float32, mean=0, std=0.1):
        super(NormalPrior, self).__init__()
        self.normal = Normal(mean, std, dtype=dtype)

    def construct(self, *inputs):
        return self.normal(*inputs)

class NormalPosterior(Cell):
    r"""
    Build normal distributions with trainable parameters.

    Args:
        name (str): Name prepended to each trainable parameter.
        shape (list): Shape of the mean and standard deviation.
        dtype (class `mindspore.dtype`): The data type of the output tensor.
            Default: mindspore.float32.
        loc_mean (float, array_like of floats): Mean of the distribution used to initialize the
            trainable mean. Default: 0.
        loc_std (float, array_like of floats): Standard deviation of the distribution used to
            initialize the trainable mean. Default: 0.1.
        untransformed_scale_mean (float, array_like of floats): Mean of the distribution used to
            initialize the trainable (untransformed) standard deviation. Default: -5.
        untransformed_scale_std (float, array_like of floats): Standard deviation of the distribution
            used to initialize the trainable (untransformed) standard deviation. Default: 0.1.

    Returns:
        Cell, a normal distribution.
    """

    def __init__(self,
                 name,
                 shape,
                 dtype=mstype.float32,
                 loc_mean=0,
                 loc_std=0.1,
                 untransformed_scale_mean=-5,
                 untransformed_scale_std=0.1):
        super(NormalPosterior, self).__init__()
        if not isinstance(name, str):
            raise ValueError('The type of `name` should be `str`')

        self.mean = Parameter(
            Tensor(np.random.normal(loc_mean, loc_std, shape), dtype=dtype), name=name + '_mean')

        self.untransformed_std = Parameter(
            Tensor(np.random.normal(untransformed_scale_mean, untransformed_scale_std, shape), dtype=dtype),
            name=name + '_untransformed_std')

        self.normal = Normal()

    def std_trans(self, std_pre):
        """Transform std_pre through a softplus so the standard deviation stays positive."""
        std = 1e-6 + P.Log()(P.Exp()(std_pre) + 1)
        return std

    def construct(self, *inputs):
        std = self.std_trans(self.untransformed_std)
        return self.normal(*inputs, mean=self.mean, sd=std)
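As a quick sanity check of std_trans, the default untransformed_scale_mean of -5 makes the initial posterior standard deviation about 0.0067, so posteriors start out nearly deterministic. A NumPy sketch mirroring the transform:

    import numpy as np

    def std_trans(std_pre):
        # mirrors 1e-6 + log(exp(x) + 1): a softplus with a small floor
        return 1e-6 + np.log1p(np.exp(std_pre))

    print(std_trans(-5.0))  # ~0.0067
    print(std_trans(0.0))   # ~0.6931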