@@ -49,6 +49,7 @@ from ...fluid.framework import Variable
__all__ = [
    'binary_cross_entropy',
    'binary_cross_entropy_with_logits',
    'bpr_loss',
    'center_loss',
    'cross_entropy',
@@ -214,6 +215,154 @@ def binary_cross_entropy(input, label, weight=None, reduction='mean',
    return out


def binary_cross_entropy_with_logits(logit,
                                     label,
                                     weight=None,
                                     reduction='mean',
                                     pos_weight=None,
                                     name=None):
    """
    This operator combines the sigmoid layer and the :ref:`api_nn_loss_BCELoss` layer.
    It can also be seen as a combination of the ``sigmoid_cross_entropy_with_logits``
    layer and some reduce operations.

    This measures the element-wise probability error in classification tasks
    in which each class is independent. It can be thought of as predicting
    labels for a data point, where the labels are not mutually exclusive.
    For example, a news article can be about politics, technology or sports
    at the same time, or about none of these.

    First, this operator calculates the loss as follows:

    .. math::
           Out = -Labels * \\log(\\sigma(Logit)) - (1 - Labels) * \\log(1 - \\sigma(Logit))

    We know that :math:`\\sigma(Logit) = \\frac{1}{1 + e^{-Logit}}`. By substituting this we get:

    .. math::
           Out = Logit - Logit * Labels + \\log(1 + e^{-Logit})

    For stability and to prevent overflow of :math:`e^{-Logit}` when Logit < 0,
    we reformulate the loss as follows:

    .. math::
           Out = \\max(Logit, 0) - Logit * Labels + \\log(1 + e^{-|Logit|})
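
    The equivalence of the naive and the stable form can be sanity-checked
    with a small numpy sketch (shown purely for illustration; it is not part
    of this API):

    .. code-block:: python

        import numpy as np

        logit = np.array([5.0, 1.0, 3.0])
        label = np.array([1.0, 0.0, 1.0])

        # naive form: -y * log(sigma(x)) - (1 - y) * log(1 - sigma(x))
        sigma = 1.0 / (1.0 + np.exp(-logit))
        naive = -label * np.log(sigma) - (1 - label) * np.log(1 - sigma)

        # stable form: max(x, 0) - x * y + log(1 + exp(-|x|))
        stable = (np.maximum(logit, 0) - logit * label
                  + np.log1p(np.exp(-np.abs(logit))))

        assert np.allclose(naive, stable)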

    Then, if ``weight`` or ``pos_weight`` is not None, this operator multiplies the
    loss `Out` by the weight tensor. The ``weight`` tensor assigns a different
    weight to each item in the batch, and the ``pos_weight`` tensor assigns a
    different weight to the positive label of each class.

    Finally, this operator applies the reduce operation on the loss.
    If :attr:`reduction` is set to ``'none'``, the operator returns the original loss `Out`.
    If :attr:`reduction` is set to ``'mean'``, the reduced mean loss is :math:`Out = MEAN(Out)`.
    If :attr:`reduction` is set to ``'sum'``, the reduced sum loss is :math:`Out = SUM(Out)`.

    Note that the target labels ``label`` should be numbers between 0 and 1.

    Args:
        logit (Tensor): The input prediction tensor, a 2-D tensor with shape [N, *],
            where N is batch_size and `*` means number of additional dimensions.
            The ``logit`` is usually the output of a Linear layer. Available
            dtypes are float32 and float64.
        label (Tensor): The target label tensor, a 2-D tensor with the same shape as
            ``logit``. Its values should be numbers between 0 and 1. Available
            dtypes are float32 and float64.
        weight (Tensor, optional): A manual rescaling weight given to the loss of each
            batch element. If given, it has to be a 1-D Tensor whose size is `[N, ]`.
            The data type is float32 or float64. Default is ``None``.
        reduction (str, optional): Indicates how to average the loss by batch_size;
            the candidates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'none'``, the unreduced loss is returned;
            if :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
            if :attr:`reduction` is ``'sum'``, the summed loss is returned.
            Default is ``'mean'``.
        pos_weight (Tensor, optional): A weight for positive examples. Must be a vector
            with length equal to the number of classes. The data type is float32 or
            float64. Default is ``None``.
        name (str, optional): Name for the operation (optional, default is None).
            For more information, please refer to :ref:`api_guide_Name`.

    Returns:
        output (Tensor): If ``reduction`` is ``'none'``, the shape of the output is
            the same as ``logit``; otherwise, the output is a scalar Tensor.
Examples:
|
|
|
|
|
|
|
|
|
|
.. code-block:: python
|
|
|
|
|
|
|
|
|
|
import paddle
|
|
|
|
|
paddle.disable_static()
|
|
|
|
|
logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
|
|
|
|
|
label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
|
|
|
|
|
output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label)
|
|
|
|
|
print(output.numpy()) # [0.45618808]
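
            # Illustrative only: rescale each batch element with ``weight``,
            # rescale positive labels with ``pos_weight``, and sum the loss.
            # The numeric values below are made up for this sketch.
            weight = paddle.to_tensor([0.5, 1.0, 1.5], dtype="float32")
            pos_weight = paddle.to_tensor([2.0, 2.0, 2.0], dtype="float32")
            output = paddle.nn.functional.binary_cross_entropy_with_logits(
                logit, label, weight=weight, pos_weight=pos_weight,
                reduction='sum')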

    """
    if reduction not in ['sum', 'mean', 'none']:
        raise ValueError(
            "The value of 'reduction' in binary_cross_entropy_with_logits "
            "should be 'sum', 'mean' or 'none', but received %s, which is not allowed."
            % reduction)

    if in_dygraph_mode():
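        # dygraph fast path: call the core ops directly; ``one`` is a scalar
        # constant used below to build the pos_weight rescaling factor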
        one = _varbase_creator(dtype=logit.dtype)
        core.ops.fill_constant(one, 'value',
                               float(1.0), 'force_cpu', False, 'dtype',
                               one.dtype, 'str_value', '1.0', 'shape', [1])
        out = core.ops.sigmoid_cross_entropy_with_logits(logit, label)
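        # rescale positive labels: out *= label * (pos_weight - 1) + 1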
        if pos_weight is not None:
            log_weight = core.ops.elementwise_add(
                core.ops.elementwise_mul(
                    label, core.ops.elementwise_sub(pos_weight, one)), one)
            out = core.ops.elementwise_mul(out, log_weight)
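        # apply the per-batch-element rescaling weight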
        if weight is not None:
            out = core.ops.elementwise_mul(out, weight)

        if reduction == "sum":
            return core.ops.reduce_sum(out, 'reduce_all', True)
        elif reduction == "mean":
            return core.ops.mean(out)
        else:
            return out
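
    # static graph path: validate input dtypes, then compose the loss from
    # paddle ops that can be recorded in the program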
    fluid.data_feeder.check_variable_and_dtype(
        logit, 'logit', ['float32', 'float64'],
        'binary_cross_entropy_with_logits')
    fluid.data_feeder.check_variable_and_dtype(
        label, 'label', ['float32', 'float64'],
        'binary_cross_entropy_with_logits')
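
    # attach the user-provided ``name`` only to the op that produces the
    # final output; the sigmoid op is last only when no weighting or
    # reduction follows it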
    sigmoid_name = None
    if reduction == 'none' and pos_weight is None and weight is None:
        sigmoid_name = name

    out = paddle.nn.functional.sigmoid_cross_entropy_with_logits(
        logit, label, name=sigmoid_name)

    one = paddle.fill_constant(shape=[1], value=1.0, dtype=logit.dtype)
    if pos_weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            pos_weight, 'pos_weight', ['float32', 'float64'],
            'binary_cross_entropy_with_logits')
        log_weight = paddle.add(
            paddle.multiply(label, paddle.elementwise_sub(pos_weight, one)),
            one)
        pos_weight_name = name if reduction == 'none' and weight is None else None
        out = paddle.multiply(out, log_weight, name=pos_weight_name)

    if weight is not None:
        fluid.data_feeder.check_variable_and_dtype(
            weight, 'weight', ['float32', 'float64'],
            'binary_cross_entropy_with_logits')
        weight_name = name if reduction == 'none' else None
        out = paddle.multiply(out, weight, name=weight_name)

    if reduction == "sum":
        return paddle.sum(out, name=name)
    elif reduction == "mean":
        return paddle.mean(out, name=name)
    return out


def smooth_l1_loss(input, label, reduction='mean', delta=1.0, name=None):
    """
    This operator calculates the smooth L1 loss. It creates a criterion that uses a squared