add soft_label and axis for CrossEntropyLoss and improve performance (#29024)

* add soft_label and axis for CrossEntropyLoss and improve performance,test=develop * fix conflict in nn/functional/loss.py, test=develop
5 years ago · b52427327d
parent 018e169923
commit b52427327d
4 changed files with 299 additions and 648 deletions
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
--- a/python/paddle/nn/functional/init.py
+++ b/python/paddle/nn/functional/init.py
@ -128,6 +128,8 @@ from .loss import binary_cross_entropy  #DEFINE_ALIAS
 from .loss import binary_cross_entropy_with_logits  #DEFINE_ALIAS
 # from .loss import bpr_loss  #DEFINE_ALIAS
 # from .loss import center_loss  #DEFINE_ALIAS
+#from .loss import cross_entropy  #DEFINE_ALIAS
+from .loss import softmax_cross_entropy  #DEFINE_ALIAS
 from .loss import cross_entropy  #DEFINE_ALIAS
 from .loss import dice_loss  #DEFINE_ALIAS
 from .loss import hsigmoid_loss  #DEFINE_ALIAS
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@ -141,30 +141,40 @@ class BCEWithLogitsLoss(fluid.dygraph.Layer):


 class CrossEntropyLoss(fluid.dygraph.Layer):
-    r"""
-	:alias_main: paddle.nn.CrossEntropyLoss
-	:alias: paddle.nn.CrossEntropyLoss,paddle.nn.layer.CrossEntropyLoss,paddle.nn.layer.loss.CrossEntropyLoss
+    """
+    This operator implements the cross entropy loss function with softmax. This function 
+    combines the calculation of the softmax operation and the cross entropy loss function 
+    to provide a more numerically stable gradient.

-    This operator implements the cross entropy loss function. This OP combines ``LogSoftmax``,
-    and ``NLLLoss`` together.
+    Because this operator performs a softmax on logits internally, it expects
+    unscaled logits. This operator should not be used with the output of
+    softmax operator since that would produce incorrect results.

-    It is useful when training a classification problem with ``C`` classes.
-    If provided, the optional argument ``weight`` should be a 1D Variable assigning
-    weight to each of the classes.
+    When the attribute :attr:`soft_label` is set :attr:`False`, this operators 
+    expects mutually exclusive hard labels, each sample in a batch is in exactly 
+    one class with a probability of 1.0. Each sample in the batch will have a 
+    single label.

-    For predictions label, and target label, the loss is calculated as follows.
+    The equation is as follows:
+
+    1) Hard label (one-hot label, so every sample has exactly one class)

    .. math::

-        loss_j =  -\\text{input[class]} +
-        \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right), j = 1,..., K
+        loss_j =  -\\text{logits}_{label_j} +
+        \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{logits}_i)\\right), j = 1,..., K

-    If weight is not ``None``:
+    2) Soft label (each sample can have a distribution over all classes)

    .. math::

-        loss_j =  \\text{weight[class]}(-\\text{input[class]} +
-        \\log\\left(\\sum_{i=0}^{K}\\exp(\\text{input}_i)\\right)), j = 1,..., K
+        loss_j =  -\\sum_{i=0}^{K}\\text{label}_i
+        \\left(\\text{logits}_i - \\log\\left(\\sum_{i=0}^{K}
+        \\exp(\\text{logits}_i)\\right)\\right), j = 1,...,K
+
+ 
+    It is useful when training a classification problem with ``C`` classes.
+

    Parameters:
        input (Variable): Input tensor, the data type is float32, float64. Shape is
@ -173,9 +183,9 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
        label (Variable): Label tensor, the data type is int64. Shape is (N), where each
 	    value is 0 <= label[i] <= C-1, and if shape is more than 2D, this is
 	    (N, D1, D2,..., Dk), k >= 1.
-        weight (Variable, optional): Weight tensor, a manual rescaling weight given
-            to each class and the shape is (C). It has the same dimensions as class
-	    number and the data type is float32, float64. Default is ``'None'``.
+        weight (Variable, optional): Weight tensor, a manual rescaling weight for each
+            sample relative to each class. It has the same shape as label.
+	    and the data type is float32, float64. Default is ``'None'``.
        reduction (str, optional): Indicate how to average the loss by batch_size,
            the candicates are ``'none'`` | ``'mean'`` | ``'sum'``.
            If :attr:`reduction` is ``'mean'``, the reduced mean loss is returned;
@ -184,6 +194,12 @@ class CrossEntropyLoss(fluid.dygraph.Layer):
            Default is ``'mean'``.
        ignore_index (int64, optional): Specifies a target value that is ignored
            and does not contribute to the input gradient. Default is ``-100``.
+        soft_label (bool): indicate whether label is soft. Default False, meaning that
+                the label is hard. If soft_label=True, the label is soft.
+        axis (int, optional): The index of dimension to perform softmax calculations. It 
+                              should be in range :math:`[-1, rank - 1]`, while :math:`rank`
+                              is the rank of input :attr:`logits`. Default: -1.
+

    Returns:
        The tensor variable storing the cross_entropy_loss of input and label.
@ -192,64 +208,47 @@ class CrossEntropyLoss(fluid.dygraph.Layer):

    Examples:
        .. code-block:: python
-
-            # declarative mode
            import paddle
-            import paddle.fluid as fluid
            import numpy as np
-
-            input = fluid.data(name='input', shape=[5, 100], dtype='float64')
-            label = fluid.data(name='label', shape=[5], dtype='int64')
-            weight = fluid.data(name='weight', shape=[100], dtype='float64')
-            ce_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight, reduction='mean')
-            output = ce_loss(input, label)
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            exe.run(fluid.default_startup_program())
-            input_data = np.random.random([5, 100]).astype("float64")
-            label_data = np.random.randint(0, 100, size=(5)).astype(np.int64)
-            weight_data = np.random.random([100]).astype("float64")
-            output = exe.run(fluid.default_main_program(),
-                        feed={"input": input_data, "label": label_data,"weight": weight_data},
-                        fetch_list=[output],
-                        return_numpy=True)
-            print(output)
-
-            # imperative mode
-            import paddle.fluid.dygraph as dg
-            with dg.guard(place) as g:
-                input = dg.to_variable(input_data)
-                label = dg.to_variable(label_data)
-                weight = dg.to_variable(weight_data)
-                ce_loss = paddle.nn.loss.CrossEntropyLoss(weight=weight, reduction='mean')
-                output = ce_loss(input, label)
-                print(output.numpy())
+            input_np = np.random.random([2, 4]).astype(np.float64)
+            label_np = np.random.randint(0, 4, size=(2, 1)).astype(np.int64)
+            weight_np = np.random.random([4]).astype(np.float64) #shape:C
+            weight_ce = weight_np[label_np]  #shape:N,1
+            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                weight=paddle.to_tensor(weight_ce))
+            output = cross_entropy_loss(
+                paddle.to_tensor(input_np),
+                paddle.to_tensor(label_np))
+            print(output.numpy()) #[1.44375251]
    """

-    def __init__(self, weight=None, ignore_index=-100, reduction='mean'):
+    def __init__(self,
+                 weight=None,
+                 ignore_index=-100,
+                 reduction='mean',
+                 soft_label=False,
+                 axis=-1,
+                 name=None):
        super(CrossEntropyLoss, self).__init__()
        self.weight = weight
        self.reduction = reduction
        self.ignore_index = ignore_index
+        self.soft_label = soft_label
+        self.axis = axis
+        self.name = name

    def forward(self, input, label):
-        fluid.data_feeder.check_variable_and_dtype(
-            input, 'input', ['float32', 'float64'], 'cross_entropy_loss')
-        fluid.data_feeder.check_variable_and_dtype(label, 'label', ['int64'],
-                                                   'cross_entropy_loss')
-
-        if self.reduction not in ['sum', 'mean', 'none']:
-            raise ValueError(
-                "The value of 'reduction' in cross_entropy_loss should be 'sum', 'mean' or"
-                " 'none', but received %s, which is not allowed." %
-                self.reduction)
-
-        return paddle.nn.functional.cross_entropy(
+        ret = paddle.nn.functional.softmax_cross_entropy(
            input,
            label,
            weight=self.weight,
            ignore_index=self.ignore_index,
-            reduction=self.reduction)
+            reduction=self.reduction,
+            soft_label=self.soft_label,
+            axis=self.axis,
+            name=self.name)
+
+        return ret


 class HSigmoidLoss(fluid.dygraph.Layer):
@ -491,27 +490,29 @@ class L1Loss(fluid.dygraph.Layer):
            If `reduction` is ``'mean'`` or ``'sum'``, the shape of output loss is [1].

    Examples:
-        
        .. code-block:: python
-
            import paddle
+            import numpy as np

-            input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
-            label = paddle.to_tensor([[1.7, 1.0], [0.4, 0.5]])
+            paddle.disable_static()
+            input_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32")
+            label_data = np.array([[1.7, 1], [0.4, 0.5]]).astype("float32")
+            input = paddle.to_tensor(input_data)
+            label = paddle.to_tensor(label_data)

            l1_loss = paddle.nn.loss.L1Loss()
            output = l1_loss(input, label)
-            print(output)
+            print(output.numpy())
            # [0.35]

            l1_loss = paddle.nn.loss.L1Loss(reduction='sum')
            output = l1_loss(input, label)
-            print(output)
+            print(output.numpy())
            # [1.4]

            l1_loss = paddle.nn.loss.L1Loss(reduction='none')
            output = l1_loss(input, label)
-            print(output)
+            print(output.numpy())
            # [[0.20000005 0.19999999]
            # [0.2        0.79999995]]
    """
@ -622,7 +623,9 @@ class BCELoss(fluid.dygraph.Layer):


 class NLLLoss(fluid.dygraph.Layer):
-    r"""
+    """
+	:alias_main: paddle.nn.NLLLoss
+	:alias: paddle.nn.NLLLoss,paddle.nn.layer.NLLLoss,paddle.nn.layer.loss.NLLLoss

    This class accepts input and target label and returns negative log likelihood
    cross error. It is useful to train a classification problem with C classes.
@ -689,7 +692,7 @@ class NLLLoss(fluid.dygraph.Layer):
                import paddle
                import numpy as np

-                nll_loss = paddle.nn.NLLLoss()
+                nll_loss = paddle.nn.layer.NLLLoss()
                log_softmax = paddle.nn.LogSoftmax(axis=1)

                input_np = np.array([[0.88103855, 0.9908683 , 0.6226845 ],
@ -699,11 +702,13 @@ class NLLLoss(fluid.dygraph.Layer):
                                 [0.05689114, 0.0862954 , 0.6325046 ]]).astype(np.float32)
                label_np = np.array([0, 2, 1, 1, 0]).astype(np.int64)

+                place = paddle.CPUPlace()
+                paddle.disable_static(place)
                input = paddle.to_tensor(input_np)
                log_out = log_softmax(input)
                label = paddle.to_tensor(label_np)
                result = nll_loss(log_out, label)
-                print(result) # [1.0720209]
+                print(result.numpy()) # [1.0720209]

    """

@ -999,7 +1004,7 @@ class SmoothL1Loss(fluid.dygraph.Layer):
            is the same as the shape of input.

    Returns:
-        The tensor storing the smooth_l1_loss of input and label.
+        The tensor variable storing the smooth_l1_loss of input and label.

    Return type: Tensor.