mod_SoftmaxCrossEntropyWithLogits

pull/5502/head
wanyiming 5 years ago
parent b346f0b3ec
commit 0ec70068ae

@@ -213,13 +213,9 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         of entry is a valid one.

     Args:
-        is_grad (bool): Specifies whether calculate grad only. Default: True.
         sparse (bool): Specifies whether labels use sparse format or not. Default: False.
         reduction (str): Type of reduction to be applied to loss. The optional values are "mean", "sum", and "none".
             If "none", do not perform reduction. Default: "none".
-        smooth_factor (float): Label smoothing factor. It is a optional input which should be in range [0, 1].
-            Default: 0.
-        num_classes (int): The number of classes in the task. It is a optional input Default: 2.

     Inputs:
         - **logits** (Tensor) - Tensor of shape (N, C).
@@ -238,29 +234,22 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         >>> loss(logits, labels)
     """
     def __init__(self,
-                 is_grad=True,
                  sparse=False,
-                 reduction='none',
-                 smooth_factor=0,
-                 num_classes=2):
+                 reduction='none'):
         super(SoftmaxCrossEntropyWithLogits, self).__init__(reduction)
-        self.is_grad = is_grad
         self.sparse = sparse
-        validator.check_number_range(
-            "smooth_factor", smooth_factor, 0, 1, Rel.INC_BOTH, self.cls_name)
-        self.smooth_factor = smooth_factor
-        self.num_classes = num_classes
+        self.reduction = reduction
         self.softmax_cross_entropy = _selected_ops.SoftmaxCrossEntropyWithLogits()
         self.one_hot = P.OneHot()
-        self.on_value = Tensor(1.0 - self.smooth_factor, mstype.float32)
-        self.off_value = Tensor(1.0 * self.smooth_factor / (self.num_classes - 1), mstype.float32)
+        self.on_value = Tensor(1.0, mstype.float32)
+        self.off_value = Tensor(0., mstype.float32)
         self.is_cpugpu = context.get_context('device_target') in ["CPU", "GPU"]
         if self.is_cpugpu:
-            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits(is_grad=self.is_grad)
+            self.sparse_softmax_cross_entropy = P.SparseSoftmaxCrossEntropyWithLogits()

     def construct(self, logits, labels):
-        if self.is_cpugpu and self.sparse:
+        if self.is_cpugpu and self.sparse and self.reduction == 'mean':
             x = self.sparse_softmax_cross_entropy(logits, labels)
             return x
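Note: the net effect of this hunk is that `is_grad`, `smooth_factor`, and `num_classes` are removed from the constructor, and on CPU/GPU the fused sparse kernel is taken only when `reduction == 'mean'`. A minimal migration sketch for downstream call sites (illustrative, not part of the diff):

```python
import mindspore.nn as nn

# Before this commit (these keyword arguments now raise a TypeError):
# loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")

# After this commit: drop is_grad; label smoothing moves to the new
# CrossEntropySmooth cell added later in this diff.
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
```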

@@ -115,7 +115,7 @@ class UncertaintyEvaluation:
         self.epi_uncer_model = EpistemicUncertaintyModel(self.epi_model)
         if self.epi_uncer_model.drop_count == 0:
             if self.task_type == 'classification':
-                net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+                net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
                 net_opt = Adam(self.epi_uncer_model.trainable_params())
                 model = Model(self.epi_uncer_model, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
             else:
@@ -314,7 +314,7 @@ class AleatoricLoss(Cell):
             self.exp = P.Exp()
             self.normal = C.normal
             self.to_tensor = P.ScalarToArray()
-            self.entropy = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+            self.entropy = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
         else:
             self.mean = P.ReduceMean()
             self.exp = P.Exp()

@@ -42,7 +42,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

@@ -45,7 +45,7 @@ if __name__ == "__main__":
     ds_train = create_dataset_cifar10(args.data_path, cfg.batch_size, 1)
     network = AlexNet(cfg.num_classes)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     lr = Tensor(get_lr(0, cfg.learning_rate, cfg.epoch_size, ds_train.get_dataset_size()))
     opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum)
     model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

@@ -41,7 +41,7 @@ if __name__ == '__main__':
     net = GoogleNet(num_classes=cfg.num_classes)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

     if device_target == "Ascend":

@@ -101,7 +101,7 @@ if __name__ == '__main__':
     lr = lr_steps(0, lr_max=cfg.lr_init, total_epochs=cfg.epoch_size, steps_per_epoch=batch_num)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), Tensor(lr), cfg.momentum,
                    weight_decay=cfg.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

     if device_target == "Ascend":
         model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},

@@ -44,7 +44,7 @@ if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     repeat_size = cfg.epoch_size
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

@@ -50,7 +50,7 @@ if __name__ == "__main__":
                               cfg.batch_size)
     network = LeNet5(cfg.num_classes)
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
     time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
     config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,

@@ -53,7 +53,7 @@ if __name__ == "__main__":
                                              per_channel=[True, False])
     # define loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

@@ -62,7 +62,7 @@ if __name__ == "__main__":
                                              symmetric=[False, False])
     # define network loss
-    net_loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
+    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
     # define network optimization
     net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)

@@ -51,8 +51,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

     if args_opt.device_target == "Ascend":
         net.to_float(mstype.float16)

@@ -172,7 +172,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config_gpu.label_smooth,
                                            num_classes=config_gpu.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config_gpu.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
@@ -236,8 +236,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(
             smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(
-            is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
                              do_train=True,
                              config=config_ascend,

@@ -55,7 +55,7 @@ if __name__ == '__main__':
     # convert fusion network to quantization aware network
     network = quant.convert_quant_network(network, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
     # define network loss
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,

@@ -89,7 +89,7 @@ def train_on_ascend():
     if config.label_smooth > 0:
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth, num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     dataset = create_dataset(dataset_path=args_opt.dataset_path,
                              do_train=True,
@@ -150,7 +150,7 @@ def train_on_gpu():
         loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                            num_classes=config.num_classes)
     else:
-        loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,

@@ -41,8 +41,7 @@ if __name__ == '__main__':
     else:
         raise ValueError("Unsupported device_target.")
-    loss = nn.SoftmaxCrossEntropyWithLogits(
-        is_grad=False, sparse=True, reduction='mean')
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     net = mobilenet_v3_large(num_classes=config.num_classes)

     dataset = create_dataset(dataset_path=args_opt.dataset_path,

@@ -162,8 +162,7 @@ if __name__ == '__main__':
         loss = CrossEntropyWithLabelSmooth(
             smooth_factor=config_gpu.label_smooth, num_classes=config_gpu.num_classes)
     else:
-        loss = SoftmaxCrossEntropyWithLogits(
-            is_grad=False, sparse=True, reduction='mean')
+        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     # define dataset
     epoch_size = config_gpu.epoch_size
     dataset = create_dataset(dataset_path=args_opt.dataset_path,

@@ -22,6 +22,7 @@ from mindspore import dataset as de
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.train.model import Model
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from src.CrossEntropySmooth import CrossEntropySmooth

 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -79,8 +80,8 @@ if __name__ == '__main__':
     if args_opt.dataset == "imagenet2012":
         if not config.use_label_smooth:
             config.label_smooth_factor = 0.0
-        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                             smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+        loss = CrossEntropySmooth(sparse=True, reduction='mean',
+                                  smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
     else:
         loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

@@ -0,0 +1,38 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""define loss function for network"""
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common import dtype as mstype
+from mindspore.nn.loss.loss import _Loss
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
+
+
+class CrossEntropySmooth(_Loss):
+    """CrossEntropy"""
+    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
+        super(CrossEntropySmooth, self).__init__()
+        self.onehot = P.OneHot()
+        self.sparse = sparse
+        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
+        self.off_value = Tensor(1.0 * smooth_factor / (num_classes - 1), mstype.float32)
+        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)
+
+    def construct(self, logit, label):
+        if self.sparse:
+            label = self.onehot(label, F.shape(logit)[1], self.on_value, self.off_value)
+        loss = self.ce(logit, label)
+        return loss
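Note: a short usage sketch of the new cell (the values are illustrative, not part of the diff). With `smooth_factor=0.1` and `num_classes=10`, sparse labels are one-hot expanded with `on_value = 0.9` and `off_value = 0.1 / 9 ≈ 0.011` before the underlying `nn.SoftmaxCrossEntropyWithLogits` is applied:

```python
import numpy as np
from mindspore import Tensor
from src.CrossEntropySmooth import CrossEntropySmooth

loss_fn = CrossEntropySmooth(sparse=True, reduction='mean',
                             smooth_factor=0.1, num_classes=10)
logits = Tensor(np.random.randn(2, 10).astype(np.float32))  # shape (N, C)
labels = Tensor(np.array([3, 7], dtype=np.int32))           # sparse class ids
loss = loss_fn(logits, labels)                              # scalar mean loss
```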

@@ -31,6 +31,7 @@ from mindspore.communication.management import init, get_rank, get_group_size
 import mindspore.nn as nn
 import mindspore.common.initializer as weight_init
 from src.lr_generator import get_lr, warmup_cosine_annealing_lr
+from src.CrossEntropySmooth import CrossEntropySmooth

 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--net', type=str, default=None, help='Resnet Model, either resnet50 or resnet101')
@@ -145,8 +146,8 @@ if __name__ == '__main__':
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean",
-                                                 smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+            loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
         else:
             loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
@@ -157,11 +158,10 @@ if __name__ == '__main__':
         if args_opt.dataset == "imagenet2012":
             if not config.use_label_smooth:
                 config.label_smooth_factor = 0.0
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                                 smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+            loss = CrossEntropySmooth(sparse=True, reduction="mean",
+                                      smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
         else:
-            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean", is_grad=False,
-                                                 num_classes=config.class_num)
+            loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")

     if args_opt.net == "resnet101" or args_opt.net == "resnet50":
         opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum, config.weight_decay,

@@ -134,7 +134,7 @@ def test(cloud_args=None):
     net = vgg16(num_classes=args.num_classes, args=args)
     opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), 0.01, args.momentum,
                    weight_decay=args.weight_decay)
-    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

     param_dict = load_checkpoint(args.pre_trained)

@@ -210,7 +210,7 @@ if __name__ == '__main__':
                    loss_scale=args.loss_scale)

     if args.dataset == "cifar10":
-        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
+        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
         model = Model(network, loss_fn=loss, optimizer=opt, metrics={'acc'},
                       amp_level="O2", keep_batchnorm_fp32=False, loss_scale_manager=None)
     else:

@@ -64,7 +64,7 @@ if __name__ == '__main__':
                      weight=Tensor(embedding_table),
                      batch_size=cfg.batch_size)
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()

@@ -70,7 +70,7 @@ if __name__ == '__main__':
     if args.pre_trained:
         load_param_into_net(network, load_checkpoint(args.pre_trained))
-    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
     loss_cb = LossMonitor()

@@ -39,7 +39,7 @@ class MsWrapper(nn.Cell):

 def me_train_tensor(net, input_np, label_np, epoch_size=2):
-    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
     opt = nn.Momentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                       filter(lambda x: x.requires_grad, net.get_parameters()))
     context.set_context(mode=context.GRAPH_MODE)

@@ -66,7 +66,7 @@ def train(net, data, label):
     momentum = 0.9
     optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), learning_rate, momentum)
-    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
+    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
     net_with_criterion = WithLossCell(net, criterion)
     train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
     train_network.set_train()
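Note: the two test hunks above keep the default `reduction='none'`, and after this commit the CPU/GPU fast path in `construct` requires `reduction == 'mean'`, so these call sites now go through the one-hot dense path. A sketch of the distinction (inferred from the `construct` change at the top of this diff, not stated in the commit):

```python
import mindspore.nn as nn

# reduction='none' (default): per-sample loss vector; on CPU/GPU this now
# takes the one-hot + dense SoftmaxCrossEntropyWithLogits path.
loss_none = nn.SoftmaxCrossEntropyWithLogits(sparse=True)

# reduction='mean': scalar loss; still eligible for the fused
# SparseSoftmaxCrossEntropyWithLogits kernel on CPU and GPU.
loss_mean = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
```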

Some files were not shown because too many files have changed in this diff.