tod bug fix

pull/9326/head
yoni 4 years ago committed by Haim Moushkatel
parent 3b46781aa0
commit 15d4f6fb04

@ -20,7 +20,6 @@
#include <tuple>
#include <unordered_map>
#include "include/lite_session.h"
#include "include/train_model.h"
namespace mindspore {
namespace session {
@ -33,19 +32,23 @@ class TrainSession : public session::LiteSession {
/// \brief Static method to create a TrainSession object
///
/// \param[in] model_buf A buffer that was read from a MS model file
/// \param[in] size Length of the buffer
/// \param[in] context Defines the context of the session to be created
/// \param[in] train_mode training mode to initialize Session with
///
/// \return Pointer of MindSpore Lite TrainSession
static TrainSession *CreateSession(lite::Context *context);
static TrainSession *CreateSession(const char *model_buf, size_t size, lite::Context *context,
bool train_mode = false);
/// \brief Compile MindSpore Lite train model
///
/// \note CompileTrainGraph should be called before RunGraph
/// \brief Static method to create a TrainSession object
///
/// \param[in] model Define the model to be compiled
/// \param[in] filename Filename to read flatbuffer from
/// \param[in] context Defines the context of the session to be created
/// \param[in] train_mode training mode to initialize Session with
///
/// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h
virtual int CompileTrainGraph(lite::TrainModel *model) = 0;
/// \return Pointer of MindSpore Lite TrainSession
static TrainSession *CreateSession(const std::string &filename, lite::Context *context, bool train_mode = false);
/// \brief Export the trained model into a buffer
///
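For context, the two CreateSession overloads above create a TrainSession either from an in-memory flatbuffer or directly from a file. A minimal usage sketch, assuming the public MindSpore Lite headers of this release; the model file name, thread count, and the RunTrainingStep helper are hypothetical:

#include "include/context.h"
#include "include/train_session.h"

int RunTrainingStep() {
  mindspore::lite::Context context;
  context.thread_num_ = 2;  // hypothetical thread count
  // train_mode = true puts the session into training mode right after the graph is compiled.
  auto *session =
      mindspore::session::TrainSession::CreateSession("lenet_train.ms", &context, /*train_mode=*/true);
  if (session == nullptr) {
    return -1;  // creation (file read or graph compilation) failed
  }
  // ... fill the input tensors, call session->RunGraph(), read the loss from the outputs ...
  delete session;
  return 0;
}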

@ -30,6 +30,7 @@ typedef struct SoftmaxCrossEntropyParameter {
unsigned int number_of_classes_;
int n_dim_;
int input_shape_[5];
int is_grad;
} SoftmaxCrossEntropyParameter;
void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, float *sum_data, float *sum_mul,

@ -253,6 +253,7 @@ union PrimitiveType {
All,
Assert,
Adder,
SparseSoftmaxCrossEntropy
}
enum QuantType: int {

@ -301,12 +301,14 @@ table BatchNorm {
}
table BiasGrad {
axis: [int];
}
table SoftmaxCrossEntropy {
axis: [int];
}
table SparseSoftmaxCrossEntropy {
isGrad: int;
}
table make_tuple {

@ -23,9 +23,6 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
std::vector<int> BiasGrad::GetAxis() const { return this->primitive_->value.AsBiasGrad()->axis; }
void BiasGrad::SetAxis(const std::vector<int> &axis) { this->primitive_->value.AsBiasGrad()->axis = axis; }
int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@ -45,11 +42,11 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &i
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
if (prim.GetAttr("axis") == nullptr) {
MS_LOG(WARNING) << "get axis failed";
attr->axis = {0};
} else {
attr->axis = CastToInt(prim.GetAttr("axis"));
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
this->primitive_->value.value = attr;
}
@ -64,21 +61,12 @@ int BiasGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffer
MS_LOG(ERROR) << "value_as_BiasGrad return nullptr";
return RET_ERROR;
}
std::vector<int32_t> axis;
if (attr->axis() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->axis()->size()); i++) {
axis.push_back(attr->axis()->data()[i]);
}
}
auto val_offset = schema::CreateBiasGradDirect(*fbb, &axis);
auto val_offset = schema::CreateBiasGrad(*fbb);
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_BiasGrad, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
std::vector<int> BiasGrad::GetAxis() const {
auto fb_vector = this->primitive_->value_as_BiasGrad()->axis();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}
PrimitiveC *BiasGradCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<BiasGrad>(primitive);

@ -82,7 +82,8 @@ void ConvertConvWeight(const ParameterPtr &param_node) {
auto weight = std::dynamic_pointer_cast<ParamValueLite>(param);
MS_ASSERT(weight != nullptr);
std::unique_ptr<T> buf(new (std::nothrow) T[weight->tensor_shape_size()]);
std::unique_ptr<T[]> buf(new (std::nothrow) T[weight->tensor_shape_size()]);
if (buf == nullptr) {
MS_LOG(ERROR) << "new buf failed";
return;
@ -150,9 +151,13 @@ void Conv2D::PopulaterConv2DMultiGroup(const Primitive &prim, schema::PrimitiveT
attr->padRight = pad_list[3];
auto dilation = CastToInt(prim.GetAttr("dilation"));
#ifdef SUPPORT_TRAIN
attr->dilateH = dilation[2];
attr->dilateW = dilation[3];
#else
attr->dilateH = dilation[0];
attr->dilateW = dilation[1];
#endif
auto kernel_size = CastToInt(prim.GetAttr("kernel_size"));
attr->kernelH = kernel_size[0];
attr->kernelW = kernel_size[1];

@ -110,8 +110,8 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vector<AnfNod
attr->padRight = pad_list[3];
auto dilation = CastToInt(prim.GetAttr("dilation"));
attr->dilateH = dilation[0];
attr->dilateW = dilation[1];
attr->dilateH = dilation[2];
attr->dilateW = dilation[3];
auto kernel_size = CastToInt(prim.GetAttr("kernel_size"));
attr->kernelH = kernel_size[0];

@ -111,8 +111,8 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vector<AnfNode
attr->padRight = pad_list[3];
auto dilation = CastToInt(prim.GetAttr("dilation"));
attr->dilateH = dilation[0];
attr->dilateW = dilation[1];
attr->dilateH = dilation[2];
attr->dilateW = dilation[3];
auto kernel_size = CastToInt(prim.GetAttr("kernel_size"));
attr->kernelH = kernel_size[0];

@ -76,7 +76,7 @@ void ConvertConvWeight(const ParameterPtr &param_node) {
auto weight = std::dynamic_pointer_cast<ParamValueLite>(param);
MS_ASSERT(weight != nullptr);
std::unique_ptr<T> buf(new (std::nothrow) T[weight->tensor_shape_size()]);
std::unique_ptr<T[]> buf(new (std::nothrow) T[weight->tensor_shape_size()]);
if (buf == nullptr) {
MS_LOG(ERROR) << "new buf failed";
return;

@ -89,7 +89,6 @@ int Dropout::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> out
output0->set_shape(input->shape());
output0->set_data_type(input->data_type());
output0->set_format(input->format());
if (outputs_.size() > 1) {
auto output1 = outputs_[1];
MS_ASSERT(output1 != nullptr);
@ -97,7 +96,6 @@ int Dropout::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> out
output1->set_data_type(input->data_type());
output1->set_format(input->format());
}
return RET_OK;
}

@ -92,9 +92,7 @@ int DropoutGrad::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *>
output->set_shape(input->shape());
output->set_data_type(input->data_type());
output->set_format(input->format());
return RET_OK;
}
} // namespace lite
} // namespace mindspore

@ -31,6 +31,13 @@ OpParameter *PopulateTileParameter(const mindspore::lite::PrimitiveC *primitive)
memset(tile_param, 0, sizeof(TileParameter));
tile_param->op_parameter_.type_ = primitive->Type();
auto param = reinterpret_cast<mindspore::lite::Tile *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
#ifdef SUPPORT_TRAIN
auto multiples = param->GetMultiples();
tile_param->in_dim_ = multiples.size();
for (int i = 0; i < tile_param->in_dim_; ++i) {
tile_param->multiples_[i] = multiples[i];
}
#else
auto dims = param->GetDims();
auto multiples = param->GetMultiples();
for (size_t i = 0; i < kDimension_4d; ++i) {
@ -39,6 +46,7 @@ OpParameter *PopulateTileParameter(const mindspore::lite::PrimitiveC *primitive)
for (size_t i = 0; i < dims.size(); ++i) {
tile_param->multiples_[dims[i]] = multiples[i];
}
#endif
return reinterpret_cast<OpParameter *>(tile_param);
}

@ -161,6 +161,7 @@
#include "src/ops/group_conv2d_grad_input.h"
#include "src/ops/power_grad.h"
#include "src/ops/softmax_cross_entropy.h"
#include "src/ops/sparse_softmax_cross_entropy.h"
#include "src/ops/bn_grad.h"
#include "src/ops/arithmetic_grad.h"
#include "src/ops/depend.h"
@ -578,6 +579,8 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std:
#ifdef SUPPORT_TRAIN
} else if (op_type == "SoftmaxCrossEntropyWithLogits") {
return NewPrimitiveC<SoftmaxCrossEntropy>(prim, inputs, quantType);
} else if (op_type == "SparseSoftmaxCrossEntropyWithLogits") {
return NewPrimitiveC<SparseSoftmaxCrossEntropy>(prim, inputs, quantType);
} else if (op_type == "BiasAddGrad") {
return NewPrimitiveC<BiasGrad>(prim, inputs, quantType);
} else if (op_type == "ApplyMomentum") {
@ -916,6 +919,8 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) {
return new (std::nothrow) ArithmeticGrad(primitive);
case schema::PrimitiveType_SoftmaxCrossEntropy:
return new (std::nothrow) SoftmaxCrossEntropy(primitive);
case schema::PrimitiveType_SparseSoftmaxCrossEntropy:
return new (std::nothrow) SparseSoftmaxCrossEntropy(primitive);
case schema::PrimitiveType_PowerGrad:
return new (std::nothrow) PowerGrad(primitive);
case schema::PrimitiveType_Depend:

@ -23,11 +23,6 @@
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
std::vector<int> SoftmaxCrossEntropy::GetAxis() const { return this->primitive_->value.AsSoftmaxCrossEntropy()->axis; }
void SoftmaxCrossEntropy::SetAxis(const std::vector<int> &axis) {
this->primitive_->value.AsSoftmaxCrossEntropy()->axis = axis;
}
int SoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@ -48,7 +43,6 @@ int SoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vector<Anf
return RET_ERROR;
}
attr->axis = {0};
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
@ -59,10 +53,6 @@ int SoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vector<Anf
}
#else
std::vector<int> SoftmaxCrossEntropy::GetAxis() const {
auto fb_vector = this->primitive_->value_as_SoftmaxCrossEntropy()->axis();
return std::vector<int>(fb_vector->begin(), fb_vector->end());
}
int SoftmaxCrossEntropy::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
MS_ASSERT(nullptr != fbb);
@ -71,13 +61,8 @@ int SoftmaxCrossEntropy::UnPackToFlatBuilder(const schema::Primitive *primitive,
MS_LOG(ERROR) << "value_as_SoftmaxCrossEntropy return nullptr";
return RET_ERROR;
}
std::vector<int32_t> axis;
if (attr->axis() != nullptr) {
for (int i = 0; i < static_cast<int>(attr->axis()->size()); i++) {
axis.push_back(attr->axis()->data()[i]);
}
}
auto val_offset = schema::CreateSoftmaxCrossEntropyDirect(*fbb, &axis);
auto val_offset = schema::CreateSoftmaxCrossEntropy(*fbb);
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_SoftmaxCrossEntropy, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
@ -100,6 +85,7 @@ int SoftmaxCrossEntropy::InferShape(std::vector<Tensor *> inputs, std::vector<Te
MS_ASSERT(out != nullptr);
std::vector<int> outshape;
outshape.push_back(in0->shape()[0]);
outshape.push_back(1);
out->set_shape(outshape);
out->set_data_type(in0->data_type());

@ -0,0 +1,120 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/ops/sparse_softmax_cross_entropy.h"
#ifndef PRIMITIVE_WRITEABLE
#include "src/ops/ops_register.h"
#endif
namespace mindspore {
namespace lite {
#ifdef PRIMITIVE_WRITEABLE
int SparseSoftmaxCrossEntropy::GetIsGrad() const {
return this->primitive_->value.AsSparseSoftmaxCrossEntropy()->isGrad;
}
void SparseSoftmaxCrossEntropy::SetIsGrad(int isGrad) {
this->primitive_->value.AsSparseSoftmaxCrossEntropy()->isGrad = isGrad;
}
int SparseSoftmaxCrossEntropy::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
if (this->primitive_ == nullptr) {
this->primitive_ = new (std::nothrow) schema::PrimitiveT;
if (this->primitive_ == nullptr) {
MS_LOG(ERROR) << "new primitiveT failed";
return RET_ERROR;
}
this->primitive_->value.type = schema::PrimitiveType_SparseSoftmaxCrossEntropy;
}
if (this->primitive_->value.type != schema::PrimitiveType_SparseSoftmaxCrossEntropy) {
MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
return RET_ERROR;
}
if (this->primitive_->value.value == nullptr) {
auto attr = new (std::nothrow) schema::SparseSoftmaxCrossEntropyT();
if (attr == nullptr) {
MS_LOG(ERROR) << "new primitiveT value failed";
return RET_ERROR;
}
attr->isGrad = GetValue<bool>(prim.GetAttr("is_grad"));
this->primitive_->value.value = attr;
if (this->primitive_->value.value == nullptr) {
MS_LOG(ERROR) << "primitive value is nullptr";
return RET_ERROR;
}
}
return RET_OK;
}
#else
int SparseSoftmaxCrossEntropy::GetIsGrad() const {
return this->primitive_->value_as_SparseSoftmaxCrossEntropy()->isGrad();
}
int SparseSoftmaxCrossEntropy::UnPackToFlatBuilder(const schema::Primitive *primitive,
flatbuffers::FlatBufferBuilder *fbb) {
MS_ASSERT(nullptr != primitive);
MS_ASSERT(nullptr != fbb);
auto attr = primitive->value_as_SparseSoftmaxCrossEntropy();
if (attr == nullptr) {
MS_LOG(ERROR) << "value_as_SparseSoftmaxCrossEntropy return nullptr";
return RET_ERROR;
}
auto val_offset = schema::CreateSparseSoftmaxCrossEntropy(*fbb, attr->isGrad());
auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_SparseSoftmaxCrossEntropy, val_offset.o);
fbb->Finish(prim_offset);
return RET_OK;
}
PrimitiveC *SparseSoftmaxCrossEntropyCreator(const schema::Primitive *primitive) {
return PrimitiveC::NewPrimitiveC<SparseSoftmaxCrossEntropy>(primitive);
}
Registry SparseSoftmaxCrossEntropyRegistry(schema::PrimitiveType_SparseSoftmaxCrossEntropy,
SparseSoftmaxCrossEntropyCreator);
#endif
int SparseSoftmaxCrossEntropy::InferShape(std::vector<Tensor *> inputs, std::vector<Tensor *> outputs) {
if (2 != inputs.size()) {
MS_LOG(ERROR) << "SparseSoftmaxCrossEntropy should have at two inputs";
return RET_ERROR;
}
if (1 != outputs.size()) {
MS_LOG(ERROR) << "SparseSoftmaxCrossEntropy should have one output";
return RET_ERROR;
}
auto *in0 = inputs.front();
MS_ASSERT(in0 != nullptr);
auto *out = outputs.front();
MS_ASSERT(out != nullptr);
if (GetIsGrad() != 0) {
out->set_shape(in0->shape());
out->set_data_type(in0->data_type());
out->set_format(in0->format());
} else {
std::vector<int> outshape;
outshape.push_back(1);
out->set_shape(outshape);
out->set_data_type(in0->data_type());
out->set_format(in0->format());
}
return RET_OK;
}
} // namespace lite
} // namespace mindspore
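To summarize the InferShape above: the gradient path returns a tensor shaped like the logits, while the forward path returns a single scalar loss. A tiny standalone illustration (the helper name and shapes are made up, not part of the op):

#include <cstdio>
#include <vector>

std::vector<int> SparseSceOutShape(const std::vector<int> &logits_shape, bool is_grad) {
  // grad: same shape as the logits; forward: scalar loss of shape {1}
  return is_grad ? logits_shape : std::vector<int>{1};
}

int main() {
  auto grad_shape = SparseSceOutShape({32, 10}, true);   // -> {32, 10}
  auto loss_shape = SparseSceOutShape({32, 10}, false);  // -> {1}
  printf("%zu %zu\n", grad_shape.size(), loss_shape.size());
  return 0;
}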

@ -0,0 +1,48 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_OPS_SPARSE_SOFTMAX_CROSS_ENTROPY_H_
#define MINDSPORE_LITE_SRC_OPS_SPARSE_SOFTMAX_CROSS_ENTROPY_H_
#include <vector>
#include <set>
#include <cmath>
#include <memory>
#include "src/ops/primitive_c.h"
namespace mindspore {
namespace lite {
class SparseSoftmaxCrossEntropy : public PrimitiveC {
public:
SparseSoftmaxCrossEntropy() = default;
~SparseSoftmaxCrossEntropy() = default;
#ifdef PRIMITIVE_WRITEABLE
MS_DECLARE_PARENT(SparseSoftmaxCrossEntropy, PrimitiveC);
explicit SparseSoftmaxCrossEntropy(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {}
void SetIsGrad(int isGrad);
int UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) override;
#else
int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
#endif
int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
int GetIsGrad() const;
};
} // namespace lite
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_OPS_SPARSE_SOFTMAX_CROSS_ENTROPY_H_

@ -140,6 +140,21 @@ int Tile::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> output
std::vector<int> out_shape;
std::vector<int> multiples = GetMultiples();
#ifdef SUPPORT_TRAIN
const size_t in_dims = input->shape().size();
const size_t delta_dims = in_dims - multiples.size();
size_t i = 0;
for (; i < delta_dims; ++i) {
int tmp = input->shape().at(i);
out_shape.push_back(tmp);
}
for (; i < in_dims; ++i) {
int tmp = input->shape().at(i) * (multiples[i - delta_dims]);
out_shape.push_back(tmp);
}
#else
std::vector<int> dims = GetDims();
const size_t in_dims = input->shape().size();
@ -150,7 +165,7 @@ int Tile::InferShape(std::vector<Tensor *> inputs_, std::vector<Tensor *> output
for (size_t i = 0; i < dims.size(); ++i) {
out_shape[dims[i]] = input->shape()[dims[i]] * (multiples[i]);
}
#endif
output->set_shape(out_shape);
return RET_OK;
}
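The SUPPORT_TRAIN branch added above lets the multiples vector be shorter than the input rank: leading dimensions pass through unchanged and only the trailing dimensions are multiplied. A standalone re-expression of that rule, not the library code, with made-up shapes:

#include <cstdio>
#include <vector>

std::vector<int> TileTrainOutShape(const std::vector<int> &in_shape, const std::vector<int> &multiples) {
  std::vector<int> out_shape;
  const size_t delta_dims = in_shape.size() - multiples.size();
  for (size_t i = 0; i < in_shape.size(); ++i) {
    int dim = in_shape[i];
    if (i >= delta_dims) dim *= multiples[i - delta_dims];  // only trailing dims are tiled
    out_shape.push_back(dim);
  }
  return out_shape;
}

int main() {
  // e.g. input [2, 3, 4] tiled with multiples [5] -> [2, 3, 20]
  auto out = TileTrainOutShape({2, 3, 4}, {5});
  for (int d : out) printf("%d ", d);
  return 0;
}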

@ -45,22 +45,28 @@ int AdamCPUKernel::Execute(int task_id) {
auto eps = reinterpret_cast<float *>(in_tensors_[8]->MutableData())[0];
auto gradient = reinterpret_cast<float *>(in_tensors_[9]->MutableData());
size_t elem_num = in_tensors_[0]->ElementsNum();
if (fabs(1 - beta1_power) <= 0.0f) {
MS_LOG(ERROR) << "divisor cannot be 0";
if ((1.f - beta1_power) <= 0.0f) {
MS_LOG(ERROR) << "divisor cannot be 0 or below";
return RET_ERROR;
}
if ((1.f - beta2_power) < 0.0f) {
MS_LOG(ERROR) << "sqrt cannot be negative";
return RET_ERROR;
}
auto update_lr = learning_rate * std::sqrt(1 - beta2_power) / (1 - beta1_power);
auto update_lr = learning_rate * std::sqrt(1.f - beta2_power) / (1.f - beta1_power);
if (adam_param_->use_nesterov_) { // Nadam
for (size_t i = 0; i < elem_num; ++i) {
m[i] += (gradient[i] - m[i]) * (1 - beta1);
v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2);
weight[i] -= update_lr * (m[i] * beta1 + (1 - beta1) * gradient[i]) / (std::sqrt(v[i]) + eps);
m[i] += (gradient[i] - m[i]) * (1.f - beta1);
v[i] += (gradient[i] * gradient[i] - v[i]) * (1.f - beta2);
weight[i] -= update_lr * (m[i] * beta1 + (1.f - beta1) * gradient[i]) / (std::sqrt(v[i]) + eps);
}
} else {
for (size_t i = 0; i < elem_num; ++i) {
m[i] += (gradient[i] - m[i]) * (1 - beta1);
v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2);
m[i] += (gradient[i] - m[i]) * (1.f - beta1);
v[i] += (gradient[i] * gradient[i] - v[i]) * (1.f - beta2);
weight[i] -= update_lr * m[i] / (std::sqrt(v[i]) + eps);
}
}
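For reference, a single-weight, single-step sketch of the plain (non-Nesterov) Adam update the loop above performs, using made-up hyper-parameter values; the added checks guard exactly the (1 - beta1_power) divisor and the (1 - beta2_power) operand of the square root:

#include <cmath>
#include <cstdio>

int main() {
  float w = 1.0f, m = 0.0f, v = 0.0f;
  const float lr = 0.01f, beta1 = 0.9f, beta2 = 0.999f, eps = 1e-8f;
  const float beta1_power = beta1, beta2_power = beta2;  // first step: beta^t with t = 1
  const float grad = 0.5f;

  // Bias-corrected learning rate; the kernel rejects (1 - beta1_power) <= 0 and (1 - beta2_power) < 0.
  const float update_lr = lr * std::sqrt(1.f - beta2_power) / (1.f - beta1_power);
  m += (grad - m) * (1.f - beta1);             // first-moment estimate
  v += (grad * grad - v) * (1.f - beta2);      // second-moment estimate
  w -= update_lr * m / (std::sqrt(v) + eps);   // plain Adam weight update
  printf("w = %f\n", w);
  return 0;
}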

@ -115,7 +115,6 @@ kernel::LiteKernel *CpuDropoutFp32KernelCreator(const std::vector<lite::Tensor *
auto *kernel = new (std::nothrow) DropoutCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "Dropout new kernel failed.";
free(opParameter);
return nullptr;
}
auto ret = kernel->Init();

@ -102,7 +102,6 @@ kernel::LiteKernel *CpuDropoutGradFp32KernelCreator(const std::vector<lite::Tens
auto *kernel = new (std::nothrow) DropoutGradCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
MS_LOG(ERROR) << "DropoutGrad new kernel failed.";
free(opParameter);
return nullptr;
}
auto ret = kernel->Init();

@ -39,16 +39,37 @@ int SgdCPUKernel::Execute(int task_id) {
auto gradient = reinterpret_cast<float *>(in_tensors_[1]->MutableData());
float moment = reinterpret_cast<float *>(in_tensors_[4]->MutableData())[0];
size_t elem_num = in_tensors_[0]->ElementsNum();
if (sgd_param_->use_nesterov_) {
for (size_t i = 0; i < elem_num; ++i) {
accumulate[i] = accumulate[i] * moment + gradient[i];
weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate;
auto stat = reinterpret_cast<float *>(in_tensors_[5]->MutableData());
if (stat[0] > 0) {
stat[0] = 0;
memcpy(accumulate, gradient, elem_num * sizeof(float));
if (sgd_param_->use_nesterov_) {
for (size_t i = 0; i < elem_num; ++i) {
weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate;
}
} else {
for (size_t i = 0; i < elem_num; ++i) {
weight[i] -= accumulate[i] * learning_rate;
}
}
} else {
for (size_t i = 0; i < elem_num; ++i) {
accumulate[i] = accumulate[i] * moment + gradient[i] * (1.f - sgd_param_->dampening_);
weight[i] -= accumulate[i] * learning_rate;
if (moment > 0.f) {
if (sgd_param_->use_nesterov_) {
for (size_t i = 0; i < elem_num; ++i) {
accumulate[i] = accumulate[i] * moment + gradient[i] * (1.f - sgd_param_->dampening_);
weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate;
}
} else {
for (size_t i = 0; i < elem_num; ++i) {
accumulate[i] = accumulate[i] * moment + gradient[i] * (1.f - sgd_param_->dampening_);
weight[i] -= accumulate[i] * learning_rate;
}
}
} else {
for (size_t i = 0; i < elem_num; ++i) {
weight[i] -= gradient[i] * learning_rate;
}
}
}
return RET_OK;
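A reduced sketch of the regular momentum branch above (stat already cleared, moment > 0, use_nesterov_ false); all values are made up:

#include <cstdio>

int main() {
  float w = 1.0f, acc = 0.2f;
  const float lr = 0.1f, moment = 0.9f, dampening = 0.0f, grad = 0.5f;

  acc = acc * moment + grad * (1.f - dampening);  // velocity (accumulator) update
  w -= acc * lr;                                  // weight update
  printf("acc = %f, w = %f\n", acc, w);           // acc = 0.68, w = 0.932
  return 0;
}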

@ -34,27 +34,29 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::ReSize() { return RET_OK; }
void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *labels, const float *logits, float *grads,
float *output2) const {
float eps = 1e-6;
float total_loss = 0.0;
if (grads != nullptr) {
for (int i = 0; i < param_->batch_size_; ++i) {
float loss = 0.f;
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
float logit =
-logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]);
grads[i * param_->number_of_classes_ + j] =
(logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]);
total_loss += labels[i * param_->number_of_classes_ + j] * logit;
loss += labels[i * param_->number_of_classes_ + j] * logit;
}
output2[i] = loss;
}
} else {
for (int i = 0; i < param_->batch_size_; ++i) {
float loss = 0.f;
for (size_t j = 0; j < param_->number_of_classes_; ++j) {
float logit =
-logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]);
total_loss += labels[i * param_->number_of_classes_ + j] * logit;
loss += labels[i * param_->number_of_classes_ + j] * logit;
}
output2[i] = loss;
}
}
output2[0] = total_loss / param_->batch_size_;
}
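With this change the kernel writes a per-sample loss into output2[i] instead of a single batch-averaged scalar, which matches the extra `1` pushed into the output shape in SoftmaxCrossEntropy::InferShape. A standalone sketch of the per-sample computation, with made-up labels and already-softmaxed logits:

#include <cmath>
#include <cstdio>

int main() {
  const int batch = 2, classes = 3;
  const float eps = 1e-6f;
  const float labels[batch * classes] = {1, 0, 0, 0, 1, 0};          // one-hot targets
  const float logits[batch * classes] = {0.7f, 0.2f, 0.1f, 0.1f, 0.8f, 0.1f};  // softmax outputs
  float loss[batch];

  for (int i = 0; i < batch; ++i) {
    loss[i] = 0.f;
    for (int j = 0; j < classes; ++j) {
      const float p = logits[i * classes + j];
      loss[i] += labels[i * classes + j] * -std::log(p <= 0.f ? eps : p);  // clamp to eps like the kernel
    }
    printf("loss[%d] = %f\n", i, loss[i]);  // -log(0.7), -log(0.8)
  }
  return 0;
}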
int SoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {

@ -25,7 +25,7 @@
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_SoftmaxCrossEntropy;
using mindspore::schema::PrimitiveType_SparseSoftmaxCrossEntropy;
namespace mindspore::kernel {
@ -51,10 +51,9 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const int *
return RET_OK;
}
int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses, float *grads,
float *output) const {
int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *labels, const float *losses,
float *grads) const {
size_t row_start = 0;
float total_loss = 0;
for (int i = 0; i < param->batch_size_; ++i) {
if (labels[i] < 0) {
MS_LOG(ERROR) << "label value must >= 0";
@ -65,7 +64,6 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *lab
MS_LOG(ERROR) << "error label input!";
return RET_ERROR;
} else {
total_loss -= logf(losses[i * param->number_of_classes_ + label]);
for (size_t j = 0; j < param->number_of_classes_; ++j) {
size_t index = row_start + j;
if (j == label) {
@ -77,18 +75,14 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *lab
}
row_start += param->number_of_classes_;
}
output[0] = total_loss / param->batch_size_;
return RET_OK;
}
int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
auto sce_param = reinterpret_cast<SoftmaxCrossEntropyParameter *>(op_parameter_);
auto ins = reinterpret_cast<float *>(in_tensors_.at(0)->data_c());
auto labels = reinterpret_cast<int *>(in_tensors_.at(1)->data_c());
float *out = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
float *grads = nullptr;
if (IsTrain() && out_tensors_.size() > 1) {
grads = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData());
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
MS_ASSERT(out != nullptr);
MS_ASSERT(labels != nullptr);
@ -99,8 +93,8 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) {
std::fill(losses_, losses_ + data_size, 0.f);
std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0.f);
Softmax(ins, losses_, sum_data_, &sm_params_);
if (IsTrain()) {
GradPostExecute(labels, losses_, grads, out);
if (sce_param->is_grad) {
GradPostExecute(labels, losses_, out);
} else {
ForwardPostExecute(labels, losses_, out);
}
@ -133,12 +127,12 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() {
param->batch_size_ = dims[0];
for (unsigned int i = 0; i < dims.size(); i++) param->input_shape_[i] = dims[i];
if (2 != this->in_tensors_.size()) {
MS_LOG(ERROR) << "softmax entropy loss should have two inputs";
MS_LOG(ERROR) << "sparse softmax entropy loss should have two inputs";
return RET_ERROR;
}
auto *in0 = in_tensors_.front();
if (in0 == nullptr) {
MS_LOG(ERROR) << "softmax etropy loss in0 have no data";
MS_LOG(ERROR) << "sparse softmax etropy loss in0 have no data";
return RET_ERROR;
}
size_t data_size = in_tensors_.at(0)->ElementsNum();
@ -155,7 +149,7 @@ kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator(
const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter,
const lite::InnerContext *ctx, const kernel::KernelKey &desc, const mindspore::lite::PrimitiveC *primitive) {
MS_ASSERT(opParameter != nullptr);
MS_ASSERT(desc.type == schema::PrimitiveType_SoftmaxCrossEntropy);
MS_ASSERT(desc.type == schema::PrimitiveType_SparseSoftmaxCrossEntropy);
auto *kernel =
new (std::nothrow) SparseSoftmaxCrossEntropyWithLogitsCPUKernel(opParameter, inputs, outputs, ctx, primitive);
if (kernel == nullptr) {
@ -172,4 +166,6 @@ kernel::LiteKernel *CpuSparseSoftmaxCrossEntropyFp32KernelCreator(
}
return kernel;
}
REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SparseSoftmaxCrossEntropy,
CpuSparseSoftmaxCrossEntropyFp32KernelCreator)
} // namespace mindspore::kernel

@ -38,7 +38,7 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel {
~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {}
int ForwardPostExecute(const int *labels, const float *losses, float *output) const;
int GradPostExecute(const int *labels, const float *losses, float *grads, float *output) const;
int GradPostExecute(const int *labels, const float *losses, float *grads) const;
int Init() override;
int ReSize() override;

@ -14,7 +14,7 @@
* limitations under the License.
*/
#include "src/ops/primitive_c.h"
#include "include/train_model.h"
#include "src/train/train_model.h"
#include "src/common/log_adapter.h"
#include "include/errorcode.h"
#include "src/common/graph_util.h"

Some files were not shown because too many files have changed in this diff.