!3198 Synchronize with the latest Ascend software suite (18 Jul 2020) and merge branches

Merge pull request !3198 from yanghaoran/code_sync_0718
pull/3198/MERGE
mindspore-ci-bot (committed by Gitee)
commit 6f8863b65d

.gitmodules

@@ -10,9 +10,9 @@
[submodule "third_party/protobuf"]
	path = third_party/protobuf
	url = https://github.com/protocolbuffers/protobuf.git
-[submodule "graphengine"]
-	path = graphengine
-	url = https://gitee.com/mindspore/graphengine.git
[submodule "akg"]
	path = akg
	url = https://gitee.com/mindspore/akg.git
+[submodule "graphengine"]
+	path = graphengine
+	url = https://gitee.com/mindspore/graphengine.git

@@ -15,6 +15,7 @@ include(${GE_SOURCE_DIR}/cmake/external_libs/securec.cmake)
if (NOT ENABLE_D)
    set(GE_PREBUILD_PATH ${GE_SOURCE_DIR}/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR})
    find_library(slog libslog.so ${GE_PREBUILD_PATH})
+    find_library(error_manager liberror_manager.so ${GE_PREBUILD_PATH})
elseif (DEFINED ENV{D_LINK_PATH})
    set(GE_LIB_PATH $ENV{D_LINK_PATH})
    set(GE_SYS_ARCH "")

@@ -156,6 +156,7 @@ if (NOT ENABLE_GE)
        set(ASCEND_PATH /usr/local/Ascend)
    endif ()
    set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
+    set(ASCEND_FWK_PATH ${ASCEND_PATH}/fwkacllib/lib64)
    install(
        FILES
@@ -164,6 +165,7 @@ if (NOT ENABLE_GE)
            ${CMAKE_BINARY_DIR}/graphengine/src/ge/ge_runtime/libge_runtime.so
            ${ASCEND_DRIVER_PATH}/libslog.so
            ${ASCEND_DRIVER_PATH}/libc_sec.so
+            ${ASCEND_FWK_PATH}/liberror_manager.so
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore
    )
@@ -172,6 +174,7 @@ if (NOT ENABLE_GE)
        FILES
            ${CMAKE_BINARY_DIR}/graphengine/src/common/graph/libgraph.so
            ${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libslog.so
+            ${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/liberror_manager.so
            ${CMAKE_SOURCE_DIR}/build/graphengine/libc_sec.so
        DESTINATION ${INSTALL_LIB_DIR}
        COMPONENT mindspore

@@ -1 +1 @@
-Subproject commit 18cf690152add623ffbddfbbb4674d1b34484ca7
+Subproject commit 103f2d1019dc50d781d7a964551d9f1f50b3b009

@@ -40,7 +40,7 @@ def get_ddk_version():
        with open(backup_ddk_info_file, "r") as fp:
            ddk_version = json.load(fp)["VERSION"]
    else:
-        ddk_version = "1.60.T17.B830"
+        ddk_version = "Ascend910"
    return ddk_version

@@ -185,7 +185,7 @@ if (ENABLE_GE)
    else ()
        target_link_libraries(mindspore ge_client)
    endif ()
-    target_link_libraries(mindspore graph tsdclient)
+    target_link_libraries(mindspore graph tsdclient datatransfer)
endif()

if (ENABLE_D)
if (ENABLE_D)
@@ -216,8 +216,9 @@ if (ENABLE_D)
    find_library(CCE_LIB cce ${ASCEND_RUNTIME_PATH})
    find_library(RUNTIME_LIB runtime ${ASCEND_RUNTIME_PATH})
    find_library(TSDCLIENT tsdclient HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
+    find_library(DATATRANSFER datatransfer HINTS ${ASCEND_RUNTIME_PATH} ${ASCEND_DRIVER_BACK_PATH})
    find_library(PROFILING msprof ${ASCEND_DRIVER_PATH})
-    target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${PROFILING} ${HCCL} ${TSDCLIENT})
+    target_link_libraries(mindspore ge_runtime ${CCE_LIB} ${RUNTIME_LIB} ${TSDCLIENT} ${PROFILING} ${HCCL} ${DATATRANSFER})
endif()
# link protobuf

@@ -292,7 +292,6 @@ bool TbeKernelSelect::TbeCheckSupported(
    parallel::TOPK,
    parallel::IN_TOPK,
    parallel::PACK,
-    parallel::GATHER_ND,
    parallel::UNSORTEF_SEGMENT_MIND,
    parallel::UNSORTEF_SEGMENT_PRODD,
    parallel::CAST};

@@ -23,6 +23,7 @@
#include "backend/optimizer/ascend/ir_fission/batch_norm_grad_split.h"
#include "backend/optimizer/ascend/ir_fission/batch_norm_bert_fission.h"
#include "backend/optimizer/ascend/ir_fission/single_batch_norm_fission.h"
+#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h"
#include "backend/optimizer/ascend/ir_fusion/fused_batch_norm_fusion.h"
#include "backend/optimizer/ascend/ir_fission/layer_norm_grad_split.h"
#include "backend/optimizer/pass/communication_op_fusion.h"
@@ -154,6 +155,7 @@ void AddAscendBackendOptionalIRFusion(PassManager *ir_fusion_pm) {
  ir_fusion_pm->AddPass(std::make_shared<BatchNormGrad2BNInferGrad>());
  ir_fusion_pm->AddPass(std::make_shared<BatchNormGradInferFission>());
  ir_fusion_pm->AddPass(std::make_shared<SplitFission>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterUpdateFission>());
  ir_fusion_pm->AddPass(std::make_shared<GetitemTuple>());
  ir_fusion_pm->AddPass(std::make_shared<PackFission>());
  ir_fusion_pm->AddPass(std::make_shared<ConcatFission>());
@@ -303,6 +305,7 @@ void RunOpAscendBackendIRFusionOptimization(const std::shared_ptr<session::Kerne
  ir_fusion_pm->AddPass(std::make_shared<TopKSplit>());
  ir_fusion_pm->AddPass(std::make_shared<AddnFission>());
  ir_fusion_pm->AddPass(std::make_shared<InsertPadForNMSWithMask>());
+  ir_fusion_pm->AddPass(std::make_shared<TensorScatterUpdateFission>());
  optimizer->AddPassManager(ir_fusion_pm);
  (void)optimizer->Optimize(kernel_graph);

@@ -94,7 +94,7 @@ AnfNodePtr AddAdditionalToRefOutput(const FuncGraphPtr &func_graph, const CNodeP
  origin_pair = FindRefOriginNode(input_node);
  MS_EXCEPTION_IF_NULL(origin_pair.first);
  if (!origin_pair.first->isa<Parameter>()) {
-    MS_LOG(EXCEPTION) << "ref op origin node is not parameter";
+    MS_LOG(WARNING) << "ref op origin node is not parameter";
  }
  MS_LOG(DEBUG) << "DealRefTransAndCast the node input index " << input_index << ", find origin op is "
                << origin_pair.first->DebugString() << ", index is " << origin_pair.second;

@@ -0,0 +1,71 @@ (new file: backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.cc)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h"
#include <vector>
#include <memory>
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/optimizer/common/helper.h"
namespace mindspore {
namespace opt {
namespace {
CNodePtr CreateTensorMove(const FuncGraphPtr &graph, const CNodePtr &tensor_scatter_update) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(tensor_scatter_update);
std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kTensorMoveOpName)),
tensor_scatter_update->input(1)};
auto tensor_move = graph->NewCNode(inputs);
MS_EXCEPTION_IF_NULL(tensor_move);
tensor_move->set_scope(tensor_scatter_update->scope());
tensor_move->set_abstract(tensor_scatter_update->abstract());
AnfAlgo::SetNodeAttr(kAttrUseLocking, MakeValue(false), tensor_move);
return tensor_move;
}
CNodePtr CreateScatterNdUpdate(const FuncGraphPtr &graph, const CNodePtr &tensor_scatter_update,
const CNodePtr &tensor_move) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(tensor_scatter_update);
MS_EXCEPTION_IF_NULL(tensor_move);
std::vector<AnfNodePtr> inputs = {NewValueNode(std::make_shared<Primitive>(kScatterNdUpdateOpName)), tensor_move,
tensor_scatter_update->input(2), tensor_scatter_update->input(3)};
auto scatter_nd_update = graph->NewCNode(inputs);
MS_EXCEPTION_IF_NULL(scatter_nd_update);
scatter_nd_update->set_scope(tensor_scatter_update->scope());
scatter_nd_update->set_abstract(tensor_scatter_update->abstract());
return scatter_nd_update;
}
} // namespace
const BaseRef TensorScatterUpdateFission::DefinePattern() const {
VarPtr Xs = std::make_shared<SeqVar>();
auto prim = std::make_shared<Primitive>(kTensorScatterUpdateOpName);
return VectorRef({prim, Xs});
}
const AnfNodePtr TensorScatterUpdateFission::Process(const FuncGraphPtr &func_graph, const AnfNodePtr &node,
const EquivPtr &) const {
MS_EXCEPTION_IF_NULL(func_graph);
MS_EXCEPTION_IF_NULL(node);
auto tensor_scatter_update = node->cast<CNodePtr>();
if (tensor_scatter_update == nullptr || tensor_scatter_update->size() != 4) {
return nullptr;
}
auto tensor_move = CreateTensorMove(func_graph, tensor_scatter_update);
return CreateScatterNdUpdate(func_graph, tensor_scatter_update, tensor_move);
}
} // namespace opt
} // namespace mindspore
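The fission above is valid because TensorScatterUpdate is the functional (non-destructive) form of ScatterNdUpdate: copying the input first (TensorMove) and then updating the copy in place yields the same result. A minimal NumPy sketch of that equivalence; the helper name and data are illustrative, not MindSpore API:

import numpy as np

def tensor_scatter_update(x, indices, updates):
    """Functional form: returns a new tensor, x is untouched."""
    out = x.copy()                      # this copy is what TensorMove provides
    for idx, upd in zip(indices, updates):
        out[tuple(idx)] = upd           # ScatterNdUpdate then writes in place
    return out

x = np.zeros((4, 3), dtype=np.float32)
indices = np.array([[0], [2]])          # update rows 0 and 2
updates = np.array([[1, 1, 1], [2, 2, 2]], dtype=np.float32)

y = tensor_scatter_update(x, indices, updates)
assert (x == 0).all()                   # input preserved, as the fission requires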

@@ -0,0 +1,33 @@ (new file: backend/optimizer/ascend/ir_fission/tensor_scatter_update_fission.h)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_
#define MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_
#include "backend/optimizer/common/optimizer.h"
namespace mindspore {
namespace opt {
class TensorScatterUpdateFission : public PatternProcessPass {
public:
explicit TensorScatterUpdateFission(bool multigraph = true)
: PatternProcessPass("tensor_scatter_update_fission", multigraph) {}
~TensorScatterUpdateFission() override = default;
const BaseRef DefinePattern() const override;
const AnfNodePtr Process(const FuncGraphPtr &, const AnfNodePtr &, const EquivPtr &) const override;
};
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_PRE_ACTIVATE_ASCEND_IR_FISSION_TENSOR_SCATTER_UPDATE_FISSION_H_

File diff suppressed because it is too large.

@@ -0,0 +1,93 @@ (new file: optimizer/irpass/ref_eliminate.h)
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_
#define MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_
#include <memory>
#include "ir/pattern_matcher.h"
#include "optimizer/irpass.h"
#include "optimizer/optimizer.h"
namespace mindspore {
namespace opt {
namespace irpass {
// {prim::kPrimMakeRef, X, Y, Z} -> Y
class MakeRefEliminater : public OptimizerCaller {
public:
AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
PatternNode<AnfNodePtr> x, y, z;
MATCH_REPLACE(node, PPrimitive(prim::kPrimMakeRef, x, y, z), y);
return nullptr;
}
};
// {prim::kPrimGetRefValue, Parameter} -> Parameter
// {prim::kPrimGetRefOrigin, Parameter} -> Parameter
class GetRefParamEliminater : public OptimizerCaller {
public:
AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
PatternNode<AnfNodePtr> x;
MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefValue, x), x, x.CheckFunc(IsParam, node));
MATCH_REPLACE_IF(node, PPrimitive(prim::kPrimGetRefOrigin, x), x, x.CheckFunc(IsParam, node));
return nullptr;
}
};
// {prim::kPrimGetRefKey, {prim::kPrimMakeRef, X, Y, Z}} -> X
// {prim::kPrimGetRefValue, {prim::kPrimMakeRef, X, Y, Z}} -> Y
// {prim::kPrimGetRefOrigin, {prim::kPrimMakeRef, X, Y, Z}} -> Z
class GetMakeRefEliminater : public OptimizerCaller {
public:
AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override {
PatternNode<AnfNodePtr> x, y, z;
MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefKey, PPrimitive(prim::kPrimMakeRef, x, y, z)), x);
MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefValue, PPrimitive(prim::kPrimMakeRef, x, y, z)), y);
MATCH_REPLACE(node, PPrimitive(prim::kPrimGetRefOrigin, PPrimitive(prim::kPrimMakeRef, x, y, z)), z);
return nullptr;
}
};
// IsValueNode<RefKey>
class ReplaceRefkeyByParam : public OptimizerCaller {
public:
AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override {
auto RefKeyLambda = [&node, &optimizer]() -> AnfNodePtr {
auto refkey = GetValueNode<RefKeyPtr>(node);
auto resource = std::dynamic_pointer_cast<pipeline::Resource>(optimizer->resource());
MS_EXCEPTION_IF_NULL(resource);
auto top_graph = resource->func_graph();
MS_EXCEPTION_IF_NULL(top_graph);
for (const auto &tnode : top_graph->parameters()) {
auto para = tnode->cast<ParameterPtr>();
if (para != nullptr && para->name() == refkey->tag()) {
return para;
}
}
return nullptr;
};
PatternNode<AnfNodePtr> x;
MATCH_REPLACE_LAMBDA_IF(node, x, RefKeyLambda, x.CheckFunc(IsValueNode<RefKey>, node));
return nullptr;
}
};
} // namespace irpass
} // namespace opt
} // namespace mindspore
#endif // MINDSPORE_CCSRC_OPTIMIZER_IRPASS_REF_ELIMINATE_H_
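The rules above are plain pattern rewrites over the IR. A toy Python sketch of the GetMakeRefEliminater rule, with nodes modeled as nested tuples; all names here are illustrative only:

# Toy term rewriting for the MakeRef patterns above; nodes are tuples
# like ("make_ref", X, Y, Z). Not MindSpore's pattern matcher.
def eliminate_get_make_ref(node):
    """{get_ref_key|value|origin, {make_ref, X, Y, Z}} -> X | Y | Z."""
    if not (isinstance(node, tuple) and len(node) == 2):
        return None
    op, inner = node
    if not (isinstance(inner, tuple) and len(inner) == 4 and inner[0] == "make_ref"):
        return None
    slot = {"get_ref_key": 1, "get_ref_value": 2, "get_ref_origin": 3}.get(op)
    return inner[slot] if slot else None

ref = ("make_ref", "key", "value", "origin")
assert eliminate_get_make_ref(("get_ref_value", ref)) == "value"
assert eliminate_get_make_ref(("get_ref_origin", ref)) == "origin"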

@@ -0,0 +1,175 @@ (new file: parallel/graph_util/generate_graph.cc)
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "parallel/graph_util/generate_graph.h"
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
using mindspore::tensor::Tensor;
namespace mindspore {
namespace parallel {
std::string GetOpPythonPath(const OperatorName &op_name) {
// almost all ops are defined in two main paths
const std::string ops_module = OP_PATH;
const std::string inner_ops_module = INNER_OP_PATH;
py::module mod = py::module::import(common::SafeCStr(ops_module));
py::module inner_mod = py::module::import(common::SafeCStr(inner_ops_module));
if (!py::hasattr(inner_mod, common::SafeCStr(op_name))) {
if (!py::hasattr(mod, common::SafeCStr(op_name))) {
MS_LOG(EXCEPTION) << ops_module << " or " << inner_ops_module << " don't have op:" << op_name;
}
return ops_module;
}
return inner_ops_module;
}
ValuePtr CreatOpInstance(const OperatorAttrs &attrs, const OperatorName &op_name, const std::string &instance_name) {
std::string op_path = GetOpPythonPath(op_name);
py::module mod = py::module::import(common::SafeCStr(op_path));
if (!py::hasattr(mod, common::SafeCStr(op_name))) {
MS_LOG(ERROR) << "Failure: op_path:" << op_path << " don't have attr " << op_name;
return nullptr;
}
std::vector<py::object> arg_list;
(void)std::transform(attrs.begin(), attrs.end(), std::back_inserter(arg_list),
[](const Attr &attr) { return ValuePtrToPyData(attr.second); });
py::object obj =
parse::python_adapter::CallPyFn(GET_OP_FUNCTION_PATH, GET_OP_FUNCTION, op_name, op_path, instance_name, arg_list);
ValuePtr op_instance = nullptr;
bool succ = parse::ConvertData(obj, &op_instance);
if (!succ) {
MS_LOG(ERROR) << "Failure:get Python op " << op_path << " from " << op_name << " fail";
return nullptr;
}
return op_instance;
}
AnfNodePtr ValuePtrToAnfNodePtr(const ValuePtr &value_ptr) {
auto value_node = NewValueNode(value_ptr);
MS_EXCEPTION_IF_NULL(value_node);
return value_node->cast<AnfNodePtr>();
}
static std::unordered_map<int32_t, AnfNodePtr> int_tensor_map = {};
AnfNodePtr CreateInt32Tensor(int32_t value) {
auto it = int_tensor_map.find(value);
if (it != int_tensor_map.end()) {
return it->second;
}
mindspore::tensor::TensorPtr tensor_ptr = std::make_shared<tensor::Tensor>(py::int_(value), kInt32);
ValuePtr value_ptr = MakeValue(tensor_ptr);
auto anf_node_ptr = ValuePtrToAnfNodePtr(value_ptr);
int_tensor_map[value] = anf_node_ptr;
return anf_node_ptr;
}
AnfNodePtr CreatTypeInt(int32_t value) {
ValuePtr value_ptr = MakeValue(std::make_shared<Int>(value));
return ValuePtrToAnfNodePtr(value_ptr);
}
AnfNodePtr CreatInt32Imm(int32_t value) {
ValuePtr value_ptr = MakeValue(std::make_shared<Int32Imm>(value));
return ValuePtrToAnfNodePtr(value_ptr);
}
std::string GetInstanceNameByCNode(const CNodePtr &cnode) {
PrimitivePtr prim = GetValueNode<PrimitivePtr>(cnode->input(0));
if (!prim) {
MS_LOG(EXCEPTION) << "The first input of the cnode is not a PrimitivePtr.";
}
std::string instance_name = prim->instance_name();
return HashInstanceName(instance_name);
}
std::string HashInstanceName(const std::string &name) {
auto using_hash_name = common::GetEnv(USING_HASH_NAME);
std::string instance_name;
if ((using_hash_name.empty()) || (using_hash_name == "on")) {
instance_name = HashName(name);
} else {
instance_name = name;
}
return instance_name;
}
Status GenerateGraph::Init(const CNodePtr &cnode) {
if (!cnode) {
MS_LOG(ERROR) << "Init:cnode is nullptr";
return FAILED;
}
cnode_ = cnode;
func_graph_ = cnode->func_graph();
if (!func_graph_) {
MS_LOG(ERROR) << "Init:func_graph_ is nullptr";
return FAILED;
}
manager_ = func_graph_->manager();
if (!manager_) {
MS_LOG(ERROR) << "Init:manager_ is nullptr";
return FAILED;
}
scope_ = cnode_->scope();
if (!scope_) {
MS_LOG(ERROR) << "Init:scope_ is nullptr";
return FAILED;
}
virtual_input_node_ = std::make_shared<AnfNode>(nullptr);
virtual_input_node_->set_scope(scope_);
instance_name_base_ = GetInstanceNameByCNode(cnode_);
name_idx_ = 0;
return SUCCESS;
}
AnfNodePtr GenerateGraph::PushBack(const std::vector<AnfNodePtr> &inputs) {
CNodePtr cnode = func_graph_->NewCNode(inputs); // using NewCNode to creat anfnode
MS_EXCEPTION_IF_NULL(cnode);
cnode->set_scope(scope_);
if (inputs.size() < 2) {
MS_LOG(EXCEPTION) << "inputs.size() must be more than 1";
}
(void)manager_->Replace(inputs.at(1), cnode); // using Replace function to insert cnode after inputs[0]
auto new_anf_node_ptr = cnode->cast<AnfNodePtr>();
MS_EXCEPTION_IF_NULL(new_anf_node_ptr);
return new_anf_node_ptr;
}
AnfNodePtr GenerateGraph::NewOpInst(const OperatorName &op_name, const OperatorAttrs &attrs) {
name_idx_++;
ValuePtr pyop_instance = CreatOpInstance(attrs, op_name, instance_name_base_ + op_name + std::to_string(name_idx_));
if (pyop_instance == nullptr) {
MS_LOG(EXCEPTION) << "Failure:" << op_name << " CreatOpInstance failed";
}
auto value_node = NewValueNode(pyop_instance);
return value_node->cast<AnfNodePtr>();
}
AnfNodePtr GenerateGraph::NewOpInst(const OperatorName &op_name) {
name_idx_++;
OperatorAttrs attrs;
ValuePtr pyop_instance = CreatOpInstance(attrs, op_name, instance_name_base_ + std::to_string(name_idx_));
if (pyop_instance == nullptr) {
MS_LOG(EXCEPTION) << "Failure:" << op_name << " CreatOpInstance failed";
}
auto value_node = NewValueNode(pyop_instance);
return value_node->cast<AnfNodePtr>();
}
} // namespace parallel
} // namespace mindspore
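CreatOpInstance works by locating the Python module that defines the op and instantiating it through the Python adapter. A rough Python equivalent of that lookup-and-instantiate flow; the module paths are assumptions standing in for OP_PATH and INNER_OP_PATH, not verbatim values:

import importlib

def create_op_instance(op_name, attrs,
                       modules=("mindspore.ops.operations._inner_ops",
                                "mindspore.ops.operations")):
    """Sketch of GetOpPythonPath + CreatOpInstance, seen from Python."""
    for path in modules:                         # prefer the inner-ops module, as the C++ does
        mod = importlib.import_module(path)
        if hasattr(mod, op_name):
            return getattr(mod, op_name)(*attrs)  # instantiate the primitive
    raise AttributeError(f"{' or '.join(modules)} don't have op: {op_name}")

# e.g. create_op_instance("Reshape", ()) would return a fresh Reshape primitive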

File diff suppressed because it is too large.

@@ -192,21 +192,18 @@ bool MsContext::OpenTsd() {
  }
  MS_LOG(INFO) << "Device id = " << device_id << ", rank size = " << rank_size << ".";
+  TDT_StatusT status = tdt::TsdClient::GetInstance()->Open(device_id, rank_size);
+  if (status != TDT_OK) {
+    MS_LOG(EXCEPTION) << "Device " << device_id << " is occupied, open tsd failed, status = " << status << ".";
+    return false;
+  }
+  tsd_ref_++;
#ifdef ENABLE_TDTQUE
  int32_t initStatus = tdt::TdtHostInit(device_id);
  if (initStatus != TDT_OK_CODE) {
    MS_LOG(EXCEPTION) << "Init tsd failed, status = " << initStatus << ".";
    return false;
  }
  tdt_print_ = std::thread(TensorPrint());
#endif
-  TDT_StatusT status = tdt::TsdClient::GetInstance()->Open(device_id, rank_size);
-  if (status != TDT_OK) {
-    MS_LOG(EXCEPTION) << "Device " << device_id << " is occupied, open tsd failed, status = " << status << ".";
-    return false;
-  }
-  tsd_ref_++;
  MS_LOG(INFO) << "Open and init tsd successful, tsd reference = " << tsd_ref_ << ".";
  return true;
}
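The reordered logic keeps a single open of the TSD client guarded by a reference count (tsd_ref_). A minimal Python sketch of that ref-counted open pattern, with purely illustrative names:

class TsdHandle:
    """Illustrative ref-counted open mirroring tsd_ref_; not MindSpore code."""

    def __init__(self, backend_open):
        self._refs = 0
        self._backend_open = backend_open     # stand-in for tdt::TsdClient::Open

    def open(self, device_id, rank_size):
        if self._refs > 0:                    # already open: only bump the count
            self._refs += 1
            return True
        if not self._backend_open(device_id, rank_size):
            raise RuntimeError(f"Device {device_id} is occupied, open tsd failed")
        self._refs = 1
        return True

handle = TsdHandle(lambda device_id, rank_size: True)
assert handle.open(0, 1) and handle.open(0, 1)
assert handle._refs == 2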

@@ -173,6 +173,9 @@ constexpr auto kSparseApplyProximalAdagradOpName = "SparseApplyProximalAdagrad";
constexpr auto kSparseApplyRMSPropOpName = "SparseApplyRMSProp";
constexpr auto kSparseApplyAdadeltaOpName = "SparseApplyAdadelta";
constexpr auto kApplyAdamWithAmsgradOpName = "ApplyAdamWithAmsgrad";
+constexpr auto kTensorMoveOpName = "TensorMove";
+constexpr auto kTensorScatterUpdateOpName = "TensorScatterUpdate";
+constexpr auto kScatterNdUpdateOpName = "ScatterNdUpdate";
constexpr auto kPushOpName = "Push";
constexpr auto kPullOpName = "Pull";
constexpr auto kEmbeddingLookupOpName = "EmbeddingLookup";
@@ -236,6 +239,8 @@ constexpr auto kAttrNumSplit = "num_split";
constexpr auto kAttrOutputNum = "output_num";
constexpr auto kAttrSizeSplits = "size_splits";
constexpr auto kAttrOutputDefault = "output_default";
+constexpr auto kAttrPrimitiveTarget = "primitive_target";
+constexpr auto kAttrUseLocking = "use_locking";
constexpr auto kAttrReduceScatterFlag = "reduce_scatter_flag";
constexpr auto kAttrOffset = "offset";
constexpr auto kAttrPsKey = "ps_key";

File diff suppressed because it is too large.

@@ -283,6 +283,7 @@ class AvgPool1d(_PoolNd):
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.slice = P.Slice()
        self.expand = P.ExpandDims()
+        self.squeeze = P.Squeeze(2)

    def construct(self, x):
        _shape_check(self.shape(x))
@@ -295,4 +296,5 @@ class AvgPool1d(_PoolNd):
        else:
            x = self.expand(x, 2)
            x = self.avg_pool(x)
+            x = self.squeeze(x)
        return x
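The added Squeeze(2) matters because construct expands 3-D input (N, C, L) to 4-D so the 2-D average pool can run, and previously the dummy axis leaked into the output. A NumPy sketch of the intended shape flow, with the pooling itself stubbed out:

import numpy as np

x = np.ones((8, 16, 32), dtype=np.float32)  # (N, C, L_in): 1-D pooling input
x4 = np.expand_dims(x, 2)                   # ExpandDims(x, 2) -> (8, 16, 1, 32)
pooled = x4[..., ::2]                       # stand-in for AvgPool; keeps the dummy axis
out = np.squeeze(pooled, 2)                 # Squeeze(2) restores (8, 16, L_out)
assert out.shape == (8, 16, 16)             # without the squeeze, ndim would stay 4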

@@ -393,7 +393,6 @@ class Optimizer(Cell):
                current_dynamic_lr = self.gather(self.learning_rate[i], self.global_step, 0)
                lr += (current_dynamic_lr,)
-            F.control_depend(lr, self.assignadd(self.global_step, 1))
        else:
            lr = self.learning_rate
            if self.dynamic_lr:

@@ -15,7 +15,7 @@
"""grad impl."""
from . import grad_array_ops, grad_comm_ops, grad_debug_ops, grad_implementations, \
-    grad_math_ops, grad_nn_ops, grad_other_ops, grad_quant_ops
+    grad_inner_ops, grad_math_ops, grad_nn_ops, grad_other_ops, grad_quant_ops
from .grad_base import get_bprop_fn

__all__ = ['get_bprop_fn']

@@ -211,6 +211,25 @@ def get_bprop_embedding_lookup(self):
    return bprop_sparse


+@bprop_getters.register(P.EmbeddingLookup)
+def get_bprop_embedding_look_up(self):
+    """Generate bprop for EmbeddingLookup"""
+    sub_op = P.Sub()
+    reshape_op = P.Reshape()
+
+    def bprop(x, indices, offset, out, dout):
+        x_shp = shape_op(x)
+        new_indices = sub_op(indices, offset)
+        # Reshape the 'new_indices'
+        new_indices_shape_changed = (size_op(new_indices),)
+        new_indices = reshape_op(new_indices, new_indices_shape_changed)
+        actual_dout_shape_changed = new_indices_shape_changed
+        if len(x_shp) > 1:
+            actual_dout_shape_changed += x_shp[1:]
+        actual_dout = reshape_op(dout, actual_dout_shape_changed)
+        return (new_indices, actual_dout, x_shp), zeros_like(indices), zeros_like(offset)
+    return bprop
+
+
@bprop_getters.register(P.Transpose)
def get_bprop_transpose(self):
    """Generate bprop for Transpose"""

@@ -0,0 +1,39 @@ (new file: grad_inner_ops.py)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""array_ops"""
from .. import operations as P
from ..operations import _grad_ops as G
from ..operations import _inner_ops as inner
from ..composite.multitype_ops.zeros_like_impl import zeros_like
from .grad_base import bprop_getters


@bprop_getters.register(inner.StridedSliceAICPU)
def get_bprop_strided_slice_aicpu(self):
    """Generate bprop for StridedSlice"""
    shape_op = P.Shape()
    input_grad = G.StridedSliceGradAICPU(self.begin_mask,
                                         self.end_mask,
                                         self.ellipsis_mask,
                                         self.new_axis_mask,
                                         self.shrink_axis_mask)

    def bprop(x, begin, end, strides, out, dout):
        dx = input_grad(dout, shape_op(x), begin, end, strides)
        return dx, zeros_like(begin), zeros_like(end), zeros_like(strides)

    return bprop
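The backward of a strided slice scatters dout into a zero tensor of the input's shape at exactly the positions the forward slice read. A NumPy sketch for the mask-free case; the AICPU kernel additionally honors the mask attributes:

import numpy as np

def strided_slice_grad(dout, x_shape, begin, end, strides):
    # Scatter dout back into the positions the forward slice read;
    # all masks are assumed zero, unlike the general AICPU kernel.
    dx = np.zeros(x_shape, dtype=dout.dtype)
    slices = tuple(slice(b, e, s) for b, e, s in zip(begin, end, strides))
    dx[slices] = dout
    return dx

x = np.arange(12.0).reshape(3, 4)
dout = np.ones_like(x[0:3:2, 1:4:2])          # gradient w.r.t. the sliced output
dx = strided_slice_grad(dout, x.shape, (0, 1), (3, 4), (2, 2))
assert dx.shape == x.shape and dx.sum() == dout.size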

@@ -673,7 +673,7 @@ def get_bprop_mirror_pad(self):
    mirror_pad_grad = G.MirrorPadGrad(self.mode)

    def bprop(x, paddings, out, dout):
-        dx = mirror_pad_grad(dout, paddings, x)
+        dx = mirror_pad_grad(dout, paddings)
        return (dx, zeros_like(paddings))

    return bprop

@@ -14,6 +14,7 @@
"""aicpu ops"""
from .init_data_set_queue import _init_data_set_queue_aicpu
from .embedding_lookup import _embedding_lookup_aicpu
from .dropout_genmask import _dropout_genmask_aicpu
from .get_next import _get_next_aicpu
from .print_tensor import _print_aicpu
@@ -25,10 +26,20 @@ from .squeeze import _squeeze_aicpu
from .expand_dims import _expand_dims_aicpu
from .random_choice_with_mask import _random_choice_with_mask_aicpu
from .pack import _pack_aicpu
from .normal import _normal_aicpu
from .ctcloss import _ctcloss_aicpu
from .reverse_sequence import _reverse_sequence_aicpu
from .crop_and_resize import _crop_and_resize_aicpu
from .end_of_sequence import _end_of_sequence_aicpu
from .rnnt_loss import _rnnt_loss_aicpu
from .random_categorical import _random_categorical_aicpu
from .cast import _cast_aicpu
from .mirror_pad import _mirror_pad_aicpu
from .mirror_pad_grad import _mirror_pad_grad_aicpu
from .standard_normal import _standard_normal_aicpu
from .gamma import _gamma_aicpu
from .poisson import _poisson_aicpu
from .uniform_int import _uniform_int_aicpu
from .uniform_real import _uniform_real_aicpu
from .laplace import _laplace_aicpu
from .strided_slice import _strided_slice_aicpu
from .strided_slice_grad import _strided_slice_grad_aicpu
from .end_of_sequence import _end_of_sequence_aicpu

Some files were not shown because too many files have changed in this diff.
