!1450 Reduce weight memory usage & Remove redundant memcpy

From: @xchu42
Reviewed-by: @wqtshg,@ji_chen
Signed-off-by: @ji_chen
pull/1450/MERGE
Committed by mindspore-ci-bot via Gitee, 4 years ago
commit b964b15ee4

@ -413,43 +413,6 @@ static Status InsertMemcpyNode(const ComputeGraphPtr &graph, const OutDataAnchor
return SUCCESS;
}
// Ensures CONSTANT outputs feeding NETOUTPUT get a task: CONSTANT nodes do not
// generate tasks themselves, so a MemcpyAsync (identity) node is inserted on
// every CONSTANT -> NETOUTPUT data edge of a known-shape graph.
// Returns SUCCESS on completion, FAILED if any memcpy insertion fails.
static Status GenerateTaskForConstant(const std::shared_ptr<ComputeGraph> &graph) {
  // Unknown-shape graphs go through the dynamic build flow; nothing to do here.
  if (graph->GetGraphUnknownFlag()) {
    GELOGI("Graph %s is unknown graph, ignore gen_task for constant.", graph->GetName().c_str());
    return SUCCESS;
  }
  for (auto &node : graph->GetDirectNode()) {
    // CONSTANT not generate task, so insert IDENTITY between CONSTANT and NETOUTPUT
    auto op_desc = node->GetOpDesc();
    if ((op_desc == nullptr) || (op_desc->GetType() != NETOUTPUT)) {
      continue;
    }
    // Walk every data input of the NETOUTPUT node and patch edges whose
    // producer is a CONSTANT.
    for (InDataAnchorPtr &input_anchor : node->GetAllInDataAnchors()) {
      const OutDataAnchorPtr &producer_anchor = input_anchor->GetPeerOutAnchor();
      GE_IF_BOOL_EXEC(producer_anchor == nullptr, continue);
      NodePtr producer_node = producer_anchor->GetOwnerNode();
      GE_CHECK_NOTNULL(producer_node);
      if (producer_node->GetType() != CONSTANT) {
        continue;
      }
      GELOGD("Insert MemcpyAsync node between %s and %s.", producer_node->GetName().c_str(),
             node->GetName().c_str());
      std::string memcpy_name = node->GetName() + "_input_" + std::to_string(input_anchor->GetIdx()) + "_Memcpy";
      if (InsertMemcpyNode(graph, producer_anchor, {input_anchor}, memcpy_name) != SUCCESS) {
        REPORT_CALL_ERROR("E19999", "Insert memcpy between %s and %s failed when GenerateTaskForConstant",
                          producer_node->GetName().c_str(), node->GetName().c_str());
        GELOGE(FAILED, "Insert memcpy between %s and %s failed.",
               producer_node->GetName().c_str(), node->GetName().c_str());
        return FAILED;
      }
    }
  }
  return SUCCESS;
}
Status GraphBuilder::MarkFpBpProfilingTaskAttr(ComputeGraphPtr &com_graph) {
bool original_unknown_shape_flag = com_graph->GetGraphUnknownFlag();
com_graph->SetGraphUnknownFlag(false);
@ -534,9 +497,6 @@ Status GraphBuilder::BuildForDynamicShapeGraph(ComputeGraphPtr &comp_graph,
!sub_graph->GetParentGraph()->GetGraphUnknownFlag()) {
continue;
}
GE_CHK_STATUS_RET(GenerateTaskForConstant(sub_graph), "Generate task For constant node in subgraph failed.");
if (sub_graph->GetGraphUnknownFlag()) {
// unknown shape build flow
GE_CHK_STATUS_RET(BuildForUnknownShapeGraph(sub_graph, ge_model_ptr, session_id),

@ -368,5 +368,14 @@ void *HybridModel::GetGlobalStep() const {
}
return global_step_->GetData();
}
// Looks up the weight buffer registered for a subgraph.
// Returns a non-owning pointer to the buffer, or nullptr (with a debug log)
// when no weight was recorded under that subgraph name.
TensorBuffer *HybridModel::GetModelWeight(const string &subgraph_name) const {
  const auto iter = weight_buffer_map_.find(subgraph_name);
  if (iter != weight_buffer_map_.end()) {
    return iter->second.get();
  }
  GELOGD("Model weight not found, subgraph name = %s", subgraph_name.c_str());
  return nullptr;
}
} // namespace hybrid
} // namespace ge

@ -93,6 +93,8 @@ class HybridModel {
TensorValue* GetTensor(const NodePtr &node) const;
TensorBuffer* GetModelWeight(const std::string &subgraph_name) const;
const std::map<int64_t, std::vector<std::pair<int, Tensor>>> &GetHostTensors() const;
const std::vector<domi::TaskDef>* GetTaskDefs(const NodePtr &node) const;
@ -159,7 +161,6 @@ class HybridModel {
uint32_t device_id_ = 0;
uint32_t model_id_ = 0;
uint8_t *var_mem_base_ = nullptr;
std::unique_ptr<TensorBuffer> weight_buffer_;
std::map<string, std::unique_ptr<TensorBuffer>> weight_buffer_map_;
RuntimeParam root_runtime_param_;
string om_name_;

@ -118,7 +118,7 @@ Status KnownNodeTask::Init(TaskContext &context) {
return SUCCESS;
}
Status KnownNodeTask::InitDavinciModel(const HybridModel &model) {
Status KnownNodeTask::InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer) {
GELOGD("[Init][DavinciModel] start");
davinci_model_->InitRuntimeParams();
GE_CHK_STATUS_RET(davinci_model_->InitVariableMem(), "init variable mem failed");
@ -133,13 +133,20 @@ Status KnownNodeTask::InitDavinciModel(const HybridModel &model) {
davinci_model_->SetKnownShapeGlobalStep(global_step);
}
GE_CHK_STATUS_RET(DoInitDavinciModel(), "[Init][Model] Failed to init davinci model.");
void *weight = nullptr;
size_t weight_size = 0;
if (weight_buffer != nullptr) {
weight = weight_buffer->GetData();
weight_size = weight_buffer->GetSize();
}
GELOGD("Start to init davinci model, weight size = %zu", weight_size);
GE_CHK_STATUS_RET(DoInitDavinciModel(weight, weight_size), "[Init][Model] Failed to init davinci model.");
GELOGD("[Init][Model] success");
return SUCCESS;
}
Status KnownNodeTask::DoInitDavinciModel() {
return davinci_model_->Init();
Status KnownNodeTask::DoInitDavinciModel(void *weight, size_t weight_size) {
return davinci_model_->Init(nullptr, 0, weight, weight_size);
}
Status KnownNodeExecutor::PrepareTask(NodeTask &task, TaskContext &context) const {
@ -165,6 +172,10 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
const GeModelPtr ge_model = model.GetGeModel(node);
GE_CHECK_NOTNULL(ge_model);
AscendString graph_name;
GE_CHK_GRAPH_STATUS_RET(ge_model->GetGraph().GetName(graph_name), "Failed to get graph name");
auto weight_buffer = model.GetModelWeight(graph_name.GetString());
std::shared_ptr<DavinciModel> davinci_model = MakeShared<DavinciModel>(0, nullptr);
GE_CHECK_NOTNULL(davinci_model);
@ -181,7 +192,7 @@ Status KnownNodeExecutor::LoadTask(const HybridModel &model, const NodePtr &node
auto known_node_task = MakeShared<KnownNodeTask>(davinci_model);
GE_CHECK_NOTNULL(known_node_task);
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model));
GE_CHK_STATUS_RET_NOLOG(known_node_task->InitDavinciModel(model, weight_buffer));
GELOGI("[%s] KnownNodeExecutor::LoadTask success.", node->GetName().c_str());
task = std::move(known_node_task);
return SUCCESS;

@ -36,10 +36,10 @@ class KnownNodeTask : public NodeTask {
Status UpdateArgs(TaskContext &context) override;
Status ExecuteAsync(TaskContext &context, std::function<void()> done_callback) override;
Status Init(TaskContext &context) override;
Status InitDavinciModel(const HybridModel &model);
Status InitDavinciModel(const HybridModel &model, TensorBuffer *weight_buffer);
protected:
virtual Status DoInitDavinciModel();
virtual Status DoInitDavinciModel(void *weight, size_t weight_size);
private:
std::shared_ptr<DavinciModel> davinci_model_ = nullptr;
};

@ -43,7 +43,7 @@ class KnownNodeTaskMock : public KnownNodeTask {
public:
KnownNodeTaskMock(std::shared_ptr<DavinciModel> davinci_model): KnownNodeTask(davinci_model) {};
~KnownNodeTaskMock() override = default;
MOCK_METHOD0(DoInitDavinciModel, Status());
MOCK_METHOD2(DoInitDavinciModel, Status(void *, size_t));
};
}
@ -62,6 +62,10 @@ TEST_F(UnknownNodeExecutorTest, test_init_davinci_model) {
DumpProperties dump_properties;
dump_properties.enable_dump_ = "1";
DumpManager::GetInstance().AddDumpProperties(model.GetSessionId(), dump_properties);
EXPECT_CALL(mock, DoInitDavinciModel).WillOnce(::testing::Return(SUCCESS));
ASSERT_EQ(mock.InitDavinciModel(model), SUCCESS);
EXPECT_CALL(mock, DoInitDavinciModel).WillRepeatedly(::testing::Return(SUCCESS));
ASSERT_EQ(mock.InitDavinciModel(model, model.GetModelWeight("subgraph")), SUCCESS);
int32_t buffer[8];
model.weight_buffer_map_.emplace("subgraph", TensorBuffer::Create(buffer, sizeof(buffer)));
ASSERT_EQ(mock.InitDavinciModel(model, model.GetModelWeight("subgraph")), SUCCESS);
}

Loading…
Cancel
Save