Performance optimization

pull/1347/head
TangQunzhang 4 years ago
parent 96eaa5364d
commit d983cef480

@@ -430,17 +430,14 @@ void SetLastUsedInputMemAttr(NodePtr &node, int input_index) {
   }
   auto node_op_desc = node->GetOpDesc();
   if (node_op_desc != nullptr) {
-    auto input_desc = node_op_desc->GetInputDesc(input_index);
-    if (!ge::AttrUtils::SetInt(input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) {
+    auto input_desc = node_op_desc->MutableInputDesc(input_index);
+    if (!ge::AttrUtils::SetInt(*input_desc, ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, true)) {
       GELOGW("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true failed.", node_op_desc->GetName().c_str(),
              input_index);
       return;
     }
     GELOGD("Set %s input[%d] ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE to true success.", node_op_desc->GetName().c_str(),
            input_index);
-    if (node_op_desc->UpdateInputDesc(input_index, input_desc) != GRAPH_SUCCESS) {
-      GELOGW("Update %s input[%d] desc failed.", node_op_desc->GetName().c_str(), input_index);
-    }
   }
 }
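
Note on the pattern: this hunk and the GetOutAndWorkSpaceMem hunk below both remove a per-call GeTensorDesc copy. GetInputDesc returns the descriptor by value, so the attribute had to be written back with UpdateInputDesc; MutableInputDesc exposes the descriptor held by the OpDesc, so the write lands in place and the write-back disappears. GetOutputDescPtr does the same on the read side. A minimal standalone sketch of the idea, with stand-in types (Holder/Desc are illustrative only, not GE code):

    // copy_vs_pointer.cc -- illustrative stand-ins, not GE types.
    struct Desc { long long size = 0; long long attr = 0; };

    class Holder {
     public:
      Desc GetDesc() const { return desc_; }             // by value: every call copies Desc
      const Desc *GetDescPtr() const { return &desc_; }  // read side: no copy
      Desc *MutableDesc() { return &desc_; }             // write side: edits apply in place
      void UpdateDesc(const Desc &d) { desc_ = d; }      // write-back only the copy path needs
     private:
      Desc desc_;
    };

    int main() {
      Holder h;
      // Old write path: copy, modify, write back (two Desc copies per attribute set).
      Desc copy = h.GetDesc();
      copy.attr = 1;
      h.UpdateDesc(copy);
      // New write path: modify through the mutable accessor, no copy and no write-back.
      h.MutableDesc()->attr = 1;
      // New read path: inspect through a const pointer without copying.
      long long size = h.GetDescPtr()->size;
      return static_cast<int>(size);
    }
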
@@ -593,9 +590,9 @@ void BlockMemAssigner::GetOutAndWorkSpaceMem(vector<int64_t> &all_memory_size) {
     }
     for (auto &out_anchor : n->GetAllOutDataAnchors()) {
-      GeTensorDesc output_desc = node_op_desc->GetOutputDesc(out_anchor->GetIdx());
+      auto output_desc = node_op_desc->GetOutputDescPtr(out_anchor->GetIdx());
       int64_t size = 0;
-      GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(output_desc, size) != SUCCESS, GELOGI("Get size failed"));
+      GE_IF_BOOL_EXEC(ge::TensorUtils::GetSize(*output_desc, size) != SUCCESS, GELOGI("Get size failed"));
       GE_IF_BOOL_EXEC(size < 0,
                       GELOGE(FAILED, "[Check][TensorSize]tensor_size:%ld is invalid, "
                              "maybe it is unknown shape node, Node_name:%s",

@@ -197,8 +197,7 @@ void ModelBuilder::SetInputIsConst(const ge::NodePtr &n) {
     }
   }
-  std::string input_const_info = ToString(is_input_const);
-  GELOGD("update opdesc:%s InputConst:%s", node_op_desc->GetName().c_str(), input_const_info.c_str());
+  GELOGD("update opdesc:%s InputConst:%s", node_op_desc->GetName().c_str(), ToString(is_input_const).c_str());
   node_op_desc->SetIsInputConst(is_input_const);
 }

@@ -3683,33 +3683,34 @@ Status DavinciModel::NnExecute(rtStream_t stream, bool async_mode, const InputData
   GE_CHK_STATUS_RET(InitModelStream(stream), "Init model stream failed.");
   is_dynamic_ = input_data.is_dynamic_batch;
-  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_START));
+  bool profiling_model_execute_on = ProfilingManager::Instance().ProfilingModelExecuteOn();
+  GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_START));
   Status ret = CopyModelData(input_data, output_data, is_dynamic_);
   GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ret, "Copy input data to model failed. model id: %u",
                                  model_id_);
   GELOGD("current_data.index=%u", input_data.index);
-  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_PRE_PROC_END));
+  GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_PRE_PROC_END));
   if (!task_list_.empty()) {
     GELOGD("rtModelExecute do");
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_START));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_START));
     rtError_t rt_ret = rtModelExecute(rt_model_handle_, rt_model_stream_, 0);
     GE_CHK_RT_EXEC(rt_ret, return RT_ERROR_TO_GE_STATUS(rt_ret));
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_INFER_END));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_INFER_END));
     GELOGD("rtModelExecute end");
   }
   if (!is_async_mode_) {
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_START));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_START));
     ret = CopyOutputData(input_data.index, output_data, RT_MEMCPY_DEVICE_TO_DEVICE);
     GE_CHK_BOOL_TRUE_EXEC_WITH_LOG(ret != SUCCESS, return ACL_ERROR_GE_INTERNAL_ERROR,
                                    "Copy Output data to user failed.");
-    GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), SetProfileTime(MODEL_AFTER_PROC_END));
+    GE_IF_BOOL_EXEC(profiling_model_execute_on, SetProfileTime(MODEL_AFTER_PROC_END));
   }
   // report model time data
-  GE_IF_BOOL_EXEC(ProfilingManager::Instance().ProfilingModelExecuteOn(), (void)SinkTimeProfile(input_data));
+  GE_IF_BOOL_EXEC(profiling_model_execute_on, (void)SinkTimeProfile(input_data));
   GELOGD("Model run end, model id:%u", model_id_);
   return SUCCESS;
 }
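
Note on the pattern: NnExecute previously re-queried ProfilingManager::Instance().ProfilingModelExecuteOn() at every profiling point; the hunk reads it once into a local bool and reuses it, which also means the flag cannot flip in the middle of a single execution. A rough standalone sketch of the hoisting, with stand-in names (ProfilingOn/Execute are illustrative, not the GE API):

    // hoist_query.cc -- illustrative stand-ins, not GE code.
    #include <cstdio>

    static bool ProfilingOn() {
      // Imagine a singleton lookup plus a configuration read on every call.
      return true;
    }

    static void Execute() {
      const bool profiling_on = ProfilingOn();  // evaluated once per execution
      if (profiling_on) { std::printf("pre-proc start\n"); }
      // ... copy inputs, launch the model, copy outputs ...
      if (profiling_on) { std::printf("after-proc end\n"); }
    }

    int main() {
      Execute();
      return 0;
    }
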

@@ -58,15 +58,15 @@ class ZeroCopyOffset {
   uint32_t GetDataCount() const { return data_count_; }
   uint32_t GetAddrCount() const { return addr_count_; }
   // value of *data_info_ from davinci_model
-  std::vector<std::pair<int64_t, void *>> GetDataInfo() const { return data_info_; }
+  const std::vector<std::pair<int64_t, void *>> &GetDataInfo() const { return data_info_; }
   // relative_offset from zero_copy_relative_offset_
-  std::vector<int64_t> GetRelativeOffset() const { return relative_offset_; }
+  const std::vector<int64_t> &GetRelativeOffset() const { return relative_offset_; }
   // data_size of Data/Netoutput
   int64_t GetDataSize() const { return data_size_; }
   // value of *outside_addrs_ from davinci_model
   const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
   // name of op
-  std::string GetOpName() const { return op_name_; }
+  const std::string &GetOpName() const { return op_name_; }
   const bool IsRelativeOffsetValid() const { return valid_relative_offset_; }

  private:
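
Note on the pattern: these getters used to return the vectors and the op name by value, copying them on every call; returning const references avoids the copies while keeping the accessors read-only, with the usual caveat that callers must not hold the reference longer than the owning ZeroCopyOffset. A small standalone sketch of the trade-off (types are illustrative, not GE code):

    // const_ref_getter.cc -- illustrative stand-ins, not GE code.
    #include <cstdint>
    #include <vector>

    class Offsets {
     public:
      std::vector<int64_t> CopyOffsets() const { return offsets_; }        // copies the vector
      const std::vector<int64_t> &GetOffsets() const { return offsets_; }  // read-only view, no copy
     private:
      std::vector<int64_t> offsets_{0, 512, 1024};
    };

    int main() {
      Offsets o;
      const std::vector<int64_t> &view = o.GetOffsets();  // valid only while `o` is alive
      return static_cast<int>(view.size());
    }
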

@@ -764,6 +764,7 @@ set(MULTI_PARTS_TEST_FILES
     "common/ge_format_util_unittest.cc"
     "graph/variable_accelerate_ctrl_unittest.cc"
     "graph/build/logical_stream_allocator_unittest.cc"
+    "graph/build/model_builder_unittest.cc"
     "graph/build/mem_assigner_unittest.cc"
     "graph/preprocess/graph_preprocess_unittest.cc"
     "graph/manager/hcom_util_unittest.cc"

@@ -249,3 +249,17 @@ TEST_F(UtestMemoryAssignerTest, graph_memory_assign_continuous_input) {
   EXPECT_EQ(addn1->GetOpDesc()->GetOutputOffset()[0], 500);
   EXPECT_EQ(addn2->GetOpDesc()->GetOutputOffset()[0], 600);
 }
+
+TEST_F(UtestMemoryAssignerTest, graph_memory_set_last_used_attr) {
+  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
+  MakeGraph(graph);
+  auto node_f = graph->FindNode("F");
+  MemoryAssigner memory_assigner(graph);
+  map<int64_t, size_t> mem_offset;
+  size_t zero_memory_size = 0;
+  EXPECT_EQ(memory_assigner.AssignMemory(false, mem_offset, zero_memory_size), GRAPH_SUCCESS);
+
+  int32_t flag = 0;
+  (void) ge::AttrUtils::GetInt(node_f->GetOpDesc()->GetInputDesc(0), ATTR_NAME_IS_END_OF_INPUTMEM_LIFECYCLE, flag);
+  EXPECT_EQ(flag, 1);
+}

@@ -0,0 +1,146 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "graph/anchor.h"
+#include "graph/attr_value.h"
+#include "graph/debug/ge_attr_define.h"
+#include "graph/utils/graph_utils.h"
+#include "graph/utils/node_utils.h"
+#include "graph/utils/op_desc_utils.h"
+#include "graph/utils/tensor_utils.h"
+#include "omg/omg_inner_types.h"
+#include "../passes/graph_builder_utils.h"
+
+#define protected public
+#define private public
+#include "graph/build/model_builder.h"
+#undef protected
+#undef private
+
+using namespace std;
+using namespace testing;
+using namespace ge;
+using domi::GetContext;
+
+class UtestModelBuilderTest : public testing::Test {
+ public:
+  ge::OpDescPtr CreateOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
+    ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
+    auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
+    auto desc_temp = *desc_temp_ptr;
+    TensorUtils::SetSize(desc_temp, 1024);
+    op_def->AddInputDesc(desc_temp);
+    op_def->AddOutputDesc(desc_temp);
+    std::vector<int64_t> workspace_bytes;
+    workspace_bytes.push_back(wsByte);
+    op_def->SetWorkspaceBytes(workspace_bytes);
+    return op_def;
+  }
+
+  ge::OpDescPtr CreateRefOpWithWsSize(const string &name, int64_t wsByte, const string &type = "some") {
+    ge::OpDescPtr op_def = make_shared<ge::OpDesc>(name, type);
+    auto desc_temp_ptr = make_shared<ge::GeTensorDesc>();
+    auto desc_temp = *desc_temp_ptr;
+    TensorUtils::SetSize(desc_temp, 1024);
+    op_def->AddInputDesc(desc_temp);
+
+    auto desc_output_ptr = make_shared<ge::GeTensorDesc>();
+    auto desc_output = *desc_output_ptr;
+    TensorUtils::SetSize(desc_output, 6500);
+    ge::TensorUtils::SetReuseInput(desc_output, true);
+    ge::TensorUtils::SetReuseInputIndex(desc_output, 0);
+    op_def->AddOutputDesc(desc_output);
+
+    std::vector<int64_t> workspace_bytes;
+    workspace_bytes.push_back(wsByte);
+    op_def->SetWorkspaceBytes(workspace_bytes);
+    return op_def;
+  }
+
+  void MakeGraph(ge::ComputeGraphPtr &graph) {
+    ge::OpDescPtr op_def_a = CreateOpWithWsSize("A", 6000);
+    op_def_a->SetStreamId(0);
+    ge::OpDescPtr op_def_b = CreateOpWithWsSize("B", 120000);
+    op_def_b->SetStreamId(0);
+    ge::OpDescPtr op_def_c = CreateOpWithWsSize("C", 16000);
+    op_def_c->SetStreamId(1);
+    ge::OpDescPtr op_def_d = CreateOpWithWsSize("D", 24000);
+    op_def_d->SetStreamId(2);
+    ge::OpDescPtr op_def_e = CreateOpWithWsSize("E", 24000);
+    op_def_e->SetStreamId(3);
+    ge::OpDescPtr op_def_f = CreateOpWithWsSize("F", 30000);
+    op_def_f->SetStreamId(2);
+    ge::OpDescPtr op_def_g = CreateOpWithWsSize("G", 32000);
+    op_def_g->SetStreamId(3);
+    ge::OpDescPtr op_def_h = CreateOpWithWsSize("H", 48000);
+    op_def_h->SetStreamId(2);
+    ge::OpDescPtr op_def_i = CreateOpWithWsSize("I", 60000);
+    op_def_i->SetStreamId(2);
+    ge::OpDescPtr op_def_j = CreateOpWithWsSize("J", 256000, NETOUTPUT);
+    op_def_j->SetStreamId(3);
+
+    // add node
+    ge::NodePtr node_a = graph->AddNode(op_def_a);
+    ge::NodePtr node_b = graph->AddNode(op_def_b);
+    ge::NodePtr node_c = graph->AddNode(op_def_c);
+    ge::NodePtr node_d = graph->AddNode(op_def_d);
+    ge::NodePtr node_e = graph->AddNode(op_def_e);
+    ge::NodePtr node_f = graph->AddNode(op_def_f);
+    ge::NodePtr node_g = graph->AddNode(op_def_g);
+    ge::NodePtr node_h = graph->AddNode(op_def_h);
+    ge::NodePtr node_i = graph->AddNode(op_def_i);
+    ge::NodePtr node_j = graph->AddNode(op_def_j);
+
+    // add edge
+    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_b->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_a->GetOutDataAnchor(0), node_c->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_d->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_b->GetOutDataAnchor(0), node_e->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_c->GetOutDataAnchor(0), node_g->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_d->GetOutDataAnchor(0), node_f->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_e->GetOutDataAnchor(0), node_g->GetInDataAnchor(1));
+    ge::GraphUtils::AddEdge(node_f->GetOutDataAnchor(0), node_h->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_g->GetOutDataAnchor(0), node_j->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_h->GetOutDataAnchor(0), node_i->GetInDataAnchor(0));
+    ge::GraphUtils::AddEdge(node_i->GetOutDataAnchor(0), node_j->GetInDataAnchor(1));
+
+    GetContext().out_nodes_map["H"] = {0};
+    GetContext().out_nodes_map["I"] = {0};
+    GetContext().out_nodes_map["J"] = {0};
+    graph->TopologicalSorting();
+  }
+
+ protected:
+  void SetUp() {}
+  void TearDown() { GetContext().out_nodes_map.clear(); }
+};
+
+// when check GetMemoryRanges return fail, Assign return fail
+TEST_F(UtestModelBuilderTest, SetInputIsConst) {
+  Graph2SubGraphInfoList subgraphs;
+  std::map<std::string, int> stream_max_parallel_num;
+  ge::ComputeGraphPtr graph = make_shared<ge::ComputeGraph>("");
+  MakeGraph(graph);
+  graph->TopologicalSorting();
+  ge::ModelBuilder builder(0, graph, subgraphs, stream_max_parallel_num, false);
+  EXPECT_EQ(builder.PreBuildModel(), SUCCESS);
+}

@@ -942,4 +942,52 @@ TEST_F(UtestDavinciModel, simple_test_gmock) {
   EXPECT_EQ(mock_stub.func2(2, 5), 1023);
   EXPECT_EQ(mock_stub.func2(3, 5), 1023);
 }
+
+TEST_F(UtestDavinciModel, NnExecute) {
+  DavinciModel model(0, nullptr);
+  ComputeGraphPtr graph = make_shared<ComputeGraph>("default");
+  ProfilingManager::Instance().is_load_profiling_ = true;
+
+  GeModelPtr ge_model = make_shared<GeModel>();
+  ge_model->SetGraph(GraphUtils::CreateGraphFromComputeGraph(graph));
+  AttrUtils::SetInt(ge_model, ATTR_MODEL_MEMORY_SIZE, 10240);
+  AttrUtils::SetInt(ge_model, ATTR_MODEL_STREAM_NUM, 1);
+
+  shared_ptr<domi::ModelTaskDef> model_task_def = make_shared<domi::ModelTaskDef>();
+  ge_model->SetModelTaskDef(model_task_def);
+
+  GeTensorDesc tensor(GeShape({1,4,128,128}), FORMAT_NCHW, DT_FLOAT);
+  TensorUtils::SetSize(tensor, 512);
+  {
+    OpDescPtr op_desc = CreateOpDesc("data", DATA);
+    op_desc->AddInputDesc(tensor);
+    op_desc->AddOutputDesc(tensor);
+    op_desc->SetInputOffset({1024});
+    op_desc->SetOutputOffset({1024});
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 0
+  }
+  {
+    OpDescPtr op_desc = CreateOpDesc("output", NETOUTPUT);
+    op_desc->AddInputDesc(tensor);
+    op_desc->SetInputOffset({5120});
+    op_desc->SetSrcName( { "memcpy" } );
+    op_desc->SetSrcIndex( { 0 } );
+    NodePtr node = graph->AddNode(op_desc);  // op_index = 3
+  }
+
+  EXPECT_EQ(model.Assign(ge_model), SUCCESS);
+  EXPECT_EQ(model.Init(), SUCCESS);
+
+  rtStream_t stream = nullptr;
+  InputData input_data;
+  OutputData output_data;
+  vector<OutputTensorInfo> outputs;
+  EXPECT_EQ(model.GenOutputTensorInfo(&output_data, outputs), SUCCESS);
+  EXPECT_EQ(output_data.blobs.size(), 1);
+  EXPECT_EQ(outputs.size(), 1);
+
+  input_data.blobs = output_data.blobs;
+  EXPECT_EQ(input_data.blobs.size(), 1);
+  EXPECT_EQ(model.NnExecute(stream, false, input_data, output_data), SUCCESS);
+}
 } // namespace ge
