!1050 fix mdc mbuf_list order error

From: @zhou_lili
Reviewed-by: @xchu42,@wangxiaotian22
Signed-off-by: @ji_chen
pull/1050/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit d2886c4604

@ -99,7 +99,7 @@ Status CpuTaskModelDequeue::Distribute() {
/// @param [in] outside_addrs: model input/output memory addr /// @param [in] outside_addrs: model input/output memory addr
/// @return: 0 for success / others for failed /// @return: 0 for success / others for failed
/// ///
Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs) { Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs) {
if ((args_ != nullptr) || (args_size_ > 0)) { if ((args_ != nullptr) || (args_size_ > 0)) {
GELOGE(FAILED, "Task already initialized, size: %u", args_size_); GELOGE(FAILED, "Task already initialized, size: %u", args_size_);
return FAILED; return FAILED;
@ -110,32 +110,22 @@ Status CpuTaskZeroCopy::Init(std::vector<uintptr_t> &mbuf_list, std::map<const v
GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_) GE_PRINT_DYNAMIC_MEMORY(rtMalloc, "args data.", args_size_)
AddrMapInfo addr_map_info; AddrMapInfo addr_map_info;
for (auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
}
}
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);
// init src_addrs/dst_addrs // init src_addrs/dst_addrs
size_t index = 0;
vector<uint64_t> src_addrs; vector<uint64_t> src_addrs;
vector<uint64_t> dst_addrs; vector<uint64_t> dst_addrs;
for (auto &addrs : outside_addrs) { for (const auto &addrs : outside_addrs) {
auto &addrs_mapping_list = addrs.second.GetOutsideAddrs(); const auto &addrs_mapping_list = addrs.second.GetOutsideAddrs();
GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs"); GE_CHK_BOOL_EXEC(!addrs_mapping_list.empty(), return PARAM_INVALID, "not set outside_addrs");
std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0]; std::map<const void *, std::vector<void *>> virtual_args_addrs = addrs_mapping_list[0];
for (const auto &virtual_args_addr : virtual_args_addrs) { for (const auto &virtual_args_addr : virtual_args_addrs) {
addr_map_info.addr_num += virtual_args_addr.second.size();
for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) { for (size_t i = 0; i < virtual_args_addr.second.size(); ++i) {
src_addrs.push_back(mbuf_list.at(index)); src_addrs.emplace_back(mbuf_list.at(addrs.first));
dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i)))); dst_addrs.push_back(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(virtual_args_addr.second.at(i))));
} }
} }
index++;
} }
GELOGI("addr_map_info.addr_num is %u", addr_map_info.addr_num);
// malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs // malloc mem for src_addrs/dst_addrs, and copy data of src_addrs/dst_addrs
GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM)); GE_CHK_RT_RET(rtMalloc(&src_addr_, src_addrs.size() * sizeof(uint64_t), RT_MEMORY_HBM));

@ -93,7 +93,7 @@ class CpuTaskZeroCopy : public CpuTaskInfo {
~CpuTaskZeroCopy() override; ~CpuTaskZeroCopy() override;
Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; } Status Init(const domi::TaskDef &task_def, DavinciModel *davinci_model) override { return SUCCESS; }
Status Init(std::vector<uintptr_t> &mbuf_list, std::map<const void *, ZeroCopyOffset> &outside_addrs); Status Init(std::vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);
Status Distribute() override; Status Distribute() override;
private: private:

File diff suppressed because it is too large Load Diff

@ -675,7 +675,7 @@ class DavinciModel {
/// @return Status /// @return Status
/// ///
Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index, Status InitDataOp(const ComputeGraphPtr &graph, const NodePtr &node, uint32_t &data_op_index,
map<uint32_t, OpDescPtr> &data_by_index); map<uint32_t, OpDescPtr> &data_by_index, set<const void *> &input_outside_addrs);
/// ///
/// @ingroup ge /// @ingroup ge
@ -694,7 +694,8 @@ class DavinciModel {
/// @param [in/out] vector<OpDescPtr>: All NetOutput node in model. /// @param [in/out] vector<OpDescPtr>: All NetOutput node in model.
/// @return Status /// @return Status
/// ///
Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list); Status InitNetOutput(const ComputeGraphPtr &graph, const NodePtr &node, vector<OpDescPtr> &output_op_list,
set<const void *> &output_outside_addrs);
/// ///
/// @ingroup ge /// @ingroup ge
@ -764,7 +765,7 @@ class DavinciModel {
/// ///
Status BindInputQueue(); Status BindInputQueue();
Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, map<const void *, ZeroCopyOffset> &outside_addrs); Status CpuTaskModelZeroCopy(vector<uintptr_t> &mbuf_list, const map<uint32_t, ZeroCopyOffset> &outside_addrs);
/// ///
/// @ingroup ge /// @ingroup ge
@ -897,10 +898,8 @@ class DavinciModel {
void *global_step_addr_{nullptr}; void *global_step_addr_{nullptr};
uint64_t global_step_size_{0}; uint64_t global_step_size_{0};
map<uint32_t, ZeroCopyOffset> new_input_data_info_; map<uint32_t, ZeroCopyOffset> input_data_info_;
map<uint32_t, ZeroCopyOffset> new_output_data_info_; map<uint32_t, ZeroCopyOffset> output_data_info_;
map<const void *, ZeroCopyOffset> new_input_outside_addrs_;
map<const void *, ZeroCopyOffset> new_output_outside_addrs_;
set<const void *> real_virtual_addrs_; set<const void *> real_virtual_addrs_;

@ -127,8 +127,8 @@ void ZeroCopyOffset::IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const
} }
} }
void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, void ZeroCopyOffset::SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag,
bool fusion_flag, std::set<const void *> &real_virtual_addrs) { set<const void *> &real_virtual_addrs) {
uint32_t out_count = 0; uint32_t out_count = 0;
if (!fusion_flag) { if (!fusion_flag) {
out_count++; out_count++;
@ -138,7 +138,6 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
real_virtual_addrs.insert(addr); real_virtual_addrs.insert(addr);
} else { } else {
GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr); GELOGI("[ZCPY] set l2-fusion for virtual_addr %p.", addr);
int64_t output_offset = output_offset_list.at(index);
for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) { for (size_t i = 0; i < zero_copy_basic_offset_.size(); ++i) {
if (zero_copy_basic_offset_.at(i) == output_offset) { if (zero_copy_basic_offset_.at(i) == output_offset) {
out_count++; out_count++;
@ -153,6 +152,7 @@ void ZeroCopyOffset::SetInputOutsideAddrs(const vector<int64_t> &output_offset_l
} }
} }
addr_count_ = out_count; addr_count_ = out_count;
valid_relative_offset_ = true;
} }
void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr, void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bool &fusion_flag, void *addr,
@ -181,9 +181,13 @@ void ZeroCopyOffset::SetOutputOutsideAddrs(const int64_t &input_offset, const bo
} }
} }
addr_count_ = out_count; addr_count_ = out_count;
valid_relative_offset_ = true;
} }
void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) { void ZeroCopyOffset::SetOutsideAddrsValue(ZeroCopyTask &zero_copy_task, void *outside_addr, void *args, size_t offset) {
if (!valid_relative_offset_) {
return;
}
const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr); const auto addr_val = reinterpret_cast<uintptr_t>(outside_addr);
for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) { for (uint32_t out_count = 0; out_count < GetAddrCount(); ++out_count) {
auto args_addrs = outside_addrs_[out_count].find(outside_addr); auto args_addrs = outside_addrs_[out_count].find(outside_addr);

@ -43,8 +43,7 @@ class ZeroCopyOffset {
~ZeroCopyOffset(); ~ZeroCopyOffset();
Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag); Status InitInputDataInfo(int64_t output_size, void *virtual_addr, const OpDescPtr &op_desc, bool &fusion_flag);
void SetInputOutsideAddrs(const vector<int64_t> &output_offset_list, void *addr, const size_t &index, void SetInputOutsideAddrs(int64_t output_offset, void *addr, bool fusion_flag, set<const void *> &real_virtual_addrs);
bool fusion_flag, std::set<const void *> &real_virtual_addrs);
void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag); void IsL2Fusion(const vector<int64_t> &fusion_basic_addrs, const int64_t &tensor_addr, bool &fusion_flag);
Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list, Status InitOutputDataInfo(const vector<int64_t> &input_size_list, const vector<void *> &virtual_addr_list,
@ -65,9 +64,10 @@ class ZeroCopyOffset {
// data_size of Data/Netoutput // data_size of Data/Netoutput
int64_t GetDataSize() const { return data_size_; } int64_t GetDataSize() const { return data_size_; }
// value of *outside_addrs_ from davinci_model // value of *outside_addrs_ from davinci_model
const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() { return outside_addrs_; } const std::vector<std::map<const void *, std::vector<void *>>> &GetOutsideAddrs() const { return outside_addrs_; }
// name of op // name of op
std::string GetOpName() const { return op_name_; } std::string GetOpName() const { return op_name_; }
const bool IsRelativeOffsetValid() const { return valid_relative_offset_; }
private: private:
void *basic_addr_ = nullptr; void *basic_addr_ = nullptr;
@ -81,6 +81,7 @@ class ZeroCopyOffset {
std::vector<int64_t> zero_copy_basic_offset_; std::vector<int64_t> zero_copy_basic_offset_;
std::vector<int64_t> zero_copy_relative_offset_; std::vector<int64_t> zero_copy_relative_offset_;
bool valid_relative_offset_ = false;
}; };
} // namespace ge } // namespace ge
#endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_ #endif // GE_GRAPH_LOAD_NEW_MODEL_MANAGER_ZERO_COPY_OFFSET_H_

@ -629,6 +629,7 @@ set(DISTINCT_GRAPH_LOAD_TEST_FILES
"graph/load/kernel_task_info_unittest.cc" "graph/load/kernel_task_info_unittest.cc"
"graph/load/memcpy_addr_async_task_info_unittest.cc" "graph/load/memcpy_addr_async_task_info_unittest.cc"
"graph/load/memcpy_async_task_info_unittest.cc" "graph/load/memcpy_async_task_info_unittest.cc"
"graph/load/cpu_queue_schedule_unittest.cc"
#"graph/graph_load_unittest.cc" #"graph/graph_load_unittest.cc"
"graph/ge_executor_unittest.cc" "graph/ge_executor_unittest.cc"
"graph/load/model_helper_unittest.cc" "graph/load/model_helper_unittest.cc"

@ -0,0 +1,70 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#define private public
#define protected public
#include "graph/load/model_manager/cpu_queue_schedule.h"
#undef private
#undef protected
using namespace std;
namespace ge {
/// Test fixture for the CPU queue schedule task tests in this file.
/// No per-test state is required, so both hooks are intentionally empty.
class UtestCpuQueueSchedule : public testing::Test {
 protected:
  /// Runs before each test case; nothing to prepare.
  void SetUp() {}

  /// Runs after each test case; nothing to release.
  void TearDown() {}
};
/// CpuTaskZeroCopy::Init is expected to return SUCCESS when it is given a
/// single mbuf entry and a single ZeroCopyOffset registered under index 0.
TEST_F(UtestCpuQueueSchedule, CpuTaskZeroCopy_Init_Success) {
  const uintptr_t mbuf_addr = 0x12340000;
  void *const args_addr = reinterpret_cast<void *>(0x11110000);

  // One outside address mapped to a one-element list of args addresses.
  std::map<const void *, std::vector<void *>> outside_to_args;
  outside_to_args[reinterpret_cast<void *>(mbuf_addr)] = std::vector<void *>{args_addr};

  // Populate the offset object directly through its (test-exposed) members.
  ZeroCopyOffset zero_copy_offset;
  zero_copy_offset.addr_count_ = 1;
  zero_copy_offset.outside_addrs_.emplace_back(outside_to_args);

  // Register the offset under data index 0.
  const uint32_t data_index = 0;
  std::map<uint32_t, ZeroCopyOffset> offsets_by_index;
  offsets_by_index[data_index] = zero_copy_offset;

  // The mbuf list holds exactly one entry, at position 0.
  std::vector<uintptr_t> mbufs;
  mbufs.emplace_back(mbuf_addr);

  CpuTaskZeroCopy zero_copy_task(nullptr);
  EXPECT_EQ(zero_copy_task.Init(mbufs, offsets_by_index), SUCCESS);
}
/// Every CPU task type below is constructed with a null argument and never
/// initialised; Distribute() is expected to report FAILED for each of them.
TEST_F(UtestCpuQueueSchedule, CpuTaskInfo_Init_args_valid) {
  // Construct one instance of each task kind (no Init call is made).
  CpuTaskZeroCopy zero_copy(nullptr);
  CpuTaskActiveEntry active_entry(nullptr);
  CpuTaskModelDequeue model_dequeue(nullptr);
  CpuTaskModelRepeat model_repeat(nullptr);
  CpuTaskWaitEndGraph wait_end_graph(nullptr);
  CpuTaskModelEnqueue model_enqueue(nullptr);
  CpuTaskPrepareOutput prepare_output(nullptr);

  // Distribute() must fail for each task.
  EXPECT_EQ(zero_copy.Distribute(), FAILED);
  EXPECT_EQ(active_entry.Distribute(), FAILED);
  EXPECT_EQ(model_dequeue.Distribute(), FAILED);
  EXPECT_EQ(model_repeat.Distribute(), FAILED);
  EXPECT_EQ(wait_end_graph.Distribute(), FAILED);
  EXPECT_EQ(model_enqueue.Distribute(), FAILED);
  EXPECT_EQ(prepare_output.Distribute(), FAILED);
}
} // namespace ge

@ -169,7 +169,8 @@ TEST_F(UtestDavinciModel, init_data_op_subgraph) {
uint32_t data_op_index = 0; uint32_t data_op_index = 0;
map<uint32_t, OpDescPtr> data_by_index; map<uint32_t, OpDescPtr> data_by_index;
EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index), SUCCESS); set<const void *> input_outside_addrs;
EXPECT_EQ(model.InitDataOp(nullptr, node, data_op_index, data_by_index, input_outside_addrs), SUCCESS);
EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.input_addrs_list_.size(), 0);
EXPECT_EQ(model.output_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0);
@ -194,7 +195,8 @@ TEST_F(UtestDavinciModel, init_netoutput_op_subgraph) {
NodePtr node = graph->AddNode(op_output); NodePtr node = graph->AddNode(op_output);
std::vector<OpDescPtr> output_op_list; std::vector<OpDescPtr> output_op_list;
EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list), SUCCESS); set<const void *> output_outside_addrs;
EXPECT_EQ(model.InitNetOutput(nullptr, node, output_op_list, output_outside_addrs), SUCCESS);
EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.input_addrs_list_.size(), 0);
EXPECT_EQ(model.output_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0);
@ -800,7 +802,6 @@ TEST_F(UtestDavinciModel, label_task_success) {
label_task_def->set_op_index(op_index++); label_task_def->set_op_index(op_index++);
} }
{ {
OpDescPtr op_desc = CreateOpDesc("label_else", LABELSET); OpDescPtr op_desc = CreateOpDesc("label_else", LABELSET);
NodePtr node = graph->AddNode(op_desc); // op_index = 3 NodePtr node = graph->AddNode(op_desc); // op_index = 3
@ -813,7 +814,6 @@ TEST_F(UtestDavinciModel, label_task_success) {
label_task_def->set_op_index(op_index++); label_task_def->set_op_index(op_index++);
} }
{ {
OpDescPtr op_desc = CreateOpDesc("label_leave", LABELSET); OpDescPtr op_desc = CreateOpDesc("label_leave", LABELSET);
NodePtr node = graph->AddNode(op_desc); // op_index = 4 NodePtr node = graph->AddNode(op_desc); // op_index = 4
@ -826,13 +826,27 @@ TEST_F(UtestDavinciModel, label_task_success) {
label_task_def->set_op_index(op_index++); label_task_def->set_op_index(op_index++);
} }
EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_LABEL_NUM, 3)); EXPECT_TRUE(AttrUtils::SetInt(ge_model, ATTR_MODEL_LABEL_NUM, 3));
EXPECT_EQ(model.Assign(ge_model), SUCCESS); EXPECT_EQ(model.Assign(ge_model), SUCCESS);
EXPECT_EQ(model.Init(), SUCCESS); EXPECT_EQ(model.Init(), SUCCESS);
EXPECT_EQ(model.input_addrs_list_.size(), 0); EXPECT_EQ(model.input_addrs_list_.size(), 0);
EXPECT_EQ(model.output_addrs_list_.size(), 0); EXPECT_EQ(model.output_addrs_list_.size(), 0);
EXPECT_EQ(model.task_list_.size(), 5); EXPECT_EQ(model.task_list_.size(), 5);
} }
TEST_F(UtestDavinciModel, LoadWithQueue_fail_with_diff_args) {
DavinciModel model(0, nullptr);
model.ge_model_ = make_shared<GeModel>();
model.input_queue_ids_.emplace_back(0);
EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID);
EXPECT_EQ(model.input_data_info_.size(), 0);
ZeroCopyOffset zero_copy_offset;
model.input_data_info_[0] = zero_copy_offset;
model.output_queue_ids_.emplace_back(0);
EXPECT_EQ(model.LoadWithQueue(), ACL_ERROR_GE_EXEC_MODEL_QUEUE_ID_INVALID);
EXPECT_EQ(model.output_data_info_.size(), 0);
model.output_data_info_[0] = zero_copy_offset;
EXPECT_EQ(model.LoadWithQueue(), INTERNAL_ERROR);
EXPECT_EQ(model.active_stream_list_.size(), 0);
}
} // namespace ge } // namespace ge

Loading…
Cancel
Save