!264 synchronize latest Ascend software suite 10 Nov 2020

From: @nicholas_yhr
Reviewed-by: @youui,@liujunzhu
Signed-off-by: @liujunzhu
pull/264/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 9d5177a954

@ -285,6 +285,11 @@ namespace ir_option {
static const char *const INPUT_FORMAT = "input_format";
static const char *const INPUT_SHAPE = "input_shape";
static const char *const OP_NAME_MAP = "op_name_map";
static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input";
static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout";
static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout";
static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes";
static const char *const OUTPUT = "output";
static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize;
static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize;
static const char *const DYNAMIC_DIMS = kDynamicDims;
@ -317,6 +322,22 @@ const std::set<std::string> ir_builder_suppported_options = {
INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY,
AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES,
INPUT_FP16_NODES, LOG_LEVEL};
// Option keys accepted by the aclgrphParse interface.
const std::set<std::string> ir_parser_suppported_options = {
  INPUT_FORMAT,               INPUT_SHAPE,      OP_NAME_MAP,
  IS_DYNAMIC_INPUT,           INPUT_FP16_NODES, IS_INPUT_ADJUST_HW_LAYOUT,
  IS_OUTPUT_ADJUST_HW_LAYOUT, OUTPUT,           OUTPUT_TYPE,
  OUT_NODES,                  COMPRESS_WEIGHT_CONF,
  ENABLE_SCOPE_FUSION_PASSES, LOG_LEVEL};
// for interface: aclgrphBuildInitialize
const std::set<std::string> global_options = {CORE_TYPE,
SOC_VERSION,

@ -0,0 +1,28 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_GRAPH_COMMON_ERROR_CODES_H_
#define INC_GRAPH_COMMON_ERROR_CODES_H_
#include "external/graph/ge_error_codes.h"
namespace ge {
// Additional graphStatus codes; the decimal value of each is kept in the trailing comment.
// NOTE(review): the exact meaning of each code is inferred from its name only - confirm with the callers.
const graphStatus NO_DEPENDENCE_FUNC = 0x02FFFFFF;  // 50331647
const graphStatus NO_OVERLAP_DIM = 0x02FFFFFE;      // 50331646
const graphStatus NOT_SUPPORT_SLICE = 0x02FFFFFD;   // 50331645
}  // namespace ge
#endif // INC_GRAPH_COMMON_ERROR_CODES_H_

@ -247,12 +247,16 @@ class ComputeGraph : public std::enable_shared_from_this<ComputeGraph>, public A
private:
graphStatus DFSTopologicalSorting(std::vector<NodePtr> &node_vec, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::vector<NodePtr> &stack);
std::vector<NodePtr> &stack, bool reverse);
graphStatus BFSTopologicalSorting(std::vector<NodePtr> &node_vec, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::deque<NodePtr> &stack);
graphStatus CollectBreadthOutNode(const NodePtr &node, std::map<NodePtr, uint32_t> &map_in_edge_num,
std::map<string, NodePtr> &breadth_node_map);
graphStatus TopologicalSortingGraph();
/// nodes like : (a) <--- (c) ---> (b)
/// nodes a and b each have only one parent node, c, and a was connected to c first
/// the DFS topological order is `c, b, a` with the default `dfs_reverse=false`
/// in the same case, the caller gets `c, a, b` with `dfs_reverse=true`
graphStatus TopologicalSortingGraph(bool dfs_reverse = false);
graphStatus SortNodes(std::vector<NodePtr> &stack, std::map<NodePtr, uint32_t> &mapInEdgeNum);
Vistor<NodePtr> AllGraphNodes(std::vector<std::shared_ptr<ComputeGraph>> &subgraphs) const;
size_t GetInEdgeSize(const NodePtr &node);

@ -1041,6 +1041,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_COMPILE_STRATEGY;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TBE_KERNEL_NAME;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TBE_KERNEL_BUFFER;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_SLICE;
// used for memory allocate
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_MEM_TYPE_LIST;
@ -1132,6 +1133,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_STA
// input_output_offset
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET;
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET;
// The processing mode of INF and NAN during floating-point number calculation.
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_FP_CEILING_MODE;
} // namespace ge
#endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_

@ -278,6 +278,8 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {
graphStatus GetSubgraphNameByInstanceName(const std::string &instance_name, std::string &subgraph_name) const;
graphStatus InferDataSlice();
protected:
ProtoAttrMapHelper MutableAttrMap() override;
ConstProtoAttrMapHelper GetAttrMap() const override;
@ -317,6 +319,7 @@ class OpDesc : public std::enable_shared_from_this<OpDesc>, public AttrHolder {
std::function<graphStatus(Operator &)> infer_func_ = nullptr;
std::function<graphStatus(Operator &)> infer_format_func_ = nullptr;
std::function<graphStatus(Operator &)> verifier_func_ = nullptr;
std::function<graphStatus(Operator &)> infer_data_slice_func_ = nullptr;
string op_kernel_lib_name_;
string engine_name_;
friend class OpDescUtils;

@ -22,6 +22,7 @@
#include <string>
#include <vector>
#include "graph/operator_factory.h"
#include "register/infer_data_slice_registry.h"
namespace ge {
class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl {
@ -38,6 +39,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl {
static VerifyFunc GetVerifyFunc(const std::string &operator_type);
static InferDataSliceFunc GetInferDataSliceFunc(const std::string &operator_type);
static graphStatus RegisterOperatorCreator(const std::string &operator_type, OpCreator const &op_creator);
static graphStatus RegisterInferShapeFunc(const std::string &operator_type, InferShapeFunc const infer_shape_func);
@ -46,10 +49,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl {
static graphStatus RegisterVerifyFunc(const std::string &operator_type, VerifyFunc const verify_func);
static graphStatus RegisterInferDataSliceFunc(const std::string &operator_type,
InferDataSliceFunc const infer_data_slice_func);
static shared_ptr<std::map<string, OpCreator>> operator_creators_;
static shared_ptr<std::map<string, InferShapeFunc>> operator_infershape_funcs_;
static shared_ptr<std::map<string, InferFormatFunc>> operator_inferformat_funcs_;
static shared_ptr<std::map<string, VerifyFunc>> operator_verify_funcs_;
static shared_ptr<std::map<string, InferDataSliceFunc>> operator_infer_data_slice_funcs_;
};
} // namespace ge

@ -728,11 +728,18 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::InsertE
graphStatus ComputeGraph::DFSTopologicalSorting(std::vector<NodePtr> &node_vec,
std::map<NodePtr, uint32_t> &map_in_edge_num,
std::vector<NodePtr> &stack) {
std::vector<NodePtr> &stack, bool reverse) {
GELOGI("Runing_Dfs_Sort: %s", name_.c_str());
// Record the number of non data nodes but no input nodes
GE_CHK_BOOL_EXEC(SortNodes(stack, map_in_edge_num) == GRAPH_SUCCESS, return GRAPH_FAILED, "sort nodes failed");
std::vector<NodePtr> out_nodes;
auto stack_push = [&reverse, &stack](std::vector<NodePtr> &out_nodes) {
if (reverse) {
std::reverse(out_nodes.begin(), out_nodes.end());
}
stack.insert(stack.end(), out_nodes.begin(), out_nodes.end());
out_nodes.clear();
};
// Only data nodes here
while (!stack.empty()) {
NodePtr node = stack.back();
@ -746,16 +753,18 @@ graphStatus ComputeGraph::DFSTopologicalSorting(std::vector<NodePtr> &node_vec,
GE_CHECK_NOTNULL(peer_in_anchor);
auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode());
if (iter != map_in_edge_num.end() && --iter->second == 0) {
stack.push_back(peer_in_anchor->GetOwnerNode());
out_nodes.push_back(peer_in_anchor->GetOwnerNode());
}
}
stack_push(out_nodes);
for (const auto &peer_in_anchor : anchor->GetPeerInControlAnchors()) {
GE_CHECK_NOTNULL(peer_in_anchor);
auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode());
if (iter != map_in_edge_num.end() && --iter->second == 0) {
stack.push_back(peer_in_anchor->GetOwnerNode());
out_nodes.push_back(peer_in_anchor->GetOwnerNode());
}
}
stack_push(out_nodes);
}
GE_IF_BOOL_EXEC(
node->GetOutControlAnchor() != nullptr, for (AnchorPtr peer_in_anchor
@ -763,9 +772,9 @@ graphStatus ComputeGraph::DFSTopologicalSorting(std::vector<NodePtr> &node_vec,
GE_CHECK_NOTNULL(peer_in_anchor);
auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode());
if (iter != map_in_edge_num.end() && --iter->second == 0) {
stack.push_back(peer_in_anchor->GetOwnerNode());
out_nodes.push_back(peer_in_anchor->GetOwnerNode());
}
})
} stack_push(out_nodes);)
}
return GRAPH_SUCCESS;
@ -867,7 +876,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Topolog
return SUCCESS;
}
graphStatus ComputeGraph::TopologicalSortingGraph() {
graphStatus ComputeGraph::TopologicalSortingGraph(bool dfs_reverse) {
std::vector<NodePtr> node_vec;
std::map<NodePtr, uint32_t> map_in_edge_num;
bool use_BFS = IsUseBFS();
@ -878,7 +887,7 @@ graphStatus ComputeGraph::TopologicalSortingGraph() {
}
} else {
std::vector<NodePtr> stack;
if (DFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) {
if (DFSTopologicalSorting(node_vec, map_in_edge_num, stack, dfs_reverse) != GRAPH_SUCCESS) {
return GRAPH_FAILED;
}
}

@ -982,6 +982,7 @@ const std::string ATTR_NAME_OPTIMIZE_GROUP = "_optimize_group";
const std::string ATTR_NAME_OP_COMPILE_STRATEGY = "_op_compile_strategy";
const std::string ATTR_NAME_TBE_KERNEL_NAME = "_tbe_kernel_name";
const std::string ATTR_NAME_TBE_KERNEL_BUFFER = "_tbe_kernel_buffer";
const std::string ATTR_NAME_DATA_SLICE = "_data_slice";
// used for memory allocate
const std::string ATTR_NAME_INPUT_MEM_TYPE_LIST = "_input_memory_type";
@ -1095,4 +1096,7 @@ const std::string ATTR_STAGE_LEVEL = "_stage_level";
// input_output_offset
const std::string ATTR_ZERO_COPY_BASIC_OFFSET = "_zero_copy_basic_offset";
const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET = "_zero_copy_relative_offset";
// The processing mode of INF and NAN during floating-point number calculation.
const std::string ATTR_FP_CEILING_MODE = "_fp_ceiling_mode";
} // namespace ge

@ -66,13 +66,14 @@ COMMON_LOCAL_C_INCLUDES := \
third_party/protobuf/include \
libc_sec/include \
ops/built-in/op_proto/inc \
cann/ops/built-in/op_proto/inc \
#compiler for host
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 -Dgoogle=ascend_private
LOCAL_CPPFLAGS += -fexceptions
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
@ -80,7 +81,7 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
liberror_manager \
@ -148,14 +149,14 @@ include $(BUILD_HOST_SHARED_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph
LOCAL_CFLAGS += -O2
LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
liberror_manager \
@ -234,14 +235,14 @@ include $(BUILD_SHARED_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph
LOCAL_CFLAGS +=
LOCAL_CFLAGS += -Dgoogle=ascend_private
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
liberror_manager \
@ -257,14 +258,14 @@ include $(BUILD_LLT_SHARED_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 -Dgoogle=ascend_private
LOCAL_CPPFLAGS += -fexceptions
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_STATIC_LIBRARIES := \
libprotobuf \
libascend_protobuf \
LOCAL_SHARED_LIBRARIES := \
libc_sec \
@ -282,13 +283,13 @@ include $(BUILD_HOST_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libgraph
LOCAL_CFLAGS += -O2
LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private
LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_STATIC_LIBRARIES := \
libprotobuf \
libascend_protobuf \
LOCAL_SHARED_LIBRARIES := \
libc_sec \

@ -20,6 +20,7 @@
#include "external/graph/operator.h"
#include "framework/common/debug/ge_log.h"
#include "common/util/error_manager/error_manager.h"
#include "graph/common_error_codes.h"
#include "graph/ge_attr_value.h"
#include "graph/ge_tensor.h"
#include "graph/operator_factory_impl.h"
@ -1406,4 +1407,17 @@ OpDesc::GetSubgraphNameByInstanceName(const std::string &instance_name, std::str
return GRAPH_PARAM_INVALID;
}
/// @brief Run the infer-data-slice function associated with this op's type.
///        The function is fetched from OperatorFactoryImpl by GetType() the first time it is
///        needed and cached in infer_data_slice_func_ for later calls.
/// @return NO_DEPENDENCE_FUNC when no function is registered for this op type;
///         otherwise the status returned by the registered function.
GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::InferDataSlice() {
  if (infer_data_slice_func_ == nullptr) {
    // Lazy lookup: only consult the factory when nothing has been cached yet.
    infer_data_slice_func_ = OperatorFactoryImpl::GetInferDataSliceFunc(GetType());
    if (infer_data_slice_func_ == nullptr) {
      GELOGW("%s does not have infer data slice func.", GetName().c_str());
      return NO_DEPENDENCE_FUNC;
    }
  }
  // The registered function works on an Operator, so wrap this OpDesc in a temporary proxy.
  Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this());
  // Named cast instead of a C-style cast.
  const graphStatus ret = static_cast<graphStatus>(infer_data_slice_func_(op_proxy));
  // NOTE(review): presumably detaches the temporary proxy from this OpDesc - confirm against Operator::BreakConnect.
  op_proxy.BreakConnect();
  return ret;
}
} // namespace ge

@ -23,6 +23,7 @@ shared_ptr<std::map<string, OpCreator>> OperatorFactoryImpl::operator_creators_;
shared_ptr<std::map<string, InferShapeFunc>> OperatorFactoryImpl::operator_infershape_funcs_;
shared_ptr<std::map<string, InferFormatFunc>> OperatorFactoryImpl::operator_inferformat_funcs_;
shared_ptr<std::map<string, VerifyFunc>> OperatorFactoryImpl::operator_verify_funcs_;
shared_ptr<std::map<string, InferDataSliceFunc>> OperatorFactoryImpl::operator_infer_data_slice_funcs_;
Operator OperatorFactoryImpl::CreateOperator(const std::string &operator_name, const std::string &operator_type) {
if (operator_creators_ == nullptr) {
@ -94,6 +95,17 @@ VerifyFunc OperatorFactoryImpl::GetVerifyFunc(const std::string &operator_type)
return it->second;
}
/// @brief Look up the infer-data-slice function registered for an operator type.
/// @param [in] operator_type  key used when the function was registered
/// @return the registered function, or nullptr when the registry has not been created
///         or holds no entry for operator_type
InferDataSliceFunc OperatorFactoryImpl::GetInferDataSliceFunc(const std::string &operator_type) {
  if (operator_infer_data_slice_funcs_ != nullptr) {
    const auto found = operator_infer_data_slice_funcs_->find(operator_type);
    if (found != operator_infer_data_slice_funcs_->end()) {
      return found->second;
    }
  }
  return nullptr;
}
graphStatus OperatorFactoryImpl::RegisterOperatorCreator(const string &operator_type, OpCreator const &op_creator) {
if (operator_creators_ == nullptr) {
operator_creators_.reset(new (std::nothrow) std::map<string, OpCreator>());
@ -146,4 +158,18 @@ graphStatus OperatorFactoryImpl::RegisterVerifyFunc(const std::string &operator_
(void)operator_verify_funcs_->emplace(operator_type, verify_func);
return GRAPH_SUCCESS;
}
/// @brief Register the infer-data-slice function for an operator type.
///        The registry map is created on the first registration.
/// @param [in] operator_type          key under which the function is stored
/// @param [in] infer_data_slice_func  function to store
/// @return GRAPH_SUCCESS when the function was stored;
///         GRAPH_FAILED when the registry could not be allocated or operator_type
///         already has a registered function (the existing entry is left untouched)
graphStatus OperatorFactoryImpl::RegisterInferDataSliceFunc(const std::string &operator_type,
                                                            InferDataSliceFunc const infer_data_slice_func) {
  if (operator_infer_data_slice_funcs_ == nullptr) {
    GELOGI("operator_infer_data_slice_funcs_ init");
    operator_infer_data_slice_funcs_.reset(new (std::nothrow) std::map<string, InferDataSliceFunc>());
    // new (std::nothrow) yields nullptr on allocation failure; bail out instead of dereferencing it below.
    if (operator_infer_data_slice_funcs_ == nullptr) {
      return GRAPH_FAILED;
    }
  }
  auto it = operator_infer_data_slice_funcs_->find(operator_type);
  if (it != operator_infer_data_slice_funcs_->end()) {
    // Duplicate registration is rejected rather than overwriting the first entry.
    return GRAPH_FAILED;
  }
  (void)operator_infer_data_slice_funcs_->emplace(operator_type, infer_data_slice_func);
  return GRAPH_SUCCESS;
}
} // namespace ge

@ -119,7 +119,11 @@ graphStatus TuningUtils::ConvertGraphToFile(std::vector<ComputeGraphPtr> tuning_
// +---------------+
graphStatus TuningUtils::MakeExeGraph(ComputeGraphPtr &exe_graph, const HelpInfo &help_info) {
GE_CHECK_NOTNULL(exe_graph);
graphStatus ret = exe_graph->TopologicalSortingGraph(true);
if (ret != SUCCESS) {
GELOGE(ret, "Graph[%s] topological sort failed, ret:%d.", exe_graph->GetName().c_str(), ret);
return ret;
}
// clear graph id
GELOGI("TUU:clear [%s] session_graph_id %s", exe_graph->GetName().c_str(),
(AttrUtils::SetStr(*exe_graph, ATTR_NAME_SESSION_GRAPH_ID, "") ? "success" : "not success"));
@ -148,7 +152,7 @@ graphStatus TuningUtils::MakeExeGraph(ComputeGraphPtr &exe_graph, const HelpInfo
}
}
}
graphStatus ret = exe_graph->TopologicalSorting();
ret = exe_graph->TopologicalSortingGraph(true);
if (ret != SUCCESS) {
GELOGE(ret, "Graph[%s] topological sort failed, ret:%d.", exe_graph->GetName().c_str(), ret);
return ret;

@ -52,7 +52,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_client
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@ -63,7 +63,7 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
libmmpa \
libgraph \
@ -88,7 +88,7 @@ LOCAL_MODULE := libge_client
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1 -Dgoogle=ascend_private
LOCAL_MODULE_CLASS := SHARED_LIBRARIES
LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES)
@ -97,7 +97,7 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
libmmpa \
libgraph \

@ -82,7 +82,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_common
LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
@ -105,7 +105,7 @@ LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libascend_protobuf \
libc_sec \
libslog \
libmmpa \
@ -123,7 +123,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_common
LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
@ -146,7 +146,7 @@ LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES)
LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)
LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libascend_protobuf \
libc_sec \
libslog \
libmmpa \
@ -169,7 +169,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_common
LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@ -192,7 +192,7 @@ LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)
LOCAL_STATIC_LIBRARIES := \
libgraph \
libprotobuf \
libascend_protobuf \
LOCAL_SHARED_LIBRARIES := \
libc_sec \
@ -211,7 +211,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_common
LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@ -233,7 +233,7 @@ LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES)
LOCAL_STATIC_LIBRARIES := \
libgraph \
libprotobuf \
libascend_protobuf \
LOCAL_SHARED_LIBRARIES := \
libc_sec \

@ -82,7 +82,7 @@ local_ge_executor_c_include := \
third_party/json/include \
local_ge_executor_shared_library := \
libprotobuf \
libascend_protobuf \
libc_sec \
libge_common \
libruntime \
@ -101,7 +101,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private
LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
@ -127,7 +127,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
@ -139,7 +139,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files)
LOCAL_C_INCLUDES := $(local_ge_executor_c_include)
LOCAL_SHARED_LIBRARIES := \
libprotobuf \
libascend_protobuf \
libc_sec \
libge_common \
libruntime \
@ -163,7 +163,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
@ -178,7 +178,7 @@ LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libregister \
libprotobuf \
libascend_protobuf \
LOCAL_SHARED_LIBRARIES := \
libc_sec \
@ -196,7 +196,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_executor
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
else
@ -210,7 +210,7 @@ LOCAL_STATIC_LIBRARIES := \
libge_common \
libgraph \
libregister \
libprotobuf \
libascend_protobuf \
LOCAL_SHARED_LIBRARIES := \
libc_sec \

@ -363,7 +363,7 @@ LOCAL_MODULE := libge_compiler
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
# from ome_inference.mk
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE
LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@ -383,7 +383,7 @@ LOCAL_STATIC_LIBRARIES := libge_memory \
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
libmmpa \
libgraph \
@ -428,7 +428,7 @@ LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0
LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE
LOCAL_CFLAGS += -DOMG_DEVICE_VERSION
LOCAL_CFLAGS += -O2
LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private
LOCAL_MODULE_CLASS := SHARED_LIBRARIES
@ -447,7 +447,7 @@ LOCAL_STATIC_LIBRARIES := libge_memory \
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
libmmpa \
libgraph \

@ -29,11 +29,11 @@ local_lib_inc_path := proto/task.proto \
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
@ -49,11 +49,11 @@ include ${BUILD_HOST_SHARED_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libge_local_engine
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE
LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
@ -69,11 +69,11 @@ include ${BUILD_HOST_SHARED_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libregister \
@ -90,11 +90,11 @@ include ${BUILD_HOST_SHARED_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := atclib/libge_local_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libregister \
@ -110,10 +110,10 @@ include ${BUILD_HOST_SHARED_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES := libprotobuf \
LOCAL_STATIC_LIBRARIES := libascend_protobuf \
libregister \
libgraph \
@ -130,10 +130,10 @@ include ${BUILD_HOST_STATIC_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := libge_local_opskernel_builder
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES := libprotobuf \
LOCAL_STATIC_LIBRARIES := libascend_protobuf \
libregister \
libgraph \

@ -352,7 +352,7 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libge_runner
LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD
LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@ -369,7 +369,7 @@ LOCAL_STATIC_LIBRARIES := libge_memory \
LOCAL_SHARED_LIBRARIES := \
libc_sec \
libprotobuf \
libascend_protobuf \
libslog \
libmmpa \
libgraph \

@ -599,9 +599,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector<GeTensor> &in
} else {
for (const auto &in_desc : inputs) {
GeTensorDesc input_desc = in_desc.GetTensorDesc();
if (!IsNeedConnectInputOpForSingleOp(input_desc)) {
continue;
}
GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true));
arg_index++;
}

@ -25,7 +25,7 @@ LOCAL_MODULE := libge_memory
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -O2
LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private
ifeq ($(DEBUG), 1)
LOCAL_CFLAGS += -g -O0
endif
@ -33,7 +33,7 @@ endif
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
@ -56,11 +56,11 @@ LOCAL_MODULE := libge_memory
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -Werror
LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY
LOCAL_CFLAGS += -O2
LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \
@ -79,11 +79,11 @@ include ${BUILD_STATIC_LIBRARY}
include $(CLEAR_VARS)
LOCAL_MODULE := libge_memory
LOCAL_CFLAGS += -std=c++11
LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private
LOCAL_LDFLAGS :=
LOCAL_STATIC_LIBRARIES :=
LOCAL_SHARED_LIBRARIES := libprotobuf \
LOCAL_SHARED_LIBRARIES := libascend_protobuf \
libc_sec \
libslog \
libgraph \

@ -709,14 +709,10 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) {
GELOGE(RT_FAILED, "rtGetRtCapability failed.");
return RT_FAILED;
} else {
if (value == RT_CAPABILITY_SUPPORT) {
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode);
MemcpyAddrAsyncPass memcpy_addr;
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed.");
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run.");
} else {
GELOGW("rtGetRtCapability not support memcpy_addr_async.");
}
GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode);
MemcpyAddrAsyncPass memcpy_addr;
GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed.");
GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run.");
}
GE_TIMESTAMP_START(AssignMemory);

@ -650,7 +650,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size
for (const ge::NodePtr &node : compute_graph->GetDirectNode()) {
auto op_desc = node->GetOpDesc();
GE_IF_BOOL_EXEC(op_desc == nullptr, continue);
GetFixedAddrAttr(op_desc);
GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue);
GE_IF_BOOL_EXEC(IsBroadCastOpData(node),
(void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore"););
@ -839,7 +838,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) {
}
continue;
}
// for dynamic shape with control flow
SetLabelForDynamic(node);
if (IsNoTaskAndDumpNeeded(op_desc)) {
GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str());
const RuntimeParam &rts_param = GetRuntimeParam();
@ -913,6 +913,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr
return SUCCESS;
}
void DavinciModel::SetLabelForDynamic(const NodePtr &node) {
if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) {
for (auto &in_data_anchor : node->GetAllInDataAnchors()) {
auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor();
if (peer_out_data_anchor != nullptr) {
string tensor_name = node->GetName();
auto peer_node = peer_out_data_anchor->GetOwnerNode();
(void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
(void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0);
tensor_name_to_peer_output_index_[tensor_name] = 0;
}
}
}
}
/// @ingroup ge
/// @brief Data Op Initialize.
/// @param [in] NodePtr: Data Op.
@ -3949,15 +3964,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) {
}
}
void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) {
if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) {
string tensor_name;
(void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name);
int64_t index = -1;
(void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index);
if (index >= 0) {
tensor_name_to_peer_output_index_[tensor_name] = index;
}
}
}
} // namespace ge

@ -836,7 +836,7 @@ class DavinciModel {
std::vector<ge::OutputTensorInfo> &outputs);
void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info);
void GetFixedAddrAttr(const OpDescPtr &op_desc);
void SetLabelForDynamic(const NodePtr &node);
bool is_model_has_inited_;
uint32_t model_id_;

@ -337,16 +337,6 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
continue;
}
int64_t mem_type;
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
if (tensor_has_mem_type && v_memory_type[i] != RT_MEMORY_L1) {
uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_input_offset[i];
v_input_data_addr.push_back(p2p_mem_addr);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[P] name[%s] input[%zu] memaddr[%p]", model_param.graph_id,
op_desc->GetName().c_str(), i, p2p_mem_addr);
continue;
}
GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(),
GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index);
break);
@ -361,6 +351,8 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr);
continue);
int64_t mem_type;
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
// feature maps
void *mem_addr = nullptr;
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion
@ -372,6 +364,12 @@ vector<void *> ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset);
mem_addr = model_param.ts_mem_mall->Acquire(input_offset, static_cast<uint64_t>(tensor_size));
v_input_data_addr.push_back(mem_addr);
} else if (tensor_has_mem_type && mem_type == RT_MEMORY_P2P_DDR) {
uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_input_offset[i];
v_input_data_addr.push_back(p2p_mem_addr);
GELOGI("[IMAS]GetInputDataAddrs graph_%u type[P] name[%s] input[%zu] memaddr[%p]", model_param.graph_id,
op_desc->GetName().c_str(), i, p2p_mem_addr);
continue;
} else {
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset);
mem_addr = model_param.mem_base + input_offset;
@ -420,15 +418,9 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i);
continue;
}
int64_t mem_type;
bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type);
if (tensor_has_mem_type && v_memory_type[i] != RT_MEMORY_L1) {
uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_output_offset[i];
v_output_data_addr.push_back(p2p_mem_addr);
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[P] name[%s] output[%zu] memaddr[%p]", model_param.graph_id,
op_desc->GetName().c_str(), i, p2p_mem_addr);
continue;
}
// feature maps
void *mem_addr = nullptr;
if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion
@ -442,6 +434,12 @@ vector<void *> ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]);
mem_addr = model_param.ts_mem_mall->Acquire(v_output_offset[i], static_cast<uint64_t>(tensor_size));
v_output_data_addr.push_back(mem_addr);
} else if (tensor_has_mem_type && mem_type == RT_MEMORY_P2P_DDR) {
uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_output_offset[i];
v_output_data_addr.push_back(p2p_mem_addr);
GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[P] name[%s] output[%zu] memaddr[%p]", model_param.graph_id,
op_desc->GetName().c_str(), i, p2p_mem_addr);
continue;
} else {
VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]);
mem_addr = static_cast<uint8_t *>(model_param.mem_base + v_output_offset[i]);

@ -559,10 +559,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
GE_CHECK_NOTNULL(davinci_model_);
// get tvm op desc
OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex);
if (op_desc == nullptr) {
GELOGE(INTERNAL_ERROR, "InitTVMTaskInfo error, index:%u out of range!", ctx_.opIndex);
return INTERNAL_ERROR;
}
GE_CHECK_NOTNULL(op_desc);
if (davinci_model_->IsKnownNode()) {
return SUCCESS;
}
@ -650,6 +647,9 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne
vector<void *> virtual_io_addrs; // use virtual address for zero copy key.
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end());
virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end());
if (op_desc->GetType() == ATOMICADDRCLEAN) {
virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end());
}
davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset);
GELOGD("Do InitTVMTask end");

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save