diff --git a/inc/external/ge/ge_api_types.h b/inc/external/ge/ge_api_types.h index 113b8bc6..9c51bac0 100644 --- a/inc/external/ge/ge_api_types.h +++ b/inc/external/ge/ge_api_types.h @@ -285,6 +285,11 @@ namespace ir_option { static const char *const INPUT_FORMAT = "input_format"; static const char *const INPUT_SHAPE = "input_shape"; static const char *const OP_NAME_MAP = "op_name_map"; +static const char *const IS_DYNAMIC_INPUT = "is_dynamic_input"; +static const char *const IS_INPUT_ADJUST_HW_LAYOUT = "is_input_adjust_hw_layout"; +static const char *const IS_OUTPUT_ADJUST_HW_LAYOUT = "is_output_adjust_hw_layout"; +static const char *const ENABLE_SCOPE_FUSION_PASSES = "enable_scope_fusion_passes"; +static const char *const OUTPUT = "output"; static const char *const DYNAMIC_BATCH_SIZE = kDynamicBatchSize; static const char *const DYNAMIC_IMAGE_SIZE = kDynamicImageSize; static const char *const DYNAMIC_DIMS = kDynamicDims; @@ -317,6 +322,22 @@ const std::set ir_builder_suppported_options = { INSERT_OP_FILE, PRECISION_MODE, EXEC_DISABLE_REUSED_MEMORY, AUTO_TUNE_MODE, OUTPUT_TYPE, OUT_NODES, INPUT_FP16_NODES, LOG_LEVEL}; + +// for interface: aclgrphParse +const std::set ir_parser_suppported_options = {INPUT_FORMAT, + INPUT_SHAPE, + OP_NAME_MAP, + IS_DYNAMIC_INPUT, + INPUT_FP16_NODES, + IS_INPUT_ADJUST_HW_LAYOUT, + IS_OUTPUT_ADJUST_HW_LAYOUT, + OUTPUT, + OUTPUT_TYPE, + OUT_NODES, + COMPRESS_WEIGHT_CONF, + ENABLE_SCOPE_FUSION_PASSES, + LOG_LEVEL}; + // for interface: aclgrphBuildInitialize const std::set global_options = {CORE_TYPE, SOC_VERSION, diff --git a/inc/graph/common_error_codes.h b/inc/graph/common_error_codes.h new file mode 100644 index 00000000..cdf9086f --- /dev/null +++ b/inc/graph/common_error_codes.h @@ -0,0 +1,28 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef INC_GRAPH_COMMON_ERROR_CODES_H_ +#define INC_GRAPH_COMMON_ERROR_CODES_H_ + +#include "external/graph/ge_error_codes.h" + +namespace ge { +const graphStatus NO_DEPENDENCE_FUNC = 50331647; +const graphStatus NO_OVERLAP_DIM = 50331646; +const graphStatus NOT_SUPPORT_SLICE = 50331645; +} // namespace ge + +#endif // INC_GRAPH_COMMON_ERROR_CODES_H_ diff --git a/inc/graph/compute_graph.h b/inc/graph/compute_graph.h index 9a454f39..3bbfb740 100644 --- a/inc/graph/compute_graph.h +++ b/inc/graph/compute_graph.h @@ -247,12 +247,16 @@ class ComputeGraph : public std::enable_shared_from_this, public A private: graphStatus DFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num, - std::vector &stack); + std::vector &stack, bool reverse); graphStatus BFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num, std::deque &stack); graphStatus CollectBreadthOutNode(const NodePtr &node, std::map &map_in_edge_num, std::map &breadth_node_map); - graphStatus TopologicalSortingGraph(); + /// nodes like : (a) <--- (c) ---> (b) + /// node a and b have only one parent node c, and a is connected to c firstly + /// topo order of DFS is `c, b, a` with `dfs_reverse=false` as default + /// in same case, user could get `c, a, b` with `dfs_reverse=true` + graphStatus TopologicalSortingGraph(bool dfs_reverse = false); graphStatus SortNodes(std::vector &stack, std::map &mapInEdgeNum); Vistor AllGraphNodes(std::vector> &subgraphs) const; size_t GetInEdgeSize(const NodePtr &node); diff --git a/inc/graph/debug/ge_attr_define.h 
b/inc/graph/debug/ge_attr_define.h index b7b3c3cd..a76008a3 100644 --- a/inc/graph/debug/ge_attr_define.h +++ b/inc/graph/debug/ge_attr_define.h @@ -1041,6 +1041,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAM GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_OP_COMPILE_STRATEGY; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TBE_KERNEL_NAME; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_TBE_KERNEL_BUFFER; +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_DATA_SLICE; // used for memory allocate GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_NAME_INPUT_MEM_TYPE_LIST; @@ -1132,6 +1133,9 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_STA // input_output_offset GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_BASIC_OFFSET; GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET; + +// The processing mode of INF and NAN during floating-point number calculation. 
+GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY extern const std::string ATTR_FP_CEILING_MODE; } // namespace ge #endif // INC_GRAPH_DEBUG_GE_ATTR_DEFINE_H_ diff --git a/inc/graph/op_desc.h b/inc/graph/op_desc.h index 4d724c42..2e3009b3 100644 --- a/inc/graph/op_desc.h +++ b/inc/graph/op_desc.h @@ -278,6 +278,8 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder { graphStatus GetSubgraphNameByInstanceName(const std::string &instance_name, std::string &subgraph_name) const; + graphStatus InferDataSlice(); + protected: ProtoAttrMapHelper MutableAttrMap() override; ConstProtoAttrMapHelper GetAttrMap() const override; @@ -317,6 +319,7 @@ class OpDesc : public std::enable_shared_from_this, public AttrHolder { std::function infer_func_ = nullptr; std::function infer_format_func_ = nullptr; std::function verifier_func_ = nullptr; + std::function infer_data_slice_func_ = nullptr; string op_kernel_lib_name_; string engine_name_; friend class OpDescUtils; diff --git a/inc/graph/operator_factory_impl.h b/inc/graph/operator_factory_impl.h index ea343ebc..aaa9b2c7 100644 --- a/inc/graph/operator_factory_impl.h +++ b/inc/graph/operator_factory_impl.h @@ -22,6 +22,7 @@ #include #include #include "graph/operator_factory.h" +#include "register/infer_data_slice_registry.h" namespace ge { class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl { @@ -38,6 +39,8 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl { static VerifyFunc GetVerifyFunc(const std::string &operator_type); + static InferDataSliceFunc GetInferDataSliceFunc(const std::string &operator_type); + static graphStatus RegisterOperatorCreator(const std::string &operator_type, OpCreator const &op_creator); static graphStatus RegisterInferShapeFunc(const std::string &operator_type, InferShapeFunc const infer_shape_func); @@ -46,10 +49,14 @@ class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY OperatorFactoryImpl { static graphStatus RegisterVerifyFunc(const 
std::string &operator_type, VerifyFunc const verify_func); + static graphStatus RegisterInferDataSliceFunc(const std::string &operator_type, + InferDataSliceFunc const infer_data_slice_func); + static shared_ptr> operator_creators_; static shared_ptr> operator_infershape_funcs_; static shared_ptr> operator_inferformat_funcs_; static shared_ptr> operator_verify_funcs_; + static shared_ptr> operator_infer_data_slice_funcs_; }; } // namespace ge diff --git a/src/common/graph/compute_graph.cc b/src/common/graph/compute_graph.cc index bae4d362..e62cba69 100644 --- a/src/common/graph/compute_graph.cc +++ b/src/common/graph/compute_graph.cc @@ -728,11 +728,18 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::InsertE graphStatus ComputeGraph::DFSTopologicalSorting(std::vector &node_vec, std::map &map_in_edge_num, - std::vector &stack) { + std::vector &stack, bool reverse) { GELOGI("Runing_Dfs_Sort: %s", name_.c_str()); // Record the number of non data nodes but no input nodes GE_CHK_BOOL_EXEC(SortNodes(stack, map_in_edge_num) == GRAPH_SUCCESS, return GRAPH_FAILED, "sort nodes failed"); - + std::vector out_nodes; + auto stack_push = [&reverse, &stack](std::vector &out_nodes) { + if (reverse) { + std::reverse(out_nodes.begin(), out_nodes.end()); + } + stack.insert(stack.end(), out_nodes.begin(), out_nodes.end()); + out_nodes.clear(); + }; // Only data nodes here while (!stack.empty()) { NodePtr node = stack.back(); @@ -746,16 +753,18 @@ graphStatus ComputeGraph::DFSTopologicalSorting(std::vector &node_vec, GE_CHECK_NOTNULL(peer_in_anchor); auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); if (iter != map_in_edge_num.end() && --iter->second == 0) { - stack.push_back(peer_in_anchor->GetOwnerNode()); + out_nodes.push_back(peer_in_anchor->GetOwnerNode()); } } + stack_push(out_nodes); for (const auto &peer_in_anchor : anchor->GetPeerInControlAnchors()) { GE_CHECK_NOTNULL(peer_in_anchor); auto iter = 
map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); if (iter != map_in_edge_num.end() && --iter->second == 0) { - stack.push_back(peer_in_anchor->GetOwnerNode()); + out_nodes.push_back(peer_in_anchor->GetOwnerNode()); } } + stack_push(out_nodes); } GE_IF_BOOL_EXEC( node->GetOutControlAnchor() != nullptr, for (AnchorPtr peer_in_anchor @@ -763,9 +772,9 @@ graphStatus ComputeGraph::DFSTopologicalSorting(std::vector &node_vec, GE_CHECK_NOTNULL(peer_in_anchor); auto iter = map_in_edge_num.find(peer_in_anchor->GetOwnerNode()); if (iter != map_in_edge_num.end() && --iter->second == 0) { - stack.push_back(peer_in_anchor->GetOwnerNode()); + out_nodes.push_back(peer_in_anchor->GetOwnerNode()); } - }) + } stack_push(out_nodes);) } return GRAPH_SUCCESS; @@ -867,7 +876,7 @@ GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus ComputeGraph::Topolog return SUCCESS; } -graphStatus ComputeGraph::TopologicalSortingGraph() { +graphStatus ComputeGraph::TopologicalSortingGraph(bool dfs_reverse) { std::vector node_vec; std::map map_in_edge_num; bool use_BFS = IsUseBFS(); @@ -878,7 +887,7 @@ graphStatus ComputeGraph::TopologicalSortingGraph() { } } else { std::vector stack; - if (DFSTopologicalSorting(node_vec, map_in_edge_num, stack) != GRAPH_SUCCESS) { + if (DFSTopologicalSorting(node_vec, map_in_edge_num, stack, dfs_reverse) != GRAPH_SUCCESS) { return GRAPH_FAILED; } } diff --git a/src/common/graph/ge_attr_define.cc b/src/common/graph/ge_attr_define.cc index 25b6aa1e..2cc447f8 100644 --- a/src/common/graph/ge_attr_define.cc +++ b/src/common/graph/ge_attr_define.cc @@ -982,6 +982,7 @@ const std::string ATTR_NAME_OPTIMIZE_GROUP = "_optimize_group"; const std::string ATTR_NAME_OP_COMPILE_STRATEGY = "_op_compile_strategy"; const std::string ATTR_NAME_TBE_KERNEL_NAME = "_tbe_kernel_name"; const std::string ATTR_NAME_TBE_KERNEL_BUFFER = "_tbe_kernel_buffer"; +const std::string ATTR_NAME_DATA_SLICE = "_data_slice"; // used for memory allocate const std::string 
ATTR_NAME_INPUT_MEM_TYPE_LIST = "_input_memory_type"; @@ -1095,4 +1096,7 @@ const std::string ATTR_STAGE_LEVEL = "_stage_level"; // input_output_offset const std::string ATTR_ZERO_COPY_BASIC_OFFSET = "_zero_copy_basic_offset"; const std::string ATTR_ZERO_COPY_RELATIVE_OFFSET = "_zero_copy_relative_offset"; + +// The processing mode of INF and NAN during floating-point number calculation. +const std::string ATTR_FP_CEILING_MODE = "_fp_ceiling_mode"; } // namespace ge diff --git a/src/common/graph/graph.mk b/src/common/graph/graph.mk index 562439d2..4221da38 100644 --- a/src/common/graph/graph.mk +++ b/src/common/graph/graph.mk @@ -66,13 +66,14 @@ COMMON_LOCAL_C_INCLUDES := \ third_party/protobuf/include \ libc_sec/include \ ops/built-in/op_proto/inc \ + cann/ops/built-in/op_proto/inc \ #compiler for host include $(CLEAR_VARS) LOCAL_MODULE := libgraph -LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 -Dgoogle=ascend_private LOCAL_CPPFLAGS += -fexceptions LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) @@ -80,7 +81,7 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ liberror_manager \ @@ -148,14 +149,14 @@ include $(BUILD_HOST_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libgraph -LOCAL_CFLAGS += -O2 +LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ liberror_manager \ @@ -234,14 +235,14 @@ include $(BUILD_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libgraph -LOCAL_CFLAGS += +LOCAL_CFLAGS += -Dgoogle=ascend_private LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ liberror_manager \ @@ -257,14 +258,14 @@ include 
$(BUILD_LLT_SHARED_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libgraph -LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -O2 -Dgoogle=ascend_private LOCAL_CPPFLAGS += -fexceptions LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := \ - libprotobuf \ + libascend_protobuf \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -282,13 +283,13 @@ include $(BUILD_HOST_STATIC_LIBRARY) include $(CLEAR_VARS) LOCAL_MODULE := libgraph -LOCAL_CFLAGS += -O2 +LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private LOCAL_C_INCLUDES := $(COMMON_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := \ - libprotobuf \ + libascend_protobuf \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/common/graph/op_desc.cc b/src/common/graph/op_desc.cc index 35cbf3f7..d568d491 100644 --- a/src/common/graph/op_desc.cc +++ b/src/common/graph/op_desc.cc @@ -20,6 +20,7 @@ #include "external/graph/operator.h" #include "framework/common/debug/ge_log.h" #include "common/util/error_manager/error_manager.h" +#include "graph/common_error_codes.h" #include "graph/ge_attr_value.h" #include "graph/ge_tensor.h" #include "graph/operator_factory_impl.h" @@ -1406,4 +1407,17 @@ OpDesc::GetSubgraphNameByInstanceName(const std::string &instance_name, std::str return GRAPH_PARAM_INVALID; } +GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY graphStatus OpDesc::InferDataSlice() { + if (infer_data_slice_func_ == nullptr) { + infer_data_slice_func_ = OperatorFactoryImpl::GetInferDataSliceFunc(GetType()); + if (infer_data_slice_func_ == nullptr) { + GELOGW("%s does not have infer data slice func.", GetName().c_str()); + return NO_DEPENDENCE_FUNC; + } + } + Operator op_proxy = ge::OpDescUtils::CreateOperatorFromOpDesc(shared_from_this()); + graphStatus ret = (graphStatus)infer_data_slice_func_(op_proxy); + op_proxy.BreakConnect(); + return ret; +} } // namespace ge diff --git 
a/src/common/graph/operator_factory_impl.cc b/src/common/graph/operator_factory_impl.cc index 026a85bc..8927f496 100644 --- a/src/common/graph/operator_factory_impl.cc +++ b/src/common/graph/operator_factory_impl.cc @@ -23,6 +23,7 @@ shared_ptr> OperatorFactoryImpl::operator_creators_; shared_ptr> OperatorFactoryImpl::operator_infershape_funcs_; shared_ptr> OperatorFactoryImpl::operator_inferformat_funcs_; shared_ptr> OperatorFactoryImpl::operator_verify_funcs_; +shared_ptr> OperatorFactoryImpl::operator_infer_data_slice_funcs_; Operator OperatorFactoryImpl::CreateOperator(const std::string &operator_name, const std::string &operator_type) { if (operator_creators_ == nullptr) { @@ -94,6 +95,17 @@ VerifyFunc OperatorFactoryImpl::GetVerifyFunc(const std::string &operator_type) return it->second; } +InferDataSliceFunc OperatorFactoryImpl::GetInferDataSliceFunc(const std::string &operator_type) { + if (operator_infer_data_slice_funcs_ == nullptr) { + return nullptr; + } + auto it = operator_infer_data_slice_funcs_->find(operator_type); + if (it == operator_infer_data_slice_funcs_->end()) { + return nullptr; + } + return it->second; +} + graphStatus OperatorFactoryImpl::RegisterOperatorCreator(const string &operator_type, OpCreator const &op_creator) { if (operator_creators_ == nullptr) { operator_creators_.reset(new (std::nothrow) std::map()); @@ -146,4 +158,18 @@ graphStatus OperatorFactoryImpl::RegisterVerifyFunc(const std::string &operator_ (void)operator_verify_funcs_->emplace(operator_type, verify_func); return GRAPH_SUCCESS; } + +graphStatus OperatorFactoryImpl::RegisterInferDataSliceFunc(const std::string &operator_type, + InferDataSliceFunc const infer_data_slice_func) { + if (operator_infer_data_slice_funcs_ == nullptr) { + GELOGI("operator_infer_data_slice_funcs_ init"); + operator_infer_data_slice_funcs_.reset(new (std::nothrow) std::map()); + } + auto it = operator_infer_data_slice_funcs_->find(operator_type); + if (it != 
operator_infer_data_slice_funcs_->end()) { + return GRAPH_FAILED; + } + (void)operator_infer_data_slice_funcs_->emplace(operator_type, infer_data_slice_func); + return GRAPH_SUCCESS; +} } // namespace ge diff --git a/src/common/graph/utils/tuning_utils.cc b/src/common/graph/utils/tuning_utils.cc index a5a15562..26b03f56 100644 --- a/src/common/graph/utils/tuning_utils.cc +++ b/src/common/graph/utils/tuning_utils.cc @@ -119,7 +119,11 @@ graphStatus TuningUtils::ConvertGraphToFile(std::vector tuning_ // +---------------+ graphStatus TuningUtils::MakeExeGraph(ComputeGraphPtr &exe_graph, const HelpInfo &help_info) { GE_CHECK_NOTNULL(exe_graph); - + graphStatus ret = exe_graph->TopologicalSortingGraph(true); + if (ret != SUCCESS) { + GELOGE(ret, "Graph[%s] topological sort failed, ret:%d.", exe_graph->GetName().c_str(), ret); + return ret; + } // clear graph id GELOGI("TUU:clear [%s] session_graph_id %s", exe_graph->GetName().c_str(), (AttrUtils::SetStr(*exe_graph, ATTR_NAME_SESSION_GRAPH_ID, "") ? 
"success" : "not success")); @@ -148,7 +152,7 @@ graphStatus TuningUtils::MakeExeGraph(ComputeGraphPtr &exe_graph, const HelpInfo } } } - graphStatus ret = exe_graph->TopologicalSorting(); + ret = exe_graph->TopologicalSortingGraph(true); if (ret != SUCCESS) { GELOGE(ret, "Graph[%s] topological sort failed, ret:%d.", exe_graph->GetName().c_str(), ret); return ret; diff --git a/src/ge/client/module.mk b/src/ge/client/module.mk index 1a304cbf..6ac69d31 100644 --- a/src/ge/client/module.mk +++ b/src/ge/client/module.mk @@ -52,7 +52,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_client LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -63,7 +63,7 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ libmmpa \ libgraph \ @@ -88,7 +88,7 @@ LOCAL_MODULE := libge_client LOCAL_CFLAGS += -Werror LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1 +LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -DREUSE_MEMORY=1 -Dgoogle=ascend_private LOCAL_MODULE_CLASS := SHARED_LIBRARIES LOCAL_C_INCLUDES := $(DEVICE_LOCAL_C_INCLUDES) @@ -97,7 +97,7 @@ LOCAL_SRC_FILES := $(COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ libmmpa \ libgraph \ diff --git a/src/ge/common/ge_common.mk b/src/ge/common/ge_common.mk index efddc788..45ee1057 100644 --- a/src/ge/common/ge_common.mk +++ b/src/ge/common/ge_common.mk @@ -82,7 +82,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_common LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private ifeq 
($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 else @@ -105,7 +105,7 @@ LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ - libprotobuf \ + libascend_protobuf \ libc_sec \ libslog \ libmmpa \ @@ -123,7 +123,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_common LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 else @@ -146,7 +146,7 @@ LOCAL_C_INCLUDES := $(GE_COMMON_LOCAL_C_INCLUDES) LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) LOCAL_SHARED_LIBRARIES := \ - libprotobuf \ + libascend_protobuf \ libc_sec \ libslog \ libmmpa \ @@ -169,7 +169,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_common LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -192,7 +192,7 @@ LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := \ libgraph \ - libprotobuf \ + libascend_protobuf \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -211,7 +211,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_common LOCAL_CFLAGS += -Werror -DFMK_SUPPORT_DUMP -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -233,7 +233,7 @@ LOCAL_SRC_FILES := $(GE_COMMON_LOCAL_SRC_FILES) LOCAL_STATIC_LIBRARIES := \ libgraph \ - libprotobuf \ + libascend_protobuf \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/ge/executor/module.mk b/src/ge/executor/module.mk index 309feb10..eaa611d2 100644 --- a/src/ge/executor/module.mk +++ b/src/ge/executor/module.mk @@ -82,7 +82,7 @@ local_ge_executor_c_include := \ third_party/json/include \ 
local_ge_executor_shared_library := \ - libprotobuf \ + libascend_protobuf \ libc_sec \ libge_common \ libruntime \ @@ -101,7 +101,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_executor LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -O2 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private LOCAL_SRC_FILES := $(local_ge_executor_src_files) LOCAL_C_INCLUDES := $(local_ge_executor_c_include) @@ -127,7 +127,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_executor LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 else @@ -139,7 +139,7 @@ LOCAL_SRC_FILES := $(local_ge_executor_src_files) LOCAL_C_INCLUDES := $(local_ge_executor_c_include) LOCAL_SHARED_LIBRARIES := \ - libprotobuf \ + libascend_protobuf \ libc_sec \ libge_common \ libruntime \ @@ -163,7 +163,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_executor LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 else @@ -178,7 +178,7 @@ LOCAL_STATIC_LIBRARIES := \ libge_common \ libgraph \ libregister \ - libprotobuf \ + libascend_protobuf \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ @@ -196,7 +196,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libge_executor LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING +LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DDAVINCI_SUPPORT_PROFILING -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 else @@ -210,7 +210,7 @@ LOCAL_STATIC_LIBRARIES := \ libge_common \ libgraph \ libregister \ - libprotobuf \ + 
libascend_protobuf \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ diff --git a/src/ge/ge_inference.mk b/src/ge/ge_inference.mk index a33174e5..81928c1b 100644 --- a/src/ge/ge_inference.mk +++ b/src/ge/ge_inference.mk @@ -363,7 +363,7 @@ LOCAL_MODULE := libge_compiler LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 # from ome_inference.mk -LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE +LOCAL_CFLAGS += -DFMK_HOST_INFER -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -383,7 +383,7 @@ LOCAL_STATIC_LIBRARIES := libge_memory \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ libmmpa \ libgraph \ @@ -428,7 +428,7 @@ LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -DNONSUPPORT_SAVE_TO_ LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 LOCAL_CFLAGS += -DREUSE_MEMORY=1 -DFMK_SUPPORT_DUMP -DCOMPILE_OMG_PACKAGE LOCAL_CFLAGS += -DOMG_DEVICE_VERSION -LOCAL_CFLAGS += -O2 +LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private LOCAL_MODULE_CLASS := SHARED_LIBRARIES @@ -447,7 +447,7 @@ LOCAL_STATIC_LIBRARIES := libge_memory \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ libmmpa \ libgraph \ diff --git a/src/ge/ge_local_engine/module.mk b/src/ge/ge_local_engine/module.mk index a0247da7..0f95d352 100644 --- a/src/ge/ge_local_engine/module.mk +++ b/src/ge/ge_local_engine/module.mk @@ -29,11 +29,11 @@ local_lib_inc_path := proto/task.proto \ include $(CLEAR_VARS) LOCAL_MODULE := libge_local_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -49,11 +49,11 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libge_local_engine LOCAL_CFLAGS 
+= -Werror -LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE +LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -69,11 +69,11 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libge_local_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libregister \ @@ -90,11 +90,11 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libge_local_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libregister \ @@ -110,10 +110,10 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libge_local_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := -LOCAL_STATIC_LIBRARIES := libprotobuf \ +LOCAL_STATIC_LIBRARIES := libascend_protobuf \ libregister \ libgraph \ @@ -130,10 +130,10 @@ include ${BUILD_HOST_STATIC_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libge_local_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := -LOCAL_STATIC_LIBRARIES := libprotobuf \ +LOCAL_STATIC_LIBRARIES := libascend_protobuf \ libregister \ libgraph \ diff --git a/src/ge/ge_runner.mk b/src/ge/ge_runner.mk index 270e632f..7d1058f4 100644 --- a/src/ge/ge_runner.mk +++ b/src/ge/ge_runner.mk @@ -352,7 +352,7 @@ include 
$(CLEAR_VARS) LOCAL_MODULE := libge_runner LOCAL_CFLAGS += -DPROTOBUF_INLINE_NOT_IN_HEADERS=0 -DREUSE_MEMORY=1 -O2 -LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD +LOCAL_CFLAGS += -DFMK_SUPPORT_DUMP -DDAVINCI_SUPPORT_PROFILING -DDAVINCI_CLOUD -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -369,7 +369,7 @@ LOCAL_STATIC_LIBRARIES := libge_memory \ LOCAL_SHARED_LIBRARIES := \ libc_sec \ - libprotobuf \ + libascend_protobuf \ libslog \ libmmpa \ libgraph \ diff --git a/src/ge/generator/ge_generator.cc b/src/ge/generator/ge_generator.cc index db52ce59..c19cb35d 100644 --- a/src/ge/generator/ge_generator.cc +++ b/src/ge/generator/ge_generator.cc @@ -599,9 +599,6 @@ Status GeGenerator::BuildSingleOp(OpDescPtr &op_desc, const vector &in } else { for (const auto &in_desc : inputs) { GeTensorDesc input_desc = in_desc.GetTensorDesc(); - if (!IsNeedConnectInputOpForSingleOp(input_desc)) { - continue; - } GE_CHK_STATUS_RET_NOLOG(AddInputs(compute_graph, op_node, input_desc, arg_index, true)); arg_index++; } diff --git a/src/ge/graph/build/memory/module.mk b/src/ge/graph/build/memory/module.mk index 47c9e5cc..2d669a50 100644 --- a/src/ge/graph/build/memory/module.mk +++ b/src/ge/graph/build/memory/module.mk @@ -25,7 +25,7 @@ LOCAL_MODULE := libge_memory LOCAL_CFLAGS += -std=c++11 LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -O2 +LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private ifeq ($(DEBUG), 1) LOCAL_CFLAGS += -g -O0 endif @@ -33,7 +33,7 @@ endif LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -56,11 +56,11 @@ LOCAL_MODULE := libge_memory LOCAL_CFLAGS += -std=c++11 LOCAL_CFLAGS += -Werror LOCAL_CFLAGS += -DGOOGLE_PROTOBUF_NO_RTTI -DDEV_VISIBILITY -LOCAL_CFLAGS += -O2 +LOCAL_CFLAGS += -O2 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ 
+LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -79,11 +79,11 @@ include ${BUILD_STATIC_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libge_memory -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ diff --git a/src/ge/graph/build/model_builder.cc b/src/ge/graph/build/model_builder.cc index 6efc78fb..6285bced 100644 --- a/src/ge/graph/build/model_builder.cc +++ b/src/ge/graph/build/model_builder.cc @@ -709,14 +709,10 @@ Status ModelBuilder::BuildModelForGetTask(ge::Model &model) { GELOGE(RT_FAILED, "rtGetRtCapability failed."); return RT_FAILED; } else { - if (value == RT_CAPABILITY_SUPPORT) { - GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); - MemcpyAddrAsyncPass memcpy_addr; - GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed."); - GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); - } else { - GELOGW("rtGetRtCapability not support memcpy_addr_async."); - } + GE_TIMESTAMP_START(AddMemcpyAddrAsyncNode); + MemcpyAddrAsyncPass memcpy_addr; + GE_CHK_STATUS_RET(memcpy_addr.Run(compute_graph_), "Add memcpy_addr_async node failed."); + GE_TIMESTAMP_END(AddMemcpyAddrAsyncNode, "MemcpyAddrAsyncPass::Run."); } GE_TIMESTAMP_START(AssignMemory); diff --git a/src/ge/graph/load/new_model_manager/davinci_model.cc b/src/ge/graph/load/new_model_manager/davinci_model.cc index b5335d1b..d589bd8a 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.cc +++ b/src/ge/graph/load/new_model_manager/davinci_model.cc @@ -650,7 +650,6 @@ Status DavinciModel::Init(void *dev_ptr, size_t mem_size, void *weight_ptr, size for (const ge::NodePtr &node : compute_graph->GetDirectNode()) { auto op_desc = node->GetOpDesc(); GE_IF_BOOL_EXEC(op_desc == nullptr, continue); - GetFixedAddrAttr(op_desc); 
GE_IF_BOOL_EXEC(op_desc->GetType() != VARIABLE, continue); GE_IF_BOOL_EXEC(IsBroadCastOpData(node), (void)ge::AttrUtils::SetStr(op_desc, VAR_ATTR_VAR_IS_BROADCAST, "var_is_restore");); @@ -839,7 +838,8 @@ Status DavinciModel::InitNodes(const ComputeGraphPtr &compute_graph) { } continue; } - + // for dynamic shape with control flow + SetLabelForDynamic(node); if (IsNoTaskAndDumpNeeded(op_desc)) { GELOGD("node[%s] without task, and save op_desc and addr for dump", op_desc->GetName().c_str()); const RuntimeParam &rts_param = GetRuntimeParam(); @@ -913,6 +913,21 @@ Status DavinciModel::InitInputOutputForDynamic(const ComputeGraphPtr &compute_gr return SUCCESS; } +void DavinciModel::SetLabelForDynamic(const NodePtr &node) { + if (known_node_ && node->GetOpDesc()->GetType() == LABELSWITCHBYINDEX) { + for (auto &in_data_anchor : node->GetAllInDataAnchors()) { + auto peer_out_data_anchor = in_data_anchor->GetPeerOutAnchor(); + if (peer_out_data_anchor != nullptr) { + string tensor_name = node->GetName(); + auto peer_node = peer_out_data_anchor->GetOwnerNode(); + (void)AttrUtils::SetStr(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); + (void)AttrUtils::SetInt(peer_node->GetOpDesc(), ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, 0); + tensor_name_to_peer_output_index_[tensor_name] = 0; + } + } + } +} + /// @ingroup ge /// @brief Data Op Initialize. /// @param [in] NodePtr: Data Op. 
@@ -3949,15 +3964,4 @@ int64_t DavinciModel::GetFixedAddrsSize(string tensor_name) { } } -void DavinciModel::GetFixedAddrAttr(const OpDescPtr &op_desc) { - if (op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR) && op_desc->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX)) { - string tensor_name; - (void)AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, tensor_name); - int64_t index = -1; - (void)AttrUtils::GetInt(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR_INDEX, index); - if (index >= 0) { - tensor_name_to_peer_output_index_[tensor_name] = index; - } - } -} } // namespace ge diff --git a/src/ge/graph/load/new_model_manager/davinci_model.h b/src/ge/graph/load/new_model_manager/davinci_model.h index 6e127b3c..8cc824f4 100644 --- a/src/ge/graph/load/new_model_manager/davinci_model.h +++ b/src/ge/graph/load/new_model_manager/davinci_model.h @@ -836,7 +836,7 @@ class DavinciModel { std::vector &outputs); void ParseAIPPInfo(std::string in_out_info, InputOutputDims &dims_info); - void GetFixedAddrAttr(const OpDescPtr &op_desc); + void SetLabelForDynamic(const NodePtr &node); bool is_model_has_inited_; uint32_t model_id_; diff --git a/src/ge/graph/load/new_model_manager/model_utils.cc b/src/ge/graph/load/new_model_manager/model_utils.cc index 75917e0e..61c77202 100644 --- a/src/ge/graph/load/new_model_manager/model_utils.cc +++ b/src/ge/graph/load/new_model_manager/model_utils.cc @@ -337,16 +337,6 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co continue; } - int64_t mem_type; - bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); - if (tensor_has_mem_type && v_memory_type[i] != RT_MEMORY_L1) { - uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_input_offset[i]; - v_input_data_addr.push_back(p2p_mem_addr); - GELOGI("[IMAS]GetInputDataAddrs graph_%u type[P] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, - op_desc->GetName().c_str(), i, p2p_mem_addr); - 
continue; - } - GE_IF_BOOL_EXEC(non_const_index >= v_input_offset.size(), GELOGW("offsets=%zu, inputs=%zu, index=%zu.", v_input_offset.size(), inputs_size, non_const_index); break); @@ -361,6 +351,8 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co model_param.graph_id, op_desc->GetName().c_str(), i, variable_addr); continue); + int64_t mem_type; + bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); // feature maps void *mem_addr = nullptr; if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion @@ -372,6 +364,12 @@ vector ModelUtils::GetInputDataAddrs(const RuntimeParam &model_param, Co VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); mem_addr = model_param.ts_mem_mall->Acquire(input_offset, static_cast(tensor_size)); v_input_data_addr.push_back(mem_addr); + } else if (tensor_has_mem_type && mem_type == RT_MEMORY_P2P_DDR) { + uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_input_offset[i]; + v_input_data_addr.push_back(p2p_mem_addr); + GELOGI("[IMAS]GetInputDataAddrs graph_%u type[P] name[%s] input[%zu] memaddr[%p]", model_param.graph_id, + op_desc->GetName().c_str(), i, p2p_mem_addr); + continue; } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, input_offset); mem_addr = model_param.mem_base + input_offset; @@ -420,15 +418,9 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C GELOGW("Op: %s, Index: %zu, Tensor Desc is null", op_desc->GetName().c_str(), i); continue; } + int64_t mem_type; bool tensor_has_mem_type = ge::AttrUtils::GetInt(tensor_desc, ATTR_NAME_TENSOR_MEM_TYPE, mem_type); - if (tensor_has_mem_type && v_memory_type[i] != RT_MEMORY_L1) { - uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_output_offset[i]; - v_output_data_addr.push_back(p2p_mem_addr); - GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[P] name[%s] output[%zu] memaddr[%p]", 
model_param.graph_id, - op_desc->GetName().c_str(), i, p2p_mem_addr); - continue; - } // feature maps void *mem_addr = nullptr; if (has_mem_type_attr && v_memory_type[i] == RT_MEMORY_L1) { // fusion @@ -442,6 +434,12 @@ vector ModelUtils::GetOutputDataAddrs(const RuntimeParam &model_param, C VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); mem_addr = model_param.ts_mem_mall->Acquire(v_output_offset[i], static_cast(tensor_size)); v_output_data_addr.push_back(mem_addr); + } else if (tensor_has_mem_type && mem_type == RT_MEMORY_P2P_DDR) { + uint8_t *p2p_mem_addr = model_param.memory_infos.at(RT_MEMORY_P2P_DDR).memory_base + v_output_offset[i]; + v_output_data_addr.push_back(p2p_mem_addr); + GELOGI("[IMAS]GetOutputDataAddrs graph_%u type[P] name[%s] output[%zu] memaddr[%p]", model_param.graph_id, + op_desc->GetName().c_str(), i, p2p_mem_addr); + continue; } else { VALIDATE_MEM_RANGE(op_desc, model_param.mem_size, v_output_offset[i]); mem_addr = static_cast(model_param.mem_base + v_output_offset[i]); diff --git a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc index 3476751b..8c00c1d7 100644 --- a/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/kernel_task_info.cc @@ -559,10 +559,7 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne GE_CHECK_NOTNULL(davinci_model_); // get tvm op desc OpDescPtr op_desc = davinci_model_->GetOpByIndex(ctx_.opIndex); - if (op_desc == nullptr) { - GELOGE(INTERNAL_ERROR, "InitTVMTaskInfo error, index:%u out of range!", ctx_.opIndex); - return INTERNAL_ERROR; - } + GE_CHECK_NOTNULL(op_desc); if (davinci_model_->IsKnownNode()) { return SUCCESS; } @@ -650,6 +647,9 @@ Status KernelTaskInfo::InitTVMTask(uint16_t offset, const domi::KernelDef &kerne vector virtual_io_addrs; // use virtual address for zero copy key. 
virtual_io_addrs.insert(virtual_io_addrs.end(), input_data_addrs.begin(), input_data_addrs.end()); virtual_io_addrs.insert(virtual_io_addrs.end(), output_data_addrs.begin(), output_data_addrs.end()); + if (op_desc->GetType() == ATOMICADDRCLEAN) { + virtual_io_addrs.insert(virtual_io_addrs.end(), workspace_data_addrs.begin(), workspace_data_addrs.end()); + } davinci_model_->SetZeroCopyAddr(op_desc, virtual_io_addrs, args_info.data(), args_, args_size_, offset); GELOGD("Do InitTVMTask end"); diff --git a/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc index bbbf313f..6ff98894 100644 --- a/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/label_switch_by_index_task_info.cc @@ -144,7 +144,7 @@ Status LabelSwitchByIndexTaskInfo::CalculateArgs(const domi::TaskDef &task_def, GELOGE(FAILED, "Label switch op only have one data input. 
Now input size is %zu", op_desc->GetInputsSize()); return FAILED; } - string input_tensor_name = op_desc->GetInputNameByIndex(0); + string input_tensor_name = op_desc->GetName(); fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(input_tensor_name); auto tensor_desc = op_desc->GetInputDesc(0); int64_t tensor_size = 0; diff --git a/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc b/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc index 3bad3c67..fb0928a5 100644 --- a/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc +++ b/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.cc @@ -35,6 +35,7 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da kind_ = memcpy_async_.kind(); dst_max_ = memcpy_async_.dst_max(); OpDescPtr op_desc = davinci_model_->GetOpByIndex(memcpy_async_.op_index()); + op_desc_ = op_desc; if (op_desc == nullptr) { GELOGE(INTERNAL_ERROR, "Task op index:%u out of range", memcpy_async_.op_index()); return INTERNAL_ERROR; @@ -45,7 +46,8 @@ Status MemcpyAsyncTaskInfo::Init(const domi::TaskDef &task_def, DavinciModel *da dst_ = reinterpret_cast(reinterpret_cast(src_) + sizeof(void *)); // for zero copy kind_ = RT_MEMCPY_ADDR_DEVICE_TO_DEVICE; - GELOGI("MemcpyAsyncTaskInfo src_ %p, dst_ %p, args_offset %u.", src_, dst_, args_offset_); + GELOGI("MemcpyAsyncTaskInfo op name %s, src_ %p, dst_ %p, args_offset %u.", op_desc->GetName().c_str(), src_, dst_, + args_offset_); return SUCCESS; } @@ -93,12 +95,23 @@ Status MemcpyAsyncTaskInfo::Distribute() { } Status MemcpyAsyncTaskInfo::CalculateArgs(const domi::TaskDef &task_def, DavinciModel *davinci_model) { + GE_CHECK_NOTNULL(davinci_model); + OpDescPtr op_desc = davinci_model->GetOpByIndex(task_def.memcpy_async().op_index()); // the num of src and dst size is 2 uint32_t args_size = sizeof(void *) * 2; args_offset_ = davinci_model->GetTotalArgsSize(); davinci_model->SetTotalArgsSize(args_size); 
davinci_model_ = davinci_model; GELOGI("MemcpyAsyncTaskInfo kernel args_size %u, args_offset %u", args_size, args_offset_); + string peer_input_name; + if (AttrUtils::GetStr(op_desc, ATTR_DYNAMIC_SHAPE_FIXED_ADDR, peer_input_name) && !peer_input_name.empty()) { + uint32_t output_index = davinci_model->GetFixedAddrOutputIndex(peer_input_name); + fixed_addr_offset_ = davinci_model->GetFixedAddrsSize(peer_input_name); + auto tensor_desc = op_desc->GetOutputDesc(output_index); + int64_t tensor_size = 0; + GE_CHK_STATUS(TensorUtils::GetSize(tensor_desc, tensor_size)); + davinci_model->SetTotalFixedAddrsSize(peer_input_name, tensor_size); + } return SUCCESS; } @@ -117,8 +130,12 @@ Status MemcpyAsyncTaskInfo::UpdateArgs() { vector io_addrs; io_addrs.emplace_back(reinterpret_cast(src_)); - io_addrs.emplace_back(reinterpret_cast(dst_)); - + if (op_desc_->HasAttr(ATTR_DYNAMIC_SHAPE_FIXED_ADDR)) { + void *fixed_addr = davinci_model_->GetCurrentFixedAddr(fixed_addr_offset_); + io_addrs.emplace_back(fixed_addr); + } else { + io_addrs.emplace_back(reinterpret_cast(dst_)); + } davinci_model_->SetTotalIOAddrs(io_addrs); GELOGI("MemcpyAsyncTaskInfo::UpdateArgs success."); diff --git a/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h b/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h index 9fe1ce24..6a10fc40 100644 --- a/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h +++ b/src/ge/graph/load/new_model_manager/task_info/memcpy_async_task_info.h @@ -44,6 +44,8 @@ class MemcpyAsyncTaskInfo : public TaskInfo { uint8_t *src_; uint64_t count_; uint32_t kind_; + OpDescPtr op_desc_; + int64_t fixed_addr_offset_; DavinciModel *davinci_model_ = nullptr; uint32_t args_offset_ = 0; domi::MemcpyAsyncDef memcpy_async_; diff --git a/src/ge/graph/manager/graph_manager.cc b/src/ge/graph/manager/graph_manager.cc index d1cc914c..e0a6d751 100644 --- a/src/ge/graph/manager/graph_manager.cc +++ b/src/ge/graph/manager/graph_manager.cc @@ 
-685,15 +685,10 @@ Status GraphManager::PreRun(const GraphNodePtr &graph_node, const std::vectorGetName().c_str()); - return ret; - } + ret = PreRunOptimizeSubGraph(graph_node, compute_graph, session_id); + if (ret != SUCCESS) { + GELOGE(ret, "Run PreRunOptimizeSubGraph failed for graph:%s.", compute_graph->GetName().c_str()); + return ret; } /// 1. BUILD_MODE_TUNING with BUILD_STEP_BEFORE_UB_MATCH no need PreRunAfterOptimizeSubGraph; diff --git a/src/ge/graph/optimize/graph_optimize.cc b/src/ge/graph/optimize/graph_optimize.cc index ad919338..807b43f5 100644 --- a/src/ge/graph/optimize/graph_optimize.cc +++ b/src/ge/graph/optimize/graph_optimize.cc @@ -98,7 +98,8 @@ Status GraphOptimize::OptimizeSubGraph(ComputeGraphPtr &compute_graph, const std return SUCCESS; } - if (build_mode_ == BUILD_MODE_TUNING && build_step_ == BUILD_STEP_AFTER_UB_MATCH) { + if (build_mode_ == BUILD_MODE_TUNING && + (build_step_ == BUILD_STEP_AFTER_UB_MATCH || build_step_ == BUILD_STEP_AFTER_MERGE)) { for (auto iter = graph_optimizer.begin(); iter != graph_optimizer.end(); ++iter) { Status ret = (*iter)->OptimizeFusedGraphAfterGraphSlice(*(compute_graph)); if (ret != SUCCESS) { diff --git a/src/ge/graph/passes/subgraph_pass.cc b/src/ge/graph/passes/subgraph_pass.cc index fd71e65b..41f26379 100644 --- a/src/ge/graph/passes/subgraph_pass.cc +++ b/src/ge/graph/passes/subgraph_pass.cc @@ -146,10 +146,14 @@ Status SubgraphPass::SubgraphOutputNode(const ComputeGraphPtr &graph, const Node // 2. AtomicOp->NetOutput in subgraph // 3. OutputContinuesRequiredOp->NetOutput in subgraph // 4. Data->NetOutput in subgraph but parent_node is not while + // 5. 
While->NetOutput in known subgraph std::string op_type; - bool insert_flag = NodeUtils::GetConstOpType(in_node, op_type) || - IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || IsOutputContinuesRequired(in_node) || - ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)); + bool insert_flag = + NodeUtils::GetConstOpType(in_node, op_type) || IsAtomicRequired(in_node, peer_out_anchor->GetIdx()) || + IsOutputContinuesRequired(in_node) || + ((in_node->GetType() == DATA) && (kWhileOpTypes.count(graph->GetParentNode()->GetType()) == 0)) || + (!graph->GetGraphUnknownFlag() && NodeUtils::IsDynamicShape(node) && + (kWhileOpTypes.count(in_node->GetType()) != 0)); if (insert_flag) { GELOGD("Insert MemcpyAsync node between %s and %s.", in_node->GetName().c_str(), node->GetName().c_str()); std::string name = node->GetName() + "_input_" + std::to_string(in_data_anchor->GetIdx()) + "_Memcpy"; @@ -212,6 +216,19 @@ Status SubgraphPass::WhileBodySubgraph(const ComputeGraphPtr &graph, const NodeP return SUCCESS; } + // insert identity between data and labelswitch in while cond subgraph + if (NodeUtils::IsDynamicShape(node)) { + ComputeGraphPtr while_cond = NodeUtils::GetSubgraph(*node, 0); + GE_CHECK_NOTNULL(while_cond); + std::vector cond_data_nodes; + for (const auto &n : while_cond->GetDirectNode()) { + if (n->GetType() == DATA) { + cond_data_nodes.emplace_back(n); + } + } + GE_CHK_STATUS_RET(InsertInputMemcpy(while_cond, cond_data_nodes), "InsertInputMemcpy failed."); + } + std::vector data_nodes; std::set bypass_index; NodePtr output_node = nullptr; diff --git a/src/ge/host_cpu_engine/CMakeLists.txt b/src/ge/host_cpu_engine/CMakeLists.txt index 63d219d0..02b5f996 100644 --- a/src/ge/host_cpu_engine/CMakeLists.txt +++ b/src/ge/host_cpu_engine/CMakeLists.txt @@ -22,6 +22,10 @@ target_compile_options(host_cpu_engine PRIVATE -Werror ) +target_compile_definitions(host_cpu_engine PRIVATE + google=ascend_private +) + 
target_include_directories(host_cpu_engine PRIVATE ${CMAKE_CURRENT_LIST_DIR} ${GE_CODE_DIR}/ge @@ -42,7 +46,7 @@ target_include_directories(host_cpu_engine PRIVATE target_link_libraries(host_cpu_engine PRIVATE $ -Wl,--no-as-needed - protobuf + ascend_protobuf c_sec graph register @@ -60,6 +64,7 @@ target_compile_options(atc_host_cpu_engine PRIVATE target_compile_definitions(atc_host_cpu_engine PRIVATE COMPILE_OMG_PACKAGE + google=ascend_private ) target_include_directories(atc_host_cpu_engine PRIVATE @@ -82,7 +87,7 @@ target_include_directories(atc_host_cpu_engine PRIVATE target_link_libraries(atc_host_cpu_engine PRIVATE $ -Wl,--no-as-needed - protobuf + ascend_protobuf c_sec graph register @@ -103,6 +108,10 @@ target_compile_options(host_cpu_opskernel_builder PRIVATE -Werror ) +target_compile_definitions(host_cpu_opskernel_builder PRIVATE + google=ascend_private +) + target_include_directories(host_cpu_opskernel_builder PRIVATE ${CMAKE_CURRENT_LIST_DIR} ${GE_CODE_DIR}/ge @@ -123,7 +132,7 @@ target_include_directories(host_cpu_opskernel_builder PRIVATE target_link_libraries(host_cpu_opskernel_builder PRIVATE $ -Wl,--no-as-needed - protobuf + ascend_protobuf c_sec slog graph @@ -138,6 +147,10 @@ target_compile_options(atc_host_cpu_opskernel_builder PRIVATE -Werror ) +target_compile_definitions(atc_host_cpu_opskernel_builder PRIVATE + google=ascend_private +) + target_include_directories(atc_host_cpu_opskernel_builder PRIVATE ${CMAKE_CURRENT_LIST_DIR} ${GE_CODE_DIR}/ge @@ -158,7 +171,7 @@ target_include_directories(atc_host_cpu_opskernel_builder PRIVATE target_link_libraries(atc_host_cpu_opskernel_builder PRIVATE $ -Wl,--no-as-needed - protobuf + ascend_protobuf c_sec slog graph @@ -178,6 +191,10 @@ target_compile_options(host_cpu_opskernel_builder_static PRIVATE -Werror ) +target_compile_definitions(host_cpu_opskernel_builder_static PRIVATE + google=ascend_private +) + target_include_directories(host_cpu_opskernel_builder_static PRIVATE ${CMAKE_CURRENT_LIST_DIR} 
${GE_CODE_DIR}/ge @@ -197,7 +214,7 @@ target_include_directories(host_cpu_opskernel_builder_static PRIVATE target_link_libraries(host_cpu_opskernel_builder_static PRIVATE $ - protobuf + ascend_protobuf c_sec ) diff --git a/src/ge/host_cpu_engine/module.mk b/src/ge/host_cpu_engine/module.mk index 3c8e0cc9..6c4932a3 100644 --- a/src/ge/host_cpu_engine/module.mk +++ b/src/ge/host_cpu_engine/module.mk @@ -21,11 +21,11 @@ local_lib_inc_path := proto/task.proto \ include $(CLEAR_VARS) LOCAL_MODULE := libhost_cpu_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -41,11 +41,11 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libhost_cpu_engine LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE +LOCAL_CFLAGS += -std=c++11 -DCOMPILE_OMG_PACKAGE -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -61,11 +61,11 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libhost_cpu_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ @@ -81,10 +81,10 @@ include ${BUILD_HOST_SHARED_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libhost_cpu_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := -LOCAL_STATIC_LIBRARIES := libprotobuf \ +LOCAL_STATIC_LIBRARIES := libascend_protobuf \ libgraph \ libregister \ @@ -101,10 +101,10 @@ 
include ${BUILD_HOST_STATIC_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := libhost_cpu_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := -LOCAL_STATIC_LIBRARIES := libprotobuf \ +LOCAL_STATIC_LIBRARIES := libascend_protobuf \ libgraph \ libregister \ @@ -121,11 +121,11 @@ include ${BUILD_STATIC_LIBRARY} include $(CLEAR_VARS) LOCAL_MODULE := atclib/libhost_cpu_opskernel_builder LOCAL_CFLAGS += -Werror -LOCAL_CFLAGS += -std=c++11 +LOCAL_CFLAGS += -std=c++11 -Dgoogle=ascend_private LOCAL_LDFLAGS := LOCAL_STATIC_LIBRARIES := -LOCAL_SHARED_LIBRARIES := libprotobuf \ +LOCAL_SHARED_LIBRARIES := libascend_protobuf \ libc_sec \ libslog \ libgraph \ diff --git a/src/ge/hybrid/model/hybrid_model_builder.cc b/src/ge/hybrid/model/hybrid_model_builder.cc index 2c6227fb..f22e50f9 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.cc +++ b/src/ge/hybrid/model/hybrid_model_builder.cc @@ -15,6 +15,7 @@ */ #include "hybrid/model/hybrid_model_builder.h" +#include #include "common/math/math_util.h" #include "graph/ge_context.h" #include "graph/build/memory/var_mem_assign_util.h" @@ -58,6 +59,34 @@ int64_t CalcVarSizeInBytes(const GeTensorDesc &desc) { } return var_size; } + +Status CollectDependenciesForFusedGraph(NodeItem &node_item, std::set &data_ops) { + for (const auto &node : node_item.fused_subgraph->nodes) { + auto op_desc = node->GetOpDesc(); + GE_CHECK_NOTNULL(op_desc); + const auto &depends = op_desc->GetOpInferDepends(); + if (depends.empty()) { + continue; + } + + for (auto &input_name : depends) { + auto input_index = op_desc->GetInputIndexByName(input_name); + auto src_node = NodeUtils::GetInDataNodeByIndex(*node, input_index); + GE_CHECK_NOTNULL(src_node); + auto src_op_desc = src_node->GetOpDesc(); + GE_CHECK_NOTNULL(src_op_desc); + if (src_node->GetType() != DATA_TYPE) { + GELOGE(UNSUPPORTED, "[%s::%s] Node in fused subgraph can only depend on Data nodes, but depend on %s", + 
node_item.NodeName().c_str(), node->GetName().c_str(), src_node->GetType().c_str()); + return UNSUPPORTED; + } + + data_ops.emplace(src_op_desc.get()); + } + } + + return SUCCESS; +} } // namespace HybridModelBuilder::HybridModelBuilder(HybridModel &hybrid_model) : hybrid_model_(hybrid_model), runtime_param_(hybrid_model.root_runtime_param_) { @@ -262,6 +291,47 @@ Status HybridModelBuilder::ParseDependentInputNodes(NodeItem &node_item, const s node_item.dependents_for_shape_inference.emplace_back(dep_node); } + GE_CHK_STATUS_RET(ParseDependentForFusedSubgraph(node_item)); + return SUCCESS; +} + +Status HybridModelBuilder::ParseDependentForFusedSubgraph(NodeItem &node_item) { + if (node_item.fused_subgraph == nullptr) { + return SUCCESS; + } + + std::set data_ops; + GE_CHK_STATUS_RET_NOLOG(CollectDependenciesForFusedGraph(node_item, data_ops)); + for (auto &op_desc : data_ops) { + uint32_t parent_index = 0; + if (!AttrUtils::GetInt(*op_desc, ATTR_NAME_PARENT_NODE_INDEX, parent_index)) { + GELOGE(INTERNAL_ERROR, "[%s] Failed to get attr [%s]", op_desc->GetName().c_str(), + ATTR_NAME_PARENT_NODE_INDEX.c_str()); + return INTERNAL_ERROR; + } + + const auto &in_anchor = node_item.node->GetInDataAnchor(parent_index); + GE_CHECK_NOTNULL(in_anchor); + const auto &peer_out_anchor = in_anchor->GetPeerOutAnchor(); + GE_CHECK_NOTNULL(peer_out_anchor); + const auto &src_node = peer_out_anchor->GetOwnerNode(); + GE_CHECK_NOTNULL(src_node); + NodeItem *src_node_item = nullptr; + GE_CHK_STATUS_RET_NOLOG(GetOrCreateNodeItem(src_node, &src_node_item)); + op_desc->SetId(src_node_item->op_desc->GetId()); + GELOGD("[%s::%s] Node id was set to that of outer src node's, src_node = %s", node_item.NodeName().c_str(), + op_desc->GetName().c_str(), src_node_item->NodeName().c_str()); + src_node_item->has_observer = true; + src_node_item->to_const_output_id_list.emplace(peer_out_anchor->GetIdx()); + + auto &depends = node_item.dependents_for_shape_inference; + if (std::find(depends.begin(), 
depends.end(), src_node) == depends.end()) { + depends.emplace_back(src_node); + GELOGD("[%s] Dependent added from output of [%s:%d]", node_item.NodeName().c_str(), + src_node_item->NodeName().c_str(), peer_out_anchor->GetIdx()); + } + } + return SUCCESS; } diff --git a/src/ge/hybrid/model/hybrid_model_builder.h b/src/ge/hybrid/model/hybrid_model_builder.h index ecd327ff..d7f6dcf7 100644 --- a/src/ge/hybrid/model/hybrid_model_builder.h +++ b/src/ge/hybrid/model/hybrid_model_builder.h @@ -60,6 +60,7 @@ class HybridModelBuilder { Status BuildNodeItem(const NodePtr &node, NodeItem &node_item); Status GetOrCreateNodeItem(const NodePtr &node, NodeItem **node_item); Status ParseDependentInputNodes(NodeItem &node_item, const std::vector &dependencies); + Status ParseDependentForFusedSubgraph(NodeItem &node_item); Status IndexTaskDefs(); Status IndexSpecialNodes(); Status InitRuntimeParams(); diff --git a/src/ge/opskernel_manager/ops_kernel_builder_manager.cc b/src/ge/opskernel_manager/ops_kernel_builder_manager.cc index 6afcc891..0662d203 100644 --- a/src/ge/opskernel_manager/ops_kernel_builder_manager.cc +++ b/src/ge/opskernel_manager/ops_kernel_builder_manager.cc @@ -24,7 +24,8 @@ const std::vector kBasicBuilderLibs = {"libge_local_opskernel_build "libhost_cpu_opskernel_builder.so", "librts_kernel_builder.so", "libaicpu_ascend_builder.so", "libaicpu_tf_builder.so"}; -const std::vector kHcclBuilderLibs = {"libhcom_opskernel_builder.so", "libhvd_opskernel_builder.so"}; +const std::vector kHcclBuilderLibs = {"libhcom_opskernel_builder.so", "libhvd_opskernel_builder.so", + "libhcom_gradtune_opskernel_builder.so"}; } // namespace OpsKernelBuilderManager::~OpsKernelBuilderManager() { // it's OK to call Finalize multiply times diff --git a/src/ge/single_op/single_op_model.cc b/src/ge/single_op/single_op_model.cc index 9c6ad8d9..8b59565f 100644 --- a/src/ge/single_op/single_op_model.cc +++ b/src/ge/single_op/single_op_model.cc @@ -31,7 +31,6 @@ #include 
"task/aicpu_task_builder.h" #include "task/aicpu_kernel_task_builder.h" #include "task/tbe_task_builder.h" -#include "graph/load/new_model_manager/model_manager.h" static std::atomic aicpu_sessionid(0); @@ -275,7 +274,6 @@ Status SingleOpModel::BuildTaskList(SingleOp &single_op) { GELOGD("Skip task type: %d", static_cast(task_type)); } } - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); return SUCCESS; } @@ -447,8 +445,6 @@ Status SingleOpModel::BuildTaskListForDynamicOp(DynamicSingleOp &single_op) { GELOGD("Skip task type: %d", static_cast(task_type)); } } - GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); - return SUCCESS; } diff --git a/src/ge/single_op/task/aicpu_kernel_task_builder.cc b/src/ge/single_op/task/aicpu_kernel_task_builder.cc index eb9d4d5c..8cb2a6da 100644 --- a/src/ge/single_op/task/aicpu_kernel_task_builder.cc +++ b/src/ge/single_op/task/aicpu_kernel_task_builder.cc @@ -63,6 +63,7 @@ Status AiCpuCCTaskBuilder::BuildTask(AiCpuCCTask &task) { task.is_custom_ = true; task.dump_flag_ |= RT_KERNEL_CUSTOM_AICPU; GE_CHK_STATUS_RET(ModelManager::GetInstance()->LoadCustAicpuSo(op_desc_, so_name), "launch cust aicpu so failed"); + GE_CHK_STATUS_RET(ModelManager::GetInstance()->LaunchCustAicpuSo(), "launch cust aicpu so failed."); } task.num_inputs_ = op_desc_->GetInputsSize(); diff --git a/third_party/fwkacllib/inc/mmpa/mmpa_api.h b/third_party/fwkacllib/inc/mmpa/mmpa_api.h index a7f13636..b9d9766e 100644 --- a/third_party/fwkacllib/inc/mmpa/mmpa_api.h +++ b/third_party/fwkacllib/inc/mmpa/mmpa_api.h @@ -26,6 +26,12 @@ #define _GNU_SOURCE #endif +#ifdef FUNC_VISIBILITY +#define MMPA_FUNC_VISIBILITY __attribute__((visibility("default"))) +#else +#define MMPA_FUNC_VISIBILITY +#endif + #include #include #include @@ -86,6 +92,13 @@ #if(OS_TYPE == WIN) + +#ifdef FUNC_VISIBILITY +#define MMPA_FUNC_VISIBILITY _declspec(dllexport) +#else +#define 
MMPA_FUNC_VISIBILITY +#endif + #include #include #include "Windows.h" diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h index aced4968..ea51f497 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_linux.h @@ -44,12 +44,16 @@ typedef VOID *mmExitCode; typedef key_t mmKey_t; typedef int mmMsgid; typedef struct dirent mmDirent; +typedef struct dirent mmDirent2; typedef struct shmid_ds mmshmId_ds; typedef int (*mmFilter)(const mmDirent *entry); +typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); +typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); typedef size_t mmSize_t; typedef off_t mmOfft_t; typedef pid_t mmPid_t; +typedef long MM_LONG; typedef VOID *(*userProcFunc)(VOID *pulArg); @@ -113,6 +117,7 @@ typedef struct { } mmIoctlBuf; typedef int mmAtomicType; +typedef int mmAtomicType64; typedef enum { pollTypeRead = 1, // pipe read @@ -151,8 +156,8 @@ typedef struct { } mmTimeval; typedef struct { - LONG tv_sec; - LONG tv_nsec; + MM_LONG tv_sec; + MM_LONG tv_nsec; } mmTimespec; typedef struct { @@ -272,6 +277,9 @@ typedef struct { #define M_R_OK R_OK #define M_W_OK W_OK +#define MM_DT_DIR DT_DIR +#define MM_DT_REG DT_REG + #define MMPA_STDIN STDIN_FILENO #define MMPA_STDOUT STDOUT_FILENO #define MMPA_STDERR STDERR_FILENO @@ -283,182 +291,195 @@ typedef struct { #define MMPA_DL_EXT_NAME ".so" -extern INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); -extern INT32 mmJoinTask(mmThread *threadHandle); -extern INT32 mmMutexInit(mmMutex_t *mutex); -extern INT32 mmMutexLock(mmMutex_t *mutex); -extern INT32 mmMutexTryLock(mmMutex_t *mutex); -extern INT32 mmMutexUnLock(mmMutex_t *mutex); -extern INT32 mmMutexDestroy(mmMutex_t *mutex); -extern INT32 mmCondInit(mmCond *cond); -extern INT32 mmCondLockInit(mmMutexFC *mutex); -extern INT32 
mmCondLock(mmMutexFC *mutex); -extern INT32 mmCondUnLock(mmMutexFC *mutex); -extern INT32 mmCondLockDestroy(mmMutexFC *mutex); -extern INT32 mmRWLockInit(mmRWLock_t *rwLock); -extern INT32 mmRWLockRDLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockWRLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); -extern INT32 mmRDLockUnLock(mmRWLock_t *rwLock); -extern INT32 mmWRLockUnLock(mmRWLock_t *rwLock); -extern INT32 mmRWLockDestroy(mmRWLock_t *rwLock); -extern INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); -extern INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); -extern INT32 mmCondNotify(mmCond *cond); -extern INT32 mmCondNotifyAll(mmCond *cond); -extern INT32 mmCondDestroy(mmCond *cond); -extern INT32 mmGetPid(); -extern INT32 mmGetTid(); -extern INT32 mmGetPidHandle(mmProcess *processHandle); -extern INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -extern INT32 mmGetSystemTime(mmSystemTime_t *sysTime); - -extern INT32 mmSemInit(mmSem_t *sem, UINT32 value); -extern INT32 mmSemWait(mmSem_t *sem); -extern INT32 mmSemPost(mmSem_t *sem); -extern INT32 mmSemDestroy(mmSem_t *sem); -extern INT32 mmOpen(const CHAR *pathName, INT32 flags); -extern INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); -extern FILE *mmPopen(CHAR *command, CHAR *type); -extern INT32 mmClose(INT32 fd); -extern INT32 mmPclose(FILE *stream); -extern mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); -extern mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); -extern mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); -extern INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); -extern INT32 mmListen(mmSockHandle sockFd, INT32 backLog); -extern mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen); -extern INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); -extern INT32 
mmCloseSocket(mmSockHandle sockFd); -extern mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); -extern mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); -extern INT32 mmSocketSendTo(mmSockHandle sockFd, +MMPA_FUNC_VISIBILITY INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmJoinTask(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmMutexInit(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexTryLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexUnLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexDestroy(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondInit(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondLockInit(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondUnLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLockDestroy(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmRWLockInit(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRDLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmWRLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockDestroy(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); +MMPA_FUNC_VISIBILITY INT32 mmCondNotify(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondNotifyAll(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmGetPid(); +MMPA_FUNC_VISIBILITY INT32 mmGetTid(); 
+MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); + +MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); +MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemDestroy(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmOpen(const CHAR *pathName, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); +MMPA_FUNC_VISIBILITY FILE *mmPopen(CHAR *command, CHAR *type); +MMPA_FUNC_VISIBILITY INT32 mmClose(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmPclose(FILE *stream); +MMPA_FUNC_VISIBILITY mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); +MMPA_FUNC_VISIBILITY INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmListen(mmSockHandle sockFd, INT32 backLog); +MMPA_FUNC_VISIBILITY mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen); +MMPA_FUNC_VISIBILITY INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); +MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, VOID *sendMsg, INT32 sendLen, UINT32 sendFlag, const mmSockAddr* addr, INT32 tolen); -extern mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, VOID *recvBuf, mmSize recvLen, UINT32 recvFlag, 
mmSockAddr* addr, mmSocklen_t *FromLen); -extern INT32 mmSAStartup(); -extern INT32 mmSACleanup(); -extern VOID *mmDlopen(const CHAR *fileName, INT32 mode); -extern INT32 mmDladdr(VOID *addr, mmDlInfo *info); -extern VOID *mmDlsym(VOID *handle, CHAR *funcName); -extern INT32 mmDlclose(VOID *handle); -extern CHAR *mmDlerror(); -extern INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); -extern INT32 mmDeleteTimer(mmTimer timerHandle); -extern INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); -extern INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); -extern INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); -extern INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); -extern INT32 mmSleep(UINT32 milliSecond); - -extern INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); -extern INT32 mmGetProcessPrio(mmProcess pid); -extern INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio); -extern INT32 mmGetThreadPrio(mmThread *threadHandle); -extern INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio); -extern INT32 mmAccess(const CHAR *pathName); -extern INT32 mmAccess2(const CHAR *pathName, INT32 mode); -extern INT32 mmRmdir(const CHAR *pathName); - -extern INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr); -extern INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout); -extern mmSsize_t mmWritev(mmProcess fd, mmIovSegment *iov, INT32 iovcnt); -extern VOID mmMb(); -extern INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); - -extern mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); -extern mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); -extern mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); -extern INT32 mmCloseFile(mmProcess fileId); - -extern mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value); -extern mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); -extern mmAtomicType 
mmValueSub(mmAtomicType *ptr, mmAtomicType value); -extern INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmSAStartup(); +MMPA_FUNC_VISIBILITY INT32 mmSACleanup(); +MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); +MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *funcName); +MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); +MMPA_FUNC_VISIBILITY CHAR *mmDlerror(); +MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); +MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); +MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmSleep(UINT32 milliSecond); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmGetProcessPrio(mmProcess pid); +MMPA_FUNC_VISIBILITY INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadPrio(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio); +MMPA_FUNC_VISIBILITY INT32 mmAccess(const CHAR *pathName); +MMPA_FUNC_VISIBILITY INT32 mmAccess2(const CHAR *pathName, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmRmdir(const CHAR *pathName); + +MMPA_FUNC_VISIBILITY INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr); +MMPA_FUNC_VISIBILITY INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout); +MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmProcess fd, mmIovSegment *iov, INT32 iovcnt); +MMPA_FUNC_VISIBILITY VOID mmMb(); +MMPA_FUNC_VISIBILITY INT32 mmInetAton(const 
CHAR *addrStr, mmInAddr *addr); + +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); + +MMPA_FUNC_VISIBILITY mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmSetData64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); // The following 3 interfaces are to be deleted -extern INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -extern INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -extern VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); +MMPA_FUNC_VISIBILITY INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); -extern INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -extern INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -extern VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle 
pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); // Poll related interface -extern mmCompletionHandle mmCreateCompletionPort(); -extern VOID mmCloseCompletionPort(mmCompletionHandle handle); -extern INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, pmmPollData polledData, - mmPollBack pollBack); -extern INT32 mmGetErrorCode(); -extern CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); -extern INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); -extern mmTimespec mmGetTickCount(); -extern INT32 mmGetRealPath(CHAR *path, CHAR *realPath); -extern INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); +MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); +MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, + pmmPollData polledData, mmPollBack pollBack); +MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); +MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); +MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); +MMPA_FUNC_VISIBILITY mmTimespec mmGetTickCount(); +MMPA_FUNC_VISIBILITY INT32 mmGetRealPath(CHAR *path, CHAR *realPath); +MMPA_FUNC_VISIBILITY INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); -extern INT32 mmDup2(INT32 oldFd, INT32 newFd); +MMPA_FUNC_VISIBILITY INT32 mmDup2(INT32 oldFd, INT32 newFd); -extern INT32 mmDup(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmDup(INT32 fd); -extern INT32 mmUnlink(const CHAR *filename); +MMPA_FUNC_VISIBILITY INT32 mmUnlink(const CHAR *filename); -extern INT32 mmChmod(const CHAR *filename, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmChmod(const CHAR *filename, INT32 mode); -extern INT32 mmFileno(FILE *stream); 
+MMPA_FUNC_VISIBILITY INT32 mmFileno(FILE *stream); -extern INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort); +MMPA_FUNC_VISIBILITY INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort); +MMPA_FUNC_VISIBILITY INT32 mmScandir2(const CHAR *path, mmDirent2 ***entryList, mmFilter2 filterFunc, mmSort2 sort); -extern VOID mmScandirFree(mmDirent **entryList, INT32 count); +MMPA_FUNC_VISIBILITY VOID mmScandirFree(mmDirent **entryList, INT32 count); +MMPA_FUNC_VISIBILITY VOID mmScandirFree2(mmDirent2 **entryList, INT32 count); -extern mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag); +MMPA_FUNC_VISIBILITY mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag); -extern mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag); +MMPA_FUNC_VISIBILITY mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag); -extern INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); +MMPA_FUNC_VISIBILITY INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); -extern INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); +MMPA_FUNC_VISIBILITY INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); -extern INT32 mmMsgClose(mmMsgid msqid); +MMPA_FUNC_VISIBILITY INT32 mmMsgClose(mmMsgid msqid); -extern INT32 mmLocalTimeR(const time_t *timep, struct tm *result); +MMPA_FUNC_VISIBILITY INT32 mmLocalTimeR(const time_t *timep, struct tm *result); -extern INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); -extern INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, - INT32 *longIndex); +MMPA_FUNC_VISIBILITY INT32 mmGetOptErr(); +MMPA_FUNC_VISIBILITY VOID mmSetOptErr(INT32 mmOptErr); +MMPA_FUNC_VISIBILITY INT32 mmGetOptInd(); +MMPA_FUNC_VISIBILITY VOID mmSetOptInd(INT32 mmOptInd); +MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt(); +MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); +MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); 
+MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); +MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); +MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, char *const *argv, const char *opts, const mmStructOption *longOpts, + INT32 *longIndex); -extern LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); -extern INT32 mmFtruncate(mmProcess fd, UINT32 length); +MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); +MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); -extern INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *)); -extern INT32 mmTlsSet(mmThreadKey key, const VOID *value); -extern VOID *mmTlsGet(mmThreadKey key); -extern INT32 mmTlsDelete(mmThreadKey key); -extern INT32 mmGetOsType(); +MMPA_FUNC_VISIBILITY INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *)); +MMPA_FUNC_VISIBILITY INT32 mmTlsSet(mmThreadKey key, const VOID *value); +MMPA_FUNC_VISIBILITY VOID *mmTlsGet(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmTlsDelete(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmGetOsType(); -extern INT32 mmFsync(mmProcess fd); -extern INT32 mmFsync2(INT32 fd); -extern INT32 mmChdir(const CHAR *path); -extern INT32 mmUmask(INT32 pmode); -extern INT32 mmThreadKill(mmThread id); -extern INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); +MMPA_FUNC_VISIBILITY INT32 mmFsync(mmProcess fd); +MMPA_FUNC_VISIBILITY INT32 mmFsync2(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmChdir(const CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmUmask(INT32 pmode); +MMPA_FUNC_VISIBILITY INT32 mmThreadKill(mmThread id); +MMPA_FUNC_VISIBILITY INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); -extern INT32 mmGetCwd(CHAR *buffer, INT32 maxLen); -extern INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len); -extern INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite); -extern CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); -extern CHAR 
*mmDirName(CHAR *path); -extern CHAR *mmBaseName(CHAR *path); -extern INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); +MMPA_FUNC_VISIBILITY INT32 mmGetCwd(CHAR *buffer, INT32 maxLen); +MMPA_FUNC_VISIBILITY INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len); +MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite); +MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); +MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path); +MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); /* * Function: set the thread name created by mmcreatetask @@ -467,7 +488,7 @@ extern INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the * execution failure returns EN_ERROR */ -extern INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); +MMPA_FUNC_VISIBILITY INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); /* * Function: get thread name @@ -477,37 +498,38 @@ extern INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the * execution failure returns EN_ERROR */ -extern INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); /* * Function:Set the thread name of the currently executing thread - call inside the thread body * Input:name:Thread name to be set * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the * execution failure returns EN_ERROR */ -extern INT32 mmSetCurrentThreadName(const CHAR *name); +MMPA_FUNC_VISIBILITY INT32 mmSetCurrentThreadName(const CHAR *name); /* * Function:Get the thread name of the currently executing 
thread - in body call * Input:name:The name of the thread to get, and the cache is allocated by the user,size>=MMPA_THREADNAME_SIZE * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the * execution failure returns EN_ERROR */ -extern INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); -extern INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); -extern INT32 mmIsDir(const CHAR *fileName); -extern INT32 mmGetOsName(CHAR *name, INT32 nameSize); -extern INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength); -extern INT32 mmGetMac(mmMacInfo **list, INT32 *count); -extern INT32 mmGetMacFree(mmMacInfo *list, INT32 count); -extern INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); -extern INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); -extern INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, mmProcess *id); - -extern INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, - const mmThreadAttr *threadAttr); -extern mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); -extern INT32 mmShmUnlink(const CHAR *name); -extern VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); -extern INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); +MMPA_FUNC_VISIBILITY INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); +MMPA_FUNC_VISIBILITY INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); +MMPA_FUNC_VISIBILITY INT32 mmIsDir(const CHAR *fileName); +MMPA_FUNC_VISIBILITY INT32 mmGetOsName(CHAR *name, INT32 nameSize); +MMPA_FUNC_VISIBILITY INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength); +MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 
mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, + mmProcess *id); + +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, + const mmThreadAttr *threadAttr); +MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); +MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); #define MMPA_DLL_API #ifdef __cplusplus diff --git a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h index be8e2bf3..b9415b2e 100644 --- a/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h +++ b/third_party/fwkacllib/inc/mmpa/sub_inc/mmpa_win.h @@ -65,6 +65,7 @@ typedef struct sockaddr mmSockAddr; typedef int mmSocklen_t; typedef int mmSemTimeout_t; typedef long mmAtomicType; +typedef long long mmAtomicType64; typedef DWORD mmExitCode; typedef DWORD mmErrorMsg; typedef int mmKey_t; @@ -76,6 +77,7 @@ typedef INT32 mmSsize_t; typedef int mmSize; // size typedef size_t mmSize_t; typedef VOID mmshmId_ds; +typedef long long MM_LONG; typedef enum { DT_DIR = FILE_ATTRIBUTE_DIRECTORY, @@ -86,8 +88,15 @@ typedef struct { char d_name[MAX_PATH]; // file name } mmDirent; +typedef struct { + unsigned long d_type; + char d_name[MAX_PATH]; // file name +} mmDirent2; + typedef int (*mmFilter)(const mmDirent *entry); +typedef int (*mmFilter2)(const mmDirent2 *entry); typedef int (*mmSort)(const mmDirent **a, const mmDirent **b); +typedef int (*mmSort2)(const mmDirent2 **a, const mmDirent2 **b); typedef struct { VOID *sendBuf; @@ -162,8 +171,8 @@ typedef struct { } mmTimezone; typedef struct { - LONG tv_sec; - LONG tv_nsec; + MM_LONG tv_sec; + MM_LONG 
tv_nsec; } mmTimespec; typedef mmTimerHandle mmTimer; @@ -283,12 +292,13 @@ typedef VOID (*mmPf)(VOID); #define DT_UNKNOWN 0 #define DT_FIFO 1 #define DT_CHR 2 -#define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 +#define MM_DT_DIR 16 +#define MM_DT_REG 32 #define mmConstructor(x) __declspec(allocate(".CRT$XCU")) mmPf con = x #define mmDestructor(x) __declspec(allocate(".CRT$XPU")) mmPf de = x @@ -327,175 +337,188 @@ typedef VOID (*mmPf)(VOID); #define __attribute__(v) -_declspec(dllexport) INT32 mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); -_declspec(dllexport) INT32 mmJoinTask(mmThread *threadHandle); -_declspec(dllexport) INT32 mmMutexInit(mmMutex_t *mutex); -_declspec(dllexport) INT32 mmMutexLock(mmMutex_t *mutex); -_declspec(dllexport) INT32 mmMutexTryLock(mmMutex_t *mutex); -_declspec(dllexport) INT32 mmMutexUnLock(mmMutex_t *mutex); -_declspec(dllexport) INT32 mmMutexDestroy(mmMutex_t *mutex); -_declspec(dllexport) INT32 mmCondInit(mmCond *cond); -_declspec(dllexport) INT32 mmCondLockInit(mmMutexFC *mutex); -_declspec(dllexport) INT32 mmCondLock(mmMutexFC *mutex); -_declspec(dllexport) INT32 mmCondUnLock(mmMutexFC *mutex); -_declspec(dllexport) INT32 mmCondLockDestroy(mmMutexFC *mutex); -_declspec(dllexport) INT32 mmRWLockInit(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockRDLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockWRLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRDLockUnLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmWRLockUnLock(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmRWLockDestroy(mmRWLock_t *rwLock); -_declspec(dllexport) INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); -_declspec(dllexport) INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); - -_declspec(dllexport) 
INT32 mmCondNotify(mmCond *cond); -_declspec(dllexport) INT32 mmCondNotifyAll(mmCond *cond); -_declspec(dllexport) INT32 mmCondDestroy(mmCond *cond); -_declspec(dllexport) INT32 mmGetPid(VOID); -_declspec(dllexport) INT32 mmGetTid(VOID); -_declspec(dllexport) INT32 mmGetPidHandle(mmProcess *processHandle); -_declspec(dllexport) INT32 mmGetLocalTime(mmSystemTime_t *sysTime); -_declspec(dllexport) INT32 mmGetSystemTime(mmSystemTime_t *sysTime); -_declspec(dllexport) INT32 mmSemInit(mmSem_t *sem, UINT32 value); -_declspec(dllexport) INT32 mmSemWait(mmSem_t *sem); -_declspec(dllexport) INT32 mmSemPost(mmSem_t *sem); -_declspec(dllexport) INT32 mmSemDestroy(mmSem_t *sem); -_declspec(dllexport) INT32 mmOpen(const CHAR *pathName, INT32 flags); -_declspec(dllexport) INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); -_declspec(dllexport) FILE *mmPopen(CHAR *command, CHAR *type); -_declspec(dllexport) INT32 mmClose(INT32 fd); -_declspec(dllexport) INT32 mmPclose(FILE *stream); -_declspec(dllexport) mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); -_declspec(dllexport) mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); -_declspec(dllexport) mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); -_declspec(dllexport) INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); -_declspec(dllexport) INT32 mmListen(mmSockHandle sockFd, INT32 backLog); -_declspec(dllexport) mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen); -_declspec(dllexport) INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); -_declspec(dllexport) INT32 mmCloseSocket(mmSockHandle sockFd); -_declspec(dllexport) mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); -_declspec(dllexport) mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); -_declspec(dllexport) INT32 mmSocketSendTo(mmSockHandle sockFd, +MMPA_FUNC_VISIBILITY INT32 
mmCreateTask(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmJoinTask(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmMutexInit(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexTryLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexUnLock(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmMutexDestroy(mmMutex_t *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondInit(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondLockInit(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondUnLock(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondLockDestroy(mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmRWLockInit(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryRDLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockTryWRLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRDLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmWRLockUnLock(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmRWLockDestroy(mmRWLock_t *rwLock); +MMPA_FUNC_VISIBILITY INT32 mmCondWait(mmCond *cond, mmMutexFC *mutex); +MMPA_FUNC_VISIBILITY INT32 mmCondTimedWait(mmCond *cond, mmMutexFC *mutex, UINT32 milliSecond); + +MMPA_FUNC_VISIBILITY INT32 mmCondNotify(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondNotifyAll(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmCondDestroy(mmCond *cond); +MMPA_FUNC_VISIBILITY INT32 mmGetPid(VOID); +MMPA_FUNC_VISIBILITY INT32 mmGetTid(VOID); +MMPA_FUNC_VISIBILITY INT32 mmGetPidHandle(mmProcess *processHandle); +MMPA_FUNC_VISIBILITY INT32 mmGetLocalTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmGetSystemTime(mmSystemTime_t *sysTime); +MMPA_FUNC_VISIBILITY INT32 mmSemInit(mmSem_t *sem, UINT32 value); +MMPA_FUNC_VISIBILITY INT32 mmSemWait(mmSem_t 
*sem); +MMPA_FUNC_VISIBILITY INT32 mmSemPost(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmSemDestroy(mmSem_t *sem); +MMPA_FUNC_VISIBILITY INT32 mmOpen(const CHAR *pathName, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmOpen2(const CHAR *pathName, INT32 flags, MODE mode); +MMPA_FUNC_VISIBILITY FILE *mmPopen(CHAR *command, CHAR *type); +MMPA_FUNC_VISIBILITY INT32 mmClose(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmPclose(FILE *stream); +MMPA_FUNC_VISIBILITY mmSsize_t mmWrite(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSsize_t mmRead(INT32 fd, VOID *buf, UINT32 bufLen); +MMPA_FUNC_VISIBILITY mmSockHandle mmSocket(INT32 sockFamily, INT32 type, INT32 protocol); +MMPA_FUNC_VISIBILITY INT32 mmBind(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmListen(mmSockHandle sockFd, INT32 backLog); +MMPA_FUNC_VISIBILITY mmSockHandle mmAccept(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t *addrLen); +MMPA_FUNC_VISIBILITY INT32 mmConnect(mmSockHandle sockFd, mmSockAddr *addr, mmSocklen_t addrLen); +MMPA_FUNC_VISIBILITY INT32 mmCloseSocket(mmSockHandle sockFd); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecv(mmSockHandle sockFd, VOID *recvBuf, INT32 recvLen, INT32 recvFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketSend(mmSockHandle sockFd, VOID *sendBuf, INT32 sendLen, INT32 sendFlag); +MMPA_FUNC_VISIBILITY INT32 mmSocketSendTo(mmSockHandle sockFd, VOID *sendMsg, INT32 sendLen, UINT32 sendFlag, const mmSockAddr* addr, INT32 tolen); -_declspec(dllexport) mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, +MMPA_FUNC_VISIBILITY mmSsize_t mmSocketRecvFrom(mmSockHandle sockFd, VOID *recvBuf, mmSize recvLen, UINT32 recvFlag, mmSockAddr* addr, mmSocklen_t *FromLen); -_declspec(dllexport) INT32 mmSAStartup(VOID); -_declspec(dllexport) INT32 mmSACleanup(VOID); -_declspec(dllexport) VOID *mmDlopen(const CHAR *fileName, INT mode); -_declspec(dllexport) INT32 mmDladdr(VOID *addr, mmDlInfo *info); -_declspec(dllexport) VOID *mmDlsym(VOID *handle, 
CHAR *fileName); -_declspec(dllexport) INT32 mmDlclose(VOID *handle); -_declspec(dllexport) CHAR *mmDlerror(VOID); -_declspec(dllexport) INT32 +MMPA_FUNC_VISIBILITY INT32 mmSAStartup(VOID); +MMPA_FUNC_VISIBILITY INT32 mmSACleanup(VOID); +MMPA_FUNC_VISIBILITY VOID *mmDlopen(const CHAR *fileName, INT mode); +MMPA_FUNC_VISIBILITY INT32 mmDladdr(VOID *addr, mmDlInfo *info); +MMPA_FUNC_VISIBILITY VOID *mmDlsym(VOID *handle, const CHAR *fileName); +MMPA_FUNC_VISIBILITY INT32 mmDlclose(VOID *handle); +MMPA_FUNC_VISIBILITY CHAR *mmDlerror(VOID); +MMPA_FUNC_VISIBILITY INT32 mmCreateAndSetTimer(mmTimer *timerHandle, mmUserBlock_t *timerBlock, UINT milliSecond, UINT period); -_declspec(dllexport) INT32 mmDeleteTimer(mmTimer timerHandle); -_declspec(dllexport) INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); -_declspec(dllexport) INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); -_declspec(dllexport) INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); -_declspec(dllexport) INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); -_declspec(dllexport) INT32 mmSleep(UINT32 milliSecond); -_declspec(dllexport) INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); -_declspec(dllexport) INT32 mmGetProcessPrio(mmProcess pid); -_declspec(dllexport) INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio); -_declspec(dllexport) INT32 mmGetThreadPrio(mmThread *threadHandle); -_declspec(dllexport) INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio); -_declspec(dllexport) INT32 mmAccess(const CHAR *pathName); -_declspec(dllexport) INT32 mmAccess2(const CHAR *pathName, INT32 mode); -_declspec(dllexport) INT32 mmRmdir(const CHAR *pathName); - -_declspec(dllexport) INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr); -_declspec(dllexport) INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout); -_declspec(dllexport) mmSsize_t mmWritev(mmSockHandle fd, mmIovSegment *iov, INT32 iovcnt); -_declspec(dllexport) VOID mmMb(); -_declspec(dllexport) INT32 
mmInetAton(const CHAR *addrStr, mmInAddr *addr); - -_declspec(dllexport) mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); -_declspec(dllexport) mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); -_declspec(dllexport) mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); -_declspec(dllexport) INT32 mmCloseFile(mmProcess fileId); - -_declspec(dllexport) mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value); -_declspec(dllexport) mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); -_declspec(dllexport) mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value); -_declspec(dllexport) INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); - -_declspec(dllexport) INT32 mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -_declspec(dllexport) INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); -_declspec(dllexport) VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); - -_declspec(dllexport) INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -_declspec(dllexport) INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); -_declspec(dllexport) VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); - -_declspec(dllexport) mmCompletionHandle mmCreateCompletionPort(); -_declspec(dllexport) VOID mmCloseCompletionPort(mmCompletionHandle handle); -_declspec(dllexport) INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, +MMPA_FUNC_VISIBILITY INT32 mmDeleteTimer(mmTimer timerHandle); +MMPA_FUNC_VISIBILITY INT32 mmStatGet(const CHAR *path, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmStat64Get(const CHAR *path, mmStat64_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmFStatGet(INT32 fd, mmStat_t *buffer); +MMPA_FUNC_VISIBILITY INT32 mmMkdir(const CHAR *pathName, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmSleep(UINT32 
milliSecond); +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithAttr(mmThread *threadHandle, mmUserBlock_t *funcBlock); +MMPA_FUNC_VISIBILITY INT32 mmGetProcessPrio(mmProcess pid); +MMPA_FUNC_VISIBILITY INT32 mmSetProcessPrio(mmProcess pid, INT32 processPrio); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadPrio(mmThread *threadHandle); +MMPA_FUNC_VISIBILITY INT32 mmSetThreadPrio(mmThread *threadHandle, INT32 threadPrio); +MMPA_FUNC_VISIBILITY INT32 mmAccess(const CHAR *pathName); +MMPA_FUNC_VISIBILITY INT32 mmAccess2(const CHAR *pathName, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmRmdir(const CHAR *pathName); + +MMPA_FUNC_VISIBILITY INT32 mmIoctl(mmProcess fd, INT32 ioctlCode, mmIoctlBuf *bufPtr); +MMPA_FUNC_VISIBILITY INT32 mmSemTimedWait(mmSem_t *sem, INT32 timeout); +MMPA_FUNC_VISIBILITY mmSsize_t mmWritev(mmSockHandle fd, mmIovSegment *iov, INT32 iovcnt); +MMPA_FUNC_VISIBILITY VOID mmMb(); +MMPA_FUNC_VISIBILITY INT32 mmInetAton(const CHAR *addrStr, mmInAddr *addr); + +MMPA_FUNC_VISIBILITY mmProcess mmOpenFile(const CHAR *fileName, UINT32 access, mmCreateFlag fileFlag); +MMPA_FUNC_VISIBILITY mmSsize_t mmReadFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY mmSsize_t mmWriteFile(mmProcess fileId, VOID *buffer, INT32 len); +MMPA_FUNC_VISIBILITY INT32 mmCloseFile(mmProcess fileId); + +MMPA_FUNC_VISIBILITY mmAtomicType mmSetData(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueInc(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType mmValueSub(mmAtomicType *ptr, mmAtomicType value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmSetData64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueInc64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY mmAtomicType64 mmValueSub64(mmAtomicType64 *ptr, mmAtomicType64 value); +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithDetach(mmThread *threadHandle, mmUserBlock_t *funcBlock); + +MMPA_FUNC_VISIBILITY INT32 
mmCreateNamedPipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenNamePipe(mmPipeHandle pipe[], CHAR *pipeName[], INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmCloseNamedPipe(mmPipeHandle namedPipe[]); + +MMPA_FUNC_VISIBILITY INT32 mmCreatePipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY INT32 mmOpenPipe(mmPipeHandle pipe[], CHAR *pipeName[], UINT32 pipeCount, INT32 waitMode); +MMPA_FUNC_VISIBILITY VOID mmClosePipe(mmPipeHandle pipe[], UINT32 pipeCount); + +MMPA_FUNC_VISIBILITY mmCompletionHandle mmCreateCompletionPort(); +MMPA_FUNC_VISIBILITY VOID mmCloseCompletionPort(mmCompletionHandle handle); +MMPA_FUNC_VISIBILITY INT32 mmPoll(mmPollfd *fds, INT32 fdCount, INT32 timeout, mmCompletionHandle handleIOCP, pmmPollData polledData, mmPollBack pollBack); -_declspec(dllexport) INT32 mmGetErrorCode(); -_declspec(dllexport) CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); -_declspec(dllexport) INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); -_declspec(dllexport) mmTimespec mmGetTickCount(); -_declspec(dllexport) INT32 mmGetRealPath(CHAR *path, CHAR *realPath); - -_declspec(dllexport) INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); - -_declspec(dllexport) INT32 mmDup2(INT32 oldFd, INT32 newFd); -_declspec(dllexport) INT32 mmDup(INT32 fd); -_declspec(dllexport) INT32 mmUnlink(const CHAR *filename); -_declspec(dllexport) INT32 mmChmod(const CHAR *filename, INT32 mode); -_declspec(dllexport) INT32 mmFileno(FILE *stream); -_declspec(dllexport) INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort); -_declspec(dllexport) VOID mmScandirFree(mmDirent **entryList, INT32 count); - -_declspec(dllexport) mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag); -_declspec(dllexport) mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag); -_declspec(dllexport) INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 
bufLen, INT32 msgFlag); -_declspec(dllexport) INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); - -_declspec(dllexport) INT32 mmMsgClose(mmMsgid msqid); - -_declspec(dllexport) INT32 mmLocalTimeR(const time_t *timep, struct tm *result); -_declspec(dllexport) INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); -_declspec(dllexport) INT32 +MMPA_FUNC_VISIBILITY INT32 mmGetErrorCode(); +MMPA_FUNC_VISIBILITY CHAR *mmGetErrorFormatMessage(mmErrorMsg errnum, CHAR *buf, mmSize size); +MMPA_FUNC_VISIBILITY INT32 mmGetTimeOfDay(mmTimeval *timeVal, mmTimezone *timeZone); +MMPA_FUNC_VISIBILITY mmTimespec mmGetTickCount(); +MMPA_FUNC_VISIBILITY INT32 mmGetRealPath(CHAR *path, CHAR *realPath); + +MMPA_FUNC_VISIBILITY INT32 mmRealPath(const CHAR *path, CHAR *realPath, INT32 realPathLen); + +MMPA_FUNC_VISIBILITY INT32 mmDup2(INT32 oldFd, INT32 newFd); +MMPA_FUNC_VISIBILITY INT32 mmDup(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmUnlink(const CHAR *filename); +MMPA_FUNC_VISIBILITY INT32 mmChmod(const CHAR *filename, INT32 mode); +MMPA_FUNC_VISIBILITY INT32 mmFileno(FILE *stream); +MMPA_FUNC_VISIBILITY INT32 mmScandir(const CHAR *path, mmDirent ***entryList, mmFilter filterFunc, mmSort sort); +MMPA_FUNC_VISIBILITY INT32 mmScandir2(const CHAR *path, mmDirent2 ***entryList, mmFilter2 filterFunc, mmSort2 sort); +MMPA_FUNC_VISIBILITY VOID mmScandirFree(mmDirent **entryList, INT32 count); +MMPA_FUNC_VISIBILITY VOID mmScandirFree2(mmDirent2 **entryList, INT32 count); + +MMPA_FUNC_VISIBILITY mmMsgid mmMsgCreate(mmKey_t key, INT32 msgFlag); +MMPA_FUNC_VISIBILITY mmMsgid mmMsgOpen(mmKey_t key, INT32 msgFlag); +MMPA_FUNC_VISIBILITY INT32 mmMsgRcv(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); +MMPA_FUNC_VISIBILITY INT32 mmMsgSnd(mmMsgid msqid, VOID *buf, INT32 bufLen, INT32 msgFlag); + +MMPA_FUNC_VISIBILITY INT32 mmMsgClose(mmMsgid msqid); + +MMPA_FUNC_VISIBILITY INT32 mmLocalTimeR(const time_t *timep, struct tm *result); +MMPA_FUNC_VISIBILITY INT32 
mmGetOptErr(); +MMPA_FUNC_VISIBILITY VOID mmSetOptErr(INT32 mmOptErr); +MMPA_FUNC_VISIBILITY INT32 mmGetOptInd(); +MMPA_FUNC_VISIBILITY VOID mmSetOptInd(INT32 mmOptInd); +MMPA_FUNC_VISIBILITY INT32 mmGetOptOpt(); +MMPA_FUNC_VISIBILITY VOID mmSetOpOpt(INT32 mmOptOpt); +MMPA_FUNC_VISIBILITY CHAR *mmGetOptArg(); +MMPA_FUNC_VISIBILITY VOID mmSetOptArg(CHAR *mmOptArg); +MMPA_FUNC_VISIBILITY INT32 mmGetOpt(INT32 argc, char *const *argv, const char *opts); +MMPA_FUNC_VISIBILITY INT32 mmGetOptLong(INT32 argc, CHAR *const *argv, const CHAR *opts, const mmStructOption *longopts, INT32 *longindex); -_declspec(dllexport) LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); -_declspec(dllexport) INT32 mmFtruncate(mmProcess fd, UINT32 length); +MMPA_FUNC_VISIBILITY LONG mmLseek(INT32 fd, INT64 offset, INT32 seekFlag); +MMPA_FUNC_VISIBILITY INT32 mmFtruncate(mmProcess fd, UINT32 length); -_declspec(dllexport) INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *)); -_declspec(dllexport) INT32 mmTlsSet(mmThreadKey key, const VOID *value); -_declspec(dllexport) VOID *mmTlsGet(mmThreadKey key); -_declspec(dllexport) INT32 mmTlsDelete(mmThreadKey key); -_declspec(dllexport) INT32 mmGetOsType(); +MMPA_FUNC_VISIBILITY INT32 mmTlsCreate(mmThreadKey *key, VOID (*destructor)(VOID *)); +MMPA_FUNC_VISIBILITY INT32 mmTlsSet(mmThreadKey key, const VOID *value); +MMPA_FUNC_VISIBILITY VOID *mmTlsGet(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmTlsDelete(mmThreadKey key); +MMPA_FUNC_VISIBILITY INT32 mmGetOsType(); -_declspec(dllexport) INT32 mmFsync(mmProcess fd); -_declspec(dllexport) INT32 mmFsync2(INT32 fd); -_declspec(dllexport) INT32 mmChdir(const CHAR *path); -_declspec(dllexport) INT32 mmUmask(INT32 pmode); -_declspec(dllexport) INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); +MMPA_FUNC_VISIBILITY INT32 mmFsync(mmProcess fd); +MMPA_FUNC_VISIBILITY INT32 mmFsync2(INT32 fd); +MMPA_FUNC_VISIBILITY INT32 mmChdir(const CHAR *path); +MMPA_FUNC_VISIBILITY INT32 
mmUmask(INT32 pmode); +MMPA_FUNC_VISIBILITY INT32 mmWaitPid(mmProcess pid, INT32 *status, INT32 options); -_declspec(dllexport) INT32 mmGetCwd(CHAR *buffer, INT32 maxLen); -_declspec(dllexport) CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); +MMPA_FUNC_VISIBILITY INT32 mmGetCwd(CHAR *buffer, INT32 maxLen); +MMPA_FUNC_VISIBILITY CHAR *mmStrTokR(CHAR *str, const CHAR *delim, CHAR **saveptr); -_declspec(dllexport) INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len); -_declspec(dllexport) INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite); -_declspec(dllexport) CHAR *mmDirName(CHAR *path); -_declspec(dllexport) CHAR *mmBaseName(CHAR *path); -_declspec(dllexport) INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); +MMPA_FUNC_VISIBILITY INT32 mmGetEnv(const CHAR *name, CHAR *value, UINT32 len); +MMPA_FUNC_VISIBILITY INT32 mmSetEnv(const CHAR *name, const CHAR *value, INT32 overwrite); +MMPA_FUNC_VISIBILITY CHAR *mmDirName(CHAR *path); +MMPA_FUNC_VISIBILITY CHAR *mmBaseName(CHAR *path); +MMPA_FUNC_VISIBILITY INT32 mmGetDiskFreeSpace(const char *path, mmDiskSize *diskSize); -_declspec(dllexport) INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); -_declspec(dllexport) INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); +MMPA_FUNC_VISIBILITY INT32 mmSetThreadName(mmThread *threadHandle, const CHAR *name); +MMPA_FUNC_VISIBILITY INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, INT32 size); /* * Function: set the thread name of the currently executing thread - internal call of thread, which is not supported @@ -504,7 +527,7 @@ _declspec(dllexport) INT32 mmGetThreadName(mmThread *threadHandle, CHAR *name, I * The input parameter error returns EN_INVALID_PARAM, the execution success returns EN_OK, and the * execution failure returns EN_ERROR */ -_declspec(dllexport) INT32 mmSetCurrentThreadName(const CHAR *name); +MMPA_FUNC_VISIBILITY INT32 mmSetCurrentThreadName(const CHAR *name); /* * 
Function: Get the thread name of the currently executing thread - thread body call, not supported under windows, null @@ -513,25 +536,25 @@ _declspec(dllexport) INT32 mmSetCurrentThreadName(const CHAR *name); * The input parameter error returns EN_INVALID_PARAM, the execution success returns * EN_OK, and the execution failure returns EN_ERROR */ -_declspec(dllexport) INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); - -_declspec(dllexport) INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); -_declspec(dllexport) INT32 mmIsDir(const CHAR *fileName); -_declspec(dllexport) INT32 mmGetOsName(CHAR *name, INT32 nameSize); -_declspec(dllexport) INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength); -_declspec(dllexport) INT32 mmGetMac(mmMacInfo **list, INT32 *count); -_declspec(dllexport) INT32 mmGetMacFree(mmMacInfo *list, INT32 count); -_declspec(dllexport) INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); -_declspec(dllexport) INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); -_declspec(dllexport) INT32 +MMPA_FUNC_VISIBILITY INT32 mmGetCurrentThreadName(CHAR *name, INT32 size); + +MMPA_FUNC_VISIBILITY INT32 mmGetFileSize(const CHAR *fileName, ULONGLONG *length); +MMPA_FUNC_VISIBILITY INT32 mmIsDir(const CHAR *fileName); +MMPA_FUNC_VISIBILITY INT32 mmGetOsName(CHAR *name, INT32 nameSize); +MMPA_FUNC_VISIBILITY INT32 mmGetOsVersion(CHAR *versionInfo, INT32 versionLength); +MMPA_FUNC_VISIBILITY INT32 mmGetMac(mmMacInfo **list, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmGetMacFree(mmMacInfo *list, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmGetCpuInfo(mmCpuDesc **cpuInfo, INT32 *count); +MMPA_FUNC_VISIBILITY INT32 mmCpuInfoFree(mmCpuDesc *cpuInfo, INT32 count); +MMPA_FUNC_VISIBILITY INT32 mmCreateProcess(const CHAR *fileName, const mmArgvEnv *env, const char *stdoutRedirectFile, mmProcess *id); -_declspec(dllexport) INT32 +MMPA_FUNC_VISIBILITY INT32 mmCreateTaskWithThreadAttr(mmThread *threadHandle, const mmUserBlock_t *funcBlock, const 
mmThreadAttr *threadAttr); -_declspec(dllexport) mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); -_declspec(dllexport) INT32 mmShmUnlink(const CHAR *name); -_declspec(dllexport) VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); -_declspec(dllexport) INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); +MMPA_FUNC_VISIBILITY mmFileHandle mmShmOpen(const CHAR *name, INT32 oflag, mmMode_t mode); +MMPA_FUNC_VISIBILITY INT32 mmShmUnlink(const CHAR *name); +MMPA_FUNC_VISIBILITY VOID *mmMmap(mmFd_t fd, mmSize_t size, mmOfft_t offset, mmFd_t *extra, INT32 prot, INT32 flags); +MMPA_FUNC_VISIBILITY INT32 mmMunMap(VOID *data, mmSize_t size, mmFd_t *extra); #ifdef __cplusplus #if __cplusplus } diff --git a/third_party/fwkacllib/inc/ops/hcom_ops.h b/third_party/fwkacllib/inc/ops/hcom_ops.h index 1fe9055c..b90b225e 100644 --- a/third_party/fwkacllib/inc/ops/hcom_ops.h +++ b/third_party/fwkacllib/inc/ops/hcom_ops.h @@ -89,6 +89,10 @@ REG_OP(HcomAllReduce) * @par Attributes: * @li root_rank: A required integer identifying the root rank in the op input of this rank will be broadcast to other ranks. + * @li fusion: An optional integer identifying whether the op needs fusion; + defaults to 0 (no fusion). + * @li fusion_id: An optional integer identifying the fusion id if para fusion + is set; defaults to -1. * @li group: A required string identifying the group name of ranks participating in the op.
* @par Outputs: @@ -103,6 +107,8 @@ REG_OP(HcomBroadcast) .DYNAMIC_OUTPUT(y, TensorType({DT_FLOAT, DT_INT32, DT_INT8, DT_INT16, DT_FLOAT16, DT_INT64, DT_UINT64})) .REQUIRED_ATTR(root_rank, Int) .REQUIRED_ATTR(group, String) + .ATTR(fusion, Int, 0) + .ATTR(fusion_id, Int, -1) .ATTR(alpha, Float, 1.0) .ATTR(beta, Float, 0.0) .OP_END_FACTORY_REG(HcomBroadcast) @@ -213,6 +219,14 @@ REG_OP(HcomRemoteRead) .REQUIRED_ATTR(dtype, Type) .OP_END_FACTORY_REG(HcomRemoteRead) +REG_OP(HcomRemoteRefRead) + .INPUT(remote, TensorType({DT_UINT64})) + .INPUT(cache_var, TensorType({DT_UINT64})) + .INPUT(local_offset, TensorType({DT_UINT64})) + .OUTPUT(cache_var, TensorType({DT_UINT64})) + .REQUIRED_ATTR(dtype, Type) + .OP_END_FACTORY_REG(HcomRemoteRefRead) + /** * @brief Performs Remote Write of input tensors * @par Inputs: @@ -225,5 +239,11 @@ REG_OP(HcomRemoteWrite) .INPUT(local, TensorType::ALL()) .OP_END_FACTORY_REG(HcomRemoteWrite) +REG_OP(HcomRemoteScatterWrite) + .INPUT(remote, TensorType({DT_INT64, DT_UINT64})) + .INPUT(local, TensorType::ALL()) + .OPTIONAL_INPUT(local_offset, TensorType({DT_UINT64})) + .OP_END_FACTORY_REG(HcomRemoteScatterWrite) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_HCOM_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h index c0576dd5..35296870 100644 --- a/third_party/fwkacllib/inc/ops/nn_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_calculation_ops.h @@ -778,7 +778,7 @@ REG_OP(Conv2DCompress) * With the format "HWCN" , the data is stored in the order of: [filter_height, * filter_width, in_channels / groups, out_channels]. *@li offsets: A 4D tensor of x-y coordinates offset and mask. With the format -* "NHWC", the data is stored in the order of: [batch, out_height, out_width, +* "NHWC", the data is stored in the order of: [batch, in_height, in_width, * deformable_groups * filter_height * filter_width * 3]. 
*@li bias: An optional 1D tensor of additive biases to the filter outputs. * The data is stored in the order of: [out_channels]. @@ -789,27 +789,30 @@ REG_OP(Conv2DCompress) | Tensor | x | filter | offsets | bias | y ------------|---------|---------|---------|----------|-------- | Data Type | float16 | float16 | float16 | float16 | float16 + | |---------|---------|---------|----------|-------- + | | float32 | float32 | float32 | float32 | float32 ------------|---------|---------|---------|----------|-------- | Format | NCHW | NCHW | NCHW | ND | NCHW | | NHWC | HWCN | NHWC | | NHWC @endverbatim +* For float32 type, the actual convolution calculation part on the chip is +* based on float16. *\n * *@par Attributes: *@li strides: Required. A list of 4 integers. The stride of the sliding window * for each dimension of input. The dimension order is interpreted according to -* the value of data_format. The N and C dimensions must be set to 1. +* the data format of "x". The N and C dimensions must be set to 1. *@li pads: Required. A list of 4 integers. The number of pixels to add to each * (top, bottom, left, right) side of the input. *@li dilations: Optional. A list of 4 integers. The dilation factor for each -* dimension of input. The dimension order is interpreted according to the value -* of data_format The N and C dimensions must be set to 1. Defaults to +* dimension of input. The dimension order is interpreted according to the data +* format of "x". The N and C dimensions must be set to 1. Defaults to * [1, 1, 1, 1]. *@li groups: Optional. An integer of type int32. The number of blocked * connections from input channels to output channels. In_channels and * out_channels must both be divisible by "groups". Defaults to 1. -*@li data_format: Optional. An optional string from: "NHWC", "NCHW". Specify -* the data format of the input and output data. Defaults to "NHWC". +*@li data_format: Reserved. *@li deformable_groups: Optional. An integer of type int32. 
The number of * deformable group partitions. In_channels must be divisible by * "deformable_groups". Defaults to 1. @@ -819,8 +822,8 @@ REG_OP(Conv2DCompress) *@verbatim | Name | Field | Scope --------------------|--------|---------------------------- - | Input Image Size | H | [1, 100000 / H(filter)] - | | W | [1, 4096 / W(filter)] + | Input Image Size | H | [1, 100000] + | | W | [1, 4096] --------------------|--------|---------------------------- | Filter Size | H | [1, 255] | | W | [1, 255] @@ -877,11 +880,11 @@ REG_OP(Conv2DCompress) *@li Compatible with the Mmcv operator "deform_conv". */ REG_OP(DeformableConv2D) - .INPUT(x, TensorType({DT_FLOAT16})) - .INPUT(filter, TensorType({DT_FLOAT16})) - .INPUT(offsets, TensorType({DT_FLOAT16})) - .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(filter, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OPTIONAL_INPUT(bias, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .ATTR(dilations, ListInt, {1, 1, 1, 1}) @@ -1400,14 +1403,13 @@ REG_OP(Conv2DTransposeD) .OP_END_FACTORY_REG(Conv2DTransposeD) /** -*@brief In the deformable convolution operator, the original input FeatureMap is expanded to a ksize_y * H * ksize_x *W -*FeatureMap by bilinear interpolation according to the offset offset. +*@brief Computes the deformed convolution output with the expected input *@par Inputs: * Four inputs: - * @li x: A Tensor of type float16 + * @li x: A Tensor of type float16,float32 * @li offsets: A Tensor of type float16,float32.Deformation offset parameter. *@par Required Attributes: - * @li strides: A tuple/list of 2 integers.The stride of the sliding window for + * @li strides: A tuple/list of 4 integers.The stride of the sliding window for * height and width for H/W dimension. 
* @li pads: A tuple/list of 4 integers.Padding added to each dimension * of the input. @@ -1415,20 +1417,20 @@ REG_OP(Conv2DTransposeD) *@par Attributes: * Three attributes: * @li dilations: A tuple/list of 4 integers, The dilation factor for each dimension - * of input. Defaults to [0, 0, 0, 0] + * of input. Defaults to [1, 1, 1, 1] * @li data_format: An optional string from: "NCHW", "NHWC". Defaults to "NCHW". Specify the data format of the input x. * @li deformable_groups: Specify the c-axis grouping number of input x. *@par Outputs: - * y: A Tensor. A Tensor of type float16. + * y: A Tensor. A Tensor of type float16, float32. */ REG_OP(DeformableOffsets) - .INPUT(x, TensorType({DT_FLOAT16})) - .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT32})) - .OUTPUT(y, TensorType({DT_FLOAT16})) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(offsets, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) .REQUIRED_ATTR(strides, ListInt) .REQUIRED_ATTR(pads, ListInt) .REQUIRED_ATTR(ksize, ListInt) - .ATTR(dilations, ListInt, {0, 0, 0, 0}) + .ATTR(dilations, ListInt, {1, 1, 1, 1}) .ATTR(data_format, String, "NCHW") .ATTR(deformable_groups, Int, 1) .OP_END_FACTORY_REG(DeformableOffsets) diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h index 290e5880..64e18fc7 100644 --- a/third_party/fwkacllib/inc/ops/transformation_ops.h +++ b/third_party/fwkacllib/inc/ops/transformation_ops.h @@ -140,7 +140,8 @@ support "NHWC/NCHW" to "NC1HWC0" and "NC1HWC0" to "NHWC/NCHW" *@par Attributes: *@li src_format: A string source data format, can be "NHWC", "NCHW", "FRACTAL_Zn" etc. -*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc . \n +*@li dst_format: A string target data format, can be "NC1HWC0", "NCHW", "FRACTAL_Zn" etc. +*@li group: An optional int32. Defaults to 1. \n *@par Outputs: *dst: A Tensor dtype of all types.
@@ -150,6 +151,7 @@ REG_OP(TransData) .OUTPUT(dst, TensorType::BasicType()) .REQUIRED_ATTR(src_format, String) .REQUIRED_ATTR(dst_format, String) + .ATTR(group, Int, 1) .OP_END_FACTORY_REG(TransData) /** diff --git a/third_party/fwkacllib/inc/register/infer_data_slice_registry.h b/third_party/fwkacllib/inc/register/infer_data_slice_registry.h new file mode 100644 index 00000000..e8623468 --- /dev/null +++ b/third_party/fwkacllib/inc/register/infer_data_slice_registry.h @@ -0,0 +1,45 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef INC_REGISTER_INFER_DATA_SLICE_REGISTRY_H_ +#define INC_REGISTER_INFER_DATA_SLICE_REGISTRY_H_ + +#include "external/graph/ge_error_codes.h" +#include "external/graph/operator.h" + +namespace ge { +using InferDataSliceFunc = std::function<graphStatus(Operator &)>; + +class GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY InferDataSliceFuncRegister { + public: + InferDataSliceFuncRegister(const char *operator_type, const InferDataSliceFunc &infer_data_slice_func); + ~InferDataSliceFuncRegister() = default; +}; + +// infer data slice func register +#define IMPLEMT_INFER_DATA_SLICE(op_name, func_name) \ + GE_FUNC_DEV_VISIBILITY GE_FUNC_HOST_VISIBILITY static graphStatus func_name(op::op_name &op) + +#define INFER_DATA_SLICE_FUNC(op_name, x) [&](Operator &v) { return x((op::op_name &)v); } + +#define __INFER_DATA_SLICE_FUNC_REG_IMPL__(op_name, x, n) \ + static const InferDataSliceFuncRegister PASTE(ids_register, n)(#op_name, x) + +#define INFER_DATA_SLICE_FUNC_REG(op_name, x) \ + __INFER_DATA_SLICE_FUNC_REG_IMPL__(op_name, INFER_DATA_SLICE_FUNC(op_name, x), __COUNTER__) +} // namespace ge + +#endif // INC_REGISTER_INFER_DATA_SLICE_REGISTRY_H_ diff --git a/third_party/fwkacllib/inc/runtime/context.h b/third_party/fwkacllib/inc/runtime/context.h index 21296ca2..3346ff75 100644 --- a/third_party/fwkacllib/inc/runtime/context.h +++ b/third_party/fwkacllib/inc/runtime/context.h @@ -139,7 +139,7 @@ RTS_API rtError_t rtSetGroup(int32_t groupId); * @param [in] groupid count * @return RT_ERROR_NONE for ok, errno for failed */ -RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t* groupInfo, uint32_t count); +RTS_API rtError_t rtGetGroupInfo(int32_t groupId, rtGroupInfo_t *groupInfo, uint32_t count); /** * @ingroup diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h index af7b16d8..d24af6fa 100644 --- a/third_party/fwkacllib/inc/runtime/event.h +++ b/third_party/fwkacllib/inc/runtime/event.h @@ -116,13 +116,13 @@ RTS_API rtError_t
rtEventGetTimeStamp(uint64_t *time, rtEvent_t event); /** * @ingroup dvrt_event * @brief name an event - * @param [in] event_ event to be named + * @param [in] event event to be named * @param [in] name identification name * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input of event, name * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtNameEvent(rtEvent_t event_, const char *name); +RTS_API rtError_t rtNameEvent(rtEvent_t event, const char *name); /** * @ingroup dvrt_event @@ -132,7 +132,7 @@ RTS_API rtError_t rtNameEvent(rtEvent_t event_, const char *name); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNotifyCreate(int32_t device_id, rtNotify_t *notify_); +RTS_API rtError_t rtNotifyCreate(int32_t deviceId, rtNotify_t *notify); /** * @ingroup dvrt_event @@ -142,7 +142,7 @@ RTS_API rtError_t rtNotifyCreate(int32_t device_id, rtNotify_t *notify_); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtNotifyDestroy(rtNotify_t notify_); +RTS_API rtError_t rtNotifyDestroy(rtNotify_t notify); /** * @ingroup dvrt_event @@ -153,7 +153,7 @@ RTS_API rtError_t rtNotifyDestroy(rtNotify_t notify_); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyRecord(rtNotify_t notify_, rtStream_t stream_); +RTS_API rtError_t rtNotifyRecord(rtNotify_t notify, rtStream_t stream); /** * @ingroup dvrt_event @@ -164,7 +164,7 @@ RTS_API rtError_t rtNotifyRecord(rtNotify_t notify_, rtStream_t stream_); * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_STREAM_CONTEXT for stream is not in current ctx */ -RTS_API rtError_t rtNotifyWait(rtNotify_t notify_, rtStream_t stream_); +RTS_API rtError_t rtNotifyWait(rtNotify_t notify, rtStream_t stream); /** * @ingroup dvrt_event @@ -174,7 +174,7 @@ RTS_API rtError_t 
rtNotifyWait(rtNotify_t notify_, rtStream_t stream_); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtNameNotify(rtNotify_t notify_, const char *name); +RTS_API rtError_t rtNameNotify(rtNotify_t notify, const char *name); /** * @ingroup dvrt_event @@ -184,7 +184,7 @@ RTS_API rtError_t rtNameNotify(rtNotify_t notify_, const char *name); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtGetNotifyID(rtNotify_t notify_, uint32_t *notify_id); +RTS_API rtError_t rtGetNotifyID(rtNotify_t notify, uint32_t *notify_id); /** * @ingroup dvrt_event @@ -199,7 +199,7 @@ RTS_API rtError_t rtIpcSetNotifyName(rtNotify_t notify, char *name, uint32_t len /** * @ingroup dvrt_event * @brief Open IPC notify - * @param [in] notify notify to be opened + * @param [out] notify the opened notify * @param [in] name identification name * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 2fd7799d..98862ad4 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -297,7 +297,7 @@ RTS_API rtError_t rtQueryFunctionRegistered(const char *stubName); * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, uint32_t blockDim, void **dumpBaseAddr, - rtStream_t stream_); + rtStream_t stream); /** * @ingroup rt_kernel @@ -309,7 +309,7 @@ RTS_API rtError_t rtKernelConfigDump(uint32_t kind, uint32_t dumpSizePerBlock, u * @param [in] smDesc shared memory description * @param [in] stream associated stream * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, 
rtSmDesc_t *smDesc, rtStream_t stream); @@ -325,7 +325,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * * @param [in] stream associated stream * @param [in] flag dump flag * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithFlag(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, rtSmDesc_t *smDesc, rtStream_t stream, uint32_t flags); @@ -387,7 +387,7 @@ typedef void *rtModel_t; * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ - RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr , uint32_t dumpSize, uint32_t flag); + RTS_API rtError_t rtDumpAddrSet(rtModel_t model, void *addr, uint32_t dumpSize, uint32_t flag); /** * @ingroup rt_kernel @@ -456,7 +456,7 @@ RTS_API rtError_t rtKernelConfigTransArg(const void *ptr, uint64_t size, uint32_ * @brief start fusion kernels. * @param [in] stream stream for fusion kernels * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelFusionStart(rtStream_t stream); @@ -465,7 +465,7 @@ RTS_API rtError_t rtKernelFusionStart(rtStream_t stream); * @brief end fusion kernels. 
* @param [in] stream stream for fusion kernels * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelFusionEnd(rtStream_t stream); @@ -474,7 +474,7 @@ RTS_API rtError_t rtKernelFusionEnd(rtStream_t stream); * @brief set kernelinfo callback * @param [in] callback * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtSetKernelReportCallback(rtKernelReportCallback callBack); @@ -484,7 +484,7 @@ RTS_API rtError_t rtSetKernelReportCallback(rtKernelReportCallback callBack); * @param [in] threadId thread id for stream * @param [in] stream stream for subscribe * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stream); @@ -495,7 +495,7 @@ RTS_API rtError_t rtSubscribeReport(uint64_t threadId, rtStream_t stream); * @param [in] fnData user data * @param [in] stream subscribed stream * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtStream_t stream, bool isBlock); @@ -504,7 +504,7 @@ RTS_API rtError_t rtCallbackLaunch(rtCallback_t callBackFunc, void *fnData, rtSt * @brief process callback report. 
* @param [in] timeout if timeout=-1, while(1); else timeout * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtProcessReport(int32_t timeout); @@ -514,7 +514,7 @@ RTS_API rtError_t rtProcessReport(int32_t timeout); * @param [in] threadId thread id for stream * @param [in] stream stream for subscribe * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); @@ -522,7 +522,7 @@ RTS_API rtError_t rtUnSubscribeReport(uint64_t threadId, rtStream_t stream); * @ingroup profiling_base * @brief start online prof. * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); @@ -530,7 +530,7 @@ RTS_API rtError_t rtStartOnlineProf(rtStream_t stream, uint32_t sampleNum); * @ingroup profiling_base * @brief stop online prof. * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); @@ -538,9 +538,26 @@ RTS_API rtError_t rtStopOnlineProf(rtStream_t stream); * @ingroup profiling_base * @brief get online prof. * @return RT_ERROR_NONE for ok - * @return RT_ERROR_INVALID_VALUE for error input + * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtGetOnlineProfData(rtStream_t stream, rtProfDataInfo_t *pProfData, uint32_t profDataNum); + +/** + * @ingroup profiling_base + * @brief start mdc profiler. + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStartMDCProfiler(void **addr, uint32_t length); + +/** + * @ingroup profiling_base + * @brief stop mdc profiler. 
+ * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtStopMDCProfiler(void *addr); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/mem.h b/third_party/fwkacllib/inc/runtime/mem.h index 0d9e20ce..cc1dc05d 100644 --- a/third_party/fwkacllib/inc/runtime/mem.h +++ b/third_party/fwkacllib/inc/runtime/mem.h @@ -501,13 +501,13 @@ RTS_API rtError_t rtIpcCloseMemory(const void *ptr); * @ingroup dvrt_mem * @brief HCCL Async memory cpy * @param [in] index sq index - * @param [in] wqe_index moudle index + * @param [in] wqeIndex module index * @param [in] stream asynchronized task stream * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input * @return RT_ERROR_DRV_ERR for driver error */ -RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqe_index, rtStream_t stream); +RTS_API rtError_t rtRDMASend(uint32_t index, uint32_t wqeIndex, rtStream_t stream); /** * @ingroup dvrt_mem diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index ab542d89..0d973851 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -144,12 +144,12 @@ RTS_API rtError_t rtNameStream(rtStream_t stream_, const char *name); * @param [in] ptr Determine the address where the value of the true and false branches is located * @param [in] condition switch condition * @param [in] value switch value - * @param [in] true_stream Stream that needs to be activated when the value is non-zero + * @param [in] trueStream Stream that needs to be activated when the value is non-zero * @param [in] stream input stream to init task * @return RT_ERROR_NONE for complete * @return RT_ERROR_INVALID_VALUE for error input */ -RTS_API rtError_t rtStreamSwitch(void *ptr, rtCondition_t condition, int64_t value, rtStream_t true_stream, +RTS_API rtError_t rtStreamSwitch(void *ptr,
rtCondition_t condition, int64_t value, rtStream_t trueStream, rtStream_t stream); /**