diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h new file mode 100644 index 00000000..46d934e6 --- /dev/null +++ b/inc/external/hccl/hccl.h @@ -0,0 +1,134 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl.h + * @brief HCCL API + */ + +#ifndef HCCL_H_ +#define HCCL_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief Initialize HCCL. + * + * @param clusterInfo A string identifying the cluster info file path, include file name. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitClusterInfo(const char *clusterInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief Get hccl root info. + * + * @param rootInfo A pointer identifying the hccl root info. + * @return HcclResult + */ +extern HcclResult HcclGetRootInfo(HcclRootInfo *rootInfo); + +/** + * @brief Initialize HCCL with root info. + * + * @param nRanks A integer identifying the rank size of the cluster. + * @param rootInfo A struct identifying the hccl root info. + * @param rank A integer identifying the identify for the rank. + * @param comm A pointer identifying the initialized communication resource. + * @return HcclResult + * @see HcclCommDestroy() + */ +extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *rootInfo, uint32_t rank, HcclComm *comm); + +/** + * @brief AllReduce operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param count An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, + * float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, + HcclComm comm, aclrtStream stream); + +/** + * @brief Broadcast operator. + * + * @param buf A pointer identifying the data address of the operator. + * @param count An integer(u64) identifying the number of the data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param root An integer(u32) identifying the the root rank in the operator. + * @param comm A pointer identifying the communication resource based on + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, + aclrtStream stream); + +/** + * @brief ReduceScatter operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param recvCount An integer(u64) identifying the number of the output data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, + HcclReduceOp op, HcclComm comm, aclrtStream stream); + +/** + * @brief AllGather operator. + * + * @param sendBuf A pointer identifying the input data address of the operator. + * @param recvBuf A pointer identifying the output data address of the operator. + * @param sendCount An integer(u64) identifying the number of the input data. + * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. + * @param comm A pointer identifying the communication resource based on. + * @param stream A pointer identifying the stream information. + * @return HcclResult + */ +extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, + aclrtStream stream); + +/** + * @brief Destroy HCCL comm + * + * @param comm A pointer identifying the communication resource targetting + * @return HcclResult + * @see HcclCommInitClusterInfo() + */ +extern HcclResult HcclCommDestroy(HcclComm comm); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h new file mode 100644 index 00000000..0e832396 --- /dev/null +++ b/inc/external/hccl/hccl_types.h @@ -0,0 +1,101 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hccl_types.h + * @brief HCCL data type definition + * + */ + +#ifndef HCCL_TYPES_H_ +#define HCCL_TYPES_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/** + * @brief HCCL functions return value definition + */ +typedef enum { + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ +} HcclResult; + +/** + * @brief handle to HCCL communicator + */ +typedef void *HcclComm; + +/** + * @brief HCCL Reduction opperation + */ +typedef enum { + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN = 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ +} HcclReduceOp; + +/** + * @brief HCCL data type + */ +typedef enum { + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ +} HcclDataType; + +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length + +/** + * @brief HCCL root info + */ +typedef struct HcclRootInfoDef { + char internal[HCCL_ROOT_INFO_BYTES]; +} HcclRootInfo; + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc index 545fe66f..8a6e0bdc 100644 --- a/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc +++ b/src/ge/graph/preprocess/insert_op/ge_aipp_op.cc @@ -790,22 +790,24 @@ Status AippOp::AddAttrToAippData(const OpDescPtr &aipp_data_op_desc) { } Status AippOp::AddNodeToGraph(const NodePtr &aipp_node, int64_t max_dynamic_aipp_size) { - static int index = 0; std::vector input_shape_dim(1, max_dynamic_aipp_size); GeShape input_shape(input_shape_dim); // construct input tensor GeTensorDesc input_tensor(input_shape, FORMAT_ND, DT_UINT8); TensorUtils::SetReuseInput(input_tensor, false); TensorUtils::SetSize(input_tensor, max_dynamic_aipp_size); - + GE_CHECK_NOTNULL(aipp_node); const ComputeGraphPtr &graph = aipp_node->GetOwnerComputeGraph(); string node_name; - if (index == 0) { + // First aippdata name should be definite. + if (graph->FindFirstNodeMatchType(AIPPDATA) == nullptr) { + GELOGI("Current graph has no aippdata node, so the name of it must be definite."); node_name = kDynamicAippData; } else { - node_name = string(kDynamicAippData) + "_" + to_string(index); + node_name = string(kDynamicAippData) + "_" + aipp_node->GetName(); } - ++index; + GELOGI("Current add aippdata node name is %s", node_name.c_str()); + // new add aipp_data ops for dynamic aipp param input OpDescPtr op_desc_ptr_data = MakeShared(node_name, AIPPDATA); GE_CHECK_NOTNULL(op_desc_ptr_data); diff --git a/src/ge/host_cpu_engine/proto/task.proto b/src/ge/host_cpu_engine/proto/task.proto deleted file mode 120000 index 36ae4847..00000000 --- a/src/ge/host_cpu_engine/proto/task.proto +++ /dev/null @@ -1 +0,0 @@ -../../proto/task.proto \ No newline at end of file diff --git a/src/ge/host_cpu_engine/proto/task.proto b/src/ge/host_cpu_engine/proto/task.proto new file mode 100644 index 00000000..50ea061b --- /dev/null +++ b/src/ge/host_cpu_engine/proto/task.proto @@ -0,0 +1,170 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +package domi; + +message ModelTaskDef { + string version = 1; + + map attr = 9; // Extended field + repeated TaskDef task = 10; + + uint64 memory_size = 11; + uint32 stream_num = 12; + uint32 event_num = 13; + uint64 weight_size = 14; + + repeated bytes op = 15; // input/output opdef in bytes + + uint64 base_addr = 16; // base addr + uint64 weight_addr = 17; // weight addr + uint32 batch_num = 18; +} + + +message TaskDef { + uint32 id = 1; + uint32 type = 2; + + uint32 stream_id = 10; + uint32 event_id = 11; + + KernelDef kernel = 20; + KernelExDef kernel_ex = 21; + KernelHcclDef kernel_hccl = 25; + EventExDef event_ex = 26; + LogTimeStampDef log_timestamp = 28; + + uint32 label_id = 30; + + MemcpyAsyncDef memcpy_async = 31; + StreamSwitchDef stream_switch = 32; + StreamActiveDef stream_active = 33; + bytes private_def = 34; + uint64 ops_kernel_store_ptr = 35; // adjustments to other fields in the future + StreamSwitchNDef stream_switch_n = 36; + + LabelSetDef label_set = 37; + LabelGotoExDef label_goto_ex = 38; + LabelSwitchByIndexDef label_switch_by_index = 39; +} + +message KernelDef { + KernelContext context = 1; + + string stub_func = 10; + uint32 block_dim = 11; + uint32 args_size = 12; + bytes args = 13; + bytes sm_desc = 14; + bytes flowtable = 15; + string so_name = 16; + string kernel_name = 17; + bytes kernel_ext_info = 18; + uint32 kernel_ext_info_size = 19; +} + +message KernelContext { + uint32 kernel_type = 1; + uint32 op_id = 2; // OP type in CCE + uint32 kernel_func_id = 3; + uint32 op_index = 4; // TE/Custom operator + bool is_flowtable = 5; // Identify whether args is a flowtable structure + bytes args_offset = 6; // args offset information + uint32 args_count = 7; // args count + repeated uint32 origin_op_index = 8; +} + + +message KernelExDef { + uint32 flags = 1; + + uint32 op_index = 4; + uint32 args_size = 12; + bytes args = 13; + bytes task_info = 14; // serialized nodeDef, funcDef, inputoutput + uint32 task_info_size = 15; + bytes kernel_ext_info = 16; + uint32 kernel_ext_info_size = 17; +} + + +message KernelHcclDef { + uint32 op_index = 8; + string hccl_type = 9; +} + + +message EventExDef { + uint32 op_index = 1; + uint32 event_type = 2; +} + +message LogTimeStampDef { + uint64 logid = 1; + bool notify = 2; + uint32 flat = 3; +} + +message MemcpyAsyncDef { + uint64 dst = 1; + uint64 dst_max = 2; + uint64 src = 3; + uint64 count = 4; + uint32 kind = 5; + uint32 op_index = 6; +} + +message StreamSwitchDef { + uint32 op_index = 1; + uint32 true_stream_id = 2; + int64 value = 3; + uint64 value_ptr = 4; + uint32 data_type = 5; +} + +message StreamActiveDef { + uint32 op_index = 1; + uint32 active_stream_id = 2; +} + +message StreamSwitchNDef { + uint32 op_index = 1; + uint32 size = 2; + repeated int64 target_value = 3; + repeated uint32 true_stream_id = 4; + uint32 element_size = 5; + uint32 data_type = 6; +} + +message LabelSetDef { + uint32 op_index = 1; + uint32 label_id = 2; + uint32 model_id = 3; +} + +message LabelGotoExDef { + uint32 op_index = 1; + uint32 label_id = 2; + uint32 model_id = 3; +} + +message LabelSwitchByIndexDef { + uint32 op_index = 1; + uint32 label_max = 2; +} diff --git a/third_party/fwkacllib/inc/ops/array_ops.h b/third_party/fwkacllib/inc/ops/array_ops.h index 1af02b05..ef0ce449 100644 --- a/third_party/fwkacllib/inc/ops/array_ops.h +++ b/third_party/fwkacllib/inc/ops/array_ops.h @@ -889,14 +889,29 @@ REG_OP(ReadVariableOp) .ATTR(dtype, Int, DT_INT32) .OP_END_FACTORY_REG(ReadVariableOp) +/** +*@brief Mark outputs of one sub graph which partitioned by engine type. + +*@par Inputs: +*x: A tensor. \n + +*@par Outputs: +*y: A tensor. \n + +*@par Attributes: +*@li peerIndex: The index of the corresponding 'placeholder' node it's connected to. +*@li parentOpType: Op type of original node. + +*@par Restrictions: +*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use. +*/ REG_OP(End) .INPUT(x, TensorType::ALL()) .OUTPUT(y, TensorType::ALL()) - .ATTR(peerIndex, Int, 0) // the index of the corresponding 'placeholder' node it's connected to - .ATTR(parentOpType, String, "") // op type of original node + .ATTR(peerIndex, Int, 0) + .ATTR(parentOpType, String, "") .OP_END_FACTORY_REG(End) - /** *@brief Operations for writing summary data, for use in analysis and visualization.