diff --git a/inc/external/acl/acl_base.h b/inc/external/acl/acl_base.h index 165b0812..657bf67c 100644 --- a/inc/external/acl/acl_base.h +++ b/inc/external/acl/acl_base.h @@ -174,6 +174,12 @@ typedef enum { ACL_ERROR = 3, } aclLogLevel; +typedef enum { + ACL_MEMTYPE_DEVICE = 0, + ACL_MEMTYPE_HOST = 1, +} aclMemType; + + /** * @ingroup AscendCL * @brief Converts data of type aclFloat16 to data of type float @@ -594,6 +600,18 @@ ACL_FUNC_VISIBILITY aclError aclSetTensorDynamicInput(aclTensorDesc *desc, const */ ACL_FUNC_VISIBILITY aclError aclSetTensorConst(aclTensorDesc *desc, void *dataBuffer, size_t length); +/** + * @ingroup AscendCL + * @brief Set tensor memory type specified by the tensor description + * + * @param desc [OUT] pointer to the instance of aclTensorDesc + * @param memType [IN] ACL_MEMTYPE_DEVICE means device, ACL_MEMTYPE_HOST means host + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclSetTensorPlaceMent(aclTensorDesc *desc, aclMemType memType); + /** * @ingroup AscendCL * @brief an interface for users to output APP logs diff --git a/inc/external/acl/acl_mdl.h b/inc/external/acl/acl_mdl.h index 4f3e257f..3a62ce32 100644 --- a/inc/external/acl/acl_mdl.h +++ b/inc/external/acl/acl_mdl.h @@ -1203,6 +1203,18 @@ ACL_FUNC_VISIBILITY aclError aclmdlDestroyConfigHandle(aclmdlConfigHandle *handl ACL_FUNC_VISIBILITY aclError aclmdlSetConfigOpt(aclmdlConfigHandle *handle, aclmdlConfigAttr attr, const void *attrValue, size_t valueSize); +/** + * @ingroup AscendCL + * @brief get real tensor name from modelDesc + * + * @param modelDesc [IN] pointer to modelDesc + * @param name [IN] tensor name + * + * @retval the pointer of real tensor name + * @retval Failure return NULL + */ +ACL_FUNC_VISIBILITY const char *aclmdlGetTensorRealName(const aclmdlDesc *modelDesc, const char *name); + #ifdef __cplusplus } #endif diff --git a/inc/external/acl/acl_rt.h b/inc/external/acl/acl_rt.h index 8fb7f05a..703ca4ca 100644 --- a/inc/external/acl/acl_rt.h +++ b/inc/external/acl/acl_rt.h @@ -25,6 +25,8 @@ extern "C" { #endif +#define ACL_EVENT_TIME_LINE 0x00000008u + typedef enum aclrtRunMode { ACL_DEVICE, ACL_HOST, @@ -425,6 +427,18 @@ ACL_FUNC_VISIBILITY aclError aclrtGetDeviceCount(uint32_t *count); */ ACL_FUNC_VISIBILITY aclError aclrtCreateEvent(aclrtEvent *event); +/** + * @ingroup AscendCL + * @brief create event instance with flag + * + * @param event [OUT] created event + * @param flag [IN] event flag + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError aclrtCreateEventWithFlag(aclrtEvent *event, uint32_t flag); + /** * @ingroup AscendCL * @brief destroy event instance diff --git a/inc/external/hccl/hccl.h b/inc/external/hccl/hccl.h index 46d934e6..311e78f2 100644 --- a/inc/external/hccl/hccl.h +++ b/inc/external/hccl/hccl.h @@ -27,7 +27,7 @@ #ifdef __cplusplus extern "C" { -#endif // __cplusplus +#endif // __cplusplus /** * @brief Initialize HCCL. @@ -66,15 +66,14 @@ extern HcclResult HcclCommInitRootInfo(uint32_t nRanks, const HcclRootInfo *root * @param sendBuf A pointer identifying the input data address of the operator. * @param recvBuf A pointer identifying the output data address of the operator. * @param count An integer(u64) identifying the number of the output data. - * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, - * float32. 
+ * @param dataType The data type of the operator, must be one of the following types: int8, int16, int32, float16, float32. * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. - * @return HcclResult + * @return HcclResult */ -extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, - HcclComm comm, aclrtStream stream); +extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, +HcclReduceOp op, HcclComm comm, aclrtStream stream); /** * @brief Broadcast operator. @@ -85,10 +84,10 @@ extern HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, Hc * @param root An integer(u32) identifying the the root rank in the operator. * @param comm A pointer identifying the communication resource based on * @param stream A pointer identifying the stream information. - * @return HcclResult + * @return HcclResult */ -extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, - aclrtStream stream); +extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, HcclComm comm, +aclrtStream stream); /** * @brief ReduceScatter operator. @@ -100,10 +99,10 @@ extern HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType * @param op The reduction type of the operator, must be one of the following types: sum, min, max, prod. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. - * @return HcclResult + * @return HcclResult */ -extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, - HcclReduceOp op, HcclComm comm, aclrtStream stream); +extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvCount, HcclDataType dataType, +HcclReduceOp op, HcclComm comm, aclrtStream stream); /** * @brief AllGather operator. @@ -114,10 +113,10 @@ extern HcclResult HcclReduceScatter(void *sendBuf, void *recvBuf, uint64_t recvC * @param dataType The data type of the operator, must be one of the following types: int8, int32, float16, float32. * @param comm A pointer identifying the communication resource based on. * @param stream A pointer identifying the stream information. 
- * @return HcclResult + * @return HcclResult */ -extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, HcclComm comm, - aclrtStream stream); +extern HcclResult HcclAllGather(void *sendBuf, void *recvBuf, uint64_t sendCount, HcclDataType dataType, +HcclComm comm, aclrtStream stream); /** * @brief Destroy HCCL comm @@ -130,5 +129,5 @@ extern HcclResult HcclCommDestroy(HcclComm comm); #ifdef __cplusplus } -#endif // __cplusplus -#endif // HCCL_H_ +#endif // __cplusplus +#endif // HCCL_H_ diff --git a/inc/external/hccl/hccl_types.h b/inc/external/hccl/hccl_types.h index 0e832396..50a64795 100644 --- a/inc/external/hccl/hccl_types.h +++ b/inc/external/hccl/hccl_types.h @@ -16,10 +16,10 @@ /** * @file hccl_types.h - * @brief HCCL data type definition - * + * @brief HCCL data type definition + * */ - + #ifndef HCCL_TYPES_H_ #define HCCL_TYPES_H_ @@ -27,33 +27,33 @@ #ifdef __cplusplus extern "C" { -#endif // __cplusplus +#endif // __cplusplus /** * @brief HCCL functions return value definition */ typedef enum { - HCCL_SUCCESS = 0, /**< success */ - HCCL_E_PARA = 1, /**< parameter error */ - HCCL_E_PTR = 2, /**< empty pointer */ - HCCL_E_MEMORY = 3, /**< memory error */ - HCCL_E_INTERNAL = 4, /**< internal error */ - HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ - HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ - HCCL_E_UNAVAIL = 7, /**< resource unavailable */ - HCCL_E_SYSCALL = 8, /**< call system interface error */ - HCCL_E_TIMEOUT = 9, /**< timeout */ - HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ - HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ - HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ - HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ - HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ - HCCL_E_RUNTIME = 15, /**< call runtime api fail */ - HCCL_E_DRV = 16, /**< call driver api fail */ - HCCL_E_PROFILING = 17, /**< call profiling api fail */ - HCCL_E_CCE = 18, /**< call cce api fail */ - HCCL_E_NETWORK = 19, /**< call network api fail */ - HCCL_E_RESERVED /**< reserved */ + HCCL_SUCCESS = 0, /**< success */ + HCCL_E_PARA = 1, /**< parameter error */ + HCCL_E_PTR = 2, /**< empty pointer */ + HCCL_E_MEMORY = 3, /**< memory error */ + HCCL_E_INTERNAL = 4, /**< internal error */ + HCCL_E_NOT_SUPPORT = 5, /**< not support feature */ + HCCL_E_NOT_FOUND = 6, /**< not found specific resource */ + HCCL_E_UNAVAIL = 7, /**< resource unavailable */ + HCCL_E_SYSCALL = 8, /**< call system interface error */ + HCCL_E_TIMEOUT = 9, /**< timeout */ + HCCL_E_OPEN_FILE_FAILURE = 10, /**< open file fail */ + HCCL_E_TCP_CONNECT = 11, /**< tcp connect fail */ + HCCL_E_ROCE_CONNECT = 12, /**< roce connect fail */ + HCCL_E_TCP_TRANSFER = 13, /**< tcp transfer fail */ + HCCL_E_ROCE_TRANSFER = 14, /**< roce transfer fail */ + HCCL_E_RUNTIME = 15, /**< call runtime api fail */ + HCCL_E_DRV = 16, /**< call driver api fail */ + HCCL_E_PROFILING = 17, /**< call profiling api fail */ + HCCL_E_CCE = 18, /**< call cce api fail */ + HCCL_E_NETWORK = 19, /**< call network api fail */ + HCCL_E_RESERVED /**< reserved */ } HcclResult; /** @@ -65,37 +65,37 @@ typedef void *HcclComm; * @brief HCCL Reduction opperation */ typedef enum { - HCCL_REDUCE_SUM = 0, /**< sum */ - HCCL_REDUCE_PROD = 1, /**< prod */ - HCCL_REDUCE_MAX = 2, /**< max */ - HCCL_REDUCE_MIN = 3, /**< min */ - HCCL_REDUCE_RESERVED /**< reserved */ + HCCL_REDUCE_SUM = 0, /**< sum */ + HCCL_REDUCE_PROD = 1, /**< prod */ + HCCL_REDUCE_MAX = 2, /**< max */ + HCCL_REDUCE_MIN 
= 3, /**< min */ + HCCL_REDUCE_RESERVED /**< reserved */ } HcclReduceOp; /** * @brief HCCL data type */ typedef enum { - HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ - HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ - HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ - HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ - HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ - HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ - HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ - HCCL_DATA_TYPE_RESERVED /**< reserved */ + HCCL_DATA_TYPE_INT8 = 0, /**< int8 */ + HCCL_DATA_TYPE_INT16 = 1, /**< int16 */ + HCCL_DATA_TYPE_INT32 = 2, /**< int32 */ + HCCL_DATA_TYPE_FP16 = 3, /**< fp16 */ + HCCL_DATA_TYPE_FP32 = 4, /**< fp32 */ + HCCL_DATA_TYPE_INT64 = 5, /**< int64 */ + HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */ + HCCL_DATA_TYPE_RESERVED /**< reserved */ } HcclDataType; -const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length +const uint32_t HCCL_ROOT_INFO_BYTES = 4108; // 4108: root info length /** * @brief HCCL root info */ typedef struct HcclRootInfoDef { - char internal[HCCL_ROOT_INFO_BYTES]; + char internal[HCCL_ROOT_INFO_BYTES]; } HcclRootInfo; #ifdef __cplusplus } -#endif // __cplusplus -#endif // HCCL_TYPES_H_ +#endif // __cplusplus +#endif // HCCL_TYPES_H_ diff --git a/inc/external/runtime/rt_error_codes.h b/inc/external/runtime/rt_error_codes.h index 2109fb79..47f16d9f 100644 --- a/inc/external/runtime/rt_error_codes.h +++ b/inc/external/runtime/rt_error_codes.h @@ -23,80 +23,80 @@ extern "C" { #endif -static const int32_t ACL_RT_SUCCESS = 0; // success +static const int32_t ACL_RT_SUCCESS = 0; // success -static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid -static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id -static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null -static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context -static const int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context -static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid -static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal -static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned -static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed -static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed -static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream -static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread -static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set -static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create -static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream -static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type -static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle -static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type +static const int32_t ACL_ERROR_RT_PARAM_INVALID = 107000; // param invalid +static const int32_t ACL_ERROR_RT_INVALID_DEVICEID = 107001; // invalid device id +static const int32_t ACL_ERROR_RT_CONTEXT_NULL = 107002; // current context null +static const int32_t ACL_ERROR_RT_STREAM_CONTEXT = 107003; // stream not in current context +static const 
int32_t ACL_ERROR_RT_MODEL_CONTEXT = 107004; // model not in current context +static const int32_t ACL_ERROR_RT_STREAM_MODEL = 107005; // stream not in model +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_INVALID = 107006; // event timestamp invalid +static const int32_t ACL_ERROR_RT_EVENT_TIMESTAMP_REVERSAL = 107007; // event timestamp reversal +static const int32_t ACL_ERROR_RT_ADDR_UNALIGNED = 107008; // memory address unaligned +static const int32_t ACL_ERROR_RT_FILE_OPEN = 107009; // open file failed +static const int32_t ACL_ERROR_RT_FILE_WRITE = 107010; // write file failed +static const int32_t ACL_ERROR_RT_STREAM_SUBSCRIBE = 107011; // error subscribe stream +static const int32_t ACL_ERROR_RT_THREAD_SUBSCRIBE = 107012; // error subscribe thread +static const int32_t ACL_ERROR_RT_GROUP_NOT_SET = 107013; // group not set +static const int32_t ACL_ERROR_RT_GROUP_NOT_CREATE = 107014; // group not create +static const int32_t ACL_ERROR_RT_STREAM_NO_CB_REG = 107015; // callback not register to stream +static const int32_t ACL_ERROR_RT_INVALID_MEMORY_TYPE = 107016; // invalid memory type +static const int32_t ACL_ERROR_RT_INVALID_HANDLE = 107017; // invalid handle +static const int32_t ACL_ERROR_RT_INVALID_MALLOC_TYPE = 107018; // invalid malloc type -static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support -static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error -static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error -static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow -static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device -static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail -static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission -static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource -static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource -static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource -static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource +static const int32_t ACL_ERROR_RT_FEATURE_NOT_SUPPORT = 207000; // feature not support +static const int32_t ACL_ERROR_RT_MEMORY_ALLOCATION = 207001; // memory allocation error +static const int32_t ACL_ERROR_RT_MEMORY_FREE = 207002; // memory free error +static const int32_t ACL_ERROR_RT_AICORE_OVER_FLOW = 207003; // aicore over flow +static const int32_t ACL_ERROR_RT_NO_DEVICE = 207004; // no device +static const int32_t ACL_ERROR_RT_RESOURCE_ALLOC_FAIL = 207005; // resource alloc fail +static const int32_t ACL_ERROR_RT_NO_PERMISSION = 207006; // no permission +static const int32_t ACL_ERROR_RT_NO_EVENT_RESOURCE = 207007; // no event resource +static const int32_t ACL_ERROR_RT_NO_STREAM_RESOURCE = 207008; // no stream resource +static const int32_t ACL_ERROR_RT_NO_NOTIFY_RESOURCE = 207009; // no notify resource +static const int32_t ACL_ERROR_RT_NO_MODEL_RESOURCE = 207010; // no model resource -static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error -static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error -static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream -static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream -static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete -static const int32_t 
ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence -static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete -static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error -static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error -static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support -static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat -static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed -static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout -static const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error -static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout -static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception -static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception -static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout -static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception -static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error -static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error -static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error -static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error -static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal -static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering -static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init -static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data -static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error -static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate -static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed -static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed -static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context -static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out -static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error +static const int32_t ACL_ERROR_RT_INTERNAL_ERROR = 507000; // runtime internal error +static const int32_t ACL_ERROR_RT_TS_ERROR = 507001; // ts internel error +static const int32_t ACL_ERROR_RT_STREAM_TASK_FULL = 507002; // task full in stream +static const int32_t ACL_ERROR_RT_STREAM_TASK_EMPTY = 507003; // task empty in stream +static const int32_t ACL_ERROR_RT_STREAM_NOT_COMPLETE = 507004; // stream not complete +static const int32_t ACL_ERROR_RT_END_OF_SEQUENCE = 507005; // end of sequence +static const int32_t ACL_ERROR_RT_EVENT_NOT_COMPLETE = 507006; // event not complete +static const int32_t ACL_ERROR_RT_CONTEXT_RELEASE_ERROR = 507007; // context release error +static const int32_t ACL_ERROR_RT_SOC_VERSION = 507008; // soc version error +static const int32_t ACL_ERROR_RT_TASK_TYPE_NOT_SUPPORT = 507009; // task type not support +static const int32_t ACL_ERROR_RT_LOST_HEARTBEAT = 507010; // ts lost heartbeat +static const int32_t ACL_ERROR_RT_MODEL_EXECUTE = 507011; // model execute failed +static const int32_t ACL_ERROR_RT_REPORT_TIMEOUT = 507012; // report timeout +static 
const int32_t ACL_ERROR_RT_SYS_DMA = 507013; // sys dma error
+static const int32_t ACL_ERROR_RT_AICORE_TIMEOUT = 507014; // aicore timeout
+static const int32_t ACL_ERROR_RT_AICORE_EXCEPTION = 507015; // aicore exception
+static const int32_t ACL_ERROR_RT_AICORE_TRAP_EXCEPTION = 507016; // aicore trap exception
+static const int32_t ACL_ERROR_RT_AICPU_TIMEOUT = 507017; // aicpu timeout
+static const int32_t ACL_ERROR_RT_AICPU_EXCEPTION = 507018; // aicpu exception
+static const int32_t ACL_ERROR_RT_AICPU_DATADUMP_RSP_ERR = 507019; // aicpu datadump response error
+static const int32_t ACL_ERROR_RT_AICPU_MODEL_RSP_ERR = 507020; // aicpu model operate response error
+static const int32_t ACL_ERROR_RT_PROFILING_ERROR = 507021; // profiling error
+static const int32_t ACL_ERROR_RT_IPC_ERROR = 507022; // ipc error
+static const int32_t ACL_ERROR_RT_MODEL_ABORT_NORMAL = 507023; // model abort normal
+static const int32_t ACL_ERROR_RT_KERNEL_UNREGISTERING = 507024; // kernel unregistering
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NOT_INIT = 507025; // ringbuffer not init
+static const int32_t ACL_ERROR_RT_RINGBUFFER_NO_DATA = 507026; // ringbuffer no data
+static const int32_t ACL_ERROR_RT_KERNEL_LOOKUP = 507027; // kernel lookup error
+static const int32_t ACL_ERROR_RT_KERNEL_DUPLICATE = 507028; // kernel register duplicate
+static const int32_t ACL_ERROR_RT_DEBUG_REGISTER_FAIL = 507029; // debug register failed
+static const int32_t ACL_ERROR_RT_DEBUG_UNREGISTER_FAIL = 507030; // debug unregister failed
+static const int32_t ACL_ERROR_RT_LABEL_CONTEXT = 507031; // label not in current context
+static const int32_t ACL_ERROR_RT_PROGRAM_USE_OUT = 507032; // program register num use out
+static const int32_t ACL_ERROR_RT_DEV_SETUP_ERROR = 507033; // device setup error

-static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
-static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error
+static const int32_t ACL_ERROR_RT_DRV_INTERNAL_ERROR = 507899; // drv internal error
+static const int32_t ACL_ERROR_RT_AICPU_INTERNAL_ERROR = 507900; // aicpu internal error

 #ifdef __cplusplus
 }
 #endif

-#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
+#endif // __INC_EXTERNEL_RT_ERROR_CODES_H__
diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
index bc35f4ce..6207c917 100644
--- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
+++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h
@@ -3781,6 +3781,32 @@ REG_OP(ArgMaxGradD)
     .REQUIRED_ATTR(dimension, Int)
     .OP_END_FACTORY_REG(ArgMaxGradD)

+/**
+*@brief Returns the cosine similarity between x1 and x2, computed along dim. \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li input_x1: A tensor. Must be one of the following types:
+* float32. \n
+* @li input_x2: A tensor. Must be one of the following types:
+* float32. \n
+
+*@par Attributes:
+*@li dim: An optional int. The dimension along which the similarity is computed. Defaults to 1.
+*@li eps: An optional float. A small epsilon that guards against division by zero. Defaults to 1e-8. \n
+
+*@par Outputs:
+*@li output_y: A Tensor with the same type as input_x1. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator CosineSimilarity. \n
+*/
+REG_OP(CosineSimilarity)
+    .INPUT(input_x1, TensorType({DT_FLOAT})) /* "First operand." */
+    .INPUT(input_x2, TensorType({DT_FLOAT})) /* "Second operand." */
+    .OUTPUT(output_y, TensorType({DT_FLOAT})) /* "Result, has same element type as two inputs" */
+    .ATTR(dim, Int, 1)
+    .ATTR(eps, Float, 1e-8)
+    .OP_END_FACTORY_REG(CosineSimilarity)
+
 }  // namespace ge

 #endif  // OPS_BUILT_IN_OP_PROTO_INC_ELEWISE_CALCULATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h
index 50d058ba..bd30e50c 100644
--- a/third_party/fwkacllib/inc/ops/math_ops.h
+++ b/third_party/fwkacllib/inc/ops/math_ops.h
@@ -952,6 +952,36 @@ REG_OP(Angle)
     .ATTR(Tout, Type, DT_FLOAT)
     .OP_END_FACTORY_REG(Angle)

+/**
+*@brief Computes the gradient of SoftMarginLoss. \n
+
+*@par Inputs:
+*Three inputs, including:
+* @li predict: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li label: A tensor with the same shape as predict. Must be one of the following types:
+* float16, float32. \n
+* @li dout: A tensor with the same shape as predict. Must be one of the following types:
+* float16, float32. \n
+
+*@par Attributes:
+* @li reduction: Specifies the reduction to apply to the output:
+* 'none' | 'mean' | 'sum'. Default: 'mean'. \n
+
+*@par Outputs:
+* gradient: A Tensor with the same type as predict. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator SoftMarginLoss Backward. \n
+*/
+REG_OP(SoftMarginLossGrad)
+    .INPUT(predict, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(label, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(dout, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(gradient, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(reduction, String, "mean")
+    .OP_END_FACTORY_REG(SoftMarginLossGrad)
+
 }  // namespace ge

 #endif  // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
index 00e2020f..041aa765 100644
--- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h
+++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h
@@ -1205,6 +1205,34 @@ REG_OP(Centralization)
     .ATTR(axes, ListInt, {-1})
     .OP_END_FACTORY_REG(Centralization)

+/**
+ *@brief Calculates the loss. Creates a criterion that optimizes a two-class classification
+ logistic loss between input_x and input_y (containing 1 or -1). \n
+
+ *@par Inputs:
+ *Two inputs, including:
+ * @li input_x: A tensor. Must be one of the following types:
+ * float16, float32. \n
+ * @li input_y: A tensor. Must be one of the following types:
+ * float16, float32. \n
+
+ *@par Attributes:
+ *@li reduction: An optional string. Defaults to "mean". \n
+
+ *@par Outputs:
+ *output_z: When reduction == "none", a Tensor with the same type and shape as input_x. \n
+ * When reduction == "sum" or "mean", a Tensor with the same type as input_x, whose shape is (1,). \n
+
+ *@par Third-party framework compatibility
+ *Compatible with the Pytorch operator SoftMarginLoss. \n
+ */
+REG_OP(SoftMarginLoss)
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(reduction, String, "mean")
+    .OUTPUT(output_z, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OP_END_FACTORY_REG(SoftMarginLoss)
+
 /**
 * @brief Computes gradients of sigmoid_cross_entropy_with_logits_v2.
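The SoftMarginLoss and SoftMarginLossGrad registrations above fix only dtypes, input names, and the reduction attribute; the math they refer to is PyTorch's soft margin loss, loss(x, y) = sum_i log(1 + exp(-y_i * x_i)), divided by the element count when reduction is "mean". As a cross-check aid, here is a minimal host-side C++ reference of that formula and its gradient; it is an illustrative sketch only (the function names are ours, not part of these headers), not the AI Core kernel:

// Reference math for SoftMarginLoss (forward) and SoftMarginLossGrad
// (backward) with reduction == "mean". Illustrative sketch only.
#include <cmath>
#include <cstddef>
#include <vector>

// loss = sum_i log(1 + exp(-label[i] * predict[i])) / N
float SoftMarginLossRef(const std::vector<float> &predict, const std::vector<float> &label) {
  float sum = 0.0f;
  for (std::size_t i = 0; i < predict.size(); ++i) {
    sum += std::log1p(std::exp(-label[i] * predict[i]));
  }
  return sum / static_cast<float>(predict.size());
}

// gradient[i] = dout[i] * (-label[i] / (1 + exp(label[i] * predict[i]))) / N
std::vector<float> SoftMarginLossGradRef(const std::vector<float> &predict,
                                         const std::vector<float> &label,
                                         const std::vector<float> &dout) {
  const float n = static_cast<float>(predict.size());
  std::vector<float> gradient(predict.size());
  for (std::size_t i = 0; i < predict.size(); ++i) {
    gradient[i] = dout[i] * (-label[i] / (1.0f + std::exp(label[i] * predict[i]))) / n;
  }
  return gradient;
}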
diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
index a225bb5f..a911fa51 100644
--- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
+++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h
@@ -792,6 +792,34 @@ REG_OP(HardShrink)
     .ATTR(lambd, Float, 0.5)
     .OP_END_FACTORY_REG(HardShrink)

+/**
+*@brief Calculates the gradient of the hard shrink function. \n
+*
+* Computes the gradient for the HardShrink: if features > lambda or
+* features < -lambda, backprops = gradients; otherwise backprops = 0. \n
+*
+*@par Inputs:
+*Two inputs, including:
+* @li gradients: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li features: A tensor. Must be one of the following types:
+* float16, float32. \n
+*
+*@par Outputs:
+*backprops: A Tensor with the same type and shape as features. \n
+*
+*@par Attributes:
+*@li lambda: An optional float. Defaults to 0.5. \n
+*
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Hardshrink_backward. \n
+*/
+REG_OP(HardShrinkGrad)
+    .INPUT(gradients, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .OUTPUT(backprops, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(lambda, Float, 0.5)
+    .OP_END_FACTORY_REG(HardShrinkGrad)
+
 /**
 * @brief Calculate the hard sigmoid function. \n
@@ -884,6 +912,36 @@ REG_OP(LogSigmoid)
     .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) /* "input:x" */
     .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) /* "output:y" */
     .OP_END_FACTORY_REG(LogSigmoid)
+
+/**
+*@brief Calculates the backward outputs of the function "hard_sigmoid". \n
+
+*@par Inputs:
+*Two inputs, including:
+* @li grads: A tensor. Must be one of the following types:
+* float16, float32. \n
+* @li input_x: A tensor. Must be one of the following types:
+* float16, float32. \n
+
+*@par Outputs:
+*One output, including:
+* @li y: A tensor with the same type and shape as input_x. \n
+
+* @par Attributes:
+* @li alpha: An optional float. Defaults to 0.16666666. \n
+* @li beta: An optional float. Defaults to 0.5. \n
+
+*@par Third-party framework compatibility
+*Compatible with the Pytorch operator Hardsigmoid Backward. \n
+*/
+REG_OP(HardSigmoidGrad)
+    .INPUT(grads, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .ATTR(alpha, Float, 0.16666666)
+    .ATTR(beta, Float, 0.5)
+    .OP_END_FACTORY_REG(HardSigmoidGrad)
+
 }  // namespace ge

 #endif  // OPS_BUILT_IN_OP_PROTO_INC_NONLINEAR_FUC_OPS_H_
diff --git a/third_party/fwkacllib/inc/ops/transformation_ops.h b/third_party/fwkacllib/inc/ops/transformation_ops.h
index 42c7745a..32baf56c 100644
--- a/third_party/fwkacllib/inc/ops/transformation_ops.h
+++ b/third_party/fwkacllib/inc/ops/transformation_ops.h
@@ -737,14 +737,51 @@ where ho/wo is do = (output_d + 2*padding_d - dilation_d*(kernel_d - 1) - 1)//st
 * Compatible with Pytorch col2im/im2col_backward operator.
 */
 REG_OP(Col2im)
-    .INPUT(x, TensorType({DT_FLOAT}))
-    .INPUT(output_size, TensorType({DT_INT32}))
-    .OUTPUT(y, TensorType({DT_FLOAT}))
+    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
+    .INPUT(output_size, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
     .REQUIRED_ATTR(kernel_size, ListInt)
     .REQUIRED_ATTR(dilation, ListInt)
     .REQUIRED_ATTR(padding, ListInt)
     .REQUIRED_ATTR(stride, ListInt)
     .OP_END_FACTORY_REG(Col2im)
+
+/**
+*@brief Generates a 2D or 3D flow field (sampling grid), given a batch of affine
+matrices theta. \n
+
+*@par Inputs:
+*Input theta must be float16 or float, output_size must be int32 type. Inputs
+include:
+*@li theta: input batch of affine matrices with shape (N, 2, 3) for 2D or (N, 3, 4)
+for 3D.
+*@li output_size: the target output image size. (N×C×H×W for 2D or N×C×D×H×W for
+3D) Example: torch.Size((32, 3, 24, 24)). \n
+
+*@par Attributes:
+*align_corners: if True, consider -1 and 1 to refer to the centers of the corner
+pixels rather than the image corners. Refer to grid_sample() for a more complete
+description. A grid generated by affine_grid() should be passed to grid_sample()
+with the same setting for this option. Default: False. \n
+
+*@par Outputs:
+*@li y: the output sampling grid, a tensor with the same type as theta, of shape
+(N, H, W, 2) for 2D or (N, D, H, W, 3) for 3D. \n
+
+*@attention Constraints:
+*Input theta must be float16 or float, output_size must be int32 type. \n
+
+*@par Third-party framework compatibility
+*Compatible with Pytorch affine_grid operator.
+*/
+
+REG_OP(AffineGrid)
+    .INPUT(theta, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .INPUT(output_size, TensorType({DT_INT32}))
+    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
+    .ATTR(align_corners, Bool, false)
+    .OP_END_FACTORY_REG(AffineGrid)
 }  // namespace ge

 #endif  // OPS_BUILT_IN_OP_PROTO_INC_TRANSFORMATION_OPS_H_
diff --git a/third_party/fwkacllib/inc/runtime/event.h b/third_party/fwkacllib/inc/runtime/event.h
index 41e611ea..d4219559 100644
--- a/third_party/fwkacllib/inc/runtime/event.h
+++ b/third_party/fwkacllib/inc/runtime/event.h
@@ -30,6 +30,11 @@ extern "C" {
 #define RT_EVENT_DEFAULT (0x00)
 #define RT_EVENT_WITH_FLAG (0x01)

+#define RT_EVENT_DDSYNC_NS 0x01U
+#define RT_EVENT_STREAM_MARK 0x02U
+#define RT_EVENT_DDSYNC 0x04U
+#define RT_EVENT_TIME_LINE 0x08U
+
 /**
  * @ingroup dvrt_event
  * @brief create event instance
diff --git a/third_party/fwkacllib/inc/toolchain/slog.h b/third_party/fwkacllib/inc/toolchain/slog.h
index 7c4f7be2..c6be6b79 100644
--- a/third_party/fwkacllib/inc/toolchain/slog.h
+++ b/third_party/fwkacllib/inc/toolchain/slog.h
@@ -141,7 +141,7 @@ enum {
   IDEDD,      /**< IDE daemon device */
   IDEDH,      /**< IDE daemon host */
   HCCL,       /**< HCCL */
-  FMK,        /**< Framework */
+  FMK,        /**< Adapter */
   HIAIENGINE, /**< Matrix */
   DVPP,       /**< DVPP */
   RUNTIME,    /**< Runtime */
@@ -162,11 +162,11 @@ enum {
   MDCDEFAULT, /**< MDC undefine */
   MDCSC,      /**< MDC spatial cognition */
   MDCPNC,
-  MLL,
+  MLL,        /**< abandoned */
   DEVMM,      /**< Dlog memory managent */
   KERNEL,     /**< Kernel */
   LIBMEDIA,   /**< Libmedia */
-  CCECPU,     /**< ai cpu */
+  CCECPU,     /**< aicpu schedule */
   ASCENDDK,   /**< AscendDK */
   ROS,        /**< ROS */
   HCCP,
@@ -179,7 +179,7 @@ enum {
   TSDUMP,     /**< TSDUMP module */
   AICPU,      /**< AICPU module */
   LP,         /**< LP module */
-  TDT,
+  TDT,        /**< tsdaemon or aicpu schedule */
   FE,
   MD,
   MB,
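Taken together, the ACL-facing additions in this patch are small: a tensor placement setter, a real-name lookup for model tensors, and flag-based event creation, with ACL_EVENT_TIME_LINE (0x00000008u) in acl_rt.h matching RT_EVENT_TIME_LINE (0x08U) added in runtime/event.h. Below is a speculative usage sketch; it assumes the pre-existing helpers aclCreateTensorDesc, aclDestroyTensorDesc, and aclrtDestroyEvent from the same headers, and the tensor shape, the name "input0", and the error fallback are placeholders, not values from this patch:

// Speculative usage sketch for the interfaces declared in this patch.
// Error handling is abbreviated for brevity.
#include "acl/acl.h"

aclError TryNewInterfaces(aclmdlDesc *modelDesc) {
  // Event creation now takes a flag; ACL_EVENT_TIME_LINE mirrors
  // RT_EVENT_TIME_LINE in runtime/event.h.
  aclrtEvent event = nullptr;
  aclError ret = aclrtCreateEventWithFlag(&event, ACL_EVENT_TIME_LINE);
  if (ret != ACL_SUCCESS) {
    return ret;
  }

  // Tensor descriptions can now carry a memory placement (host vs. device).
  int64_t dims[] = {1, 3, 224, 224};  // hypothetical shape
  aclTensorDesc *desc = aclCreateTensorDesc(ACL_FLOAT, 4, dims, ACL_FORMAT_NCHW);
  ret = aclSetTensorPlaceMent(desc, ACL_MEMTYPE_HOST);

  // A user-facing tensor name can be mapped back to the real name recorded
  // in the offline model; NULL signals lookup failure.
  const char *realName = aclmdlGetTensorRealName(modelDesc, "input0");
  if (realName == nullptr) {
    ret = ACL_ERROR_INVALID_PARAM;  // error choice is ours, for the sketch only
  }

  aclDestroyTensorDesc(desc);
  (void)aclrtDestroyEvent(event);
  return ret;
}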