From 54a48678aeeb7153633ac56df8eaf9307ab97663 Mon Sep 17 00:00:00 2001 From: shenwei41 Date: Fri, 12 Mar 2021 12:18:57 +0800 Subject: [PATCH] update include headers 0311 --- inc/external/acl/error_codes/ge_error_codes.h | 6 +- inc/external/acl/ops/acl_dvpp.h | 531 ++++++++++-------- scripts/format_source_code.sh | 0 .../fwkacllib/inc/ops/avg_pool_1d_ops.h | 58 ++ .../inc/ops/elewise_calculation_ops.h | 19 + third_party/fwkacllib/inc/ops/image_ops.h | 166 ++++++ third_party/fwkacllib/inc/ops/list_ops.h | 274 +++++++++ third_party/fwkacllib/inc/ops/math_ops.h | 59 ++ .../inc/ops/matrix_calculation_ops.h | 30 + third_party/fwkacllib/inc/ops/nn_norm_ops.h | 54 ++ third_party/fwkacllib/inc/ops/nn_ops.h | 55 ++ .../fwkacllib/inc/ops/nn_pooling_ops.h | 4 +- .../fwkacllib/inc/ops/nonlinear_fuc_ops.h | 28 + third_party/fwkacllib/inc/ops/pad_ops.h | 2 +- third_party/fwkacllib/inc/ops/selection_ops.h | 56 ++ third_party/fwkacllib/inc/runtime/base.h | 2 +- third_party/fwkacllib/inc/runtime/config.h | 23 + third_party/fwkacllib/inc/runtime/kernel.h | 8 +- third_party/fwkacllib/inc/runtime/rt_model.h | 22 +- third_party/fwkacllib/inc/runtime/stream.h | 22 + .../inc/toolchain/tuning_tool/tune_api.h | 82 --- 21 files changed, 1178 insertions(+), 323 deletions(-) mode change 100644 => 100755 scripts/format_source_code.sh create mode 100644 third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h diff --git a/inc/external/acl/error_codes/ge_error_codes.h b/inc/external/acl/error_codes/ge_error_codes.h index b477a18c..cafc5a64 100644 --- a/inc/external/acl/error_codes/ge_error_codes.h +++ b/inc/external/acl/error_codes/ge_error_codes.h @@ -53,9 +53,9 @@ static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016; static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017; static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018; static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019; -static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020; -static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021; -static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022; +static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020; +static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021; +static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022; static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000; static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001; static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000; diff --git a/inc/external/acl/ops/acl_dvpp.h b/inc/external/acl/ops/acl_dvpp.h index 42ec4a8d..dcaa3936 100644 --- a/inc/external/acl/ops/acl_dvpp.h +++ b/inc/external/acl/ops/acl_dvpp.h @@ -53,123 +53,109 @@ typedef void (*aclvencCallback)(acldvppPicDesc *input, acldvppStreamDesc *output // Supported Pixel Format enum acldvppPixelFormat { - PIXEL_FORMAT_YUV_400 = 0, // 0 - PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 - PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 - PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 - PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 - PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 - PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 - PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 - PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 - PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 - PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 - PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 - PIXEL_FORMAT_RGB_888 = 12, // 12 - PIXEL_FORMAT_BGR_888 = 13, // 13 - PIXEL_FORMAT_ARGB_8888 = 14, // 14 - PIXEL_FORMAT_ABGR_8888 = 15, // 15 - PIXEL_FORMAT_RGBA_8888 = 16, // 16 - PIXEL_FORMAT_BGRA_8888 = 17, // 17 - PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 - PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 - PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 - PIXEL_FORMAT_YVU_PLANAR_422, - PIXEL_FORMAT_YVU_PLANAR_444, - PIXEL_FORMAT_RGB_444 = 23, - PIXEL_FORMAT_BGR_444, - PIXEL_FORMAT_ARGB_4444, - PIXEL_FORMAT_ABGR_4444, - PIXEL_FORMAT_RGBA_4444, - PIXEL_FORMAT_BGRA_4444, - PIXEL_FORMAT_RGB_555, - PIXEL_FORMAT_BGR_555, - PIXEL_FORMAT_RGB_565, - PIXEL_FORMAT_BGR_565, - PIXEL_FORMAT_ARGB_1555, - PIXEL_FORMAT_ABGR_1555, - PIXEL_FORMAT_RGBA_1555, - PIXEL_FORMAT_BGRA_1555, - PIXEL_FORMAT_ARGB_8565, - PIXEL_FORMAT_ABGR_8565, - PIXEL_FORMAT_RGBA_8565, - PIXEL_FORMAT_BGRA_8565, - PIXEL_FORMAT_RGB_BAYER_8BPP = 50, - PIXEL_FORMAT_RGB_BAYER_10BPP, - PIXEL_FORMAT_RGB_BAYER_12BPP, - PIXEL_FORMAT_RGB_BAYER_14BPP, - PIXEL_FORMAT_RGB_BAYER_16BPP, - PIXEL_FORMAT_BGR_888_PLANAR = 70, - PIXEL_FORMAT_HSV_888_PACKAGE, - PIXEL_FORMAT_HSV_888_PLANAR, - PIXEL_FORMAT_LAB_888_PACKAGE, - PIXEL_FORMAT_LAB_888_PLANAR, - PIXEL_FORMAT_S8C1, - PIXEL_FORMAT_S8C2_PACKAGE, - PIXEL_FORMAT_S8C2_PLANAR, - PIXEL_FORMAT_S16C1, - PIXEL_FORMAT_U8C1, - PIXEL_FORMAT_U16C1, - PIXEL_FORMAT_S32C1, - PIXEL_FORMAT_U32C1, - PIXEL_FORMAT_U64C1, - PIXEL_FORMAT_S64C1, - PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, - PIXEL_FORMAT_YVU_SEMIPLANAR_440, - PIXEL_FORMAT_FLOAT32, - PIXEL_FORMAT_BUTT, - PIXEL_FORMAT_UNKNOWN = 10000 + PIXEL_FORMAT_YUV_400 = 0, // 0 + PIXEL_FORMAT_YUV_SEMIPLANAR_420 = 1, // 1 + PIXEL_FORMAT_YVU_SEMIPLANAR_420 = 2, // 2 + PIXEL_FORMAT_YUV_SEMIPLANAR_422 = 3, // 3 + PIXEL_FORMAT_YVU_SEMIPLANAR_422 = 4, // 4 + PIXEL_FORMAT_YUV_SEMIPLANAR_444 = 5, // 5 + PIXEL_FORMAT_YVU_SEMIPLANAR_444 = 6, // 6 + PIXEL_FORMAT_YUYV_PACKED_422 = 7, // 7 + PIXEL_FORMAT_UYVY_PACKED_422 = 8, // 8 + PIXEL_FORMAT_YVYU_PACKED_422 = 9, // 9 + PIXEL_FORMAT_VYUY_PACKED_422 = 10, // 10 + PIXEL_FORMAT_YUV_PACKED_444 = 11, // 11 + PIXEL_FORMAT_RGB_888 = 12, // 12 + PIXEL_FORMAT_BGR_888 = 13, // 13 + PIXEL_FORMAT_ARGB_8888 = 14, // 14 + PIXEL_FORMAT_ABGR_8888 = 15, // 15 + PIXEL_FORMAT_RGBA_8888 = 16, // 16 + PIXEL_FORMAT_BGRA_8888 = 17, // 17 + PIXEL_FORMAT_YUV_SEMI_PLANNER_420_10BIT = 18, // 18 + PIXEL_FORMAT_YVU_SEMI_PLANNER_420_10BIT = 19, // 19 + PIXEL_FORMAT_YVU_PLANAR_420 = 20, // 20 + PIXEL_FORMAT_YVU_PLANAR_422, + PIXEL_FORMAT_YVU_PLANAR_444, + PIXEL_FORMAT_RGB_444 = 23, + PIXEL_FORMAT_BGR_444, + PIXEL_FORMAT_ARGB_4444, + PIXEL_FORMAT_ABGR_4444, + PIXEL_FORMAT_RGBA_4444, + PIXEL_FORMAT_BGRA_4444, + PIXEL_FORMAT_RGB_555, + PIXEL_FORMAT_BGR_555, + PIXEL_FORMAT_RGB_565, + PIXEL_FORMAT_BGR_565, + PIXEL_FORMAT_ARGB_1555, + PIXEL_FORMAT_ABGR_1555, + PIXEL_FORMAT_RGBA_1555, + PIXEL_FORMAT_BGRA_1555, + PIXEL_FORMAT_ARGB_8565, + PIXEL_FORMAT_ABGR_8565, + PIXEL_FORMAT_RGBA_8565, + PIXEL_FORMAT_BGRA_8565, + PIXEL_FORMAT_RGB_BAYER_8BPP = 50, + PIXEL_FORMAT_RGB_BAYER_10BPP, + PIXEL_FORMAT_RGB_BAYER_12BPP, + PIXEL_FORMAT_RGB_BAYER_14BPP, + PIXEL_FORMAT_RGB_BAYER_16BPP, + PIXEL_FORMAT_BGR_888_PLANAR = 70, + PIXEL_FORMAT_HSV_888_PACKAGE, + PIXEL_FORMAT_HSV_888_PLANAR, + PIXEL_FORMAT_LAB_888_PACKAGE, + PIXEL_FORMAT_LAB_888_PLANAR, + PIXEL_FORMAT_S8C1, + PIXEL_FORMAT_S8C2_PACKAGE, + PIXEL_FORMAT_S8C2_PLANAR, + PIXEL_FORMAT_S16C1, + PIXEL_FORMAT_U8C1, + PIXEL_FORMAT_U16C1, + PIXEL_FORMAT_S32C1, + PIXEL_FORMAT_U32C1, + PIXEL_FORMAT_U64C1, + PIXEL_FORMAT_S64C1, + PIXEL_FORMAT_YUV_SEMIPLANAR_440 = 1000, + PIXEL_FORMAT_YVU_SEMIPLANAR_440, + PIXEL_FORMAT_FLOAT32, + PIXEL_FORMAT_BUTT, + PIXEL_FORMAT_UNKNOWN = 10000 }; // Stream Format -enum acldvppStreamFormat { - H265_MAIN_LEVEL = 0, - H264_BASELINE_LEVEL, - H264_MAIN_LEVEL, - H264_HIGH_LEVEL -}; +enum acldvppStreamFormat { H265_MAIN_LEVEL = 0, H264_BASELINE_LEVEL, H264_MAIN_LEVEL, H264_HIGH_LEVEL }; // Supported Channel Mode -enum acldvppChannelMode { - DVPP_CHNMODE_VPC = 1, - DVPP_CHNMODE_JPEGD = 2, - DVPP_CHNMODE_JPEGE = 4 -}; +enum acldvppChannelMode { DVPP_CHNMODE_VPC = 1, DVPP_CHNMODE_JPEGD = 2, DVPP_CHNMODE_JPEGE = 4 }; // Supported Border Type -enum acldvppBorderType { - BORDER_CONSTANT = 0, - BORDER_REPLICATE, - BORDER_REFLECT, - BORDER_REFLECT_101 -}; +enum acldvppBorderType { BORDER_CONSTANT = 0, BORDER_REPLICATE, BORDER_REFLECT, BORDER_REFLECT_101 }; // Venc parameter type enum aclvencChannelDescParamType { - ACL_VENC_THREAD_ID_UINT64 = 0, - ACL_VENC_CALLBACK_PTR, - ACL_VENC_PIXEL_FORMAT_UINT32, - ACL_VENC_ENCODE_TYPE_UINT32, - ACL_VENC_PIC_WIDTH_UINT32, - ACL_VENC_PIC_HEIGHT_UINT32, - ACL_VENC_KEY_FRAME_INTERVAL_UINT32, - ACL_VENC_BUF_ADDR_PTR, - ACL_VENC_BUF_SIZE_UINT32, - ACL_VENC_RC_MODE_UINT32, - ACL_VENC_SRC_RATE_UINT32, - ACL_VENC_MAX_BITRATE_UINT32, - ACL_VENC_MAX_IP_PROP_UINT32 + ACL_VENC_THREAD_ID_UINT64 = 0, + ACL_VENC_CALLBACK_PTR, + ACL_VENC_PIXEL_FORMAT_UINT32, + ACL_VENC_ENCODE_TYPE_UINT32, + ACL_VENC_PIC_WIDTH_UINT32, + ACL_VENC_PIC_HEIGHT_UINT32, + ACL_VENC_KEY_FRAME_INTERVAL_UINT32, + ACL_VENC_BUF_ADDR_PTR, + ACL_VENC_BUF_SIZE_UINT32, + ACL_VENC_RC_MODE_UINT32, + ACL_VENC_SRC_RATE_UINT32, + ACL_VENC_MAX_BITRATE_UINT32, + ACL_VENC_MAX_IP_PROP_UINT32 }; // Jpeg picture format enum acldvppJpegFormat { - ACL_JPEG_CSS_444 = 0, - ACL_JPEG_CSS_422, - ACL_JPEG_CSS_420, - ACL_JPEG_CSS_GRAY, - ACL_JPEG_CSS_440, - ACL_JPEG_CSS_411, - ACL_JPEG_CSS_UNKNOWN = 1000 + ACL_JPEG_CSS_444 = 0, + ACL_JPEG_CSS_422, + ACL_JPEG_CSS_420, + ACL_JPEG_CSS_GRAY, + ACL_JPEG_CSS_440, + ACL_JPEG_CSS_411, + ACL_JPEG_CSS_UNKNOWN = 1000 }; /** @@ -523,9 +509,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetPicDescRetCode(const acldvppPicDesc *picD * @retval null for failed. * @retval other success */ -ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, - uint32_t right, - uint32_t top, +ACL_FUNC_VISIBILITY acldvppRoiConfig *acldvppCreateRoiConfig(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom); /** @@ -604,10 +588,7 @@ ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfigBottom(acldvppRoiConfig *config, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, - uint32_t left, - uint32_t right, - uint32_t top, +ACL_FUNC_VISIBILITY aclError acldvppSetRoiConfig(acldvppRoiConfig *config, uint32_t left, uint32_t right, uint32_t top, uint32_t bottom); /** @@ -1096,7 +1077,8 @@ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescMaxBitRate(aclvencChannelDesc * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencSetChannelDescParam(aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, const void *param); + aclvencChannelDescParamType paramType, size_t length, + const void *param); /** * @ingroup AscendCL @@ -1245,7 +1227,8 @@ ACL_FUNC_VISIBILITY uint32_t aclvencGetChannelDescMaxBitRate(const aclvencChanne * @retval ACL_SUCCESS for success, other for failure */ ACL_FUNC_VISIBILITY aclError aclvencGetChannelDescParam(const aclvencChannelDesc *channelDesc, - aclvencChannelDescParamType paramType, size_t length, size_t *paramRetSize, void *param); + aclvencChannelDescParamType paramType, size_t length, + size_t *paramRetSize, void *param); /** * @ingroup AscendCL @@ -1545,10 +1528,7 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyFrameConfig(aclvdecFrameConfig *vdecF * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, - uint32_t size, - uint32_t *width, - uint32_t *height, +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, uint32_t size, uint32_t *width, uint32_t *height, int32_t *components); /** @@ -1565,11 +1545,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfo(const void *data, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, - uint32_t size, - uint32_t *width, - uint32_t *height, - int32_t *components, +ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, uint32_t size, uint32_t *width, + uint32_t *height, int32_t *components, acldvppJpegFormat *format); /** @@ -1584,8 +1561,7 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegGetImageInfoV2(const void *data, * @retval OtherValues Failure */ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inputDesc, - const acldvppJpegeConfig *config, - uint32_t *size); + const acldvppJpegeConfig *config, uint32_t *size); /** * @ingroup AscendCL @@ -1599,10 +1575,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictEncSize(const acldvppPicDesc *inp * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, - uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, - uint32_t *decSize); +ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, uint32_t *decSize); /** * @ingroup AscendCL @@ -1617,11 +1591,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegPredictDecSize(const void *data, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, - uint32_t dataSize, - uint32_t *width, - uint32_t *height, - int32_t *components); +ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, uint32_t dataSize, uint32_t *width, + uint32_t *height, int32_t *components); /** * @ingroup AscendCL @@ -1635,10 +1606,8 @@ ACL_FUNC_VISIBILITY aclError acldvppPngGetImageInfo(const void *data, * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, - uint32_t dataSize, - acldvppPixelFormat outputPixelFormat, - uint32_t *decSize); +ACL_FUNC_VISIBILITY aclError acldvppPngPredictDecSize(const void *data, uint32_t dataSize, + acldvppPixelFormat outputPixelFormat, uint32_t *decSize); /** * @ingroup AscendCL @@ -1702,10 +1671,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyChannel(acldvppChannelDesc *channelDe * @see acldvppCreateChannel | acldvppCreatePicDesc * | acldvppCreateResizeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - acldvppResizeConfig *resizeConfig, +ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppResizeConfig *resizeConfig, aclrtStream stream); /** @@ -1714,7 +1681,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe * * @par Function * crop the input picture according to the specified area, - * and then store the picture in the output memory as the output picture + * and then store the picture in the output memory as the output picture * * @par Restriction * Width alignment requirements: @@ -1732,21 +1699,57 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcResizeAsync(acldvppChannelDesc *channelDe * @li The height of the input image is aligned to 2. * High stride minimum 6 and maximum 4096. * - * @param channelDesc [IN] the channel destruction - * @param inputDesc [IN] crop input picture destruction - * @param outputDesc [IN|OUT] crop output picture destruction - * @param cropArea [IN] crop area config - * @param stream [IN] crop task stream + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param stream [IN] crop task stream * * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - acldvppRoiConfig *cropArea, +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, aclrtStream stream); +/** + * @ingroup AscendCL + * @brief dvpp vpc crop and resize config. + * + * @par Function + * crop the input picture with resize config according to the specified area, + * and then store the picture in the output memory as the output picture + * + * @par Restriction + * Width alignment requirements: + * @li The minimum stride is 32 and the maximum is 4096 * 4 + * (that is, an image in argb format with a width of 4096); + * @li For 8K scaling, widthStride is required to be aligned to 2; + * @li For non 8K scaling, the calculation formula for widthStride + * is different for different image formats: + * @li yuv400sp, yuv420sp, yuv422sp, yuv444sp: input image width aligned to 16 + * @li yuv422packed: input image width * 2 and then align to 16 + * @li yuv444packed, rgb888: input image width alignment * 3, alignment to 16 + * @li xrgb8888: input image width * 4, align to 16 + * @li HFBC:input image width + * Height alignment requirements: + * @li The height of the input image is aligned to 2. + * High stride minimum 6 and maximum 4096. + * + * @param channelDesc [IN] the channel destruction + * @param inputDesc [IN] crop input picture destruction + * @param outputDesc [IN|OUT] crop output picture destruction + * @param cropArea [IN] crop area config + * @param resizeConfig [IN] resize config + * @param stream [IN] crop and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppResizeConfig *resizeConfig, aclrtStream stream); + /** * @ingroup AscendCL * @brief dvpp vpc batch crop. @@ -1769,12 +1772,37 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAsync(acldvppChannelDesc *channelDesc * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, - uint32_t *roiNums, - uint32_t size, - acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], - aclrtStream stream); + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop and resize config. + * + * @par Function + * crop the input batch picture with resize config according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateDvppConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); /** * @ingroup AscendCL @@ -1797,12 +1825,36 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAsync(acldvppChannelDesc *channe * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - acldvppRoiConfig *cropArea, - acldvppRoiConfig *pasteArea, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc crop, resize config and paste. + * + * @par Function + * crop the input picture with resize config according to the specified area, + * and paste the picture to the specified position of the target picture + * as the output picture + * + * @param channelDesc [IN] thechannel destruction + * @param inputDesc [IN] crop and paste input picture destruction + * @param outputDesc [IN|OUT] crop and paste output picture destruction + * @param cropArea [IN] crop area config + * @param pasteArea [IN] paste area config + * @param resizeConfig [IN] resize config + * @param stream [IN] crop, paste and resize task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcCropResizePasteAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, acldvppRoiConfig *cropArea, + acldvppRoiConfig *pasteArea, + acldvppResizeConfig *resizeConfig, aclrtStream stream); /** * @ingroup AscendCL @@ -1827,14 +1879,40 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig */ - ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, - acldvppBatchPicDesc *srcBatchPicDescs, - uint32_t *roiNums, - uint32_t size, - acldvppBatchPicDesc *dstBatchPicDescs, - acldvppRoiConfig *cropAreas[], - acldvppRoiConfig *pasteAreas[], - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropAndPasteAsync(acldvppChannelDesc *channelDesc, + acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, + uint32_t size, acldvppBatchPicDesc *dstBatchPicDescs, + acldvppRoiConfig *cropAreas[], + acldvppRoiConfig *pasteAreas[], aclrtStream stream); + +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop, resize config and paste. + * + * @par Function + * crop the input batch picture with resize config according to the specified area, + * and paste the pictures to the specified position of the target pictures + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param pasteAreas [IN] paste area configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch and resize config task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizePasteAsync( + acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppRoiConfig *pasteAreas[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); /** * @ingroup AscendCL @@ -1862,11 +1940,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCropAndPasteAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, - const void *data, - uint32_t size, - acldvppPicDesc *outputDesc, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); /** * @ingroup AscendCL @@ -1884,11 +1959,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegDecodeAsync(acldvppChannelDesc *channelD * * @see acldvppCreateChannel | acldvppCreateJpegeConfig */ -ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *inputDesc, - const void *data, - uint32_t *size, - acldvppJpegeConfig *config, +ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + const void *data, uint32_t *size, acldvppJpegeConfig *config, aclrtStream stream); /** @@ -1906,11 +1978,8 @@ ACL_FUNC_VISIBILITY aclError acldvppJpegEncodeAsync(acldvppChannelDesc *channelD * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, - const void *data, - uint32_t size, - acldvppPicDesc *outputDesc, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppPngDecodeAsync(acldvppChannelDesc *channelDesc, const void *data, uint32_t size, + acldvppPicDesc *outputDesc, aclrtStream stream); /** * @ingroup AscendCL @@ -1965,11 +2034,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecDestroyChannel(aclvdecChannelDesc *channelDe * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, - acldvppStreamDesc *input, - acldvppPicDesc *output, - aclvdecFrameConfig *config, - void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + acldvppPicDesc *output, aclvdecFrameConfig *config, void *userData); /** * @ingroup AscendCL @@ -1988,10 +2054,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendFrame(aclvdecChannelDesc *channelDesc, * * @see aclvdecCreateChannel | acldvppCreateStreamDesc | acldvppCreatePicDesc | aclvdecSendFrame */ -ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, - acldvppStreamDesc *input, - aclvdecFrameConfig *config, - void *userData); +ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channelDesc, acldvppStreamDesc *input, + aclvdecFrameConfig *config, void *userData); /** * @ingroup AscendCL @@ -2012,10 +2076,8 @@ ACL_FUNC_VISIBILITY aclError aclvdecSendSkippedFrame(aclvdecChannelDesc *channel * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, aclrtStream stream); /** * @ingroup AscendCL @@ -2037,11 +2099,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcConvertColorAsync(acldvppChannelDesc *cha * * @see acldvppCreateChannel | acldvppCreatePicDesc */ -ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - void *reserve, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *inputDesc, + acldvppPicDesc *outputDesc, void *reserve, aclrtStream stream); /** * @ingroup AscendCL @@ -2053,8 +2112,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcPyrDownAsync(acldvppChannelDesc *channelD * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, - uint32_t mode); +ACL_FUNC_VISIBILITY aclError acldvppSetChannelDescMode(acldvppChannelDesc *channelDesc, uint32_t mode); /** * @ingroup AscendCL @@ -2089,8 +2147,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetResizeConfigInterpolation(const acldvppRe * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, - uint32_t outMode); +ACL_FUNC_VISIBILITY aclError aclvdecSetChannelDescOutMode(aclvdecChannelDesc *channelDesc, uint32_t outMode); /** * @ingroup AscendCL @@ -2187,9 +2244,7 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetLutMapDims(const acldvppLutMap *lutMap); * @retval ACL_SUCCESS The function is successfully executed. * @retval OtherValues Failure */ -ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, - uint32_t dim, - uint8_t **data, +ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, uint32_t dim, uint8_t **data, uint32_t *len); /** * @ingroup AscendCL @@ -2207,10 +2262,8 @@ ACL_FUNC_VISIBILITY aclError acldvppGetLutMapData(const acldvppLutMap *lutMap, * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateLutMap */ ACL_FUNC_VISIBILITY aclError acldvppVpcEqualizeHistAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - const acldvppLutMap *lutMap, - aclrtStream stream); + const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, + const acldvppLutMap *lutMap, aclrtStream stream); /** * @ingroup AscendCL @@ -2231,8 +2284,7 @@ ACL_FUNC_VISIBILITY acldvppBorderConfig *acldvppCreateBorderConfig(); * * @retval ACL_SUCCESS for success, other for failure */ -ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, - uint32_t index, +ACL_FUNC_VISIBILITY aclError acldvppSetBorderConfigValue(acldvppBorderConfig *borderConfig, uint32_t index, double value); /** @@ -2377,10 +2429,8 @@ ACL_FUNC_VISIBILITY aclError acldvppDestroyBorderConfig(acldvppBorderConfig *bor * @see acldvppCreateChannel|acldvppCreatePicDesc|acldvppCreateBorderConfig */ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc *channelDesc, - const acldvppPicDesc *inputDesc, - acldvppPicDesc *outputDesc, - const acldvppBorderConfig *borderConfig, - aclrtStream stream); + const acldvppPicDesc *inputDesc, acldvppPicDesc *outputDesc, + const acldvppBorderConfig *borderConfig, aclrtStream stream); /** * @ingroup AscendCL @@ -2397,11 +2447,8 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcMakeBorderAsync(const acldvppChannelDesc * * @see acldvppCreateChannel | acldvppCreatePicDesc | acldvppCreateHist */ -ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, - acldvppPicDesc *srcPicDesc, - acldvppHist *hist, - void *reserve, - aclrtStream stream); +ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channelDesc, acldvppPicDesc *srcPicDesc, + acldvppHist *hist, void *reserve, aclrtStream stream); /** * @ingroup AscendCL @@ -2410,7 +2457,7 @@ ACL_FUNC_VISIBILITY aclError acldvppVpcCalcHistAsync(acldvppChannelDesc *channel * @retval null for failed. * @retval OtherValues success. */ -ACL_FUNC_VISIBILITY acldvppHist* acldvppCreateHist(); +ACL_FUNC_VISIBILITY acldvppHist *acldvppCreateHist(); /** * @ingroup AscendCL @@ -2467,7 +2514,7 @@ ACL_FUNC_VISIBILITY aclError acldvppGetHistData(acldvppHist *hist, uint32_t dim, * * @see acldvppCreateHist | acldvppVpcCalcHistAsync */ -ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); +ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist *hist); /** * @ingroup AscendCL @@ -2486,8 +2533,36 @@ ACL_FUNC_VISIBILITY uint32_t acldvppGetHistRetCode(acldvppHist* hist); */ ACL_FUNC_VISIBILITY aclError acldvppClearHist(acldvppHist *hist); +/** + * @ingroup AscendCL + * @brief dvpp vpc batch crop, resize config and make border. + * + * @par Function + * crop the input batch picture with resize config and border configs according to the specified area + * as the output batch pictures + * + * @param channelDesc [IN] the channel destruction + * @param srcBatchPicDescs [IN] crop input batch picture destruction + * @param roiNums [IN] roi config numbers + * @param size [IN] roiNum size + * @param dstBatchPicDescs [IN|OUT] crop output batch picture destruction + * @param cropAreas [IN] crop area configs + * @param borderCfgs [IN] border configs + * @param resizeConfig [IN] resize config + * @param stream [IN] crop batch, resize config and make border task stream + * + * @retval ACL_SUCCESS The function is successfully executed. + * @retval OtherValues Failure + * + * @see acldvppCreateChannel | acldvppCreateBatchPicDesc | acldvppCreateRoiConfig | acldvppCreateResizeConfig + */ +ACL_FUNC_VISIBILITY aclError acldvppVpcBatchCropResizeMakeBorderAsync( + acldvppChannelDesc *channelDesc, acldvppBatchPicDesc *srcBatchPicDescs, uint32_t *roiNums, uint32_t size, + acldvppBatchPicDesc *dstBatchPicDescs, acldvppRoiConfig *cropAreas[], acldvppBorderConfig *borderCfgs[], + acldvppResizeConfig *resizeConfig, aclrtStream stream); + #ifdef __cplusplus } #endif -#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ +#endif // INC_EXTERNAL_ACL_OPS_ACL_DVPP_H_ diff --git a/scripts/format_source_code.sh b/scripts/format_source_code.sh old mode 100644 new mode 100755 diff --git a/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h new file mode 100644 index 00000000..d0800a08 --- /dev/null +++ b/third_party/fwkacllib/inc/ops/avg_pool_1d_ops.h @@ -0,0 +1,58 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! + * \file avg_pool_1d_ops.h + * \brief + */ +#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ +#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_ +#include "graph/operator_reg.h" + +namespace ge { +/** +*@brief Generate an auxiliary matrix . \n + +*@par Inputs: +* @li x: A tensor. Must be one of the following types:uint8, int8,int16, int32, + int64, float16, float, double.The format must be NHWC NCHW NC1HWC0. + +*@par Attributes: +*@li ksize: Kernel size. Input type is int. +*@li strides: Input type is int. +*@li pads: Input type is listInt . +*@li ceil_mode: Bool, default value is false. +*@li count_include_pad: Bool, default value is false. \n + +*@par Outputs: +*y_tensor: A tensor with the same types as "x" . \n +*@par Third-party framework compatibility + +*Compatible with the TensorFlow operator Unbatch. +*/ +REG_OP(AvgPool1DAvgMatrix) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, + DT_INT32, DT_INT64, DT_DOUBLE})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8, + DT_INT32, DT_INT64, DT_DOUBLE})) + .REQUIRED_ATTR(ksize, Int) + .REQUIRED_ATTR(strides, Int) + .REQUIRED_ATTR(pads, ListInt) + .ATTR(ceil_mode, Bool, false) + .ATTR(count_include_pad, Bool, false) + .OP_END_FACTORY_REG(AvgPool1DAvgMatrix) +} +#endif \ No newline at end of file diff --git a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h index 6207c917..0ec15367 100644 --- a/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/elewise_calculation_ops.h @@ -2454,6 +2454,25 @@ REG_OP(Eltwise) .ATTR(coeff, ListFloat, {}) .OP_END_FACTORY_REG(Eltwise) +/** + *@brief Computes the inverse error function of each element of input. \n + + *@par Inputs: + *One inputs, including: + * @li input_x: A tensor. Must be one of the following types: + * float16, float32. \n + + *@par Outputs: + *y: A Tensor with the same type and shape of input_x's. \n + + *@par Third-party framework compatibility + *Compatible with the Pytorch operator Erfinv. \n + */ +REG_OP(Erfinv) + .INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16})) + .OP_END_FACTORY_REG(Erfinv) + /** *@brief Computes element-wise population count. \n diff --git a/third_party/fwkacllib/inc/ops/image_ops.h b/third_party/fwkacllib/inc/ops/image_ops.h index 4703705b..6ae1613c 100644 --- a/third_party/fwkacllib/inc/ops/image_ops.h +++ b/third_party/fwkacllib/inc/ops/image_ops.h @@ -1516,6 +1516,96 @@ REG_OP(DenseImageWarp) .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(DenseImageWarp) +/** +*@brief Calculate the resize_d function. \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +*@li sizes: An optional listInt. \n +*@li scales: An optional listFloat. + Defaults to none. \n +*@li roi: An optional listInt. + Defaults to none. \n +*@li coordinate_transformation_mode: An optional String. + Defaults to "half_pixel". \n +*@li cubic_coeff_a: An optional float. + Defaults to -0.75. \n +*@li exclude_outside: An optional int. + Defaults to 0. \n +*@li extrapolation_value: An optional float. + Defaults to 0.0. \n +*@li mode: An optional String. + Defaults to "nearest". \n +*@li nearest_mode: An optional String. + Defaults to "round_prefer_floor". \n + +*@par Outputs: +*y: A Tensor with the same type of x's, + shape depends on x and sizes. \n +*/ +REG_OP(ResizeD) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(sizes, ListInt) + .ATTR(scales, ListFloat, {}) + .ATTR(roi, ListInt, {}) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0.0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(ResizeD) + +/** +*@brief Calculate the resize_grad_d function. \n + +*@par Inputs: +*One inputs, including: +* @li grads: A tensor. Must be one of the following types: +* float16, float32. \n + +*@par Attributes: +*@li original_size: An optional listInt. \n +*@li roi: An optional listInt. + Defaults to none. \n +*@li scales: An optional listFloat. + Defaults to none. \n +*@li coordinate_transformation_mode: An optional String. + Defaults to "half_pixel". \n +*@li cubic_coeff_a: An optional float. + Defaults to -0.75. \n +*@li exclude_outside: An optional int. + Defaults to 0. \n +*@li extrapolation_value: An optional float. + Defaults to 0.0. \n +*@li mode: An optional String. + Defaults to "nearest". \n +*@li nearest_mode: An optional String. + Defaults to "round_prefer_floor". \n + +*@par Outputs: +*y: A Tensor with the same type of x's, + shape depends on x and sizes. \n +*/ +REG_OP(ResizeGradD) + .INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .REQUIRED_ATTR(original_size, ListInt) + .ATTR(roi, ListInt, {}) + .ATTR(scales, ListFloat, {}) + .ATTR(coordinate_transformation_mode, String, "half_pixel") + .ATTR(cubic_coeff_a, Float, -0.75) + .ATTR(exclude_outside, Int, 0) + .ATTR(extrapolation_value, Float, 0.0) + .ATTR(mode, String, "nearest") + .ATTR(nearest_mode, String, "round_prefer_floor") + .OP_END_FACTORY_REG(ResizeGradD) + /** *@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n @@ -1535,5 +1625,81 @@ REG_OP(DenseImageWarpGrad) .OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16})) .OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16})) .OP_END_FACTORY_REG(DenseImageWarpGrad) + +/** +*@brief This operation samples input X by using interpolation based on flow field grid, + which is usually gennerated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of + (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of + input data x and y is indexng the 3rd dimention (in height dimension), finally results is + the interpolation value of 4 nearest corner points. The output tensor shape will be [N, C, H, W]. + +*@par Inputs: +*@li x: 4-D Tensor with shape `[batch, channels, height, width]`. +*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. + +*@par Attributes: +*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is + supported for now . +*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . + +*@par Outputs: +*y: Returns 4-D Tensor with the same dtype as `X`. + +*@par Third-party framework compatibility +*Compatible with pytorch GridSampler2D operator. +*/ +REG_OP(GridSampler2D) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(interpolation_mode, String, "bilinear") + .ATTR(padding_mode, String, "zeros") + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridSampler2D) + +/** +*@brief This operation unnormalize input Grid, which is usually gennerated by affine_grid. + +*@par Inputs: +*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`. +*@li assist: Assist matrix, a 4-D tensor of type float16. + +*@par Attributes: +*@li align_corners: An optional bool. If "true", the centers of the corner + pixels of the input and output tensors are aligned. Defaults to "false" . + +*@par Outputs: +*diff: Returns 4-D Tensor with the same shape and dtype as `grid`. +*position: Returns 4-D Tensor with the same shape as `grid`. +*/ +REG_OP(GridUnnormal) + .INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(position, TensorType({DT_INT32})) + .ATTR(align_corners, Bool, false) + .OP_END_FACTORY_REG(GridUnnormal) + +/** +*@brief This operation unfold input X based on unnormalized grid, which is gennerated by GridUnnormal. + +*@par Inputs: +*@li x: 4-D Tensor with shape `[batch, channels, height, width]`. +*@li position: 4-D Tensor with shape `[batch, output_height, output_width, 2]`. + +*@par Attributes: +*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now . + +*@par Outputs: +*y: Returns 4-D Tensor with the same dtype as `x`. +*/ +REG_OP(ImageUnfold) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(position, TensorType({DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(padding_mode, String, "zeros") + .OP_END_FACTORY_REG(ImageUnfold) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/list_ops.h b/third_party/fwkacllib/inc/ops/list_ops.h index 33270ea8..a1b622e9 100644 --- a/third_party/fwkacllib/inc/ops/list_ops.h +++ b/third_party/fwkacllib/inc/ops/list_ops.h @@ -225,6 +225,280 @@ REG_OP(TensorListSetItem) .ATTR(element_dtype, Type, DT_INT32) .OP_END_FACTORY_REG(TensorListSetItem) +/** +*@brief Push tensor to list. \n + +*@par Inputs: +*@li input_handles: The input tensor lists. +*@li tensor: The tensor push into tensor list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handles: The output tensor lists. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListPushBackBatch operator. +*/ +REG_OP(TensorListPushBackBatch) + .INPUT(input_handles, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(output_handles, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListPushBackBatch) + +/** +*@brief Stacks all tensors in the list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li element_shape: A shape compatible with that of elements in the tensor. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. +*@li num_elements: The number of elements in the list. \n + +*@par Outputs: +*@li tensor: The tensor of list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListStack operator. +*/ +REG_OP(TensorListStack) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .ATTR(num_elements, Int, -1) + .OP_END_FACTORY_REG(TensorListStack) + +/** +*@brief Concats all tensors in the list along the 0th dimension. +Requires that all tensors have the same shape except the first dimension. \n + +*@par Inputs: +*@li input_handle: The input list. +*@li element_shape: The shape of the uninitialized elements in the list. +If the first dimension is not -1, it is assumed that all list elements have +the same leading dim. +*@li leading_dims: The list of leading dims of uninitialized list elements. Used if +the leading dim of input_handle.element_shape or the element_shape input arg +is not already set. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li tensor: The concated result. +*@li lengths: Output tensor containing sizes of the 0th dimension of tensors +in the list, used for computing the gradient. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListConcatV2 operator. +*/ +REG_OP(TensorListConcatV2) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(leading_dims, TensorType({DT_INT64})) + .OUTPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .OUTPUT(lengths, TensorType({DT_INT64})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListConcatV2) + +/** +*@brief Splits a tensor into a list. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li element_shape: A shape compatible with that of elements in the tensor. +*@li lengths: Vector of sizes of the 0th dimension of tensors in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: The list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListSplit operator. +*/ +REG_OP(TensorListSplit) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(lengths, TensorType({DT_INT64})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListSplit) + +/** +*@brief Creates a TensorList which, when stacked, has the value of `tensor`. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li element_shape: The shape of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: An output tensor list . \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListFromTensor operator. +*/ +REG_OP(TensorListFromTensor) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListFromTensor) + +/** +*@brief Resizes the list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li size: size of the output list. \n + +*@par Outputs: +*@li output_handle: The output tensor list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListResize operator. +*/ +REG_OP(TensorListResize) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(size, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .OP_END_FACTORY_REG(TensorListResize) + +/** +*@brief Creates a Tensor by indexing into the TensorList. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li indices: The indices used to index into the list. +*@li element_shape: The shape of elements in the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li values: The tensor. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListGather operator. +*/ +REG_OP(TensorListGather) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32})) + .OUTPUT(values, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListGather) + +/** +*@brief Creates a TensorList by indexing into a Tensor. \n + +*@par Inputs: +*@li tensor: The input tensor. +*@li indices: The indices used to index into the list. +*@li element_shape: The shape of the elements in the list (can be less specified than +the shape of the tensor). +*@li num_elements: The size of the output list. Must be large enough to accommodate +the largest index in indices. If -1, the list is just large enough to include +the largest index in indices. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: The TensorList. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListScatterV2 operator. +*/ +REG_OP(TensorListScatterV2) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT32})) + .INPUT(element_shape, TensorType({DT_INT32,DT_INT64})) + .INPUT(num_elements, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListScatterV2) + +/** +*@brief Scatters tensor at indices in an input list. \n + +*@par Inputs: +*@li input_handle: The input tensor list. +*@li tensor: The input tensor. +*@li indices: The indices used to index into the list. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output_handle: The TensorList. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListScatterIntoExistingList operator. +*/ +REG_OP(TensorListScatterIntoExistingList) + .INPUT(input_handle, TensorType({DT_VARIANT})) + .INPUT(tensor, TensorType({DT_FLOAT16,DT_FLOAT,DT_DOUBLE,DT_INT8, + DT_INT16,DT_INT32,DT_INT64,DT_UINT8,DT_UINT16,DT_QINT8,DT_QUINT8, + DT_QINT16,DT_QUINT16,DT_QINT32,DT_BOOL, + DT_STRING,DT_COMPLEX64,DT_COMPLEX128})) + .INPUT(indices, TensorType({DT_INT32})) + .OUTPUT(output_handle, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListScatterIntoExistingList) + +/** +*@brief Concat two tensor lists to a new tensor list. \n + +*@par Inputs: +*@li input_a: The input tensor list A. +*@li input_b: The input tensor list B. \n + +*@par Attributes: +*@li element_dtype: The type of elements in the list. \n + +*@par Outputs: +*@li output: The output list. \n + +*@par Third-party framework compatibility. +*Compatible with tensorflow TensorListConcatLists operator. +*/ +REG_OP(TensorListConcatLists) + .INPUT(input_a, TensorType({DT_VARIANT})) + .INPUT(input_b, TensorType({DT_VARIANT})) + .OUTPUT(output, TensorType({DT_VARIANT})) + .ATTR(element_dtype, Type, DT_INT32) + .OP_END_FACTORY_REG(TensorListConcatLists) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_LIST_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/math_ops.h b/third_party/fwkacllib/inc/ops/math_ops.h index bd30e50c..28f7f0aa 100644 --- a/third_party/fwkacllib/inc/ops/math_ops.h +++ b/third_party/fwkacllib/inc/ops/math_ops.h @@ -982,6 +982,65 @@ REG_OP(SoftMarginLossGrad) .ATTR(reduction, String, "mean") .OP_END_FACTORY_REG(SoftMarginLossGrad) +/** + *@brief Computes batched the p-norm distance between each pair of + *the two collections of row vectors. \n + + *@par Inputs: + *Two inputs, including: + * @li x1: A tensor with shpae: BxPXM. Must be one of the following types: + * float16, float32. \n + * @li x2: A tensor with shpae: BxRxM. Must be one of the following types: + * float16, float32. \n + + *@par Attributes: + * @li p: An optional float >= 0 or inf. Defaults to 2.0. \n + + *@par Outputs: + * y: A Tensor with the same type of x1's and with shape BxPxR. \n + + *@par Third-party framework compatibility + *Compatible with the Pytorch operator Cdist. \n + */ +REG_OP(Cdist) + .INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(Cdist) + +/** +*@brief Computes the grad of x1 in cdist. \n + +*@par Inputs: +*Four inputs, including: + * @li grad: Grad with shape BxPxR. Must be one of the following types: +* float16, float32. \n +* @li x1: A tensor with shpae: BxPXM. Must be one of the following types: +* float16, float32. \n +* @li x2: A tensor with shpae: BxRxM. Must be one of the following types: +* float16, float32. \n +* @li cdist: Output tensor of cdist forward with shpae: BxPXR. +* Must be one of the following types: float16, float32. \n + +*@par Attributes: +* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n + +*@par Outputs: +* y: A Tensor with the same type and shape of x1's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Cdist Backward. \n +*/ +REG_OP(CdistGrad) + .INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .ATTR(p, Float, 2.0) + .OP_END_FACTORY_REG(CdistGrad) + } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h index 32631f14..49a49931 100644 --- a/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h +++ b/third_party/fwkacllib/inc/ops/matrix_calculation_ops.h @@ -1065,7 +1065,37 @@ REG_OP(Tril) .ATTR(diagonal, Int, 0) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(Tril) +/** +*@brief Concatenates a list of N tensors along the first dimension. +*@par Inputs: +* Two inputs, including: +* @li values: A list of Tensors. Must be one of the following types: int32, float16, float32. +* Tensors to be concatenated. All must have size 1 in the first dimension and same shape. +* It's a dynamic input. +* @li shape: A Tensor of the same type as "x". +* The final shape of the result. Should be equal to the shapes of any input +* but with the number of input values in the first dimension . \n +*@par Attributes: +*equation: The subscripts for the Einstein summation. \n +*tensor_size: tensor size of input \n + +*@par Outputs: +*@li y: Sums the product of the elements of the input operands along dimensions specified + using a notation based on the Einstein summation convention. \n + +*@attention Constraints: +*Input tensor_size must be Int. \n + +*@par Third-party framework compatibility +*Compatible with Pytorch einsum operator. +*/ +REG_OP(EinSum) + .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32})) + .REQUIRED_ATTR(equation, String) + .REQUIRED_ATTR(tensor_size, Int) + .OP_END_FACTORY_REG(EinSum) } // namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_norm_ops.h b/third_party/fwkacllib/inc/ops/nn_norm_ops.h index 041aa765..7d38beb5 100644 --- a/third_party/fwkacllib/inc/ops/nn_norm_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_norm_ops.h @@ -427,6 +427,33 @@ REG_OP(MVN) .ATTR(eps, Float, 1e-9) .OP_END_FACTORY_REG(MVN) +/** +*@brief Normalizes the input . \n + +*@par Inputs: +* One input: +*x: An NCHW tensor of type float16 or float32 . \n + +*@par Attributes: +*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9" . \n +*@li axes: A list of Intefers, along which axis to reduce. Defaults to "[0, 2, 3]" . \n + +*@par Outputs: +*y: An NCHW tensor of type float16 or float32 . \n + +*@attention Constraints: +* The input tensor must have the NCHW format, whose shape length must be 4. +*@par Third-party framework compatibility +* Compatible with the ONNX operator MeanVarianceNormalization. +*/ + +REG_OP(MVNV2) + .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */ + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */ + .ATTR(eps, Float, 1e-9) + .ATTR(axes, ListInt, {0, 2, 3}) + .OP_END_FACTORY_REG(MVNV2) + /** *@brief Normalizes the input "x1" . \n @@ -1205,6 +1232,33 @@ REG_OP(Centralization) .ATTR(axes, ListInt, {-1}) .OP_END_FACTORY_REG(Centralization) +/** +*@brief Roll the tensor along the given dimension(s). +* Elements that are shifted beyond the last position are re-introduced at the first position. +* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n + +*@par Inputs: +*One inputs, including: +* @li x: A tensor . Must be one of the following types: +* float16, float32, int32, uint32, int8, uint8. \n + +*@par Attributes: +* @li shifts: The number of places by which the elements of the tensor are shifted. \n +* @li dims: Axis along which to roll. \n + +*@par Outputs: +* y: A Tensor with the same type and shape of x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator Roll. \n +*/ +REG_OP(Roll) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8})) + .REQUIRED_ATTR(shifts, ListInt) + .ATTR(dims, ListInt, {}) + .OP_END_FACTORY_REG(Roll) + /** *@brief Calculate the loss. Creates a criterion that optimizes a two-class classification logistic loss between input_x and input_y (containing 1 or -1). \n diff --git a/third_party/fwkacllib/inc/ops/nn_ops.h b/third_party/fwkacllib/inc/ops/nn_ops.h index 820aa00d..f7dd6795 100644 --- a/third_party/fwkacllib/inc/ops/nn_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_ops.h @@ -49,5 +49,60 @@ REG_OP(InTopKV2) .INPUT(k, TensorType({IndexNumberType})) .OUTPUT(precision, TensorType({DT_BOOL})) .OP_END_FACTORY_REG(InTopKV2) + +/** +*@brief Performs batch normalization . \n + +*@par Inputs: +* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the scaling factor. +*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the offset. +*@li mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the +operation is used for training. +*@li variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be +5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None" +if the operation is used for training . \n + +*@par Attributes: +*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero. Defaults to "0.0001". +*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC". +*@li is_training: An optional bool, specifying if the operation is used for training or inference. Defaults to "True" . \n + +*@par Outputs: +* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported) +*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D or NC1HWC0 for 5D. +*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D +if input "x" is with format NC1HWC0. Specifies the mean of "x". +*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x". +*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation. Pass "None" to skip this output. +*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. +Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation. Pass "None" to skip this output . \n + +*@attention Constraints: +*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available, +then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance". +*@li For Ascend 310, the result accuracy fails to reach 1‰ due to the square root instruction . \n +*/ +REG_OP(FusedBatchNormV2) + .INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT})) + .INPUT(scale, TensorType({DT_FLOAT})) + .INPUT(offset, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(mean, TensorType({DT_FLOAT})) + .OPTIONAL_INPUT(variance, TensorType({DT_FLOAT})) + .OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT})) + .OUTPUT(batch_mean, TensorType({DT_FLOAT})) + .OUTPUT(batch_variance, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_1, TensorType({DT_FLOAT})) + .OUTPUT(reserve_space_2, TensorType({DT_FLOAT})) + .ATTR(epsilon, Float, 0.0001) + .ATTR(data_format, String, "NHWC") + .ATTR(is_training, Bool, true) + .OP_END_FACTORY_REG(FusedBatchNormV2) }// namespace ge #endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_ diff --git a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h index 9f191ebe..32eb148b 100644 --- a/third_party/fwkacllib/inc/ops/nn_pooling_ops.h +++ b/third_party/fwkacllib/inc/ops/nn_pooling_ops.h @@ -1502,14 +1502,14 @@ REG_OP(AdaptiveAvgPool2d) * @brief Compute gradients of adaptive averagev2 pooling function. * @par Inputs: -* @li input_grad: A NCHW Tensor. Must be one of the following data types: +* @li input_grad: A Tensor. Must be one of the following data types: * float16, float32. * @par Attributes: * @li orig_input_shape: A required tuple or list of type int32. * @par Outputs: -* @li output_grad: A tensor with the same shape and type as "orig_input_shape". +* @li output_grad: A tensor with the same type as "input_grad". * @par Third-party framework compatibility * Compatible with the Pytorch operator AdaptiveAvgPool2dGrad. diff --git a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h index a911fa51..0d0be241 100644 --- a/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h +++ b/third_party/fwkacllib/inc/ops/nonlinear_fuc_ops.h @@ -530,6 +530,34 @@ REG_OP(Elu) .ATTR(alpha, Float, 1.0) .OP_END_FACTORY_REG(Elu) +/** +*@brief Continuously Differentiable Exponential Linear Uints: +* Perform the linear uint element-wise on the input tensor X using formula: +* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n + +*@par Inputs: +*x: A float16, float32 or double, for the input data type . \n + +*@par Attributes: +*alpha: A float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n + +*@par Outputs: +*y: A float16, float32 or double, for the normalized result . \n + +*@attention Constraints: +*@li The input is of type float16 or float32 . \n + +*@par Multiple batches supported or not +*Supported +*@par Third-party framework compatibility +*@li Compatible with ONNX's Celu operator +*/ +REG_OP(Celu) + .INPUT(x, TensorType::FloatingDataType()) + .OUTPUT(y, TensorType::FloatingDataType()) + .ATTR(alpha, Float, 1.0) + .OP_END_FACTORY_REG(Celu) + /** *@brief Computes gradients for the exponential linear (Elu) operation. * diff --git a/third_party/fwkacllib/inc/ops/pad_ops.h b/third_party/fwkacllib/inc/ops/pad_ops.h index 8d71c5cd..50eb98c8 100644 --- a/third_party/fwkacllib/inc/ops/pad_ops.h +++ b/third_party/fwkacllib/inc/ops/pad_ops.h @@ -101,7 +101,7 @@ REG_OP(FillD) */ REG_OP(BroadcastTo) .INPUT(x, TensorType::BasicType()) - .INPUT(shape, TensorType({DT_INT32})) + .INPUT(shape, TensorType({DT_INT32,DT_INT64})) .OUTPUT(y, TensorType::BasicType()) .OP_END_FACTORY_REG(BroadcastTo) diff --git a/third_party/fwkacllib/inc/ops/selection_ops.h b/third_party/fwkacllib/inc/ops/selection_ops.h index 0aff5a81..7fe05b1e 100644 --- a/third_party/fwkacllib/inc/ops/selection_ops.h +++ b/third_party/fwkacllib/inc/ops/selection_ops.h @@ -239,6 +239,30 @@ REG_OP(GatherV2D) .REQUIRED_ATTR(axis, Int) .OP_END_FACTORY_REG(GatherV2D) +/** +*@Gathers values along an axis specified by dim . \n + +*@par Inputs: +*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64. +*@li index: A Tensor. Must be one of the following types: int64 . \n + +*@par Attributes: +* dim: the axis along which to index . \n + +*@par Outputs: +* y: A Tensor. Has the same type as "x" . \n + +*@par Third-party framework compatibility +*Compatible with the PyTorch operator Gather. +*/ + +REG_OP(GatherElements) + .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .INPUT(index, TensorType({DT_INT64})) + .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64})) + .ATTR(dim, Int, 0) + .OP_END_FACTORY_REG(GatherElements) + /** *@brief Extracts a strided slice of a tensor. Roughly speaking, this op extracts a slice of size (end-begin)/stride from the given input tensor. @@ -486,6 +510,38 @@ REG_OP(UnsortedSegmentSum) .OUTPUT(y, TensorType::NumberType()) .OP_END_FACTORY_REG(UnsortedSegmentSum) +/** +*@brief Creates a one-dimensional tensor of size steps whose values are evenly spaced from start to +* end, inclusive, on a logarithmic scale with base base. \n + +*@par Inputs: +*One inputs, including: +* @li assist: A tensor. Must be one of the following types: +* float16, float32. \n + +* @par Attributes: +* @li start: An required float. Used to select the start. \n +* @li end: An required float. Used to select the end. \n +* @li steps: An optional int.Defaults to 100. \n +* @li base: An optional float.Defaults to 10.0. \n +* @li dtype: An optional int.Defaults to 1. \n + +*@par Outputs: +*y: A Tensor with the same type and shape of input_x's. \n + +*@par Third-party framework compatibility +*Compatible with the Pytorch operator logspaced. \n +*/ +REG_OP(LogSpaceD) + .INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16})) + .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) + .REQUIRED_ATTR (start, Float) + .REQUIRED_ATTR (end, Float) + .ATTR(steps, Int, 100) + .ATTR(base, Float, 10.0) + .ATTR(dtype, Int, 1) + .OP_END_FACTORY_REG(LogSpaceD) + /** *@brief Computes the sum along segments of a tensor . \n diff --git a/third_party/fwkacllib/inc/runtime/base.h b/third_party/fwkacllib/inc/runtime/base.h index 5b246eed..1b264843 100644 --- a/third_party/fwkacllib/inc/runtime/base.h +++ b/third_party/fwkacllib/inc/runtime/base.h @@ -339,7 +339,7 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream); * @return RT_ERROR_NONE for ok * @return RT_ERROR_INVALID_VALUE for error input */ -rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); +RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream); /** * @ingroup dvrt_base diff --git a/third_party/fwkacllib/inc/runtime/config.h b/third_party/fwkacllib/inc/runtime/config.h index fed7341a..66ae36ef 100644 --- a/third_party/fwkacllib/inc/runtime/config.h +++ b/third_party/fwkacllib/inc/runtime/config.h @@ -132,6 +132,11 @@ typedef struct tagRtPlatformConfig { uint32_t platformConfig; } rtPlatformConfig_t; +typedef enum tagRTTaskTimeoutType { + RT_TIMEOUT_TYPE_OP_WAIT = 0, + RT_TIMEOUT_TYPE_OP_EXECUTE, +} rtTaskTimeoutType_t; + /** * @ingroup * @brief get AI core count @@ -203,6 +208,24 @@ RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion); */ RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value); +/** + * @ingroup + * @brief set event wait task timeout time. + * @param [in] timeout + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout); + +/** + * @ingroup + * @brief set op execute task timeout time. + * @param [in] timeout + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/runtime/kernel.h b/third_party/fwkacllib/inc/runtime/kernel.h index 754f069a..cf5347cb 100644 --- a/third_party/fwkacllib/inc/runtime/kernel.h +++ b/third_party/fwkacllib/inc/runtime/kernel.h @@ -188,7 +188,7 @@ typedef void (*rtCallback_t)(void *fnData); /** * @ingroup rt_kernel * @brief kernel mode - */ +**/ #define RT_DEFAULT_KERNEL_MODE (0x00) #define RT_NORMAL_KERNEL_MODE (0x01) #define RT_ALL_KERNEL_MODE (0x02) @@ -211,7 +211,7 @@ RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle); /** * @ingroup rt_kernel - * @brief register device binary + * @brief register device binary with all kernel * @param [in] bin device binary description * @param [out] handle device binary handle * @return RT_ERROR_NONE for ok @@ -330,7 +330,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * * @ingroup rt_kernel * @brief launch kernel with handle to device * @param [in] handle program - * @param [in] devFunc device function description + * @param [in] devFunc device function description. * @param [in] blockDim block dimentions * @param [in] args argments address for kernel function * @param [in] argsSize argements size @@ -341,7 +341,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void * * @return RT_ERROR_INVALID_VALUE for error input */ RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize, - rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo); + rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo); /** * @ingroup rt_kernel diff --git a/third_party/fwkacllib/inc/runtime/rt_model.h b/third_party/fwkacllib/inc/runtime/rt_model.h index 798f63ae..df2eddc9 100644 --- a/third_party/fwkacllib/inc/runtime/rt_model.h +++ b/third_party/fwkacllib/inc/runtime/rt_model.h @@ -133,12 +133,13 @@ typedef struct tagAllKernelTaskInfo { uint16_t argsCount; uint16_t argsSize; uint16_t reserved; - const void *dev_func; + void *devfunc; void *handle; uint8_t *smDesc; uint8_t *args; uint16_t *argsOffset; } rtAllKernelTaskInfo_t; + typedef struct tagKernelTaskInfoEx { uint32_t flags; uint32_t argsSize; @@ -263,7 +264,7 @@ typedef struct tagTaskInfo { union { rtKernelTaskInfoEx_t kernelTaskEx; rtKernelTaskInfo_t kernelTask; - rtAllKernelTaskInfo_t allkernelTask; + rtAllKernelTaskInfo_t allKernelTask; rtEventTaskInfo_t eventTask; rtStreamSwitchTaskInfo_t streamSwitchTask; rtStreamActiveTaskInfo_t streamActiveTask; @@ -285,10 +286,27 @@ typedef struct tagTaskInfo { } u; } rtTaskInfo_t; +typedef struct tagNodeInfo_t { + uint32_t nodeIdx; + uint32_t reserved[1]; +} rtNodeInfo; + +typedef struct tagHwtsInfo_t { + uint16_t taskId; + uint16_t sqExeHead; + uint16_t streamExeHead; + uint16_t reserved[2]; +} rtHwtsInfo; + typedef struct tagLabelDevInfo_t { uint16_t modelId; uint16_t streamId; uint16_t labelId; + union { + rtNodeInfo nodeInfo; + rtHwtsInfo hwtsInfo; + uint16_t reserved[5]; + }u; }rtLabelDevInfo; typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo); diff --git a/third_party/fwkacllib/inc/runtime/stream.h b/third_party/fwkacllib/inc/runtime/stream.h index 6b9f80ae..f9981514 100644 --- a/third_party/fwkacllib/inc/runtime/stream.h +++ b/third_party/fwkacllib/inc/runtime/stream.h @@ -189,6 +189,28 @@ RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream); */ RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr, uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType); + +/* + * @ingroup dvrt_stream + * @brief enable debug for dump overflow exception with stream + * @param [in] addr: ddr address of kernel exception dumpped + * @param [in] stream: stream handle + * @param [in] flag: debug flag + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr, + uint32_t *streamId, uint32_t *taskId); + +/* + * @ingroup rt_model + * @brief disable debug for dump overflow exception with stream + * @param [in] stream: stream handle + * @return RT_ERROR_NONE for ok + * @return RT_ERROR_INVALID_VALUE for error input + */ +RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream); + #if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE) } #endif diff --git a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h index 47c41c48..3634b8a8 100644 --- a/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h +++ b/third_party/fwkacllib/inc/toolchain/tuning_tool/tune_api.h @@ -11,93 +11,11 @@ /** @defgroup aoe aoe调优接口 */ #ifndef TUNE_API_H #define TUNE_API_H -#include #include #include -#include "graph/graph.h" #include "ge/ge_api.h" #include "aoe_types.h" -/** - * @ingroup aoe - * - * aoe status - */ -enum MsTuneStatus { - MSTUNE_SUCCESS, /** tune success */ - MSTUNE_FAILED, /** tune failed */ -}; - -// Option key: for train options sets -const std::string MSTUNE_SELF_KEY = "mstune"; -const std::string MSTUNE_GEINIT_KEY = "initialize"; -const std::string MSTUNE_GESESS_KEY = "session"; - -#ifdef __cplusplus -extern "C" { -#endif - -struct RunnerInitConfig { - // onilne online - std::string profPath; - std::string parserPath; - // ncs only - std::vector devList; -}; - -struct RunnerOpInfo { - std::string opName; - uint64_t opCostTime; - uint64_t aicoreCostTime; - // gradient_split only - std::string modelName; - std::string opType; - std::vector start; - std::vector end; -}; - -struct RunnerModelInfo { - uint64_t totalCostTime; -}; - -struct RunnerRunResult { - std::vector modelInfo; - std::vector opInfo; -}; - -struct RunnerResult { - uint64_t totalCostTime; - std::map opCostTime; - std::map aicoreCostTime; -}; - -struct RunnerDataBuf { - void *ptr = nullptr; - size_t size = 0; -}; - -struct AOEBufferData { - std::shared_ptr data = nullptr; - uint64_t length; -}; - -struct RunnerConfig { - bool isProf; - uint32_t loop; - // offline only - std::vector input; - std::vector output; - std::string modelPath; - RunnerDataBuf modelData; - // online only - uint32_t devId; - std::vector> inputs; - std::vector dependGraph; // run graph (for training) -}; -#ifdef __cplusplus -} -#endif - /** * @ingroup aoe * @par 描述: 命令行调优