/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DNN_OP_H__
#define DNN_OP_H__
#include "cce/blas_struct.h"
#include "cce/cce.h"
#include "cce/customize.h"
namespace cce {
/**
* @ingroup dnn
* @brief create descriptor of parameters for exponential function
* @param [in] expDesc pointer to descriptor of parameters for exponential function
* @return ccStatus_t
*/
ccStatus_t ccCreateExpDescriptor(ccExpDescriptor_t *expDesc);
/**
* @ingroup dnn
* @brief create descriptor of parameters for logarithmic function
* @param [in] logDesc pointer to descriptor of parameters for logarithmic function
* @return ccStatus_t
*/
ccStatus_t ccCreateLogDescriptor(ccLogDescriptor_t *logDesc);
/**
* @ingroup dnn
* @brief create descriptor of parameters for pow function
* @param [in] powDesc pointer to descriptor of parameters for pow function
* @return ccStatus_t
*/
ccStatus_t ccCreatePowDescriptor(ccPowDescriptor_t *powDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of parameters for exponential function
* @param [in] expDesc pointer to descriptor of parameters for exponential function
* @return ccStatus_t
*/
ccStatus_t ccDestroyExpDescriptor(ccExpDescriptor_t *expDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of parameters for logarithmic function
* @param [in] logDesc pointer to descriptor of parameters for logarithmic function
* @return ccStatus_t
*/
ccStatus_t ccDestroyLogDescriptor(ccLogDescriptor_t *logDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of parameters for pow function
* @param [in] powDesc pointer to descriptor of parameters for pow function
* @return ccStatus_t
*/
ccStatus_t ccDestroyPowDescriptor(ccPowDescriptor_t *powDesc);
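/*
 * Example: a minimal create/use/destroy lifecycle sketch for the descriptor
 * helpers above. CC_STATUS_SUCCESS is assumed to be the success code defined
 * in cce.h; error handling is abbreviated.
 * @code
 *   ccExpDescriptor_t expDesc = nullptr;
 *   if (ccCreateExpDescriptor(&expDesc) == CC_STATUS_SUCCESS) {
 *     // ... configure and use expDesc ...
 *     (void)ccDestroyExpDescriptor(&expDesc);
 *   }
 * @endcode
 */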
/**
* @ingroup dnn
* @brief create descriptor of parameters for NonMaxSuppression function
* @param [in] nonMaxSuppressionDesc pointer to descriptor of parameters for NonMaxSuppression function
* @return ccStatus_t
*/
ccStatus_t ccCreateNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t *nonMaxSuppressionDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of parameters for NonMaxSuppression function
* @param [in] nonMaxSuppressionDesc pointer to descriptor of parameters for NonMaxSuppression function
* @return ccStatus_t
*/
ccStatus_t ccDestroyNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t *nonMaxSuppressionDesc);
ccStatus_t ccTransTensorIncertPads(const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
void *y, uint32_t ySizeInBytes, uint32_t boxTypeNum, bool interweave,
bool background, uint32_t boxTypeNumMax = 0, bool isScaleVec = false);
ccStatus_t ccTransTensorIncertPadsInt32(const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
uint32_t boxTypeNum, bool interweave, bool background);
ccStatus_t ccTransMskrcnnBbox(const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
void *y, uint32_t ySizeInBytes, uint32_t boxTypeNum);
ccStatus_t ccSetTensorDescriptorQuantizeParam(ccTensorDescriptor_t tensorDesc,
const ccVecQuantizePara_t *vecQuantizePara);
ccStatus_t ccGetTensorDescriptorQuantizeParam(const ccTensorDescriptor_t tensorDesc,
ccVecQuantizePara_t *vecQuantizePara);
/**
* @ingroup dnn
* @brief init filter descriptor to 4d filter
* @param [in|out] filterDesc descriptor of filter
* @param [in] format format of filter
* @param [in] dataType data type in device
* @param [in] k number of output feature maps
* @param [in] c number of input feature maps
* @param [in] h height of filter
* @param [in] w width of filter
* @return ccStatus_t
*/
ccStatus_t ccSetFilter4dDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
int32_t k, int32_t c, int32_t h, int32_t w);
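/*
 * Example: a hypothetical sketch describing a 64x3x7x7 half-precision filter.
 * CC_TENSOR_NCHW is an assumed format enum from cce.h; filterDesc is assumed
 * to have been created beforehand.
 * @code
 *   ccStatus_t ret = ccSetFilter4dDescriptor(filterDesc, CC_TENSOR_NCHW,
 *                                            CC_DATA_HALF,
 *                                            64, 3, 7, 7);  // k, c, h, w
 * @endcode
 */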
ccStatus_t ccSetFilter6dDescriptor(ccTensorDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
int32_t c1, int32_t h, int32_t w, int32_t n, int32_t co, int32_t c0);
/**
* @ingroup dnn
* @brief init filter descriptor to Fractal filter
* @param [in|out] filterDesc descriptor of filter
* @param [in] format format of filter
* @param [in] dataType data type in device
* @param [in] k number of output feature maps
* @param [in] c number of input feature maps
* @param [in] h height of filter
* @param [in] w width of filter
* @return ccStatus_t
*/
ccStatus_t ccSetFilterFractalDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
int32_t k, int32_t c, int32_t h, int32_t w);
/**
* @ingroup dnn
* @brief init filter descriptor to int8 4d filter
* @param [in|out] filterDesc descriptor of filter
* @param [in] format format of filter
* @param [in] dataType data type in device
* @param [in] k number of output feature maps
* @param [in] c number of input feature maps
* @param [in] h height of filter
* @param [in] w width of filter
* @param [in] outputDataType data type of output
* @return ccStatus_t
*/
ccStatus_t ccSetInt8Filter4dDescriptor(ccFilterDescriptor_t filterDesc, ccTensorFormat_t format, ccDataType_t dataType,
int32_t k, int32_t c, int32_t h, int32_t w, ccDataType_t outputDataType);
/**
* @ingroup dnn
* @brief read Fractal filter descriptor
* @param [in] filterDesc descriptor of filter
* @param [in|out] format point to format of filter
* @param [in|out] dataType point to data type in device
* @param [in|out] k point to number of output feature maps
* @param [in|out] c point to number of input feature maps
* @param [in|out] h point to height of filter
* @param [in|out] w point to width of filter
* @return ccStatus_t
*/
ccStatus_t ccGetFilterFractalDescriptor(const ccFilterDescriptor_t filterDesc, ccTensorFormat_t *format,
ccDataType_t *dataType, int32_t *k, int32_t *c, int32_t *h, int32_t *w);
/**
* @ingroup dnn
* @brief get data size of depthwise conv filter
* @param [in] filterDesc descriptor of filter
* @param [in] groupNum group number of depthwise conv
* @param [in|out] size point to data size
* @return ccStatus_t
*/
ccStatus_t ccGetDepthWiseConvFilterSizeInBytes(const ccFilterDescriptor_t filterDesc, int32_t groupNum, uint32_t *size);
/**
* @ingroup dnn
* @brief trans group conv filter to fractal format
* @param [in] filterSrcInfo descriptor of input filter
* @param [in] filterSrc input data pointer
* @param [in] filterDstInfo descriptor of output filter
* @param [in|out] filterDst output data pointer
* @param [in] group group size
* @param [in] dstSize the malloc memory size
* @return ccStatus_t
*/
ccStatus_t ccTransGroupConvFilter(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group,
uint32_t dstSize);
/**
* @ingroup dnn
* @brief trans conv filter with boxTypeNum to fractal format
* @param [in] xDesc descriptor of input filter
* @param [in] x input data pointer
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes the malloc memory size
* @param [in] boxTypeNum the num of boxType
* @param [in] interweave whether the axis interweave
* @param [in] boxTypeNumMax max num of boxType, default 0
* @return ccStatus_t
*/
ccStatus_t ccTransFilterWithBoxTypeNum(const ccFilterDescriptor_t xDesc, const void *x,
const ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
uint32_t boxTypeNum, bool interweave, uint32_t boxTypeNumMax = 0);
/**
* @ingroup dnn
* @brief trans int8 conv filter with boxTypeNum to fractal format
* @param [in] wDesc descriptor of input filter
* @param [in] x input data pointer
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes the malloc memory size
* @param [in] boxTypeNum the num of boxType
* @param [in] interweave whether the axis interweave
* @param [in] outputDataType output data type
* @return ccStatus_t
*/
ccStatus_t ccTransFilterInt8WithBoxTypeNum(const ccFilterDescriptor_t wDesc, const void *x,
const ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes,
uint32_t boxTypeNum, bool interweave, ccDataType_t outputDataType);
/**
* @ingroup dnn
* @brief trans depthwise conv filter to fractal format
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in] groupNum groupNum of conv
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes the malloc memory size
* @return ccStatus_t
*/
ccStatus_t transDepthWiseConvFilterNCHWToFractalZ(const ccFilterDescriptor_t wDesc, const void *w, int32_t groupNum,
ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes);
/**
* @ingroup dnn
* @brief trans depthwise conv filter to fractal format
* @param [in] wDesc descriptor of input filter
* @param [in] w input data pointer
* @param [in] groupNum groupNum of conv
* @param [in] yDesc descriptor of output filter
* @param [in|out] y output data pointer
* @param [in] ySizeInBytes the malloc memory size
* @return ccStatus_t
*/
ccStatus_t transDepthWiseConvFilterInt8NCHWToFractalZ(const ccFilterDescriptor_t wDesc, const void *w, int32_t groupNum,
ccFilterDescriptor_t yDesc, void *y, uint32_t ySizeInBytes);
/**
* @ingroup dnn
* @brief trans depthwise conv filter to fractal format, input format CHWN
* @param [in] filterSrcInfo descriptor of input filter
* @param [in] filterSrc input data pointer
* @param [in] group group size
* @param [in] filterDstInfo descriptor of output filter
* @param [in|out] filterDst output data pointer
* @param [in] destSize the malloc memory size
* @return ccStatus_t
*/
ccStatus_t transDepthWiseConvFilterCHWNToFractalZ(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
uint32_t group, ccFilterDescriptor_t filterDstInfo, void *filterDst,
uint32_t destSize);
/**
* @ingroup dnn
* @brief check whether depthwise conv is supported by the high-performance implementation
* @param [in] inputN, inputC, inputH, inputW input params
* @param [in] filterN, filterC, filterH, filterW filter params
* @param [in] dilationH, dilationW dilation params
* @param [in] padHHead, padHTail, padWHead, padWTail pad params
* @param [in] strideH, strideW stride params
* @param [in] groupNum conv groupNum
* @param [in|out] isHighPerformance high-performance flag
* @return ccStatus_t
*/
ccStatus_t ccIsDepthwiseHighPerformance(int32_t inputN, int32_t inputC, int32_t inputH, int32_t inputW, int32_t filterN,
int32_t filterC, int32_t filterH, int32_t filterW, int32_t dilationH,
int32_t dilationW, int32_t padHHead, int32_t padHTail, int32_t padWHead,
int32_t padWTail, int32_t strideH, int32_t strideW, int32_t groupNum,
bool &isHighPerformance, bool isquant = false,
ccDataType_t inputDataType = CC_DATA_HALF,
ccDataType_t outputDataType = CC_DATA_HALF);
/**
* @ingroup dnn
* @brief create descriptor of fullconnection operator
* @param [in|out] fcDesc point to descriptor of fullconnection operator
* @return ccStatus_t
*/
ccStatus_t ccCreateFullConnectionDescriptor(ccFullConnectionDescriptor_t *fcDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of fullconnection operator
* @param [in] *fcDesc descriptor of fullconnection operator
* @return ccStatus_t
*/
ccStatus_t ccDestroyFullConnectionDescriptor(ccFullConnectionDescriptor_t *fcDesc);
/**
* @ingroup dnn
* @brief init conv descriptor to 2d conv, use for beforeHasPad
* @param [in|out] convDesc descriptor of convolution operator
* @param [in] beforepadHHead before padding in height head
* @param [in] beforepadHTail before padding in height tail
* @param [in] beforepadWHead before padding in width head
* @param [in] beforepadWTail before padding in width tail
* @return ccStatus_t
*/
ccStatus_t ccSetConvolution2dDescriptorForPad(ccConvolutionDescriptor_t convDesc, int32_t beforepadHHead,
int32_t beforepadHTail, int32_t beforepadWHead, int32_t beforepadWTail);
/**
* @ingroup dnn
* @brief init conv descriptor to 2d conv, use for concat batch size
* @param [in|out] convDesc descriptor of convolution operator
* @param [in] concatBatchSize concat batch size
* @return ccStatus_t
*/
ccStatus_t ccSetConvolution2dDescriptorForConcatBatchSize(ccConvolutionDescriptor_t convDesc, int64_t concatBatchSize);
/**
* @ingroup dnn
* @brief append operation after convolution
* @param [in|out] convDesc descriptor of convolution operator
* @param [in] opType operation type for append at convolution operation
* @param [in] opDesc operation descriptor for the opType
* @return ccStatus_t
*/
ccStatus_t ccConvolution2dAppendOp(ccConvolutionDescriptor_t convDesc, ccOpType_t opType, const void *opDesc);
/**
* @ingroup dnn
* @brief read 2d conv beforeHasPad
* @param [in] convDesc descriptor of convolution operator
* @param [in|out] beforepadHHead before padding in height head, default is 0
* @param [in|out] beforepadHTail before padding in height tail, default is 0
* @param [in|out] beforepadWHead before padding in width head, default is 0
* @param [in|out] beforepadWTail before padding in width tail, default is 0
* @return ccStatus_t
*/
ccStatus_t ccGetConvolution2dDescriptorForPad(const ccConvolutionDescriptor_t convDesc, int32_t *beforepadHHead,
int32_t *beforepadHTail, int32_t *beforepadWHead,
int32_t *beforepadWTail);
/**
* @ingroup dnn
* @brief read 2d conv concat batch size
* @param [in] convDesc descriptor of convolution operator
* @param [in|out] concatBatchSize concat batch size, default is 0
* @return ccStatus_t
*/
ccStatus_t ccGetConvolution2dDescriptorForConcatBatchSize(const ccConvolutionDescriptor_t convDesc,
int64_t *concatBatchSize);
/**
* @ingroup dnn
* @brief get the temp space size of convolution forward computation (temp space may not be needed)
* @param [in] handle cce handle
* @param [in] convDesc descriptor of convolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in] yDesc descriptor of output tensor
* @param [in] algo algorithm of convolution forward
* @param [in|out] sizeInBytes temp space size needed for the specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetConvolutionForwardWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
const ccTensorDescriptor_t yDesc, ccConvolutionFwdAlgo_t algo,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief get the temp space size of convolution backward computation (temp space may not be needed)
* @param [in] handle cce handle
* @param [in] convDesc descriptor of convolution operator
* @param [in] dyDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in] dxDesc descriptor of output tensor
* @param [in] algo algorithm of convolution backward
* @param [in|out] sizeInBytes temp space size needed for the specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetConvolutionBackwardDataWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc,
const ccTensorDescriptor_t dyDesc,
const ccFilterDescriptor_t wDesc,
const ccTensorDescriptor_t dxDesc, ccConvolutionBwdAlgo_t algo,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief get the temp space size of fc forward computation (temp space may not be needed)
* @param [in] handle cce handle
* @param [in] fcDesc descriptor of fc operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in] yDesc descriptor of output tensor
* @param [in|out] sizeInBytes temp space size needed, 0 means no memory needed
* @return ccStatus_t
*/
ccStatus_t ccGetFullConnectionForwardWorkspaceSize(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc,
const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief convolution forward computation
* @param [in] handle cce handle
* @param [in] convDesc descriptor of convolution operator
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] wDesc descriptor of filter
* @param [in] w filter data in device memory
* @param [in] biasDesc descriptor of bias
* @param [in] bias bias data in device memory
* @param [in] algo algorithm of convolution forward
* @param [in] workSpace temp space, may be NULL if no temp space is needed
* @param [in] workSpaceSizeInBytes sizeof workspace
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccConvolutionForward(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
ccConvolutionFwdAlgo_t algo, void *workSpace, uint32_t workSpaceSizeInBytes,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
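/*
 * Example: a minimal forward-pass sketch pairing the workspace query with the
 * compute call. The handle, descriptors and device buffers (x, w, bias, y,
 * workSpace) are assumed valid; alpha/beta follow the host scaling-factor
 * convention used throughout this header.
 * @code
 *   uint32_t wsSize = 0;
 *   ccGetConvolutionForwardWorkspaceSize(handle, convDesc, xDesc, wDesc,
 *                                        yDesc, algo, &wsSize);
 *   // allocate wsSize bytes of device memory into workSpace when wsSize > 0
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccConvolutionForward(handle, convDesc, &alpha, xDesc, x, wDesc, w,
 *                        biasDesc, bias, algo, workSpace, wsSize,
 *                        &beta, yDesc, y);
 * @endcode
 */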
/**
* @ingroup dnn
* @brief alloc float status and reset to 0
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in|out] x output data in device memory
* @param [in] beta scaling factors
* @return ccStatus_t
*/
ccStatus_t ccAllocFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta);
/**
* @ingroup dnn
* @brief get float status data set by op
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in|out] x output data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccGetFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, const void *y);
/**
* @ingroup dnn
* @brief clear float status register
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccClearFloatStatus(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, const void *y);
#ifndef DAVINCI_LITE
/**
* @ingroup dnn
* @brief convolution backward data computation
* @param [in] handle cce handle
* @param [in] convDesc descriptor of convolution operator
* @param [in] alpha scaling factors
* @param [in] dyDesc descriptor of input tensor
* @param [in] dy input data in device memory
* @param [in] wDesc descriptor of filter
* @param [in] w filter data in device memory
* @param [in] algo algorithm of convolution backward
* @param [in] workSpace temp space, may be NULL if no temp space is needed
* @param [in] workSpaceSizeInBytes sizeof workspace
* @param [in] beta scaling factors
* @param [in] dxDesc descriptor of output tensor
* @param [in|out] dx output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccConvolutionBackwardData(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, const void *alpha,
const ccTensorDescriptor_t dyDesc, const void *dy,
const ccFilterDescriptor_t wDesc, const void *w, ccConvolutionBwdAlgo_t algo,
void *workSpace, uint32_t workSpaceSizeInBytes, const void *beta,
const ccTensorDescriptor_t dxDesc, void *dx);
#endif
/**
* @ingroup dnn
* @brief create descriptor of pooling operator
* @param [in|out] poolingDesc point to descriptor of pooling operator
* @return ccStatus_t
*/
ccStatus_t ccCreatePoolingDescriptor(ccPoolingDescriptor_t *poolingDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of pooling operator
* @param [in] *poolingDesc descriptor of pooling operator
* @return ccStatus_t
*/
ccStatus_t ccDestroyPoolingDescriptor(ccPoolingDescriptor_t *poolingDesc);
/**
* @ingroup dnn
* @brief init pooling descriptor to 2d pooling
* @param [in|out] poolingDesc descriptor of pooling operator
* @param [in] mode mode of pooling
* @param [in] padMode mode of padding
* @param [in] maxpoolingNanOpt Nan propagation mode
* @param [in] windowH height of pooling window
* @param [in] windowW width of pooling window
* @param [in] padHHead zero padding in height head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail use the
* same value.
* @param [in] padHTail zero padding in height tail, only needs to be set when padMode is CC_PADDING_DIRECTASSIGN.
* @param [in] padWHead zero padding in width head; if padMode is not CC_PADDING_DIRECTASSIGN, head and tail use the
* same value.
* @param [in] padWTail zero padding in width tail, only needs to be set when padMode is CC_PADDING_DIRECTASSIGN.
* @param [in] strideH stride in height
* @param [in] strideW stride in width
* @param [in] dataMode
* @param [in] ceilMode 0:Floor 1:Ceil
* @return ccStatus_t
*/
ccStatus_t ccSetPooling2dDescriptor(ccPoolingDescriptor_t poolingDesc, ccPoolingMode_t mode, ccPaddingMode_t padMode,
ccNanPropagation_t maxpoolingNanOpt, int32_t windowH, int32_t windowW,
int32_t padHHead, int32_t padHTail, int32_t padWHead, int32_t padWTail,
int32_t strideH, int32_t strideW, int32_t dataMode, int32_t ceilMode,
ccPooingFwdAlgo_t algo = CC_POOLING_FWD_ALGO_HALF);
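/*
 * Example: a hypothetical 3x3, stride-2 max-pooling setup with explicit
 * per-side padding. CC_POOLING_MAX is an assumed mode enum from cce.h;
 * CC_PADDING_DIRECTASSIGN and CC_NAN_NOT_PROPAGATE appear elsewhere in this
 * header.
 * @code
 *   ccStatus_t ret = ccSetPooling2dDescriptor(
 *       poolingDesc, CC_POOLING_MAX, CC_PADDING_DIRECTASSIGN,
 *       CC_NAN_NOT_PROPAGATE,
 *       3, 3,          // windowH, windowW
 *       1, 1, 1, 1,    // padHHead, padHTail, padWHead, padWTail
 *       2, 2,          // strideH, strideW
 *       0, 1);         // dataMode, ceilMode (1: Ceil)
 * @endcode
 */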
/**
* @ingroup dnn
* @brief get the output dimension info of 2d pooling
* @param [in] poolingDesc descriptor of pooling operator
* @param [in] xDesc descriptor of input tensor
* @param [in|out] n point to batch size
* @param [in|out] c point to channels
* @param [in|out] h point to height of feature map
* @param [in|out] w point to width of feature map
* @return ccStatus_t
*/
ccStatus_t ccGetPooling2dForwardOutputDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc,
int32_t *n, int32_t *c, int32_t *h, int32_t *w);
/**
* @ingroup dnn
* @brief pooling forward computation
* @param [in] handle cce handle
* @param [in] poolingDesc descriptor of pooling operator
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccPoolingForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief pooling backward computation
* @param [in] handle cce handle
* @param [in] poolingDesc descriptor of pooling operator
* @param [in] alpha scaling factors
* @param [in] beta scaling factors
* @param [in] argMaskDesc descriptor of mask tensor
* @param [in] argMask mask data in device memory
* @param [in] dyDesc descriptor of input tensor
* @param [in] dy input data in device memory
* @param [in] dxDesc descriptor of output tensor
* @param [in|out] dx output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccMaxPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
const void *beta, const ccTensorDescriptor_t argMaskDesc, const void *argMask,
const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t dxDesc,
void *dx);
/**
* @ingroup dnn
* @brief create descriptor of activation operator
* @param [in|out] activationDesc point to descriptor of activation operator
* @return ccStatus_t
*/
ccStatus_t ccCreateActivationDescriptor(ccActivationDescriptor_t *activationDesc);
/**
* @ingroup dnn
* @brief init activation descriptor to 2d activation
* @param [in|out] activationDesc descriptor of activation operator
* @param [in] mode mode of activation
* @param [in] reluNanOpt Nan propagation mode
* @param [in] coef ceiling for clipped RELU, alpha for ELU
* @param [in] activationPara activation parameter union
* @return ccStatus_t
*/
ccStatus_t ccSetActivationDescriptor(ccActivationDescriptor_t activationDesc, ccActivationMode_t mode,
ccNanPropagation_t reluNanOpt, double coef,
ccActivationPara_u activationPara = {{0, CC_NAN_NOT_PROPAGATE}});
/**
* @ingroup dnn
* @brief read activation param
* @param [in] activationDesc descriptor of activation operator
* @param [in|out] mode point to mode of activation
* @param [in|out] reluNanOpt point to Nan propagation mode
* @param [in|out] coef point to coef
* @param [in|out] activationPara point to activation parameter union
* @return ccStatus_t
*/
ccStatus_t ccGetActivationDescriptor(const ccActivationDescriptor_t activationDesc, ccActivationMode_t *mode,
ccNanPropagation_t *reluNanOpt, double *coef,
ccActivationPara_u *activationPara = NULL);
/**
* @ingroup dnn
* @brief destroy descriptor of activation operator
* @param [in] *activationDesc descriptor of activation operator
* @return ccStatus_t
*/
ccStatus_t ccDestroyActivationDescriptor(ccActivationDescriptor_t *activationDesc);
/**
* @ingroup dnn
* @brief activation forward computation
* @param [in] handle cce handle
* @param [in] activationDesc descriptor of activation operator
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccActivationForward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
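/*
 * Example: a ReLU forward sketch. CC_ACTIVATION_RELU is an assumed mode enum
 * from cce.h; the handle, descriptors and device buffers are assumed valid.
 * @code
 *   ccSetActivationDescriptor(activationDesc, CC_ACTIVATION_RELU,
 *                             CC_NAN_NOT_PROPAGATE, 0.0);
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccActivationForward(handle, activationDesc, &alpha, xDesc, x,
 *                       &beta, yDesc, y);
 * @endcode
 */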
/**
* @ingroup dnn
* @brief Derives a tensor descriptor from layer data descriptor for BatchNormalization
* @param [in|out] derivedBnDesc descriptor of the mean, variance, bias and scale tensors
* @param [in] xDesc descriptor of input tensor
* @param [in] mode mode of BatchNormalization
* @return ccStatus_t
*/
ccStatus_t ccDeriveBNTensorDescriptor(ccTensorDescriptor_t derivedBnDesc, const ccTensorDescriptor_t xDesc,
ccBatchNormMode_t mode);
/**
* @ingroup dnn
* @brief batchnorm forward computation
* @param [in] handle cce handle
* @param [in] mode mode of batchnorm
* @param [in] alpha scaling factors
* @param [in] beta scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @param [in] bnScaleBiasMeanVarDesc descriptor of scale, bias, mean, variance tensor
* @param [in] bnScale scaling factor
* @param [in] bnBias bias factor
* @param [in] estimatedMean mean
* @param [in] estimatedVariance variance
* @param [in] epsilon epsilon
* @return ccStatus_t
*/
ccStatus_t ccBatchNormForwardInference(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha, const void *beta,
const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, void *y,
const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
const void *bnBias, const void *estimatedMean, const void *estimatedVariance,
double epsilon);
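/*
 * Example: an inference-mode batchnorm sketch. ccDeriveBNTensorDescriptor
 * above can shape bnScaleBiasMeanVarDesc from xDesc; the mode value, the
 * epsilon of 1e-5 and all device buffers are illustrative assumptions.
 * @code
 *   ccDeriveBNTensorDescriptor(bnScaleBiasMeanVarDesc, xDesc, mode);
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccBatchNormForwardInference(handle, mode, &alpha, &beta, xDesc, x,
 *                               yDesc, y, bnScaleBiasMeanVarDesc, bnScale,
 *                               bnBias, estimatedMean, estimatedVariance,
 *                               1e-5);
 * @endcode
 */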
/**
* @ingroup dnn
* @brief batchnorm forward computation
* @param [in] handle cce handle
* @param [in] mode mode of batchnorm
* @param [in] bnDesc descriptor of batchnorm operator
* @param [in] alpha scaling factors
* @param [in] beta scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @param [in] bnScaleBiasMeanVarDesc descriptor of scale, bias, mean, variance tensor
* @param [in] bnScale scaling factor
* @param [in] bnBias bias factor
* @param [in] estimatedMean mean
* @param [in] estimatedVariance variance
* @param [in] epsilon epsilon
* @return ccStatus_t
*/
ccStatus_t ccBatchNormFusionForwardInference(ccHandle_t handle, ccBatchNormMode_t mode, ccBatchNormDescriptor_t bnDesc,
const void *alpha, const void *beta, const ccTensorDescriptor_t xDesc,
const void *x, const ccTensorDescriptor_t yDesc, void *y,
const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
const void *bnBias, const void *estimatedMean,
const void *estimatedVariance, double epsilon);
/**
* @ingroup dnn
* @brief create descriptor of batchnorm operator
* @param [in|out] bnDesc point to descriptor of batchnorm operator
* @return ccStatus_t
*/
ccStatus_t ccCreateBatchNormDescriptor(ccBatchNormDescriptor_t *bnDesc);
/**
* @ingroup dnn
* @brief destroy batchnorm descriptor
* @param [in] *bnDesc descriptor of batchnorm operator
* @return ccStatus_t
*/
ccStatus_t ccDestroyBatchNormDescriptor(ccBatchNormDescriptor_t *bnDesc);
/**
* @ingroup dnn
* @brief append operation after batchnorm
* @param [in|out] bnDesc descriptor of batchnorm operator
* @param [in] opType operation type for append at batchnorm operation
* @param [in] opDesc operation descriptor for the opType
* @return ccStatus_t
*/
ccStatus_t ccBatchNormAppendOp(ccBatchNormDescriptor_t bnDesc, ccOpType_t opType, const void *opDesc);
/**
* @ingroup dnn
* @brief get the output 4d dimension info of full connection
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of weight tensor
* @param [in|out] n point to batch size
* @param [in|out] c point to channels
* @param [in|out] h point to height of feature map
* @param [in|out] w point to width of feature map
* @return ccStatus_t
*/
ccStatus_t ccGetFullConnectionFwdOutputDim(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
int32_t *n, int32_t *c, int32_t *h, int32_t *w);
/**
* @ingroup dnn
* @brief full connection forward computation
* @param [in] handle cce handle
* @param [in] fcDesc fc desc
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] wDesc descriptor of weight tensor
* @param [in] w filter data in device memory
* @param [in] biasDesc descriptor of bias tensor
* @param [in] bias bias data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccFullConnectionForwardEx2(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief full connection forward computation with workspace
* @param [in] handle cce handle
* @param [in] fcDesc fc desc
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] wDesc descriptor of weight tensor
* @param [in] w filter data in device memory
* @param [in] biasDesc descriptor of bias tensor
* @param [in] bias bias data in device memory
* @param [in] workSpace workSpace in device memory
* @param [in] workSpaceSizeInBytes workSpace size in bytes
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccFullConnectionForwardWithWorkSpace(ccHandle_t handle, const ccFullConnectionDescriptor_t fcDesc,
const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccFilterDescriptor_t wDesc, const void *w,
const ccTensorDescriptor_t biasDesc, const void *bias, void *workSpace,
uint32_t workSpaceSizeInBytes, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
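/*
 * Example: a full connection forward sketch using the workspace query above
 * (handle, descriptors and device buffers are assumed valid).
 * @code
 *   uint32_t wsSize = 0;
 *   ccGetFullConnectionForwardWorkspaceSize(handle, fcDesc, xDesc, wDesc,
 *                                           yDesc, &wsSize);
 *   // allocate wsSize bytes of device memory into workSpace when wsSize > 0
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccFullConnectionForwardWithWorkSpace(handle, fcDesc, &alpha, xDesc, x,
 *                                        wDesc, w, biasDesc, bias, workSpace,
 *                                        wsSize, &beta, yDesc, y);
 * @endcode
 */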
/**
* @ingroup dnn
* @brief full softmax forward computation
* @param [in] handle cce handle
* @param [in] algo softmax algorithm
* @param [in] softmaxAxis the axis along which softmax is performed
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] workSpace workSpace in device memory
* @param [in] workSpaceSizeInBytes workSpace size in bytes
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSoftmaxForward(ccHandle_t handle, ccSoftmaxAlgo_t algo, int32_t softmaxAxis, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
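/*
 * Example: a softmax-along-channel sketch; the axis value and algo are
 * illustrative assumptions, and no workspace is passed on the assumption
 * none is required for this shape.
 * @code
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccSoftmaxForward(handle, algo, 1,  // softmaxAxis 1 assumed to be channel
 *                    &alpha, xDesc, x, nullptr, 0, &beta, yDesc, y);
 * @endcode
 */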
/**
* @ingroup dnn
* @brief full softmax forward computation
* @param [in] handle cce handle
* @param [in] algo softmax algorithm
* @param [in] softmaxAxis the axis along which softmax is performed
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] workSpace workSpace in device memory
* @param [in] workSpaceSizeInBytes workSpace size in bytes
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @param [in] classNum class number
* @param [in] padNum pad Num
* @return ccStatus_t
*/
ccStatus_t ccSoftmaxClassForward(ccHandle_t handle, ccSoftmaxAlgo_t algo, int32_t softmaxAxis, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc,
void *y, uint32_t classNum, uint32_t padNum);
/**
* @ingroup dnn
* @brief full scale forward computation
* @param [in] handle cce handle
* @param [in] scaleBiasDesc descriptor of scale and bias tensor
* @param [in] scale scaling factor
* @param [in] bias bias factor
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccScaleForward(ccHandle_t handle, const ccTensorDescriptor_t scaleBiasDesc, const void *scale,
const void *bias, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief full scale forward computation
* @param [in] handle cce handle
* @param [in] scaleDesc descriptor of scale tensor
* @param [in] scale scaling factor
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccScaleNoBiasForward(ccHandle_t handle, const ccTensorDescriptor_t scaleDesc, const void *scale,
const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief get the output dimension info of depth to space
* @param [in] xDesc descriptor of input tensor
* @param [in] blockSize the size of block
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @return ccStatus_t
*/
ccStatus_t ccGetDepthToSpaceOutputDim(const ccTensorDescriptor_t xDesc, const int32_t blockSize, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief depth to space forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] blockSize the size of block
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccDepthToSpaceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const int32_t blockSize, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
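/*
 * Example: a blockSize-2 depth-to-space sketch; the output-dim query above
 * sizes the result tensor (a 4-D dim buffer is an assumption).
 * @code
 *   int32_t dimCnt = 0, dim[4] = {0};
 *   ccGetDepthToSpaceOutputDim(xDesc, 2, &dimCnt, dim, 4);
 *   // build outputDesc from dimCnt/dim, then:
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccDepthToSpaceForward(handle, &alpha, xDesc, x, 2, &beta, outputDesc,
 *                         output);
 * @endcode
 */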
/**
* @ingroup dnn
* @brief get the output dimension info of space to depth
* @param [in] xDesc descriptor of input tensor
* @param [in] blockSize the size of block
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @return ccStatus_t
*/
ccStatus_t ccGetSpaceToDepthOutputDim(const ccTensorDescriptor_t xDesc, const int32_t blockSize, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief space to depth forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] blockSize the size of block
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSpaceToDepthForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const int32_t blockSize, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
* @brief full eltwise forward computation
* @param [in] handle cce handle
* @param [in] eltDesc eltwise descriptor
* @param [in] mode mode of eltwise
* @param [in] alpha scaling factors
* @param [in] broadcast (reserved) whether tensor broadcasting is supported
* @param [in] xDesc[] array of descriptor for input tensor
* @param [in] x array of input data in device memory
* @param [in] inputNum the number of input tensors
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccEltwiseForwardEx(ccHandle_t handle, ccEltwiseDescriptor_t eltDesc, ccEltwiseMode_t mode, int32_t inputNum,
const void *alpha, bool broadcast, const ccTensorDescriptor_t xDesc[], const void *x[],
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
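/*
 * Example: a two-input eltwise sketch. CC_ELTWISE_SUM is an assumed mode enum
 * from cce.h; both inputs share one shape since broadcast is reserved.
 * @code
 *   const ccTensorDescriptor_t xDescArr[2] = {xDesc0, xDesc1};
 *   const void *xArr[2] = {x0, x1};
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccEltwiseForwardEx(handle, eltDesc, CC_ELTWISE_SUM, 2, &alpha, false,
 *                      xDescArr, xArr, &beta, yDesc, y);
 * @endcode
 */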
/**
* @ingroup dnn
* @brief create descriptor of eltwise operator
* @param [in|out] eltDesc point to descriptor of eltwise operator
* @return ccStatus_t
*/
ccStatus_t ccCreateEltwiseDescriptor(ccEltwiseDescriptor_t *eltDesc);
/**
* @ingroup dnn
* @brief destroy eltwise descriptor
* @param [in] *eltDesc descriptor of eltwise operator
* @return ccStatus_t
*/
ccStatus_t ccDestroyEltwiseDescriptor(ccEltwiseDescriptor_t *eltDesc);
/**
* @ingroup dnn
* @brief append operation after eltwise
* @param [in|out] eltDesc descriptor of eltwise operator
* @param [in] opType operation type for append at eltwise operation
* @param [in] opDesc operation descriptor for the opType
* @return ccStatus_t
*/
ccStatus_t ccEltwiseAppendOp(ccEltwiseDescriptor_t eltDesc, ccOpType_t opType, const void *opDesc);
/**
* @ingroup dnn
* @brief set eltwise descriptor's quantize parameters
* @param [in] eltDesc eltwise descriptor
* @param [in] quantizeInfo descriptor of quantize parameters
* @return ccStatus_t
*/
ccStatus_t ccSetEltwiseQuantizeInfo(ccEltwiseDescriptor_t eltDesc, const ccQuantizeDescriptor_t QuantizeInfo);
/**
* @ingroup dnn
* @brief get the temp space size of reshape forward computation (temp space may not be needed)
* @param [in] handle cce handle
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in|out] sizeInBytes temp space size needed for the specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetReshapeForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief reshape the input tensor
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc input tensor
* @param [in] x input data
* @param [in] workSpace temp space, may be NULL if no temp space is needed
* @param [in] workSpaceSizeInBytes sizeof workspace
* @param [in] beta scaling factors
* @param [in] yDesc output tensor
* @param [in|out] y output data
* @return ccStatus_t
*/
ccStatus_t ccReshapeForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
void *workSpace, uint32_t workSpaceSizeInBytes, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief reshape the input tensor for data in ND format
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc input tensor
* @param [in] x input data
* @param [in] workSpace temp space, may be NULL if no temp space is needed
* @param [in] workSpaceSizeInBytes sizeof workspace
* @param [in] beta scaling factors
* @param [in] yDesc output tensor
* @param [in|out] y output data
* @return ccStatus_t
*/
ccStatus_t ccNdReshapeForward(ccHandle_t handle, const void *alpha, ccTensorFormat_t rawFormat,
const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc,
void *y);
/**
* @ingroup dnn
* @brief Four2Five forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] yDesc descriptor of output tensor
* @param [in | out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccFour2FiveForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief Five2Four forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] yDesc descriptor of output tensor
* @param [in | out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccFive2FourForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief get the temp space size of add forward computation
* @param [in] handle cce handle
* @param [in] xDesc descriptor of the first input tensor
* @param [in] wDesc descriptor of the second input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in|out] sizeInBytes temp space size needed for the specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetAddForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief Add forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x one input data in device memory
* @param [in] wDesc descriptor of input tensor
* @param [in] w the other input data in device memory
* @param [in] beta bias factors
* @param [in] workSpace the workspace address allocated in HBM
* @param [in] workSpaceSizeInBytes the workspace size allocated in HBM
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccAddForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief Stack forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x[] host memory array whose elements are device addresses of the input data
* @param [in] num number of input tensor
* @param [in] axis along which axis to stack the input tensor
* @param [in] beta bias factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccStackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x[],
uint32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief get the output dimension info of stack
* @param [in] xDesc descriptor of input tensor
* @param [in] num number of input tensor
* @param [in] axis along which axis to stack the input tensor
* @param [in|out] n point to batch size
* @param [in|out] c point to channels
* @param [in|out] h point to height
* @param [in|out] w point to width
* @param [in|out] realDimCnt point to real dimCnt after stack
* @return ccStatus_t
*/
ccStatus_t ccGetStackOutputDim(const ccTensorDescriptor_t xDesc, uint32_t num, int32_t axis, int32_t *n, int32_t *c,
int32_t *h, int32_t *w, int32_t *realDimCnt);
/**
* @ingroup dnn
* @brief get the output dimension info of stack
* @param [in] xDesc descriptor of input tensor
* @param [in] num number of input tensor
* @param [in] axis along which axis to stack the input tensor
* @param [in|out] dimCnt dimcnt
* @param [in|out] dim save dim value
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetStackOutputDim(const ccTensorDescriptor_t xDesc, uint32_t num, int32_t axis, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief return whether grid generator is needed
* @param [in] inputH, inputW, outputH, outputW, alignCorner (Interp uses true; ResizeBilinear depends on its align
* corner parameter)
* @param [out] needGridFlag true means needed, false means not needed
* @return ccStatus_t
*/
ccStatus_t ccIsGridGenetatorNeed(int32_t inputH, int32_t inputW, int32_t outputH, int32_t outputW, bool alignCorner,
bool &needGridFlag);
/**
* @ingroup dnn
* @brief get the temp space size of Deconvolution forward computation (temp space may not be needed)
* @param [in] handle cce handle
* @param [in] deconvDesc descriptor of Deconvolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of filter
* @param [in] yDesc descriptor of output tensor
* @param [in] algo algorithm of Deconvolution forward
* @param [in|out] sizeInBytes temp space size needed for the specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetDeconvolutionForwardWorkspaceSize(ccHandle_t handle, const ccConvolutionDescriptor_t deconvDesc,
const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
const ccTensorDescriptor_t yDesc, ccConvolutionFwdAlgo_t algo,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief Deconvolution forward computation
* @param [in] handle cce handle
* @param [in] deconvDesc descriptor of deconvolution operator
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] wDesc descriptor of filter
* @param [in] w filter data in device memory
* @param [in] biasDesc descriptor of bias
* @param [in] bias bias data in device memory
* @param [in] algo algorithm of deconvolution forward
* @param [in] workSpace temp space, may be NULL if no temp space is needed
* @param [in] workSpaceSizeInBytes sizeof workspace
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccDeconvolutionForward(ccHandle_t handle, const ccConvolutionDescriptor_t deconvDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const ccFilterDescriptor_t wDesc,
const void *w, const ccTensorDescriptor_t biasDesc, const void *bias,
ccConvolutionFwdAlgo_t algo, void *workSpace, uint32_t workSpaceSizeInBytes,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
#define MODE_C_N (0)
#define MODE_N_C (1)
/**
* [ccArgMaxForward]
* @param [in] handle [handle]
* @param [in] alpha [reserved parameters]
* @param [in] xDesc [x tensor descriptor]
* @param [in] x [input tensor]
* @param [in] outMaxVal [whether to return the maximum value; true: return max value, false: return max value index]
* @param [in] topK [The number that returns the maximum index or maximum value]
* @param [in] axis [Describes which axis of the input Tensor to reduce across]
* @param [in] beta [reserved parameters]
* @param [in] yDesc [y tensor descriptor]
* @param [in|out] y [The max value index or max value tensor]
*/
ccStatus_t ccArgMaxForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
bool outMaxVal, uint32_t topK, int32_t axis, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
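/*
 * Example: a top-1 argmax sketch returning indices rather than values
 * (outMaxVal = false); the topK and axis values are illustrative assumptions.
 * @code
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccArgMaxForward(handle, &alpha, xDesc, x,
 *                   false, 1, 1,  // outMaxVal, topK, axis (channel assumed)
 *                   &beta, yDesc, y);
 * @endcode
 */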
/**
* [ccGetArgMaxOutputDim]
* @param [in] xDesc [x tensor descriptor]
* @param [in] outMaxVal [whether to return the maximum value; true: return max value, false: return max value index]
* @param [in] topK [The number that returns the maximum index or maximum value]
* @param [in] axis [Describes which axis of the input Tensor to reduce across]
* @param [in|out] dimCnt [point to the output dimCnt]
* @param [in|out] dim [arrays to save dims]
* @param [in] dimLen length of dim
*/
ccStatus_t ccGetArgMaxOutputDim(const ccTensorDescriptor_t xDesc, bool outMaxVal, uint32_t topK, int32_t axis,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* [ccGetArgMaxOutputDim]
* @param [in] xDesc [x tensor descriptor]
* @param [in] outMaxVal [whether to return the maximum value; true: return max value, false: return max value index]
* @param [in] topK [The number that returns the maximum index or maximum value]
* @param [in] axis [Describes which axis of the input Tensor to reduce across]
* @param [in|out] n [batch size of the output tensor]
* @param [in|out] c [channels of the output tensor]
* @param [in|out] h [height of the output tensor]
* @param [in|out] w [width of the output tensor]
*/
ccStatus_t ccGetArgMaxOutputDim(const ccTensorDescriptor_t xDesc, bool outMaxVal, uint32_t topK, int32_t axis,
int32_t *n, int32_t *c, int32_t *h, int32_t *w);
/**
* @ingroup dnn
* @brief Yolo2ReorgForward computation
* @param [in] handle CCE handle
* @param [in] stride scale parameter
* @param [in] reverse reverse parameter
* @param [in] alpha alpha factor
* @param [in] beta beta factor
* @param [in] xDesc x-tensor descriptor
* @param [in] x x-tensor in device memory
* @param [in] workSpaceSizeInBytes temporary work space size
* @param [in] workSpace temporary work space in device memory
* @param [in] yDesc y-tensor descriptor
* @param [out] y y-tensor in device memory
* @return ccStatus_t
*/
ccStatus_t ccYolo2ReorgForward(ccHandle_t handle, int32_t stride, bool reverse, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, uint32_t workSpaceSizeInBytes,
void *workSpace, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @param [in] stride scale parameter
* @param [in] reverse reverse parameter
* @param [in] xDesc x-tensor descriptor
* @param [in|out] n point to batch size
* @param [in|out] c point to channels
* @param [in|out] h point to height of feature map
* @param [in|out] w point to width of feature map
*/
ccStatus_t ccGetReorgOutPutDim(int32_t stride, bool reverse, const ccTensorDescriptor_t xDesc, int32_t *n, int32_t *c,
int32_t *h, int32_t *w);
/**
* @param [in] stride scale parameter
* @param [in] reverse reverse parameter
* @param [in] xDesc x-tensor descriptor
* @param [out] dimCnt output tensor dim cnt
* @param [out] dim output tensor dim
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetReorgOutPutDim(int32_t stride, bool reverse, const ccTensorDescriptor_t xDesc, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @param [in] xDesc x-tensor descriptor
* @param [out] sizeInBytes temporary work space size
*/
ccStatus_t ccGetYolo2ReorgForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief full shuffle channel forward computation
* @param [in] handle cce handle
* @param [in] groupNum number of groups in a channel
* @param [in] subgroupNum number of sub-groups in a group
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccShuffleChannelForward(ccHandle_t handle, int32_t groupNum, int32_t subgroupNum, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief get the temp space size of permute forward computation (temp space may not be needed)
* @param [in] handle cce handle
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of output tensor
* @param [in|out] sizeInBytes temp space size needed for the specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetPermuteForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief get the output dim of permute forward computation
* @param [in] xDesc descriptor of input tensor
* @param [in] dimIndex dim Index
* @param [in|out] dimCnt dim count
* @param [in|out] dim dim value
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetPermuteOutputDim(const ccTensorDescriptor_t xDesc, const int32_t dimIndex[], const int32_t dimIndexLen,
int32_t *dimCnt, int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief full permute forward computation
* @param [in] handle cce handle
* @param [in] dimIndex dim index, only supports [0,1,2,3]
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] workSpace temp space, may be NULL if no temp space is needed
* @param [in] workSpaceSizeInBytes sizeof workspace
* @param [in] beta scaling factors
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccPermuteForward(ccHandle_t handle, const int32_t dimIndex[], const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, void *workspace,
uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
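/*
 * Example: an NCHW-to-NHWC permute sketch (dimIndex {0, 2, 3, 1}); the
 * workspace query above decides whether temp space is required.
 * @code
 *   uint32_t wsSize = 0;
 *   ccGetPermuteForwardWorkspaceSize(handle, xDesc, yDesc, &wsSize);
 *   // allocate wsSize bytes of device memory into workSpace when wsSize > 0
 *   const int32_t dimIndex[4] = {0, 2, 3, 1};
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccPermuteForward(handle, dimIndex, &alpha, xDesc, x, workSpace, wsSize,
 *                    &beta, yDesc, y);
 * @endcode
 */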
/**
* @ingroup dnn
* @brief full split forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt)
* @param [in] num the number of outputs
* @param [in] beta scaling factors
* @param [in] yDescArr descriptors of output tensors
* @param [in|out] yArr output data array in device memory
* @return ccStatus_t
*/
ccStatus_t ccSplitForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
int32_t axis, uint32_t num, const void *beta, const ccTensorDescriptor_t yDescArr[],
void *yArr[]);
/**
* @ingroup dnn
* @brief get the output dimensions info of split
* @param [in] xDesc descriptor of input tensor
* @param [in] axis the dimension along which to split. Must be in the range [-xDesc->dimCnt, xDesc->dimCnt)
* @param [in] num the number of outputs
* @param [in] sizes Optional, used to specify the sizes of each output tensor along split dim. The tensor x would
* be split evenly along split dim if sizes is NULL
* @param [in|out] nArr point to the first element of batch sizes
* @param [in|out] cArr point to the first element of channels
* @param [in|out] hArr point to the first element of heights of feature map
* @param [in|out] wArr point to the first element of widths of feature map
* @return ccStatus_t
*/
ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num,
const uint32_t sizes[], uint32_t nArr[], uint32_t cArr[], uint32_t hArr[],
uint32_t wArr[]);
/**
* @ingroup dnn
* @brief Get split output shape(s).
* @param [in] xDesc input tensor, supports ND and NC1HWC0
* @param [in] axis split axis; a negative axis is increased by dimCnt once
* @param [in] num number of splits
* @param [in] sizes split dim sizes on axis; if NULL, the input is divided into num equal parts
* @param [in|out] dimCnt array of split dimCnts, in one-to-one correspondence with the split outputs
* @param [in|out] dim array of split dim arrays, in one-to-one correspondence with the split outputs
* @param [in] dimLen length of dim; pass the length of the entire space pointed to by dim, not just the length of
* one dim array (dim is a two-level array, so dimLen = lengthof dim[][], not lengthof dim[])
* @return ccStatus_t
*/
ccStatus_t ccGetSplitForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, uint32_t num,
const uint32_t sizes[], int32_t *dimCnt, int32_t *dim[], int32_t dimLen);
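/*
* Sketch of the two-level dim layout expected by the ND overload above
* (illustrative only; the 8-entry row size is an assumption):
*
*   int32_t dimCnt[2];                  // one dimCnt per split output
*   int32_t row0[8], row1[8];           // backing storage, 8 values per row here
*   int32_t *dim[2] = {row0, row1};     // dim is an array of per-output dim arrays
*   // dimLen counts the whole backing space: 2 rows * 8 = 16, not 8
*   ccGetSplitForwardOutputDim(xDesc, 1, 2, NULL, dimCnt, dim, 16);
*/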
/**
* @ingroup dnn
* @brief create weight compress info
* @param [in|out] compressInfo point to CompressInfo
* @return ccStatus_t
*/
ccStatus_t ccCreateWeightCompressInfo(ccWeightCompressInfo_t **compressInfo);
/**
* @ingroup dnn
* @brief destroy weight compress info
* @param [in] *compressInfo point to CompressInfo
* @return ccStatus_t
*/
ccStatus_t ccDestroyWeightCompressInfo(ccWeightCompressInfo_t **compressInfo);
/**
* @ingroup dnn
* @brief create compress table
* @param [in|out] compressTab point to weight compress table
* @return ccStatus_t
*/
ccStatus_t ccCreateWeightCompressTab(ccWeightCompressTab_t **compressTab);
/**
* @ingroup dnn
* @brief destroy compress table
* @param [in] compressTab point to weight compress table
* @return ccStatus_t
*/
ccStatus_t ccDestroyWeightCompressTab(ccWeightCompressTab_t **compressTab);
/**
* @ingroup dnn
* @brief get fc compress info
* @param [in] xDesc descriptor of input tensor
* @param [in] wDesc descriptor of weight tensor
* @param [in] biasDesc descriptor of bias tensor
* @param [in] dataTypeTransmode mode of data type transform
* @param [in] weightCompressInfo compress info, compute based on tiling method
* @param [in|out] outputSize output data size in byte
* @param [in|out] infoTabSize compress info table size in byte
* @return ccStatus_t
*/
ccStatus_t ccGetCompressedFcWeightInfo(const ccTensorDescriptor_t xDesc, const ccFilterDescriptor_t wDesc,
const ccTensorDescriptor_t biasDesc, ccDataTypeTransMode_t dataTypeTransmode,
ccWeightCompressInfo_t *weightCompressInfo, uint32_t *outputSize,
uint32_t *infoTabSize);
/**
* @ingroup dnn
* @brief compress fc
* @param [in] wDesc descriptor of weight tensor
* @param [in] w filter data in device memory
* @param [in] weightCompressInfo compress info, compute based on tiling method
* @param [in] dataTypeTransmode mode of data type transform
* @param [in|out] yDesc descriptor of output filter tensor
* @param [in|out] y output data in device memory
* @param [in] ySize transformed data size in byte
* @param [in|out] yCompressedSize compressed output data size in byte
* @param [in|out] infoTab compressed info table
* @param [in] infoTabSize compressed info table size in byte
* @return ccStatus_t
*/
ccStatus_t ccCompressWeight(const ccFilterDescriptor_t wDesc, const void *w,
const ccWeightCompressInfo_t *weightCompressInfo, ccDataTypeTransMode_t dataTypeTransmode,
ccFilterDescriptor_t yDesc, void *y, uint32_t ySize, uint32_t *yCompressedSize,
void *infoTab, uint32_t infoTabSize);
/**
* @ingroup dnn
* @brief restore compressed fc data
* @param [in] x input data in device memory
* @param [in] xSizeInBytes input compressed weight data size in byte
* @param [in|out] y output data in device memory
* @param [in] ySizeInBytes output data size in byte
* @return ccStatus_t
*/
ccStatus_t ccRestoreCompressedWeight(const void *x, uint32_t xSizeInBytes, void *y, uint32_t ySizeInBytes,
rtMemcpyKind_t kind);
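/*
* Weight-compression flow sketch (illustrative only; buffer allocation and the
* transMode value are assumptions created elsewhere):
*
*   ccWeightCompressInfo_t *info = NULL;
*   ccCreateWeightCompressInfo(&info);
*   uint32_t outputSize = 0, infoTabSize = 0;
*   ccGetCompressedFcWeightInfo(xDesc, wDesc, biasDesc, transMode, info, &outputSize, &infoTabSize);
*   // allocate y (outputSize bytes) and infoTab (infoTabSize bytes), then:
*   uint32_t yCompressedSize = 0;
*   ccCompressWeight(wDesc, w, info, transMode, yDesc, y, outputSize, &yCompressedSize, infoTab, infoTabSize);
*   ccDestroyWeightCompressInfo(&info);
*/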
/**
* @ingroup dnn
* @brief create quantize parameters struct
* @param [in|out] quantizeInfo descriptor of quantize parameters
* @return ccStatus_t
*/
ccStatus_t ccCreateQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo);
/**
* @ingroup dnn
* @brief destroy quantize parameters struct
* @param [in] quantizeInfo descriptor of quantize parameters
* @return ccStatus_t
*/
ccStatus_t ccDestoryQuantizeInfoTab(ccQuantizeDescriptor_t *quantizeInfo);
/**
* @ingroup dnn
* @brief set quantize parameters
* @param [in] quantizeInfo descriptor of quantize parameters
* @param [in] scaleValMode enum type for quantize scale value type (normal or sqrt)
* @param [in] scale quantize scale value
* @param [in] offset quantize offset (should be configured when the quantize algorithm is half offset or full
*             offset)
* @param [in] offsetPad padding value for load3d (only for half offset or full offset)
* @return ccStatus_t
*/
ccStatus_t ccSetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
const uint16_t *scale, const uint16_t *offset, const uint8_t *offsetPad);
/**
* @ingroup dnn
* @brief set Requantize parameters
* @param [in] quantizeInfo descriptor of quantize parameters
* @param [in] scaleValMode enum type for requantize scale value type (normal or sqrt)
* @param [in] scaleRq requantize scale value
* @param [in] nextLayerOffset offset of the next layer (configured when the quantize algorithm is half offset or
*             full offset)
* @param [in] offsetw offset for filter (only configured for full offset quantize)
* @return ccStatus_t
*/
ccStatus_t ccSetReQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
const uint16_t *scaleRq, const uint16_t *nextLayerOffset, const int32_t *offsetw);
/**
* @ingroup dnn
* @brief set Dequantize parameters
* @param [in] quantizeInfo descriptor of quantize parameters
* @param [in] scaleValMode enum type for dequantize scale value type (normal or sqrt)
* @param [in] scaleDq quantize scale value
* @param [in] offsetw offset for filter (only config for full offset quantize)
* @return ccStatus_t
*/
ccStatus_t ccSetDeQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, ccScaleValueMode_t scaleValMode,
const uint16_t *scaleDq, const int32_t *offsetw);
/**
* @ingroup dnn
* @brief set convolution descriptor's quantize parameters
* @param [in] convDesc convolution descriptor
* @param [in] quantizeInfo descriptor of quantize parameters
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionQuantizeInfo(ccConvolutionDescriptor_t convDesc, const ccQuantizeDescriptor_t QuantizeInfo);
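/*
* Quantization setup sketch (illustrative only; the scale-mode enum name and
* the fp16 scale encoding are assumptions):
*
*   ccQuantizeDescriptor_t quantizeInfo = NULL;
*   ccCreateQuantizeInfoTab(&quantizeInfo);
*   uint16_t scale = 0x3C00;                        // 1.0 in fp16, placeholder value
*   ccSetQuantizeFactors(quantizeInfo, CC_SCALE_NORMAL,  // assumed enum name
*                        &scale, NULL, NULL);       // no offset/offsetPad for non-offset quantize
*   ccSetConvolutionQuantizeInfo(convDesc, quantizeInfo);
*   ccDestoryQuantizeInfoTab(&quantizeInfo);        // note: API spells "Destory"
*/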
/**
* @ingroup dnn
* @brief set all offset quantize parameters
* @param [in] quantizeInfo descriptor of quantize parameters
* @param [in] offsetW offset for filter
* @param [in] offsetD offset for input data
* @param [in] scaleReq requantize scale value
* @param [in] offsetDNext offset of data for the next layer
* @return ccStatus_t
*/
ccStatus_t ccSetAllOffsetQuantizeFactors(ccQuantizeDescriptor_t quantizeInfo, const uint8_t *offsetW,
const uint8_t *offsetD, const uint16_t *scaleReq, const uint16_t *offsetDNext);
/**
* @ingroup dnn
* @brief set full connection descriptor's quantize parameters
* @param [in] fcDesc full connection descriptor
* @param [in] quantizeInfo descriptor of quantize parameters
* @return ccStatus_t
*/
ccStatus_t ccSetFullConnectionQuantizeInfo(ccFullConnectionDescriptor_t fcDesc,
const ccQuantizeDescriptor_t QuantizeInfo);
/**
* @ingroup dnn
* @brief set pooling descriptor's quantize parameters
* @param [in] poolingDesc pooling descriptor
* @param [in] quantizeInfo descriptor of quantize parameters
* @return ccStatus_t
*/
ccStatus_t ccSetPoolingQuantizeInfo(ccPoolingDescriptor_t poolingDesc, const ccQuantizeDescriptor_t QuantizeInfo);
/**
* @ingroup dnn
* @brief set full connection descriptor's info table
* @param [in] fcDesc full connection descriptor
* @param [in] infoTabSize table size
* @param [in] infoTab pointer to info table
* @return ccStatus_t
*/
ccStatus_t ccSetFullConnectionDescriptor(ccFullConnectionDescriptor_t fcDesc, uint32_t infoTabSize, const void *infoTab,
ccFullConnectFwdAlgo_t algo = CC_FULLCONNECT_FWD_ALGO_HALF);
/**
* @ingroup dnn
* @brief append an operation to the full connection descriptor
* @param [in] fcDesc full connection descriptor
* @param [in] opType operation type to append to the full connection operation
* @param [in] opDesc operation descriptor for the opType
* @return ccStatus_t
*/
ccStatus_t ccFullConnectionAppendOp(ccFullConnectionDescriptor_t fcDesc, tagCcOpType opType, const void *opDesc);
/**
* @ingroup dnn
* @brief check aipp basic info
* @param [in] inputFormat format of input image
* @param [in] loadStartPosH vertical start position in source image
* @param [in] loadStartPosW horizontal start position in source image
* @param [in] srcImageSizeH vertical size of source image
* @param [in] srcImageSizeW horizontal size of source image
* @param [in] cpaddingValue C direction padding value
* @param [in] cscSwitch csc enable or not
* @param [in] rbuvSwapSwitch swap R/U and B/V position of the image
* @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV
* @param [in] singleLineMode when set to 1, only one line is read; in this case the vertical size configuration is
*             not used.
* @return ccStatus_t
*/
ccStatus_t ccCheckConvolutionAippCommInfo(ccAippInputFormat_t inputFormat, int32_t loadStartPosW, int32_t loadStartPosH,
int32_t srcImageSizeW, int32_t srcImageSizeH, float cpaddingValue,
bool cscSwitch, bool rbuvSwapSwitch, bool axSwapSwitch, bool singleLineMode);
/**
* @ingroup dnn
* @brief check aipp dtc info
* @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x
* @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x
* @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x
* @return ccStatus_t
*/
ccStatus_t ccCheckConvolutionAippDtcInfo(int32_t dtcPixelMeanChn0, int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2,
float dtcPixelMinChn0, float dtcPixelMinChn1, float dtcPixelMinChn2,
float dtcPixelVarReciChn0, float dtcPixelVarReciChn1,
float dtcPixelVarReciChn2);
/**
* @ingroup dnn
* @brief check aipp pad info
* @param [in] paddingMode padding mode
* @param [in] leftPaddingSize left hblank/padding size
* @param [in] rightPaddingSize right hblank/padding size
* @param [in] topPaddingSize top padding size
* @param [in] bottomPaddingSize bottom padding size
* @return ccStatus_t
*/
ccStatus_t ccCheckConvolutionAippPadInfo(ccAippPaddingMode_t paddingMode, int32_t leftPaddingSize,
int32_t rightPaddingSize, int32_t topPaddingSize, int32_t bottomPaddingSize);
/**
* @ingroup dnn
* @brief check aipp csc info
* @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n
* @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m
* @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m
* @return ccStatus_t
*/
ccStatus_t ccCheckConvolutionAippCscInfo(int32_t cscMatrixR0C0, int32_t cscMatrixR0C1, int32_t cscMatrixR0C2,
int32_t cscMatrixR1C0, int32_t cscMatrixR1C1, int32_t cscMatrixR1C2,
int32_t cscMatrixR2C0, int32_t cscMatrixR2C1, int32_t cscMatrixR2C2,
int32_t cscOutputBias0, int32_t cscOutputBias1, int32_t cscOutputBias2,
int32_t cscInputBias0, int32_t cscInputBias1, int32_t cscInputBias2);
/**
* @ingroup dnn
* @brief check aipp scf info
* @param [in] scfSwitch scaling enable or not
* @param [in] scfInputW input width of scaling
* @param [in] scfInputH input height of scaling
* @param [in] scfOutputW output width of scaling
* @param [in] scfOutputH output height of scaling
* @return ccStatus_t
*/
ccStatus_t ccCheckConvolutionAippScfInfo(bool scfSwitch, int32_t scfInputW, int32_t scfInputH, int32_t scfOutputW,
int32_t scfOutputH);
/**
* @ingroup dnn
* @brief check aipp param
* @param [in] convDesc descriptor of conv operator
* @param [in] xDesc input tensor info
* @param [in] yDesc output tensor info
* @return ccStatus_t
*/
ccStatus_t ccCheckConvFwdAippParam(const ccConvolutionDescriptor_t convDesc, const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t yDesc);
/**
* @ingroup dnn
* @brief init aipp basic info
* @param [in|out] convDesc descriptor of conv operator
* @param [in] inputFormat format of input image
* @param [in] loadStartPosH vertical start position in source image
* @param [in] loadStartPosW horizontal start position in source image
* @param [in] srcImageSizeH vertical size of source image
* @param [in] srcImageSizeW horizontal size of source image
* @param [in] cpaddingValue C direction padding value
* @param [in] cscSwitch csc enable or not
* @param [in] rbuvSwapSwitch swap R/U and B/V position of the image
* @param [in] axSwapSwitch swap RGBA->ARGB, YUVA->AYUV
* @param [in] singleLineMode when set to 1, only one line is read; in this case the vertical size configuration is
*             not used.
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionAippCommInfo(ccConvolutionDescriptor_t convDesc, ccAippInputFormat_t inputFormat,
int32_t loadStartPosW, int32_t loadStartPosH, int32_t srcImageSizeW,
int32_t srcImageSizeH, float cpaddingValue, bool cscSwitch, bool rbuvSwapSwitch,
bool axSwapSwitch, bool singleLineMode);
/**
* @ingroup dnn
* @brief init aipp dtc info
* @param [in|out] convDesc descriptor of conv operator
* @param [in] dtcPixelMeanChnx Mean value for YUV or RGB data channel x
* @param [in] dtcPixelMinChnx Min value for YUV or RGB data channel x
* @param [in] dtcPixelVarReciChnx Reciprocal of variance or (max-min) for YUV or RGB data channel x
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionAippDtcInfo(ccConvolutionDescriptor_t convDesc, int32_t dtcPixelMeanChn0,
int32_t dtcPixelMeanChn1, int32_t dtcPixelMeanChn2, float dtcPixelMinChn0,
float dtcPixelMinChn1, float dtcPixelMinChn2, float dtcPixelVarReciChn0,
float dtcPixelVarReciChn1, float dtcPixelVarReciChn2);
/**
* @ingroup dnn
* @brief init aipp pad info
* @param [in|out] convDesc descriptor of conv operator
* @param [in] paddingMode padding mode
* @param [in] leftPaddingSize left hblank/padding size
* @param [in] rightPaddingSize right hblank/padding size
* @param [in] topPaddingSize top padding size
* @param [in] bottomPaddingSize bottom padding size
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionAippPadInfo(ccConvolutionDescriptor_t convDesc, ccAippPaddingMode_t paddingMode,
int32_t leftPaddingSize, int32_t rightPaddingSize, int32_t topPaddingSize,
int32_t bottomPaddingSize);
/**
* @ingroup dnn
* @brief init aipp csc info
* @param [in|out] convDesc descriptor of conv operator
* @param [in] cscMatrixRmCn 3x3 CSC matrix for YUV to RGB or RGB to YUV, element of row m and column n
* @param [in] cscOutputBiasm output Bias for RGB to YUV, element of row m
* @param [in] cscInputBiasm input Bias for YUV to RGB, element of row m
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionAippCscInfo(ccConvolutionDescriptor_t convDesc, int32_t cscMatrixR0C0, int32_t cscMatrixR0C1,
int32_t cscMatrixR0C2, int32_t cscMatrixR1C0, int32_t cscMatrixR1C1,
int32_t cscMatrixR1C2, int32_t cscMatrixR2C0, int32_t cscMatrixR2C1,
int32_t cscMatrixR2C2, int32_t cscOutputBias0, int32_t cscOutputBias1,
int32_t cscOutputBias2, int32_t cscInputBias0, int32_t cscInputBias1,
int32_t cscInputBias2);
/**
* @ingroup dnn
* @brief init aipp scf info
* @param [in|out] convDesc descriptor of conv operator
* @param [in] scfSwitch scaling enable or not
* @param [in] scfInputW input width of scaling
* @param [in] scfInputH input height of scaling
* @param [in] scfOutputW output width of scaling
* @param [in] scfOutputH output height of scaling
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionAippScfInfo(ccConvolutionDescriptor_t convDesc, bool scfSwitch, int32_t scfInputW,
int32_t scfInputH, int32_t scfOutputW, int32_t scfOutputH);
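/*
* AIPP configuration sketch (illustrative only; the input-format enum name and
* the image geometry below are assumptions, not values prescribed by this
* header):
*
*   ccSetConvolutionAippCommInfo(convDesc,
*                                CC_AIPP_INPUT_FORMAT_YUV420SP_U8,  // assumed enum name
*                                0, 0,                              // loadStartPosW/H
*                                224, 224,                          // srcImageSizeW/H
*                                0.0f,                              // cpaddingValue
*                                true, false, false, false);        // cscSwitch, rbuvSwap, axSwap, singleLine
*   ccSetConvolutionAippDtcInfo(convDesc,
*                               104, 117, 123,                      // channel means (placeholders)
*                               0.0f, 0.0f, 0.0f,                   // channel mins
*                               1.0f, 1.0f, 1.0f);                  // variance reciprocals
*   ccCheckConvFwdAippParam(convDesc, xDesc, yDesc);                // validate before running
*/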
/**
* @ingroup dnn
* @brief set dynamic aipp parameter address and enflag info
* @param [in|out] convDesc descriptor of conv operator
* @param [in] dyncParaAddr aipp parameter address
* @param [in] dyncAippFlag flag to show whether to use dynamic aipp
* @param [in] rotationFlag flag to show whether rotation is enabled, default false
* @return ccStatus_t
*/
ccStatus_t ccSetConvolutionAippDyncParaAddr(ccConvolutionDescriptor_t convDesc, const void *dyncParaAddr,
bool dyncAippFlag, bool rotationFlag = false);
/**
* @ingroup dnn
* @brief check dynamic aipp parameter
* @param [in] dyncParaAddr aipp parameter address
* @param [in] dataLength parameter length
* @param [in] convolutionDimW convDimW
* @param [in] convolutionDimH convDimH
* @return ccStatus_t
*/
ccStatus_t ccCheckDynamicAippParam(const void *dynamicParamAddr, uint32_t dataLength, int64_t convolutionDimW,
int64_t convolutionDimH);
/**
* @ingroup dnn
* @brief trans mean and var
* @param [in|out] mean mean' = bnScale / sqrt(var)
* @param [in|out] var var' = -bnScale * mean / sqrt(var) + bnBias
* @return ccStatus_t
*/
ccStatus_t ccTransBatchnormMeanAndVar(void *mean, void *var, const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
const void *alpha, const void *beta, void *bnScale, void *bnBias, double epsilon);
/**
* @ingroup dnn
* @brief init deconvolution adj or targetShape info.
* @param [in] convDesc conv descriptor.
* @param [in] adjSize size of the adj array.
* @param [in] adj adjust values for output H and W.
* @param [in] targetShapeSize size of the targetShape array.
* @param [in] targetShape values of output shape; if this pointer is set, adj is ignored.
* @return ccStatus_t
*/
ccStatus_t ccSetDeconvolutionOutShapeInfo(ccConvolutionDescriptor_t convDesc, uint32_t adjSize, const uint32_t *adj,
uint32_t targetShapeSize, const uint32_t *targetShape);
/**
* @ingroup dnn
* @brief gather elements according to the indices.
* @param [in] alpha reserved.
* @param [in] xDesc description of the tensor from which to gather elements.
* @param [in] x data point of the tensor from which to gather elements.
* @param [in] indicesDesc description of the tensor of indices.
* @param [in] indices data point of the tensor of indices.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccGatherNdForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t indicesDesc, const void *indices, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of gather_nd.
* @param [in] xDesc description of the tensor from which to gather elements.
* @param [in] indicesDesc description of the tensor of indices.
* @param [output] n dim-size of n-dim.
* @param [output] c dim-size of c-dim.
* @param [output] h dim-size of h-dim.
* @param [output] w dim-size of w-dim.
* @param [output] realDimCnt real dim.
* @return ccStatus_t
*/
ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc, int32_t *n,
int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
/**
* @ingroup dnn
* @brief get output shape of gather_nd.
* @param [in] xDesc description of the tensor from which to gather elements.
* @param [in] indicesDesc description of the tensor of indices.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetGatherNdOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t indicesDesc,
int32_t *dimCnt, int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief tile tensor by multiples.
* @param [in] alpha reserved.
* @param [in] xDesc description of the tensor which to be tiled.
* @param [in] x data point of the tensor which to be tiled.
* @param [in] multiples tile coefficient of each dim.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccTileForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccIntArray_t *multiples, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
* @brief get output shape of tile.
* @param [in] xDesc description of the dividend tensor.
* @param [in] multiples multiples of each dim.
* @param [in|out] dimCnt [point to the output dimCnt]
* @param [in|out] dim [arrays to save dims]
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief get output shape of tile.
* @param [in] xDesc description of the dividend tensor.
* @param [in] multiples multiples of each dim.
* @param [output] n dim-size of n-dim.
* @param [output] c dim-size of c-dim.
* @param [output] h dim-size of h-dim.
* @param [output] w dim-size of w-dim.
* @param [output] realDimCnt real dim.
* @return ccStatus_t
*/
ccStatus_t ccGetTileOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *multiples, int32_t *n, int32_t *c,
                              int32_t *h, int32_t *w, int32_t *realDimCnt);
/**
* @ingroup dnn
* @brief get output shape of realdiv.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief realdiv between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the dividend tensor.
* @param [in] x data point of the dividend tensor.
* @param [in] yDesc description of the divisor tensor.
* @param [in] y data point of the divisor tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccRealdivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of realdiv.
* @param [in] xDesc description of the dividend tensor.
* @param [in] yDesc description of the divisor tensor.
* @param [output] n dim-size of n-dim.
* @param [output] c dim-size of c-dim.
* @param [output] h dim-size of h-dim.
* @param [output] w dim-size of w-dim.
* @param [output] realDimCnt real dim.
* @return ccStatus_t
*/
ccStatus_t ccGetRealdivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *n,
int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
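/*
* Common shape-then-compute pattern for the broadcast binary ops in this
* header (realdiv here; the floordiv/greater/less/equal/logical* variants
* follow the same shape); illustrative only:
*
*   int32_t dimCnt = 0, dim[8] = {0};               // 8 is an assumed upper bound on dims
*   ccGetRealdivOutputDim(xDesc, yDesc, &dimCnt, dim, 8);
*   // build outputDesc from (dimCnt, dim), allocate output, then:
*   // ccRealdivForward(handle, &alpha, xDesc, x, yDesc, y, &beta, outputDesc, output);
*/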
/**
* @ingroup dnn
* @brief floordiv between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccFloordivForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of floordiv.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetFloordivOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief greater comparison between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccGreaterForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of greater.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetGreaterOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief less comparison between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccLessForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of less.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetLessOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief get output shape of LogicalOr.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetLogicalOrOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief get output shape of LogicalXor.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetLogicalXorOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief logical not forward:
*    data type only supports bool
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccLogicalNotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief equal between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief dump data during inference, only for eng ver.
* @param [in] handle cce handle
* @return ccStatus_t
*/
ccStatus_t ccDataDumpForward(ccHandle_t handle, const void *buffer, const uint64_t bufLen, const uint32_t taskIndex);
/**
* @ingroup dnn
* @brief logicaland between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccLogicalAndForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief logical or between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccLogicalOrForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief logical xor between two tensors (x ^ y = (x | y) & ~(x & y)).
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccLogicalXorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of equal.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief get output shape of logicaland.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetLogicalAndOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief floormod between two tensors.
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccFloormodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of floormod.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetFloormodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief compare between two tensors.
* @param [in] compareType type of compare operation
* @param [in] alpha reserved.
* @param [in] xDesc description of the left operator tensor.
* @param [in] x data point of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [in] y data point of the right operator tensor.
* @param [in] beta reserved.
* @param [in] outputDesc description of the output tensor.
* @param [output] output data point of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccCompareForward(ccHandle_t handle, ccCompareType_t compareType, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
const void *y, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get output shape of compare.
* @param [in] xDesc description of the left operator tensor.
* @param [in] yDesc description of the right operator tensor.
* @param [output] dimCnt dim nums.
* @param [output] dim dim size.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetCompareOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief create descriptor of FillParam
* @param [in|out] fillParamDesc point to descriptor of fill param
* @return ccStatus_t
*/
ccStatus_t ccCreateFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of FillParam
* @param [in] *fillParamDesc point to descriptor of fill param
* @return ccStatus_t
*/
ccStatus_t ccDestroyFillParamDescriptor(ccFillParamDescriptor_t *fillParamDesc);
/**
* @ingroup dnn
* @brief get output shape of broadcast operations.
* @param [in] inputNum input number of the operation tensors.
* @param [in] xDesc[] description of the input operation tensors list.
* @param [output] dimCnt dim-size of output tensor.
* @param [output] dim dim of output tensor.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMultiNdBroadcastOpOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief get output shape of maximultitensor.
* @param [in] inputNum the num of input operator tensors.
* @param [in] xDesc[] description of the input operator tensors list.
* @param [output] dimCnt dim count of output tensor.
* @param [output] dim array of output tensor.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMaxMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief get output shape of minmultitensor.
* @param [in] inputNum the num of input operator tensors.
* @param [in] xDesc[] description of the input operator tensors list.
* @param [output] dimCnt dim count of output tensor.
* @param [output] dim array of output tensor.
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMinMultitensorOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[], int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief MaxMultitensor forward:
*    data type only supports float, float16 and int32
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] inputNum input tensor number
* @param [in] alpha common scale factor
* @param [in] xDesc[] descriptor of input tensors list
* @param [in] x[] input data in device memory list
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccMaxMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha,
const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief MinMultitensor forward:
*    data type only supports float, float16 and int32
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] inputNum input tensor number
* @param [in] alpha common scale factor
* @param [in] xDesc[] descriptor of input data list
* @param [in] x[] input data in device memory list
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccMinMultitensorForward(const ccHandle_t handle, const int32_t inputNum, const void *alpha,
const ccTensorDescriptor_t xDesc[], const void *x[], const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief create descriptor of StridedSlice
* @param [in|out] stridedSliceDesc point to descriptor of StridedSlice param
* @return ccStatus_t
*/
ccStatus_t ccCreateStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of StridedSlice
* @param [in] *stridedSliceDesc point to descriptor of StridedSlice param
* @return ccStatus_t
*/
ccStatus_t ccDestroyStridedSliceDescriptor(ccStridedSliceDescriptor_t *stridedSliceDesc);
/**
* @ingroup dnn
* @brief init stridedSlice descriptor.
* @param [out] stridedSliceDesc struct of stridedslice param
* @param [in] dimCnt dimension of the input tensor
* @param [in] begin slice begin index (inclusive)
* @param [in] end slice end index (exclusive)
* @param [in] strides slice stride
* @return ccStatus_t
*/
ccStatus_t ccSetStridedSliceDescriptor(ccStridedSliceDescriptor_t stridedSliceDesc, int32_t dimCnt, int32_t begin[],
int32_t end[], int32_t strides[]);
/**
* @ingroup dnn
* @brief create descriptor of StridedSlice
* @param [in|out] stridedSliceDesc point to descriptor of StridedSlice attr
* @return ccStatus_t
*/
ccStatus_t ccCreateStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of StridedSlice
* @param [in] *stridedSliceDesc point to descriptor of StridedSlice attr
* @return ccStatus_t
*/
ccStatus_t ccDestroyStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t *attrDesc);
/**
* @ingroup dnn
* @brief init stridedSlice mask attrs descriptor.
* @param [out] attrDesc struct of stridedslice mask attrs
* @param [in] beginMask begin mask
* @param [in] endMask end mask
* @param [in] ellipsisMask ellipsis mask
* @param [in] newAxisMask new axis mask
* @param [in] shrinkAxisMask shrink axis mask
* @return ccStatus_t
*/
ccStatus_t ccSetStridedSliceAttrsDescriptor(ccStridedSliceAttrsDescriptor_t attrDesc, int32_t beginMask,
int32_t endMask, int32_t ellipsisMask, int32_t newAxisMask,
int32_t shrinkAxisMask);
/**
* @ingroup dnn
* @brief Extracts a strided slice of a tensor.
* @param [in] xDesc descriptor of input data
* @param [in] stridedSliceDesc specifies the begin, end, strides of slice
* @param [in] attrDesc reserved for optional attributes.
* @param [out] n point to n size
* @param [out] c point to c size
* @param [out] h point to h size
* @param [out] w point to w size
* @param [out] realDimCnt real dim count
* @return ccStatus_t
*/
ccStatus_t ccGetStridedSliceOutputDim(const ccTensorDescriptor_t xDesc,
const ccStridedSliceDescriptor_t stridedSliceDesc,
const ccStridedSliceAttrsDescriptor_t attrDesc, int32_t *n, int32_t *c,
int32_t *h, int32_t *w, int32_t *realDimCnt);
/**
* @ingroup dnn
* @brief Extracts a strided slice of a tensor.
* @param [in] handle cce handle
* @param [in] stridedSliceDesc specifies the begin, end, strides of slice
* @param [in] attrDesc reserved for optional attributes.
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] yDesc descriptor of output data
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccStridedSliceForward(ccHandle_t handle, const ccStridedSliceDescriptor_t stridedSliceDesc,
const ccStridedSliceAttrsDescriptor_t attrDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
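/*
* StridedSlice flow sketch (illustrative only): slice a 4-D tensor with unit
* strides and all masks zero; xDesc and the data pointers are assumed to be
* created/allocated elsewhere.
*
*   ccStridedSliceDescriptor_t ssDesc = NULL;
*   ccStridedSliceAttrsDescriptor_t maskDesc = NULL;
*   int32_t begin[4] = {0, 0, 0, 0}, end[4] = {1, 3, 224, 224}, strides[4] = {1, 1, 1, 1};
*   ccCreateStridedSliceDescriptor(&ssDesc);
*   ccSetStridedSliceDescriptor(ssDesc, 4, begin, end, strides);
*   ccCreateStridedSliceAttrsDescriptor(&maskDesc);
*   ccSetStridedSliceAttrsDescriptor(maskDesc, 0, 0, 0, 0, 0);
*   int32_t n, c, h, w, realDimCnt;
*   ccGetStridedSliceOutputDim(xDesc, ssDesc, maskDesc, &n, &c, &h, &w, &realDimCnt);
*   // build yDesc, then: ccStridedSliceForward(handle, ssDesc, maskDesc, &alpha, xDesc, x, &beta, yDesc, y);
*   ccDestroyStridedSliceAttrsDescriptor(&maskDesc);
*   ccDestroyStridedSliceDescriptor(&ssDesc);
*/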
/**
* @ingroup dnn
* @brief get output description of slice tensor.
* @param [in] xDesc descriptor of input data
* @param [in] begin begin position of tensor
* @param [in] size size to slice
* @param [out] n point to n size
* @param [out] c point to c size
* @param [out] h point to h size
* @param [out] w point to w size
* @param [out] realDimCnt realdim count
* @return ccStatus_t
*/
ccStatus_t ccGetSliceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *begin, const ccIntArray_t *size,
int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
/**
* @ingroup dnn
* @brief slice of a tensor.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] begin begin position of tensor
* @param [in] size size to slice
* @param [in] beta common scale factor
* @param [in] yDesc descriptor of output data
* @param [in|out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSliceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccIntArray_t *begin, const ccIntArray_t *size, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief gather forward computation
* @param [in] handle cce handle
* @param [in] paramsDesc descriptor of params tensor
* @param [in] params input data in device memory
* @param [in] indicesDesc descriptor of indices tensor
* @param [in] indices indices data in device memory
* @param [in] axis axis along which to gather
* @param [in] alpha reserved
* @param [in] beta reserved
* @param [in] outputDesc descriptor of output tensor
* @param [out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccGatherForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc,
const void *params, const ccTensorDescriptor_t indicesDesc, const void *indices,
const int32_t axis, const void *beta, ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief gather output dim computation, for NC1HWC0
* @param [in] paramsDesc descriptor of params tensor
* @param [in] indicesDesc descriptor of indices tensor
* @param [in] axis axis along which to gather
* @param [out] n dim of n
* @param [out] c dim of c
* @param [out] h dim of h
* @param [out] w dim of w
* @param [out] realDimCnt real dim count
* @return ccStatus_t
*/
ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc,
int32_t axis, int32_t *n, int32_t *c, int32_t *h, int32_t *w, int32_t *realDimCnt);
/**
* @ingroup dnn
* @brief gather output dim computation
* @param [in] paramsDesc descriptor of params tensor
* @param [in] indicesDesc descriptor of indices tensor
* @param [in] axis axis along which to gather
* @param [out] dimCnt dimcnt of output
* @param [out] dim dim of output
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetGatherOutputDim(const ccTensorDescriptor_t paramsDesc, const ccTensorDescriptor_t indicesDesc,
int32_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
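/*
* Gather shape-query sketch (illustrative only): gather along axis 0 and size
* the output with the ND overload above.
*
*   int32_t dimCnt = 0, dim[8] = {0};               // 8 is an assumed upper bound on dims
*   ccGetGatherOutputDim(paramsDesc, indicesDesc, 0, &dimCnt, dim, 8);
*   // build outputDesc, then:
*   // ccGatherForward(handle, &alpha, paramsDesc, params, indicesDesc, indices, 0, &beta, outputDesc, output);
*/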
/**
* @ingroup dnn
* @brief exp forward computation
* @param [in] handle cce handle
* @param [in] expDesc descriptor of expParam
* @param [in] expParam a ternary array
* @param [in] alpha reserved parameter
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta reserved parameter
* @param [in] yDesc descriptor of output tensor
* @param [out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccExpForward(ccHandle_t handle, const ccExpDescriptor_t expDesc, const void *expParam, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief expm1 forward:
*    data type only supports float, float16 and double
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccExpm1Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief log1p forward:
*    data type only supports float, float16 and double
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccLog1pForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief init descriptor for parameter of exp function
* @param [in|out] expDesc descriptor of exp parameter
* @param [in] dataType data type in device
* @param [in] paramCnt number of parameters
* @return ccStatus_t
*/
ccStatus_t ccSetExpDescriptor(ccExpDescriptor_t expDesc, ccDataType_t dataType, uint32_t paramCnt);
/**
* @ingroup dnn
* @brief log forward computation
* @param [in] handle cce handle
* @param [in] logDesc descriptor of logParam
* @param [in] logParam a ternary array
* @param [in] alpha reserved parameter
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta reserved parameter
* @param [in] yDesc descriptor of output tensor
* @param [out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccLogForward(ccHandle_t handle, const ccLogDescriptor_t logDesc, const void *logParam, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief init descriptor for parameter of log function
* @param [in|out] logDesc descriptor of log parameter
* @param [in] dataType data type in device
* @param [in] paramCnt number of parameters
* @return ccStatus_t
*/
ccStatus_t ccSetLogDescriptor(ccLogDescriptor_t logDesc, ccDataType_t dataType, uint32_t paramCnt);
/**
* @ingroup dnn
* @brief pow forward computation
* @param [in] handle cce handle
* @param [in] powDesc descriptor of powParam
* @param [in] powParam a ternary array
* @param [in] alpha reserved parameter
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta reserved parameter
* @param [in] yDesc descriptor of input tensor
* @param [in] y input data in device memory
* @param [in] zDesc descriptor of output tensor
* @param [out] z output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccPowForward(ccHandle_t handle, const ccPowDescriptor_t powDesc, const void *powParam, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc,
const void *y, const void *beta, const ccTensorDescriptor_t zDesc, void *z);
/**
* @brief init descriptor for parameter of pow function
* @param [in|out] powDesc descriptor of pow parameter
* @param [in] dataType data type in device
* @param [in] paramCnt number of parameters
* @return ccStatus_t
*/
ccStatus_t ccSetPowDescriptor(ccPowDescriptor_t powDesc, ccDataType_t dataType, uint32_t paramCnt);
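/*
* Pow flow sketch (illustrative only; the "ternary array" semantics of
* powParam follow the comments above, and CC_DATA_HALF is an assumed enum
* name for the fp16 data type):
*
*   ccPowDescriptor_t powDesc = NULL;
*   ccCreatePowDescriptor(&powDesc);
*   ccSetPowDescriptor(powDesc, CC_DATA_HALF, 3);   // assumed enum name
*   // powParam points to the 3 parameters, then:
*   // ccPowForward(handle, powDesc, powParam, &alpha, xDesc, x, yDesc, y, &beta, zDesc, z);
*   ccDestroyPowDescriptor(&powDesc);
*/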
/**
* @ingroup dnn
* @brief non max suppression forward.
* @param [in] handle cce handle
* @param [in] nonmaxParaDesc descriptor of para
* @param [in] nonmaxPara input para in host memory
* @param [in] maxoutputsize max output size, in host memory
* @param [in] alpha common scale factor
* @param [in] boxesDesc descriptor of input data boxes
* @param [in] boxes input data boxes in device memory
* @param [in] scoresDesc descriptor of input data scores
* @param [in] scores input data scores in device memory
* @param [in] workSpaceSizeInBytes workspace size
* @param [in] workSpace input workspace in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccNonMaxSuppressionForward(ccHandle_t handle, const ccNonMaxSuppressionDescriptor_t nonmaxParaDesc,
const void *nonmaxPara, const int *maxoutputsize, const void *alpha,
const ccTensorDescriptor_t boxesDesc, const void *boxes,
const ccTensorDescriptor_t scoresDesc, const void *scores,
const uint32_t workSpaceSizeInBytes, void *workSpace, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @brief init descriptor for parameter of NonMaxSuppression function
* @param [in|out] nonMaxSuppressionDesc descriptor of NonMaxSuppression parameter
* @param [in] dataType data type in device
* @param [in] paramCnt number of parameters
* @return ccStatus_t
*/
ccStatus_t ccSetNonMaxSuppressionDescriptor(ccNonMaxSuppressionDescriptor_t nonMaxSuppressionDesc,
ccDataType_t dataType, uint32_t paramCnt);
/**
* @ingroup dnn
* @brief get the output dimension info of resizeBilinear op.
* @param [in] xDesc descriptor of input data
* @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
* @param [out] dimCnt dim count of output
* @param [out] dim[] dim of output
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetResizeBilinearOutputDim(const ccTensorDescriptor_t xDesc,
const ccResizeBilinearDescriptor_t resizeBilinearDesc, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief get the output dimension info of interp op.
* @param [in] xDesc descriptor of input data
* @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
* @param [out] dimCnt dim count of output
* @param [out] dim[] dim of output
* @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetInterpOutputDim(const ccTensorDescriptor_t xDesc, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief resize bilinear forward for t network.
* @param [in] handle cce handle
* @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccResizeBilinearForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc,
const void *alpha, const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief resize bilinear forward for c network.
* @param [in] handle cce handle
* @param [in] resizeBilinearDesc descriptor of resize_bilinear operator
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccInterpForward(ccHandle_t handle, const ccResizeBilinearDescriptor_t resizeBilinearDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief create descriptor of ResizeBilinear
* @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr
* @return ccStatus_t
*/
ccStatus_t ccCreateResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of ResizeBilinear
* @param [in|out] resizeBilinearDesc point to descriptor of resizeBilinear attr
* @return ccStatus_t
*/
ccStatus_t ccDestroyResizeBilinearDescriptor(ccResizeBilinearDescriptor_t *resizeBilinearDesc);
/**
* @ingroup dnn
* @brief set descriptor of resizeBilinear.
* @param [in|out] resizeBilinearDesc descriptor of resize_bilinear operator
* @param [in] resizeOutputDimMode way to decide output dimensions
* @param [in] alignCorners whether the centers of input and output are aligned
* @param [in] zoom_factor zoom factor
* @param [in] shrink_factor shrink factor
* @param [in] height height of output
* @param [in] width width of output
* @param [in] pad_begin padding at begin of input
* @param [in] pad_end padding at end of input
* @return ccStatus_t
*/
ccStatus_t ccSetResizeBilinearDescriptor(ccResizeBilinearDescriptor_t resizeBilinearDesc,
ccResizeOutputDimMode_t resizeOutputDimMode, bool alignCorners,
int32_t zoom_factor, int32_t shrink_factor, int32_t height, int32_t width,
int32_t pad_begin, int32_t pad_end);
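/*
* ResizeBilinear flow sketch (illustrative only; the output-dim-mode enum name
* and the sizes below are assumptions):
*
*   ccResizeBilinearDescriptor_t rbDesc = NULL;
*   ccCreateResizeBilinearDescriptor(&rbDesc);
*   ccSetResizeBilinearDescriptor(rbDesc, CC_RESIZE_OUTPUT_DIM_EXPLICIT,  // assumed enum name
*                                 false,                                  // alignCorners
*                                 0, 0,                                   // zoom/shrink factor unused
*                                 448, 448,                               // output height/width
*                                 0, 0);                                  // pad begin/end
*   int32_t dimCnt = 0, dim[8] = {0};
*   ccGetResizeBilinearOutputDim(xDesc, rbDesc, &dimCnt, dim, 8);
*   // build outputDesc, then: ccResizeBilinearForward(handle, rbDesc, &alpha, xDesc, x, &beta, outputDesc, output);
*   ccDestroyResizeBilinearDescriptor(&rbDesc);
*/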
/**
* @ingroup dnn
* @brief fill forward computation
* @param [in] handle cce handle
* @param [in] fillParamDesc descriptor of fill parameter
* @param [in] alpha reserved
* @param [in] givenDesc descriptor of given tensor
* @param [in] givenData given data in device memory
* @param [in] workspace space for fill algorithm
* @param [in] workSpaceSizeInBytes space size in byte
* @param [in] beta reserved
* @param [in] outputDesc descriptor of output tensor
* @param [out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccFillForward(ccHandle_t handle, const ccFillParamDescriptor_t fillParamDesc, const void *alpha,
const ccTensorDescriptor_t givenDesc, const void *givenData, const void *workspace,
const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
*[ccGetFillWorkspaceSize]
*@param fillType [fill type]
*@param xDesc [input tensor descriptor]
*@param sizeInBytes [output size]
*@return ccStatus_t [status]
*/
ccStatus_t ccGetFillWorkspaceSize(const ccFillOpType_t fillType, const ccTensorDescriptor_t xDesc,
uint32_t *sizeInBytes);
/**
*[ccCast]
*@param handle [cce handler]
*@param alpha [alpha]
*@param xDesc [tensor Description of tensor x]
*@param x [input tensor x]
*@param beta [beta]
*@param yDesc [tensor Description of tensor y]
*@param y [output tensor y]
*@return ccStatus_t [status]
*/
ccStatus_t ccCast(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief round forward:
*    data type only supports float, float16 and int32
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccRoundForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief rint forward:
*    data type only supports float and float16
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccRintForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief sqrt forward:
*    data type only supports float and float16
*    data format only supports ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSqrtForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
*[ccTransGroupConvFilterInt8]
*@param filterSrcInfo [cce filtersrc descriptor]
*@param filterSrc [filterSrc address]
*@param filterDstInfo [cce filterdst descriptor]
*@param filterDst [filterdst address]
*@param group [group]
*@param ySizeInBytes [fraczfilter size]
*@param outputDataType [datatype]
*@return ccStatus_t [status]
*/
ccStatus_t ccTransGroupConvFilterInt8(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
ccFilterDescriptor_t filterDstInfo, void *filterDst, uint32_t group,
uint32_t ySizeInBytes, ccDataType_t outputDataType);
/**
*[ccGetConcatOutputDim]
*@param xDesc[] [input tensor descriptor]
*@param axis [concat axis]
*@param inputNum [input tensor numbers]
 *@param dimCnt [output dim count]
 *@param dim[] [output dim]
 *@param dimLen [length of dim array]
*@return ccStatus_t [status]
*/
ccStatus_t ccGetConcatOutputDim(const ccTensorDescriptor_t xDesc[], int32_t axis, int32_t inputNum, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief get the output dimension info of reduce.
* @param [in] xDesc descriptor of input tensor
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetReduceOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *axis, bool keepDims,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief reduce sum forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceSumForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
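/*
 * Usage sketch (illustrative): size the output of a reduction with
 * ccGetReduceOutputDim before launching ccReduceSumForward. The axis array,
 * descriptors and device buffers are prepared by the caller; float alpha/beta
 * and CC_STATUS_SUCCESS are assumed conventions.
 *
 * ccStatus_t RunReduceSum(ccHandle_t handle, const ccIntArray_t *axis,
 *                         bool keepDims, ccTensorDescriptor_t xDesc,
 *                         const void *x, ccTensorDescriptor_t outputDesc,
 *                         void *output) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetReduceOutputDim(xDesc, axis, keepDims, &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   // dimCnt/dim describe the output shape; outputDesc must match it
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccReduceSumForward(handle, axis, keepDims, &alpha, xDesc, x, &beta,
 *                             outputDesc, output);
 * }
 */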
/**
* @ingroup dnn
* @brief reduce max forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceMaxForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief reduce min forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceMinForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief reduce mean forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceMeanForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief reduce prod forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceProdForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief reduce all forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceAllForward(ccHandle_t handle, const ccIntArray_t *axis, bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
 *@brief print time stats
*@return ccStatus_t [status]
*/
ccStatus_t ccPrintTimeStat();
/**
* @ingroup dnn
* @brief reduce abs sum forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceAbsSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief reduce square sum forward computation
* @param [in] handle cce handle
* @param [in] axis The dimensions to reduce
* @param [in] keepDims If true, retains reduced dimensions with length 1.
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReduceSquareSumForward(ccHandle_t handle, const ccIntArray_t *axis, const bool keepDims, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the output dimension info of crop and resize
* @param [in] imageDesc descriptor of images
* @param [in] boxesDesc descriptor of boxes
* @param [in] boxidxDesc descriptor of boxidx
* @param [in] resizeHeight resize height
* @param [in] resizeWidth resize width
* @param [out] dimCnt dimcnt of output
* @param [out] dim dim of output
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetCropAndResizeOutputDim(const ccTensorDescriptor_t imageDesc, const ccTensorDescriptor_t boxesDesc,
const ccTensorDescriptor_t boxidxDesc, const int32_t resizeHeight,
const int32_t resizeWidth, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief crop and resize forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] imageDesc descriptor of images
* @param [in] image input data in device memory
* @param [in] boxesDesc descriptor of boxes
* @param [in] boxes input data in device memory
* @param [in] boxidxDesc descriptor of boxidx
* @param [in] boxidx input data in device memory
* @param [in] method enum of resize method
* @param [in] extrapolationValue Value used for extrapolation, when applicable
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccCropAndResizeForward(ccHandle_t handle, const ccResizeMethod_t method, const float extrapolationValue,
const void *alpha, const ccTensorDescriptor_t imageDesc, const void *image,
const ccTensorDescriptor_t boxesDesc, const void *boxes,
const ccTensorDescriptor_t boxidxDesc, const void *boxidx, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
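/*
 * Usage sketch (illustrative): derive the crop-and-resize output shape, then
 * run the forward pass. Descriptors and device buffers come from the caller;
 * the resize method value, float alpha/beta and CC_STATUS_SUCCESS are
 * assumptions.
 *
 * ccStatus_t RunCropAndResize(ccHandle_t handle, ccResizeMethod_t method,
 *                             ccTensorDescriptor_t imageDesc, const void *image,
 *                             ccTensorDescriptor_t boxesDesc, const void *boxes,
 *                             ccTensorDescriptor_t boxidxDesc, const void *boxidx,
 *                             int32_t resizeH, int32_t resizeW,
 *                             ccTensorDescriptor_t outputDesc, void *output) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetCropAndResizeOutputDim(imageDesc, boxesDesc, boxidxDesc,
 *                                                   resizeH, resizeW, &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   const float extrapolationValue = 0.0f;  // value used outside box bounds
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccCropAndResizeForward(handle, method, extrapolationValue, &alpha,
 *                                 imageDesc, image, boxesDesc, boxes,
 *                                 boxidxDesc, boxidx, &beta, outputDesc, output);
 * }
 */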
/**
* @ingroup dnn
* @brief select forward computation
* @param [in] handle cce handle
* @param [in] alpha reserved
* @param [in] condDesc descriptor of cond tensor
* @param [in] cond cond data in device memory
* @param [in] xDesc descriptor of x tensor
* @param [in] x x data in device memory
* @param [in] yDesc descriptor of y tensor
* @param [in] y y data in device memory
* @param [in] beta reserved
 * @param [in] outDesc descriptor of output tensor
 * @param [out] out output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSelect(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t condDesc, const void *cond,
const ccTensorDescriptor_t xDesc, const void *x, const ccTensorDescriptor_t yDesc, const void *y,
const void *beta, const ccTensorDescriptor_t outDesc, void *out);
/**
* @ingroup dnn
* @brief get the output dimension info of where
* @param [in] xDesc descriptor of input tensor
* @param [in|out] dimCnt point to the output dimCnt
 * @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetWhereOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief where forward computation
 * @param [in] handle cce handle
 * @param [in] alpha reserved
 * @param [in] xDesc descriptor of input tensor x
 * @param [in] x input data in device memory
 * @param [in] beta reserved
 * @param [in] yDesc descriptor of output tensor y
 * @param [out] y output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccWhere(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief reverse forward.
* @param [in] handle cce handle
 * @param [in] axis dims that need to be reversed
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReverseForward(ccHandle_t handle, const ccIntArray_t *axis, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief floor forward:
 *          supported data types: float, float16
 *          supported data format: ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccFloorForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief ceil forward:
 *          supported data types: float, float16
 *          supported data format: ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccCeilForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the output dimension info of truncate mod
* @param [in] xDesc descriptor of input tensor
* @param [in] yDesc descriptor of input tensor
* @param [out] dimCnt [dim count of the output tensor]
* @param [out] dim[] [shape of the output tensor]
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetTruncatemodOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief truncate mod forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] yDesc descriptor of input tensor
* @param [in] y input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccTruncatemodForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief Spatial Pyramid Pooling
* @param [in] handle cce handle
* @param [in] alpha reserved
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] workspace temp workspace
* @param [in] workspaceSizeInBytes temp workspace size
* @param [in] pyramidHeight pyramid height
* @param [in] poolingMode pooling mode
* @param [in] beta reserved
* @param [in] outputDesc descriptor of output tensor
* @param [out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSPPForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
void *workspace, const uint32_t workspaceSizeInBytes, const uint32_t pyramidHeight,
const ccPoolingMode_t poolingMode, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
* @brief Get Spatial Pyramid Pooling output dim
* @param [in] xDesc descriptor of input tensor
* @param [in] pyramidHeight pyramid height
* @param [in] dimLen length of dim
* @param [out] dimCnt output tensor dim cnt
* @param [out] dim output tensor dim
* @return ccStatus_t
*/
ccStatus_t ccGetSPPOutputDim(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight, int32_t *dimCnt,
int32_t dim[], const int32_t dimLen);
/**
* @ingroup dnn
* @brief Get Spatial Pyramid Pooling workspace size
* @param [in] xDesc descriptor of input tensor
* @param [in] pyramidHeight pyramid height
* @param [out] workspaceSizeInBytes workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetSPPWorkspaceSize(const ccTensorDescriptor_t xDesc, const uint32_t pyramidHeight,
uint32_t *workspaceSizeInBytes);
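/*
 * Usage sketch (illustrative): the spatial pyramid pooling call sequence is
 * query output dim, query workspace size, then forward. Workspace allocation
 * is the caller's job; float alpha/beta and CC_STATUS_SUCCESS are assumed
 * conventions.
 *
 * ccStatus_t RunSPP(ccHandle_t handle, ccTensorDescriptor_t xDesc, const void *x,
 *                   uint32_t pyramidHeight, ccPoolingMode_t mode,
 *                   ccTensorDescriptor_t outputDesc, void *output,
 *                   void *workspace, uint32_t workspaceBytes) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetSPPOutputDim(xDesc, pyramidHeight, &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   uint32_t needed = 0;
 *   status = ccGetSPPWorkspaceSize(xDesc, pyramidHeight, &needed);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   // workspaceBytes must be >= needed
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccSPPForward(handle, &alpha, xDesc, x, workspace, workspaceBytes,
 *                       pyramidHeight, mode, &beta, outputDesc, output);
 * }
 */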
/**
* @ingroup dnn
* @brief BNLL forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccBNLLForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief bias forward.
* @param [in] handle cce handle
* @param [in] axis axis
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data x
* @param [in] x input data x in device memory
* @param [in] biasDesc descriptor of input data bias
* @param [in] bias input data bias in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccBiasForward(ccHandle_t handle, const int axis, const void *alpha, const ccTensorDescriptor_t xDesc,
const void *x, const ccTensorDescriptor_t biasDesc, const void *bias, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief threshold forward computation
* @param [in] handle cce handle
* @param [in] threshold threshold
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccThresholdForward(ccHandle_t handle, const void *threshold, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief shufflechannel forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] group number of groups
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
// TODO AICPU: please add shufflechannel custom params and comment
ccStatus_t ccShuffleChannelForward(ccHandle_t handle, const void *alpha, uint32_t group,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief mvn forward.
* @param [in] handle cce handle
* @param [in] acrossChannel across channel. true: across, false: not
* @param [in] normalizeVariance normalizeVariance. true: normalizeVariance, false: not
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
 * @param [in] x input data in device memory
 * @param [in] workSpace workspace in device memory
 * @param [in] workSpaceSizeInBytes workspace size in bytes
 * @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccMVNForward(ccHandle_t handle, bool acrossChannel, bool normalizeVariance, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, void *workSpace, uint32_t workSpaceSizeInBytes,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the workspace size of mvn
* @param [in] xDesc descriptor of input data
* @param [in] acrossChannel across channel. true: across, false: not
 * @param [in|out] sizeInBytes workspace size needed for the whole computation
 * @return ccStatus_t
 */
ccStatus_t ccGetMVNWorkspaceSize(const ccTensorDescriptor_t xDesc, bool acrossChannel, uint32_t *sizeInBytes);
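/*
 * Usage sketch (illustrative): query the MVN workspace first, then run the
 * forward pass with a caller-provided device buffer of at least that size.
 * Float alpha/beta and CC_STATUS_SUCCESS are assumed conventions.
 *
 * ccStatus_t RunMVN(ccHandle_t handle, bool acrossChannel, bool normalizeVariance,
 *                   ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
 *                   uint32_t workSpaceBytes, ccTensorDescriptor_t outputDesc,
 *                   void *output) {
 *   uint32_t needed = 0;
 *   ccStatus_t status = ccGetMVNWorkspaceSize(xDesc, acrossChannel, &needed);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   // workSpaceBytes must be >= needed; allocation is the caller's job
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccMVNForward(handle, acrossChannel, normalizeVariance, &alpha, xDesc, x,
 *                       workSpace, workSpaceBytes, &beta, outputDesc, output);
 * }
 */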
/**
* @ingroup dnn
 * @brief heatmap2coord forward: outputs the hotspot value and its corresponding coordinates
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
 * @param [in] coordh calibration height
 * @param [in] coordw calibration width
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccHeatmap2coordForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
int32_t coordh, int32_t coordw, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the output dimension info of heatmap2coord
* @param [in] xDesc descriptor of input tensor
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetHeatmap2coordOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief swish forward.
* @param [in] handle cce handle
 * @param [in] scale param of swish function, y = x / (1 + exp(-scale * x)), i.e. y = x * sigmoid(scale * x)
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSwishForward(ccHandle_t handle, const float scale, const void *alpha, const ccTensorDescriptor_t xDesc,
const void *x, const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
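/*
 * Reference semantics (host-side sketch, illustrative only): the swish
 * activation computed by ccSwishForward is y = x / (1 + exp(-scale * x)),
 * which is the same quantity as x * sigmoid(scale * x).
 *
 * #include <cmath>
 * inline float SwishRef(float x, float scale) {
 *   return x / (1.0f + std::exp(-scale * x));  // == x * sigmoid(scale * x)
 * }
 */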
ccStatus_t ccTeForward(ccHandle_t handle, const void *stubFunc, uint32_t coreDim, const void *args, uint32_t argsSize,
const rtL2Ctrl_t *l2ctrl, int32_t inputNum, const ccTensorDescriptor_t xDesc[], const void *x[],
int32_t outputNum, const ccTensorDescriptor_t yDesc[], void *y[], bool isAiCore);
#ifndef DAVINCI_LITE
ccStatus_t ccAiCpuCustomizeForward(ccHandle_t handle, aicpu_run_func stubFunc, opTensor_t *xOpDesc[], void *x[],
int32_t inputNum, opTensor_t *yOpDesc[], void *y[], void *op_attr_handle,
int32_t outputNum, const ccTensorDescriptor_t xDesc[],
const ccTensorDescriptor_t yDesc[], const void *op_attr_str, uint32_t op_attr_size);
#endif
/**
* @ingroup dnn
* @brief embedding lookup forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data x
* @param [in] x input data x in device memory
* @param [in] idxDesc descriptor of input data idx
* @param [in] idx input data idx in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
const void *x, const ccTensorDescriptor_t idxDesc, const void *idx,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
 * @ingroup dnn
* @brief embedding lookup forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] inputNum inputNum
* @param [in] xDesc[] descriptor array of input data x
* @param [in] x[] input data x array in device memory
* @param [in] workSpace workSpace addr
* @param [in] workSpaceSizeInBytes workSpace size
* @param [in] idxDesc descriptor of input data idx
* @param [in] idx input data idx in device memory
* @param [in] partitionStrategy partitionStrategy
* @param [in] maxNorm addr of maxNorm
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccEmbeddingLookupForward(ccHandle_t handle, const void *alpha, const int32_t inputNum,
const ccTensorDescriptor_t xDesc[], const void *x[], void *workSpace,
const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t idxDesc,
const void *idx, ccPartitionStrategy_t partitionStrategy, const void *maxNorm,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
*[ccGetEmbeddingLookupOutputDim]
*@param inputNum [input tensor numbers]
*@param xDesc[] [input tensor descriptor]
*@param idxDesc [idx tensor descriptor]
*@param dimCnt [output dim count]
*@param dim[] [output dim]
 *@param dimLen [length of dim array]
*@return ccStatus_t [status]
*/
ccStatus_t ccGetEmbeddingLookupOutputDim(const int32_t inputNum, const ccTensorDescriptor_t xDesc[],
const ccTensorDescriptor_t idxDesc, int32_t *dimCnt, int32_t dim[],
int32_t dimLen);
/**
* @ingroup dnn
*[ccGetEmbeddingLookupWorkspaceSize]
*@param inputNum [input tensor numbers]
*@param idxDesc [input tensor descriptor]
*@param isMaxNormExist [isMaxNormExist]
*@param sizeInBytes [output size]
*@return ccStatus_t [status]
*/
ccStatus_t ccGetEmbeddingLookupWorkspaceSize(const int32_t inputNum, const ccTensorDescriptor_t idxDesc,
const bool isMaxNormExist, uint32_t *sizeInBytes);
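/*
 * Usage sketch (illustrative): the multi-table embedding-lookup overload is
 * driven by three calls: output dim, workspace size, forward. The partition
 * strategy value, float alpha/beta, CC_STATUS_SUCCESS, and passing NULL for
 * maxNorm when no clipping is wanted (matching isMaxNormExist == false) are
 * all assumptions.
 *
 * ccStatus_t RunEmbeddingLookup(ccHandle_t handle, int32_t inputNum,
 *                               const ccTensorDescriptor_t xDesc[], const void *x[],
 *                               ccTensorDescriptor_t idxDesc, const void *idx,
 *                               ccPartitionStrategy_t strategy,
 *                               ccTensorDescriptor_t outputDesc, void *output,
 *                               void *workSpace, uint32_t workSpaceBytes) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetEmbeddingLookupOutputDim(inputNum, xDesc, idxDesc,
 *                                                     &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   uint32_t needed = 0;
 *   status = ccGetEmbeddingLookupWorkspaceSize(inputNum, idxDesc, false, &needed);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   const void *maxNorm = NULL;  // no max-norm clipping in this sketch
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccEmbeddingLookupForward(handle, &alpha, inputNum, xDesc, x, workSpace,
 *                                   workSpaceBytes, idxDesc, idx, strategy,
 *                                   maxNorm, &beta, outputDesc, output);
 * }
 */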
/**
* @ingroup dnn
* @brief check if it is the first layer of resnet50 and semecefc
* @param [in] tensorDesc descriptor of input tensor.
* @param [in] convDesc conv descriptor.
* @param [in] filterDesc descriptor of weight tensor.
* @return ccStatus_t
*/
ccStatus_t c04DescParamCheck(const ccTensorDescriptor_t tensorDesc, const ccConvolutionDescriptor_t convDesc,
const ccFilterDescriptor_t filterDesc);
#ifndef DAVINCI_LITE
/**
* @ingroup dnn
 * @brief convolution backward filter computation
* @param [in] handle cce handle
* @param [in] convDesc descriptor of convolution operator
* @param [in] alpha scaling factors
* @param [in] beta scaling factors
* @param [in] xDesc x descriptor of input tensor
* @param [in] x x data in device memory
* @param [in] dyDesc descriptor of dy
* @param [in] dy dy data in device memory
* @param [in] dwDesc descriptor of dwDesc
* @param [out] dw dw data in device memory
 * @param [in] algo algorithm of convolution backward filter
* @param [in] workSpace temp space, maybe NULL if no need temp space
* @param [in] workSpaceSizeInBytes sizeof workspace
* @return ccStatus_t
*/
ccStatus_t ccConvolutionBackwardFilter(ccHandle_t handle, const ccConvolutionDescriptor_t convDesc, void *alpha,
void *beta, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t dyDesc, const void *dy,
const ccFilterDescriptor_t dwDesc, void *dw, ccConvolutionBwdAlgo_t algo,
void *workSpace, uint32_t workSpaceSizeInBytes);
#endif
/**
* @ingroup dnn
 * @brief get the temp space size of convolution backward filter computation, maybe no need temp space
* @param [in] handle cce handle
* @param [in] dyDesc descriptor of input tensor dy
* @param [in] convDesc descriptor of convolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in] dwDesc descriptor of filter
 * @param [in] algo algorithm of convolution backward filter
* @param [in|out] sizeInBytes temp space size need for specified algorithm
* @return ccStatus_t
*/
ccStatus_t ccGetConvolutionBackwardFilterWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t dyDesc,
const ccConvolutionDescriptor_t convDesc,
const ccTensorDescriptor_t xDesc,
const ccFilterDescriptor_t dwDesc, ccConvolutionBwdAlgo_t algo,
uint32_t *sizeInBytes);
#ifndef DAVINCI_LITE
ccStatus_t ccBatchNormalizationBackward(ccHandle_t handle, ccBatchNormMode_t mode, const void *alphaDataDiff,
const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff,
const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t dyDesc, const void *dy,
const ccTensorDescriptor_t dxDesc, void *dx,
const ccTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale,
void *resultBnScaleDiff, void *resultBnBiasDiff, const void *workSpace,
const uint32_t workSpaceSizeInBytes, double epsilon, const void *SaveMean,
const void *SaveInvVariance);
#endif
ccStatus_t ccGetBatchNormalizationBackwardWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
ccTensorDescriptor_t xDesc, ccTensorDescriptor_t dyDesc,
ccTensorDescriptor_t dxDesc,
ccTensorDescriptor_t bnScaleBiasDesc, uint32_t *sizeInBytes);
#ifndef DAVINCI_LITE
ccStatus_t ccBatchNormalizationForwardTraining(ccHandle_t handle, ccBatchNormMode_t mode, const void *alpha,
const void *beta, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, void *y,
const ccTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale,
const void *bnBias, double exponentialAverageFactor,
void *resultRunningMean, void *resultRunningVariance, void *workSpace,
uint32_t workSpaceSizeInBytes, double epsilon, void *resultSaveMean,
void *resultSaveInvVariance, const bool isTraining);
#endif
ccStatus_t ccGetBatchNormalizationForwardTrainingWorkspaceSize(ccHandle_t handle, ccBatchNormMode_t mode,
ccTensorDescriptor_t xDesc, ccTensorDescriptor_t yDesc,
const ccTensorDescriptor_t bnScaleBiasMeanVarDesc,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
 * @brief generate a random normal tensor with the given mean and scale.
 * @param [in] handle Stream handle.
 * @param [in] alpha reserved.
 * @param [in] meanDesc Description of the mean tensor.
 * @param [in] mean Data pointer of mean.
 * @param [in] scaleDesc Description of the scale tensor.
 * @param [in] scale Data pointer of scale.
 * @param [in] seed1 random seed used to generate random number
 * @param [in] seed2 random seed used to generate random number
 * @param [in] beta reserved.
 * @param [in] outputDesc Description of the generated random tensor.
 * @param [out] output Data pointer of output.
* @return ccStatus_t
*/
ccStatus_t ccRandomNormalForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t meanDesc,
const void *mean, const ccTensorDescriptor_t scaleDesc, const void *scale,
const int64_t seed1, const int64_t seed2, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
 * @brief generate a random uniform tensor with the given minval and maxval.
 * @param [in] handle Stream handle.
 * @param [in] alpha reserved.
 * @param [in] minvalDesc Description of the minval tensor.
 * @param [in] minval Data pointer of minval.
 * @param [in] maxvalDesc Description of the maxval tensor.
 * @param [in] maxval Data pointer of maxval.
 * @param [in] seed1 random seed used to generate random number
 * @param [in] seed2 random seed used to generate random number
 * @param [in] beta reserved.
 * @param [in] outputDesc Description of the generated random tensor.
 * @param [out] output Data pointer of output.
* @return ccStatus_t
*/
ccStatus_t ccRandomUniformForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t minvalDesc,
const void *minval, const ccTensorDescriptor_t maxvalDesc, const void *maxval,
const int64_t seed1, const int64_t seed2, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
 * @ingroup dnn
 * @brief BatchMatMul forward computation.
 * @param [in] handle Stream handle.
 * @param [in] alpha reserved.
 * @param [in] xDesc Description of tensor A.
 * @param [in] x Data pointer of tensor A.
 * @param [in] yDesc Description of tensor B.
 * @param [in] y Data pointer of tensor B.
 * @param [in] beta reserved.
 * @param [in] adj_x tensor A transpose flag
 * @param [in] adj_y tensor B transpose flag
 * @param [in] outDesc Description of the output tensor.
 * @param [out] out Data pointer of output.
 * @return ccStatus_t
*/
ccStatus_t ccBatchMatMulForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta, const bool adj_x,
const bool adj_y, const ccTensorDescriptor_t outDesc, void *out);
ccStatus_t ccGetBatchMatMulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, bool adj_x,
bool adj_y, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
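/*
 * Usage sketch (illustrative): compute the batched-matmul output shape from
 * the operand descriptors and transpose flags, then launch the forward call.
 * Float alpha/beta and CC_STATUS_SUCCESS are assumed conventions.
 *
 * ccStatus_t RunBatchMatMul(ccHandle_t handle, ccTensorDescriptor_t xDesc,
 *                           const void *x, ccTensorDescriptor_t yDesc,
 *                           const void *y, bool adjX, bool adjY,
 *                           ccTensorDescriptor_t outDesc, void *out) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetBatchMatMulOutputDim(xDesc, yDesc, adjX, adjY,
 *                                                 &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   // outDesc must describe the dimCnt/dim shape computed above
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccBatchMatMulForward(handle, &alpha, xDesc, x, yDesc, y, &beta,
 *                               adjX, adjY, outDesc, out);
 * }
 */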
/**
* @ingroup dnn
 * @brief generate conv int8 all offset factor
 * @param [in] para the struct for scale and offset of input, filter and output
 * @param [in|out] offsetW offset of filter
 * @param [in|out] offsetPad offset of input
 * @param [in|out] scaledQrq scale computing result of input, filter and output
 * @param [in|out] nextoffsetq offset of output
* @return ccStatus_t
*/
ccStatus_t ccGenQuantAllOffsetFactor(const ccQuantAllOffsetPara_t *para, uint8_t &offsetW, uint8_t &offsetPad,
uint16_t &scaledQrq, uint16_t &nextoffsetq);
ccStatus_t ccSetGroupConvScene(const ccFilterDescriptor_t tensorDesc, ccConvolutionDescriptor_t convDesc);
/**
 * @ingroup dnn
 * @brief get conv int8 all offset fracZ size
 * @param [in] filterSrcDesc descriptor of source filter tensor
 * @param [in] filterDesc descriptor of filter tensor
 * @param [in|out] size conv int8 all offset fracZ size
 * @param [in] groupNum group conv num
 * @return ccStatus_t
 */
ccStatus_t ccGetInt8AllOffsetFilterFracZSizeInBytes(const ccFilterDescriptor_t filterSrcDesc,
                                                    const ccFilterDescriptor_t filterDesc, uint32_t &size,
                                                    uint32_t groupNum);
/**
* @ingroup dnn
* @brief transform filter in conv int8 all offset scene
 * @param [in] filterSrcInfo descriptor of filter tensor before fracZ transform
 * @param [in] filterSrc filter addr before fracZ transform
 * @param [in] filterDstInfo descriptor of filter tensor after fracZ transform
 * @param [out] filterDst filter addr after fracZ transform
 * @param [in] quantPara the struct for scale and offset of input, filter and output
 * @param [in] ySizeInBytes filter size after fracZ transform
 * @param [in] outputDataType output data type
 * @param [in] groupNum group conv num
* @return ccStatus_t
*/
ccStatus_t ccTransFilterInt8AllOffset(ccFilterDescriptor_t filterSrcInfo, const void *filterSrc,
ccFilterDescriptor_t filterDstInfo, void *filterDst,
const ccQuantAllOffsetPara_t *quantPara, uint32_t ySizeInBytes,
ccDataType_t outputDataType, uint32_t groupNum);
/**
* @ingroup dnn
* @brief transform bias in conv int8 all offset scene
* @param [in] filterDesc descriptor of filter tensor
* @param [in] biasDesc descriptor of bias tensor
* @param [in] quantPara the struct for scale and offset of input, filter and output
* @param [in] w filter addr
* @param [in] bias bias addr
* @return ccStatus_t
*/
ccStatus_t ccTransInt8AllOffsetBias(const ccFilterDescriptor_t filterDesc, const ccTensorDescriptor_t biasDesc,
const ccQuantAllOffsetPara_t *quantPara, const void *w, const void *bias);
/**
* @ingroup dnn
 * @brief dequantize forward computation
 * @param [in] handle handle id
 * @param [in] alpha alpha addr
 * @param [in] xDesc the input Desc descriptor
 * @param [in] x x data addr
 * @param [in] beta beta data addr
 * @param [in] yDesc the output Desc descriptor
 * @param [out] y y data addr
* @return ccStatus_t
*/
ccStatus_t ccDequantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
const void *x, const void *beta, const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
 * @brief quantize forward computation
 * @param [in] handle handle id
 * @param [in] alpha alpha addr
 * @param [in] xDesc the input Desc descriptor
 * @param [in] x x data addr
 * @param [in] beta beta data addr
 * @param [in] yDesc the output Desc descriptor
 * @param [out] y y data addr
* @return ccStatus_t
*/
ccStatus_t ccQuantizeCoreForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t yDesc, void *y);
#ifndef DAVINCI_LITE
ccStatus_t ccActivationBackward(ccHandle_t handle, const ccActivationDescriptor_t activationDesc, const void *alpha,
const ccTensorDescriptor_t dyDesc, const void *dy, const ccTensorDescriptor_t xDesc,
const void *x, const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
#endif
ccStatus_t ccL2LossForward(ccHandle_t handle, const ccL2LossDescriptor_t l2lossDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y);
/**
* @ingroup dnn
* @brief get the output dimension info of top k v2
 * @param [in] xDesc descriptor of input tensor x
 * @param [in] kDesc descriptor of input tensor k
 * @param [in] k input data k
 * @param [in] axis the axis along which to compute top k
 * @param [in|out] dimCnt point to the output dimCnt
 * @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetTopKV2OutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc, const void *k,
const int64_t axis, int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief top k v2 forward computation
* @param [in] handle cce handle
 * @param [in] alpha scaling factors
 * @param [in] xDesc descriptor of input tensor x
 * @param [in] x input data x in device memory
 * @param [in] kDesc descriptor of input tensor k
 * @param [in] k input data k in device memory
 * @param [in] beta bias factors
 * @param [in] sorted whether the output values are sorted
 * @param [in] axis the axis along which to compute top k
 * @param [in] workSpace workspace in device memory
 * @param [in] workSpaceSizeInBytes workspace size in bytes
 * @param [in] outputValuesDesc descriptor of output values tensor
 * @param [in|out] outputValues output values data in device memory
 * @param [in] outputIndicesDesc descriptor of output indices tensor
 * @param [in|out] outputIndices output indices data in device memory
* @return ccStatus_t
*/
ccStatus_t ccTopKV2Forward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t kDesc, const void *k, const void *beta, const bool sorted,
const int64_t axis, void *workSpace, const uint32_t workSpaceSizeInBytes,
const ccTensorDescriptor_t outputValuesDesc, void *outputValues,
const ccTensorDescriptor_t outputIndicesDesc, void *outputIndices);
/**
* @ingroup dnn
* @brief get the workspace size of top k v2
 * @param [in] xDesc descriptor of input tensor x
 * @param [in] kDesc descriptor of input tensor k
 * @param [in] indiceDesc descriptor of output indices tensor
 * @param [in] k input data k
 * @param [in] axis the axis along which to compute top k
 * @param [in|out] sizeInBytes point to workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetTopKV2ForwardWorkspaceSize(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t kDesc,
const ccTensorDescriptor_t indiceDesc, const void *k, const int64_t axis,
uint32_t *sizeInBytes);
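/*
 * Usage sketch (illustrative): top-k needs the output shape, a workspace
 * query, and then the forward call producing values and indices. Float
 * alpha/beta and CC_STATUS_SUCCESS are assumed conventions; buffers come
 * from the caller.
 *
 * ccStatus_t RunTopKV2(ccHandle_t handle, ccTensorDescriptor_t xDesc, const void *x,
 *                      ccTensorDescriptor_t kDesc, const void *k,
 *                      ccTensorDescriptor_t indiceDesc, int64_t axis, bool sorted,
 *                      void *workSpace, uint32_t workSpaceBytes,
 *                      ccTensorDescriptor_t valuesDesc, void *values,
 *                      ccTensorDescriptor_t indicesDesc, void *indices) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetTopKV2OutputDim(xDesc, kDesc, k, axis, &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   uint32_t needed = 0;
 *   status = ccGetTopKV2ForwardWorkspaceSize(xDesc, kDesc, indiceDesc, k, axis, &needed);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccTopKV2Forward(handle, &alpha, xDesc, x, kDesc, k, &beta, sorted, axis,
 *                          workSpace, workSpaceBytes, valuesDesc, values,
 *                          indicesDesc, indices);
 * }
 */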
/**
* @ingroup dnn
* @brief Get unsorted segment reduction output dim
* @param [in] xDesc descriptor of input tensor
* @param [in] segmentIdsDesc descriptor of input segmentIds tensor
* @param [in] segmentsNum output slice num
* @param [out] dimCnt output tensor dim cnt
* @param [out] dim output tensor dim
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetUnsortedSegmentReductionOutputDim(const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t segmentIdsDesc, int32_t segmentsNum,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
 * @brief unsorted segment sum forward computation
 * @param [in] handle cce handle
 * @param [in] segmentsNum output slice num
 * @param [in] alpha scaling factors
 * @param [in] xDesc descriptor of input tensor
 * @param [in] x input data in device memory
 * @param [in] segmentIdsDesc descriptor of input segmentIds tensor
 * @param [in] segmentIds input segmentIds data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccUnsortedSegmentSumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
const void *x, const ccTensorDescriptor_t segmentIdsDesc, const void *segmentIds,
const int32_t segmentsNum, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief reverse sequence forward computation
* @param [in] handle cce handle
 * @param [in] alpha scaling factors
 * @param [in] inputDesc descriptor of input tensor
 * @param [in] input input data in device memory
 * @param [in] seqLengthsDesc descriptor of sequence lengths tensor
 * @param [in] seqLengths sequence lengths data in device memory
 * @param [in] seqAxis the axis which is partially reversed
 * @param [in] batchAxis the axis along which reversal is performed
 * @param [in] beta bias factors
 * @param [in] outputDesc descriptor of output tensor
 * @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccReverseSequenceForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t inputDesc,
const void *input, const ccTensorDescriptor_t seqLengthsDesc,
const void *seqLengths, int64_t seqAxis, int64_t batchAxis, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
 * @brief elementwise equal comparison between two tensors.
 * @param [in] alpha reserved.
 * @param [in] xDesc description of the left operand tensor.
 * @param [in] x data pointer of the left operand tensor.
 * @param [in] yDesc description of the right operand tensor.
 * @param [in] y data pointer of the right operand tensor.
 * @param [in] beta reserved.
 * @param [in] outputDesc description of the output tensor.
 * @param [out] output data pointer of the output tensor.
* @return ccStatus_t
*/
ccStatus_t ccEqualForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
 * @brief get output shape of equal.
 * @param [in] xDesc description of the left operand tensor.
 * @param [in] yDesc description of the right operand tensor.
* @param [out] dimCnt output tensor dim cnt
* @param [out] dim output tensor dim
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetEqualOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t yDesc, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief invert permutation forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccInvertPermutationForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc,
const void *x, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
* @brief get the workspace size of non max suppression
* @param [in] handle descriptor of handle
* @param [in] scoresDesc descriptor of input tensor scoresDesc
* @param [in] boxesDesc descriptor of input tensor boxesDesc
* @param [in|out] sizeInBytes point to workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetNonMaxSuppressionWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t scoresDesc,
const ccTensorDescriptor_t boxesDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief get the output dim of non max suppression
* @param [in] scoresDesc descriptor of input tensor scoresDesc
* @param [in] maxOutPutSize the max size of output
* @param [in|out] dimCnt point to the count of dim
* @param [in|out] dim[] the array of output dim
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetNonMaxSuppressionOutputDim(const ccTensorDescriptor_t scoresDesc, const int32_t maxOutPutSize,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief multinomial forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] numSamples number of independent samples to draw for each row slice
 * @param [in] seed1 used to create a random seed for the distribution
 * @param [in] seed2 used to create a random seed for the distribution
* @param [in] workSpace work space for inter access
* @param [in] workSpaceSizeInBytes work space size
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccMultinomialForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
int32_t numSamples, int64_t seed1, int64_t seed2, void *workSpace,
uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
* @brief get output dim of generated one-hot tensor.
 * @param [in] indicesDesc Indices description of one-hot position.
 * @param [in] depth size of the one-hot dimension.
 * @param [in] axis the axis to fill.
 * @param [out] dimCnt point to the output dimCnt.
 * @param [out] dim arrays to save dims.
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetOneHotOutputDim(const ccTensorDescriptor_t indicesDesc, int32_t depth, int32_t axis, int32_t *dimCnt,
int32_t *dim, int32_t dimLen);
/**
* @ingroup dnn
* @brief generate an one-hot Tensor use given on/off value.
* @param [in] handle Stream handle.
* @param [in] alpha reserved.
* @param [in] indicesDesc Indices description of one-hot position.
* @param [in] indices Data pointer of indices.
* @param [in] onDesc On value description.
* @param [in] on Data pointer of on value.
* @param [in] offDesc Off value description.
* @param [in] off Data pointer of off value.
 * @param [in] depth size of the one-hot dimension.
 * @param [in] axis the axis to fill.
 * @param [in] beta reserved.
 * @param [in] outputDesc Description of the generated one-hot tensor.
 * @param [out] output Data pointer of output.
* @return ccStatus_t
*/
ccStatus_t ccOneHotForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t indicesDesc,
const void *indices, const ccTensorDescriptor_t onDesc, const void *on,
const ccTensorDescriptor_t offDesc, const void *off, const int32_t depth, const int32_t axis,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
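/*
 * Usage sketch (illustrative): size the one-hot output from the indices
 * descriptor, depth and axis, then run the forward call with on/off scalars.
 * Float alpha/beta and CC_STATUS_SUCCESS are assumed conventions.
 *
 * ccStatus_t RunOneHot(ccHandle_t handle, ccTensorDescriptor_t indicesDesc,
 *                      const void *indices, ccTensorDescriptor_t onDesc,
 *                      const void *on, ccTensorDescriptor_t offDesc,
 *                      const void *off, int32_t depth, int32_t axis,
 *                      ccTensorDescriptor_t outputDesc, void *output) {
 *   int32_t dim[8] = {0};
 *   int32_t dimCnt = 0;
 *   ccStatus_t status = ccGetOneHotOutputDim(indicesDesc, depth, axis, &dimCnt, dim, 8);
 *   if (status != CC_STATUS_SUCCESS) return status;
 *   const float alpha = 1.0f, beta = 0.0f;
 *   return ccOneHotForward(handle, &alpha, indicesDesc, indices, onDesc, on,
 *                          offDesc, off, depth, axis, &beta, outputDesc, output);
 * }
 */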
/**
* @ingroup dnn
* @brief get the workspaceSize of multinomial
 * @param [in] xDesc descriptor of input tensor
 * @param [out] sizeInBytes workspace size in bytes
* @return ccStatus_t
*/
ccStatus_t ccGetMultinomialWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief get the output dimension info of multinomial
* @param [in] xDesc descriptor of input tensor
* @param [in] numSample number of independent samples to draw for each row slice
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetMultinomialOutputDim(const ccTensorDescriptor_t xDesc, int32_t numSample, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief get the output dimension info of BiasAddBackward
* @param [in] dyDesc descriptor of input tensor
 * @param [in|out] n outputTensor [N]CHW
* @param [in|out] c outputTensor N[C]HW
* @param [in|out] h outputTensor NC[H]W
* @param [in|out] w outputTensor NCH[W]
* @return ccStatus_t
*/
ccStatus_t ccGetBiasAddBackwardOutputDim(const ccTensorDescriptor_t dyDesc, int32_t *n, int32_t *c, int32_t *h,
int32_t *w);
/**
* @ingroup dnn
* @brief biasadd backward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] dyDesc descriptor of input data
* @param [in] dy input data in device memory
* @param [in] beta common scale factor
* @param [in] dbDesc descriptor of output data
* @param [in|out] db output data in device memory
* @return ccStatus_t
*/
#ifndef DAVINCI_LITE
ccStatus_t ccBiasAddBackward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t dyDesc, const void *dy,
const void *beta, const ccTensorDescriptor_t dbDesc, void *db);
ccStatus_t ccMaxPoolWithArgmaxForward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t yDesc, void *y, const ccTensorDescriptor_t argMaskDesc,
void *argMask);
#endif
ccStatus_t ccCreatePoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
ccStatus_t ccDestroyPoolingMaskDescriptor(ccTensorDescriptor_t *poolingMaskDesc);
ccStatus_t ccSetPoolingMaskTensorDescriptor(ccTensorDescriptor_t poolingMaskDesc, ccTensorFormat_t format,
ccDataType_t dataType, int32_t n, int32_t c, int32_t h, int32_t w,
int32_t windowH, int32_t windowW);
ccStatus_t ccGetPoolingMaskTensorSizeInBytes(ccTensorDescriptor_t poolingMaskDesc, uint32_t *size);
/**
* @ingroup dnn
* @brief get the mask output dimension info of maxpooling training forward
* @param [in] pooling descriptor of convolution operator
* @param [in] xDesc descriptor of input tensor
* @param [in|out] n point to batch size
* @param [in|out] c point to channels
* @param [in|out] h point to height of feature map
* @param [in|out] w point to width of feature map
* @param [in|out] windowH point to height of window
 * @param [in|out] windowW point to width of window
* @return ccStatus_t
*/
ccStatus_t ccGetPoolingMaskDim(const ccPoolingDescriptor_t poolingDesc, const ccTensorDescriptor_t xDesc, int32_t *n,
int32_t *c, int32_t *h, int32_t *w, int32_t *windowH, int32_t *windowW);
#ifndef DAVINCI_LITE
ccStatus_t ccSoftmaxCrossEntropyLoss(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
const ccTensorDescriptor_t logitsDesc, const void *logits,
const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
const void *beta, const ccTensorDescriptor_t lossDesc, void *loss);
ccStatus_t ccSoftmaxCrossEntropyDx(ccHandle_t handle, ccSoftmaxAlgo_t algo, ccSoftmaxMode_t mode,
ccCrossEntropyMode_t ceMode, const void *alpha, const void *scale,
const ccTensorDescriptor_t logitsDesc, const void *logits,
const ccTensorDescriptor_t labelsDesc, const void *labels, const void *labelSmooth,
const void *beta, const ccTensorDescriptor_t dxDesc, void *dx);
ccStatus_t ccAvgPoolingBackward(ccHandle_t handle, const ccPoolingDescriptor_t poolingDesc, const void *alpha,
const ccTensorDescriptor_t dyDesc, const void *dy, const void *beta,
const ccTensorDescriptor_t dxDesc, const void *dx);
ccStatus_t ccTrainingAssignOp(ccHandle_t handle, const ccAssignOpMode_t assignOpDesc, const void *alpha,
const void *beta, const ccTensorDescriptor_t aDesc, void *a,
const ccTensorDescriptor_t bDesc, const void *b);
/**
* @ingroup dnn
* @brief momentum optimizer for variable update
* @param [in] handle cce handle
 * @param [in] inputDesc descriptor of input tensors: gradient, accumulation, variable
 * @param [in] gradient gradient input
 * @param [in|out] accumulation accumulation input and updated output
 * @param [in|out] variable variable input and updated output
 * @param [in] algo indicates whether FP16 output is needed
 * @param [in] momentum scaler to control accumulation
 * @param [in] learningRate scaler
 * @param [in] lossScaleReciprocal scaler
* @param [in] workSpace additional memory address
* @param [in] workSpaceSizeInBytes additional memory size
* @param [out] variableUpdatedFP16Desc descriptor of FP16 output tensor: variableUpdatedFP16
* @param [out] variableUpdatedFP16 variableUpdatedFP16
* @return ccStatus_t
*/
ccStatus_t ccApplyMomentum(ccHandle_t handle, const ccTensorDescriptor_t inputDesc, const void *gradient,
void *accumulation, void *variable, const ccMomentumAlgo_t algo, const void *momentum,
const void *learningRate, const void *lossScaleReciprocal, void *workSpace,
const uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t variableUpdatedFP16Desc,
void *variableUpdatedFP16);
ccStatus_t ccSsdClassifyLossTrain(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t labelDesc,
const void *label, const ccTensorDescriptor_t greaterConstDesc,
const void *greaterConst, const ccTensorDescriptor_t subConstDesc,
const void *subConst, const ccTensorDescriptor_t sparseDesc, const void *sparse,
const void *beta, const ccTensorDescriptor_t castoutDesc, const void *castout,
const ccTensorDescriptor_t muloutDesc, const void *mulout);
#endif
/**
* @ingroup dnn
* @brief get the workspace size of applymomentum
 * @param [in] inputDesc descriptor of input tensor
 * @param [in|out] sizeInBytes workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetApplyMomentumWorkspaceSize(const ccTensorDescriptor_t inputDesc, uint32_t *sizeInBytes);
#ifndef DAVINCI_LITE
ccStatus_t ccHwck2FracZ(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
const ccFilterDescriptor_t yDesc, void *y);
ccStatus_t ccFracZ2Hwck(ccHandle_t handle, const ccFilterDescriptor_t xDesc, const void *x,
const ccFilterDescriptor_t yDesc, void *y);
ccStatus_t ccAddNForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
const void *x[], const void *beta, void *workSpace, uint32_t workSpaceSizeInBytes,
const ccTensorDescriptor_t yDesc, void *y);
#endif
ccStatus_t ccGetAddNForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc, const int32_t inputNum,
const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes);
ccStatus_t ccGetAddNForwardOutputDim(const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t *dim, int32_t dimLen);
ccStatus_t ccAddTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
ccStatus_t ccGetAddTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
uint32_t *sizeInBytes);
ccStatus_t ccGetAddTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
ccStatus_t ccMulTrainForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t wDesc, const void *w, const void *beta, void *workSpace,
uint32_t workSpaceSizeInBytes, const ccTensorDescriptor_t yDesc, void *y);
ccStatus_t ccGetMulTrainForwardWorkspaceSize(ccHandle_t handle, const ccTensorDescriptor_t xDesc,
const ccTensorDescriptor_t wDesc, const ccTensorDescriptor_t yDesc,
uint32_t *sizeInBytes);
ccStatus_t ccGetMulTrainForwardOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief get workspace size
* @param [in] xDesc descriptor of input tensor
* @param [in|out] sizeInBytes workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetRandomShuffleWorkspaceSize(const ccTensorDescriptor_t xDesc, uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief random shuffle forward computation
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] workspace temporary space
* @param [in] workspaceSizeInBytes temporary space size
 * @param [in] seed1 random seed used to generate random number
* @param [in] seed2 random seed used to generate random number
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccRandomShuffleForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
void *workspace, const uint32_t workspaceSizeInBytes, const int64_t seed1,
const int64_t seed2, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
* @brief sin forward:
 *          supported data types: float, float16, double
 *          supported data format: ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] input input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSinForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief cos forward:
 *          supported data types: float, float16, double
 *          supported data format: ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] input input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccCosForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief tan forward:
 *          supported data types: float, float16, double
 *          supported data format: ND
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] input input data in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccTanForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *input,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the output dimension info of unstack
* @param [in] xDesc descriptor of input tensor
* @param [in] axis the axis to unstack along
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetUnstackOutputDim(const ccTensorDescriptor_t xDesc, int32_t axis, int32_t *dimCnt, int32_t dim[],
int32_t dimLen);
/**
* @ingroup dnn
* @brief unstack forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data
* @param [in] x input data in device memory
* @param [in] num the length of the dimension axis
* @param [in] axis the axis to unstack along
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccUnstackForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
int32_t num, int32_t axis, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output[]);
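/*
 * Usage sketch (illustrative only): query the per-slice output shape, then
 * unstack along axis 0. Assumes num equals the size of xDesc on that axis
 * and that output[] holds num device pointers allocated by the caller.
 *
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ccStatus_t ret = ccGetUnstackOutputDim(xDesc, 0, &dimCnt, dim, 8);
 *   // build outputDesc from dimCnt/dim and allocate each output[i], then:
 *   float alpha = 1.0f, beta = 0.0f;
 *   ret = ccUnstackForward(handle, &alpha, xDesc, x, num, 0, &beta,
 *                          outputDesc, output);
 */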
ccStatus_t ccResizeNearestNeighborCpuForward(ccHandle_t handle, const ccResizeNearestNeighborDescriptor_t resizeDesc,
const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the output dimension info of resize nearest neighbor
* @param [in] resizeDesc descriptor of resize
* @param [in] xDesc descriptor of input tensor
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetResizeNearestNeighborOutputDim(const ccResizeNearestNeighborDescriptor_t resizeDesc,
const ccTensorDescriptor_t xDesc, int32_t *dimCnt, int32_t dim[],
int32_t dimLen);
/**
* @ingroup dnn
* @brief create descriptor of ResizeNearestNeighbor
* @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
* @return ccStatus_t
*/
ccStatus_t ccCreateResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of ResizeNearestNeighbor
* @param [in|out] resizeDesc point to descriptor of ResizeNearestNeighbor attr
* @return ccStatus_t
*/
ccStatus_t ccDestroyResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t *resizeDesc);
/**
* @ingroup dnn
* @brief set descriptor of ResizeNearestNeighbor.
* @param [in|out] resizeDesc descriptor of resize nearest neighbor operator
* @param [in] alignCorners whether the centers of input and output are aligned
* @param [in] height height of output
* @param [in] width width of output
* @return ccStatus_t
*/
ccStatus_t ccSetResizeNearestNeighborDescriptor(ccResizeNearestNeighborDescriptor_t resizeDesc, bool alignCorners,
int32_t height, int32_t width);
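/*
 * Usage sketch (illustrative only): the typical descriptor lifecycle for
 * resize-nearest-neighbor, assuming the usual opaque-handle convention for
 * descriptor types. The 224x224 target size is made up for the example.
 *
 *   ccResizeNearestNeighborDescriptor_t resizeDesc = nullptr;
 *   ccStatus_t ret = ccCreateResizeNearestNeighborDescriptor(&resizeDesc);
 *   ret = ccSetResizeNearestNeighborDescriptor(resizeDesc,
 *                                              false,      // alignCorners
 *                                              224, 224);  // height, width
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ret = ccGetResizeNearestNeighborOutputDim(resizeDesc, xDesc, &dimCnt,
 *                                             dim, 8);
 *   // build outputDesc from dimCnt/dim and run the op, then:
 *   ret = ccDestroyResizeNearestNeighborDescriptor(&resizeDesc);
 */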
/**
* @ingroup dnn
 * @brief get the output dimension info of padv2
 * @param [in] xDesc descriptor of input tensor x
 * @param [in] padDesc descriptor of input paddings
 * @param [in|out] dimCnt point to the output dimCnt
 * @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetPadV2OutputDim(const ccTensorDescriptor_t xDesc, const ccPadV2Descriptor_t padDesc, int32_t *dimCnt,
int32_t dim[], int32_t dimLen);
ccStatus_t ccPadV2CpuForward(ccHandle_t handle, const ccPadV2Descriptor_t padDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief create descriptor of parameters for padv2 function
 * @param [in] padDesc point to descriptor of parameters for padv2 function
* @return ccStatus_t
*/
ccStatus_t ccCreatePadV2Descriptor(ccPadV2Descriptor_t *padDesc);
/**
* @ingroup dnn
* @brief destroy descriptor of parameters for padv2 function
 * @param [in] padDesc point to descriptor of parameters for padv2 function
* @return ccStatus_t
*/
ccStatus_t ccDestroyPadV2Descriptor(ccPadV2Descriptor_t *padDesc);
/**
 * @ingroup dnn
 * @brief init descriptor for parameter of padv2 function
* @param [in|out] padDesc descriptor of pad
* @param [in] padShapeCnt padshape count
* @param [in] padShapeLow padshape low
* @param [in] padShapeHigh padshape high
* @param [in] padMode pad mode
* @param [in] padValue pad value ptr
* @param [in] padValueType pad value data type
* @return ccStatus_t
*/
ccStatus_t ccSetPadV2Descriptor(ccPadV2Descriptor_t padDesc, const int32_t padShapeCnt, const int32_t padShapeLow[],
const int32_t padShapeHigh[], const ccPadMode_t padMode, const void *padValue,
const ccDataType_t padValueType);
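/*
 * Usage sketch (illustrative only): configuring a one-element constant pad
 * on H and W of a 4-D tensor. padMode and padValueType are left as
 * caller-supplied values because their enum spellings are defined elsewhere
 * in this SDK; the float pad value is an assumption for the example.
 *
 *   ccPadV2Descriptor_t padDesc = nullptr;
 *   ccStatus_t ret = ccCreatePadV2Descriptor(&padDesc);
 *   const int32_t low[4]  = {0, 0, 1, 1};   // leading pad per dimension
 *   const int32_t high[4] = {0, 0, 1, 1};   // trailing pad per dimension
 *   float padValue = 0.0f;
 *   ret = ccSetPadV2Descriptor(padDesc, 4, low, high, padMode, &padValue,
 *                              padValueType);
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ret = ccGetPadV2OutputDim(xDesc, padDesc, &dimCnt, dim, 8);
 *   ret = ccDestroyPadV2Descriptor(&padDesc);
 */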
/**
* @ingroup dnn
* @brief create descriptor of batchToSpace
* @param [in|out] batchToSpaceDesc point to descriptor of batchToSpace
* @return ccStatus_t
*/
ccStatus_t ccCreateBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
/**
* @ingroup dnn
* @brief set batchToSpaceDesc
* @param [in|out] batchToSpaceDesc descriptor of batchToSpace
* @param [in] blockShape blockShape of batchToSpace
* @param [in] crops crops of batchToSpace
* @param [in] blockShapeLength blockShapeLength of batchToSpace
* @return ccStatus_t
*/
ccStatus_t ccSetBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t batchToSpaceDesc, const int32_t *blockShape,
                                       const int32_t *crops, const int32_t blockShapeLength);
/**
* @ingroup dnn
* @brief get batchToSpaceDesc
 * @param [in] batchToSpaceDesc descriptor of batchToSpace
 * @param [in|out] blockShape blockShape of batchToSpace
 * @param [in|out] crops crops of batchToSpace
 * @param [in|out] blockShapeLength blockShapeLength of batchToSpace
* @return ccStatus_t
*/
ccStatus_t ccGetBatchToSpaceDescriptor(const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *blockShape,
                                       int32_t *crops, int32_t *blockShapeLength);
/**
* @ingroup dnn
* @brief destroy descriptor of batchToSpace
 * @param [in] batchToSpaceDesc point to descriptor of batchToSpace
* @return ccStatus_t
*/
ccStatus_t ccDestroyBatchToSpaceDescriptor(ccBatchToSpaceDescriptor_t *batchToSpaceDesc);
/**
* @ingroup dnn
* @brief get the output dimension info of batch to space
 * @param [in] xDesc descriptor of input tensor
 * @param [in] batchToSpaceDesc descriptor of batchToSpace
 * @param [in|out] dimCnt point to the output dimCnt
 * @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetBatchToSpaceOutputDim(const ccTensorDescriptor_t xDesc,
const ccBatchToSpaceDescriptor_t batchToSpaceDesc, int32_t *dimCnt, int32_t dim[],
int32_t dimLen);
/**
* @ingroup dnn
* @brief batch to space forward computation
* @param [in] handle cce handle
* @param [in] paramsDesc descriptor of input params
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccBatchToSpaceForward(ccHandle_t handle, const ccBatchToSpaceDescriptor_t paramsDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
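/*
 * Usage sketch (illustrative only): a 2x2 block rearrangement with no
 * cropping. Assumes a 4-D input whose batch size is divisible by 2*2.
 *
 *   ccBatchToSpaceDescriptor_t b2sDesc = nullptr;
 *   ccStatus_t ret = ccCreateBatchToSpaceDescriptor(&b2sDesc);
 *   const int32_t blockShape[2] = {2, 2};
 *   const int32_t crops[4] = {0, 0, 0, 0};  // [begin, end] per block dim
 *   ret = ccSetBatchToSpaceDescriptor(b2sDesc, blockShape, crops, 2);
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ret = ccGetBatchToSpaceOutputDim(xDesc, b2sDesc, &dimCnt, dim, 8);
 *   float alpha = 1.0f, beta = 0.0f;
 *   ret = ccBatchToSpaceForward(handle, b2sDesc, &alpha, xDesc, x, &beta,
 *                               outputDesc, output);
 *   ret = ccDestroyBatchToSpaceDescriptor(&b2sDesc);
 */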
/**
* @ingroup dnn
* @brief create descriptor of spaceToBatch
* @param [in|out] spaceToBatchDesc point to descriptor of spaceToBatch
* @return ccStatus_t
*/
ccStatus_t ccCreateSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
/**
* @ingroup dnn
* @brief set spaceToBatchDesc
* @param [in|out] spaceToBatchDesc descriptor of spaceToBatch
* @param [in] blockShape blockShape of spaceToBatch
* @param [in] paddings paddings of spaceToBatch
* @param [in] blockShapeLength blockShapeLength of spaceToBatch
* @return ccStatus_t
*/
ccStatus_t ccSetSpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t spaceToBatchDesc, const int32_t *blockShape,
                                       const int32_t *paddings, const int32_t blockShapeLength);
/**
* @ingroup dnn
* @brief get spaceToBatchDesc
 * @param [in] spaceToBatchDesc descriptor of spaceToBatch
 * @param [in|out] blockShape blockShape of spaceToBatch
 * @param [in|out] paddings paddings of spaceToBatch
 * @param [in|out] blockShapeLength blockShapeLength of spaceToBatch
* @return ccStatus_t
*/
ccStatus_t ccGetSpaceToBatchDescriptor(const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *blockShape,
                                       int32_t *paddings, int32_t *blockShapeLength);
/**
* @ingroup dnn
* @brief destroy descriptor of spaceToBatch
 * @param [in] spaceToBatchDesc point to descriptor of spaceToBatch
* @return ccStatus_t
*/
ccStatus_t ccDestroySpaceToBatchDescriptor(ccSpaceToBatchDescriptor_t *spaceToBatchDesc);
/**
* @ingroup dnn
* @brief get the output dimension info of space to batch
 * @param [in] xDesc descriptor of input tensor
 * @param [in] spaceToBatchDesc descriptor of spaceToBatch
 * @param [in|out] dimCnt point to the output dimCnt
 * @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetSpaceToBatchOutputDim(const ccTensorDescriptor_t xDesc,
const ccSpaceToBatchDescriptor_t spaceToBatchDesc, int32_t *dimCnt, int32_t dim[],
int32_t dimLen);
/**
* @ingroup dnn
* @brief space to batch forward computation
* @param [in] handle cce handle
* @param [in] paramsDesc descriptor of input params
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccSpaceToBatchForward(ccHandle_t handle, const ccSpaceToBatchDescriptor_t paramsDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
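/*
 * Usage sketch (illustrative only): spaceToBatch mirrors batchToSpace above,
 * with paddings taking the place of crops. A set/get round trip:
 *
 *   ccSpaceToBatchDescriptor_t s2bDesc = nullptr;
 *   ccStatus_t ret = ccCreateSpaceToBatchDescriptor(&s2bDesc);
 *   const int32_t blockShape[2] = {2, 2};
 *   const int32_t paddings[4] = {0, 0, 0, 0};
 *   ret = ccSetSpaceToBatchDescriptor(s2bDesc, blockShape, paddings, 2);
 *   int32_t gotShape[2] = {0}, gotPads[4] = {0}, gotLen = 0;
 *   ret = ccGetSpaceToBatchDescriptor(s2bDesc, gotShape, gotPads, &gotLen);
 *   ret = ccDestroySpaceToBatchDescriptor(&s2bDesc);
 */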
ccStatus_t ccTransFilterDesc2TensorDesc(ccFilterDescriptor_t wDesc, ccTensorDescriptor_t tensorDesc);
/**
 * @ingroup dnn
 * @brief get the output dimension info of extractImagePatches
 * @param [in] xDesc descriptor of input tensor x
 * @param [in] ksizes ksizes array
 * @param [in] strides strides array
 * @param [in] rates rates array
 * @param [in] padding padding type
 * @param [in|out] dimCnt point to the output dimCnt
 * @param [in|out] dim arrays to save dims
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetExtractImagePatchesOutputDim(const ccTensorDescriptor_t xDesc, const ccIntArray_t *ksizes,
const ccIntArray_t *strides, const ccIntArray_t *rates,
const ccExtractImagePatchesPadType_t padding, int32_t *dimCnt,
int32_t dim[], const int32_t dimLen);
/**
* @ingroup dnn
* @brief cum forward.
* @param [in] handle cce handle
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data, dimCnt:1~8
* @param [in] x input data in device memory
 * @param [in] axisDesc descriptor of axis tensor, dimCnt:0
* @param [in] axis which axis to cum calc, device memory
* @param [in] beta common scale factor
 * @param [in] opType calculation type, e.g. sum, prod
* @param [in] exclusive cum flag, true or false
* @param [in] reverse cum flag, true or false
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccCumForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t axisDesc, const void *axis, const void *beta, const CumOpType opType,
const bool exclusive, const bool reverse, const ccTensorDescriptor_t outputDesc, void *output);
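/*
 * Usage sketch (illustrative only): an exclusive, non-reversed cumulative
 * reduction. As documented above, the axis value itself lives in device
 * memory; the concrete CumOpType enumerator for "sum" is not declared here,
 * so it is left as a caller-supplied value.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   ccStatus_t ret = ccCumForward(handle, &alpha, xDesc, x, axisDesc, axis,
 *                                 &beta, opType,  // e.g. the sum CumOpType
 *                                 true,           // exclusive
 *                                 false,          // reverse
 *                                 outputDesc, output);
 */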
/**
* @ingroup dnn
* @brief ExtractImagePatches forward.
* @param [in] handle cce handle
* @param [in] ksizes ksizes array
* @param [in] strides strides array
* @param [in] rates rates array
* @param [in] padding padding type
* @param [in] alpha common scale factor
* @param [in] xDesc descriptor of input data x
* @param [in] x input data x in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccExtractImagePatchesForward(ccHandle_t handle, const ccIntArray_t *ksizes, const ccIntArray_t *strides,
const ccIntArray_t *rates, const ccExtractImagePatchesPadType_t padding,
const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const void *beta, const ccTensorDescriptor_t outputDesc, void *output);
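/*
 * Usage sketch (illustrative only): ksizes/strides/rates are ccIntArray_t
 * values whose construction is defined elsewhere in this SDK, so they are
 * assumed to be prepared by the caller.
 *
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ccStatus_t ret = ccGetExtractImagePatchesOutputDim(xDesc, ksizes, strides,
 *                                                      rates, padding,
 *                                                      &dimCnt, dim, 8);
 *   // build outputDesc from dimCnt/dim, then:
 *   float alpha = 1.0f, beta = 0.0f;
 *   ret = ccExtractImagePatchesForward(handle, ksizes, strides, rates,
 *                                      padding, &alpha, xDesc, x, &beta,
 *                                      outputDesc, output);
 */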
/**
 * @ingroup dnn
 * @brief get argmax output dim info
* @param [in] argDesc argmaxmin descriptor
* @param [in] xDesc descriptor of input tensor
* @param [in|out] dimCnt output dim count
* @param [in|out] dim output dim
 * @param [in] dimLen length of dim
* @return ccStatus_t
*/
ccStatus_t ccGetArgMaxOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
int32_t *dimCnt, int32_t dim[], int32_t dimLen);
/**
* @ingroup dnn
* @brief argmax forward computation
* @param [in] handle cce handle
* @param [in] argDesc argmaxmin descriptor
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] workSpace workspace pointer
* @param [in] workSpaceSizeInBytes workspace size in bytes
* @param [in] beta bias factors
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccArgMaxForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, void *workSpace,
const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t outputDesc,
void *output);
/**
* @ingroup dnn
 * @brief get the workspace size of argmax
* @param [in] argDesc descriptor of tagCcArgmaxmin
* @param [in] xDesc descriptor of input tensor
* @param [in|out] sizeInBytes workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetArgMaxWorkspaceSize(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
* @brief create descriptor of Argmaxmin
 * @param [in|out] argDesc point to descriptor of Argmaxmin attr
* @return ccStatus_t
*/
ccStatus_t ccCreateArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
/**
* @ingroup dnn
 * @brief destroy descriptor of Argmaxmin
 * @param [in|out] argDesc point to descriptor of Argmaxmin attr
* @return ccStatus_t
*/
ccStatus_t ccDestroyArgmaxminDescriptor(ccArgmaxminDescriptor_t *argDesc);
/**
* @ingroup dnn
 * @brief set descriptor of Argmaxmin
* @param [in|out] argDesc descriptor of tagCcArgmaxmin
 * @param [in] axisType axis type
* @param [in] outMaxVal whether to return the maximum value
* @param [in] topK number that returns the maximum index or maximum value
* @param [in] axis Describes which axis of the input Tensor to reduce across
* @param [in] keepDims whether to keep reduced dim
* @param [in] reduceSize the num of elements to be reduce to get topK elements, reduceSize=-1 means the total num
* of elements in axis dimension
* @param [in] reduceStride the stride for reduce operation, reduceStride=1 means the layout of target data is
* continuous
* @return ccStatus_t
*/
ccStatus_t ccSetArgmaxminDescriptor(ccArgmaxminDescriptor_t argDesc, int32_t axisType, bool outMaxVal, int64_t topK,
                                    int64_t axis, bool keepDims, int64_t reduceSize = -1, int64_t reduceStride = 1);
ccStatus_t ccArgMinForward(ccHandle_t handle, const ccArgmaxminDescriptor_t argDesc, const void *alpha,
const ccTensorDescriptor_t xDesc, const void *x, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
ccStatus_t ccGetArgMinOutputDim(const ccArgmaxminDescriptor_t argDesc, const ccTensorDescriptor_t xDesc,
int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
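/*
 * Usage sketch (illustrative only): a top-1 argmax over axis 1. The legal
 * axisType values are not documented in this header, so axisType is left as
 * a caller-supplied variable; reduceSize/reduceStride keep their defaults.
 * The ArgMin entry points above follow the same pattern, minus the
 * workspace query.
 *
 *   ccArgmaxminDescriptor_t argDesc = nullptr;
 *   ccStatus_t ret = ccCreateArgmaxminDescriptor(&argDesc);
 *   ret = ccSetArgmaxminDescriptor(argDesc, axisType,
 *                                  false,    // outMaxVal: indices only
 *                                  1,        // topK
 *                                  1,        // axis
 *                                  false);   // keepDims
 *   uint32_t wsSize = 0;
 *   ret = ccGetArgMaxWorkspaceSize(argDesc, xDesc, &wsSize);
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ret = ccGetArgMaxOutputDim(argDesc, xDesc, &dimCnt, dim, 8);
 *   // allocate workspace and output, call ccArgMaxForward, then:
 *   ret = ccDestroyArgmaxminDescriptor(&argDesc);
 */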
/**
* @ingroup dnn
* @brief lsh projection forward computation
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] hashDesc descriptor of input tensor hashDesc
* @param [in] hash input data hash in device memory
* @param [in] weightDesc descriptor of input tensor weightDesc
* @param [in] weight input data weight in device memory
* @param [in] inputDesc descriptor of input tensor inputDesc
 * @param [in] input input data in device memory
 * @param [in] type projection type: 1 = SPARSE, 2 = DENSE
* @param [in] beta bias factors
* @param [in] workSpace workSpace data in device memory
* @param [in] workSpaceSizeInBytes workSpace length
* @param [in] outputDesc descriptor of output tensor
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccLshProjectionForward(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t hashDesc,
const void *hash, const ccTensorDescriptor_t weightDesc, const void *weight,
const ccTensorDescriptor_t inputDesc, const void *input, const LSHProjectionType type,
const void *beta, void *workSpace, const uint32_t workSpaceSizeInBytes,
const ccTensorDescriptor_t outputDesc, void *output);
/**
* @ingroup dnn
* @brief get the workspace size of lsh projection
* @param [in] inputDesc descriptor of input tensor input
* @param [in] hashDataType data type of hash
* @param [in|out] sizeInBytes workspace size
* @return ccStatus_t
*/
ccStatus_t ccGetLshProjectionForwardWorkspaceSize(const ccTensorDescriptor_t inputDesc, const ccDataType_t hashDataType,
uint32_t *sizeInBytes);
/**
* @ingroup dnn
 * @brief get the output dimension info of LshProjection
* @param [in] hashDesc descriptor of hash
* @param [in] type type of mode
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen dim length
* @return ccStatus_t
*/
ccStatus_t ccGetLshProjectionOutputDim(const ccTensorDescriptor_t hashDesc, const LSHProjectionType type,
int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
/**
* @ingroup dnn
 * @brief get the weight dimension info of LshProjection
* @param [in] inputDesc descriptor of input
* @param [in|out] dimCnt point to the weight dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen dim length
* @return ccStatus_t
*/
ccStatus_t ccGetLshProjectionWeightDim(const ccTensorDescriptor_t inputDesc, int32_t *dimCnt, int32_t dim[],
const int32_t dimLen);
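/*
 * Usage sketch (illustrative only): workspace query plus forward call. The
 * hash/weight/input descriptors, device buffers, hashDataType and the
 * LSHProjectionType value are all assumed to be prepared by the caller.
 *
 *   uint32_t wsSize = 0;
 *   ccStatus_t ret = ccGetLshProjectionForwardWorkspaceSize(inputDesc,
 *                                                           hashDataType,
 *                                                           &wsSize);
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ret = ccGetLshProjectionOutputDim(hashDesc, type, &dimCnt, dim, 8);
 *   float alpha = 1.0f, beta = 0.0f;
 *   ret = ccLshProjectionForward(handle, &alpha, hashDesc, hash, weightDesc,
 *                                weight, inputDesc, input, type, &beta,
 *                                workSpace, wsSize, outputDesc, output);
 */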
/**
* @ingroup dnn
 * @brief upsample forward computation
 * @param [in] handle cce handle
 * @param [in] upsamplePara descriptor of upsample parameters
* @param [in] alpha common scale factor
* @param [in] bottomDesc descriptor of input data bottomDesc
* @param [in] bottom input data bottom in device memory
* @param [in] bottomMaskDesc descriptor of input data bottomMaskDesc
* @param [in] bottomMask input data bottomMask in device memory
* @param [in] beta common scale factor
* @param [in] outputDesc descriptor of output data
* @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccUpsampleForward(ccHandle_t handle, const ccUpsampleParaDescriptor_t upsamplePara, const void *alpha,
const ccTensorDescriptor_t bottomDesc, const void *bottom,
const ccTensorDescriptor_t bottomMaskDesc, const void *bottomMask, const void *beta,
const ccTensorDescriptor_t outputDesc, void *output);
/**
 * @ingroup dnn
 * @brief create descriptor for parameter of upsample function
* @param [in|out] upsampleDesc descriptor of upsamplepara
* @return ccStatus_t
*/
ccStatus_t ccCreateUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
/**
 * @ingroup dnn
 * @brief destroy descriptor for parameter of upsample function
* @param [in|out] upsampleDesc descriptor of upsamplepara
* @return ccStatus_t
*/
ccStatus_t ccDestroyUpsampleDescriptor(ccUpsampleParaDescriptor_t *upsampleDesc);
/**
 * @ingroup dnn
 * @brief set descriptor for parameter of upsample function
* @param [in|out] upsampleDesc descriptor of upsamplepara
* @param [in] scale the scale of height and width
* @param [in] scaleHeight the scale of height
* @param [in] scaleWidth the scale of Width
* @param [in] upsampleHeight the height of output
* @param [in] upsampleWidth the width of output
 * @param [in] padOutHeight whether to pad the output height
 * @param [in] padOutWidth whether to pad the output width
* @return ccStatus_t
*/
ccStatus_t ccSetUpsampleDescriptor(ccUpsampleParaDescriptor_t upsampleDesc, const int32_t scale,
const int32_t scaleHeight, const int32_t scaleWidth, const int32_t upsampleHeight,
const int32_t upsampleWidth, const bool padOutHeight, const bool padOutWidth);
/**
* @ingroup dnn
* @brief get the output dimension info of upsample
 * @param [in] upsamplePara descriptor of upsample parameters
* @param [in] bottomDesc descriptor of input bottom tensor
* @param [in|out] dimCnt point to the output dimCnt
* @param [in|out] dim arrays to save dims
* @param [in] dimLen the len of dim array
* @return ccStatus_t
*/
ccStatus_t ccGetUpsampleOutputDim(const ccUpsampleParaDescriptor_t upsamplePara, const ccTensorDescriptor_t bottomDesc,
int32_t *dimCnt, int32_t dim[], const int32_t dimLen);
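/*
 * Usage sketch (illustrative only): a uniform 2x upsample with no output
 * padding. The concrete sizes are made up; outH/outW stand for the desired
 * output height and width.
 *
 *   ccUpsampleParaDescriptor_t upDesc = nullptr;
 *   ccStatus_t ret = ccCreateUpsampleDescriptor(&upDesc);
 *   ret = ccSetUpsampleDescriptor(upDesc, 2, 2, 2, outH, outW,
 *                                 false, false);  // padOutHeight, padOutWidth
 *   int32_t dimCnt = 0, dim[8] = {0};
 *   ret = ccGetUpsampleOutputDim(upDesc, bottomDesc, &dimCnt, dim, 8);
 *   // build outputDesc and call ccUpsampleForward, then:
 *   ret = ccDestroyUpsampleDescriptor(&upDesc);
 */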
#ifndef DAVINCI_LITE
ccStatus_t ccMatmul(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t wDesc, const void *w, const ccTensorDescriptor_t biasDesc,
const void *bias, const ccFullConnectFwdAlgo_t algo, void *workSpace,
const uint32_t workSpaceSizeInBytes, const void *beta, const ccTensorDescriptor_t yDesc, void *y,
const bool transposeA, const bool transposeB);
ccStatus_t ccGetMatmulOutputDim(const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc, int32_t *n,
int32_t *c, int32_t *h, int32_t *w, bool transposeA, bool transposeB);
ccStatus_t ccGetMatmulWorkspaceSize(ccHandle_t handle, const ccFullConnectFwdAlgo_t algo,
const ccTensorDescriptor_t xDesc, const ccTensorDescriptor_t wDesc,
const ccTensorDescriptor_t yDesc, uint32_t *sizeInBytes, bool transposeA,
bool transposeB);
#endif
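/*
 * Usage sketch (illustrative only) for the matmul entry points above, which
 * carry no doc comments: y = op(x) * op(w) (+ bias), with cuDNN-style float
 * alpha/beta assumed and algo a caller-chosen ccFullConnectFwdAlgo_t.
 *
 *   int32_t n = 0, c = 0, h = 0, w2 = 0;
 *   ccStatus_t ret = ccGetMatmulOutputDim(xDesc, wDesc, &n, &c, &h, &w2,
 *                                         false, false);  // no transpose
 *   uint32_t wsSize = 0;
 *   ret = ccGetMatmulWorkspaceSize(handle, algo, xDesc, wDesc, yDesc,
 *                                  &wsSize, false, false);
 *   float alpha = 1.0f, beta = 0.0f;
 *   ret = ccMatmul(handle, &alpha, xDesc, x, wDesc, w, biasDesc, bias, algo,
 *                  workSpace, wsSize, &beta, yDesc, y, false, false);
 */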
/**
* @ingroup dnn
* @brief gather_v2 function
* @param [in] handle cce handle
* @param [in] alpha common scale factor
 * @param [in] paramsDesc descriptor of params tensor
 * @param [in] params params data in device memory
 * @param [in] indicesDesc descriptor of indices tensor
 * @param [in] indices indices data in device memory
 * @param [in] axisDesc descriptor of axis tensor
 * @param [in] axis axis data in device memory
 * @param [in] beta common scale factor
 * @param [in] outputDesc descriptor of output tensor
 * @param [in|out] output output data in device memory
* @return ccStatus_t
*/
ccStatus_t ccGatherV2(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t paramsDesc, const void *params,
const ccTensorDescriptor_t indicesDesc, const void *indices, const ccTensorDescriptor_t axisDesc,
const void *axis, const void *beta, const ccTensorDescriptor_t outputDesc, const void *output);
/**
* @ingroup dnn
* @brief memory_clear function
* @param [in] handle cce handle
* @param [in] addrSpaceSizeInBytes addr space size
* @param [in|out] addr device memory
* @return ccStatus_t
*/
ccStatus_t ccMemoryClear(ccHandle_t handle, const uint64_t addrSpaceSizeInBytes, const void *addr);
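/*
 * Usage sketch (illustrative only): clear the destination buffer, then
 * gather along a device-resident axis. outputSizeInBytes is assumed to be
 * computed by the caller from outputDesc.
 *
 *   ccStatus_t ret = ccMemoryClear(handle, outputSizeInBytes, output);
 *   float alpha = 1.0f, beta = 0.0f;
 *   ret = ccGatherV2(handle, &alpha, paramsDesc, params, indicesDesc,
 *                    indices, axisDesc, axis, &beta, outputDesc, output);
 */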
/**
* @ingroup dnn
 * @brief check whether the input data has overflowed (is non-finite)
* @param [in] handle cce handle
* @param [in] alpha scaling factors
* @param [in] xDesc descriptor of input tensor
* @param [in] x input data in device memory
* @param [in] yDesc descriptor of output tensor
* @param [in|out] y output data in device memory
* @param [in] beta scaling factors
* @return ccStatus_t
*/
ccStatus_t ccIsFinite(ccHandle_t handle, const void *alpha, const ccTensorDescriptor_t xDesc, const void *x,
const ccTensorDescriptor_t yDesc, const void *y, const void *beta);
}; // namespace cce
#endif // DNN_OP_H__