!1241 update include header files 0311

From: @shenwei41
Reviewed-by: @lilongfei15,@xsmq
Signed-off-by: @xsmq
pull/1241/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit c0f3dcb4f4

@ -53,9 +53,9 @@ static const uint32_t ACL_ERROR_GE_AIPP_MODE_INVALID = 145016;
static const uint32_t ACL_ERROR_GE_OP_TASK_TYPE_INVALID = 145017;
static const uint32_t ACL_ERROR_GE_OP_KERNEL_TYPE_INVALID = 145018;
static const uint32_t ACL_ERROR_GE_PLGMGR_PATH_INVALID = 145019;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_TRANSSHAPE_DATATYPE_INVALID = 145022;
static const uint32_t ACL_ERROR_GE_FORMAT_INVALID = 145020;
static const uint32_t ACL_ERROR_GE_SHAPE_INVALID = 145021;
static const uint32_t ACL_ERROR_GE_DATATYPE_INVALID = 145022;
static const uint32_t ACL_ERROR_GE_MEMORY_ALLOCATION = 245000;
static const uint32_t ACL_ERROR_GE_MEMORY_OPERATE_FAILED = 245001;
static const uint32_t ACL_ERROR_GE_INTERNAL_ERROR = 545000;

File diff suppressed because it is too large Load Diff

@ -0,0 +1,58 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*!
* \file avg_pool_1d_ops.h
* \brief
*/
#ifndef OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_AVGPOOL1DOPS_H_
#include "graph/operator_reg.h"
namespace ge {
/**
*@brief Generate an auxiliary matrix . \n
*@par Inputs:
* @li x: A tensor. Must be one of the following types: uint8, int8, int16, int32,
* int64, float16, float, double. The format must be NHWC, NCHW or NC1HWC0. \n
*@par Attributes:
*@li ksize: A required int. Kernel size.
*@li strides: A required int.
*@li pads: A required listInt.
*@li ceil_mode: An optional bool. Defaults to false.
*@li count_include_pad: An optional bool. Defaults to false. \n
*@par Outputs:
*y: A tensor with the same types as "x" . \n
*@par Third-party framework compatibility
*Compatible with the TensorFlow operator Unbatch.
*NOTE(review): "Unbatch" does not obviously relate to average pooling; confirm the intended operator.
*/
REG_OP(AvgPool1DAvgMatrix)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8,
DT_INT32, DT_INT64, DT_DOUBLE}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16, DT_INT8, DT_INT16, DT_UINT8,
DT_INT32, DT_INT64, DT_DOUBLE}))
.REQUIRED_ATTR(ksize, Int)
.REQUIRED_ATTR(strides, Int)
.REQUIRED_ATTR(pads, ListInt)
.ATTR(ceil_mode, Bool, false)
.ATTR(count_include_pad, Bool, false)
.OP_END_FACTORY_REG(AvgPool1DAvgMatrix)
}
#endif

@ -2454,6 +2454,25 @@ REG_OP(Eltwise)
.ATTR(coeff, ListFloat, {})
.OP_END_FACTORY_REG(Eltwise)
/**
*@brief Computes the inverse error function of each element of input. \n
*@par Inputs:
*One input, including:
* @li input_x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Outputs:
*output_y: A Tensor with the same type and shape as input_x. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Erfinv. \n
*/
REG_OP(Erfinv)
.INPUT(input_x, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(output_y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(Erfinv)
/**
*@brief Computes element-wise population count. \n

@ -1516,6 +1516,96 @@ REG_OP(DenseImageWarp)
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DenseImageWarp)
/**
*@brief Calculate the resize_d function. \n
*@par Inputs:
*One input, including:
* @li x: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
*@li sizes: A required listInt. \n
*@li scales: An optional listFloat.
* Defaults to none (empty list). \n
*@li roi: An optional listInt.
* Defaults to none (empty list). \n
*@li coordinate_transformation_mode: An optional String.
* Defaults to "half_pixel". \n
*@li cubic_coeff_a: An optional float.
* Defaults to -0.75. \n
*@li exclude_outside: An optional int.
* Defaults to 0. \n
*@li extrapolation_value: An optional float.
* Defaults to 0.0. \n
*@li mode: An optional String.
* Defaults to "nearest". \n
*@li nearest_mode: An optional String.
* Defaults to "round_prefer_floor". \n
*@par Outputs:
*y: A Tensor with the same type as x;
* its shape depends on x and sizes. \n
*/
REG_OP(ResizeD)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(sizes, ListInt)
.ATTR(scales, ListFloat, {})
.ATTR(roi, ListInt, {})
.ATTR(coordinate_transformation_mode, String, "half_pixel")
.ATTR(cubic_coeff_a, Float, -0.75)
.ATTR(exclude_outside, Int, 0)
.ATTR(extrapolation_value, Float, 0.0)
.ATTR(mode, String, "nearest")
.ATTR(nearest_mode, String, "round_prefer_floor")
.OP_END_FACTORY_REG(ResizeD)
/**
*@brief Calculate the resize_grad_d function. \n
*@par Inputs:
*One input, including:
* @li grads: A tensor. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
*@li original_size: A required listInt. \n
*@li roi: An optional listInt.
* Defaults to none (empty list). \n
*@li scales: An optional listFloat.
* Defaults to none (empty list). \n
*@li coordinate_transformation_mode: An optional String.
* Defaults to "half_pixel". \n
*@li cubic_coeff_a: An optional float.
* Defaults to -0.75. \n
*@li exclude_outside: An optional int.
* Defaults to 0. \n
*@li extrapolation_value: An optional float.
* Defaults to 0.0. \n
*@li mode: An optional String.
* Defaults to "nearest". \n
*@li nearest_mode: An optional String.
* Defaults to "round_prefer_floor". \n
*@par Outputs:
*y: A Tensor with the same type as grads;
* its shape presumably follows original_size (TODO confirm; the earlier text
* referred to "x" and "sizes", which this operator does not have). \n
*/
REG_OP(ResizeGradD)
.INPUT(grads, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.REQUIRED_ATTR(original_size, ListInt)
.ATTR(roi, ListInt, {})
.ATTR(scales, ListFloat, {})
.ATTR(coordinate_transformation_mode, String, "half_pixel")
.ATTR(cubic_coeff_a, Float, -0.75)
.ATTR(exclude_outside, Int, 0)
.ATTR(extrapolation_value, Float, 0.0)
.ATTR(mode, String, "nearest")
.ATTR(nearest_mode, String, "round_prefer_floor")
.OP_END_FACTORY_REG(ResizeGradD)
/**
*@brief Computes the gradients of DenseImageWarp with respect to image and flow. \n
@ -1535,5 +1625,81 @@ REG_OP(DenseImageWarpGrad)
.OUTPUT(grad_image, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(grad_flow, TensorType({DT_FLOAT, DT_FLOAT16}))
.OP_END_FACTORY_REG(DenseImageWarpGrad)
/**
*@brief This operation samples input x by using interpolation based on flow field grid,
* which is usually generated by affine_grid. The grid of shape [N, H, W, 2] is the concatenation of
* (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of
* input data x and y is indexing the 3rd dimension (in height dimension), finally the result is
* the interpolation value of the 4 nearest corner points. The output tensor shape will be [N, C, H, W].
*@par Inputs:
*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
*@par Attributes:
*@li interpolation_mode: An optional string specifying the interpolation method. Only 'bilinear' is
* supported for now .
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
*@li align_corners: An optional bool. If "true", the centers of the corner
* pixels of the input and output tensors are aligned. Defaults to "false" .
*@par Outputs:
*y: Returns 4-D Tensor with the same dtype as `x`.
*@par Third-party framework compatibility
*Compatible with pytorch GridSampler2D operator.
*/
REG_OP(GridSampler2D)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(interpolation_mode, String, "bilinear")
.ATTR(padding_mode, String, "zeros")
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(GridSampler2D)
/**
*@brief This operation unnormalizes the input grid, which is usually generated by affine_grid.
*@par Inputs:
*@li grid: flow field grid, 4-D Tensor with shape `[batch, height, width, 2]`.
*@li assist: Assist matrix, a 4-D tensor of type float16 or float32.
*@par Attributes:
*@li align_corners: An optional bool. If "true", the centers of the corner
* pixels of the input and output tensors are aligned. Defaults to "false" .
*@par Outputs:
*@li diff: Returns 4-D Tensor with the same shape and dtype as `grid`.
*@li position: Returns 4-D Tensor of type int32 with the same shape as `grid`.
*/
REG_OP(GridUnnormal)
.INPUT(grid, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(assist, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(diff, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(position, TensorType({DT_INT32}))
.ATTR(align_corners, Bool, false)
.OP_END_FACTORY_REG(GridUnnormal)
/**
*@brief This operation unfolds input x based on an unnormalized grid, which is generated by GridUnnormal.
*@par Inputs:
*@li x: 4-D Tensor with shape `[batch, channels, height, width]`.
*@li position: 4-D Tensor of type int32 with shape `[batch, output_height, output_width, 2]`.
*@par Attributes:
*@li padding_mode: An optional string specifying the pad method. Only 'zeros' is supported for now .
*@par Outputs:
*y: Returns 4-D Tensor with the same dtype as `x`.
*/
REG_OP(ImageUnfold)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(position, TensorType({DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(padding_mode, String, "zeros")
.OP_END_FACTORY_REG(ImageUnfold)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_IMAGE_OPS_H_

File diff suppressed because it is too large Load Diff

@ -982,6 +982,65 @@ REG_OP(SoftMarginLossGrad)
.ATTR(reduction, String, "mean")
.OP_END_FACTORY_REG(SoftMarginLossGrad)
/**
*@brief Computes the batched p-norm distance between each pair of
*the two collections of row vectors. \n
*@par Inputs:
*Two inputs, including:
* @li x1: A tensor with shape: BxPxM. Must be one of the following types:
* float16, float32. \n
* @li x2: A tensor with shape: BxRxM. Must be one of the following types:
* float16, float32. \n
*@par Attributes:
* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n
*@par Outputs:
* y: A Tensor with the same type as x1 and with shape BxPxR. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Cdist. \n
*/
REG_OP(Cdist)
.INPUT(x1, TensorType({DT_FLOAT16, DT_FLOAT}))
.INPUT(x2, TensorType({DT_FLOAT16, DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
.ATTR(p, Float, 2.0)
.OP_END_FACTORY_REG(Cdist)
/**
*@brief Computes the grad of x1 in cdist. \n
*@par Inputs:
*Four inputs, including:
* @li grad: Grad with shape BxPxR. Must be one of the following types:
* float16, float32. \n
* @li x1: A tensor with shape: BxPxM. Must be one of the following types:
* float16, float32. \n
* @li x2: A tensor with shape: BxRxM. Must be one of the following types:
* float16, float32. \n
* @li cdist: Output tensor of cdist forward with shape: BxPxR.
* Must be one of the following types: float16, float32. \n
*@par Attributes:
* @li p: An optional float >= 0 or inf. Defaults to 2.0. \n
*@par Outputs:
* y: A Tensor with the same type and shape as x1. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Cdist Backward. \n
*/
REG_OP(CdistGrad)
.INPUT(grad, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x1, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(x2, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(cdist, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.ATTR(p, Float, 2.0)
.OP_END_FACTORY_REG(CdistGrad)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATH_OPS_H_

@ -1065,7 +1065,37 @@ REG_OP(Tril)
.ATTR(diagonal, Int, 0)
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(Tril)
/**
*@brief Sums the product of the elements of the input operands along dimensions specified
* using a notation based on the Einstein summation convention. \n
*@par Inputs:
* One dynamic input:
* @li x: A list of Tensors (the operands). Must be one of the following types: int32, float16, float32.
* It's a dynamic input. \n
*@par Attributes:
*@li equation: A required string. The subscripts for the Einstein summation.
*@li tensor_size: A required int. Tensor size of input
* (presumably the number of tensors passed in "x" - TODO confirm). \n
*@par Outputs:
*y: The result Tensor. Must be one of the following types: int32, float16, float32. \n
*@attention Constraints:
*Input tensor_size must be Int. \n
*@par Third-party framework compatibility
*Compatible with Pytorch einsum operator.
*/
REG_OP(EinSum)
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32}))
.REQUIRED_ATTR(equation, String)
.REQUIRED_ATTR(tensor_size, Int)
.OP_END_FACTORY_REG(EinSum)
} // namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_MATRIX_CALCULATION_OPS_H_

@ -427,6 +427,33 @@ REG_OP(MVN)
.ATTR(eps, Float, 1e-9)
.OP_END_FACTORY_REG(MVN)
/**
*@brief Normalizes the input . \n
*@par Inputs:
* One input:
*x: An NCHW tensor of type float16 or float32 . \n
*@par Attributes:
*@li eps: An optional float32 epsilon for not dividing by zero. Defaults to "1e-9" . \n
*@li axes: An optional list of integers, the axes along which to reduce. Defaults to "[0, 2, 3]" . \n
*@par Outputs:
*y: An NCHW tensor of type float16 or float32 . \n
*@attention Constraints:
* The input tensor must have the NCHW format, whose shape length must be 4.
*@par Third-party framework compatibility
* Compatible with the ONNX operator MeanVarianceNormalization.
*/
REG_OP(MVNV2)
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16})) /* "First operand." */
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16})) /* "Result, has same element type as inputs" */
.ATTR(eps, Float, 1e-9)
.ATTR(axes, ListInt, {0, 2, 3})
.OP_END_FACTORY_REG(MVNV2)
/**
*@brief Normalizes the input "x1" . \n
@ -1205,6 +1232,33 @@ REG_OP(Centralization)
.ATTR(axes, ListInt, {-1})
.OP_END_FACTORY_REG(Centralization)
/**
*@brief Roll the tensor along the given dimension(s).
* Elements that are shifted beyond the last position are re-introduced at the first position.
* If a dimension is not specified, the tensor will be flattened before rolling and then restored to the original shape. \n
*@par Inputs:
*One input, including:
* @li x: A tensor . Must be one of the following types:
* float16, float32, int32, uint32, int8, uint8. \n
*@par Attributes:
* @li shifts: A required listInt. The number of places by which the elements of the tensor are shifted. \n
* @li dims: An optional listInt. Axis along which to roll. Defaults to an empty list. \n
*@par Outputs:
* y: A Tensor with the same type and shape as x. \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator Roll. \n
*/
REG_OP(Roll)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT,DT_INT32,DT_UINT32,DT_INT8,DT_UINT8}))
.REQUIRED_ATTR(shifts, ListInt)
.ATTR(dims, ListInt, {})
.OP_END_FACTORY_REG(Roll)
/**
*@brief Calculate the loss. Creates a criterion that optimizes a two-class classification
logistic loss between input_x and input_y (containing 1 or -1). \n

@ -49,5 +49,60 @@ REG_OP(InTopKV2)
.INPUT(k, TensorType({IndexNumberType}))
.OUTPUT(precision, TensorType({DT_BOOL}))
.OP_END_FACTORY_REG(InTopKV2)
/**
*@brief Performs batch normalization . \n
*@par Inputs:
* Five inputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li x: A 4D or 5D Tensor of type float16 or float32, with format NHWC or NCHW for 4D or NC1HWC0 for 5D.
*@li scale: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the scaling factor.
*@li offset: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the offset.
*@li mean: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the mean used for inference. Must be "None" if the
* operation is used for training.
*@li variance: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be
* 5D if input "x" is with format NC1HWC0. Specifies the variance used for inference. Must be "None"
* if the operation is used for training . \n
*@par Attributes:
*@li epsilon: An optional float32, specifying the small value added to variance to avoid dividing by zero.
* Defaults to "0.0001".
*@li data_format: An optional string, specifying the format of "x". Defaults to "NHWC".
*@li is_training: An optional bool, specifying if the operation is used for training or inference.
* Defaults to "True" . \n
*@par Outputs:
* Five outputs, including: (NHWC, NCHW, or NC1HWC0 supported)
*@li y: A 4D or 5D Tensor of type float16 or float32 for the normalized "x", with format NHWC or NCHW for 4D
* or NC1HWC0 for 5D.
*@li batch_mean: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW. Must be 5D
* if input "x" is with format NC1HWC0. Specifies the mean of "x".
*@li batch_variance: A Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
* Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x".
*@li reserve_space_1: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
* Must be 5D if input "x" is with format NC1HWC0. Specifies the mean of "x" for gradient computation.
* Pass "None" to skip this output.
*@li reserve_space_2: An optional Tensor of type float32. Must be 1D if input "x" is with format NHWC or NCHW.
* Must be 5D if input "x" is with format NC1HWC0. Specifies the variance of "x" for gradient computation.
* Pass "None" to skip this output . \n
*@attention Constraints:
*@li If the operation is used for inference and outputs "reserve_space_1" and "reserve_space_2" are available,
* then "reserve_space_1" has the same value as "mean" and "reserve_space_2" has the same value as "variance".
*@li For Ascend 310, the result accuracy fails to reach 1 due to the square root instruction . \n
*/
REG_OP(FusedBatchNormV2)
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
.INPUT(scale, TensorType({DT_FLOAT}))
.INPUT(offset, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(mean, TensorType({DT_FLOAT}))
.OPTIONAL_INPUT(variance, TensorType({DT_FLOAT}))
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
.OUTPUT(batch_mean, TensorType({DT_FLOAT}))
.OUTPUT(batch_variance, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_1, TensorType({DT_FLOAT}))
.OUTPUT(reserve_space_2, TensorType({DT_FLOAT}))
.ATTR(epsilon, Float, 0.0001)
.ATTR(data_format, String, "NHWC")
.ATTR(is_training, Bool, true)
.OP_END_FACTORY_REG(FusedBatchNormV2)
}// namespace ge
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_OPS_H_

@ -1502,14 +1502,14 @@ REG_OP(AdaptiveAvgPool2d)
* @brief Compute gradients of adaptive averagev2 pooling function.
* @par Inputs:
* @li input_grad: A NCHW Tensor. Must be one of the following data types:
* @li input_grad: A Tensor. Must be one of the following data types:
* float16, float32.
* @par Attributes:
* @li orig_input_shape: A required tuple or list of type int32.
* @par Outputs:
* @li output_grad: A tensor with the same shape and type as "orig_input_shape".
* @li output_grad: A tensor with the same type as "input_grad".
* @par Third-party framework compatibility
* Compatible with the Pytorch operator AdaptiveAvgPool2dGrad.

@ -530,6 +530,34 @@ REG_OP(Elu)
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(Elu)
/**
*@brief Continuously Differentiable Exponential Linear Units:
* Perform the linear unit element-wise on the input tensor X using formula:
* max(0, x) + min(0, alpha * (exp(x/alpha) - 1)). \n
*@par Inputs:
*x: A float16, float32 or double, for the input data type . \n
*@par Attributes:
*alpha: An optional float32. Defines at which negative value the ELU saturates. Defaults to "1.0" . \n
*@par Outputs:
*y: A float16, float32 or double, for the normalized result . \n
*@attention Constraints:
*@li The input is of type float16 or float32 . \n
* NOTE(review): this constraint disagrees with the "Inputs" entry above, which also lists double;
* confirm which is intended.
*@par Multiple batches supported or not
*Supported
*@par Third-party framework compatibility
*@li Compatible with ONNX's Celu operator
*/
REG_OP(Celu)
.INPUT(x, TensorType::FloatingDataType())
.OUTPUT(y, TensorType::FloatingDataType())
.ATTR(alpha, Float, 1.0)
.OP_END_FACTORY_REG(Celu)
/**
*@brief Computes gradients for the exponential linear (Elu) operation.
*

@ -101,7 +101,7 @@ REG_OP(FillD)
*/
REG_OP(BroadcastTo)
.INPUT(x, TensorType::BasicType())
.INPUT(shape, TensorType({DT_INT32}))
.INPUT(shape, TensorType({DT_INT32,DT_INT64}))
.OUTPUT(y, TensorType::BasicType())
.OP_END_FACTORY_REG(BroadcastTo)

@ -239,6 +239,30 @@ REG_OP(GatherV2D)
.REQUIRED_ATTR(axis, Int)
.OP_END_FACTORY_REG(GatherV2D)
/**
*@brief Gathers values along an axis specified by dim . \n
*@par Inputs:
*@li x: A Tensor. Must be one of the following types: float16, float32, int32, int64.
*@li index: A Tensor. Must be one of the following types: int64 . \n
*@par Attributes:
* dim: An optional int. The axis along which to index. Defaults to 0 . \n
*@par Outputs:
* y: A Tensor. Has the same type as "x" . \n
*@par Third-party framework compatibility
*Compatible with the PyTorch operator Gather.
*/
REG_OP(GatherElements)
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.INPUT(index, TensorType({DT_INT64}))
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT, DT_INT32, DT_INT64}))
.ATTR(dim, Int, 0)
.OP_END_FACTORY_REG(GatherElements)
/**
*@brief Extracts a strided slice of a tensor. Roughly speaking, this op
extracts a slice of size (end-begin)/stride from the given input tensor.
@ -486,6 +510,38 @@ REG_OP(UnsortedSegmentSum)
.OUTPUT(y, TensorType::NumberType())
.OP_END_FACTORY_REG(UnsortedSegmentSum)
/**
*@brief Creates a one-dimensional tensor of size "steps" whose values are evenly spaced from "start" to
* "end", inclusive, on a logarithmic scale with base "base". \n
*@par Inputs:
*One input, including:
* @li assist: A tensor. Must be one of the following types:
* float16, float32. \n
* @par Attributes:
* @li start: A required float. Used to select the start. \n
* @li end: A required float. Used to select the end. \n
* @li steps: An optional int. Defaults to 100. \n
* @li base: An optional float. Defaults to 10.0. \n
* @li dtype: An optional int. Defaults to 1. \n
*@par Outputs:
*y: A Tensor with the same type and shape as "assist". \n
*@par Third-party framework compatibility
*Compatible with the Pytorch operator logspaced. \n
*/
REG_OP(LogSpaceD)
.INPUT(assist, TensorType({DT_FLOAT, DT_FLOAT16}))
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
.REQUIRED_ATTR (start, Float)
.REQUIRED_ATTR (end, Float)
.ATTR(steps, Int, 100)
.ATTR(base, Float, 10.0)
.ATTR(dtype, Int, 1)
.OP_END_FACTORY_REG(LogSpaceD)
/**
*@brief Computes the sum along segments of a tensor . \n

@ -339,7 +339,7 @@ RTS_API rtError_t rtLabelCreateEx(rtLabel_t *label, rtStream_t stream);
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream);
RTS_API rtError_t rtLabelCreateExV2(rtLabel_t *label, rtModel_t model, rtStream_t stream);
/**
* @ingroup dvrt_base

@ -132,6 +132,11 @@ typedef struct tagRtPlatformConfig {
uint32_t platformConfig;
} rtPlatformConfig_t;
/**
 * @brief Kinds of task timeout.
 * NOTE(review): presumably selects which timeout a runtime call refers to
 * (op wait vs. op execute); no user of this enum is visible here - confirm.
 */
typedef enum tagRTTaskTimeoutType {
RT_TIMEOUT_TYPE_OP_WAIT = 0, /* explicit value 0 */
RT_TIMEOUT_TYPE_OP_EXECUTE, /* implicitly 1 */
} rtTaskTimeoutType_t;
/**
* @ingroup
* @brief get AI core count
@ -203,6 +208,24 @@ RTS_API rtError_t rtGetRuntimeVersion(uint32_t *runtimeVersion);
*/
RTS_API rtError_t rtGetDeviceCapability(int32_t deviceId, int32_t moduleType, int32_t featureType, int32_t *value);
/**
* @ingroup
* @brief set event wait task timeout time.
* @param [in] timeout timeout value (time unit is not stated in this header - TODO confirm)
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetOpWaitTimeOut(uint32_t timeout);
/**
* @ingroup
* @brief set op execute task timeout time.
* @param [in] timeout timeout value (time unit is not stated in this header - TODO confirm)
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtSetOpExecuteTimeOut(uint32_t timeout);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif

@ -188,7 +188,7 @@ typedef void (*rtCallback_t)(void *fnData);
/**
* @ingroup rt_kernel
* @brief kernel mode
*/
**/
#define RT_DEFAULT_KERNEL_MODE (0x00)
#define RT_NORMAL_KERNEL_MODE (0x01)
#define RT_ALL_KERNEL_MODE (0x02)
@ -211,7 +211,7 @@ RTS_API rtError_t rtDevBinaryRegister(const rtDevBinary_t *bin, void **handle);
/**
* @ingroup rt_kernel
* @brief register device binary
* @brief register device binary with all kernel
* @param [in] bin device binary description
* @param [out] handle device binary handle
* @return RT_ERROR_NONE for ok
@ -330,7 +330,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @ingroup rt_kernel
* @brief launch kernel with handle to device
* @param [in] handle program
* @param [in] devFunc device function description
* @param [in] devFunc device function description.
* @param [in] blockDim block dimensions
* @param [in] args arguments address for kernel function
* @param [in] argsSize arguments size
@ -341,7 +341,7 @@ RTS_API rtError_t rtKernelLaunch(const void *stubFunc, uint32_t blockDim, void *
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtKernelLaunchWithHandle(void *handle, const void *devFunc, uint32_t blockDim, void *args, uint32_t argsSize,
rtSmDesc_t *smDesc, rtStream_t stream, const void *kernelInfo);
rtSmDesc_t *smDesc, rtStream_t stream_, const void *kernelInfo);
/**
* @ingroup rt_kernel

@ -133,12 +133,13 @@ typedef struct tagAllKernelTaskInfo {
uint16_t argsCount;
uint16_t argsSize;
uint16_t reserved;
const void *dev_func;
void *devfunc;
void *handle;
uint8_t *smDesc;
uint8_t *args;
uint16_t *argsOffset;
} rtAllKernelTaskInfo_t;
typedef struct tagKernelTaskInfoEx {
uint32_t flags;
uint32_t argsSize;
@ -263,7 +264,7 @@ typedef struct tagTaskInfo {
union {
rtKernelTaskInfoEx_t kernelTaskEx;
rtKernelTaskInfo_t kernelTask;
rtAllKernelTaskInfo_t allkernelTask;
rtAllKernelTaskInfo_t allKernelTask;
rtEventTaskInfo_t eventTask;
rtStreamSwitchTaskInfo_t streamSwitchTask;
rtStreamActiveTaskInfo_t streamActiveTask;
@ -285,10 +286,27 @@ typedef struct tagTaskInfo {
} u;
} rtTaskInfo_t;
/* Node variant of the label device info payload; used as a member of the union in rtLabelDevInfo. */
typedef struct tagNodeInfo_t {
uint32_t nodeIdx; /* presumably the index of the node - TODO confirm */
uint32_t reserved[1]; /* reserved */
} rtNodeInfo;
/* HWTS variant of the label device info payload; used as a member of the union in rtLabelDevInfo. */
typedef struct tagHwtsInfo_t {
uint16_t taskId;
uint16_t sqExeHead;
uint16_t streamExeHead;
uint16_t reserved[2]; /* reserved; brings the struct to five uint16_t fields */
} rtHwtsInfo;
/*
 * Label device info: three ids followed by a union payload.
 * The union members share the same storage; which one is valid is not
 * indicated by any field visible in this struct (TODO confirm how callers decide).
 */
typedef struct tagLabelDevInfo_t {
uint16_t modelId;
uint16_t streamId;
uint16_t labelId;
union {
rtNodeInfo nodeInfo; /* node view of the payload */
rtHwtsInfo hwtsInfo; /* HWTS view of the payload */
uint16_t reserved[5]; /* raw view: five uint16_t, same field count as rtHwtsInfo */
}u;
}rtLabelDevInfo;
typedef rtError_t (*rtTaskGenCallback)(rtModel_t model, rtTaskInfo_t *taskInfo);

@ -189,6 +189,28 @@ RTS_API rtError_t rtStreamActive(rtStream_t activeStream, rtStream_t stream);
*/
RTS_API rtError_t rtStreamSwitchN(void *ptr, uint32_t size, void *valuePtr, rtStream_t *trueStreamPtr,
uint32_t elementSize, rtStream_t stream, rtSwitchDataType_t dataType);
/**
* @ingroup dvrt_stream
* @brief enable debug for dump overflow exception with stream
* @param [in] stream: stream handle
* @param [in] flag: debug flag
* @param [in] addr: ddr address of kernel exception dumped
* @param streamId: stream id pointer (presumably an output - TODO confirm)
* @param taskId: task id pointer (presumably an output - TODO confirm)
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtDebugRegisterForStream(rtStream_t stream, uint32_t flag, const void *addr,
uint32_t *streamId, uint32_t *taskId);
/**
* @ingroup dvrt_stream
* @brief disable debug for dump overflow exception with stream
* @param [in] stream: stream handle
* @return RT_ERROR_NONE for ok
* @return RT_ERROR_INVALID_VALUE for error input
*/
RTS_API rtError_t rtDebugUnRegisterForStream(rtStream_t stream);
#if defined(__cplusplus) && !defined(COMPILE_OMG_PACKAGE)
}
#endif

@ -11,93 +11,11 @@
/** @defgroup aoe aoe调优接口 */
#ifndef TUNE_API_H
#define TUNE_API_H
#include <vector>
#include <map>
#include <string>
#include "graph/graph.h"
#include "ge/ge_api.h"
#include "aoe_types.h"
/**
* @ingroup aoe
*
* @brief aoe status
*/
enum MsTuneStatus {
MSTUNE_SUCCESS, /**< tune success */
MSTUNE_FAILED, /**< tune failed */
};
// Option key: for train options sets
const std::string MSTUNE_SELF_KEY = "mstune";
const std::string MSTUNE_GEINIT_KEY = "initialize";
const std::string MSTUNE_GESESS_KEY = "session";
#ifdef __cplusplus
extern "C" {
#endif
// Initialization settings for a runner.
struct RunnerInitConfig {
// offline and online (NOTE(review): original comment read "onilne online"; confirm intent)
std::string profPath;
std::string parserPath;
// ncs only
std::vector<uint32_t> devList;
};
// Per-operator record; collected in RunnerRunResult::opInfo.
struct RunnerOpInfo {
std::string opName;
uint64_t opCostTime; // time unit not stated here - TODO confirm
uint64_t aicoreCostTime; // time unit not stated here - TODO confirm
// gradient_split only
std::string modelName;
std::string opType;
std::vector<uint64_t> start;
std::vector<uint64_t> end;
};
// Per-model record; collected in RunnerRunResult::modelInfo.
struct RunnerModelInfo {
uint64_t totalCostTime; // time unit not stated here - TODO confirm
};
// Result of a run: a list of per-model records and a list of per-operator records.
struct RunnerRunResult {
std::vector<RunnerModelInfo> modelInfo;
std::vector<RunnerOpInfo> opInfo;
};
// Aggregated cost figures: one total plus two string-keyed cost maps.
struct RunnerResult {
uint64_t totalCostTime;
std::map<std::string, uint64_t> opCostTime; // presumably keyed by op name - TODO confirm
std::map<std::string, uint64_t> aicoreCostTime; // presumably keyed by op name - TODO confirm
};
// A raw buffer descriptor: a pointer plus a byte/element count.
// Both members start out empty (null pointer, size zero).
// NOTE(review): ownership of `ptr` is not expressed by this type; confirm with callers.
struct RunnerDataBuf {
  void *ptr{nullptr};
  size_t size{0};
};
// A shared byte buffer together with its length.
// Both members are default-initialized so that a default-constructed
// AOEBufferData is a well-defined empty buffer (null data, length 0);
// previously `length` was left indeterminate while `data` was set to null.
struct AOEBufferData {
std::shared_ptr<uint8_t> data = nullptr;
uint64_t length = 0;
};
// Settings for a single runner invocation. Fields are grouped by the mode
// (offline / online) they apply to, per the inline markers below.
// Note: isProf, loop and devId have no default member initializers, so callers
// must set them explicitly.
struct RunnerConfig {
bool isProf; // presumably enables profiling - TODO confirm
uint32_t loop; // presumably the number of run iterations - TODO confirm
// offline only
std::vector<RunnerDataBuf> input;
std::vector<RunnerDataBuf> output;
std::string modelPath;
RunnerDataBuf modelData;
// online only
uint32_t devId;
std::vector<std::vector<ge::Tensor>> inputs;
std::vector<ge::Graph> dependGraph; // run graph (for training)
};
#ifdef __cplusplus
}
#endif
/**
* @ingroup aoe
* @par :

Loading…
Cancel
Save