You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1501 lines
69 KiB
1501 lines
69 KiB
/**
|
|
* Copyright 2019-2020 Huawei Technologies Co., Ltd
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
/*!
 * \file nn_detect_ops.h
 * \brief Operator prototype registrations for detection-related neural-network operators.
 */
|
|
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
|
|
#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
|
|
|
|
#include "graph/operator_reg.h"
|
|
#include "graph/operator.h"
|
|
|
|
namespace ge {
|
|
|
|
/**
|
|
*@brief Generates bounding boxes based on "rois" and "deltas".
|
|
* It is a customized FasterRcnn operator . \n
|
|
|
|
*@par Inputs:
|
|
* Two inputs, including:
|
|
*@li rois: Region of interests (ROIs) generated by the region proposal
|
|
* network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4).
|
|
* "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1",
|
|
* "y0", and "y1".
|
|
*@li deltas: Absolute variation between the ROIs generated by the RPN and
|
|
* ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
|
|
* "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh" . \n
|
|
|
|
*@par Attributes:
|
|
*@li means: An index of type int. Defaults to [0,0,0,0].
|
|
* "deltas" = "deltas" x "stds" + "means".
|
|
*@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0].
|
|
* "deltas" = "deltas" x "stds" + "means".
|
|
*@li max_shape: Shape [h, w], specifying the size of the image transferred to
|
|
* the network. Used to ensure that the bbox shape after conversion does not
|
|
* exceed "max_shape".
|
|
*@li wh_ratio_clip: Defaults to "16/1000". The values of "dw" and "dh" fall
|
|
* within (-wh_ratio_clip, wh_ratio_clip) . \n
|
|
|
|
*@par Outputs:
|
|
*bboxes: Bboxes generated based on "rois" and "deltas". Have the same format
|
|
* and type as "rois".
|
|
*/
|
|
REG_OP(BoundingBoxDecode)
|
|
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
|
|
.ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
|
|
.REQUIRED_ATTR(max_shape, ListInt)
|
|
.ATTR(wh_ratio_clip, Float, 0.016)
|
|
.OP_END_FACTORY_REG(BoundingBoxDecode)
|
|
|
|
/**
|
|
*@brief Computes the coordinate variations between bboxes and ground truth
|
|
* boxes. It is a customized FasterRcnn operator . \n
|
|
|
|
*@par Inputs:
|
|
* Two inputs, including:
|
|
*@li anchor_box: Anchor boxes. A 2D Tensor of float32 with shape (N, 4).
|
|
* "N" indicates the number of bounding boxes, and the value "4" refers to
|
|
* "x0", "x1", "y0", and "y1".
|
|
*@li ground_truth_box: Ground truth boxes. A 2D Tensor of float32 with
|
|
* shape (N, 4). "N" indicates the number of bounding boxes, and the value "4"
|
|
* refers to "x0", "x1", "y0", and "y1" . \n
|
|
|
|
*@par Attributes:
|
|
*@li means: An index of type int. Defaults to [0,0,0,0].
|
|
* "deltas" = "deltas" x "stds" + "means".
|
|
*@li stds: An index of type int. Defaults to [1.0,1.0,1.0,1.0].
|
|
* "deltas" = "deltas" x "stds" + "means" . \n
|
|
|
|
*@par Outputs:
|
|
*delats: A 2D Tensor of type float32 with shape (N, 4), specifying the variations between all anchor boxes and ground truth boxes.
|
|
*/
|
|
REG_OP(BoundingBoxEncode)
|
|
.INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
|
|
.ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
|
|
.OP_END_FACTORY_REG(BoundingBoxEncode)
|
|
|
|
/**
|
|
*@brief Judges whether the bounding box is valid. It is a customized
|
|
* FasterRcnn operator . \n
|
|
|
|
*@par Inputs:
|
|
* Two inputs, including:
|
|
*@li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4).
|
|
* "N" indicates the number of bounding boxes, the value "4" indicates "x0",
|
|
* "x1", "y0", and "y1".
|
|
*@li img_metas: Valid boundary value of the image. A 1D Tensor of type float16
|
|
* with shape (16,)
|
|
|
|
*@par Outputs:
|
|
*valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is
|
|
* in an image. "1" indicates valid, while "0" indicates invalid . \n
|
|
|
|
*@attention Constraints:
|
|
* 16 "img_metas" are input. The first three numbers (height, width, ratio) are
|
|
* valid, specifying the valid boundary (heights x ratio, weights x ratio).
|
|
*/
|
|
REG_OP(CheckValid)
|
|
.INPUT(bbox_tensor, TensorType({DT_FLOAT16}))
|
|
.INPUT(img_metas, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(valid_tensor, TensorType({DT_INT8}))
|
|
.OP_END_FACTORY_REG(CheckValid)
|
|
|
|
/**
|
|
*@brief Computes the intersection over union (iou) or the intersection over
|
|
* foreground (iof) based on the ground-truth and predicted regions . \n
|
|
|
|
*@par Inputs:
|
|
* Two inputs, including:
|
|
*@li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
|
|
* shape (N, 4). "N" indicates the number of bounding boxes, and the value
|
|
* "4" refers to "x0", "x1", "y0", and "y1".
|
|
*@li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
|
|
* with shape (M, 4). "M" indicates the number of ground truth boxes, and
|
|
* the value "4" refers to "x0", "x1", "y0", and "y1" . \n
|
|
|
|
*@par Attributes:
|
|
*mode: Computation mode, a character string with the value range of [iou, iof] . \n
|
|
|
|
*@par Outputs:
|
|
*overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
|
|
* the IoU or IoF ratio . \n
|
|
|
|
*@attention Constraints:
|
|
* Only computation of float16 data is supported. To avoid overflow, the input
|
|
* length and width are scaled by 0.2 internally.
|
|
*/
|
|
REG_OP(Iou)
|
|
.INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.ATTR(mode, String, "iou")
|
|
.OP_END_FACTORY_REG(Iou)
|
|
|
|
/**
|
|
*@brief Performs the backpropagation of ROIAlign for training scenarios . \n
|
|
|
|
*@par Inputs:
|
|
* Three inputs, including:
|
|
*@li ydiff: A 5HD gradient input of type float32.
|
|
*@li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs,
|
|
the value "5" indicates the indexes of images where the ROIs are located, "x0", "x1", "y0", and "y1".
|
|
*@li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved . \n
|
|
|
|
*@par Attributes:
|
|
*@li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
|
|
*@li pooled_width: A required attribute of type int, specifying the W dimension.
|
|
*@li pooled_height: A required attribute of type int, specifying the H dimension.
|
|
*@li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
|
|
*@li sample_num: An optional attribute of type int, specifying the horizontal and vertical
|
|
sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
|
|
equal to the rounded up value of "rois", which is a floating point number. Defaults to "2" . \n
|
|
|
|
*@par Outputs:
|
|
*xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
|
|
*/
|
|
REG_OP(ROIAlignGrad)
|
|
.INPUT(ydiff, TensorType({DT_FLOAT}))
|
|
.INPUT(rois, TensorType({DT_FLOAT}))
|
|
.OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
|
|
.OUTPUT(xdiff, TensorType({DT_FLOAT}))
|
|
.REQUIRED_ATTR(xdiff_shape, ListInt)
|
|
.REQUIRED_ATTR(pooled_width, Int)
|
|
.REQUIRED_ATTR(pooled_height, Int)
|
|
.REQUIRED_ATTR(spatial_scale, Float)
|
|
.ATTR(sample_num, Int, 2)
|
|
.OP_END_FACTORY_REG(ROIAlignGrad)
|
|
|
|
/**
|
|
*@brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator . \n
|
|
|
|
*@par Inputs:
|
|
* Three inputs, including:
|
|
*@li features: A 5HD Tensor of type float32 or float16.
|
|
*@li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
|
|
the value "5" indicates the indexes of images where the ROIs are located,
|
|
* "x0", "y0", "x1", and "y1".
|
|
*@li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved . \n
|
|
|
|
*@par Attributes:
|
|
*@li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image.
|
|
*@li pooled_height: A required attribute of type int32, specifying the H dimension.
|
|
*@li pooled_width: A required attribute of type int32, specifying the W dimension.
|
|
*@li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
|
|
* the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
|
|
*@li roi_end_mode: An optional attribute of type int32. Defaults to "1" . \n
|
|
|
|
*@par Outputs:
|
|
* output: Outputs the feature sample of each ROI position. The format is 5HD Tensor of type float32 or float16.
|
|
The axis N is the number of input ROIs. Axes H, W, and C are consistent
|
|
* with the values of "pooled_height",
|
|
* "pooled_width", and "features", respectively.
|
|
*/
|
|
REG_OP(ROIAlign)
|
|
.INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.REQUIRED_ATTR(spatial_scale, Float)
|
|
.REQUIRED_ATTR(pooled_height, Int)
|
|
.REQUIRED_ATTR(pooled_width, Int)
|
|
.ATTR(sample_num, Int, 2)
|
|
.ATTR(roi_end_mode, Int, 1)
|
|
.OP_END_FACTORY_REG(ROIAlign)
|
|
|
|
/**
|
|
*@brief Performs SSD prior box detection . \n
|
|
|
|
*@par Inputs:
|
|
* Two inputs, including:
|
|
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
|
|
*@li img: source image. Has the same type and format as "x" . \n
|
|
|
|
*@par Attributes:
|
|
*@li min_size: A required float32, specifying the minimum edge length of a square prior box.
|
|
*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
|
|
*@li aspect_ratio: An required float32, specifying the aspect ratio for generated rectangle boxes. The height
|
|
is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio). Defaults to "1.0".
|
|
*@li img_h: An optional int32, specifying the source image height. Defaults to "0".
|
|
*@li img_w: An optional int32, specifying the source image width. Defaults to "0".
|
|
*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
|
|
*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
|
|
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
|
|
*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
|
|
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
|
|
*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n
|
|
|
|
*@par Outputs:
|
|
*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
|
|
|
|
*@attention Constraints:
|
|
* This operator applies only to SSD networks.
|
|
*@see SSDDetectionOutput()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
REG_OP(PriorBox)
|
|
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.REQUIRED_ATTR(min_size, ListFloat)
|
|
.REQUIRED_ATTR(max_size, ListFloat)
|
|
.REQUIRED_ATTR(aspect_ratio, ListFloat)
|
|
.ATTR(img_h, Int, 0)
|
|
.ATTR(img_w, Int, 0)
|
|
.ATTR(step_h, Float, 0.0)
|
|
.ATTR(step_w, Float, 0.0)
|
|
.ATTR(flip, Bool, true)
|
|
.ATTR(clip, Bool, false)
|
|
.ATTR(offset, Float, 0.5)
|
|
.ATTR(variance, ListFloat, {0.1})
|
|
.OP_END_FACTORY_REG(PriorBox);
|
|
|
|
/**
|
|
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
|
|
|
|
*@par Inputs:
|
|
* Six inputs, including:
|
|
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
|
|
*@li img: source image. Has the same type and format as "x".
|
|
*@li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
|
|
*@li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
|
|
*@li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box.
|
|
*@li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box . \n
|
|
|
|
*@par Attributes:
|
|
*@li min_size: A required float32, specifying the minimum edge length of a square prior box.
|
|
*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
|
|
*@li img_h: An optional int32, specifying the height of the source image.
|
|
*@li img_w: An optional int32, specifying the width of the source image.
|
|
*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
|
|
*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
|
|
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
|
|
*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
|
|
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
|
|
*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n
|
|
|
|
*@par Outputs:
|
|
*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
|
|
|
|
*@attention Constraints:
|
|
* This operator applies only to SSD networks.
|
|
*@see SSDDetectionOutput()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
|
|
*/
|
|
REG_OP(PriorBoxD)
|
|
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.REQUIRED_ATTR(min_size, ListFloat)
|
|
.REQUIRED_ATTR(max_size, ListFloat)
|
|
.ATTR(img_h, Int, 0)
|
|
.ATTR(img_w, Int, 0)
|
|
.ATTR(step_h, Float, 0.0)
|
|
.ATTR(step_w, Float, 0.0)
|
|
.ATTR(flip, Bool, true)
|
|
.ATTR(clip, Bool, false)
|
|
.ATTR(offset, Float, 0.5)
|
|
.ATTR(variance, ListFloat, {0.1})
|
|
.OP_END_FACTORY_REG(PriorBoxD);
|
|
|
|
/**
|
|
*@brief Performs SSD prior box detection, with four additional matrices and the "aspect_ratio" attribute deleted compared to PriorBox . \n
|
|
|
|
*@par Inputs:
|
|
* Six inputs, including:
|
|
*@li x: An NC1HWC0 or NCHW feature map of type is float32 or float16.
|
|
*@li img: source image. Has the same type and format as "x".
|
|
*@li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y
|
|
|
|
*@par Attributes:
|
|
*@li min_size: A required float32, specifying the minimum edge length of a square prior box.
|
|
*@li max_size: A required float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size)
|
|
*@li img_h: An optional int32, specifying the height of the source image.
|
|
*@li img_w: An optional int32, specifying the width of the source image.
|
|
*@li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
|
|
*@li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
|
|
*@li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
|
|
*@li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
|
|
*@li offset: An optional float32, specifying the offset. Defaults to "0.5".
|
|
*@li variance: An optional float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value) . \n
|
|
|
|
*@par Outputs:
|
|
*y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance . \n
|
|
|
|
*@attention Constraints:
|
|
* This operator applies only to SSD networks.
|
|
*@see SSDDetectionOutput()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
|
|
*/
|
|
REG_OP(PriorBoxDV2)
|
|
.INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.REQUIRED_ATTR(min_size, ListFloat)
|
|
.REQUIRED_ATTR(max_size, ListFloat)
|
|
.ATTR(img_h, Int, 0)
|
|
.ATTR(img_w, Int, 0)
|
|
.ATTR(step_h, Float, 0.0)
|
|
.ATTR(step_w, Float, 0.0)
|
|
.ATTR(flip, Bool, true)
|
|
.ATTR(clip, Bool, false)
|
|
.ATTR(offset, Float, 0.5)
|
|
.ATTR(variance, ListFloat, {0.1})
|
|
.OP_END_FACTORY_REG(PriorBoxDV2);
|
|
|
|
/**
|
|
*@brief Performs Position Sensitive ROI Pooling . \n
|
|
|
|
*@par Inputs:
|
|
* Two inputs, including:
|
|
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
|
|
* map, dimension C1 must be equal to
|
|
* (int(output_dim+15)/C0))*group_size*group_size.
|
|
*@li rois: A tensor of type float16 or float32, with shape
|
|
* [batch, 5, rois_num], describing the ROIs, each ROI consists of five
|
|
* elements: "batch_id", "x1", "y1", "x2", and "y2", which "batch_id" indicates
|
|
* the index of the input feature map, "x1", "y1", "x2", or "y2" must be
|
|
* greater than or equal to "0.0" . \n
|
|
|
|
*@par Attributes:
|
|
*@li output_dim: A required int32, specifying the number of output channels,
|
|
* must be greater than 0.
|
|
*@li group_size: A required int32, specifying the number of groups to encode
|
|
* position-sensitive score maps, must be within the range (0, 128).
|
|
*@li spatial_scale: A required float32, scaling factor for mapping the input
|
|
* coordinates to the ROI coordinates . \n
|
|
|
|
*@par Outputs:
|
|
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
|
|
* feature map . \n
|
|
|
|
*@attention Constraints:
|
|
* HC1HWC0: channel must be Group_size squared, rois_num is a multiple of 16
|
|
*/
|
|
REG_OP(PSROIPooling)
|
|
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.REQUIRED_ATTR(output_dim, Int)
|
|
.REQUIRED_ATTR(group_size, Int)
|
|
.REQUIRED_ATTR(spatial_scale, Float)
|
|
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(PSROIPooling)
|
|
|
|
/**
|
|
*@brief Returns detection result . \n
|
|
|
|
*@par Inputs:
|
|
* Four inputs, including:
|
|
*@li rois: An NCHW tensor of type floa16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
|
|
*@li bbox_delta: An NCHWC0 tensor of type floa16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
|
|
*@li score: An NCHWC0 tensor of type floa16 or float32, specifying the probability of each class. Class 0 is the background class.
|
|
*@li im_info: An ND tensor of type float16 or float32, specifying the Image information.
|
|
*@li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch.
|
|
*@par Attributes:
|
|
*@li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
|
|
*@li num_classes: An required int32, specifying the number of classes to be predicted. The value must be greater than 0.
|
|
*@li score_threshold: An required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
|
|
*@li iou_threshold: An required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0).
|
|
*@par Outputs:
|
|
*@li box: A tensor of type float16 or float32 for proposal of actual output, with output shape [batch, numBoxes,8].
|
|
* 8 means [x1, y1, x2, y2, score, label, batchID, NULL], the maximum value of numBoxes is 1024.
|
|
That is, take min (the maximum number of input boxes, 1024)
|
|
*@li actual_bbox_num: A tensor of type int32 With shape [bacth, num_classes], specifying the number of output boxes . \n
|
|
|
|
*@attention Constraints:
|
|
*@li totalnum < max_rois_num * batch_rois.
|
|
*@li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
|
|
*@li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
REG_OP(FSRDetectionOutput)
|
|
.INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
|
|
.OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
|
|
.OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.ATTR(batch_rois, Int, 1)
|
|
.REQUIRED_ATTR(num_classes, Int)
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(iou_threshold, Float)
|
|
.OP_END_FACTORY_REG(FSRDetectionOutput)
|
|
|
|
/**
|
|
*@brief Returns detection result . \n
|
|
|
|
*@par Inputs:
|
|
* Four inputs, including:
|
|
*@li bbox_delta: An ND tensor of type floa16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
|
|
*@li score: An ND tensor of type floa16 or float32, specifying the box confidences data, used as the input of operator SSDDetectionOutput.
|
|
*@li anchors: An ND tensor of type floa16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput.
|
|
*@par Attributes:
|
|
*@li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and lesser than 1025.
|
|
*@li share_location: An optional bool, specify the shared location. Defaults to True
|
|
*@li background_label_id: An optional int32, specify the background label id. Must be 0
|
|
*@li iou_threshold: An optional float32, specify the nms threshold
|
|
*@li top_k: An optional int32, specify the topk value. Defaults to 200
|
|
*@li eta: An optional float32, specify the eta value. Defaults to 1.0
|
|
*@li variance_encoded_in_target: An optional bool, specify whether variance encoded in target or not. Defaults to False
|
|
*@li code_type: An optional int32, specify the code type. Defaults to 1(only supports 2). The corner is 1, center_size is 2, corner_size is 3
|
|
*@li keep_top_k: An optional int32, specify the topk value after nms. Defaults to -1
|
|
*@li confidence_threshold: An optional float32, specify the topk filter threshold. Only consider detections with confidence greater than the threshold
|
|
*@li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output".
|
|
*@par Outputs:
|
|
*@li out_boxnum: A tensor of type int32, specifying the number of output boxes.
|
|
*@li y: A tensor of type float16 or float32 with shape [batch,keep_top_k, 8], describing the information of each output box.
|
|
* In output shape, 8 means (batchID, label(classID), score (class probability), xmin, ymin, xmax, ymax, null)
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
REG_OP(SSDDetectionOutput)
|
|
.INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.OUTPUT(out_boxnum, TensorType({DT_INT32}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.ATTR(num_classes, Int, 2)
|
|
.ATTR(share_location, Bool, true)
|
|
.ATTR(background_label_id, Int, 0)
|
|
.ATTR(iou_threshold, Float, 0.3)
|
|
.ATTR(top_k, Int, 200)
|
|
.ATTR(eta, Float, 1.0)
|
|
.ATTR(variance_encoded_in_target, Bool, false)
|
|
.ATTR(code_type, Int, 1)
|
|
.ATTR(keep_top_k, Int, -1)
|
|
.ATTR(confidence_threshold, Float, 0.0)
|
|
.OP_END_FACTORY_REG(SSDDetectionOutput)
|
|
|
|
/**
|
|
*@brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3 . \n
|
|
|
|
*@par Inputs:
|
|
*x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
|
|
where, "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
|
|
as xx...xyy...yww...whh...hbb...bc0c0..c0c1c1...c1......cncn...cn . \n
|
|
|
|
*@par Attributes:
|
|
*@li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
|
|
*@li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x,y,w,h).
|
|
*@li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
|
|
*@li yolo_version: A string, specifying the YOLO version, either "V2" or "V3".Defaults to "V3"
|
|
*@li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
|
|
*@li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
|
|
*@li softmaxtree: A bool, Fixed to False, defined in Lite, but not used. Defaults to "false" . \n
|
|
|
|
*@par Outputs:
|
|
*@li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
|
|
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
|
|
*@li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width *2+32, 32)/2],
|
|
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence.
|
|
*@li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width *2+32, 32)/2],
|
|
* where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes . \n
|
|
|
|
*@attention Constraints:
|
|
*@li This operator applies to YOLO v2 and v3 networks.
|
|
*@li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput.
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
REG_OP(Yolo)
|
|
.INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.ATTR(boxes, Int, 3)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 80)
|
|
.ATTR(yolo_version, String, "V3")
|
|
.ATTR(softmax, Bool, false)
|
|
.ATTR(background, Bool, false)
|
|
.ATTR(softmaxtree, Bool, false)
|
|
.OP_END_FACTORY_REG(Yolo)
|
|
|
|
/**
|
|
*@brief Performs YOLO V2 detection . \n
|
|
|
|
*@par Inputs:
|
|
* Four inputs, including:
|
|
*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov3DetectionOutput.
|
|
* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
|
|
*@li img_info: A float16 or float32, describing the image information including the required image height and width
|
|
* and the actual image height and width.
|
|
*
|
|
*@par Attributes:
|
|
*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
|
|
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
|
|
*@li coords: Specifies the number of coordinate parameters. Must be 4.
|
|
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
|
|
*@li relative: An optional bool. Defaults to and must be "true".
|
|
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering,
|
|
* which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n
|
|
|
|
*@li post_nms_topn: An optional int32. This attribute is reserved.
|
|
*@li score_threshold: A required float, specifying the class score threshold for box filtering,
|
|
which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
|
|
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
|
|
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
|
|
*
|
|
*@par Outputs:
|
|
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box,
|
|
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
|
|
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
|
|
* the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
|
|
*
|
|
*@attention Constraints:
|
|
*@li This operator applies only to the YOLO v2 network.
|
|
*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator.
|
|
*
|
|
*@see Yolo()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
// Prototype of operator YoloV2DetectionOutput, declared through the REG_OP macro chain.
// Inputs : coord_data, obj_prob, classes_prob, img_info (each float16 or float32).
// Attrs  : "biases" is the only REQUIRED_ATTR; every .ATTR passes its default as the third argument.
// Outputs: box_out (float16 or float32) and box_out_num (int32).
// NOTE(review): the doc comment calls "classes" and the three thresholds "required", yet they are
// declared with .ATTR and a default value - confirm which is intended.
REG_OP(YoloV2DetectionOutput)
|
|
.INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.REQUIRED_ATTR(biases, ListFloat)
|
|
.ATTR(boxes, Int, 5)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 20)
|
|
.ATTR(relative, Bool, true)
|
|
.ATTR(obj_threshold, Float, 0.5)
|
|
.ATTR(post_nms_topn, Int, 512)
|
|
.ATTR(score_threshold, Float, 0.5)
|
|
.ATTR(iou_threshold, Float, 0.45)
|
|
.ATTR(pre_nms_topn, Int, 512)
|
|
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(box_out_num, TensorType({DT_INT32}))
|
|
.OP_END_FACTORY_REG(YoloV2DetectionOutput)
|
|
|
|
/**
|
|
*@brief Performs YOLO V2 detection . \n
|
|
|
|
*@par Inputs:
|
|
*Six inputs, including:
|
|
*@li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
|
|
* Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
|
|
*@li imginfo: A float16, describing the image information including the required image height and width
|
|
* and the actual image height and width.
|
|
*@li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
|
|
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of "height" groups of [0, 1, 2...(width-1)] is formed.
|
|
|
|
*@li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]].
|
|
|
|
*
|
|
*@par Attributes:
|
|
*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
|
|
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
|
|
*@li coords: Specifies the number of coordinate parameters. Must be 4.
|
|
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
|
|
*@li relative: An optional bool. Defaults to and must be "true".
|
|
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
|
|
*@li post_nms_topn: An optional int32. This attribute is reserved.
|
|
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n
|
|
|
|
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
|
|
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
|
|
*
|
|
*@par Outputs:
|
|
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn]. describing the information of each output box,
|
|
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
|
|
*@li boxoutnum: A tensor of type int32 with shape [batch,8,1,1], specifying the number of output boxes. It means only the first one of the 8 numbers is valid,
|
|
* the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
|
|
*
|
|
*@attention Constraints:
|
|
*@li This operator applies only to the YOLO v2 network.
|
|
*@li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator . \n
|
|
|
|
*@see Yolo()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
|
|
*/
|
|
// Prototype of operator YoloV2DetectionOutputD, declared through the REG_OP macro chain.
// Inputs : coord_data, obj_prob, classes_prob, img_info, windex, hindex (each float16 or float32).
// Attrs  : "biases" is the only REQUIRED_ATTR; every .ATTR passes its default as the third argument.
// Outputs: box_out (float16 or float32) and box_out_num (int32).
// The attribute list and outputs are the same as in YoloV2DetectionOutput; only windex/hindex are added.
REG_OP(YoloV2DetectionOutputD)
|
|
.INPUT(coord_data, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.REQUIRED_ATTR(biases, ListFloat)
|
|
.ATTR(boxes, Int, 5)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 20)
|
|
.ATTR(relative, Bool, true)
|
|
.ATTR(obj_threshold, Float, 0.5)
|
|
.ATTR(post_nms_topn, Int, 512)
|
|
.ATTR(score_threshold, Float, 0.5)
|
|
.ATTR(iou_threshold, Float, 0.45)
|
|
.ATTR(pre_nms_topn, Int, 512)
|
|
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(box_out_num, TensorType({DT_INT32}))
|
|
.OP_END_FACTORY_REG(YoloV2DetectionOutputD)
|
|
|
|
/**
|
|
*@brief Performs YOLO V3 detection . \n
|
|
|
|
*@par Inputs:
|
|
*Ten inputs, including:
|
|
*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class".
|
|
* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
|
|
*@li img_info: A float16 or float32, describing the image information including the required image height and width
|
|
* and the actual image height and width.
|
|
|
|
*@par Attributes:
|
|
*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
|
|
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
|
|
*@li coords: Specifies the number of coordinate parameters. Must be 4.
|
|
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
|
|
*@li relative: An optional bool. Defaults to and must be "true".
|
|
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0] . \n
|
|
|
|
*@li post_nms_topn: An optional int32. This attribute is reserved.
|
|
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0] . \n
|
|
|
|
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
|
|
|
|
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
|
|
|
|
*@par Outputs:
|
|
*@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
|
|
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
|
|
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
|
|
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
|
|
|
|
*@attention Constraints:
|
|
*@li This operator applies only to the YOLO v3 network.
|
|
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators . \n
|
|
|
|
*@see Yolo()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
// Prototype of operator YoloV3DetectionOutput, declared through the REG_OP macro chain.
// Inputs : coord_data_{low,mid,high}, obj_prob_{low,mid,high}, classes_prob_{low,mid,high} and img_info
//          (ten inputs, each float16 or float32).
// Attrs  : biases_low, biases_mid and biases_high are REQUIRED_ATTR (ListFloat); every .ATTR passes its
//          default as the third argument.
// Outputs: box_out (float16 or float32) and box_out_num (int32).
// NOTE(review): the doc comment describes a single "biases" attribute, while three per-scale lists are
// declared here - confirm the documentation.
REG_OP(YoloV3DetectionOutput)
|
|
.INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.REQUIRED_ATTR(biases_low, ListFloat)
|
|
.REQUIRED_ATTR(biases_mid, ListFloat)
|
|
.REQUIRED_ATTR(biases_high, ListFloat)
|
|
.ATTR(boxes, Int, 3)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 80)
|
|
.ATTR(relative, Bool, true)
|
|
.ATTR(obj_threshold, Float, 0.5)
|
|
.ATTR(post_nms_topn, Int, 512)
|
|
.ATTR(score_threshold, Float, 0.5)
|
|
.ATTR(iou_threshold, Float, 0.45)
|
|
.ATTR(pre_nms_topn, Int, 512)
|
|
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(box_out_num, TensorType({DT_INT32}))
|
|
.OP_END_FACTORY_REG(YoloV3DetectionOutput)
|
|
|
|
/**
|
|
*@brief Performs YOLO V3 detection . \n
|
|
|
|
*@par Inputs:
|
|
*Sixteen inputs, including:
|
|
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
|
|
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
|
|
*@li imginfo: A float16, describing the image information including the required image height and width
|
|
* and the actual image height and width.
|
|
*@li windex: A windex tensor with shape [height,width]. Has the same type as the inputs.
|
|
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of "height" groups of [0, 1, 2...(width-1)] is formed for the three Yolo outputs, respectively . \n
|
|
|
|
*@li hindex: A hindex tensor with shape [height,width]. Has the same type as the inputs.
|
|
* [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
|
|
*
|
|
*@par Attributes:
|
|
*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
|
|
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
|
|
*@li coords: Specifies the number of coordinate parameters. Must be 4.
|
|
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
|
|
*@li relative: An optional bool. Defaults to and must be "true".
|
|
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
|
|
*@li post_nms_topn: An optional int32. This attribute is reserved.
|
|
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
|
|
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
|
|
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
|
|
|
|
*@par Outputs:
|
|
*@li boxout: A tensor of type float16 or float32 with shape [batch,6*post_nms_topn], describing the information of each output box.
|
|
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
|
|
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
|
|
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
|
|
|
|
*@attention Constraints:
|
|
*@li This operator applies only to the YOLO v3 network.
|
|
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
|
|
*@see Yolo()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
|
|
*/
|
|
// Prototype of operator YoloV3DetectionOutputD, declared through the REG_OP macro chain.
// Inputs : coord_data_{low,mid,high}, obj_prob_{low,mid,high}, classes_prob_{low,mid,high}, img_info,
//          windex1..windex3 and hindex1..hindex3 (sixteen inputs, each float16 or float32).
// Attrs  : biases_low, biases_mid and biases_high are REQUIRED_ATTR (ListFloat); every .ATTR passes its
//          default as the third argument.
// Outputs: box_out (float16 or float32) and box_out_num (int32).
// NOTE(review): the doc comment speaks of "windex"/"hindex"/"biases"; the declared names are the
// numbered / per-scale variants listed below.
REG_OP(YoloV3DetectionOutputD)
|
|
.INPUT(coord_data_low, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(coord_data_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(coord_data_high, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(obj_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob_low, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob_mid, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(classes_prob_high, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(img_info, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(windex1, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(windex2, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(windex3, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(hindex1, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(hindex2, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(hindex3, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.REQUIRED_ATTR(biases_low, ListFloat)
|
|
.REQUIRED_ATTR(biases_mid, ListFloat)
|
|
.REQUIRED_ATTR(biases_high, ListFloat)
|
|
.ATTR(boxes, Int, 3)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 80)
|
|
.ATTR(relative, Bool, true)
|
|
.ATTR(obj_threshold, Float, 0.5)
|
|
.ATTR(post_nms_topn, Int, 512)
|
|
.ATTR(score_threshold, Float, 0.5)
|
|
.ATTR(iou_threshold, Float, 0.45)
|
|
.ATTR(pre_nms_topn, Int, 512)
|
|
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(box_out_num, TensorType({DT_INT32}))
|
|
.OP_END_FACTORY_REG(YoloV3DetectionOutputD)
|
|
|
|
/**
|
|
*@brief Performs YOLO V3 detection . \n
|
|
|
|
*@par Inputs:
|
|
*Ten inputs, including:
|
|
*@li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
|
|
* There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
|
|
*@li img_info: A float16 or float32, describing the image information including the required image height and width \n
|
|
* and the actual image height and width.
|
|
|
|
*@par Attributes:
|
|
*@li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
|
|
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
|
|
*@li coords: Specifies the number of coordinate parameters. Must be 4.
|
|
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
|
|
*@li relative: An optional bool. Defaults to and must be "true".
|
|
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
|
|
|
|
*@li post_nms_topn: An optional int32. This attribute is reserved.
|
|
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
|
|
|
|
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].\n
|
|
|
|
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
|
|
|
|
*@par Outputs:
|
|
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
|
|
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
|
|
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
|
|
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
|
|
|
|
*@attention Constraints:\n
|
|
*@li This operator applies only to the YOLO v3 network.
|
|
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
|
|
|
|
*@see Yolo()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
// Prototype of operator YoloV3DetectionOutputV2, declared through the REG_OP macro chain.
// Inputs : a single DYNAMIC_INPUT "x" (float16 or float32).
// Attrs  : "biases" is the only REQUIRED_ATTR; every .ATTR passes its default as the third argument.
//          Besides the attributes shared with YoloV3DetectionOutput, this variant declares
//          N (default 10), resize_origin_img_to_net (default false) and out_box_dim (default 3).
// Outputs: box_out (float16 or float32) and box_out_num (int32).
// NOTE(review): N and resize_origin_img_to_net are not described in the doc comment; presumably N is
// the number of tensors passed through "x" - confirm against the operator implementation.
REG_OP(YoloV3DetectionOutputV2)
|
|
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.REQUIRED_ATTR(biases, ListFloat)
|
|
.ATTR(boxes, Int, 3)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 80)
|
|
.ATTR(relative, Bool, true)
|
|
.ATTR(obj_threshold, Float, 0.5)
|
|
.ATTR(post_nms_topn, Int, 512)
|
|
.ATTR(score_threshold, Float, 0.5)
|
|
.ATTR(iou_threshold, Float, 0.45)
|
|
.ATTR(pre_nms_topn, Int, 512)
|
|
.ATTR(N, Int, 10)
|
|
.ATTR(resize_origin_img_to_net, Bool, false)
|
|
.ATTR(out_box_dim, Int, 3)
|
|
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(box_out_num, TensorType({DT_INT32}))
|
|
.OP_END_FACTORY_REG(YoloV3DetectionOutputV2)
|
|
|
|
/**
|
|
*@brief Performs YOLO V3 detection.
|
|
|
|
*@par Inputs:
|
|
*16 Input, including:
|
|
*@li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
|
|
* A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
|
|
*@li imginfo: A float16, describing the image information including the required image height and width
|
|
* and the actual image height and width.
|
|
*@li windex: A windex tensor with shape [height,width]. Has the same type as the inputs.
|
|
* [[0,1,2...(width-1)],[0,1,2...(width-1)]...[0,1,2...(width-1)]] consisting of "height" groups of [0, 1, 2...(width-1)]
|
|
* is formed for the three Yolo outputs, respectively. It is a dynamic input . \n
|
|
|
|
*@li hindex: A hindex tensor with shape [height,width]. Has the same type as the inputs. [[0,0...0],[1,1...1],[2,2...2]...[height-1,height-1...,height-1]] is formed for the three Yolo outputs, respectively . \n
|
|
*@par Attributes:
|
|
*@li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
|
|
*@li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
|
|
*@li coords: Specifies the number of coordinate parameters. Must be 4.
|
|
*@li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
|
|
*@li relative: An optional bool. Defaults to and must be "true".
|
|
*@li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo). The value range is [0.0, 1.0].
|
|
*@li post_nms_topn: An optional int32. This attribute is reserved.
|
|
*@li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo). The value range is [0.0, 1.0].
|
|
*@li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
|
|
*@li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
|
|
*
|
|
*@par Outputs:
|
|
*@li boxout: A tensor of type float16 or float32 with shape [batch,6,post_nms_topn](out_box_dim == 3) or [batch, 6*post_nms_topn](out_box_dim == 2),
|
|
* describing the information of each output box.
|
|
* In output shape, 6 means x1, y1, x2, y2, score, label(class). Output by the number of box_out_num.
|
|
*@li boxoutnum: A tensor of type int32 with shape [batch,8], specifying the number of output boxes.
|
|
* The output shape means only the first one of the 8 numbers is valid, the number of valid boxes in each batch, the maximum number of valid boxes in each batch is 1024
|
|
*
|
|
*@attention Constraints:
|
|
*@li This operator applies only to the YOLO v3 network.
|
|
*@li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
|
|
*@see Yolo()
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
|
|
* @par Restrictions:
|
|
* Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
|
|
*/
|
|
// Prototype of operator YoloV3DetectionOutputV2D, declared through the REG_OP macro chain.
// Inputs : three DYNAMIC_INPUTs - x, windex and hindex (each float16 or float32).
// Attrs  : "biases" is the only REQUIRED_ATTR; every .ATTR passes its default as the third argument,
//          including N (10), resize_origin_img_to_net (false) and out_box_dim (3).
// Outputs: box_out (float16 or float32) and box_out_num (int32).
// NOTE(review): the doc comment announces 16 inputs, whereas three dynamic inputs are declared here -
// confirm how the tensors are distributed over x / windex / hindex.
REG_OP(YoloV3DetectionOutputV2D)
|
|
.DYNAMIC_INPUT(x, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.REQUIRED_ATTR(biases, ListFloat)
|
|
.ATTR(boxes, Int, 3)
|
|
.ATTR(coords, Int, 4)
|
|
.ATTR(classes, Int, 80)
|
|
.ATTR(relative, Bool, true)
|
|
.ATTR(obj_threshold, Float, 0.5)
|
|
.ATTR(post_nms_topn, Int, 512)
|
|
.ATTR(score_threshold, Float, 0.5)
|
|
.ATTR(iou_threshold, Float, 0.45)
|
|
.ATTR(pre_nms_topn, Int, 512)
|
|
.ATTR(N, Int, 10)
|
|
.ATTR(resize_origin_img_to_net, Bool, false)
|
|
.ATTR(out_box_dim, Int, 3)
|
|
.OUTPUT(box_out, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(box_out_num, TensorType({DT_INT32}))
|
|
.OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)
|
|
|
|
/**
|
|
*@brief Spatial Pyramid Pooling, multi-level pooling.
|
|
* Pooling out(n, sigma(c*2^i*2^i)) tensor, i in range[0,pyramid_height) . \n
|
|
|
|
*@par Inputs:
|
|
*x: An NCHW tensor, support float16 or float32 type . \n
|
|
|
|
*@par Attributes:
|
|
* @li pyramid_height: A required int32.
|
|
* Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
|
|
* @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE.
|
|
* Defaults to "0" . \n
|
|
|
|
*@par Outputs:
|
|
*y: A NCHW tensor, support float16 or float32 type . \n
|
|
|
|
*@attention Constraints:
|
|
* @li pyramid_height: pyramid_height should be in range [0,7).
|
|
* Pooling parameters should satisfy the Caffe pooling constraint (pad<kernel).
|
|
* @li feature_size: input feature map h and w should be in range [1, 510] . \n
|
|
|
|
*@par Third-party framework compatibility
|
|
* Compatible with the Caffe operator SPP.
|
|
*/
|
|
// Prototype of operator SPP, declared through the REG_OP macro chain.
// Input  : x (float32 or float16).
// Output : y (float32 or float16).
// Attrs  : pyramid_height is a REQUIRED_ATTR (Int); pool_method is declared with .ATTR and default 0.
REG_OP(SPP)
|
|
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.REQUIRED_ATTR(pyramid_height, Int)
|
|
.ATTR(pool_method, Int, 0)
|
|
.OP_END_FACTORY_REG(SPP)
|
|
|
|
/**
|
|
*@brief Performs Region of Interest (ROI) Pooling . \n
|
|
|
|
*@par Inputs:
|
|
* Three inputs, including:
|
|
*@li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
|
|
* map.
|
|
*@li rois: A tensor of type float16 or float32, with shape
|
|
* [batch, 5, roi_max_num], describing the ROIs.
|
|
*@li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
|
|
* the number of ROIs per batch . \n
|
|
|
|
*@par Attributes:
|
|
*@li pooled_h: A required int32, specifying the pooled H. Must be greater
|
|
* than 0.
|
|
*@li pooled_w: A required int32, specifying the pooled W. Must be greater
|
|
* than 0.
|
|
*@li spatial_scale_h: A required float scaling factor for mapping the input
|
|
* coordinates of height to the ROI coordinates.
|
|
*@li spatial_scale_w: A required float scaling factor for mapping the input
|
|
* coordinates of width to the ROI coordinates . \n
|
|
|
|
*@par Outputs:
|
|
*y: An NC1HWC0 tensor of type float16 or float32, describing the result
|
|
* feature map . \n
|
|
|
|
*@attention Constraints:
|
|
*@li For the feature map input:
|
|
(1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
|
|
(2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
|
|
(3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
|
|
(4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
|
|
(5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
|
|
(6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
|
|
(7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
|
|
(8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
|
|
(9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
|
|
(10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
|
|
(11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
|
|
(12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
|
|
(13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
|
|
(14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
|
|
(15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
|
|
(16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
|
|
(17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
|
|
(18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
|
|
(19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
|
|
*@par Third-party framework compatibility
|
|
* It is a custom operator. It has no corresponding operator in Caffe.
|
|
*/
|
|
// Prototype of operator ROIPooling, declared through the REG_OP macro chain.
// Inputs : x and rois (float32 or float16); roi_actual_num (int32) is an OPTIONAL_INPUT.
// Attrs  : pooled_h, pooled_w (Int) and spatial_scale_h, spatial_scale_w (Float) are all REQUIRED_ATTR,
//          so none of them has a default.
// Output : y (float32 or float16).
REG_OP(ROIPooling)
|
|
.INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.OPTIONAL_INPUT(roi_actual_num, TensorType({DT_INT32}))
|
|
.REQUIRED_ATTR(pooled_h, Int)
|
|
.REQUIRED_ATTR(pooled_w, Int)
|
|
.REQUIRED_ATTR(spatial_scale_h, Float)
|
|
.REQUIRED_ATTR(spatial_scale_w, Float)
|
|
.OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(ROIPooling)
|
|
|
|
/**
|
|
*@brief Computes decode bbox function.
|
|
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li box_predictions: A Tensor. Must be float16.
|
|
* @li anchors: A Tensor. Must have the same type as box_predictions.
|
|
|
|
*@par Attributes:
|
|
* decode_clip: required, float, threshold of decode process.
|
|
|
|
*@par Outputs:
|
|
* decoded_boxes: A Tensor. Must have the same type as box_predictions.
|
|
* N-D with shape [N, 4].
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
// Prototype of operator DecodeBbox, declared through the REG_OP macro chain.
// Inputs : box_predictions and anchors (float16).
// Output : decoded_boxes (float16).
// Attrs  : decode_clip is a REQUIRED_ATTR (Float), so it has no default.
// The type lists use the parenthesised TensorType({...}) form, as every other registration in this
// file does: the parentheses keep a multi-type list from being split into separate macro arguments.
REG_OP(DecodeBbox)
|
|
.INPUT(box_predictions, TensorType({DT_FLOAT16}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(decoded_boxes, TensorType({DT_FLOAT16}))
|
|
.REQUIRED_ATTR(decode_clip, Float)
|
|
.OP_END_FACTORY_REG(DecodeBbox)
|
|
|
|
/**
|
|
*@brief Computes ClipBoxes function . \n
|
|
|
|
*@par Inputs:
|
|
*@li boxes_input: A Tensor. Must be float16. N-D with shape [N, 4].
|
|
*@li img_size: A Tensor. Must be int32. shape [H, W] . \n
|
|
|
|
*@par Outputs:
|
|
*boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4].
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
// Prototype of operator ClipBoxes, declared through the REG_OP macro chain.
// Inputs : boxes_input (float16) and img_size (int32 tensor).
// Output : boxes_output (float16).
// No attributes are declared for this operator.
REG_OP(ClipBoxes)
|
|
.INPUT(boxes_input, TensorType({DT_FLOAT16}))
|
|
.INPUT(img_size, TensorType({DT_INT32}))
|
|
.OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(ClipBoxes)
|
|
|
|
/**
|
|
*@brief Computes ClipBoxesD function . \n
|
|
|
|
*@par Attributes:
|
|
*img_size: A required list of ints, giving the image size as [H, W] . \n
|
|
|
|
*@par Inputs:
|
|
*boxes_input: A Tensor. Must be float16. N-D with shape [N, 4] . \n
|
|
|
|
*@par Outputs:
|
|
*boxes_output: A Tensor. Must have the same type as boxes_input. N-D with shape [N, 4] . \n
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
// Prototype of operator ClipBoxesD, declared through the REG_OP macro chain.
// Input  : boxes_input (float16).
// Attrs  : img_size is a REQUIRED_ATTR of type ListInt (in ClipBoxes the same name is a tensor input).
// Output : boxes_output (float16).
REG_OP(ClipBoxesD)
|
|
.INPUT(boxes_input, TensorType({DT_FLOAT16}))
|
|
.REQUIRED_ATTR(img_size, ListInt)
|
|
.OUTPUT(boxes_output, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(ClipBoxesD)
|
|
|
|
/**
|
|
*@brief Computes Fastrcnn Predictions function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li rois: A Tensor. Must be float16. N-D with shape [N*C, 4].
|
|
* @li score: A Tensor. Must be float16. N-D with shape [N, C+1].
|
|
*
|
|
*@par Attributes:
|
|
* @li nms_threshold: required, float, threshold of nms process.
|
|
* @li score_threshold: required, float, threshold of topk process.
|
|
* @li k: required, Int, threshold of topk process.
|
|
*@par Outputs:
|
|
* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
|
|
* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
|
|
* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
|
|
*/
|
|
// Prototype of operator FastrcnnPredictions, declared through the REG_OP macro chain.
// Inputs : rois and score (float16).
// Attrs  : nms_threshold, score_threshold (Float) and k (Int) are all REQUIRED_ATTR, so none has a default.
// Outputs: sorted_rois, sorted_scores and sorted_classes (float16).
REG_OP(FastrcnnPredictions)
|
|
.INPUT(rois, TensorType({DT_FLOAT16}))
|
|
.INPUT(score, TensorType({DT_FLOAT16}))
|
|
.REQUIRED_ATTR(nms_threshold, Float)
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(k, Int)
|
|
.OUTPUT(sorted_rois, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(sorted_scores, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(sorted_classes, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(FastrcnnPredictions)
|
|
|
|
/**
|
|
*@brief Computes Fastrcnn RpnProposals function . \n
|
|
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
|
|
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
|
|
* @li img_size: A Tensor. Must be int32. shape [H, W] . \n
|
|
|
|
*@par Attributes:
|
|
* @li score_threshold: required, float, threshold of topk process.
|
|
* @li k: required, Int, threshold of topk process.
|
|
* @li min_size: required, float, threshold of nms process.
|
|
* @li nms_threshold: required, float, threshold of nms process.
|
|
* @li post_nms_num: required, Int, threshold of nms process.
|
|
* @li score_filter: bool, mark of score_filter. Defaults to "true"
|
|
* @li box_filter: bool, mark of box_filter. Defaults to "true"
|
|
* @li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false"
|
|
|
|
*@par Outputs:
|
|
* sorted_box: A Tensor. Must be float16. It is the only output declared for this operator . \n
|
|
|
|
* @par Third-party framework compatibility
|
|
* Compatible with the TensorFlow operator Unpack.
|
|
*/
|
|
// Prototype of operator RpnProposals, declared through the REG_OP macro chain.
// Inputs : rois and cls_bg_prob (float16); img_size (int32 tensor).
// Attrs  : score_threshold, min_size, nms_threshold (Float) and k, post_nms_num (Int) are REQUIRED_ATTR;
//          score_filter (true), box_filter (true) and score_sigmoid (false) are .ATTR with defaults.
// Output : a single tensor, sorted_box (float16).
REG_OP(RpnProposals)
|
|
.INPUT(rois, TensorType({DT_FLOAT16}))
|
|
.INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
|
|
.INPUT(img_size, TensorType({DT_INT32}))
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(k, Int)
|
|
.REQUIRED_ATTR(min_size, Float)
|
|
.REQUIRED_ATTR(nms_threshold, Float)
|
|
.REQUIRED_ATTR(post_nms_num, Int)
|
|
.ATTR(score_filter, Bool, true)
|
|
.ATTR(box_filter, Bool, true)
|
|
.ATTR(score_sigmoid, Bool, false)
|
|
.OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(RpnProposals)
|
|
|
|
/**
|
|
*@brief Computes Fastrcnn RpnProposalsD function . \n
|
|
|
|
*@par Inputs:
|
|
*@li rois: A Tensor. Must be float16. N-D with shape [N, 4].
|
|
*@li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1] . \n
|
|
|
|
*@par Attributes:
|
|
*@li img_size: required, list of ints, size of the image as [H, W].
|
|
*@li score_threshold: required, float, threshold of topk process.
|
|
*@li k: required, Int, threshold of topk process.
|
|
*@li min_size: required, float, threshold of nms process.
|
|
*@li nms_threshold: required, float, threshold of nms process.
|
|
*@li post_nms_num: required, Int, threshold of nms process.
|
|
*@li score_filter: bool, mark of score_filter. Defaults to "true"
|
|
*@li box_filter: bool, mark of box_filter. Defaults to "true"
|
|
*@li score_sigmoid: bool, mark of score_sigmoid. Defaults to "false"
|
|
|
|
*@par Outputs:
|
|
*sorted_box: A Tensor of output. Must be float16. N-D with shape [N, 1] . \n
|
|
|
|
* @par Third-party framework compatibility
|
|
* Compatible with the pytorch operator RPNProposals . \n
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS DEPRECATED. Please use RpnProposals instead.
|
|
*/
|
|
// Prototype of operator RpnProposalsD, declared through the REG_OP macro chain.
// Inputs : rois and cls_bg_prob (float16).
// Attrs  : img_size (ListInt), score_threshold, min_size, nms_threshold (Float) and k, post_nms_num (Int)
//          are REQUIRED_ATTR; score_filter (true), box_filter (true) and score_sigmoid (false) are .ATTR
//          with defaults. img_size is an attribute here, whereas RpnProposals takes it as a tensor input.
// Output : a single tensor, sorted_box (float16).
REG_OP(RpnProposalsD)
|
|
.INPUT(rois, TensorType({DT_FLOAT16}))
|
|
.INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
|
|
.REQUIRED_ATTR(img_size, ListInt)
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(k, Int)
|
|
.REQUIRED_ATTR(min_size, Float)
|
|
.REQUIRED_ATTR(nms_threshold, Float)
|
|
.REQUIRED_ATTR(post_nms_num, Int)
|
|
.ATTR(score_filter, Bool, true)
|
|
.ATTR(box_filter, Bool, true)
|
|
.ATTR(score_sigmoid, Bool, false)
|
|
.OUTPUT(sorted_box, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(RpnProposalsD)
|
|
|
|
/**
|
|
*@brief Computes Score Filte Pre-Sort function.
|
|
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li rois: A Tensor. Must be float16. N-D with shape [N, 4].
|
|
* @li cls_bg_prob: A Tensor. Must be float16. N-D with shape [N, 1].
|
|
|
|
*@par Attributes:
|
|
* @li score_threshold: required, float, threshold of topk process.
|
|
* @li k: required, Int, threshold of topk process.
|
|
* @li score_filter: bool, mark of score_filter. Defaults to "true"
|
|
* @li core_max_num: int, max number of core. Defaults to "8"
|
|
*@par Outputs:
|
|
* @li sorted_proposal: A Tensor. Must be float16.
|
|
* N-D with shape [8*6002, 8].
|
|
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
|
|
*/
|
|
|
|
REG_OP(ScoreFiltePreSort)
|
|
.INPUT(rois, TensorType({DT_FLOAT16}))
|
|
.INPUT(cls_bg_prob, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(sorted_proposal, TensorType({ DT_FLOAT16}))
|
|
.OUTPUT(proposal_num, TensorType({ DT_UINT32}))
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(k, Int)
|
|
.ATTR(score_filter, Bool, true)
|
|
.ATTR(core_max_num, Int, 8)
|
|
.OP_END_FACTORY_REG(ScoreFiltePreSort)
|
|
|
|
/**
|
|
*@brief Computes Score Filte Pre-Sort function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li sorted_proposal: A Tensor. Must be float16.
|
|
* N-D with shape [8*6002, 8].
|
|
* @li proposal_num: A Tensor. Must be uint32. N-D with shape [8, 8].
|
|
*
|
|
*@par Attributes:
|
|
* @li min_size: required, float, threahold of nms process.
|
|
* @li score_threshold: required, float, threahold of topk process.
|
|
* @li k: required, Int, threahold of topk process.
|
|
* @li min_size: required, float, threahold of nms process.
|
|
* @li nms_threshold: required, float, threahold of nms process.
|
|
* @li post_nms_num: required, float, threahold of nms process.
|
|
* @li box_filter: bool, mark of box_filter. Defaults to "true"
|
|
* @li core_max_num: int, max number of core. Defaults to "8"
|
|
*@par Outputs:
|
|
* @li sorted_rois: A Tensor. Must be float16. N-D with shape [N, 4].
|
|
* @li sorted_scores: A Tensor. Must be float16. N-D with shape [N, 1].
|
|
* @li sorted_classes: A Tensor. Must be float16. N-D with shape [N, 1].
|
|
*/
|
|
REG_OP(RpnProposalPostProcessing)
|
|
.INPUT(sorted_proposal, TensorType({DT_FLOAT16}))
|
|
.INPUT(proposal_num, TensorType({DT_UINT32}))
|
|
.OUTPUT(sorted_box, TensorType({ DT_FLOAT16}))
|
|
.REQUIRED_ATTR(img_size, ListInt)
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(k, Int)
|
|
.REQUIRED_ATTR(min_size, Float)
|
|
.REQUIRED_ATTR(nms_threshold, Float)
|
|
.REQUIRED_ATTR(post_nms_num, Int)
|
|
.ATTR(box_filter, Bool, true)
|
|
.ATTR(core_max_num, Int, 8)
|
|
.OP_END_FACTORY_REG(RpnProposalPostProcessing)
|
|
/**
|
|
*@brief Computes DecodeBoundariesTarget function.
|
|
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li boundary_predictions: A Tensor. Must be float16.
|
|
* @li anchors: A Tensor. Must be float16.
|
|
|
|
*@par Outputs:
|
|
* @ boundary_encoded: A Tensor. Must be float16.
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
REG_OP(DecodeBoundariesTarget)
|
|
.INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(DecodeBoundariesTarget)
|
|
|
|
/**
|
|
*@brief Computes DecodeCornerpointsTargetBG function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li keypoints_prediction: A Tensor. Must be float16.
|
|
* @li anchors: A Tensor. Must be float16.
|
|
*
|
|
*@par Outputs:
|
|
* @ keypoints_decoded: A Tensor. Must be float16.
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
REG_OP(DecodeCornerpointsTargetBG)
|
|
.INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(DecodeCornerpointsTargetBG);
|
|
|
|
/**
|
|
*@brief Computes DecodeCornerpointsTargetWrtCenterV1 function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li keypoints_prediction: A Tensor. Must be float16.
|
|
* @li anchors: A Tensor. Must be float16.
|
|
*
|
|
*@par Outputs:
|
|
* @ keypoints_decoded: A Tensor. Must be float16.
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
REG_OP(DecodeCornerpointsTargetWrtCenterV1)
|
|
.INPUT(keypoints_prediction, TensorType({DT_FLOAT16}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(keypoints_decoded, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(DecodeCornerpointsTargetWrtCenterV1)
|
|
|
|
/**
|
|
*@brief Computes DecodeWheelsTarget function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li boundary_predictions: A Tensor. Must be float16.
|
|
* @li anchors: A Tensor. Must be float16.
|
|
*
|
|
*@par Outputs:
|
|
* @ boundary_encoded: A Tensor. Must be float16.
|
|
|
|
*@par Restrictions:
|
|
*Warning: THIS FUNCTION IS EXPERIMENTAL. Please do not use.
|
|
*/
|
|
REG_OP(DecodeWheelsTarget)
|
|
.INPUT(boundary_predictions, TensorType({DT_FLOAT16}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(boundary_encoded, TensorType({DT_FLOAT16}))
|
|
.OP_END_FACTORY_REG(DecodeWheelsTarget)
|
|
|
|
/**
|
|
*@brief Computes nms for input boxes and score, support multiple batch and classes.
|
|
* will do clip to window, score filter, top_k, and nms
|
|
|
|
*@par Inputs:
|
|
* Four inputs, including:
|
|
*@li boxes: boxes, a 4D Tensor of type float16 with
|
|
* shape (batch, num_anchors, num_classes, 4). "batch" indicates the batch size of image,
|
|
* and "num_anchors" indicates num of boxes, and "num_classes" indicates classes of detect.
|
|
* and the value "4" refers to "x0", "x1", "y0", and "y1".
|
|
*@li scores: boxes, a 4D Tensor of type float16 with
|
|
* shape (batch, num_anchors, num_classes).
|
|
*@li clip_window: window size, a 2D Tensor of type float16 with
|
|
* shape (batch, 4). 4" refers to "anchor_x0", "anchor_x1", "anchor_y0", and "anchor_y1".
|
|
*@li num_valid_boxes: valid boxes number for each batch, a 1D Tensor of type int32 with
|
|
* shape (batch,) . \n
|
|
|
|
*@par Attributes:
|
|
*@li score_threshold: A required attribute of type float32, specifying the score filter iou iou_threshold.
|
|
*@li iou_threshold: A required attribute of type float32, specifying the nms iou iou_threshold.
|
|
*@li max_size_per_class: A required attribute of type int, specifying the nms output num per class.
|
|
*@li max_total_size: A required attribute of type int, specifying the the nms output num per batch.
|
|
*@li change_coordinate_frame: A optional attribute of type bool, whether to normalize coordinates after clipping.
|
|
*@li transpose_box: A optional attribute of type bool, whether inserted transpose before this op. must be "false" . \n
|
|
|
|
*@par Outputs:
|
|
*@li nmsed_boxes: A 3D Tensor of type float16 with shape (batch, max_total_size, 4),
|
|
* specifying the output nms boxes per batch.
|
|
*@li nmsed_scores: A 2D Tensor of type float16 with shape (batch, max_total_size),
|
|
* specifying the output nms score per batch.
|
|
*@li nmsed_classes: A 2D Tensor of type float16 with shape (batch, max_total_size),
|
|
* specifying the output nms class per batch.
|
|
*@li nmsed_num: A 1D Tensor of type int32 with shape (batch), specifying the valid num of nmsed_boxes . \n
|
|
|
|
*@attention Constraints:
|
|
* Only computation of float16 data is supported.
|
|
*/
|
|
REG_OP(BatchMultiClassNonMaxSuppression)
|
|
.INPUT(boxes, TensorType({DT_FLOAT16}))
|
|
.INPUT(scores, TensorType({DT_FLOAT16}))
|
|
.OPTIONAL_INPUT(clip_window, TensorType({DT_FLOAT16}))
|
|
.OPTIONAL_INPUT(num_valid_boxes, TensorType({DT_INT32}))
|
|
.OUTPUT(nmsed_boxes, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(nmsed_scores, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(nmsed_classes, TensorType({DT_FLOAT16}))
|
|
.OUTPUT(nmsed_num, TensorType({DT_INT32}))
|
|
.REQUIRED_ATTR(score_threshold, Float)
|
|
.REQUIRED_ATTR(iou_threshold, Float)
|
|
.REQUIRED_ATTR(max_size_per_class, Int)
|
|
.REQUIRED_ATTR(max_total_size, Int)
|
|
.ATTR(change_coordinate_frame, Bool, false)
|
|
.ATTR(transpose_box, Bool, false)
|
|
.OP_END_FACTORY_REG(BatchMultiClassNonMaxSuppression)
|
|
|
|
/**
|
|
* @brief To absolute the bounding box . \n
|
|
|
|
* @par Inputs:
|
|
* @li normalized_boxes: A 3D Tensor of type float16 or float32.
|
|
* @li shape_hw: A 1D Tensor of type int32 . \n
|
|
|
|
* @par Attributes:
|
|
* @li reversed_box: An optional bool, specifying the last two dims is "4,num" or
|
|
* "num,4", "true" for "4,num", "false" for "num,4". Defaults to "false" . \n
|
|
|
|
* @par Outputs:
|
|
* y: A Tensor. Has the same type and shape as "normalized_boxes" . \n
|
|
|
|
* @attention Constraints:
|
|
* "normalized_boxes"'s shape must be (batch,num,4) or (batch,4,num).
|
|
* "shape_hw"'s shape must be (4,)
|
|
*/
|
|
REG_OP(ToAbsoluteBBox)
|
|
.INPUT(normalized_boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(shape_hw, TensorType({DT_INT32}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.ATTR(reversed_box, Bool, false)
|
|
.OP_END_FACTORY_REG(ToAbsoluteBBox)
|
|
|
|
/**
|
|
*@brief Computes Normalize bbox function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li boxes: A Tensor. Must be float16 or float32.
|
|
* @li shape_hw: A Tensor. Must be int32.
|
|
*
|
|
*@par Attributes:
|
|
* reversed_box: optional, bool. Defaults to "False"
|
|
*
|
|
*@par Outputs:
|
|
* y: A Tensor. Must have the same type and shape as boxes.
|
|
*/
|
|
REG_OP(NormalizeBBox)
|
|
.INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.INPUT(shape_hw, TensorType({DT_INT32}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
|
|
.ATTR(reversed_box, Bool, false)
|
|
.OP_END_FACTORY_REG(NormalizeBBox)
|
|
|
|
/**
|
|
*@brief Computes decode bboxv2 function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* @li boxes: A Tensor. Must be float16 or float32.
|
|
* @li anchors: A Tensor. Must be int32.
|
|
*
|
|
*@par Attributes:
|
|
* @li scales: optional, listfloat, .
|
|
* @li decode_clip: optional, float, threahold of decode process.
|
|
* @li reversed_boxes: optional, bool,.
|
|
*
|
|
*@par Outputs:
|
|
* y: A Tensor. Must have the same type as box_predictions.
|
|
*/
|
|
REG_OP(DecodeBboxV2)
|
|
.INPUT(boxes, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.INPUT(anchors, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.OUTPUT(y, TensorType({DT_FLOAT16,DT_FLOAT}))
|
|
.ATTR(scales, ListFloat, {1.0, 1.0, 1.0, 1.0})
|
|
.ATTR(decode_clip, Float, 0.0)
|
|
.ATTR(reversed_box, Bool, false)
|
|
.OP_END_FACTORY_REG(DecodeBboxV2)
|
|
|
|
/**
|
|
*@brief Computes sort function.
|
|
*
|
|
*@par Inputs:
|
|
*Inputs include:
|
|
* x: A Tensor. Must be float16 or float32.
|
|
*
|
|
*@par Attributes:
|
|
* @li axis: optional, int.
|
|
* @li descending: optional,bool.
|
|
*
|
|
*@par Outputs:
|
|
* @li y1: A Tensor. Must have the same type as x.
|
|
* @li y2: A Tensor. Indices of y1 in x.Dtype must be int32.
|
|
*/
|
|
REG_OP(Sort)
|
|
.INPUT(x, TensorType({ DT_FLOAT16 }))
|
|
.OUTPUT(y1, TensorType({ DT_FLOAT16 }))
|
|
.OUTPUT(y2, TensorType({ DT_INT32 }))
|
|
.ATTR(axis, Int, -1)
|
|
.ATTR(descending, Bool, false)
|
|
.OP_END_FACTORY_REG(Sort)
|
|
|
|
} // namespace ge
|
|
|
|
#endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
|