/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*!
 * \file nn_detect_ops.h
 * \brief
 */
#ifndef OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_
#define OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_

#include "graph/operator_reg.h"
#include "graph/operator.h"

namespace ge {

/**
 * @brief Generates bounding boxes based on "rois" and "deltas".
 * It is a customized FasterRcnn operator. \n
 * @par Inputs:
 * Two inputs, including:
 * @li rois: Regions of interest (ROIs) generated by the region proposal
 * network (RPN). A 2D Tensor of type float32 or float16 with shape (N, 4).
 * "N" indicates the number of ROIs, and the value "4" refers to "x0", "x1",
 * "y0", and "y1".
 * @li deltas: Absolute variation between the ROIs generated by the RPN and
 * ground truth boxes. A 2D Tensor of type float32 or float16 with shape (N, 4).
 * "N" indicates the number of errors, and 4 indicates "dx", "dy", "dw", and "dh". \n
 * @par Attributes:
 * @li means: An optional list of 4 floats. Defaults to [0.0, 0.0, 0.0, 0.0].
 * "deltas" = "deltas" x "stds" + "means".
 * @li stds: An optional list of 4 floats. Defaults to [1.0, 1.0, 1.0, 1.0].
 * "deltas" = "deltas" x "stds" + "means".
 * @li max_shape: A required list of 2 ints [h, w], specifying the size of the image
 * transferred to the network. Used to ensure that the bbox shape after conversion
 * does not exceed "max_shape".
 * @li wh_ratio_clip: An optional float. Defaults to "16/1000". The values of "dw"
 * and "dh" fall within (-wh_ratio_clip, wh_ratio_clip). \n
 * @par Outputs:
 * bboxes: Bboxes generated based on "rois" and "deltas". Has the same format
 * and type as "rois".
 */
REG_OP(BoundingBoxDecode)
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(deltas, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
    .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .REQUIRED_ATTR(max_shape, ListInt)
    .ATTR(wh_ratio_clip, Float, 0.016)
    .OP_END_FACTORY_REG(BoundingBoxDecode)

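/*
 * A minimal host-side sketch of the decode arithmetic described above, assuming the
 * common FasterRcnn "delta2bbox" convention. The Box struct, the helper name, the
 * exact +/-1-pixel handling and the log-space clamping of dw/dh are illustrative
 * assumptions, not this operator's exact kernel behavior:
 *
 *   #include <algorithm>
 *   #include <cmath>
 *
 *   struct Box { float x0, y0, x1, y1; };
 *
 *   inline Box DecodeOneBox(const Box &roi, const float delta[4], const float means[4],
 *                           const float stds[4], float max_h, float max_w,
 *                           float wh_ratio_clip) {
 *     // "deltas" = "deltas" x "stds" + "means"
 *     float dx = delta[0] * stds[0] + means[0];
 *     float dy = delta[1] * stds[1] + means[1];
 *     float dw = delta[2] * stds[2] + means[2];
 *     float dh = delta[3] * stds[3] + means[3];
 *     // Keep dw/dh in a safe range before exp(); many implementations clamp to
 *     // +/-|log(wh_ratio_clip)| (an assumption here).
 *     float max_ratio = std::abs(std::log(wh_ratio_clip));
 *     dw = std::min(std::max(dw, -max_ratio), max_ratio);
 *     dh = std::min(std::max(dh, -max_ratio), max_ratio);
 *     // Shift the ROI center and rescale its width/height.
 *     float pw = roi.x1 - roi.x0, ph = roi.y1 - roi.y0;
 *     float px = roi.x0 + pw * 0.5f, py = roi.y0 + ph * 0.5f;
 *     float gw = pw * std::exp(dw),  gh = ph * std::exp(dh);
 *     float gx = px + pw * dx,       gy = py + ph * dy;
 *     // Convert back to corners and clamp to the image size given by max_shape = [h, w].
 *     Box out;
 *     out.x0 = std::min(std::max(gx - gw * 0.5f, 0.0f), max_w);
 *     out.y0 = std::min(std::max(gy - gh * 0.5f, 0.0f), max_h);
 *     out.x1 = std::min(std::max(gx + gw * 0.5f, 0.0f), max_w);
 *     out.y1 = std::min(std::max(gy + gh * 0.5f, 0.0f), max_h);
 *     return out;
 *   }
 */
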
/**
 * @brief Computes the coordinate variations between bboxes and ground truth
 * boxes. It is a customized FasterRcnn operator. \n
 * @par Inputs:
 * Two inputs, including:
 * @li anchor_box: Anchor boxes. A 2D Tensor of type float16 or float32 with shape (N, 4).
 * "N" indicates the number of bounding boxes, and the value "4" refers to
 * "x0", "x1", "y0", and "y1".
 * @li ground_truth_box: Ground truth boxes. A 2D Tensor of type float16 or float32 with
 * shape (N, 4). "N" indicates the number of bounding boxes, and the value "4"
 * refers to "x0", "x1", "y0", and "y1". \n
 * @par Attributes:
 * @li means: An optional list of 4 floats. Defaults to [0.0, 0.0, 0.0, 0.0].
 * "deltas" = "deltas" x "stds" + "means".
 * @li stds: An optional list of 4 floats. Defaults to [1.0, 1.0, 1.0, 1.0].
 * "deltas" = "deltas" x "stds" + "means". \n
 * @par Outputs:
 * delats: A 2D Tensor of type float16 or float32 with shape (N, 4), specifying the
 * variations between all anchor boxes and ground truth boxes.
 */
REG_OP(BoundingBoxEncode)
    .INPUT(anchor_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(ground_truth_box, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(delats, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(means, ListFloat, {0.0, 0.0, 0.0, 0.0})
    .ATTR(stds, ListFloat, {1.0, 1.0, 1.0, 1.0})
    .OP_END_FACTORY_REG(BoundingBoxEncode)

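/*
 * The matching encode step, sketched under the same assumptions as the decode sketch
 * above (the inverse transform): raw deltas are computed from the anchor and
 * ground-truth centers and sizes, then normalized as ("deltas" - "means") / "stds":
 *
 *   #include <cmath>
 *
 *   struct Box { float x0, y0, x1, y1; };
 *
 *   inline void EncodeOneBox(const Box &anchor, const Box &gt, const float means[4],
 *                            const float stds[4], float delta[4]) {
 *     float pw = anchor.x1 - anchor.x0, ph = anchor.y1 - anchor.y0;
 *     float px = anchor.x0 + pw * 0.5f, py = anchor.y0 + ph * 0.5f;
 *     float gw = gt.x1 - gt.x0,         gh = gt.y1 - gt.y0;
 *     float gx = gt.x0 + gw * 0.5f,     gy = gt.y0 + gh * 0.5f;
 *     delta[0] = ((gx - px) / pw - means[0]) / stds[0];      // dx
 *     delta[1] = ((gy - py) / ph - means[1]) / stds[1];      // dy
 *     delta[2] = (std::log(gw / pw) - means[2]) / stds[2];   // dw
 *     delta[3] = (std::log(gh / ph) - means[3]) / stds[3];   // dh
 *   }
 */
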
/**
 * @brief Judges whether the bounding box is valid. It is a customized
 * FasterRcnn operator. \n
 * @par Inputs:
 * Two inputs, including:
 * @li bbox_tensor: Bounding box. A 2D Tensor of type float16 with shape (N, 4).
 * "N" indicates the number of bounding boxes, and the value "4" indicates "x0",
 * "x1", "y0", and "y1".
 * @li img_metas: Valid boundary values of the image. A 1D Tensor of type float16
 * with shape (16, ). \n
 * @par Outputs:
 * valid_tensor: A bool with shape (N, 1), specifying whether an input anchor is
 * in an image. "1" indicates valid, while "0" indicates invalid. \n
 * @attention Constraints:
 * 16 "img_metas" are input. Only the first three numbers (height, width, ratio) are
 * valid, specifying the valid boundary (height x ratio, width x ratio).
 */
REG_OP(CheckValid)
    .INPUT(bbox_tensor, TensorType({DT_FLOAT16}))
    .INPUT(img_metas, TensorType({DT_FLOAT16}))
    .OUTPUT(valid_tensor, TensorType({DT_INT8}))
    .OP_END_FACTORY_REG(CheckValid)

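/*
 * A sketch of the validity test implied by the constraint above: a box counts as valid
 * when it lies inside the boundary (height x ratio, width x ratio) taken from the first
 * three entries of "img_metas". The helper name and the inclusive/exclusive handling of
 * the upper boundary are illustrative assumptions:
 *
 *   #include <cstdint>
 *
 *   inline int8_t IsBoxValid(float x0, float y0, float x1, float y1,
 *                            float img_h, float img_w, float ratio) {
 *     const float valid_h = img_h * ratio;
 *     const float valid_w = img_w * ratio;
 *     const bool ok = x0 >= 0.0f && y0 >= 0.0f && x1 <= valid_w - 1.0f && y1 <= valid_h - 1.0f;
 *     return ok ? 1 : 0;  // "1" valid, "0" invalid, matching the int8 output
 *   }
 */
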
/**
 * @brief Computes the intersection over union (iou) or the intersection over
 * foreground (iof) based on the ground-truth and predicted regions. \n
 * @par Inputs:
 * Two inputs, including:
 * @li bboxes: Bounding boxes, a 2D Tensor of type float16 or float32 with
 * shape (N, 4). "N" indicates the number of bounding boxes, and the value
 * "4" refers to "x0", "x1", "y0", and "y1".
 * @li gtboxes: Ground-truth boxes, a 2D Tensor of type float16 or float32
 * with shape (M, 4). "M" indicates the number of ground truth boxes, and
 * the value "4" refers to "x0", "x1", "y0", and "y1". \n
 * @par Attributes:
 * mode: Computation mode, a character string with the value range of [iou, iof]. \n
 * @par Outputs:
 * overlap: A 2D Tensor of type float16 or float32 with shape [M, N], specifying
 * the IoU or IoF ratio. \n
 * @attention Constraints:
 * Only computation of float16 data is supported. To avoid overflow, the input
 * length and width are scaled by 0.2 internally.
 */
REG_OP(Iou)
    .INPUT(bboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(gtboxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(overlap, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(mode, String, "iou")
    .OP_END_FACTORY_REG(Iou)

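/*
 * A scalar sketch of the two modes, assuming corner-format boxes (x0, y0, x1, y1):
 * "iou" normalizes the intersection by the union area, while "iof" normalizes it by
 * the area of only one of the two boxes (taken here as the ground-truth box; which
 * side counts as foreground is an assumption, as is the helper name):
 *
 *   #include <algorithm>
 *
 *   inline float Overlap(const float b[4], const float g[4], bool iof) {
 *     float iw = std::min(b[2], g[2]) - std::max(b[0], g[0]);
 *     float ih = std::min(b[3], g[3]) - std::max(b[1], g[1]);
 *     float inter = std::max(iw, 0.0f) * std::max(ih, 0.0f);
 *     float area_b = (b[2] - b[0]) * (b[3] - b[1]);
 *     float area_g = (g[2] - g[0]) * (g[3] - g[1]);
 *     float denom = iof ? area_g : (area_b + area_g - inter);
 *     return denom > 0.0f ? inter / denom : 0.0f;
 *   }
 */
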
/**
 * @brief Performs the backpropagation of ROIAlign for training scenarios. \n
 * @par Inputs:
 * Three inputs, including:
 * @li ydiff: A 5HD gradient input of type float32.
 * @li rois: ROI position. A 2D Tensor of float32 with shape (N, 5). "N" indicates the number of ROIs,
 * and the value "5" indicates the index of the image where the ROI is located, plus "x0", "x1", "y0", and "y1".
 * @li rois_n: An optional input, specifying the number of valid ROIs. This parameter is reserved. \n
 * @par Attributes:
 * @li xdiff_shape: A required list of 4 ints, obtained based on the shape of "features" of ROIAlign.
 * @li pooled_width: A required attribute of type int, specifying the W dimension.
 * @li pooled_height: A required attribute of type int, specifying the H dimension.
 * @li spatial_scale: A required attribute of type float, specifying the scaling ratio of "features" to the original image.
 * @li sample_num: An optional attribute of type int, specifying the horizontal and vertical
 * sampling frequency of each output. If this attribute is set to "0", the sampling frequency is
 * equal to the rounded up value of "rois", which is a floating point number. Defaults to "2". \n
 * @par Outputs:
 * xdiff: Gradient added to input "features". Has the same 5HD shape as input "features".
 */
REG_OP(ROIAlignGrad)
    .INPUT(ydiff, TensorType({DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT}))
    .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
    .OUTPUT(xdiff, TensorType({DT_FLOAT}))
    .REQUIRED_ATTR(xdiff_shape, ListInt)
    .REQUIRED_ATTR(pooled_width, Int)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .ATTR(sample_num, Int, 2)
    .OP_END_FACTORY_REG(ROIAlignGrad)

/**
 * @brief Obtains the ROI feature matrix from the feature map. It is a customized FasterRcnn operator. \n
 * @par Inputs:
 * Three inputs, including:
 * @li features: A 5HD Tensor of type float32 or float16.
 * @li rois: ROI position. A 2D Tensor of float32 or float16 with shape (N, 5). "N" indicates the number of ROIs,
 * and the value "5" indicates the index of the image where the ROI is located, plus
 * "x0", "y0", "x1", and "y1".
 * @li rois_n: An optional input of type int32, specifying the number of valid ROIs. This parameter is reserved. \n
 * @par Attributes:
 * @li spatial_scale: A required attribute of type float32, specifying the scaling ratio of "features" to the original image.
 * @li pooled_height: A required attribute of type int32, specifying the H dimension.
 * @li pooled_width: A required attribute of type int32, specifying the W dimension.
 * @li sample_num: An optional attribute of type int32, specifying the horizontal and vertical sampling frequency of each output. If this attribute is set to "0",
 * the sampling frequency is equal to the rounded up value of "rois", which is a floating point number. Defaults to "2".
 * @li roi_end_mode: An optional attribute of type int32. Defaults to "1". \n
 * @par Outputs:
 * y: Outputs the feature sample of each ROI position. A 5HD Tensor of type float32 or float16.
 * The axis N is the number of input ROIs. Axes H, W, and C are consistent
 * with the values of "pooled_height",
 * "pooled_width", and "features", respectively.
 */
REG_OP(ROIAlign)
    .INPUT(features, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(rois, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OPTIONAL_INPUT(rois_n, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(spatial_scale, Float)
    .REQUIRED_ATTR(pooled_height, Int)
    .REQUIRED_ATTR(pooled_width, Int)
    .ATTR(sample_num, Int, 2)
    .ATTR(roi_end_mode, Int, 1)
    .OP_END_FACTORY_REG(ROIAlign)

/**
 * @brief Performs SSD prior box detection. \n
 * @par Inputs:
 * Two inputs, including:
 * @li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
 * @li img: Source image. Has the same type and format as "x". \n
 * @par Attributes:
 * @li min_size: A required list of float32, specifying the minimum edge length of a square prior box.
 * @li max_size: A required list of float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size).
 * @li aspect_ratio: A required list of float32, specifying the aspect ratios for generated rectangle boxes. The height
 * is min_size/sqrt(aspect_ratio), the width is min_size*sqrt(aspect_ratio).
 * @li img_h: An optional int32, specifying the source image height. Defaults to "0".
 * @li img_w: An optional int32, specifying the source image width. Defaults to "0".
 * @li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image. Defaults to "0.0".
 * @li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image. Defaults to "0.0".
 * @li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
 * @li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
 * @li offset: An optional float32, specifying the offset. Defaults to "0.5".
 * @li variance: An optional list of float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). \n
 * @par Outputs:
 * y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. \n
 * @attention Constraints:
 * This operator applies only to SSD networks.
 * @see SSDDetectionOutput()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(PriorBox)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .REQUIRED_ATTR(aspect_ratio, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBox);

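/*
 * A sketch of how prior box edge lengths follow from the attributes above, assuming the
 * usual SSD convention: one square box per "min_size", one square box with edge
 * sqrt(min_size * max_size), and one rectangle per aspect ratio (two when "flip" is
 * true). Centers would be placed at ((j + offset) * step_w, (i + offset) * step_h);
 * the helper name is illustrative:
 *
 *   #include <cmath>
 *   #include <utility>
 *   #include <vector>
 *
 *   // Returns (width, height) pairs of the prior boxes generated for one feature-map cell.
 *   inline std::vector<std::pair<float, float>> PriorBoxSizes(
 *       float min_size, float max_size, const std::vector<float> &aspect_ratios, bool flip) {
 *     std::vector<std::pair<float, float>> wh;
 *     wh.emplace_back(min_size, min_size);          // square, edge = min_size
 *     float edge = std::sqrt(min_size * max_size);
 *     wh.emplace_back(edge, edge);                  // square, edge = sqrt(min_size * max_size)
 *     for (float ar : aspect_ratios) {
 *       float w = min_size * std::sqrt(ar);
 *       float h = min_size / std::sqrt(ar);
 *       wh.emplace_back(w, h);
 *       if (flip) wh.emplace_back(h, w);            // flipped ratio 1/ar
 *     }
 *     return wh;
 *   }
 */
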
/**
 * @brief Performs SSD prior box detection, with four additional matrices as inputs and the "aspect_ratio" attribute deleted compared to PriorBox. \n
 * @par Inputs:
 * Six inputs, including:
 * @li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
 * @li img: Source image. Has the same type and format as "x".
 * @li data_h: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map height.
 * @li data_w: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the matrix for indexing the feature map width.
 * @li box_height: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the height of each prior box.
 * @li box_width: An NC1HWC0 or NCHW tensor of type float32 or float16, specifying the width of each prior box. \n
 * @par Attributes:
 * @li min_size: A required list of float32, specifying the minimum edge length of a square prior box.
 * @li max_size: A required list of float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size).
 * @li img_h: An optional int32, specifying the height of the source image.
 * @li img_w: An optional int32, specifying the width of the source image.
 * @li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
 * @li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
 * @li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
 * @li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
 * @li offset: An optional float32, specifying the offset. Defaults to "0.5".
 * @li variance: An optional list of float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). \n
 * @par Outputs:
 * y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. \n
 * @attention Constraints:
 * This operator applies only to SSD networks.
 * @see SSDDetectionOutput()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
 */
REG_OP(PriorBoxD)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(data_h, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(data_w, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(box_height, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(box_width, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBoxD);

/**
 * @brief Performs SSD prior box detection, with the prior boxes passed in as an additional input and the "aspect_ratio" attribute deleted compared to PriorBox. \n
 * @par Inputs:
 * Three inputs, including:
 * @li x: An NC1HWC0 or NCHW feature map of type float32 or float16.
 * @li img: Source image. Has the same type and format as "x".
 * @li boxes: An ND tensor of type float32 or float16, specifying the prior box information. Same as output y. \n
 * @par Attributes:
 * @li min_size: A required list of float32, specifying the minimum edge length of a square prior box.
 * @li max_size: A required list of float32, specifying the maximum edge length of a square prior box: sqrt(min_size * max_size).
 * @li img_h: An optional int32, specifying the height of the source image.
 * @li img_w: An optional int32, specifying the width of the source image.
 * @li step_h: An optional float32, specifying the height step for mapping the center point from the feature map to the source image.
 * @li step_w: An optional float32, specifying the width step for mapping the center point from the feature map to the source image.
 * @li flip: An optional bool. If "True", "aspect_ratio" will be flipped. Defaults to "True".
 * @li clip: An optional bool. If "True", a prior box is clipped to within [0, 1]. Defaults to "False".
 * @li offset: An optional float32, specifying the offset. Defaults to "0.5".
 * @li variance: An optional list of float32, specifying the variance of a prior box, either one or four variances. Defaults to "0.1" (one value). \n
 * @par Outputs:
 * y: An ND tensor of type float32 or float16, specifying the prior box information, including its coordinates and variance. \n
 * @attention Constraints:
 * This operator applies only to SSD networks.
 * @see SSDDetectionOutput()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use PriorBox instead.
 */
REG_OP(PriorBoxDV2)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(boxes, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(y, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(min_size, ListFloat)
    .REQUIRED_ATTR(max_size, ListFloat)
    .ATTR(img_h, Int, 0)
    .ATTR(img_w, Int, 0)
    .ATTR(step_h, Float, 0.0)
    .ATTR(step_w, Float, 0.0)
    .ATTR(flip, Bool, true)
    .ATTR(clip, Bool, false)
    .ATTR(offset, Float, 0.5)
    .ATTR(variance, ListFloat, {0.1})
    .OP_END_FACTORY_REG(PriorBoxDV2);

/**
 * @brief Performs Position Sensitive ROI Pooling. \n
 * @par Inputs:
 * Two inputs, including:
 * @li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
 * map. Dimension C1 must be equal to
 * (int(output_dim + 15) / C0) * group_size * group_size.
 * @li rois: A tensor of type float16 or float32, with shape
 * [batch, 5, rois_num], describing the ROIs. Each ROI consists of five
 * elements: "batch_id", "x1", "y1", "x2", and "y2", where "batch_id" indicates
 * the index of the input feature map, and "x1", "y1", "x2", and "y2" must be
 * greater than or equal to "0.0". \n
 * @par Attributes:
 * @li output_dim: A required int32, specifying the number of output channels,
 * must be greater than 0.
 * @li group_size: A required int32, specifying the number of groups to encode
 * position-sensitive score maps, must be within the range (0, 128).
 * @li spatial_scale: A required float32, scaling factor for mapping the input
 * coordinates to the ROI coordinates. \n
 * @par Outputs:
 * y: An NC1HWC0 tensor of type float16 or float32, describing the result
 * feature map. \n
 * @attention Constraints:
 * NC1HWC0: The number of channels must be group_size squared, and rois_num must be a multiple of 16.
 */
REG_OP(PSROIPooling)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(output_dim, Int)
    .REQUIRED_ATTR(group_size, Int)
    .REQUIRED_ATTR(spatial_scale, Float)
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OP_END_FACTORY_REG(PSROIPooling)

/**
 * @brief Returns detection result. \n
 * @par Inputs:
 * Five inputs, including:
 * @li rois: An NCHW tensor of type float16 or float32, output from operator proposal_d at the preceding layer, used as the input of operator FSRDetectionOutput.
 * @li bbox_delta: An NC1HWC0 tensor of type float16 or float32, specifying the prediction offset, used to update the coordinates [x1, y1, x2, y2] of each ROI.
 * @li score: An NC1HWC0 tensor of type float16 or float32, specifying the probability of each class. Class 0 is the background class.
 * @li im_info: An ND tensor of type float16 or float32, specifying the image information.
 * @li actual_rois_num: An optional NCHW tensor of type int32, specifying the number of valid boxes per batch. \n
 * @par Attributes:
 * @li batch_rois: An optional int32, specifying the number of images to be predicted. Defaults to "1".
 * @li num_classes: A required int32, specifying the number of classes to be predicted. The value must be greater than 0.
 * @li score_threshold: A required float32, specifying the threshold for box filtering. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float32, specifying the confidence threshold for box filtering, which is the output "obj" of operator Region. The value range is (0.0, 1.0). \n
 * @par Outputs:
 * @li box: A tensor of type float16 or float32 for proposal of actual output, with output shape [batch, numBoxes, 8].
 * 8 means [x1, y1, x2, y2, score, label, batchID, NULL]; the maximum value of numBoxes is 1024.
 * That is, take min(the maximum number of input boxes, 1024).
 * @li actual_bbox_num: A tensor of type int32 with shape [batch, num_classes], specifying the number of output boxes. \n
 * @attention Constraints:
 * @li totalnum < max_rois_num * batch_rois.
 * @li "score" must be with shape (total_num, (num_classes+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
 * @li "bbox_delta" must be with shape (total_num, (num_classes*4+15)//16, 1, 1, 16), where "total_num" indicates the number of valid input boxes of all images.
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(FSRDetectionOutput)
    .INPUT(rois, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(im_info, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OPTIONAL_INPUT(actual_rois_num, TensorType({DT_INT32}))
    .OUTPUT(actual_bbox_num, TensorType({DT_INT32}))
    .OUTPUT(box, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(batch_rois, Int, 1)
    .REQUIRED_ATTR(num_classes, Int)
    .REQUIRED_ATTR(score_threshold, Float)
    .REQUIRED_ATTR(iou_threshold, Float)
    .OP_END_FACTORY_REG(FSRDetectionOutput)

/**
 * @brief Returns detection result. \n
 * @par Inputs:
 * Three inputs, including:
 * @li bbox_delta: An ND tensor of type float16 or float32, specifying the box loc predictions, used as the input of operator SSDDetectionOutput.
 * @li score: An ND tensor of type float16 or float32, specifying the box confidence data, used as the input of operator SSDDetectionOutput.
 * @li anchors: An ND tensor of type float16 or float32, output from operator PriorBoxD, used as the input of operator SSDDetectionOutput. \n
 * @par Attributes:
 * @li num_classes: An optional int32, specifying the number of classes to be predicted. Defaults to "2". The value must be greater than 1 and less than 1025.
 * @li share_location: An optional bool, specifying whether the location is shared. Defaults to "True".
 * @li background_label_id: An optional int32, specifying the background label ID. Must be 0.
 * @li iou_threshold: An optional float32, specifying the NMS threshold.
 * @li top_k: An optional int32, specifying the top-k value. Defaults to "200".
 * @li eta: An optional float32, specifying the eta value. Defaults to "1.0".
 * @li variance_encoded_in_target: An optional bool, specifying whether the variance is encoded in the target. Defaults to "False".
 * @li code_type: An optional int32, specifying the code type. Defaults to "1" (only "2" is supported). The corner is 1, center_size is 2, corner_size is 3.
 * @li keep_top_k: An optional int32, specifying the top-k value after NMS. Defaults to "-1".
 * @li confidence_threshold: An optional float32, specifying the top-k filter threshold. Only detections with confidence greater than the threshold are considered.
 * @li kernel_name: An optional string, specifying the operator name. Defaults to "ssd_detection_output". \n
 * @par Outputs:
 * @li out_boxnum: A tensor of type int32, specifying the number of output boxes.
 * @li y: A tensor of type float16 or float32 with shape [batch, keep_top_k, 8], describing the information of each output box.
 * In the output shape, 8 means (batchID, label(classID), score(class probability), xmin, ymin, xmax, ymax, null).
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(SSDDetectionOutput)
    .INPUT(bbox_delta, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(score, TensorType({DT_FLOAT, DT_FLOAT16}))
    .INPUT(anchors, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(out_boxnum, TensorType({DT_INT32}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .ATTR(num_classes, Int, 2)
    .ATTR(share_location, Bool, true)
    .ATTR(background_label_id, Int, 0)
    .ATTR(iou_threshold, Float, 0.3)
    .ATTR(top_k, Int, 200)
    .ATTR(eta, Float, 1.0)
    .ATTR(variance_encoded_in_target, Bool, false)
    .ATTR(code_type, Int, 1)
    .ATTR(keep_top_k, Int, -1)
    .ATTR(confidence_threshold, Float, 0.0)
    .OP_END_FACTORY_REG(SSDDetectionOutput)

/**
 * @brief Normalizes data. It is called Region on YOLO v2 and Yolo on YOLO v3. \n
 * @par Inputs:
 * x: An NCHW tensor of type float16 or float32. The data is with shape (N, boxes*(coords+obj+classes), H, W),
 * where "obj" indicates the confidence of an object, and only one confidence is supported. Boxes are arranged
 * as xx...x yy...y ww...w hh...h bb...b c0c0...c0 c1c1...c1 ... cncn...cn. \n
 * @par Attributes:
 * @li boxes: A required int32, specifying the number of anchor boxes. Defaults to "5" for V2 or "3" for V3.
 * @li coords: An int32, specifying the number of parameters required for locating an object. The value is fixed at "4", corresponding to (x, y, w, h).
 * @li classes: An int32, specifying the number of prediction classes. Defaults to "80". The value range is [1, 1024].
 * @li yolo_version: A string, specifying the YOLO version, either "V2" or "V3". Defaults to "V3".
 * @li softmax: A bool, specifying whether to perform softmax, valid only when "yolo_version = V2". Defaults to "false".
 * @li background: A bool, specifying the operation types of the obj and classes, used in conjunction with "softmax" and valid only when "yolo_version = V2". Defaults to "false".
 * @li softmaxtree: A bool. Fixed to False, defined in Lite, but not used. Defaults to "false". \n
 * @par Outputs:
 * @li coord_data: A float16 or float32 with shape [N, boxes*coords, ceilx(height*width*2+32, 32)/2],
 * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the coordinates of a detected box.
 * @li obj_prob: A float16 or float32 with shape [N, ceilx(boxes*height*width*2+32, 32)/2],
 * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the confidence.
 * @li classes_prob: A float16 or float32 with shape [N, classes, ceilx(boxes*height*width*2+32, 32)/2],
 * where "ceil" indicates that a detected box is aligned upwards with the second parameter. Specifies the prediction classes. \n
 * @attention Constraints:
 * @li This operator applies to YOLO v2 and v3 networks.
 * @li The succeeding layer of the Yolo operator must be operator Yolov3DetectionOutput.
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(Yolo)
    .INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(yolo_version, String, "V3")
    .ATTR(softmax, Bool, false)
    .ATTR(background, Bool, false)
    .ATTR(softmaxtree, Bool, false)
    .OP_END_FACTORY_REG(Yolo)

/**
 * @brief Performs YOLO V2 detection. \n
 * @par Inputs:
 * Four inputs, including:
 * @li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
 * Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
 * @li img_info: A float16 or float32, describing the image information including the required image height and width
 * and the actual image height and width.
 *
 * @par Attributes:
 * @li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
 * @li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
 * @li coords: Specifies the number of coordinate parameters. Must be 4.
 * @li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
 * @li relative: An optional bool. Defaults to and must be "true".
 * @li obj_threshold: A required float, specifying the confidence threshold for box filtering,
 * which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
 * @li post_nms_topn: An optional int32. This attribute is reserved.
 * @li score_threshold: A required float, specifying the class score threshold for box filtering,
 * which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
 * @li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
 *
 * @par Outputs:
 * @li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn], describing the information of each output box.
 * In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
 * @li box_out_num: A tensor of type int32 with shape [batch, 8, 1, 1], specifying the number of output boxes. Only the first of the 8 numbers is valid
 * and gives the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
 *
 * @attention Constraints:
 * @li This operator applies only to the YOLO v2 network.
 * @li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator.
 *
 * @see Yolo()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(YoloV2DetectionOutput)
    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 20)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutput)

/**
 * @brief Performs YOLO V2 detection. \n
 * @par Inputs:
 * Six inputs, including:
 * @li The outputs of operator Yolo at the preceding layer (that is, one Yolo operator on YOLO v2) are used as the inputs of operator Yolov2DetectionOutput.
 * Each Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
 * @li imginfo: A float16, describing the image information including the required image height and width
 * and the actual image height and width.
 * @li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
 * [[0, 1, 2...(width-1)], [0, 1, 2...(width-1)]...[0, 1, 2...(width-1)]], consisting of height groups of [0, 1, 2...(width-1)], is formed
 * (see the index-grid sketch after this registration).
 * @li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0, 0...0], [1, 1...1], [2, 2...2]...[height-1, height-1..., height-1]].
 *
 * @par Attributes:
 * @li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
 * @li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
 * @li coords: Specifies the number of coordinate parameters. Must be 4.
 * @li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 20].
 * @li relative: An optional bool. Defaults to and must be "true".
 * @li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
 * @li post_nms_topn: An optional int32. This attribute is reserved.
 * @li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
 * @li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
 *
 * @par Outputs:
 * @li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn], describing the information of each output box.
 * In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
 * @li box_out_num: A tensor of type int32 with shape [batch, 8, 1, 1], specifying the number of output boxes. Only the first of the 8 numbers is valid
 * and gives the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
 *
 * @attention Constraints:
 * @li This operator applies only to the YOLO v2 network.
 * @li The preceding layer of operator Yolov2DetectionOutput must be one Yolo operator. \n
 * @see Yolo()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV2DetectionOutput instead.
 */
REG_OP(YoloV2DetectionOutputD)
    .INPUT(coord_data, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 5)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 20)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV2DetectionOutputD)

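/*
 * The "windex"/"hindex" inputs above are plain coordinate grids. A sketch of how a host
 * could build them for a feature map of size height x width, assuming row-major buffers
 * (the helper name is illustrative):
 *
 *   #include <vector>
 *
 *   inline void BuildIndexGrids(int height, int width,
 *                               std::vector<float> &windex, std::vector<float> &hindex) {
 *     windex.resize(height * width);
 *     hindex.resize(height * width);
 *     for (int h = 0; h < height; ++h) {
 *       for (int w = 0; w < width; ++w) {
 *         windex[h * width + w] = static_cast<float>(w);  // [[0, 1, ..., width-1], repeated per row]
 *         hindex[h * width + w] = static_cast<float>(h);  // [[0, ..., 0], [1, ..., 1], ...]
 *       }
 *     }
 *   }
 */
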
/**
 * @brief Performs YOLO V3 detection. \n
 * @par Inputs:
 * Ten inputs, including:
 * @li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class".
 * There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
 * @li img_info: A float16 or float32, describing the image information including the required image height and width
 * and the actual image height and width.
 * @par Attributes:
 * @li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
 * @li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
 * @li coords: Specifies the number of coordinate parameters. Must be 4.
 * @li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
 * @li relative: An optional bool. Defaults to and must be "true".
 * @li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
 * @li post_nms_topn: An optional int32. This attribute is reserved.
 * @li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
 * @li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
 * @par Outputs:
 * @li box_out: A tensor of type float16 or float32 with shape [batch, 6*post_nms_topn], describing the information of each output box.
 * In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
 * @li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
 * Only the first of the 8 numbers is valid and gives the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
 * @attention Constraints:
 * @li This operator applies only to the YOLO v3 network.
 * @li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators. \n
 * @see Yolo()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(YoloV3DetectionOutput)
    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases_low, ListFloat)
    .REQUIRED_ATTR(biases_mid, ListFloat)
    .REQUIRED_ATTR(biases_high, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutput)

/**
 * @brief Performs YOLO V3 detection. \n
 * @par Inputs:
 * 16 inputs, including:
 * @li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
 * A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
 * @li imginfo: A float16, describing the image information including the required image height and width
 * and the actual image height and width.
 * @li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
 * [[0, 1, 2...(width-1)], [0, 1, 2...(width-1)]...[0, 1, 2...(width-1)]], consisting of height groups of [0, 1, 2...(width-1)], is formed for the three Yolo outputs, respectively. \n
 * @li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs.
 * [[0, 0...0], [1, 1...1], [2, 2...2]...[height-1, height-1..., height-1]] is formed for the three Yolo outputs, respectively. \n
 *
 * @par Attributes:
 * @li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
 * @li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
 * @li coords: Specifies the number of coordinate parameters. Must be 4.
 * @li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
 * @li relative: An optional bool. Defaults to and must be "true".
 * @li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
 * @li post_nms_topn: An optional int32. This attribute is reserved.
 * @li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
 * @li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
 * @par Outputs:
 * @li box_out: A tensor of type float16 or float32 with shape [batch, 6*post_nms_topn], describing the information of each output box.
 * In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
 * @li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
 * Only the first of the 8 numbers is valid and gives the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
 * @attention Constraints:
 * @li This operator applies only to the YOLO v3 network.
 * @li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
 * @see Yolo()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutput instead.
 */
REG_OP(YoloV3DetectionOutputD)
    .INPUT(coord_data_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(coord_data_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(obj_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_low, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_mid, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(classes_prob_high, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(img_info, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(windex3, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex1, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex2, TensorType({DT_FLOAT16, DT_FLOAT}))
    .INPUT(hindex3, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases_low, ListFloat)
    .REQUIRED_ATTR(biases_mid, ListFloat)
    .REQUIRED_ATTR(biases_high, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputD)

/**
 * @brief Performs YOLO V3 detection. \n
 * @par Inputs:
 * Ten inputs, including:
 * @li Operator Yolov3DetectionOutput takes the outputs of operator Yolo as its inputs. A Yolo operator has three outputs: "coords", "obj", and "class". \n
 * There are three Yolo operators at Yolov3DetectionOutput's preceding layer on Yolo v3. For details, see the description of operator Yolo.
 * @li img_info: A float16 or float32, describing the image information including the required image height and width \n
 * and the actual image height and width.
 * @par Attributes:
 * @li biases: A required float. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
 * @li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
 * @li coords: Specifies the number of coordinate parameters. Must be 4.
 * @li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
 * @li relative: An optional bool. Defaults to and must be "true".
 * @li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
 * @li post_nms_topn: An optional int32. This attribute is reserved.
 * @li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0]. \n
 * @li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
 * @par Outputs:
 * @li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
 * describing the information of each output box.
 * In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
 * @li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
 * Only the first of the 8 numbers is valid and gives the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
 * @attention Constraints: \n
 * @li This operator applies only to the YOLO v3 network.
 * @li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
 * @see Yolo()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP(YoloV3DetectionOutputV2)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2)

/**
 * @brief Performs YOLO V3 detection.
 * @par Inputs:
 * 16 inputs, including:
 * @li The outputs of operator Yolo at the preceding layer (that is, three Yolo operators on YOLO v3) are used as the inputs of operator Yolov3DetectionOutput.
 * A Yolo operator has three outputs: "coords", "obj", and "class". For details, see the description of operator Yolo.
 * @li imginfo: A float16, describing the image information including the required image height and width
 * and the actual image height and width.
 * @li windex: A windex tensor with shape [height, width]. Has the same type as the inputs.
 * [[0, 1, 2...(width-1)], [0, 1, 2...(width-1)]...[0, 1, 2...(width-1)]], consisting of height groups of [0, 1, 2...(width-1)],
 * is formed for the three Yolo outputs, respectively. It is a dynamic input. \n
 * @li hindex: A hindex tensor with shape [height, width]. Has the same type as the inputs. [[0, 0...0], [1, 1...1], [2, 2...2]...[height-1, height-1..., height-1]] is formed for the three Yolo outputs, respectively. \n
 * @par Attributes:
 * @li biases: A required float32. "biases = Number of Yolo operators at the preceding layer x 2 x boxes"
 * @li boxes: A required int32, specifying the number of anchor boxes predicted for each Yolo layer.
 * @li coords: Specifies the number of coordinate parameters. Must be 4.
 * @li classes: A required int32, specifying the number of classes to be predicted. The value range is [1, 80].
 * @li relative: An optional bool. Defaults to and must be "true".
 * @li obj_threshold: A required float, specifying the confidence threshold for box filtering, which is the output "obj" of operator Yolo. The value range is [0.0, 1.0].
 * @li post_nms_topn: An optional int32. This attribute is reserved.
 * @li score_threshold: A required float, specifying the class score threshold for box filtering, which is the output "class" of operator Yolo. The value range is [0.0, 1.0].
 * @li iou_threshold: A required float, specifying the intersection-over-union (IOU) threshold for box filtering. The value range is [0.0, 1.0].
 * @li pre_nms_topn: An optional int, specifying the number of boxes for non-maximum suppression (NMS). Defaults to "512".
 *
 * @par Outputs:
 * @li box_out: A tensor of type float16 or float32 with shape [batch, 6, post_nms_topn] (out_box_dim == 3) or [batch, 6*post_nms_topn] (out_box_dim == 2),
 * describing the information of each output box.
 * In the output shape, 6 means x1, y1, x2, y2, score, label (class). Output by the number of box_out_num.
 * @li box_out_num: A tensor of type int32 with shape [batch, 8], specifying the number of output boxes.
 * Only the first of the 8 numbers is valid and gives the number of valid boxes in each batch; the maximum number of valid boxes in each batch is 1024.
 *
 * @attention Constraints:
 * @li This operator applies only to the YOLO v3 network.
 * @li The preceding layer of operator Yolov3DetectionOutput must be three Yolo operators.
 * @see Yolo()
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 * @par Restrictions:
 * Warning: THIS FUNCTION IS DEPRECATED. Please use YoloV3DetectionOutputV2 instead.
 */
REG_OP(YoloV3DetectionOutputV2D)
    .DYNAMIC_INPUT(x, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(windex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .DYNAMIC_INPUT(hindex, TensorType({DT_FLOAT16, DT_FLOAT}))
    .REQUIRED_ATTR(biases, ListFloat)
    .ATTR(boxes, Int, 3)
    .ATTR(coords, Int, 4)
    .ATTR(classes, Int, 80)
    .ATTR(relative, Bool, true)
    .ATTR(obj_threshold, Float, 0.5)
    .ATTR(post_nms_topn, Int, 512)
    .ATTR(score_threshold, Float, 0.5)
    .ATTR(iou_threshold, Float, 0.45)
    .ATTR(pre_nms_topn, Int, 512)
    .ATTR(N, Int, 10)
    .ATTR(resize_origin_img_to_net, Bool, false)
    .ATTR(out_box_dim, Int, 3)
    .OUTPUT(box_out, TensorType({DT_FLOAT16, DT_FLOAT}))
    .OUTPUT(box_out_num, TensorType({DT_INT32}))
    .OP_END_FACTORY_REG(YoloV3DetectionOutputV2D)

/**
 * @brief Spatial Pyramid Pooling, multi-level pooling.
 * Pools out an (n, sigma(c*2^i*2^i)) tensor, with i in range [0, pyramid_height). \n
 * @par Inputs:
 * x: An NCHW tensor, supporting float16 or float32 type. \n
 * @par Attributes:
 * @li pyramid_height: A required int32.
 * Multi-level pooling out from 2^0 to 2^(pyramid_height-1).
 * @li pool_method: An optional int32, pooling method: 0-MAX, 1-AVE.
 * Defaults to "0". \n
 * @par Outputs:
 * y: An NCHW tensor, supporting float16 or float32 type. \n
 * @attention Constraints:
 * @li pyramid_height: "pyramid_height" should be in range [0, 7).
 * The pooling parameters should satisfy the Caffe pooling constraint (pad < kernel).
 * @li feature_size: The input feature map height and width should be within [1, 510]. \n
 * @par Third-party framework compatibility
 * Compatible with the Caffe operator SPP.
 */
REG_OP(SPP)
    .INPUT(x, TensorType({DT_FLOAT, DT_FLOAT16}))
    .OUTPUT(y, TensorType({DT_FLOAT, DT_FLOAT16}))
    .REQUIRED_ATTR(pyramid_height, Int)
    .ATTR(pool_method, Int, 0)
    .OP_END_FACTORY_REG(SPP)

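/*
 * The output width stated in the brief, sigma(c*2^i*2^i) for i in [0, pyramid_height),
 * worked out as a small helper. This is a sketch of the arithmetic only; the helper
 * name is illustrative, not part of this operator's interface:
 *
 *   // Number of pooled values per image: each pyramid level i produces a
 *   // (2^i x 2^i) grid of bins over all c channels.
 *   inline int SppOutputDim(int channels, int pyramid_height) {
 *     int dim = 0;
 *     for (int i = 0; i < pyramid_height; ++i) {
 *       int bins = 1 << i;               // 2^i bins per spatial axis
 *       dim += channels * bins * bins;   // c * 2^i * 2^i
 *     }
 *     return dim;
 *   }
 *
 * For example, with c = 256 and pyramid_height = 3 this gives 256*(1 + 4 + 16) = 5376.
 */
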
/**
 * @brief Performs Region of Interest (ROI) Pooling. \n
 * @par Inputs:
 * Three inputs, including:
 * @li x: An NC1HWC0 tensor of type float16 or float32, describing the feature
 * map.
 * @li rois: A tensor of type float16 or float32, with shape
 * [batch, 5, roi_max_num], describing the ROIs.
 * @li roi_actual_num: An optional tensor of type int32, with shape [batch, 8], specifying
 * the number of ROIs per batch. \n
 * @par Attributes:
 * @li pooled_h: A required int32, specifying the pooled H. Must be greater
 * than 0.
 * @li pooled_w: A required int32, specifying the pooled W. Must be greater
 * than 0.
 * @li spatial_scale_h: A required scaling factor for mapping the input
 * coordinates of height to the ROI coordinates.
 * @li spatial_scale_w: A required scaling factor for mapping the input
 * coordinates of width to the ROI coordinates. \n
 * @par Outputs:
 * y: An NC1HWC0 tensor of type float16 or float32, describing the result
 * feature map. \n
 * @attention Constraints:
 * @li For the feature map input:
 * (1) If pooled_h = pooled_w = 2, the feature map size must not exceed 50.
 * (2) If pooled_h = pooled_w = 3, the feature map size must not exceed 60.
 * (3) If pooled_h = pooled_w = 4, the feature map size must not exceed 70.
 * (4) If pooled_h = pooled_w = 5, the feature map size must not exceed 70.
 * (5) If pooled_h = pooled_w = 6, the feature map size must not exceed 80.
 * (6) If pooled_h = pooled_w = 7, the feature map size must not exceed 80.
 * (7) If pooled_h = pooled_w = 8, the feature map size must not exceed 80.
 * (8) If pooled_h = pooled_w = 9, the feature map size must not exceed 70.
 * (9) If pooled_h = pooled_w = 10, the feature map size must not exceed 70.
 * (10) If pooled_h = pooled_w = 11, the feature map size must not exceed 70.
 * (11) If pooled_h = pooled_w = 12, the feature map size must not exceed 70.
 * (12) If pooled_h = pooled_w = 13, the feature map size must not exceed 70.
 * (13) If pooled_h = pooled_w = 14, the feature map size must not exceed 70.
 * (14) If pooled_h = pooled_w = 15, the feature map size must not exceed 70.
 * (15) If pooled_h = pooled_w = 16, the feature map size must not exceed 70.
 * (16) If pooled_h = pooled_w = 17, the feature map size must not exceed 50.
 * (17) If pooled_h = pooled_w = 18, the feature map size must not exceed 40.
 * (18) If pooled_h = pooled_w = 19, the feature map size must not exceed 40.
 * (19) If pooled_h = pooled_w = 20, the feature map size must not exceed 40.
 * @par Third-party framework compatibility
 * It is a custom operator. It has no corresponding operator in Caffe.
 */
REG_OP ( ROIPooling )
. INPUT ( x , TensorType ( { DT_FLOAT , DT_FLOAT16 } ) )
. INPUT ( rois , TensorType ( { DT_FLOAT , DT_FLOAT16 } ) )
. OPTIONAL_INPUT ( roi_actual_num , TensorType ( { DT_INT32 } ) )
. REQUIRED_ATTR ( pooled_h , Int )
. REQUIRED_ATTR ( pooled_w , Int )
. REQUIRED_ATTR ( spatial_scale_h , Float )
. REQUIRED_ATTR ( spatial_scale_w , Float )
. OUTPUT ( y , TensorType ( { DT_FLOAT , DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( ROIPooling )
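/*
 * Illustrative pre - check derived directly from the constraint table above :
 * for pooled_h = pooled_w = k , the feature map size must not exceed the listed
 * limit . The helper name is hypothetical ; sizes outside [ 2 , 20 ] are rejected
 * because the table does not cover them .
 *
 *   #include <cstdint>
 *
 *   inline bool RoiPoolingFeatureMapSizeOk(int32_t pooled_hw, int32_t feature_map_size) {
 *     // Limits copied from the table above, indexed by pooled_hw - 2 (pooled_hw in [2, 20]).
 *     static const int32_t kMaxSize[] = {50, 60, 70, 70, 80, 80, 80, 70, 70, 70,
 *                                        70, 70, 70, 70, 70, 50, 40, 40, 40};
 *     if (pooled_hw < 2 || pooled_hw > 20) {
 *       return false;
 *     }
 *     return feature_map_size <= kMaxSize[pooled_hw - 2];
 *   }
 */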
/**
* @ brief Computes decode bbox function .
* @ par Inputs :
* Inputs include :
* @ li box_predictions : A Tensor . Must be float16 .
* @ li anchors : A Tensor . Must have the same type as box_predictions .
* @ par Attributes :
* decode_clip : A required float , threshold of the decode process .
* @ par Outputs :
* decoded_boxes : A Tensor . Must have the same type as box_predictions .
* N - D with shape [ N , 4 ] .
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( DecodeBbox )
. INPUT ( box_predictions , TensorType { DT_FLOAT16 } )
. INPUT ( anchors , TensorType { DT_FLOAT16 } )
. OUTPUT ( decoded_boxes , TensorType { DT_FLOAT16 } )
. REQUIRED_ATTR ( decode_clip , Float )
. OP_END_FACTORY_REG ( DecodeBbox )
/**
* @ brief Computes ClipBoxes function . \ n
* @ par Inputs :
* @ li boxes_input : A Tensor . Must be float16 . N - D with shape [ N , 4 ] .
* @ li img_size : A Tensor . Must be int32 . shape [ H , W ] . \ n
* @ par Outputs :
* boxes_output : A Tensor . Must have the same type as boxes_input . N - D with shape [ N , 4 ] .
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( ClipBoxes )
. INPUT ( boxes_input , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( img_size , TensorType ( { DT_INT32 } ) )
. OUTPUT ( boxes_output , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( ClipBoxes )
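/*
 * Reference sketch of the assumed ClipBoxes behaviour : clamp every box to the
 * image extent given by img_size = [ H , W ] . The [ x0 , y0 , x1 , y1 ] column
 * order below is an illustrative assumption ; the kernel layout is not
 * documented in this header . Helper name is hypothetical .
 *
 *   #include <algorithm>
 *   #include <cstdint>
 *   #include <vector>
 *
 *   inline void ClipBoxesRef(std::vector<float>& boxes,  // N x 4, row-major
 *                            int32_t img_h, int32_t img_w) {
 *     for (size_t i = 0; i + 3 < boxes.size(); i += 4) {
 *       boxes[i + 0] = std::clamp(boxes[i + 0], 0.0f, static_cast<float>(img_w));  // x0
 *       boxes[i + 1] = std::clamp(boxes[i + 1], 0.0f, static_cast<float>(img_h));  // y0
 *       boxes[i + 2] = std::clamp(boxes[i + 2], 0.0f, static_cast<float>(img_w));  // x1
 *       boxes[i + 3] = std::clamp(boxes[i + 3], 0.0f, static_cast<float>(img_h));  // y1
 *     }
 *   }
 */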
/**
* @ brief Computes ClipBoxesD function . \ n
* @ par Attributes :
* img_size : A required attribute of type ListInt , specifying the image size [ H , W ] . \ n
* @ par Inputs :
* boxes_input : A Tensor . Must be float16 . N - D with shape [ N , 4 ] . \ n
* @ par Outputs :
* boxes_output : A Tensor . Must have the same type as boxes_input . N - D with shape [ N , 4 ] . \ n
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( ClipBoxesD )
. INPUT ( boxes_input , TensorType ( { DT_FLOAT16 } ) )
. REQUIRED_ATTR ( img_size , ListInt )
. OUTPUT ( boxes_output , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( ClipBoxesD )
/**
* @ brief Computes Fastrcnn Predictions function .
*
* @ par Inputs :
* Inputs include :
* @ li rois : A Tensor . Must be float16 . N - D with shape [ N * C , 4 ] .
* @ li score : A Tensor . Must be float16 . N - D with shape [ N , C + 1 ] .
*
* @ par Attributes :
* @ li nms_threshold : required , float , threshold of the nms process .
* @ li score_threshold : required , float , threshold of the topk process .
* @ li k : required , Int , the k value of the topk process .
* @ par Outputs :
* @ li sorted_rois : A Tensor . Must be float16 . N - D with shape [ N , 4 ] .
* @ li sorted_scores : A Tensor . Must be float16 . N - D with shape [ N , 1 ] .
* @ li sorted_classes : A Tensor . Must be float16 . N - D with shape [ N , 1 ] .
*/
REG_OP ( FastrcnnPredictions )
. INPUT ( rois , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( score , TensorType ( { DT_FLOAT16 } ) )
. REQUIRED_ATTR ( nms_threshold , Float )
. REQUIRED_ATTR ( score_threshold , Float )
. REQUIRED_ATTR ( k , Int )
. OUTPUT ( sorted_rois , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( sorted_scores , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( sorted_classes , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( FastrcnnPredictions )
/**
* @ brief Computes Fastrcnn RpnProposals function . \ n
* @ par Inputs :
* Inputs include :
* @ li rois : A Tensor . Must be float16 . N - D with shape [ N , 4 ] .
* @ li cls_bg_prob : A Tensor . Must be float16 . N - D with shape [ N , 1 ] .
* @ li img_size : A Tensor . Must be int32 . shape [ H , W ] . \ n
* @ par Attributes :
* @ li score_threshold : required , float , threshold of the topk process .
* @ li k : required , Int , the k value of the topk process .
* @ li min_size : required , float , minimum box size threshold .
* @ li nms_threshold : required , float , threshold of the nms process .
* @ li post_nms_num : required , int , number of boxes output by the nms process .
* @ li score_filter : bool , flag for score filtering . Defaults to " true " .
* @ li box_filter : bool , flag for box filtering . Defaults to " true " .
* @ li score_sigmoid : bool , flag for applying sigmoid to scores . Defaults to " false " .
* @ par Outputs :
* sorted_box : A Tensor of sorted proposal boxes . Must be float16 . \ n
* @ par Third - party framework compatibility
* Compatible with the TensorFlow operator Unpack .
*/
REG_OP ( RpnProposals )
. INPUT ( rois , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( cls_bg_prob , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( img_size , TensorType ( { DT_INT32 } ) )
. REQUIRED_ATTR ( score_threshold , Float )
. REQUIRED_ATTR ( k , Int )
. REQUIRED_ATTR ( min_size , Float )
. REQUIRED_ATTR ( nms_threshold , Float )
. REQUIRED_ATTR ( post_nms_num , Int )
. ATTR ( score_filter , Bool , true )
. ATTR ( box_filter , Bool , true )
. ATTR ( score_sigmoid , Bool , false )
. OUTPUT ( sorted_box , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( RpnProposals )
/**
* @ brief Computes Fastrcnn RpnProposalsD function . \ n
* @ par Inputs :
* @ li rois : A Tensor . Must be float16 . N - D with shape [ N , 4 ] .
* @ li cls_bg_prob : A Tensor . Must be float16 . N - D with shape [ N , 1 ] . \ n
* @ par Attributes :
* @ li img_size : A required attribute of type ListInt , specifying the image size [ H , W ] .
* @ li score_threshold : required , float , threshold of the topk process .
* @ li k : required , Int , the k value of the topk process .
* @ li min_size : required , float , minimum box size threshold .
* @ li nms_threshold : required , float , threshold of the nms process .
* @ li post_nms_num : required , int , number of boxes output by the nms process .
* @ li score_filter : bool , flag for score filtering . Defaults to " true " .
* @ li box_filter : bool , flag for box filtering . Defaults to " true " .
* @ li score_sigmoid : bool , flag for applying sigmoid to scores . Defaults to " false " .
* @ par Outputs :
* sorted_box : An output Tensor . Must be float16 . N - D with shape [ N , 1 ] . \ n
* @ par Third - party framework compatibility
* Compatible with the PyTorch operator RPNProposals . \ n
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
* Warning : THIS FUNCTION IS DEPRECATED . Please use RpnProposals instead .
*/
REG_OP ( RpnProposalsD )
. INPUT ( rois , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( cls_bg_prob , TensorType ( { DT_FLOAT16 } ) )
. REQUIRED_ATTR ( img_size , ListInt )
. REQUIRED_ATTR ( score_threshold , Float )
. REQUIRED_ATTR ( k , Int )
. REQUIRED_ATTR ( min_size , Float )
. REQUIRED_ATTR ( nms_threshold , Float )
. REQUIRED_ATTR ( post_nms_num , Int )
. ATTR ( score_filter , Bool , true )
. ATTR ( box_filter , Bool , true )
. ATTR ( score_sigmoid , Bool , false )
. OUTPUT ( sorted_box , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( RpnProposalsD )
/**
* @ brief Computes the Score Filter Pre - Sort function .
* @ par Inputs :
* Inputs include :
* @ li rois : A Tensor . Must be float16 . N - D with shape [ N , 4 ] .
* @ li cls_bg_prob : A Tensor . Must be float16 . N - D with shape [ N , 1 ] .
* @ par Attributes :
* @ li score_threshold : required , float , threshold of the topk process .
* @ li k : required , Int , the k value of the topk process .
* @ li score_filter : bool , flag for score filtering . Defaults to " true " .
* @ li core_max_num : int , max number of cores . Defaults to " 8 " .
* @ par Outputs :
* @ li sorted_proposal : A Tensor . Must be float16 .
* N - D with shape [ 8 * 6002 , 8 ] .
* @ li proposal_num : A Tensor . Must be uint32 . N - D with shape [ 8 , 8 ] .
*/
REG_OP ( ScoreFiltePreSort )
. INPUT ( rois , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( cls_bg_prob , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( sorted_proposal , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( proposal_num , TensorType ( { DT_UINT32 } ) )
. REQUIRED_ATTR ( score_threshold , Float )
. REQUIRED_ATTR ( k , Int )
. ATTR ( score_filter , Bool , true )
. ATTR ( core_max_num , Int , 8 )
. OP_END_FACTORY_REG ( ScoreFiltePreSort )
/**
* @ brief Computes the Rpn Proposal Post Processing function .
*
* @ par Inputs :
* Inputs include :
* @ li sorted_proposal : A Tensor . Must be float16 .
* N - D with shape [ 8 * 6002 , 8 ] .
* @ li proposal_num : A Tensor . Must be uint32 . N - D with shape [ 8 , 8 ] .
*
* @ par Attributes :
* @ li img_size : A required attribute of type ListInt , specifying the image size [ H , W ] .
* @ li score_threshold : required , float , threshold of the topk process .
* @ li k : required , Int , the k value of the topk process .
* @ li min_size : required , float , minimum box size threshold .
* @ li nms_threshold : required , float , threshold of the nms process .
* @ li post_nms_num : required , int , number of boxes output by the nms process .
* @ li box_filter : bool , flag for box filtering . Defaults to " true " .
* @ li core_max_num : int , max number of cores . Defaults to " 8 " .
* @ par Outputs :
* sorted_box : A Tensor of sorted proposal boxes . Must be float16 .
*/
REG_OP ( RpnProposalPostProcessing )
. INPUT ( sorted_proposal , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( proposal_num , TensorType ( { DT_UINT32 } ) )
. OUTPUT ( sorted_box , TensorType ( { DT_FLOAT16 } ) )
. REQUIRED_ATTR ( img_size , ListInt )
. REQUIRED_ATTR ( score_threshold , Float )
. REQUIRED_ATTR ( k , Int )
. REQUIRED_ATTR ( min_size , Float )
. REQUIRED_ATTR ( nms_threshold , Float )
. REQUIRED_ATTR ( post_nms_num , Int )
. ATTR ( box_filter , Bool , true )
. ATTR ( core_max_num , Int , 8 )
. OP_END_FACTORY_REG ( RpnProposalPostProcessing )
/**
* @ brief Computes DecodeBoundariesTarget function .
* @ par Inputs :
* Inputs include :
* @ li boundary_predictions : A Tensor . Must be float16 .
* @ li anchors : A Tensor . Must be float16 .
* @ par Outputs :
* boundary_encoded : A Tensor . Must be float16 .
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( DecodeBoundariesTarget )
. INPUT ( boundary_predictions , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( anchors , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( boundary_encoded , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( DecodeBoundariesTarget )
/**
* @ brief Computes DecodeCornerpointsTargetBG function .
*
* @ par Inputs :
* Inputs include :
* @ li keypoints_prediction : A Tensor . Must be float16 .
* @ li anchors : A Tensor . Must be float16 .
*
* @ par Outputs :
* keypoints_decoded : A Tensor . Must be float16 .
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( DecodeCornerpointsTargetBG )
. INPUT ( keypoints_prediction , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( anchors , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( keypoints_decoded , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( DecodeCornerpointsTargetBG )
/**
* @ brief Computes DecodeCornerpointsTargetWrtCenterV1 function .
*
* @ par Inputs :
* Inputs include :
* @ li keypoints_prediction : A Tensor . Must be float16 .
* @ li anchors : A Tensor . Must be float16 .
*
* @ par Outputs :
* keypoints_decoded : A Tensor . Must be float16 .
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( DecodeCornerpointsTargetWrtCenterV1 )
. INPUT ( keypoints_prediction , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( anchors , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( keypoints_decoded , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( DecodeCornerpointsTargetWrtCenterV1 )
/**
* @ brief Computes DecodeWheelsTarget function .
*
* @ par Inputs :
* Inputs include :
* @ li boundary_predictions : A Tensor . Must be float16 .
* @ li anchors : A Tensor . Must be float16 .
*
* @ par Outputs :
* boundary_encoded : A Tensor . Must be float16 .
* @ par Restrictions :
* Warning : THIS FUNCTION IS EXPERIMENTAL . Please do not use .
*/
REG_OP ( DecodeWheelsTarget )
. INPUT ( boundary_predictions , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( anchors , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( boundary_encoded , TensorType ( { DT_FLOAT16 } ) )
. OP_END_FACTORY_REG ( DecodeWheelsTarget )
/**
* @ brief Computes NMS for input boxes and scores , supporting multiple batches and classes .
* The op performs clip - to - window , score filtering , top_k , and NMS .
* @ par Inputs :
* Four inputs , including :
* @ li boxes : boxes , a 4 D Tensor of type float16 with
* shape ( batch , num_anchors , num_classes , 4 ) . " batch " indicates the batch size of the image ,
* " num_anchors " indicates the number of boxes , " num_classes " indicates the number of detected classes ,
* and the value " 4 " refers to " x0 " , " x1 " , " y0 " , and " y1 " .
* @ li scores : box scores , a 3 D Tensor of type float16 with
* shape ( batch , num_anchors , num_classes ) .
* @ li clip_window : window size , a 2 D Tensor of type float16 with
* shape ( batch , 4 ) . The value " 4 " refers to " anchor_x0 " , " anchor_x1 " , " anchor_y0 " , and " anchor_y1 " .
* @ li num_valid_boxes : valid boxes number for each batch , a 1 D Tensor of type int32 with
* shape ( batch , ) . \ n
* @ par Attributes :
* @ li score_threshold : A required attribute of type float32 , specifying the score threshold used to filter boxes .
* @ li iou_threshold : A required attribute of type float32 , specifying the IOU threshold used by nms .
* @ li max_size_per_class : A required attribute of type int , specifying the nms output num per class .
* @ li max_total_size : A required attribute of type int , specifying the nms output num per batch .
* @ li change_coordinate_frame : An optional attribute of type bool , specifying whether to normalize coordinates after clipping .
* @ li transpose_box : An optional attribute of type bool , specifying whether a transpose is inserted before this op . Must be " false " . \ n
* @ par Outputs :
* @ li nmsed_boxes : A 3 D Tensor of type float16 with shape ( batch , max_total_size , 4 ) ,
* specifying the output nms boxes per batch .
* @ li nmsed_scores : A 2 D Tensor of type float16 with shape ( batch , max_total_size ) ,
* specifying the output nms score per batch .
* @ li nmsed_classes : A 2 D Tensor of type float16 with shape ( batch , max_total_size ) ,
* specifying the output nms class per batch .
* @ li nmsed_num : A 1 D Tensor of type int32 with shape ( batch ) , specifying the valid num of nmsed_boxes . \ n
* @ attention Constraints :
* Only computation of float16 data is supported .
*/
REG_OP ( BatchMultiClassNonMaxSuppression )
. INPUT ( boxes , TensorType ( { DT_FLOAT16 } ) )
. INPUT ( scores , TensorType ( { DT_FLOAT16 } ) )
. OPTIONAL_INPUT ( clip_window , TensorType ( { DT_FLOAT16 } ) )
. OPTIONAL_INPUT ( num_valid_boxes , TensorType ( { DT_INT32 } ) )
. OUTPUT ( nmsed_boxes , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( nmsed_scores , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( nmsed_classes , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( nmsed_num , TensorType ( { DT_INT32 } ) )
. REQUIRED_ATTR ( score_threshold , Float )
. REQUIRED_ATTR ( iou_threshold , Float )
. REQUIRED_ATTR ( max_size_per_class , Int )
. REQUIRED_ATTR ( max_total_size , Int )
. ATTR ( change_coordinate_frame , Bool , false )
. ATTR ( transpose_box , Bool , false )
. OP_END_FACTORY_REG ( BatchMultiClassNonMaxSuppression )
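/*
 * Construction sketch , assuming the set_input_* / set_attr_* setters that
 * REG_OP generates for the ge::op wrapper classes ; the threshold values are
 * samples only , and " boxes " / " scores " are upstream operators built elsewhere .
 *
 *   // In a source file that includes this header.
 *   ge::op::BatchMultiClassNonMaxSuppression BuildBatchNms(ge::Operator& boxes,
 *                                                          ge::Operator& scores) {
 *     auto nms = ge::op::BatchMultiClassNonMaxSuppression("batch_nms");
 *     nms.set_input_boxes(boxes)
 *        .set_input_scores(scores)
 *        .set_attr_score_threshold(0.05f)    // score filter threshold
 *        .set_attr_iou_threshold(0.5f)       // NMS IOU threshold
 *        .set_attr_max_size_per_class(100)   // kept boxes per class
 *        .set_attr_max_total_size(100);      // kept boxes per batch
 *     return nms;
 *   }
 */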
/**
* @ brief Converts normalized bounding boxes to absolute coordinates . \ n
* @ par Inputs :
* @ li normalized_boxes : A 3 D Tensor of type float16 or float32 .
* @ li shape_hw : A 1 D Tensor of type int32 . \ n
* @ par Attributes :
* @ li reversed_box : An optional bool , specifying whether the last two dims are " 4,num " or
* " num,4 " : " true " for " 4,num " , " false " for " num,4 " . Defaults to " false " . \ n
* @ par Outputs :
* y : A Tensor . Has the same type and shape as " normalized_boxes " . \ n
* @ attention Constraints :
* " normalized_boxes " ' s shape must be ( batch , num , 4 ) or ( batch , 4 , num ) .
* " shape_hw " ' s shape must be ( 4 , )
*/
REG_OP ( ToAbsoluteBBox )
. INPUT ( normalized_boxes , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. INPUT ( shape_hw , TensorType ( { DT_INT32 } ) )
. OUTPUT ( y , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. ATTR ( reversed_box , Bool , false )
. OP_END_FACTORY_REG ( ToAbsoluteBBox )
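/*
 * Reference sketch of the assumed scaling : each of the 4 box coordinates is
 * multiplied by the corresponding entry of " shape_hw " ( length 4 , for example
 * [ h , w , h , w ] ) . The exact coordinate / scale layout is not specified in
 * this header , so this is illustrative only ; reversed_box handling is omitted .
 *
 *   #include <cstddef>
 *   #include <cstdint>
 *   #include <vector>
 *
 *   inline void ToAbsoluteBBoxRef(std::vector<float>& boxes,               // num x 4, row-major
 *                                 const std::vector<int32_t>& shape_hw) {  // 4 per-coordinate scales
 *     for (size_t i = 0; i + 3 < boxes.size(); i += 4) {
 *       for (size_t j = 0; j < 4; ++j) {
 *         boxes[i + j] *= static_cast<float>(shape_hw[j]);  // normalized -> absolute pixels
 *       }
 *     }
 *   }
 */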
/**
* @ brief Computes Normalize bbox function .
*
* @ par Inputs :
* Inputs include :
* @ li boxes : A Tensor . Must be float16 or float32 .
* @ li shape_hw : A Tensor . Must be int32 .
*
* @ par Attributes :
* reversed_box : An optional bool . Defaults to " false " .
*
* @ par Outputs :
* y : A Tensor . Must have the same type and shape as boxes .
*/
REG_OP ( NormalizeBBox )
. INPUT ( boxes , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. INPUT ( shape_hw , TensorType ( { DT_INT32 } ) )
. OUTPUT ( y , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. ATTR ( reversed_box , Bool , false )
. OP_END_FACTORY_REG ( NormalizeBBox )
/**
* @ brief Computes decode bboxv2 function .
*
* @ par Inputs :
* Inputs include :
* @ li boxes : A Tensor . Must be float16 or float32 .
* @ li anchors : A Tensor . Must be float16 or float32 .
*
* @ par Attributes :
* @ li scales : An optional list of floats . Defaults to [ 1.0 , 1.0 , 1.0 , 1.0 ] .
* @ li decode_clip : An optional float , threshold of the decode process . Defaults to " 0.0 " .
* @ li reversed_box : An optional bool . Defaults to " false " .
*
* @ par Outputs :
* y : A Tensor . Must have the same type as boxes .
*/
REG_OP ( DecodeBboxV2 )
. INPUT ( boxes , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. INPUT ( anchors , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. OUTPUT ( y , TensorType ( { DT_FLOAT16 , DT_FLOAT } ) )
. ATTR ( scales , ListFloat , { 1.0 , 1.0 , 1.0 , 1.0 } )
. ATTR ( decode_clip , Float , 0.0 )
. ATTR ( reversed_box , Bool , false )
. OP_END_FACTORY_REG ( DecodeBboxV2 )
/**
* @ brief Computes sort function .
*
* @ par Inputs :
* Inputs include :
* x : A Tensor . Must be float16 .
*
* @ par Attributes :
* @ li axis : An optional int , the dimension to sort along . Defaults to " -1 " .
* @ li descending : An optional bool , controls the sorting order . Defaults to " false " .
*
* @ par Outputs :
* @ li y1 : A Tensor . Must have the same type as x .
* @ li y2 : A Tensor . Indices of y1 in x . Dtype must be int32 .
*/
REG_OP ( Sort )
. INPUT ( x , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( y1 , TensorType ( { DT_FLOAT16 } ) )
. OUTPUT ( y2 , TensorType ( { DT_INT32 } ) )
. ATTR ( axis , Int , - 1 )
. ATTR ( descending , Bool , false )
. OP_END_FACTORY_REG ( Sort )
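/*
 * Reference sketch of the Sort contract described above , shown for a 1 - D
 * input for brevity : y1 holds the sorted values and y2 holds the int32 index
 * each value had in x . The helper name is hypothetical .
 *
 *   #include <algorithm>
 *   #include <cstdint>
 *   #include <numeric>
 *   #include <vector>
 *
 *   inline void SortRef(const std::vector<float>& x, bool descending,
 *                       std::vector<float>& y1, std::vector<int32_t>& y2) {
 *     y2.resize(x.size());
 *     std::iota(y2.begin(), y2.end(), 0);  // candidate indices 0 .. N-1
 *     std::stable_sort(y2.begin(), y2.end(), [&](int32_t a, int32_t b) {
 *       return descending ? x[a] > x[b] : x[a] < x[b];
 *     });
 *     y1.resize(x.size());
 *     for (size_t i = 0; i < x.size(); ++i) {
 *       y1[i] = x[y2[i]];  // gather values in sorted order
 *     }
 *   }
 */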
} // namespace ge
# endif // OPS_BUILT_IN_OP_PROTO_INC_NN_DETECT_OPS_H_