Add `matrix_nms_op` (#24400)

* Add `matrix_nms_op`

test=develop

* Make ci happy

test=develop

* Exit early when no detection

test=develop

* Fix license year

test=develop

* Output index as well

test=develop

* Match nms2 lod behavior and add `return_index` flag

test=develop

* Make CI happy

test=develop

* Fix wording

test=develop
fix_copy_if_different
Yang Zhang 5 years ago committed by GitHub
parent 064d0ce386
commit 6d6efafeeb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -32,6 +32,7 @@ detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc)
detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc)
detection_library(matrix_nms_op SRCS matrix_nms_op.cc DEPS gpc)
detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc)
detection_library(yolo_box_op SRCS yolo_box_op.cc yolo_box_op.cu)

File diff suppressed because it is too large Load Diff

@ -57,6 +57,7 @@ __all__ = [
'box_clip',
'multiclass_nms',
'locality_aware_nms',
'matrix_nms',
'retinanet_detection_output',
'distribute_fpn_proposals',
'box_decoder_and_assign',
@ -3523,6 +3524,133 @@ def locality_aware_nms(bboxes,
return output
def matrix_nms(bboxes,
scores,
score_threshold,
post_threshold,
nms_top_k,
keep_top_k,
use_gaussian=False,
gaussian_sigma=2.,
background_label=0,
normalized=True,
return_index=False,
name=None):
"""
**Matrix NMS**
This operator does matrix non maximum suppression (NMS).
First selects a subset of candidate bounding boxes that have higher scores
than score_threshold (if provided), then the top k candidate is selected if
nms_top_k is larger than -1. Score of the remaining candidate are then
decayed according to the Matrix NMS scheme.
Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
Args:
bboxes (Variable): A 3-D Tensor with shape [N, M, 4] represents the
predicted locations of M bounding bboxes,
N is the batch size. Each bounding box has four
coordinate values and the layout is
[xmin, ymin, xmax, ymax], when box size equals to 4.
The data type is float32 or float64.
scores (Variable): A 3-D Tensor with shape [N, C, M]
represents the predicted confidence predictions.
N is the batch size, C is the class number, M is
number of bounding boxes. For each category there
are total M scores which corresponding M bounding
boxes. Please note, M is equal to the 2nd dimension
of BBoxes. The data type is float32 or float64.
score_threshold (float): Threshold to filter out bounding boxes with
low confidence score.
post_threshold (float): Threshold to filter out bounding boxes with
low confidence score AFTER decaying.
nms_top_k (int): Maximum number of detections to be kept according to
the confidences after the filtering detections based
on score_threshold.
keep_top_k (int): Number of total bboxes to be kept per image after NMS
step. -1 means keeping all bboxes after NMS step.
use_gaussian (bool): Use Gaussian as the decay function. Default: False
gaussian_sigma (float): Sigma for Gaussian decay function. Default: 2.0
background_label (int): The index of background label, the background
label will be ignored. If set to -1, then all
categories will be considered. Default: 0
normalized (bool): Whether detections are normalized. Default: True
return_index(bool): Whether return selected index. Default: False
name(str): Name of the matrix nms op. Default: None.
Returns:
A tuple with two Variables: (Out, Index) if return_index is True,
otherwise, one Variable(Out) is returned.
Out (Variable): A 2-D LoDTensor with shape [No, 6] containing the
detection results.
Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
(After version 1.3, when no boxes detected, the lod is changed
from {0} to {1})
Index (Variable): A 2-D LoDTensor with shape [No, 1] containing the
selected indices, which are absolute values cross batches.
Examples:
.. code-block:: python
import paddle.fluid as fluid
boxes = fluid.data(name='bboxes', shape=[None,81, 4],
dtype='float32', lod_level=1)
scores = fluid.data(name='scores', shape=[None,81],
dtype='float32', lod_level=1)
out = fluid.layers.matrix_nms(bboxes=boxes,
scores=scores,
background_label=0,
score_threshold=0.5,
post_threshold=0.1,
nms_top_k=400,
keep_top_k=200,
normalized=False)
"""
check_variable_and_dtype(bboxes, 'BBoxes', ['float32', 'float64'],
'matrix_nms')
check_variable_and_dtype(scores, 'Scores', ['float32', 'float64'],
'matrix_nms')
check_type(score_threshold, 'score_threshold', float, 'matrix_nms')
check_type(post_threshold, 'post_threshold', float, 'matrix_nms')
check_type(nms_top_k, 'nums_top_k', int, 'matrix_nms')
check_type(keep_top_k, 'keep_top_k', int, 'matrix_nms')
check_type(normalized, 'normalized', bool, 'matrix_nms')
check_type(use_gaussian, 'use_gaussian', bool, 'matrix_nms')
check_type(gaussian_sigma, 'gaussian_sigma', float, 'matrix_nms')
check_type(background_label, 'background_label', int, 'matrix_nms')
helper = LayerHelper('matrix_nms', **locals())
output = helper.create_variable_for_type_inference(dtype=bboxes.dtype)
index = helper.create_variable_for_type_inference(dtype='int')
helper.append_op(
type="matrix_nms",
inputs={'BBoxes': bboxes,
'Scores': scores},
attrs={
'background_label': background_label,
'score_threshold': score_threshold,
'post_threshold': post_threshold,
'nms_top_k': nms_top_k,
'gaussian_sigma': gaussian_sigma,
'use_gaussian': use_gaussian,
'keep_top_k': keep_top_k,
'normalized': normalized
},
outputs={'Out': output,
'Index': index})
output.stop_gradient = True
if return_index:
return output, index
else:
return output
def distribute_fpn_proposals(fpn_rois,
min_level,
max_level,

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save