add detection output operator for supporting retinanet (#17896)

* test=develop
add detection output for supporting retinanet

* test=develop
add test_layers.py

* test=develop
add API.spec

* test=develop
alter test_retinanet_detection_output.py

* test=develop
alter round 2

* test=develop
alter retinanet_detection_output

* test=develop
alter paddle/fluid/API.spec

* test=develop
alter detection.py

* test=develop
alter retinanet_detection_output

* test=develop
alter paddle/fluid/API.spec

* test=develop
alter detection.py

* test=develop
alter API.spec

* test=develop
alter retinanet_detection_output

* test=develop
alter paddle/fluid/API.spec

* test=develop
alter python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py

* test=develop
alter python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py

* test=develop
fix grammar error

* test=develop
fix grammar error

* test=develop
fix grammar error

* test=develop
alter python/paddle/fluid/tests/unittests/test_layers.py

* test=develop
alter paddle/fluid/API.spec
revert-18229-add_multi_gpu_install_check
FlyingQianMM 6 years ago committed by GitHub
parent 0941e3e013
commit ff83655f7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -362,6 +362,7 @@ paddle.fluid.layers.yolov3_loss (ArgSpec(args=['x', 'gt_box', 'gt_label', 'ancho
paddle.fluid.layers.yolo_box (ArgSpec(args=['x', 'img_size', 'anchors', 'class_num', 'conf_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'f332fb8c5bb581bd1a6b5be450a99990'))
paddle.fluid.layers.box_clip (ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '04384378ff00a42ade8fabd52e27cbc5'))
paddle.fluid.layers.multiclass_nms (ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)), ('document', 'ca7d1107b6c5d2d6d8221039a220fde0'))
paddle.fluid.layers.retinanet_detection_output (ArgSpec(args=['bboxes', 'scores', 'anchors', 'im_info', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'nms_eta'], varargs=None, keywords=None, defaults=(0.05, 1000, 100, 0.3, 1.0)), ('document', '078d28607ce261a0cba2b965a79f6bb8'))
paddle.fluid.layers.distribute_fpn_proposals (ArgSpec(args=['fpn_rois', 'min_level', 'max_level', 'refer_level', 'refer_scale', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '7bb011ec26bace2bc23235aa4a17647d'))
paddle.fluid.layers.box_decoder_and_assign (ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'box_score', 'box_clip', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'dfc953994fd8fef35c49dd9c6eea37a5'))
paddle.fluid.layers.collect_fpn_proposals (ArgSpec(args=['multi_rois', 'multi_scores', 'min_level', 'max_level', 'post_nms_top_n', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '82ffd896ecc3c005ae1cad40854dcace'))

@ -36,6 +36,7 @@ detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc)
detection_library(yolo_box_op SRCS yolo_box_op.cc yolo_box_op.cu)
detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu)
detection_library(sigmoid_focal_loss_op SRCS sigmoid_focal_loss_op.cc sigmoid_focal_loss_op.cu)
detection_library(retinanet_detection_output_op SRCS retinanet_detection_output_op.cc)
if(WITH_GPU)
detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub)

@ -53,6 +53,7 @@ __all__ = [
'yolo_box',
'box_clip',
'multiclass_nms',
'retinanet_detection_output',
'distribute_fpn_proposals',
'box_decoder_and_assign',
'collect_fpn_proposals',
@ -2548,6 +2549,113 @@ def box_clip(input, im_info, name=None):
return output
def retinanet_detection_output(bboxes,
                               scores,
                               anchors,
                               im_info,
                               score_threshold=0.05,
                               nms_top_k=1000,
                               keep_top_k=100,
                               nms_threshold=0.3,
                               nms_eta=1.):
    """
    **Detection Output Layer for Retinanet.**

    This operation is to get the detection results by performing following
    steps:

    1. Decode top-scoring bounding box predictions per FPN level according
       to the anchor boxes.
    2. Merge top predictions from all levels and apply multi-class non
       maximum suppression (NMS) on them to get the final detections.

    Args:
        bboxes(List): A list of tensors from multiple FPN levels. Each
            element is a 3-D Tensor with shape [N, Mi, 4] representing the
            predicted locations of Mi bounding boxes. N is the batch size,
            Mi is the number of bounding boxes from i-th FPN level and each
            bounding box has four coordinate values and the layout is
            [xmin, ymin, xmax, ymax].
        scores(List): A list of tensors from multiple FPN levels. Each
            element is a 3-D Tensor with shape [N, Mi, C] representing the
            predicted confidence predictions. N is the batch size, C is the
            class number (excluding background), Mi is the number of bounding
            boxes from i-th FPN level. For each bounding box, there are total
            C scores.
        anchors(List): A list of tensors from multiple FPN levels. Each
            element is a 2-D Tensor with shape [Mi, 4] representing the
            locations of the Mi anchor boxes of the i-th FPN level. Each
            anchor box has four coordinate values and the layout is
            [xmin, ymin, xmax, ymax].
        im_info(Variable): A 2-D LoDTensor with shape [N, 3] represents the
            image information. N is the batch size, each image information
            includes height, width and scale.
        score_threshold(float): Threshold to filter out bounding boxes
            with a confidence score.
        nms_top_k(int): Maximum number of detections per FPN layer to be
            kept according to the confidences before NMS.
        keep_top_k(int): Number of total bounding boxes to be kept per image
            after NMS step. -1 means keeping all bounding boxes after NMS
            step.
        nms_threshold(float): The threshold to be used in NMS.
        nms_eta(float): The parameter for adaptive NMS. It is forwarded to
            the operator as the `nms_eta` attribute.

    Returns:
        Variable:
            The detection output is a LoDTensor with shape [No, 6].
            Each row has six values: [label, confidence, xmin, ymin, xmax,
            ymax]. `No` is the total number of detections in this mini-batch.
            For each instance, the offsets in first dimension are called LoD,
            the offset number is N + 1, N is the batch size. The i-th image
            has `LoD[i + 1] - LoD[i]` detected results, if it is 0, the i-th
            image has no detected results. If all images have no detected
            results, LoD will be set to 0, and the output tensor is
            empty (None).

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid

            bboxes = fluid.layers.data(name='bboxes', shape=[1, 21, 4],
                append_batch_size=False, dtype='float32')
            scores = fluid.layers.data(name='scores', shape=[1, 21, 10],
                append_batch_size=False, dtype='float32')
            anchors = fluid.layers.data(name='anchors', shape=[21, 4],
                append_batch_size=False, dtype='float32')
            im_info = fluid.layers.data(name="im_info", shape=[1, 3],
                append_batch_size=False, dtype='float32')
            nmsed_outs = fluid.layers.retinanet_detection_output(
                                                    bboxes=[bboxes, bboxes],
                                                    scores=[scores, scores],
                                                    anchors=[anchors, anchors],
                                                    im_info=im_info,
                                                    score_threshold=0.05,
                                                    nms_top_k=1000,
                                                    keep_top_k=100,
                                                    nms_threshold=0.3,
                                                    nms_eta=1.)
    """
    # NOTE: locals() must be captured before any other local is bound so that
    # only the call arguments are handed to LayerHelper.
    helper = LayerHelper('retinanet_detection_output', **locals())
    # The output variable takes its dtype from the `scores` input.
    output = helper.create_variable_for_type_inference(
        dtype=helper.input_dtype('scores'))
    helper.append_op(
        type="retinanet_detection_output",
        inputs={
            'BBoxes': bboxes,
            'Scores': scores,
            'Anchors': anchors,
            'ImInfo': im_info
        },
        attrs={
            'score_threshold': score_threshold,
            'nms_top_k': nms_top_k,
            'nms_threshold': nms_threshold,
            'keep_top_k': keep_top_k,
            # Pass the caller's value through; it was previously hard-coded
            # to 1., which silently discarded the `nms_eta` argument.
            'nms_eta': nms_eta,
        },
        outputs={'Out': output})
    # Mark the result so that no gradient is propagated through it.
    output.stop_gradient = True
    return output
def multiclass_nms(bboxes,
scores,
score_threshold,

@ -2093,6 +2093,41 @@ class TestBook(LayerTest):
x=input, label=label, fg_num=fg_num, gamma=2., alpha=0.25)
return (out)
def test_retinanet_detection_output(self):
    """Build a retinanet_detection_output layer in the default programs.

    Four float32 data layers (boxes, scores, anchors, image info) are
    declared, the first three are each passed twice as a two-element list,
    and the resulting output variable is returned.
    """
    main_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()
    with program_guard(main_prog, startup_prog):

        def _float_input(var_name, var_shape):
            # Every input here is a fixed-shape float32 data layer.
            return layers.data(
                name=var_name,
                shape=var_shape,
                append_batch_size=False,
                dtype='float32')

        bboxes = _float_input('bboxes', [1, 21, 4])
        scores = _float_input('scores', [1, 21, 10])
        anchors = _float_input('anchors', [21, 4])
        im_info = _float_input("im_info", [1, 3])

        detections = layers.retinanet_detection_output(
            bboxes=[bboxes, bboxes],
            scores=[scores, scores],
            anchors=[anchors, anchors],
            im_info=im_info,
            score_threshold=0.05,
            nms_top_k=1000,
            keep_top_k=100,
            nms_threshold=0.3,
            nms_eta=1.)
        return detections
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save