fix generate_proposal_labels in cascade-rcnn series model, test=develop (#27892)

* fix generate_proposal_labels in cascade-rcnn series model, test=develop * fix example code & unittest, test=develop * update code from review comments, test=develop
5 years ago · d1e1f17482
parent afe68cb992
commit d1e1f17482
5 changed files with 303 additions and 147 deletions
--- a/paddle/fluid/operators/detection/bbox_util.h
+++ b/paddle/fluid/operators/detection/bbox_util.h
@ -149,5 +149,20 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx,
  }
 }

+// Calculate max IoU between each box and ground-truth and
+// each row represents one box
+template <typename T>
+void MaxIoU(const framework::Tensor& iou, framework::Tensor* max_iou) {
+  const T* iou_data = iou.data<T>();
+  int row = iou.dims()[0];
+  int col = iou.dims()[1];
+  T* max_iou_data = max_iou->data<T>();
+  for (int i = 0; i < row; ++i) {
+    const T* v = iou_data + i * col;
+    T max_v = *std::max_element(v, v + col);
+    max_iou_data[i] = max_v;
+  }
+}
+
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
+++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@ -2601,7 +2601,9 @@ def generate_proposal_labels(rpn_rois,
                             class_nums=None,
                             use_random=True,
                             is_cls_agnostic=False,
-                             is_cascade_rcnn=False):
+                             is_cascade_rcnn=False,
+                             max_overlap=None,
+                             return_max_overlap=False):
    """

    **Generate Proposal Labels of Faster-RCNN**
@ -2638,25 +2640,29 @@ def generate_proposal_labels(rpn_rois,
        use_random(bool): Use random sampling to choose foreground and background boxes.
        is_cls_agnostic(bool): bbox regression use class agnostic simply which only represent fg and bg boxes.
        is_cascade_rcnn(bool): it will filter some bbox crossing the image's boundary when setting True.
+        max_overlap(Variable): Maximum overlap between each proposal box and ground-truth.
+        return_max_overlap(bool): Whether return the maximum overlap between each sampled RoI and ground-truth.

    Returns:
        tuple:
-        A tuple with format``(rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights)``.
+        A tuple with format``(rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, max_overlap)``.

        - **rois**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4]``. The data type is the same as ``rpn_rois``.
        - **labels_int32**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 1]``. The data type must be int32.
        - **bbox_targets**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The regression targets of all RoIs. The data type is the same as ``rpn_rois``.
        - **bbox_inside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of foreground boxes' regression loss. The data type is the same as ``rpn_rois``.
        - **bbox_outside_weights**: 2-D LoDTensor with shape ``[batch_size_per_im * batch_size, 4 * class_num]``. The weights of regression loss. The data type is the same as ``rpn_rois``.
-
+        - **max_overlap**: 1-D LoDTensor with shape ``[P]``. P is the number of output ``rois``. The maximum overlap between each sampled RoI and ground-truth.

    Examples:
        .. code-block:: python

+            import paddle
            import paddle.fluid as fluid
+            paddle.enable_static()
            rpn_rois = fluid.data(name='rpn_rois', shape=[None, 4], dtype='float32')
-            gt_classes = fluid.data(name='gt_classes', shape=[None, 1], dtype='float32')
-            is_crowd = fluid.data(name='is_crowd', shape=[None, 1], dtype='float32')
+            gt_classes = fluid.data(name='gt_classes', shape=[None, 1], dtype='int32')
+            is_crowd = fluid.data(name='is_crowd', shape=[None, 1], dtype='int32')
            gt_boxes = fluid.data(name='gt_boxes', shape=[None, 4], dtype='float32')
            im_info = fluid.data(name='im_info', shape=[None, 3], dtype='float32')
            rois, labels, bbox, inside_weights, outside_weights = fluid.layers.generate_proposal_labels(
@ -2673,6 +2679,8 @@ def generate_proposal_labels(rpn_rois,
                             'generate_proposal_labels')
    check_variable_and_dtype(is_crowd, 'is_crowd', ['int32'],
                             'generate_proposal_labels')
+    if is_cascade_rcnn:
+        assert max_overlap is not None, "Input max_overlap of generate_proposal_labels should not be None if is_cascade_rcnn is True"

    rois = helper.create_variable_for_type_inference(dtype=rpn_rois.dtype)
    labels_int32 = helper.create_variable_for_type_inference(
@ -2683,22 +2691,28 @@ def generate_proposal_labels(rpn_rois,
        dtype=rpn_rois.dtype)
    bbox_outside_weights = helper.create_variable_for_type_inference(
        dtype=rpn_rois.dtype)
+    max_overlap_with_gt = helper.create_variable_for_type_inference(
+        dtype=rpn_rois.dtype)

+    inputs = {
+        'RpnRois': rpn_rois,
+        'GtClasses': gt_classes,
+        'IsCrowd': is_crowd,
+        'GtBoxes': gt_boxes,
+        'ImInfo': im_info,
+    }
+    if max_overlap is not None:
+        inputs['MaxOverlap'] = max_overlap
    helper.append_op(
        type="generate_proposal_labels",
-        inputs={
-            'RpnRois': rpn_rois,
-            'GtClasses': gt_classes,
-            'IsCrowd': is_crowd,
-            'GtBoxes': gt_boxes,
-            'ImInfo': im_info
-        },
+        inputs=inputs,
        outputs={
            'Rois': rois,
            'LabelsInt32': labels_int32,
            'BboxTargets': bbox_targets,
            'BboxInsideWeights': bbox_inside_weights,
-            'BboxOutsideWeights': bbox_outside_weights
+            'BboxOutsideWeights': bbox_outside_weights,
+            'MaxOverlapWithGT': max_overlap_with_gt
        },
        attrs={
            'batch_size_per_im': batch_size_per_im,
@ -2718,7 +2732,10 @@ def generate_proposal_labels(rpn_rois,
    bbox_targets.stop_gradient = True
    bbox_inside_weights.stop_gradient = True
    bbox_outside_weights.stop_gradient = True
+    max_overlap_with_gt.stop_gradient = True

+    if return_max_overlap:
+        return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights, max_overlap_with_gt
    return rois, labels_int32, bbox_targets, bbox_inside_weights, bbox_outside_weights


--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@ -289,40 +289,39 @@ class TestAnchorGenerator(unittest.TestCase):


 class TestGenerateProposalLabels(unittest.TestCase):
+    def check_out(self, outs):
+        rois = outs[0]
+        labels_int32 = outs[1]
+        bbox_targets = outs[2]
+        bbox_inside_weights = outs[3]
+        bbox_outside_weights = outs[4]
+        assert rois.shape[1] == 4
+        assert rois.shape[0] == labels_int32.shape[0]
+        assert rois.shape[0] == bbox_targets.shape[0]
+        assert rois.shape[0] == bbox_inside_weights.shape[0]
+        assert rois.shape[0] == bbox_outside_weights.shape[0]
+        assert bbox_targets.shape[1] == 4 * self.class_nums
+        assert bbox_inside_weights.shape[1] == 4 * self.class_nums
+        assert bbox_outside_weights.shape[1] == 4 * self.class_nums
+        if len(outs) == 6:
+            max_overlap_with_gt = outs[5]
+            assert max_overlap_with_gt.shape[0] == rois.shape[0]
+
    def test_generate_proposal_labels(self):
        program = Program()
        with program_guard(program):
-            rpn_rois = layers.data(
-                name='rpn_rois',
-                shape=[4, 4],
-                dtype='float32',
-                lod_level=1,
-                append_batch_size=False)
-            gt_classes = layers.data(
-                name='gt_classes',
-                shape=[6],
-                dtype='int32',
-                lod_level=1,
-                append_batch_size=False)
-            is_crowd = layers.data(
-                name='is_crowd',
-                shape=[6],
-                dtype='int32',
-                lod_level=1,
-                append_batch_size=False)
-            gt_boxes = layers.data(
-                name='gt_boxes',
-                shape=[6, 4],
-                dtype='float32',
-                lod_level=1,
-                append_batch_size=False)
-            im_info = layers.data(
-                name='im_info',
-                shape=[1, 3],
-                dtype='float32',
-                lod_level=1,
-                append_batch_size=False)
-            class_nums = 5
+            rpn_rois = fluid.data(
+                name='rpn_rois', shape=[4, 4], dtype='float32', lod_level=1)
+            gt_classes = fluid.data(
+                name='gt_classes', shape=[6], dtype='int32', lod_level=1)
+            is_crowd = fluid.data(
+                name='is_crowd', shape=[6], dtype='int32', lod_level=1)
+            gt_boxes = fluid.data(
+                name='gt_boxes', shape=[6, 4], dtype='float32', lod_level=1)
+            im_info = fluid.data(name='im_info', shape=[1, 3], dtype='float32')
+            max_overlap = fluid.data(
+                name='max_overlap', shape=[4], dtype='float32', lod_level=1)
+            self.class_nums = 5
            outs = fluid.layers.generate_proposal_labels(
                rpn_rois=rpn_rois,
                gt_classes=gt_classes,
@ -335,20 +334,27 @@ class TestGenerateProposalLabels(unittest.TestCase):
                bg_thresh_hi=0.5,
                bg_thresh_lo=0.0,
                bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
-                class_nums=class_nums)
+                class_nums=self.class_nums)
+            outs_1 = fluid.layers.generate_proposal_labels(
+                rpn_rois=rpn_rois,
+                gt_classes=gt_classes,
+                is_crowd=is_crowd,
+                gt_boxes=gt_boxes,
+                im_info=im_info,
+                batch_size_per_im=2,
+                fg_fraction=0.5,
+                fg_thresh=0.5,
+                bg_thresh_hi=0.5,
+                bg_thresh_lo=0.0,
+                bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
+                class_nums=self.class_nums,
+                is_cascade_rcnn=True,
+                max_overlap=max_overlap,
+                return_max_overlap=True)
+
+            self.check_out(outs)
+            self.check_out(outs_1)
            rois = outs[0]
-            labels_int32 = outs[1]
-            bbox_targets = outs[2]
-            bbox_inside_weights = outs[3]
-            bbox_outside_weights = outs[4]
-            assert rois.shape[1] == 4
-            assert rois.shape[0] == labels_int32.shape[0]
-            assert rois.shape[0] == bbox_targets.shape[0]
-            assert rois.shape[0] == bbox_inside_weights.shape[0]
-            assert rois.shape[0] == bbox_outside_weights.shape[0]
-            assert bbox_targets.shape[1] == 4 * class_nums
-            assert bbox_inside_weights.shape[1] == 4 * class_nums
-            assert bbox_outside_weights.shape[1] == 4 * class_nums


 class TestGenerateMaskLabels(unittest.TestCase):
--- a/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py
+++ b/python/paddle/fluid/tests/unittests/test_generate_proposal_labels_op.py