add rpn target assign op (#11449)

* Add region proposal network (RPN) target assign operator and Python API for Faster-RCNN.
7 years ago · 50aa6ba6f5
parent 6a749d126f
commit 50aa6ba6f5
4 changed files with 517 additions and 2 deletions
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@ -27,7 +27,8 @@ anchor_generator_op.cu)
 detection_library(target_assign_op SRCS target_assign_op.cc
 target_assign_op.cu)
 detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
-    polygon_box_transform_op.cu)
+polygon_box_transform_op.cu)
+detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)

 # Export local libraries to parent
 set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)
--- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc
+++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@ -30,6 +30,7 @@ __all__ = [
    'detection_output',
    'ssd_loss',
    'detection_map',
+    'rpn_target_assign',
    'anchor_generator',
 ]

@ -44,6 +45,135 @@ for _OP in set(__auto__):
    globals()[_OP] = generate_layer_fn(_OP)


+def rpn_target_assign(loc,
+                      scores,
+                      anchor_box,
+                      gt_box,
+                      rpn_batch_size_per_im=256,
+                      fg_fraction=0.25,
+                      rpn_positive_overlap=0.7,
+                      rpn_negative_overlap=0.3):
+    """
+    ** Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection. **
+
+    Given the Intersection-over-Union (IoU) overlap between anchors and
+    ground-truth boxes, this layer assigns classification and regression
+    targets to each anchor. These targets are used to train the RPN. The
+    classification target is a binary class label (of being an object or
+    not). Following the Faster-RCNN paper, two kinds of anchors receive a
+    positive label: (i) the anchor/anchors with the highest IoU overlap with
+    a ground-truth box, or (ii) an anchor that has an IoU overlap higher than
+    rpn_positive_overlap (0.7) with any ground-truth box. Note that a single
+    ground-truth box may assign positive labels to multiple anchors. A
+    non-positive anchor is labeled negative when its IoU ratio is lower than
+    rpn_negative_overlap (0.3) for all ground-truth boxes. Anchors that are
+    neither positive nor negative do not contribute to the training
+    objective. The regression targets are the encoded ground-truth boxes
+    associated with the positive anchors.
+
+    Args:
+        loc(Variable): A 3-D Tensor with shape [N, M, 4] represents the
+            predicted locations of M bounding boxes. N is the batch size,
+            and each bounding box has four coordinate values and the layout
+            is [xmin, ymin, xmax, ymax].
+        scores(Variable): A 3-D Tensor with shape [N, M, C] represents the
+            predicted confidence scores. N is the batch size, C is the
+            class number, M is the number of bounding boxes. For each
+            category there are M scores in total, corresponding to the M
+            bounding boxes.
+        anchor_box(Variable): A 2-D Tensor with shape [M, 4] holds M boxes,
+            each box is represented as [xmin, ymin, xmax, ymax],
+            [xmin, ymin] is the left top coordinate of the anchor box,
+            if the input is image feature map, they are close to the origin
+            of the coordinate system. [xmax, ymax] is the right bottom
+            coordinate of the anchor box.
+        gt_box (Variable): The ground-truth bounding boxes (bboxes) are a 2D
+            LoDTensor with shape [Ng, 4], Ng is the total number of
+            ground-truth bboxes of the mini-batch input.
+        rpn_batch_size_per_im(int): Total number of RPN examples per image.
+        fg_fraction(float): Target fraction of RoI minibatch that is labeled
+            foreground (i.e. class > 0), 0-th class is background.
+        rpn_positive_overlap(float): Minimum overlap required between an anchor
+            and ground-truth box for the (anchor, gt box) pair to be a positive
+            example.
+        rpn_negative_overlap(float): Maximum overlap allowed between an anchor
+            and ground-truth box for the (anchor, gt box) pair to be a negative
+            example.
+
+    Returns:
+        tuple:
+               A tuple(predicted_scores, predicted_location, target_label,
+               target_bbox) is returned. The predicted_scores and
+               predicted_location are the predicted results of the RPN.
+               The target_label and target_bbox are the corresponding
+               ground truth. The predicted_location is a 2D Tensor with shape
+               [F, 4], and the shape of target_bbox is the same as the shape
+               of the predicted_location, F is the number of the foreground
+               anchors. The predicted_scores is a 2D Tensor with shape
+               [F + B, 1], and the shape of target_label is the same as the
+               shape of the predicted_scores, B is the number of the
+               background anchors. F and B depend on the input of this
+               operator.
+
+    Examples:
+        .. code-block:: python
+
+            loc = layers.data(name='location', shape=[2, 80],
+                              append_batch_size=False, dtype='float32')
+            scores = layers.data(name='scores', shape=[2, 40],
+                              append_batch_size=False, dtype='float32')
+            anchor_box = layers.data(name='anchor_box', shape=[20, 4],
+                              append_batch_size=False, dtype='float32')
+            gt_box = layers.data(name='gt_box', shape=[10, 4],
+                             append_batch_size=False, dtype='float32')
+            score_pred, loc_pred, score_target, loc_target = \
+                fluid.layers.rpn_target_assign(loc=loc,
+                                               scores=scores,
+                                               anchor_box=anchor_box,
+                                               gt_box=gt_box)
+    """
+
+    helper = LayerHelper('rpn_target_assign', **locals())
+    # 1. Compute the regression target bboxes
+    target_bbox = box_coder(
+        prior_box=anchor_box,
+        target_box=gt_box,
+        code_type='encode_center_size',
+        box_normalized=False)
+
+    # 2. Compute the overlaps between the gt boxes and the anchor boxes
+    iou = iou_similarity(x=gt_box, y=anchor_box)
+
+    # 3. Assign target label to anchors
+    # NOTE(review): the index/label outputs are declared with the (floating
+    # point) dtype of anchor_box; presumably the operator emits integer
+    # indices -- confirm whether an integer dtype should be declared here.
+    loc_index = helper.create_tmp_variable(dtype=anchor_box.dtype)
+    score_index = helper.create_tmp_variable(dtype=anchor_box.dtype)
+    target_label = helper.create_tmp_variable(dtype=anchor_box.dtype)
+    helper.append_op(
+        type="rpn_target_assign",
+        # The input slot name must match the one the operator is driven with
+        # in test_rpn_target_assign_op.py ('DistMat').
+        inputs={'DistMat': iou, },
+        outputs={
+            'LocationIndex': loc_index,
+            'ScoreIndex': score_index,
+            'TargetLabel': target_label,
+        },
+        attrs={
+            'rpn_batch_size_per_im': rpn_batch_size_per_im,
+            'rpn_positive_overlap': rpn_positive_overlap,
+            'rpn_negative_overlap': rpn_negative_overlap,
+            'fg_fraction': fg_fraction,
+        })
+
+    # 4. Reshape and gather the target entry
+    # Flatten everything to one row per entry so that the indices produced
+    # by the operator can be used directly with gather.
+    scores = nn.reshape(x=scores, shape=(-1, 1))
+    loc = nn.reshape(x=loc, shape=(-1, 4))
+    target_label = nn.reshape(x=target_label, shape=(-1, 1))
+    target_bbox = nn.reshape(x=target_bbox, shape=(-1, 4))
+
+    predicted_scores = nn.gather(scores, score_index)
+    predicted_location = nn.gather(loc, loc_index)
+    target_label = nn.gather(target_label, score_index)
+    target_bbox = nn.gather(target_bbox, loc_index)
+    return predicted_scores, predicted_location, target_label, target_bbox
+
+
 def detection_output(loc,
                     scores,
                     prior_box,
@ -388,7 +518,6 @@ def target_assign(input,

    Returns:
        tuple: 
-        
               A tuple(out, out_weight) is returned. out is a 3D Tensor with 
               shape [N, P, K], N and P is the same as they are in 
               `neg_indices`, K is the same as it in input of X. If 
--- a/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
+++ b/python/paddle/fluid/tests/unittests/test_rpn_target_assign_op.py
@ -0,0 +1,103 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import paddle.fluid.core as core
+from op_test import OpTest
+
+
+def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
+                      rpn_negative_overlap, fg_fraction):
+    """NumPy reference used to build the expected outputs of the op test.
+
+    Args:
+        iou: 2-D overlap matrix. It is transposed on entry; judging by the
+            variable names below, rows then index anchors and columns index
+            ground-truth boxes.
+        rpn_batch_size_per_im: total number of labeled anchors to keep.
+        rpn_positive_overlap: an anchor whose best overlap is >= this value
+            is labeled foreground.
+        rpn_negative_overlap: an anchor whose best overlap is < this value
+            is a background candidate.
+        fg_fraction: fraction of rpn_batch_size_per_im allowed as foreground.
+
+    Returns:
+        (loc_index, score_index, tgt_lbl): the foreground indices, the
+        foreground indices followed by the background indices, and the label
+        vector (1 = foreground, 0 = background, -1 = ignored) with a trailing
+        axis of size 1 added.
+    """
+    iou = np.transpose(iou)
+    # Best overlap of every row (anchor) over all columns.
+    anchor_to_gt_max = iou.max(axis=1)
+    # For every column (gt box): the row holding its maximum, and that value.
+    gt_to_anchor_argmax = iou.argmax(axis=0)
+    gt_to_anchor_max = iou[gt_to_anchor_argmax, np.arange(iou.shape[1])]
+    # Row indices of all entries equal to their column maximum (ties kept).
+    anchors_with_max_overlap = np.where(iou == gt_to_anchor_max)[0]
+
+    # Start with every anchor ignored (-1), then mark the positives.
+    tgt_lbl = np.ones((iou.shape[0], ), dtype=np.int32) * -1
+    tgt_lbl[anchors_with_max_overlap] = 1
+    tgt_lbl[anchor_to_gt_max >= rpn_positive_overlap] = 1
+
+    # Cap the number of foreground anchors by randomly disabling the surplus.
+    num_fg = int(fg_fraction * rpn_batch_size_per_im)
+    fg_inds = np.where(tgt_lbl == 1)[0]
+    if len(fg_inds) > num_fg:
+        disable_inds = np.random.choice(
+            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
+        tgt_lbl[disable_inds] = -1
+    fg_inds = np.where(tgt_lbl == 1)[0]
+
+    # The remaining quota goes to background anchors.
+    num_bg = rpn_batch_size_per_im - np.sum(tgt_lbl == 1)
+    bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
+    # NOTE(review): labels are set to 0 only when the candidates exceed the
+    # quota; otherwise no anchor is labeled background here. randint samples
+    # with replacement, so fewer than num_bg distinct anchors may be picked,
+    # and a picked anchor that was labeled 1 above is overwritten with 0
+    # while fg_inds is not recomputed. Confirm this mirrors the operator.
+    if len(bg_inds) > num_bg:
+        enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
+        tgt_lbl[enable_inds] = 0
+    bg_inds = np.where(tgt_lbl == 0)[0]
+
+    loc_index = fg_inds
+    score_index = np.hstack((fg_inds, bg_inds))
+    tgt_lbl = np.expand_dims(tgt_lbl, axis=1)
+    return loc_index, score_index, tgt_lbl
+
+
+class TestRpnTargetAssignOp(OpTest):
+    """Checks rpn_target_assign on a random 10 x 8 overlap matrix."""
+
+    def setUp(self):
+        # Thresholds shared by the operator attributes and the reference.
+        batch_size_per_im = 256
+        positive_overlap = 0.95
+        negative_overlap = 0.3
+        foreground_fraction = 0.25
+
+        self.op_type = "rpn_target_assign"
+        dist_mat = np.random.random((10, 8)).astype("float32")
+        self.inputs = {'DistMat': dist_mat}
+        self.attrs = {
+            'rpn_batch_size_per_im': batch_size_per_im,
+            'rpn_positive_overlap': positive_overlap,
+            'rpn_negative_overlap': negative_overlap,
+            'fg_fraction': foreground_fraction,
+            'fix_seed': True
+        }
+        # Expected outputs come from the NumPy reference implementation.
+        expected = rpn_target_assign(dist_mat, batch_size_per_im,
+                                     positive_overlap, negative_overlap,
+                                     foreground_fraction)
+        output_names = ('LocationIndex', 'ScoreIndex', 'TargetLabel')
+        self.outputs = dict(zip(output_names, expected))
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestRpnTargetAssignOp2(OpTest):
+    """Checks rpn_target_assign on a random 10 x 20 overlap matrix."""
+
+    def setUp(self):
+        # Thresholds shared by the operator attributes and the reference.
+        batch_size_per_im = 128
+        positive_overlap = 0.5
+        negative_overlap = 0.5
+        foreground_fraction = 0.5
+
+        self.op_type = "rpn_target_assign"
+        dist_mat = np.random.random((10, 20)).astype("float32")
+        self.inputs = {'DistMat': dist_mat}
+        self.attrs = {
+            'rpn_batch_size_per_im': batch_size_per_im,
+            'rpn_positive_overlap': positive_overlap,
+            'rpn_negative_overlap': negative_overlap,
+            'fg_fraction': foreground_fraction,
+            'fix_seed': True
+        }
+        # Expected outputs come from the NumPy reference implementation.
+        expected = rpn_target_assign(dist_mat, batch_size_per_im,
+                                     positive_overlap, negative_overlap,
+                                     foreground_fraction)
+        output_names = ('LocationIndex', 'ScoreIndex', 'TargetLabel')
+        self.outputs = dict(zip(output_names, expected))
+
+    def test_check_output(self):
+        self.check_output()
+
+
+# Run the test cases above when this file is executed as a script.
+if __name__ == '__main__':
+    unittest.main()