add rpn target assign op (#11449)

* Add region proposal network (RPN) target assign operator and Python API for Faster-RCNN.
guochaorong-patch-1
Yuan Gao 7 years ago committed by qingqing01
parent 6a749d126f
commit 50aa6ba6f5

@ -27,7 +27,8 @@ anchor_generator_op.cu)
detection_library(target_assign_op SRCS target_assign_op.cc
target_assign_op.cu)
detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
polygon_box_transform_op.cu)
polygon_box_transform_op.cu)
detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
# Export local libraries to parent
set(DETECTION_LIBRARY ${LOCAL_DETECTION_LIBS} PARENT_SCOPE)

File diff suppressed because it is too large Load Diff

@ -30,6 +30,7 @@ __all__ = [
'detection_output',
'ssd_loss',
'detection_map',
'rpn_target_assign',
'anchor_generator',
]
@ -44,6 +45,135 @@ for _OP in set(__auto__):
globals()[_OP] = generate_layer_fn(_OP)
def rpn_target_assign(loc,
                      scores,
                      anchor_box,
                      gt_box,
                      rpn_batch_size_per_im=256,
                      fg_fraction=0.25,
                      rpn_positive_overlap=0.7,
                      rpn_negative_overlap=0.3):
    """
    ** Target Assign Layer for region proposal network (RPN) in Faster-RCNN detection. **

    Given the Intersection-over-Union (IoU) overlap between anchors and
    ground-truth boxes, this layer assigns classification and regression
    targets to each anchor; these targets are used to train the RPN. The
    classification target is a binary class label (being an object or not).
    Following the Faster-RCNN paper, two kinds of anchors receive a positive
    label: (i) the anchor/anchors with the highest IoU overlap with a
    ground-truth box, or (ii) an anchor that has an IoU overlap higher than
    rpn_positive_overlap (0.7) with any ground-truth box. Note that a single
    ground-truth box may assign positive labels to multiple anchors. An
    anchor is negative when its IoU ratio is lower than rpn_negative_overlap
    (0.3) for all ground-truth boxes. Anchors that are neither positive nor
    negative do not contribute to the training objective. The regression
    targets are the encoded ground-truth boxes associated with the positive
    anchors.

    Args:
        loc(Variable): A 3-D Tensor with shape [N, M, 4] that represents the
            predicted locations of M bounding boxes. N is the batch size,
            and each bounding box has four coordinate values laid out as
            [xmin, ymin, xmax, ymax].
        scores(Variable): A 3-D Tensor with shape [N, M, C] that represents
            the predicted confidences. N is the batch size, C is the class
            number and M is the number of bounding boxes. For each category
            there are M scores in total, one per bounding box.
        anchor_box(Variable): A 2-D Tensor with shape [M, 4] holding M boxes,
            each represented as [xmin, ymin, xmax, ymax].
            [xmin, ymin] is the left top coordinate of the anchor box;
            if the input is an image feature map, they are close to the
            origin of the coordinate system. [xmax, ymax] is the right
            bottom coordinate of the anchor box.
        gt_box (Variable): The ground-truth bounding boxes (bboxes), a 2-D
            LoDTensor with shape [Ng, 4]; Ng is the total number of
            ground-truth bboxes in the mini-batch input.
        rpn_batch_size_per_im(int): Total number of RPN examples per image.
        fg_fraction(float): Target fraction of the RoI minibatch that is
            labeled foreground (i.e. class > 0); the 0-th class is
            background.
        rpn_positive_overlap(float): Minimum overlap required between an
            anchor and a ground-truth box for the (anchor, gt box) pair to
            be a positive example.
        rpn_negative_overlap(float): Maximum overlap allowed between an
            anchor and a ground-truth box for the (anchor, gt box) pair to
            be a negative example.

    Returns:
        tuple:
            A tuple (predicted_scores, predicted_location, target_label,
            target_bbox) is returned. predicted_scores and
            predicted_location are the predictions of the RPN;
            target_label and target_bbox are the corresponding ground
            truth. predicted_location is a 2-D Tensor with shape [F, 4],
            and target_bbox has the same shape, where F is the number of
            foreground anchors. predicted_scores is a 2-D Tensor with shape
            [F + B, 1], and target_label has the same shape, where B is the
            number of background anchors. F and B depend on the input of
            this operator.

    Examples:
        .. code-block:: python

            loc = layers.data(name='location', shape=[2, 80],
                              append_batch_size=False, dtype='float32')
            scores = layers.data(name='scores', shape=[2, 40],
                                 append_batch_size=False, dtype='float32')
            anchor_box = layers.data(name='anchor_box', shape=[20, 4],
                                     append_batch_size=False, dtype='float32')
            gt_box = layers.data(name='gt_box', shape=[10, 4],
                                 append_batch_size=False, dtype='float32')
            score_pred, loc_pred, score_target, loc_target = \
                fluid.layers.rpn_target_assign(loc=loc,
                                               scores=scores,
                                               anchor_box=anchor_box,
                                               gt_box=gt_box)
    """

    # Must stay the first statement: locals() is forwarded to the helper and
    # should contain only the call arguments.
    helper = LayerHelper('rpn_target_assign', **locals())

    # 1. Compute the regression target bboxes
    target_bbox = box_coder(
        prior_box=anchor_box,
        target_box=gt_box,
        code_type='encode_center_size',
        box_normalized=False)

    # 2. Compute the overlaps between the ground-truth boxes and the anchors
    iou = iou_similarity(x=gt_box, y=anchor_box)

    # 3. Assign target label to anchors
    # NOTE(review): the two index outputs are declared with the anchors'
    # dtype although they are later used as gather indices -- confirm the
    # operator fixes the real dtype.
    loc_index = helper.create_tmp_variable(dtype=anchor_box.dtype)
    score_index = helper.create_tmp_variable(dtype=anchor_box.dtype)
    target_label = helper.create_tmp_variable(dtype=anchor_box.dtype)
    helper.append_op(
        type="rpn_target_assign",
        # The operator's unit tests feed the overlap matrix under the key
        # 'DistMat'; the layer has to use the same input name.
        inputs={'DistMat': iou, },
        outputs={
            'LocationIndex': loc_index,
            'ScoreIndex': score_index,
            'TargetLabel': target_label,
        },
        attrs={
            'rpn_batch_size_per_im': rpn_batch_size_per_im,
            'rpn_positive_overlap': rpn_positive_overlap,
            'rpn_negative_overlap': rpn_negative_overlap,
            'fg_fraction': fg_fraction,
        })

    # 4. Reshape and gather the target entry
    # Flatten everything to one row per anchor so the sampled indices can be
    # used directly by gather.
    scores = nn.reshape(x=scores, shape=(-1, 1))
    loc = nn.reshape(x=loc, shape=(-1, 4))
    target_label = nn.reshape(x=target_label, shape=(-1, 1))
    target_bbox = nn.reshape(x=target_bbox, shape=(-1, 4))

    predicted_scores = nn.gather(scores, score_index)
    predicted_location = nn.gather(loc, loc_index)
    target_label = nn.gather(target_label, score_index)
    target_bbox = nn.gather(target_bbox, loc_index)
    # Previously returned the undefined name `predicted_loc` (NameError).
    return predicted_scores, predicted_location, target_label, target_bbox
def detection_output(loc,
scores,
prior_box,
@ -388,7 +518,6 @@ def target_assign(input,
Returns:
tuple:
A tuple(out, out_weight) is returned. out is a 3D Tensor with
shape [N, P, K], N and P is the same as they are in
`neg_indices`, K is the same as it in input of X. If

@ -0,0 +1,103 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
def rpn_target_assign(iou, rpn_batch_size_per_im, rpn_positive_overlap,
                      rpn_negative_overlap, fg_fraction):
    """NumPy reference for the ``rpn_target_assign`` operator.

    Args:
        iou: 2-D array of overlaps. It is transposed on entry so that, in
            the terms used by the local names below, rows become anchors
            and columns become ground-truth boxes.
        rpn_batch_size_per_im: total number of anchors to keep (foreground
            plus background).
        rpn_positive_overlap: an anchor whose best overlap is at least this
            value is labelled foreground.
        rpn_negative_overlap: an anchor whose best overlap is below this
            value is a background candidate.
        fg_fraction: share of ``rpn_batch_size_per_im`` reserved for
            foreground anchors.

    Returns:
        A tuple ``(loc_index, score_index, tgt_lbl)``: the foreground anchor
        indices, the foreground indices followed by the background indices,
        and the per-anchor labels as an ``(num_anchors, 1)`` int32 array
        with 1 = foreground, 0 = background and -1 = ignored.
    """
    iou = np.transpose(iou)
    anchor_to_gt_max = iou.max(axis=1)
    gt_to_anchor_argmax = iou.argmax(axis=0)
    gt_to_anchor_max = iou[gt_to_anchor_argmax, np.arange(iou.shape[1])]
    # Every anchor that ties for the best overlap with some ground-truth box.
    anchors_with_max_overlap = np.where(iou == gt_to_anchor_max)[0]

    # Start with every anchor ignored (-1), then promote the positives.
    tgt_lbl = np.ones((iou.shape[0], ), dtype=np.int32) * -1
    tgt_lbl[anchors_with_max_overlap] = 1
    tgt_lbl[anchor_to_gt_max >= rpn_positive_overlap] = 1

    # Randomly drop surplus foreground anchors back to "ignored".
    num_fg = int(fg_fraction * rpn_batch_size_per_im)
    fg_inds = np.where(tgt_lbl == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        tgt_lbl[disable_inds] = -1
    fg_inds = np.where(tgt_lbl == 1)[0]

    # The remaining budget goes to background anchors.
    num_bg = rpn_batch_size_per_im - np.sum(tgt_lbl == 1)
    bg_inds = np.where(anchor_to_gt_max < rpn_negative_overlap)[0]
    if len(bg_inds) > num_bg:
        # Sampling is done with replacement, so fewer than num_bg distinct
        # anchors may end up selected.
        enable_inds = bg_inds[np.random.randint(len(bg_inds), size=num_bg)]
    else:
        # All candidates fit in the budget: keep every one of them. Without
        # this branch no anchor was ever labelled background in this case.
        enable_inds = bg_inds
    # NOTE: an anchor that is foreground only because it is the best match
    # of a ground-truth box may also be below the negative threshold; it is
    # relabelled 0 here while still being listed in loc_index.
    tgt_lbl[enable_inds] = 0
    bg_inds = np.where(tgt_lbl == 0)[0]

    loc_index = fg_inds
    score_index = np.hstack((fg_inds, bg_inds))
    tgt_lbl = np.expand_dims(tgt_lbl, axis=1)
    return loc_index, score_index, tgt_lbl
class TestRpnTargetAssignOp(OpTest):
    """Compare the ``rpn_target_assign`` operator with the NumPy reference."""

    def setUp(self):
        """Create a random 10 x 8 overlap matrix and the expected outputs."""
        overlaps = np.random.random((10, 8)).astype("float32")

        # One source of truth for the values shared by the operator
        # attributes and the reference computation.
        batch_size = 256
        positive_overlap = 0.95
        negative_overlap = 0.3
        foreground_fraction = 0.25

        expected_loc, expected_score, expected_label = rpn_target_assign(
            overlaps, batch_size, positive_overlap, negative_overlap,
            foreground_fraction)

        self.op_type = "rpn_target_assign"
        self.inputs = {'DistMat': overlaps}
        self.attrs = {
            'rpn_batch_size_per_im': batch_size,
            'rpn_positive_overlap': positive_overlap,
            'rpn_negative_overlap': negative_overlap,
            'fg_fraction': foreground_fraction,
            'fix_seed': True
        }
        self.outputs = {
            'LocationIndex': expected_loc,
            'ScoreIndex': expected_score,
            'TargetLabel': expected_label,
        }

    def test_check_output(self):
        """Run the operator and check it against ``self.outputs``."""
        self.check_output()
class TestRpnTargetAssignOp2(OpTest):
    """Second ``rpn_target_assign`` case with a different set of attributes."""

    def setUp(self):
        """Create a random 10 x 20 overlap matrix and the expected outputs."""
        overlaps = np.random.random((10, 20)).astype("float32")

        # One source of truth for the values shared by the operator
        # attributes and the reference computation.
        batch_size = 128
        positive_overlap = 0.5
        negative_overlap = 0.5
        foreground_fraction = 0.5

        expected_loc, expected_score, expected_label = rpn_target_assign(
            overlaps, batch_size, positive_overlap, negative_overlap,
            foreground_fraction)

        self.op_type = "rpn_target_assign"
        self.inputs = {'DistMat': overlaps}
        self.attrs = {
            'rpn_batch_size_per_im': batch_size,
            'rpn_positive_overlap': positive_overlap,
            'rpn_negative_overlap': negative_overlap,
            'fg_fraction': foreground_fraction,
            'fix_seed': True
        }
        self.outputs = {
            'LocationIndex': expected_loc,
            'ScoreIndex': expected_score,
            'TargetLabel': expected_label,
        }

    def test_check_output(self):
        """Run the operator and check it against ``self.outputs``."""
        self.check_output()
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Loading…
Cancel
Save