add op locality_aware_nms, test=develop (#20976)

custom_op_abi
LielinJiang 6 years ago committed by whs
parent fc385777e4
commit 06063b7001

@ -21,6 +21,7 @@ detection_library(iou_similarity_op SRCS iou_similarity_op.cc
iou_similarity_op.cu)
detection_library(mine_hard_examples_op SRCS mine_hard_examples_op.cc)
detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc poly_util.cc gpc.cc)
detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc poly_util.cc gpc.cc)
detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu)
detection_library(density_prior_box_op SRCS density_prior_box_op.cc density_prior_box_op.cu)
detection_library(anchor_generator_op SRCS anchor_generator_op.cc

File diff suppressed because it is too large Load Diff

@ -13,7 +13,7 @@ limitations under the License. */
#include <glog/logging.h>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detection/poly_util.h"
#include "paddle/fluid/operators/detection/nms_util.h"
namespace paddle {
namespace operators {
@ -85,84 +85,6 @@ class MultiClassNMSOp : public framework::OperatorWithKernel {
}
};
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
template <class T>
static inline void GetMaxScoreIndex(
const std::vector<T>& scores, const T threshold, int top_k,
std::vector<std::pair<T, int>>* sorted_indices) {
for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
template <class T>
static inline T BBoxArea(const T* box, const bool normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return static_cast<T>(0.);
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
template <class T>
static inline T JaccardOverlap(const T* box1, const T* box2,
const bool normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return static_cast<T>(0.);
} else {
const T inter_xmin = std::max(box1[0], box2[0]);
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
T norm = normalized ? static_cast<T>(0.) : static_cast<T>(1.);
T inter_w = inter_xmax - inter_xmin + norm;
T inter_h = inter_ymax - inter_ymin + norm;
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <class T>
T PolyIoU(const T* box1, const T* box2, const size_t box_size,
const bool normalized) {
T bbox1_area = PolyArea<T>(box1, box_size, normalized);
T bbox2_area = PolyArea<T>(box2, box_size, normalized);
T inter_area = PolyOverlapArea<T>(box1, box2, box_size, normalized);
if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) {
// If coordinate values are invalid
// if area size <= 0, return 0.
return T(0.);
} else {
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <class T>
void SliceOneClass(const platform::DeviceContext& ctx,
const framework::Tensor& items, const int class_id,

@ -0,0 +1,103 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <utility>
#include <vector>
#include "paddle/fluid/operators/detection/poly_util.h"
namespace paddle {
namespace operators {
template <class T>
bool SortScorePairDescend(const std::pair<float, T>& pair1,
const std::pair<float, T>& pair2) {
return pair1.first > pair2.first;
}
template <class T>
static inline void GetMaxScoreIndex(
const std::vector<T>& scores, const T threshold, int top_k,
std::vector<std::pair<T, int>>* sorted_indices) {
for (size_t i = 0; i < scores.size(); ++i) {
if (scores[i] > threshold) {
sorted_indices->push_back(std::make_pair(scores[i], i));
}
}
// Sort the score pair according to the scores in descending order
std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
SortScorePairDescend<int>);
// Keep top_k scores if needed.
if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
sorted_indices->resize(top_k);
}
}
template <class T>
static inline T BBoxArea(const T* box, const bool normalized) {
if (box[2] < box[0] || box[3] < box[1]) {
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
return static_cast<T>(0.);
} else {
const T w = box[2] - box[0];
const T h = box[3] - box[1];
if (normalized) {
return w * h;
} else {
// If coordinate values are not within range [0, 1].
return (w + 1) * (h + 1);
}
}
}
template <class T>
static inline T JaccardOverlap(const T* box1, const T* box2,
const bool normalized) {
if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] ||
box2[3] < box1[1]) {
return static_cast<T>(0.);
} else {
const T inter_xmin = std::max(box1[0], box2[0]);
const T inter_ymin = std::max(box1[1], box2[1]);
const T inter_xmax = std::min(box1[2], box2[2]);
const T inter_ymax = std::min(box1[3], box2[3]);
T norm = normalized ? static_cast<T>(0.) : static_cast<T>(1.);
T inter_w = inter_xmax - inter_xmin + norm;
T inter_h = inter_ymax - inter_ymin + norm;
const T inter_area = inter_w * inter_h;
const T bbox1_area = BBoxArea<T>(box1, normalized);
const T bbox2_area = BBoxArea<T>(box2, normalized);
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
template <class T>
T PolyIoU(const T* box1, const T* box2, const size_t box_size,
const bool normalized) {
T bbox1_area = PolyArea<T>(box1, box_size, normalized);
T bbox2_area = PolyArea<T>(box2, box_size, normalized);
T inter_area = PolyOverlapArea<T>(box1, box2, box_size, normalized);
if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) {
// If coordinate values are invalid
// if area size <= 0, return 0.
return T(0.);
} else {
return inter_area / (bbox1_area + bbox2_area - inter_area);
}
}
} // namespace operators
} // namespace paddle

@ -53,6 +53,7 @@ __all__ = [
'yolo_box',
'box_clip',
'multiclass_nms',
'locality_aware_nms',
'retinanet_detection_output',
'distribute_fpn_proposals',
'box_decoder_and_assign',
@ -3147,6 +3148,124 @@ def multiclass_nms(bboxes,
return output
def locality_aware_nms(bboxes,
scores,
score_threshold,
nms_top_k,
keep_top_k,
nms_threshold=0.3,
normalized=True,
nms_eta=1.,
background_label=-1,
name=None):
"""
**Local Aware NMS**
`Local Aware NMS <https://arxiv.org/abs/1704.03155>`_ is to do locality-aware non maximum
suppression (LANMS) on boxes and scores.
Firstly, this operator merge box and score according their IOU
(intersection over union). In the NMS step, this operator greedily selects a
subset of detection bounding boxes that have high scores larger than score_threshold,
if providing this threshold, then selects the largest nms_top_k confidences scores
if nms_top_k is larger than -1. Then this operator pruns away boxes that have high
IOU overlap with already selected boxes by adaptive threshold NMS based on parameters
of nms_threshold and nms_eta.
Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
Args:
bboxes (Variable): A 3-D Tensor with shape [N, M, 4 or 8 16 24 32]
represents the predicted locations of M bounding
bboxes, N is the batch size. Each bounding box
has four coordinate values and the layout is
[xmin, ymin, xmax, ymax], when box size equals to 4.
The data type is float32 or float64.
scores (Variable): A 3-D Tensor with shape [N, C, M] represents the
predicted confidence predictions. N is the batch
size, C is the class number, M is number of bounding
boxes. Now only support 1 class. For each category
there are total M scores which corresponding M bounding
boxes. Please note, M is equal to the 2nd dimension of
BBoxes. The data type is float32 or float64.
background_label (int): The index of background label, the background
label will be ignored. If set to -1, then all
categories will be considered. Default: -1
score_threshold (float): Threshold to filter out bounding boxes with
low confidence score. If not provided,
consider all boxes.
nms_top_k (int): Maximum number of detections to be kept according to
the confidences aftern the filtering detections based
on score_threshold.
nms_threshold (float): The threshold to be used in NMS. Default: 0.3
nms_eta (float): The threshold to be used in NMS. Default: 1.0
keep_top_k (int): Number of total bboxes to be kept per image after NMS
step. -1 means keeping all bboxes after NMS step.
normalized (bool): Whether detections are normalized. Default: True
name(str): Name of the locality aware nms op, please refer to :ref:`api_guide_Name` .
Default: None.
Returns:
Variable: A 2-D LoDTensor with shape [No, 6] represents the detections.
Each row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]
or A 2-D LoDTensor with shape [No, 10] represents the detections.
Each row has 10 values:
[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the
total number of detections. If there is no detected boxes for all
images, lod will be set to {1} and Out only contains one value
which is -1.
(After version 1.3, when no boxes detected, the lod is changed
from {0} to {1}). The data type is float32 or float64.
Examples:
.. code-block:: python
import paddle.fluid as fluid
boxes = fluid.data(name='bboxes', shape=[None, 81, 8],
dtype='float32')
scores = fluid.data(name='scores', shape=[None, 1, 81],
dtype='float32')
out = fluid.layers.locality_aware_nms(bboxes=boxes,
scores=scores,
score_threshold=0.5,
nms_top_k=400,
nms_threshold=0.3,
keep_top_k=200,
normalized=False)
"""
shape = scores.shape
assert len(shape) == 3, "dim size of scores must be 3"
assert shape[
1] == 1, "locality_aware_nms only support one class, Tensor score shape must be [N, 1, M]"
helper = LayerHelper('locality_aware_nms', **locals())
output = helper.create_variable_for_type_inference(dtype=bboxes.dtype)
out = {'Out': output}
helper.append_op(
type="locality_aware_nms",
inputs={'BBoxes': bboxes,
'Scores': scores},
attrs={
'background_label': background_label,
'score_threshold': score_threshold,
'nms_top_k': nms_top_k,
'nms_threshold': nms_threshold,
'nms_eta': nms_eta,
'keep_top_k': keep_top_k,
'nms_eta': nms_eta,
'normalized': normalized
},
outputs={'Out': output})
output.stop_gradient = True
return output
def distribute_fpn_proposals(fpn_rois,
min_level,
max_level,

Loading…
Cancel
Save