You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Paddle/python/paddle/v2/fluid/layers/detection.py

224 lines
8.5 KiB

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to the detection neural network.
"""
from ..layer_helper import LayerHelper
from ..framework import Variable
from tensor import concat
from ops import reshape
from operator import mul
import math
__all__ = ['prior_box', ]
def prior_box(inputs,
              image,
              min_ratio,
              max_ratio,
              aspect_ratios,
              base_size,
              steps=None,
              step_w=None,
              step_h=None,
              offset=0.5,
              variance=[0.1, 0.1, 0.1, 0.1],
              flip=False,
              clip=False,
              min_sizes=None,
              max_sizes=None,
              name=None):
    """
    **Prior_boxes**

    Generate prior boxes for SSD (Single Shot MultiBox Detector) algorithm.
    For details of this algorithm, please refer to section 2.2 of the paper
    `SSD: Single Shot MultiBox Detector <https://arxiv.org/abs/1512.02325>`_ .

    Args:
        inputs(list): The list of input Variables; the format of all
            Variables is NCHW.
        image(Variable): The input image data of PriorBoxOp; the layout
            is NCHW.
        min_ratio(int): The min ratio of generated prior boxes.
        max_ratio(int): The max ratio of generated prior boxes.
        aspect_ratios(list): The aspect ratios of generated prior boxes.
            The length of inputs and aspect_ratios must be equal.
        base_size(int): Used to derive min_size and max_size from
            min_ratio and max_ratio.
        steps(list, optional, default=None): Shorthand that assigns the
            same per-layer values to both step_w and step_h.
        step_w(list, optional, default=None): Prior boxes step across
            width. If step_w[i] == 0.0, the step across the width of
            inputs[i] will be computed automatically.
        step_h(list, optional, default=None): Prior boxes step across
            height. If step_h[i] == 0.0, the step across the height of
            inputs[i] will be computed automatically.
        offset(float, optional, default=0.5): Prior boxes center offset.
        variance(list, optional, default=[0.1, 0.1, 0.1, 0.1]): The
            variances to be encoded in prior boxes. (The mutable default
            is safe here: the list is only read, never modified.)
        flip(bool, optional, default=False): Whether to flip aspect ratios.
        clip(bool, optional, default=False): Whether to clip
            out-of-boundary boxes.
        min_sizes(list, optional, default=None): If ``len(inputs) <= 2``,
            min_sizes must be set up, and its length should equal the
            length of inputs.
        max_sizes(list, optional, default=None): If ``len(inputs) <= 2``,
            max_sizes must be set up, and its length should equal the
            length of inputs.
        name(str, optional, default=None): Name of the prior box layer.

    Returns:
        boxes(Variable): The output prior boxes of PriorBoxOp. The layout
            is [num_priors, 4]. num_priors is the total box count of each
            position of inputs.
        Variances(Variable): The expanded variances of PriorBoxOp. The
            layout is [num_priors, 4]. num_priors is the total box count
            of each position of inputs.

    Examples:
        .. code-block:: python

            prior_box(
                inputs=[conv1, conv2, conv3, conv4, conv5, conv6],
                image=data,
                min_ratio=20,  # 0.20
                max_ratio=90,  # 0.90
                steps=[8., 16., 32., 64., 100., 300.],
                aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
                base_size=300,
                offset=0.5,
                variance=[0.1, 0.1, 0.1, 0.1],
                flip=True,
                clip=True)
    """

    def _prior_box_(input,
                    image,
                    min_sizes,
                    max_sizes,
                    aspect_ratios,
                    variance,
                    flip=False,
                    clip=False,
                    step_w=0.0,
                    step_h=0.0,
                    offset=0.5,
                    name=None):
        # Append one prior_box op for a single feature map and return its
        # (boxes, variances) output pair.
        helper = LayerHelper("prior_box", **locals())
        dtype = helper.input_dtype()
        box = helper.create_tmp_variable(dtype)
        var = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="prior_box",
            inputs={"Input": input,
                    "Image": image},
            outputs={"Boxes": box,
                     "Variances": var},
            attrs={
                'min_sizes': min_sizes,
                'max_sizes': max_sizes,
                'aspect_ratios': aspect_ratios,
                'variances': variance,
                'flip': flip,
                'clip': clip,
                'step_w': step_w,
                'step_h': step_h,
                'offset': offset
            })
        return box, var

    def _reshape_with_axis_(input, axis=1):
        # Flatten everything from `axis` onward into one trailing dimension
        # (leading dims collapse into -1) so per-layer outputs of different
        # spatial sizes can be concatenated.
        if not (axis > 0 and axis < len(input.shape)):
            raise ValueError(
                "The axis should be smaller than the arity of input and bigger than 0."
            )
        new_shape = [-1, reduce(mul, input.shape[axis:len(input.shape)], 1)]
        out = reshape(x=input, shape=new_shape)
        return out

    assert isinstance(inputs, list), 'inputs should be a list.'
    num_layer = len(inputs)

    if num_layer <= 2:
        # Too few layers to interpolate sizes from the ratio range; the
        # caller must supply explicit per-layer sizes.
        assert min_sizes is not None and max_sizes is not None
        assert len(min_sizes) == num_layer and len(max_sizes) == num_layer
    else:
        # Interpolate box sizes evenly between min_ratio and max_ratio for
        # all layers but the first; the first layer uses fixed fractions of
        # base_size (SSD convention).
        min_sizes = []
        max_sizes = []
        step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
        for ratio in xrange(min_ratio, max_ratio + 1, step):
            min_sizes.append(base_size * ratio / 100.)
            max_sizes.append(base_size * (ratio + step) / 100.)
        min_sizes = [base_size * .10] + min_sizes
        max_sizes = [base_size * .20] + max_sizes

    if aspect_ratios:
        if not (isinstance(aspect_ratios, list) and
                len(aspect_ratios) == num_layer):
            raise ValueError(
                'aspect_ratios should be list and the length of inputs '
                'and aspect_ratios should be the same.')
    if step_h:
        if not (isinstance(step_h, list) and len(step_h) == num_layer):
            raise ValueError(
                'step_h should be list and the length of inputs and '
                'step_h should be the same.')
    if step_w:
        if not (isinstance(step_w, list) and len(step_w) == num_layer):
            raise ValueError(
                'step_w should be list and the length of inputs and '
                'step_w should be the same.')
    if steps:
        if not (isinstance(steps, list) and len(steps) == num_layer):
            # FIX: the original message said "step_w" here (copy-paste
            # error); this branch validates `steps`.
            raise ValueError(
                'steps should be list and the length of inputs and '
                'steps should be the same.')
        # `steps` is shorthand for square steps: apply it to both axes.
        step_w = steps
        step_h = steps

    box_results = []
    var_results = []
    for i, input in enumerate(inputs):
        min_size = min_sizes[i]
        max_size = max_sizes[i]
        aspect_ratio = []
        if not isinstance(min_size, list):
            min_size = [min_size]
        if not isinstance(max_size, list):
            max_size = [max_size]
        if aspect_ratios:
            aspect_ratio = aspect_ratios[i]
            if not isinstance(aspect_ratio, list):
                aspect_ratio = [aspect_ratio]

        # BUG FIX: the original guarded step_h[i] with `if step_w`, so a
        # caller passing only step_h hit a TypeError (indexing None) or,
        # with step_w set and step_h unset, indexed the wrong list. Each
        # axis is now guarded by its own list.
        layer_step_w = step_w[i] if step_w else 0.0
        layer_step_h = step_h[i] if step_h else 0.0

        box, var = _prior_box_(input, image, min_size, max_size,
                               aspect_ratio, variance, flip, clip,
                               layer_step_w, layer_step_h, offset)

        box_results.append(box)
        var_results.append(var)

    if len(box_results) == 1:
        box = box_results[0]
        var = var_results[0]
    else:
        # Flatten each per-layer 4-D output before concatenation so that
        # feature maps with different spatial sizes can be joined.
        reshaped_boxes = []
        reshaped_vars = []
        for i in range(len(box_results)):
            reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3))
            reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3))

        box = concat(reshaped_boxes)
        var = concat(reshaped_vars)

    return box, var