add deformable psroi pooling (#17827)

* add deformable psroi pooling * test=develop * test=develop * test=develop modify format * fix bug * test=develop run ci * test=develop add API.spec * add test_layers.py * run ci again * test=develop run ci again * run ci again * test=develop run ci again * test=develop run ci again * test=develop run ci again * add space between two lines * test=develop add space between two lines * test=develop add space between lines * test=develop modify comment in nn.py * test=develop add space between two lines * test=develop add space between two lines * update API.spec * run ci again * test=develop run ci again * rerun ci * test=develop rerun ci * change input shape * run ci * test=develop run ci * modify format of nn.py * test=develop * test=develop * test=develop update API.spec * test=develop fix API doc * modify API comment * modify API comment * test=develop update API.spec * test=develop modify comment * test=develop modify comment * test=develop modify comment * test=develop update API.spec * test=develop modify comment * test=develop add inference in nn.py * test=develop update API.spec * test=develop resolve conflict * test=develop update API.spec
6 years ago · 871af28d6c
parent 40885c225b
commit 871af28d6c
7 changed files with 1786 additions and 0 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -239,6 +239,7 @@ paddle.fluid.layers.where (ArgSpec(args=['condition'], varargs=None, keywords=No
 paddle.fluid.layers.sign (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'ccf6bb7912afd2818d24bc45461e807a'))
 paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'deformable_groups', 'im2col_step', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, None, None, None)), ('document', 'c896b66265a60bd3c5510f66e6e02919'))
 paddle.fluid.layers.unfold (ArgSpec(args=['x', 'kernel_sizes', 'strides', 'paddings', 'dilations', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None)), ('document', '3f884662ad443d9ecc2b3734b4f61ad6'))
+paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '65b8dbe13e00c4dc8224652f6ff89540'))
 paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '6e19128b46936edf9f3fad77860a1da8'))
 paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'dce69a78638da8f7ad80b1fc00ed2029'))
 paddle.fluid.layers.read_file (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '32181f6037e387fb6e68a5beaafe33b6'))
--- a/paddle/fluid/operators/deformable_psroi_pooling_op.cc
+++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cc
--- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu
+++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu
--- a/paddle/fluid/operators/deformable_psroi_pooling_op.h
+++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@ -204,6 +204,7 @@ __all__ = [
    'sign',
    'deformable_conv',
    'unfold',
+    'deformable_roi_pooling',
 ]

 kIgnoreIndex = -100
@ -12168,3 +12169,117 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
            "dilations": dilations
        })
    return out
+
+
+def deformable_roi_pooling(input,
+                           rois,
+                           trans,
+                           no_trans=False,
+                           spatial_scale=1.0,
+                           group_size=[1, 1],
+                           pooled_height=1,
+                           pooled_width=1,
+                           part_size=None,
+                           sample_per_part=1,
+                           trans_std=0.1,
+                           position_sensitive=False,
+                           name=None):
+    """
+    Deformable PSROI Pooling Layer
+
+    Args:
+       input (Variable): The input of Deformable PSROIPooling. The shape of input tensor is
+                        [N, C, H, W], where N is batch size, C is number of input channels, H
+                        is height of the feature, and W is the width of the feature.
+       rois (Variable): ROIs (Regions of Interest) to pool over. It should be
+                        a 2-D LoDTensor of shape (num_rois, 4), the lod level
+                        is 1. Given as [[x1, y1, x2, y2], ...], (x1, y1) is
+                        the top left coordinates, and (x2, y2) is the bottom
+                        right coordinates.
+       trans (Variable): Offset of features on ROIs while pooling. The format is NCHW, where
+                         N is number of ROIs, C is number of channels, which indicate the offset distance
+                         in the x and y directions, H is pooled height, and W is pooled width.
+       no_trans (bool): Whether to add offset to get new value or not while roi pooling, which
+                          value is True or False. Default: False.
+       spatial_scale (float): Ratio of input feature map height (or width) to raw image height (or width).
+                             Equals the reciprocal of total stride in convolutional layers. Default: 1.0.
+       group_size (list|tuple): The number of groups into which input channels are divided (e.g. the number of
+                         input channels is k1*k2*(C+1), where k1 and k2 are group width and height and C+1 is number
+                         of output channels), e.g. (4, 6), where 4 is height of group and 6 is width of group. Default: [1, 1].
+       pooled_height (integer): The pooled output height. Default: 1.
+       pooled_width (integer): The pooled output width. Default: 1.
+       part_size (list|tuple): The height and width of offset, e.g. (4, 6), where height is 4 and width is 6.
+                        Default: None, in which case [pooled_height, pooled_width] is used.
+       sample_per_part (integer): The number of samples in each bin. Default: 1.
+       trans_std (float): Coefficient of offset. Default: 0.1.
+       position_sensitive (bool): Whether to choose deformable psroi pooling mode or not. Default: False.
+       name (str): Name of layer. Default: None.
+    Returns:
+        Variable: The tensor variable storing the deformable psroi pooling \
+                  result.
+
+
+    Examples:
+      .. code-block:: python
+
+        input = fluid.layers.data(name="input",
+                                  shape=[2, 192, 64, 64],
+                                  dtype='float32',
+                                  append_batch_size=False)
+        rois = fluid.layers.data(name="rois",
+                                 shape=[4],
+                                 dtype='float32',
+                                 lod_level=1)
+        trans = fluid.layers.data(name="trans",
+                                  shape=[2, 384, 64, 64],
+                                  dtype='float32',
+                                  append_batch_size=False)
+        x = fluid.layers.nn.deformable_roi_pooling(input=input,
+                                                     rois=rois,
+                                                     trans=trans,
+                                                     no_trans=False,
+                                                     spatial_scale=1.0,
+                                                     group_size=(1, 1),
+                                                     pooled_height=8,
+                                                     pooled_width=8,
+                                                     part_size=(8, 8),
+                                                     sample_per_part=4,
+                                                     trans_std=0.1,
+                                                     position_sensitive=False)
+    """
+
+    input_channels = input.shape[1]
+    if position_sensitive:
+        # Floor division: true division would make output_dim a float on Python 3.
+        output_channels = input_channels // pooled_height // pooled_width
+    else:
+        output_channels = input_channels
+
+    if part_size is None:
+        part_size = [pooled_height, pooled_width]
+    part_size = utils.convert_to_list(part_size, 2, 'part_size')
+    group_size = utils.convert_to_list(group_size, 2, 'group_size')
+    helper = LayerHelper('deformable_psroi_pooling', **locals())
+    dtype = helper.input_dtype()
+    output = helper.create_variable_for_type_inference(dtype)
+    # TopCount is wired up as an op output but is not returned to the caller.
+    top_count = helper.create_variable_for_type_inference(dtype='int32')
+    helper.append_op(
+        type="deformable_psroi_pooling",
+        inputs={"Input": input,
+                "ROIs": rois,
+                "Trans": trans},
+        outputs={"Output": output,
+                 "TopCount": top_count},
+        attrs={
+            "no_trans": no_trans,
+            "spatial_scale": spatial_scale,
+            "output_dim": output_channels,
+            "group_size": group_size,
+            "pooled_height": pooled_height,
+            "pooled_width": pooled_width,
+            "part_size": part_size,
+            "sample_per_part": sample_per_part,
+            "trans_std": trans_std
+        })
+    return output
--- a/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py
+++ b/python/paddle/fluid/tests/unittests/test_deformable_psroi_pooling.py
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@ -1995,6 +1995,35 @@ class TestBook(LayerTest):
            out = layers.unfold(x, [3, 3], 1, 1, 1)
            return (out)

+    def test_deform_roi_pooling(self):
+        """Build a deformable_roi_pooling layer in the default programs."""
+        main_program = fluid.default_main_program()
+        startup_program = fluid.default_startup_program()
+        with program_guard(main_program, startup_program):
+            # Feature map and offsets share the same static 4-D shape here.
+            feature_map = layers.data(
+                name='input',
+                shape=[2, 3, 32, 32],
+                dtype='float32',
+                append_batch_size=False)
+            regions = layers.data(
+                name="rois", shape=[4], dtype='float32', lod_level=1)
+            offsets = layers.data(
+                name="trans",
+                shape=[2, 3, 32, 32],
+                dtype='float32',
+                append_batch_size=False)
+            # Scalar/tuple options of the layer, kept apart from its tensors.
+            pooling_options = dict(
+                no_trans=False,
+                spatial_scale=1.0,
+                group_size=(1, 1),
+                pooled_height=8,
+                pooled_width=8,
+                part_size=(8, 8),
+                sample_per_part=4,
+                trans_std=0.1)
+            pooled = layers.deformable_roi_pooling(
+                input=feature_map,
+                rois=regions,
+                trans=offsets,
+                **pooling_options)
+        return pooled
+

 if __name__ == '__main__':
    unittest.main()