You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
383 lines
16 KiB
383 lines
16 KiB
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from ...device import get_cudnn_version
|
|
from ...fluid.framework import core, in_dygraph_mode, Variable
|
|
from ...fluid.layer_helper import LayerHelper
|
|
from ...fluid.data_feeder import check_variable_and_dtype
|
|
from ...fluid import dygraph_utils
|
|
import numpy as np
|
|
|
|
# TODO: define specitial functions used in computer vision task
|
|
# from ...fluid.layers import affine_channel #DEFINE_ALIAS
|
|
# from ...fluid.layers import anchor_generator #DEFINE_ALIAS
|
|
# from ...fluid.layers import bipartite_match #DEFINE_ALIAS
|
|
# from ...fluid.layers import box_clip #DEFINE_ALIAS
|
|
# from ...fluid.layers import box_coder #DEFINE_ALIAS
|
|
# from ...fluid.layers import box_decoder_and_assign #DEFINE_ALIAS
|
|
# from ...fluid.layers import collect_fpn_proposals #DEFINE_ALIAS
|
|
# from ...fluid.layers import deformable_roi_pooling #DEFINE_ALIAS
|
|
# from ...fluid.layers import density_prior_box #DEFINE_ALIAS
|
|
# from ...fluid.layers import detection_output #DEFINE_ALIAS
|
|
# from ...fluid.layers import distribute_fpn_proposals #DEFINE_ALIAS
|
|
# from ...fluid.layers import generate_mask_labels #DEFINE_ALIAS
|
|
# from ...fluid.layers import generate_proposal_labels #DEFINE_ALIAS
|
|
# from ...fluid.layers import generate_proposals #DEFINE_ALIAS
|
|
# from ...fluid.layers import image_resize #DEFINE_ALIAS
|
|
# from ...fluid.layers import prior_box #DEFINE_ALIAS
|
|
# from ...fluid.layers import prroi_pool #DEFINE_ALIAS
|
|
# from ...fluid.layers import psroi_pool #DEFINE_ALIAS
|
|
# from ...fluid.layers import resize_bilinear #DEFINE_ALIAS
|
|
# from ...fluid.layers import resize_nearest #DEFINE_ALIAS
|
|
# from ...fluid.layers import resize_trilinear #DEFINE_ALIAS
|
|
# from ...fluid.layers import roi_align #DEFINE_ALIAS
|
|
# from ...fluid.layers import roi_pool #DEFINE_ALIAS
|
|
# from ...fluid.layers import space_to_depth #DEFINE_ALIAS
|
|
# from ...fluid.layers import yolo_box #DEFINE_ALIAS
|
|
# from ...fluid.layers import yolov3_loss #DEFINE_ALIAS
|
|
# from ...fluid.layers import fsp_matrix #DEFINE_ALIAS
|
|
# from ...fluid.layers import image_resize_short #DEFINE_ALIAS
|
|
# from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS
|
|
# from ...fluid.layers import retinanet_detection_output #DEFINE_ALIAS
|
|
# from ...fluid.layers import retinanet_target_assign #DEFINE_ALIAS
|
|
# from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS
|
|
# from ...fluid.layers import shuffle_channel #DEFINE_ALIAS
|
|
|
|
__all__ = [
|
|
'affine_grid',
|
|
'grid_sample',
|
|
'pixel_shuffle'
|
|
]
|
|
|
|
|
|
def affine_grid(theta, out_shape, align_corners=True, name=None):
|
|
"""
|
|
It generates a grid of (x,y) coordinates using the parameters of
|
|
the affine transformation that correspond to a set of points where
|
|
the input feature map should be sampled to produce the transformed
|
|
output feature map.
|
|
|
|
Args:
|
|
theta (Tensor) - A tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters.
|
|
The data type can be float32 or float64.
|
|
out_shape (Tensor | list | tuple): The shape of target output with format [batch_size, channel, height, width].
|
|
``out_shape`` can be a Tensor or a list or tuple. The data
|
|
type must be int32.
|
|
align_corners(bool): Whether to align corners of target feature map and source feature map. Default: True.
|
|
name(str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.
|
|
|
|
Returns:
|
|
Tensor, A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`.
|
|
|
|
Raises:
|
|
ValueError: If the type of arguments is not supported.
|
|
|
|
Examples:
|
|
|
|
.. code-block:: python
|
|
|
|
import paddle
|
|
import paddle.nn.functional as F
|
|
import numpy as np
|
|
paddle.disable_static()
|
|
# theta shape = [1, 2, 3]
|
|
theta = np.array([[[-0.7, -0.4, 0.3],
|
|
[ 0.6, 0.5, 1.5]]]).astype("float32")
|
|
theta_t = paddle.to_tensor(theta)
|
|
y_t = F.affine_grid(
|
|
theta_t,
|
|
[1, 2, 3, 3],
|
|
align_corners=False)
|
|
print(y_t.numpy())
|
|
|
|
#[[[[ 1.0333333 0.76666665]
|
|
# [ 0.76666665 1.0999999 ]
|
|
# [ 0.5 1.4333333 ]]
|
|
#
|
|
# [[ 0.5666667 1.1666666 ]
|
|
# [ 0.3 1.5 ]
|
|
# [ 0.03333333 1.8333334 ]]
|
|
#
|
|
# [[ 0.10000002 1.5666667 ]
|
|
# [-0.16666666 1.9000001 ]
|
|
# [-0.43333334 2.2333333 ]]]]
|
|
"""
|
|
helper = LayerHelper('affine_grid')
|
|
|
|
if not isinstance(theta, Variable):
|
|
raise ValueError("The theta should be a Tensor.")
|
|
check_variable_and_dtype(theta, 'theta', ['float32', 'float64'],
|
|
'affine_grid')
|
|
cudnn_version = get_cudnn_version()
|
|
if cudnn_version is not None and cudnn_version >= 6000 and align_corners:
|
|
use_cudnn = True
|
|
else:
|
|
use_cudnn = False
|
|
|
|
if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \
|
|
isinstance(out_shape, Variable)):
|
|
raise ValueError("The out_shape should be a list, tuple or Tensor.")
|
|
|
|
if in_dygraph_mode():
|
|
_out_shape = out_shape.numpy().tolist() if isinstance(
|
|
out_shape, Variable) else out_shape
|
|
return core.ops.affine_grid(theta, "output_shape", _out_shape,
|
|
"align_corners", align_corners, "use_cudnn",
|
|
use_cudnn)
|
|
|
|
out = helper.create_variable_for_type_inference(theta.dtype)
|
|
ipts = {'Theta': theta}
|
|
attrs = {"align_corners": align_corners, "use_cudnn": use_cudnn}
|
|
if isinstance(out_shape, Variable):
|
|
ipts['OutputShape'] = out_shape
|
|
check_variable_and_dtype(out_shape, 'out_shape', ['int32'],
|
|
'affine_grid')
|
|
else:
|
|
attrs['output_shape'] = out_shape
|
|
|
|
helper.append_op(
|
|
type='affine_grid',
|
|
inputs=ipts,
|
|
outputs={'Output': out},
|
|
attrs=None if len(attrs) == 0 else attrs)
|
|
return out
|
|
|
|
|
|
def grid_sample(x,
|
|
grid,
|
|
mode='bilinear',
|
|
padding_mode='zeros',
|
|
align_corners=True,
|
|
name=None):
|
|
"""
|
|
This operation samples input X by using bilinear interpolation or
|
|
nearest interpolation based on flow field grid, which is usually
|
|
generated by :code:`affine_grid` . The grid of shape [N, H, W, 2]
|
|
is the concatenation of (x, y) coordinates with shape [N, H, W] each,
|
|
where x is indexing the 4th dimension (in width dimension) of input
|
|
data x and y is indexing the 3rd dimension (in height dimension),
|
|
finally results is the bilinear interpolation or nearest value of 4 nearest corner
|
|
points. The output tensor shape will be [N, C, H, W].
|
|
|
|
|
|
Step 1:
|
|
|
|
Get (x, y) grid coordinates and scale to [0, H-1/W-1].
|
|
|
|
.. code-block:: text
|
|
|
|
grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1)
|
|
grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1)
|
|
|
|
Step 2:
|
|
|
|
Indices input data X with grid (x, y) in each [H, W] area, and bilinear
|
|
interpolate point value by 4 nearest points or nearest interpolate point value
|
|
by nearest point.
|
|
|
|
.. code-block:: text
|
|
|
|
wn ------- y_n ------- en
|
|
| | |
|
|
| d_n |
|
|
| | |
|
|
x_w --d_w-- grid--d_e-- x_e
|
|
| | |
|
|
| d_s |
|
|
| | |
|
|
ws ------- y_s ------- wn
|
|
|
|
For bilinear interpolation:
|
|
x_w = floor(x) // west side x coord
|
|
x_e = x_w + 1 // east side x coord
|
|
y_n = floor(y) // north side y coord
|
|
y_s = y_s + 1 // south side y coord
|
|
d_w = grid_x - x_w // distance to west side
|
|
d_e = x_e - grid_x // distance to east side
|
|
d_n = grid_y - y_n // distance to north side
|
|
d_s = y_s - grid_y // distance to south side
|
|
wn = X[:, :, y_n, x_w] // north-west point value
|
|
en = X[:, :, y_n, x_e] // north-east point value
|
|
ws = X[:, :, y_s, x_w] // south-east point value
|
|
es = X[:, :, y_s, x_w] // north-east point value
|
|
|
|
output = wn * d_e * d_s + en * d_w * d_s
|
|
+ ws * d_e * d_n + es * d_w * d_n
|
|
|
|
Args:
|
|
x(Tensor): The input tensor, which is a 4-d tensor with shape
|
|
[N, C, H, W], N is the batch size, C is the channel
|
|
number, H and W is the feature height and width.
|
|
The data type is float32 or float64.
|
|
grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The
|
|
data type is float32 or float64.
|
|
mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'.
|
|
Default: 'bilinear'.
|
|
padding_mode(str, optional) The padding method used when source index
|
|
is out of input images. It can be 'zeros', 'reflection' and 'border'.
|
|
Default: zeros.
|
|
align_corners(bool, optional): If `align_corners` is true, it will projects
|
|
-1 and 1 to the centers of the corner pixels. Otherwise, it will
|
|
projects -1 and 1 to the image edges.
|
|
name(str, optional): For detailed information, please refer
|
|
to :ref:`api_guide_Name`. Usually name is no need to set and
|
|
None by default.
|
|
|
|
Returns:
|
|
Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor.
|
|
|
|
Examples:
|
|
|
|
.. code-block:: python
|
|
|
|
import paddle
|
|
import paddle.nn.functional as F
|
|
import numpy as np
|
|
|
|
# shape=[1, 1, 3, 3]
|
|
x = np.array([[[[-0.6, 0.8, -0.5],
|
|
[-0.5, 0.2, 1.2],
|
|
[ 1.4, 0.3, -0.2]]]]).astype("float64")
|
|
|
|
# grid shape = [1, 3, 4, 2]
|
|
grid = np.array(
|
|
[[[[ 0.2, 0.3],
|
|
[-0.4, -0.3],
|
|
[-0.9, 0.3],
|
|
[-0.9, -0.6]],
|
|
[[ 0.4, 0.1],
|
|
[ 0.9, -0.8],
|
|
[ 0.4, 0.5],
|
|
[ 0.5, -0.2]],
|
|
[[ 0.1, -0.8],
|
|
[-0.3, -1. ],
|
|
[ 0.7, 0.4],
|
|
[ 0.2, 0.8]]]]).astype("float64")
|
|
|
|
|
|
x = paddle.to_tensor(x)
|
|
grid = paddle.to_tensor(grid)
|
|
y_t = F.grid_sample(
|
|
x,
|
|
grid,
|
|
mode='bilinear',
|
|
padding_mode='border',
|
|
align_corners=True)
|
|
print(y_t.numpy())
|
|
|
|
# output shape = [1, 1, 3, 4]
|
|
# [[[[ 0.34 0.016 0.086 -0.448]
|
|
# [ 0.55 -0.076 0.35 0.59 ]
|
|
# [ 0.596 0.38 0.52 0.24 ]]]]
|
|
"""
|
|
helper = LayerHelper("grid_sample", **locals())
|
|
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sample')
|
|
check_variable_and_dtype(grid, 'grid', ['float32', 'float64'],
|
|
'grid_sample')
|
|
|
|
_modes = ['bilinear', 'nearest']
|
|
_padding_modes = ['zeros', 'reflection', 'border']
|
|
if mode not in _modes:
|
|
raise ValueError(
|
|
"The mode of grid sample function should be in {}, but got: {}".
|
|
format(_modes, mode))
|
|
if padding_mode not in _padding_modes:
|
|
raise ValueError(
|
|
"The padding mode of grid sample function should be in {}, but got: {}".
|
|
format(_padding_modes, padding_mode))
|
|
|
|
if not isinstance(align_corners, bool):
|
|
raise ValueError("The align corners should be bool, but got: {}".format(
|
|
align_corners))
|
|
|
|
cudnn_version = get_cudnn_version()
|
|
use_cudnn = False
|
|
if (cudnn_version is not None
|
|
) and align_corners and mode == 'bilinear' and padding_mode == 'zeros':
|
|
use_cudnn = True
|
|
ipts = {'X': x, 'Grid': grid}
|
|
attrs = {
|
|
'mode': mode,
|
|
'padding_mode': padding_mode,
|
|
'align_corners': align_corners,
|
|
'use_cudnn': use_cudnn
|
|
}
|
|
|
|
if in_dygraph_mode():
|
|
attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners',
|
|
align_corners, 'use_cudnn', use_cudnn)
|
|
out = getattr(core.ops, 'grid_sampler')(x, grid, *attrs)
|
|
else:
|
|
out = helper.create_variable_for_type_inference(x.dtype)
|
|
helper.append_op(
|
|
type='grid_sampler',
|
|
inputs=ipts,
|
|
attrs=attrs,
|
|
outputs={'Output': out})
|
|
return out
|
|
|
|
|
|
def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
|
|
"""
|
|
This API implements pixel shuffle operation.
|
|
See more details in :ref:`api_nn_vision_PixelShuffle` .
|
|
Parameters:
|
|
x(Tensor): 4-D tensor, the data type should be float32 or float64.
|
|
upscale_factor(int): factor to increase spatial resolution.
|
|
data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width].
|
|
name (str, optional): The default value is None. Normally there is no need for user to set this property.
|
|
Returns:
|
|
Out(tensor): Reshaped tensor according to the new dimension.
|
|
Raises:
|
|
ValueError: If the square of upscale_factor cannot divide the channels of input.
|
|
Examples:
|
|
.. code-block:: python
|
|
import paddle
|
|
import paddle.nn.functional as F
|
|
import numpy as np
|
|
x = np.random.randn(2, 9, 4, 4).astype(np.float32)
|
|
paddle.disable_static()
|
|
x_var = paddle.to_tensor(x)
|
|
out_var = F.pixel_shuffle(x_var, 3)
|
|
out = out_var.numpy()
|
|
print(out.shape)
|
|
# (2, 1, 12, 12)
|
|
"""
|
|
if not in_dygraph_mode():
|
|
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
|
|
'pixel_shuffle')
|
|
|
|
if not isinstance(upscale_factor, int):
|
|
raise TypeError("upscale factor must be int type")
|
|
|
|
if data_format not in ["NCHW", "NHWC"]:
|
|
raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'."
|
|
"But recevie Attr(data_format): {} ".format(
|
|
data_format))
|
|
|
|
if in_dygraph_mode():
|
|
return core.ops.pixel_shuffle(x, "upscale_factor", upscale_factor,
|
|
"data_format", data_format)
|
|
|
|
helper = LayerHelper("pixel_shuffle", **locals())
|
|
|
|
out = helper.create_variable_for_type_inference(dtype=x.dtype)
|
|
helper.append_op(
|
|
type="pixel_shuffle",
|
|
inputs={"X": x},
|
|
outputs={"Out": out},
|
|
attrs={"upscale_factor": upscale_factor,
|
|
"data_format": data_format})
|
|
return out
|