You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/python/paddle/nn/layer/extension.py

107 lines
3.9 KiB

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['RowConv']
from ...fluid.dygraph import layers
from .. import functional as F
class RowConv(layers.Layer):
    """
    :alias_main: paddle.nn.RowConv
    :alias: paddle.nn.RowConv,paddle.nn.layer.RowConv,paddle.nn.layer.extension.RowConv

    **Row-convolution operator**

    Row convolution, also known as lookahead convolution, was introduced for
    `DeepSpeech2 <http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf>`_.

    Bidirectional RNNs, which are useful in DeepSpeech-like speech models,
    learn a representation of a sequence by running a forward and a backward
    pass over the entire sequence. Unlike unidirectional RNNs, they are hard
    to deploy in online, low-latency settings. Lookahead convolution lets a
    unidirectional recurrent network use information from future
    subsequences in a computationally efficient manner.

    The operator differs from the 1D sequence convolution. It works on an
    input sequence X of length t with input dimension D, and a filter (W) of
    size context * D.

    For more details about row_conv, please refer to the design document
    `<https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645>`_ .

    Parameters:
        num_channels (int): Feature size of the input data (D).
        future_context_size (int): Future context size. Note that the shape
            of the convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of the parameter, including
            name, initializer etc. Default: None.
        act (str): Non-linear activation to be applied to the output
            variable. Default: None.
        dtype (str, optional): Data type, it can be "float32".
            Default: "float32".

    Attributes:
        weight (Parameter): The learnable weight (convolution kernel) of this
            layer, with shape [future_context_size + 1, D].

    Returns:
        None

    Examples:
        .. code-block:: python

            from paddle import fluid, nn
            import paddle.fluid.dygraph as dg
            import paddle.nn.functional as F
            import numpy as np

            batch_size = 4
            time_steps = 8
            feature_size = 6
            context_size = 4

            x = np.random.randn(batch_size, time_steps, feature_size).astype(np.float32)

            place = fluid.CPUPlace()
            with dg.guard(place):
                x_var = dg.to_variable(x)
                conv = nn.RowConv(feature_size, context_size)
                y_var = conv(x_var)
                y_np = y_var.numpy()
                print(y_np.shape)

            # (4, 8, 6)
    """

    def __init__(self,
                 num_channels,
                 future_context_size,
                 param_attr=None,
                 act=None,
                 dtype="float32"):
        """Store the layer configuration and create the kernel parameter."""
        super(RowConv, self).__init__()

        # Keep the configuration on the instance; `forward` reads `_act`.
        self._dtype = dtype
        self._param_attr = param_attr
        self._act = act

        # One kernel row for the current step plus one per future step.
        kernel_shape = [future_context_size + 1, num_channels]
        self.weight = self.create_parameter(
            kernel_shape, attr=param_attr, dtype=dtype)

    def forward(self, input):
        """Run `F.row_conv` on `input` with this layer's weight and activation.

        Parameters:
            input: The data to convolve. The class-level example passes a
                float32 variable of shape (batch_size, time_steps,
                feature_size).

        Returns:
            The value returned by `F.row_conv`. In the class-level example it
            has the same shape as the input.
        """
        return F.row_conv(input, self.weight, act=self._act)