Add row_conv and hsigmoid into paddle.nn (functional and layer) (#23517)

* add approximation for gelu, test=develop
* add functional conv
* add test and doc for functional convs, test=develop
* update ConvTransposeOp's InferShape and error message, test=develop
* add hsigmoid, row_conv in paddle.nn (functional and layer), test=develop
* fix hyperlinks in docstring

revert-22778-infer_var_type
parent 4231d84077
commit 600cb8c828
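As a quick orientation before the diffs, here is a minimal dygraph usage sketch of the APIs this commit adds, based only on the signatures exercised in the tests below; shapes, variable names, and the reliance on default arguments for nn.HSigmoid are illustrative assumptions, not taken verbatim from the source:

    import numpy as np
    from paddle import fluid, nn
    import paddle.fluid.dygraph as dg
    import paddle.nn.functional as F

    place = fluid.CPUPlace()
    with dg.guard(place):
        # row_conv, functional and layer forms
        x = dg.to_variable(np.random.randn(4, 12, 8).astype("float32"))
        # weight shape is [future_context_size + 1, num_channels]
        w = dg.to_variable(np.random.randn(4, 8).astype("float32"))
        y_functional = F.row_conv(x, w)
        conv = nn.RowConv(8, 3)  # num_channels=8, future_context_size=3
        y_layer = conv(x)

        # hsigmoid, layer form (default, non-custom tree)
        feat = dg.to_variable(np.random.randn(4, 6).astype("float32"))
        labels = dg.to_variable(np.random.randint(0, 8, (4, 1)).astype("int64"))
        hsig = nn.HSigmoid(6, 8)  # feature_size=6, num_classes=8
        loss = hsig(feat, labels)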
@ -0,0 +1,219 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle import fluid, nn
import paddle.fluid.dygraph as dg
import paddle.nn.functional as F
import paddle.fluid.initializer as I
import numpy as np
import unittest


class HSigmoidTestCase(unittest.TestCase):
    def __init__(self,
                 methodName="runTest",
                 batch_size=4,
                 feature_size=6,
                 num_classes=8,
                 labels=None,
                 path_code=None,
                 path_table=None,
                 is_sparse=False,
                 dtype="float32"):
        super(HSigmoidTestCase, self).__init__(methodName=methodName)
        self.batch_size = batch_size
        self.feature_size = feature_size
        self.num_classes = num_classes
        self.dtype = dtype
        self.is_sparse = is_sparse

        self.labels = labels
        self.path_code = path_code
        self.path_table = path_table
        self.is_custom = path_code is not None and path_table is not None

    def setUp(self):
        input_shape = (self.batch_size, self.feature_size)
        self.input = np.random.uniform(
            -1, 1, size=input_shape).astype(self.dtype)
        if self.labels is None:
            self.labels = np.random.randint(
                0, self.num_classes,
                size=(self.batch_size, 1)).astype(np.int64)
        C = self.num_classes if self.is_custom else self.num_classes - 1
        self.weight_shape = (C, self.feature_size)
        self.weight = np.random.randn(*self.weight_shape).astype(self.dtype)
        self.bias_shape = (C, 1)
        self.bias = np.random.randn(*self.bias_shape).astype(self.dtype)

    def fluid_layer(self, place):
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, self.feature_size], dtype=self.dtype)
                label = fluid.data("labels", [-1, 1], dtype="int64")
                if self.is_custom:
                    path_table = fluid.data(
                        "path_table", [-1, -1], dtype="int64")
                    path_code = fluid.data(
                        "path_code", [-1, -1], dtype="int64")
                else:
                    path_table = path_code = None
                y = fluid.layers.hsigmoid(
                    x,
                    label,
                    self.num_classes,
                    param_attr=I.NumpyArrayInitializer(self.weight),
                    bias_attr=I.NumpyArrayInitializer(self.bias),
                    path_table=path_table,
                    path_code=path_code,
                    is_custom=self.is_custom,
                    is_sparse=self.is_sparse)
        exe = fluid.Executor(place)
        exe.run(start)
        feed_dict = {"input": self.input, "labels": self.labels}
        if self.is_custom:
            feed_dict["path_code"] = self.path_code
            feed_dict["path_table"] = self.path_table
        y_np, = exe.run(main, feed=feed_dict, fetch_list=[y])
        return y_np

    def functional(self, place):
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, self.feature_size], dtype=self.dtype)
                label = fluid.data("labels", [-1, 1], dtype="int64")
                if self.is_custom:
                    path_table = fluid.data(
                        "path_table", [-1, -1], dtype="int64")
                    path_code = fluid.data(
                        "path_code", [-1, -1], dtype="int64")
                else:
                    path_table = path_code = None
                w = fluid.data("weight", self.weight_shape, dtype=self.dtype)
                b = fluid.data("bias", self.bias_shape, dtype=self.dtype)
                y = F.hsigmoid(
                    x,
                    label,
                    w,
                    b,
                    self.num_classes,
                    is_sparse=self.is_sparse,
                    path_table=path_table,
                    path_code=path_code)

        exe = fluid.Executor(place)
        exe.run(start)
        feed_dict = {
            "input": self.input,
            "labels": self.labels,
            "weight": self.weight,
            "bias": self.bias
        }
        if self.is_custom:
            feed_dict["path_code"] = self.path_code
            feed_dict["path_table"] = self.path_table
        y_np, = exe.run(main, feed=feed_dict, fetch_list=[y])
        return y_np

    def nn_layer(self, place):
        with dg.guard(place):
            x_var = dg.to_variable(self.input)
            label_var = dg.to_variable(self.labels)
            if self.is_custom:
                path_code_var = dg.to_variable(self.path_code)
                path_table_var = dg.to_variable(self.path_table)
            else:
                path_code_var = path_table_var = None
            hierarchical_softmax = nn.HSigmoid(
                self.feature_size,
                self.num_classes,
                is_custom=self.is_custom,
                is_sparse=self.is_sparse,
                param_attr=I.NumpyArrayInitializer(self.weight),
                bias_attr=I.NumpyArrayInitializer(self.bias),
                dtype=self.dtype)
            y_var = hierarchical_softmax(
                x_var,
                label_var,
                path_table=path_table_var,
                path_code=path_code_var)
            y_np = y_var.numpy()
        return y_np

    def _test_equivalence(self, place):
        result1 = self.fluid_layer(place)
        result2 = self.functional(place)
        result3 = self.nn_layer(place)
        np.testing.assert_array_almost_equal(result1, result2)
        np.testing.assert_array_almost_equal(result2, result3)

    def runTest(self):
        place = fluid.CPUPlace()
        self._test_equivalence(place)


class HSigmoidTestErrorCase(HSigmoidTestCase):
    def runTest(self):
        place = fluid.CPUPlace()
        with dg.guard(place):
            with self.assertRaises(ValueError):
                self.nn_layer()

    def nn_layer(self):
        x_var = dg.to_variable(self.input)
        label_var = dg.to_variable(self.labels)
        if self.is_custom:
            path_code_var = dg.to_variable(self.path_code)
            path_table_var = dg.to_variable(self.path_table)
        else:
            path_code_var = path_table_var = None
        hierarchical_softmax = nn.HSigmoid(
            self.feature_size,
            self.num_classes,
            is_custom=self.is_custom,
            param_attr=I.NumpyArrayInitializer(self.weight),
            bias_attr=I.NumpyArrayInitializer(self.bias),
            dtype=self.dtype)
        y_var = hierarchical_softmax(
            x_var,
            label_var,
            path_table=path_table_var,
            path_code=path_code_var)
        y_np = y_var.numpy()
        return y_np


def load_tests(loader, standard_tests, pattern):
    suite = unittest.TestSuite()
    suite.addTest(HSigmoidTestCase(methodName="runTest"))
    # Custom-tree case: each row of path_table lists the non-leaf node ids
    # on the path from the root to that sample's label, and each row of
    # path_code gives the 0/1 branch taken at the corresponding node;
    # -1 pads unused slots.
    suite.addTest(
        HSigmoidTestCase(
            methodName="runTest",
            batch_size=4,
            feature_size=6,
            num_classes=8,
            labels=np.array([0, 1, 4, 5]).astype(np.int64),
            path_table=np.array([(0, 2, -1, -1, -1), (0, 1, 3, -1, -1),
                                 (0, 1, 4, -1, -1),
                                 (0, 2, -1, -1, -1)]).astype(np.int64),
            path_code=np.array([(0, 0, -1, -1, -1), (1, 1, 1, -1, -1),
                                (1, 0, 0, -1, -1),
                                (0, 1, -1, -1, -1)]).astype(np.int64)))
    suite.addTest(HSigmoidTestErrorCase(methodName="runTest", num_classes=1))
    return suite


if __name__ == "__main__":
    unittest.main()
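As a cross-check on what the three code paths above (fluid_layer, functional, nn_layer) are asserted to agree on, here is a minimal numpy sketch of the custom-tree hierarchical sigmoid loss, under the assumption that the op sums a sigmoid cross entropy term over each sample's path nodes; the helper name hsigmoid_custom_ref and its exact numerics are illustrative, not taken from the Paddle source:

    import numpy as np

    def hsigmoid_custom_ref(x, w, b, path_table, path_code):
        # Assumed semantics: x is [N, D] features, w is [C, D] node weights,
        # b is [C, 1] node biases, path_table/path_code are [N, L] int64
        # arrays padded with -1. Returns [N, 1] per-sample losses.
        out = np.zeros((x.shape[0], 1), dtype=x.dtype)
        for i in range(x.shape[0]):
            for node, code in zip(path_table[i], path_code[i]):
                if node < 0:  # -1 marks the end of this sample's path
                    break
                pre = np.dot(w[node], x[i]) + b[node, 0]
                # numerically stable sigmoid cross entropy with hard label `code`
                out[i, 0] += np.logaddexp(0.0, pre) - code * pre
        return out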
@ -0,0 +1,131 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from paddle import fluid, nn
import paddle.fluid.dygraph as dg
import paddle.fluid.initializer as I
import paddle.nn.functional as F
import unittest


class RowConvTestCase(unittest.TestCase):
    def __init__(self,
                 methodName='runTest',
                 batch_size=4,
                 num_channels=8,
                 time_steps=12,
                 context_size=3,
                 act=None,
                 dtype="float32"):
        super(RowConvTestCase, self).__init__(methodName=methodName)
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.time_steps = time_steps
        self.context_size = context_size
        self.act = act
        self.dtype = dtype

    def setUp(self):
        input_shape = (self.batch_size, self.time_steps, self.num_channels)
        self.input = np.random.uniform(size=input_shape).astype(self.dtype)
        self.weight_shape = weight_shape = (self.context_size + 1,
                                            self.num_channels)
        self.weight = np.random.uniform(size=weight_shape).astype(self.dtype)

    def fluid_layer(self, place):
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, -1, self.num_channels], dtype=self.dtype)
                y = fluid.layers.row_conv(
                    x,
                    self.context_size,
                    param_attr=I.NumpyArrayInitializer(self.weight),
                    act=self.act)
        exe = fluid.Executor(place)
        exe.run(start)
        y_np, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
        return y_np

    def functional_declarative(self, place):
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, -1, self.num_channels], dtype=self.dtype)
                w = fluid.data("weight", self.weight_shape, dtype=self.dtype)
                y = F.row_conv(x, w, act=self.act)
        exe = fluid.Executor(place)
        exe.run(start)
        y_np, = exe.run(main,
                        feed={"input": self.input,
                              "weight": self.weight},
                        fetch_list=[y])
        return y_np

    def functional_imperative(self, place):
        with dg.guard(place):
            x_var = dg.to_variable(self.input)
            w_var = dg.to_variable(self.weight)
            y_var = F.row_conv(x_var, w_var, act=self.act)
            y_np = y_var.numpy()
        return y_np

    def nn_layer(self, place):
        with dg.guard(place):
            x_var = dg.to_variable(self.input)
            conv = nn.RowConv(
                self.num_channels,
                self.context_size,
                param_attr=I.NumpyArrayInitializer(self.weight),
                act=self.act,
                dtype=self.dtype)
            y_var = conv(x_var)
            y_np = y_var.numpy()
        return y_np

    def _test_equivalence(self, place):
        result1 = self.fluid_layer(place)
        result2 = self.functional_declarative(place)
        result3 = self.functional_imperative(place)
        result4 = self.nn_layer(place)
        np.testing.assert_array_almost_equal(result1, result2)
        np.testing.assert_array_almost_equal(result2, result3)
        np.testing.assert_array_almost_equal(result3, result4)

    def runTest(self):
        place = fluid.CPUPlace()
        self._test_equivalence(place)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
            self._test_equivalence(place)


def load_tests(loader, standard_tests, pattern):
    suite = unittest.TestSuite()
    suite.addTest(RowConvTestCase(methodName="runTest"))
    suite.addTest(RowConvTestCase(methodName="runTest", act="sigmoid"))
    suite.addTest(
        RowConvTestCase(
            methodName="runTest", context_size=5, act="sigmoid"))
    return suite


if __name__ == "__main__":
    unittest.main()
@ -0,0 +1,103 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ["RowConv"]

from ...fluid.dygraph import layers
from .. import functional as F


class RowConv(layers.Layer):
    """
    **Row-convolution operator**

    Row convolution is also known as lookahead convolution. This operator
    was introduced in the following paper for
    `DeepSpeech2 <http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf>`_.

    The main motivation is that a bidirectional RNN, useful in DeepSpeech-like
    speech models, learns a representation for a sequence by performing a
    forward and a backward pass through the entire sequence. However, unlike
    unidirectional RNNs, bidirectional RNNs are challenging to deploy in an
    online, low-latency setting. The lookahead convolution incorporates
    information from future subsequences in a computationally efficient
    manner to improve unidirectional recurrent neural networks. The row
    convolution operator is different from 1D sequence convolution and is
    computed as follows:

    Given an input sequence X of length t and input dimension D, and a
    filter W of size (context x D), the output sequence is convolved as

    .. math::
        out_{i} = \\sum_{j = i}^{i + context - 1} X_{j} \\cdot W_{j - i}

    For more details about row_conv, please refer to the design document
    `<https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645>`_ .

    Parameters:
        num_channels (int): Feature size of the input data.
        future_context_size (int): Future context size. Please note, the
            shape of the convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of parameters, including
            name, initializer etc. Default: None.
        act (str): Non-linear activation to be applied to the output
            variable. Default: None.
        dtype (str, optional): Data type of the parameters. It can be
            "float32". Default: "float32".

    Attributes:
        weight (Parameter): shape [future_context_size + 1, D], the
            learnable weight (convolution kernel) of this layer.

    Returns:
        None

    Examples:
        .. code-block:: python

            from paddle import fluid, nn
            import paddle.fluid.dygraph as dg
            import paddle.nn.functional as F
            import numpy as np

            batch_size = 4
            time_steps = 8
            feature_size = 6
            context_size = 4

            x = np.random.randn(batch_size, time_steps, feature_size).astype(np.float32)

            place = fluid.CPUPlace()
            with dg.guard(place):
                x_var = dg.to_variable(x)
                conv = nn.RowConv(feature_size, context_size)
                y_var = conv(x_var)
                y_np = y_var.numpy()
            print(y_np.shape)

            # (4, 8, 6)
    """

    def __init__(self,
                 num_channels,
                 future_context_size,
                 param_attr=None,
                 act=None,
                 dtype="float32"):
        super(RowConv, self).__init__()
        self._dtype = dtype
        self._param_attr = param_attr
        self._act = act

        filter_shape = [future_context_size + 1, num_channels]
        self.weight = self.create_parameter(
            filter_shape, attr=param_attr, dtype=dtype)

    def forward(self, input):
        out = F.row_conv(input, self.weight, act=self._act)
        return out
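To make the formula in the docstring above concrete, here is a minimal plain-numpy sketch of the lookahead convolution, assuming future steps that fall past the end of the sequence contribute zero; the helper name row_conv_ref is illustrative, not part of the API:

    import numpy as np

    def row_conv_ref(x, w):
        # x: [batch, time, channels]; w: [context, channels] with
        # context = future_context_size + 1. Implements
        # out[i] = sum_{j=0}^{context-1} x[i + j] * w[j],
        # dropping terms that run past the end of the sequence.
        batch, time, channels = x.shape
        context = w.shape[0]
        out = np.zeros_like(x)
        for i in range(time):
            for j in range(min(context, time - i)):
                out[:, i, :] += x[:, i + j, :] * w[j]
        return out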