Add row_conv and hsigmoid into paddle.nn(functional and layer) (#23517)
* add approximation for gelu, test=develop * add functional conv * add test and doc for function convs, test=develop * update ConvTransposeOp's InferShape and error message, test=develop * add hsigmoid, row_conv in paddle.nn(functional and layer), test=develop * fix hyperlinks in docstring (branch: revert-22778-infer_var_type)
parent
4231d84077
commit
600cb8c828
@ -0,0 +1,219 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from paddle import fluid, nn
|
||||||
|
import paddle.fluid.dygraph as dg
|
||||||
|
import paddle.nn.functional as F
|
||||||
|
import paddle.fluid.initializer as I
|
||||||
|
import numpy as np
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
class HSigmoidTestCase(unittest.TestCase):
    """Equivalence test for hierarchical sigmoid (hsigmoid).

    Runs the same randomly-initialized hsigmoid computation through three
    API surfaces and asserts they all produce the same output:

    1. ``fluid.layers.hsigmoid``        (declarative; layer owns parameters)
    2. ``paddle.nn.functional.hsigmoid`` (declarative; parameters fed in)
    3. ``paddle.nn.HSigmoid``            (imperative / dygraph layer)
    """

    def __init__(self,
                 methodName="runTest",
                 batch_size=4,
                 feature_size=6,
                 num_classes=8,
                 labels=None,
                 path_code=None,
                 path_table=None,
                 is_sparse=False,
                 dtype="float32"):
        # Fix: forward methodName to unittest.TestCase. It was previously
        # dropped (super().__init__() with no args), so a non-default
        # method name was silently ignored. This also matches
        # RowConvTestCase, which forwards it.
        super(HSigmoidTestCase, self).__init__(methodName=methodName)
        self.batch_size = batch_size
        self.feature_size = feature_size
        self.num_classes = num_classes
        self.dtype = dtype
        self.is_sparse = is_sparse

        self.labels = labels
        self.path_code = path_code
        self.path_table = path_table
        # Custom-tree mode requires BOTH a path_code and a path_table.
        self.is_custom = path_code is not None and path_table is not None

    def setUp(self):
        # Shared random inputs and parameters so that all three code paths
        # are numerically comparable.
        input_shape = (self.batch_size, self.feature_size)
        self.input = np.random.uniform(
            -1, 1, size=input_shape).astype(self.dtype)
        if self.labels is None:
            self.labels = np.random.randint(
                0, self.num_classes, size=(self.batch_size, 1)).astype(np.int64)
        # With a custom tree the weight has num_classes rows; with the
        # default (complete binary) tree it has num_classes - 1 rows.
        C = self.num_classes if self.is_custom else self.num_classes - 1
        self.weight_shape = (C, self.feature_size)
        self.weight = np.random.randn(*self.weight_shape).astype(self.dtype)
        self.bias_shape = (C, 1)
        self.bias = np.random.randn(*self.bias_shape).astype(self.dtype)

    def fluid_layer(self, place):
        """Reference result computed with ``fluid.layers.hsigmoid``."""
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, self.feature_size], dtype=self.dtype)
                label = fluid.data("labels", [-1, 1], dtype="int64")
                if self.is_custom:
                    path_table = fluid.data(
                        "path_table", [-1, -1], dtype="int64")
                    path_code = fluid.data("path_code", [-1, -1], dtype="int64")
                else:
                    path_table = path_code = None
                y = fluid.layers.hsigmoid(
                    x,
                    label,
                    self.num_classes,
                    param_attr=I.NumpyArrayInitializer(self.weight),
                    bias_attr=I.NumpyArrayInitializer(self.bias),
                    path_table=path_table,
                    path_code=path_code,
                    is_custom=self.is_custom,
                    is_sparse=self.is_sparse, )
        exe = fluid.Executor(place)
        exe.run(start)
        feed_dict = {"input": self.input, "labels": self.labels}
        if self.is_custom:
            feed_dict["path_code"] = self.path_code
            feed_dict["path_table"] = self.path_table
        y_np, = exe.run(main, feed=feed_dict, fetch_list=[y])
        return y_np

    def functional(self, place):
        """Result computed with ``paddle.nn.functional.hsigmoid``; the
        weight and bias are fed in as data rather than created by the op."""
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, self.feature_size], dtype=self.dtype)
                label = fluid.data("labels", [-1, 1], dtype="int64")
                if self.is_custom:
                    path_table = fluid.data(
                        "path_table", [-1, -1], dtype="int64")
                    path_code = fluid.data("path_code", [-1, -1], dtype="int64")
                else:
                    path_table = path_code = None
                w = fluid.data("weight", self.weight_shape, dtype=self.dtype)
                b = fluid.data("bias", self.bias_shape, dtype=self.dtype)
                y = F.hsigmoid(
                    x,
                    label,
                    w,
                    b,
                    self.num_classes,
                    is_sparse=self.is_sparse,
                    path_table=path_table,
                    path_code=path_code)

        exe = fluid.Executor(place)
        exe.run(start)
        feed_dict = {
            "input": self.input,
            "labels": self.labels,
            "weight": self.weight,
            "bias": self.bias
        }
        if self.is_custom:
            feed_dict["path_code"] = self.path_code
            feed_dict["path_table"] = self.path_table
        y_np, = exe.run(main, feed=feed_dict, fetch_list=[y])
        return y_np

    def nn_layer(self, place):
        """Result computed with the imperative ``paddle.nn.HSigmoid`` layer."""
        with dg.guard(place):
            x_var = dg.to_variable(self.input)
            label_var = dg.to_variable(self.labels)
            if self.is_custom:
                path_code_var = dg.to_variable(self.path_code)
                path_table_var = dg.to_variable(self.path_table)
            else:
                path_code_var = path_table_var = None
            hierarchical_softmax = nn.HSigmoid(
                self.feature_size,
                self.num_classes,
                is_custom=self.is_custom,
                is_sparse=self.is_sparse,
                param_attr=I.NumpyArrayInitializer(self.weight),
                bias_attr=I.NumpyArrayInitializer(self.bias),
                dtype=self.dtype)
            y_var = hierarchical_softmax(
                x_var,
                label_var,
                path_table=path_table_var,
                path_code=path_code_var)
            y_np = y_var.numpy()
        return y_np

    def _test_equivalence(self, place):
        # All three API surfaces must agree to float precision.
        result1 = self.fluid_layer(place)
        result2 = self.functional(place)
        result3 = self.nn_layer(place)
        np.testing.assert_array_almost_equal(result1, result2)
        np.testing.assert_array_almost_equal(result2, result3)

    def runTest(self):
        place = fluid.CPUPlace()
        self._test_equivalence(place)
|
||||||
|
|
||||||
|
|
||||||
|
class HSigmoidTestErrorCase(HSigmoidTestCase):
    """Negative test: constructing/running nn.HSigmoid with invalid
    arguments (e.g. num_classes=1) must raise ValueError."""

    def runTest(self):
        with dg.guard(fluid.CPUPlace()):
            # The dygraph layer below is expected to reject the
            # configuration supplied to this test case.
            self.assertRaises(ValueError, self.nn_layer)

    def nn_layer(self):
        # Same flow as the parent's nn_layer, but runs inside the guard
        # established by runTest (no place argument).
        x = dg.to_variable(self.input)
        labels = dg.to_variable(self.labels)
        if self.is_custom:
            codes = dg.to_variable(self.path_code)
            tables = dg.to_variable(self.path_table)
        else:
            codes = tables = None
        model = nn.HSigmoid(
            self.feature_size,
            self.num_classes,
            is_custom=self.is_custom,
            param_attr=I.NumpyArrayInitializer(self.weight),
            bias_attr=I.NumpyArrayInitializer(self.bias),
            dtype=self.dtype)
        out = model(x, labels, path_table=tables, path_code=codes)
        return out.numpy()
|
||||||
|
|
||||||
|
|
||||||
|
def load_tests(loader, standard_tests, pattern):
    """Build the hsigmoid test suite explicitly (unittest's load_tests
    protocol), since these TestCases take constructor arguments."""
    # Hand-crafted custom tree for the second case: per-sample node paths
    # and binary codes, padded with -1.
    labels = np.array([0, 1, 4, 5]).astype(np.int64)
    table = np.array([
        (0, 2, -1, -1, -1),
        (0, 1, 3, -1, -1),
        (0, 1, 4, -1, -1),
        (0, 2, -1, -1, -1),
    ]).astype(np.int64)
    code = np.array([
        (0, 0, -1, -1, -1),
        (1, 1, 1, -1, -1),
        (1, 0, 0, -1, -1),
        (0, 1, -1, -1, -1),
    ]).astype(np.int64)

    suite = unittest.TestSuite()
    suite.addTest(HSigmoidTestCase(methodName="runTest"))
    suite.addTest(
        HSigmoidTestCase(
            methodName="runTest",
            batch_size=4,
            feature_size=6,
            num_classes=8,
            labels=labels,
            path_table=table,
            path_code=code))
    # num_classes=1 is invalid and must raise ValueError.
    suite.addTest(HSigmoidTestErrorCase(methodName="runTest", num_classes=1))
    return suite
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test file directly; the suite is assembled by
# load_tests above.
if __name__ == "__main__":
    unittest.main()
|
@ -0,0 +1,131 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from paddle import fluid, nn
|
||||||
|
import paddle.fluid.dygraph as dg
|
||||||
|
import paddle.fluid.initializer as I
|
||||||
|
import paddle.nn.functional as F
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
|
||||||
|
class RowConvTestCase(unittest.TestCase):
    """Equivalence test for row convolution (lookahead convolution).

    Runs the same randomly-initialized row_conv through four API surfaces
    and asserts they all produce the same output:

    1. ``fluid.layers.row_conv``          (declarative; layer owns weight)
    2. ``paddle.nn.functional.row_conv``  (declarative; weight fed in)
    3. ``paddle.nn.functional.row_conv``  (imperative / dygraph)
    4. ``paddle.nn.RowConv``              (imperative layer)
    """

    def __init__(self,
                 methodName='runTest',
                 batch_size=4,
                 num_channels=8,
                 time_steps=12,
                 context_size=3,
                 act=None,
                 dtype="float32"):
        super(RowConvTestCase, self).__init__(methodName=methodName)
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.time_steps = time_steps
        self.context_size = context_size
        self.act = act
        self.dtype = dtype

    def setUp(self):
        # Shared random input and kernel so all four code paths are
        # numerically comparable. Kernel shape is [context_size + 1, D].
        input_shape = (self.batch_size, self.time_steps, self.num_channels)
        self.input = np.random.uniform(size=input_shape).astype(self.dtype)
        self.weight_shape = weight_shape = (self.context_size + 1,
                                            self.num_channels)
        self.weight = np.random.uniform(size=weight_shape).astype(self.dtype)

    def fluid_layer(self, place):
        """Reference result computed with ``fluid.layers.row_conv``."""
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, -1, self.num_channels], dtype=self.dtype)
                y = fluid.layers.row_conv(
                    x,
                    self.context_size,
                    param_attr=I.NumpyArrayInitializer(self.weight),
                    act=self.act)
        exe = fluid.Executor(place)
        exe.run(start)
        y_np, = exe.run(main, feed={"input": self.input}, fetch_list=[y])
        return y_np

    def functional_declarative(self, place):
        """Result computed with ``F.row_conv`` in declarative (graph) mode."""
        main = fluid.Program()
        start = fluid.Program()
        with fluid.unique_name.guard():
            with fluid.program_guard(main, start):
                x = fluid.data(
                    "input", [-1, -1, self.num_channels], dtype=self.dtype)
                w = fluid.data("weight", self.weight_shape, dtype=self.dtype)
                y = F.row_conv(x, w, act=self.act)
        exe = fluid.Executor(place)
        exe.run(start)
        y_np, = exe.run(main,
                        feed={"input": self.input,
                              "weight": self.weight},
                        fetch_list=[y])
        return y_np

    def functional_imperative(self, place):
        """Result computed with ``F.row_conv`` in imperative (dygraph) mode."""
        with dg.guard(place):
            x_var = dg.to_variable(self.input)
            w_var = dg.to_variable(self.weight)
            y_var = F.row_conv(x_var, w_var, act=self.act)
            y_np = y_var.numpy()
        return y_np

    def nn_layer(self, place):
        """Result computed with the imperative ``paddle.nn.RowConv`` layer."""
        with dg.guard(place):
            x_var = dg.to_variable(self.input)
            conv = nn.RowConv(
                self.num_channels,
                self.context_size,
                param_attr=I.NumpyArrayInitializer(self.weight),
                act=self.act,
                dtype=self.dtype)
            y_var = conv(x_var)
            y_np = y_var.numpy()
        return y_np

    def _test_equivalence(self, place):
        # All four API surfaces must agree to float precision.
        result1 = self.fluid_layer(place)
        result2 = self.functional_declarative(place)
        result3 = self.functional_imperative(place)
        result4 = self.nn_layer(place)
        np.testing.assert_array_almost_equal(result1, result2)
        np.testing.assert_array_almost_equal(result2, result3)
        np.testing.assert_array_almost_equal(result3, result4)

    def runTest(self):
        place = fluid.CPUPlace()
        self._test_equivalence(place)

        if fluid.core.is_compiled_with_cuda():
            # BUG FIX: this used to be `palce = fluid.CUDAPlace(0)` followed
            # by `self._test_equivalence(place)` — the typo meant the CUDA
            # place was never used and the CPU test simply ran twice.
            place = fluid.CUDAPlace(0)
            self._test_equivalence(place)
|
||||||
|
|
||||||
|
|
||||||
|
def load_tests(loader, standard_tests, pattern):
    """Assemble the row_conv test suite manually (unittest's load_tests
    protocol), since the cases take constructor arguments."""
    cases = [
        RowConvTestCase(methodName="runTest"),
        RowConvTestCase(methodName="runTest", act="sigmoid"),
        RowConvTestCase(methodName="runTest", context_size=5, act="sigmoid"),
    ]
    suite = unittest.TestSuite()
    for case in cases:
        suite.addTest(case)
    return suite
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this test file directly; the suite is assembled by
# load_tests above.
if __name__ == "__main__":
    unittest.main()
|
@ -0,0 +1,103 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
__all__ = ["RowConv"]
|
||||||
|
|
||||||
|
from ...fluid.dygraph import layers
|
||||||
|
from .. import functional as F
|
||||||
|
|
||||||
|
|
||||||
|
class RowConv(layers.Layer):
    """**Row-convolution operator** (lookahead convolution).

    Introduced for DeepSpeech2
    (`paper <http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf>`_).
    Bidirectional RNNs learn sequence representations with a full forward
    and backward pass, which makes them hard to deploy in online,
    low-latency settings. Lookahead convolution instead incorporates a
    small window of *future* context into a unidirectional RNN in a
    computationally efficient way. It differs from ordinary 1D sequence
    convolution: given an input sequence of length t with feature size D
    and a filter of shape [context, D], each output step mixes the current
    step with the next ``context`` steps.

    Design notes:
    `<https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645>`_ .

    Parameters:
        num_channels (int): input data's feature size.
        future_context_size (int): future context size; the convolution
            kernel has shape [future_context_size + 1, D].
        param_attr (ParamAttr): attributes of the kernel parameter
            (name, initializer, ...). Default: None.
        act (str): non-linear activation applied to the output. Default: None.
        dtype (str, optional): data type, e.g. "float32". Default: "float32".

    Attributes:
        weight (Parameter): the learnable convolution kernel of shape
            [future_context_size + 1, D].

    Returns:
        None

    Examples:
        .. code-block:: python

            from paddle import fluid, nn
            import paddle.fluid.dygraph as dg
            import numpy as np

            batch_size, time_steps, feature_size, context_size = 4, 8, 6, 4
            x = np.random.randn(
                batch_size, time_steps, feature_size).astype(np.float32)

            with dg.guard(fluid.CPUPlace()):
                x_var = dg.to_variable(x)
                conv = nn.RowConv(feature_size, context_size)
                y_np = conv(x_var).numpy()
                print(y_np.shape)
            # (4, 8, 6)
    """

    def __init__(self,
                 num_channels,
                 future_context_size,
                 param_attr=None,
                 act=None,
                 dtype="float32"):
        super(RowConv, self).__init__()
        self._dtype = dtype
        self._param_attr = param_attr
        self._act = act
        # Kernel covers the current step plus future_context_size
        # lookahead steps: [future_context_size + 1, num_channels].
        kernel_shape = [future_context_size + 1, num_channels]
        self.weight = self.create_parameter(
            kernel_shape, attr=param_attr, dtype=dtype)

    def forward(self, input):
        # Delegate to the functional API with this layer's kernel.
        return F.row_conv(input, self.weight, act=self._act)
|
Loading…
Reference in new issue