Add new one hot function in nn.functional (#26183)

* add input.py file * write input.py * fix init file * add unit tests * fix dygraph and input shape * Revert "fix dygraph and input shape" This reverts commit 89ad8664124c1872640b68c0a4418c377ad6aa6f. * fixed pylint * fix deprecated * fix old op * fix old op * set check_dygraph=True * Revert "set check_dygraph=True" This reverts commit a8e93e33281f84e5f6fbc2b4bf05ea3e63611519. * test commit * fix doc and change test file name
5 years ago · 8c48c7daec
parent 6914a12f82
commit 8c48c7daec
5 changed files with 322 additions and 0 deletions
--- a/python/paddle/fluid/input.py
+++ b/python/paddle/fluid/input.py
@ -17,10 +17,12 @@ import warnings
 from .framework import Variable, in_dygraph_mode
 from .layer_helper import LayerHelper
 from .data_feeder import check_variable_and_dtype, check_dtype
+from ..utils import deprecated

 __all__ = ['one_hot', 'embedding']


+@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot')
 def one_hot(input, depth, allow_out_of_range=False):
    """
    :alias_main: paddle.nn.functional.one_hot
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@ -35,6 +35,7 @@ from . import utils
 from .. import unique_name
 from functools import reduce
 from .. import core
+from ...utils import deprecated
 from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
 import paddle
 from paddle.utils import deprecated
@ -5800,6 +5801,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
    return loss


+@deprecated(since='2.0.0', update_to='paddle.nn.functional.one_hot')
 def one_hot(input, depth, allow_out_of_range=False):
    """

--- a/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py
+++ b/python/paddle/fluid/tests/unittests/test_nn_functional_hot_op.py
@ -0,0 +1,207 @@
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import math
+from op_test import OpTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+import paddle.nn.functional as functional
+import paddle.fluid.framework as framework
+from paddle.fluid.framework import Program, program_guard
+
+
+class TestOneHotOp(OpTest):
+    def setUp(self):
+        self.op_type = 'one_hot_v2'
+        depth = 10
+        depth_np = np.array(10).astype('int32')
+        dimension = 12
+        x_lod = [[4, 1, 3, 3]]
+        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
+        x = np.array(x).astype('int32').reshape([sum(x_lod[0])])
+
+        out = np.zeros(shape=(np.product(x.shape), depth)).astype('float32')
+
+        for i in range(np.product(x.shape)):
+            out[i, x[i]] = 1.0
+
+        self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
+        self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)}
+        self.outputs = {'Out': (out, x_lod)}
+
+    def test_check_output(self):
+        self.check_output(check_dygraph=False)
+
+
+class TestOneHotOp_attr(OpTest):
+    def setUp(self):
+        self.op_type = 'one_hot_v2'
+        depth = 10
+        dimension = 12
+        x_lod = [[4, 1, 3, 3]]
+        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
+        x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1])
+
+        out = np.zeros(shape=(np.product(x.shape[:-1]), 1,
+                              depth)).astype('float32')
+
+        for i in range(np.product(x.shape)):
+            out[i, 0, x[i]] = 1.0
+
+        self.inputs = {'X': (x, x_lod)}
+        self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth}
+        self.outputs = {'Out': (out, x_lod)}
+
+    def test_check_output(self):
+        self.check_output(check_dygraph=False)
+
+
+class TestOneHotOp_default_dtype(OpTest):
+    def setUp(self):
+        self.op_type = 'one_hot_v2'
+        depth = 10
+        depth_np = np.array(10).astype('int32')
+        dimension = 12
+        x_lod = [[4, 1, 3, 3]]
+        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
+        x = np.array(x).astype('int32').reshape([sum(x_lod[0])])
+
+        out = np.zeros(shape=(np.product(x.shape), depth)).astype('float32')
+
+        for i in range(np.product(x.shape)):
+            out[i, x[i]] = 1.0
+
+        self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np}
+        self.attrs = {}
+        self.outputs = {'Out': (out, x_lod)}
+
+    def test_check_output(self):
+        self.check_output(check_dygraph=False)
+
+
+class TestOneHotOp_default_dtype_attr(OpTest):
+    def setUp(self):
+        self.op_type = 'one_hot_v2'
+        depth = 10
+        dimension = 12
+        x_lod = [[4, 1, 3, 3]]
+        x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))]
+        x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1])
+
+        out = np.zeros(shape=(np.product(x.shape[:-1]), 1,
+                              depth)).astype('float32')
+
+        for i in range(np.product(x.shape)):
+            out[i, 0, x[i]] = 1.0
+
+        self.inputs = {'X': (x, x_lod)}
+        self.attrs = {'depth': depth}
+        self.outputs = {'Out': (out, x_lod)}
+
+    def test_check_output(self):
+        self.check_output(check_dygraph=False)
+
+
+class TestOneHotOp_exception(unittest.TestCase):
+    def setUp(self):
+        self.op_type = 'one_hot_v2'
+        self.depth = 10
+        self.place = core.CPUPlace()
+        self.dimension = 12
+        self.x = core.LoDTensor()
+        x_lod = [[4, 1, 3, 3]]
+        data = [np.random.randint(11, 20) for i in range(sum(x_lod[0]))]
+        data = np.array(data).astype('int').reshape([sum(x_lod[0]), 1])
+        self.x.set(data, self.place)
+        self.x.set_recursive_sequence_lengths(x_lod)
+
+    def test_check_output(self):
+        program = Program()
+        with program_guard(program):
+            x = fluid.layers.data(
+                name='x', shape=[self.dimension], dtype='float32', lod_level=1)
+            block = program.current_block()
+            one_hot_out = block.create_var(
+                name="one_hot_out",
+                type=core.VarDesc.VarType.LOD_TENSOR,
+                dtype='float32')
+            block.append_op(
+                type='one_hot',
+                inputs={'X': x},
+                attrs={'depth': self.depth},
+                outputs={'Out': one_hot_out})
+            exe = fluid.Executor(self.place)
+
+            def run():
+                exe.run(feed={'x': self.x},
+                        fetch_list=[one_hot_out],
+                        return_numpy=False)
+
+            self.assertRaises(core.EnforceNotMet, run)
+
+
+class TestOneHotOpApi(unittest.TestCase):
+    def test_api(self):
+        num_classes = 10
+        self._run(num_classes)
+
+    def test_api_with_depthTensor(self):
+        num_classes = fluid.layers.assign(input=np.array([10], dtype=np.int32))
+        self._run(num_classes)
+
+    def test_api_with_dygraph(self):
+        num_classes = 10
+        label = np.array(
+            [np.random.randint(0, num_classes - 1)
+             for i in range(6)]).reshape([6, 1])
+        with fluid.dygraph.guard():
+            one_hot_label = functional.one_hot(
+                x=fluid.dygraph.to_variable(label), num_classes=num_classes)
+
+    def _run(self, num_classes):
+        label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+        one_hot_label = functional.one_hot(x=label, num_classes=num_classes)
+
+        place = fluid.CPUPlace()
+        label_data = np.array([np.random.randint(0, 10 - 1)
+                               for i in range(6)]).reshape([6, 1])
+
+        exe = fluid.Executor(place)
+        exe.run(fluid.default_startup_program())
+        ret = exe.run(feed={'label': label_data, },
+                      fetch_list=[one_hot_label],
+                      return_numpy=False)
+
+
+class BadInputTestOnehotV2(unittest.TestCase):
+    def test_error(self):
+        with fluid.program_guard(fluid.Program()):
+
+            def test_bad_x():
+                label = fluid.layers.data(
+                    name="label",
+                    shape=[4],
+                    append_batch_size=False,
+                    dtype="float32")
+                one_hot_label = functional.one_hot(x=label, num_classes=4)
+
+            self.assertRaises(TypeError, test_bad_x)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/nn/functional/init.py
+++ b/python/paddle/nn/functional/init.py
@ -198,3 +198,4 @@ from .vision import shuffle_channel  #DEFINE_ALIAS
 from .vision import space_to_depth  #DEFINE_ALIAS
 from .vision import yolo_box  #DEFINE_ALIAS
 from .vision import yolov3_loss  #DEFINE_ALIAS
+from .input import one_hot  #DEFINE_ALIAS
--- a/python/paddle/nn/functional/input.py
+++ b/python/paddle/nn/functional/input.py
@ -0,0 +1,110 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import warnings
+from ...fluid.framework import Variable, in_dygraph_mode
+from ...fluid.layer_helper import LayerHelper
+from ...fluid.layers import core
+from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
+
+__all__ = ['one_hot']
+
+
+def one_hot(x, num_classes, name=None):
+    """
+
+    The operator converts each id in the input 'x' to an one-hot vector with a
+    num_classes length. The value in the vector dimension corresponding to the id
+    is 1, and the value in the remaining dimension is 0.
+
+    The shape of output Tensor is generated by appending num_classes dimension
+    behind the last dimension of the 'x' shape.
+
+    .. code-block:: text
+
+        Example 1:
+
+        input:
+            x.shape = [4]
+            x.data = [1, 1, 3, 0]
+            num_classes = 4
+
+        output:
+            Out.shape = [4, 4]
+            Out.data = [[0., 1., 0., 0.],
+                        [0., 1., 0., 0.],
+                        [0., 0., 0., 1.],
+                        [1., 0., 0., 0.]]
+
+        Example 2:
+
+        input:
+            x.shape = [4]
+            x.data = [1, 1, 5, 0]
+            num_classes = 4
+
+        output: Throw an exception for Illegal value
+            The second dimension in X is 5, which is greater than num_classes,
+            so it throws an exception.
+
+
+    Args:
+        x(Tensor): Tensor with shape :math:`[N_1, N_2, ..., N_k]` ,
+            which contains at least one dimension. The data type is int32 or int64.
+        num_classes(int): An integer defining the num_classes of the one hot dimension. If input 'x'
+            is word id, num_classes is generally the dictionary size.
+
+    Returns:
+        Tensor: The one-hot representations of 'x'. A Tensor with type float32.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle.fluid as fluid
+            # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4].
+            label = fluid.data(name="label", shape=[4, 1], dtype="int64")
+            # label.shape = [4]
+            # label.data = [1, 1, 3, 0]
+            one_hot_label = fluid.one_hot(x=label, num_classes=4)
+            # one_hot_label.shape = [4, 4]
+            # one_hot_label.data = [[0., 1., 0., 0.],
+                                    [0., 1., 0., 0.],
+                                    [0., 0., 0., 1.],
+                                    [1., 0., 0., 0.]]
+    """
+
+    if in_dygraph_mode():
+        return core.ops.one_hot_v2(x, 'depth', num_classes,
+                                   'allow_out_of_range', False)
+    else:
+        check_variable_and_dtype(x, 'input', ['int32', 'int64'], 'one_hot_v2')
+        helper = LayerHelper("one_hot_v2", **locals())
+
+        one_hot_out = helper.create_variable_for_type_inference(dtype='float32')
+        if not isinstance(num_classes, Variable):
+            # user attribute 
+            inputs = {'X': x}
+            attrs = {'depth': num_classes, 'allow_out_of_range': False}
+        else:
+            num_classes.stop_gradient = True
+            inputs = {'X': x, 'depth_tensor': num_classes}
+            attrs = {'allow_out_of_range': False}
+        helper.append_op(
+            type="one_hot_v2",
+            inputs=inputs,
+            attrs=attrs,
+            outputs={'Out': one_hot_out},
+            stop_gradient=True)
+        return one_hot_out