Add trace op (#23873)
* add trace op, test=develop
* Optimized the implementation of trace op, test=develop
* fix a bug of include in trace_op.h, test=develop
* move trace API from creation to math, test=develop
* modified en doc, test=develop
* add complex trace api
* add complex sum api, test=develop
* modified en doc of complex sum and trace, test=develop
* modified doc and trace API, test=develop
* modified en doc of trace and sum, test=develop
* modified comment in complex kron API, test=develop
* OP Should Not Have Unused Input, test=develop
* add GetExpectedKernelType, test=develop
parent fa43d74a3a
commit 077e5a0fe5
@@ -0,0 +1,172 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/trace_op.h"

namespace paddle {
namespace operators {

class TraceOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("Input"), true,
        platform::errors::NotFound("Input of TraceOp is not found."));
    PADDLE_ENFORCE_EQ(
        ctx->HasOutput("Out"), true,
        platform::errors::NotFound("Output of TraceOp is not found."));

    int dim1 = ctx->Attrs().Get<int>("dim1");
    int dim2 = ctx->Attrs().Get<int>("dim2");

    auto x_dims = ctx->GetInputDim("Input");

    int dim1_ = dim1 < 0 ? x_dims.size() + dim1 : dim1;
    int dim2_ = dim2 < 0 ? x_dims.size() + dim2 : dim2;

    PADDLE_ENFORCE_GE(
        x_dims.size(), 2,
        platform::errors::OutOfRange(
            "trace requires a tensor of at least two dimensions"));
    PADDLE_ENFORCE_LT(
        dim1_, x_dims.size(),
        platform::errors::OutOfRange(
            "Attr(dim1) is out of range (expected to be in range of [%ld, "
            "%ld], but got %ld).",
            -(x_dims.size()), (x_dims.size() - 1), dim1));
    PADDLE_ENFORCE_LT(
        dim2_, x_dims.size(),
        platform::errors::OutOfRange(
            "Attr(dim2) is out of range (expected to be in range of [%ld, "
            "%ld], but got %ld).",
            -(x_dims.size()), (x_dims.size() - 1), dim2));
    PADDLE_ENFORCE_NE(dim1_, dim2_,
                      platform::errors::InvalidArgument(
                          "The dimensions should not be identical, "
                          "%ld vs %ld.",
                          dim1, dim2));

    auto sizes = vectorize(x_dims);
    if (x_dims.size() == 2) {
      sizes.clear();
      sizes.push_back(1);
    } else {
      sizes.erase(sizes.begin() + std::max(dim1_, dim2_));
      sizes.erase(sizes.begin() + std::min(dim1_, dim2_));
    }
    ctx->SetOutputDim("Out", framework::make_ddim(sizes));
  }
};

class TraceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Input",
             "(Tensor) The input tensor, from which the diagonals are taken.");
    AddOutput("Out", "(Tensor) The sum along diagonals of the input tensor.");
    AddAttr<int>(
        "offset",
        R"DOC((int, default 0), offset of the diagonal from the main diagonal. Can be both positive and negative. Defaults to 0.
        )DOC")
        .SetDefault(0);
    AddAttr<int>(
        "dim1",
        R"DOC((int, default -2), the first dim of the 2-D planes from which the diagonals should be taken.
        Can be both positive and negative. Default: -2.
        )DOC")
        .SetDefault(-2);
    AddAttr<int>(
        "dim2",
        R"DOC((int, default -1), the second dim of the 2-D planes from which the diagonals should be taken.
        Can be both positive and negative. Default: -1.
        )DOC")
        .SetDefault(-1);
    AddComment(R"DOC(
Trace Operator.
Return the sum along diagonals of the input tensor.
The behavior of this operator is similar to how `numpy.trace` works.

If Input is 2-D, returns the sum of its diagonal.
If Input has more than two dimensions, returns a tensor of sums along
diagonals, with the diagonals taken from the 2-D planes specified by
dim1 and dim2.

)DOC");
  }
};
class TraceOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("Input"), true,
        platform::errors::NotFound("Input(Input) of TraceGradOp is not found."));
    PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("Input")), true,
                      platform::errors::NotFound(
                          "Output(Input@GRAD) of TraceGradOp is not found."));
    ctx->SetOutputDim(framework::GradVarName("Input"),
                      ctx->GetInputDim("Input"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
                                       ctx, framework::GradVarName("Out")),
                                   ctx.GetPlace());
  }
};

template <typename T>
class TraceGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  void Apply(GradOpPtr<T> grad_op) const override {
    grad_op->SetType("trace_grad");
    grad_op->SetInput("Input", this->Input("Input"));
    grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("Input"),
                       this->InputGrad("Input"));
    grad_op->SetAttrMap(this->Attrs());
  }
};

DECLARE_NO_NEED_BUFFER_VARS_INFERER(TraceGradNoNeedBufferVarsInference,
                                    "Input");

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(trace, ops::TraceOp, ops::TraceOpMaker,
                  ops::TraceGradOpMaker<paddle::framework::OpDesc>,
                  ops::TraceGradOpMaker<paddle::imperative::OpBase>);

REGISTER_OPERATOR(trace_grad, ops::TraceOpGrad,
                  ops::TraceGradNoNeedBufferVarsInference);
REGISTER_OP_CPU_KERNEL(
    trace, ops::TraceKernel<paddle::platform::CPUDeviceContext, int>,
    ops::TraceKernel<paddle::platform::CPUDeviceContext, float>,
    ops::TraceKernel<paddle::platform::CPUDeviceContext, double>,
    ops::TraceKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
    trace_grad, ops::TraceGradKernel<paddle::platform::CPUDeviceContext, int>,
    ops::TraceGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::TraceGradKernel<paddle::platform::CPUDeviceContext, double>,
    ops::TraceGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
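For reference, the shape contract enforced by TraceOp::InferShape above (dim1 and dim2 are removed from the output shape; a 2-D input yields a single-element output) matches numpy.trace, which the DOC block cites. A minimal NumPy sketch, purely illustrative:

# Illustrative NumPy sketch of the shape rule checked in TraceOp::InferShape.
import numpy as np

x2d = np.arange(6.0).reshape(2, 3)
print(np.trace(x2d))  # a scalar; the op itself emits shape [1] for 2-D input

x4d = np.random.randn(2, 20, 2, 3)
out = np.trace(x4d, offset=1, axis1=0, axis2=2)
print(out.shape)  # (20, 3): dims 0 and 2 removed, remaining dims kept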
@@ -0,0 +1,70 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/reduce_ops/cub_reduce.h"
#include "paddle/fluid/operators/trace_op.h"

namespace paddle {
namespace operators {

template <typename T>
struct IdentityFunctor {
  HOSTDEVICE explicit inline IdentityFunctor() {}

  HOSTDEVICE inline T operator()(const T& x) const { return x; }
};

template <typename DeviceContext, typename T>
class TraceCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* input = context.Input<framework::Tensor>("Input");
    auto* out = context.Output<framework::Tensor>("Out");

    const int64_t offset = context.Attr<int>("offset");
    const int64_t dim1 = context.Attr<int>("dim1");
    const int64_t dim2 = context.Attr<int>("dim2");

    out->mutable_data<T>(context.GetPlace());
    const framework::Tensor diag =
        Diagonal<DeviceContext, T>(context, input, offset, dim1, dim2);
    if (diag.numel() > 0) {
      auto stream = context.cuda_device_context().stream();
      std::vector<int> reduce_dims;
      reduce_dims.push_back(out->dims().size());
      TensorReduce<T, T, cub::Sum, IdentityFunctor<T>>(
          diag, out, reduce_dims, static_cast<T>(0), cub::Sum(),
          IdentityFunctor<T>(), stream);
    }
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace platform = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
    trace, ops::TraceCUDAKernel<paddle::platform::CUDADeviceContext, int>,
    ops::TraceCUDAKernel<paddle::platform::CUDADeviceContext, int64_t>,
    ops::TraceCUDAKernel<paddle::platform::CUDADeviceContext,
                         platform::float16>,
    ops::TraceCUDAKernel<paddle::platform::CUDADeviceContext, float>,
    ops::TraceCUDAKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    trace_grad, ops::TraceGradKernel<paddle::platform::CUDADeviceContext, int>,
    ops::TraceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
    ops::TraceGradKernel<paddle::platform::CUDADeviceContext,
                         platform::float16>,
    ops::TraceGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::TraceGradKernel<paddle::platform::CUDADeviceContext, double>);
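The CUDA kernel above computes trace in two stages: Diagonal<DeviceContext, T> gathers the selected diagonals (the reduce over index out->dims().size() suggests each diagonal lands on the trailing axis of diag), then a cub-backed TensorReduce sums that trailing axis. A rough NumPy analogue of the same two-stage strategy, for illustration only:

# Rough NumPy analogue of the Diagonal-then-TensorReduce strategy above.
import numpy as np

x = np.random.randn(2, 20, 2, 3)
diag = np.diagonal(x, offset=1, axis1=0, axis2=2)  # stage 1: gather diagonals onto the last axis
out = diag.sum(axis=-1)                            # stage 2: reduce the trailing axis
assert np.allclose(out, np.trace(x, offset=1, axis1=0, axis2=2))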
File diff suppressed because it is too large
@@ -0,0 +1,42 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from numpy.random import random as rand
import paddle.complex as cpx
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg


class TestComplexSumLayer(unittest.TestCase):
    def setUp(self):
        self._dtype = "float64"
        self._places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            self._places.append(fluid.CUDAPlace(0))

    def test_complex_x(self):
        input = rand([2, 10, 10]).astype(self._dtype) + 1j * rand(
            [2, 10, 10]).astype(self._dtype)
        for place in self._places:
            with dg.guard(place):
                var_x = dg.to_variable(input)
                result = cpx.sum(var_x, dim=[1, 2]).numpy()
                target = np.sum(input, axis=(1, 2))
                self.assertTrue(np.allclose(result, target))


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,42 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from numpy.random import random as rand
import paddle.complex as cpx
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg


class TestComplexTraceLayer(unittest.TestCase):
    def setUp(self):
        self._dtype = "float64"
        self._places = [fluid.CPUPlace()]
        if fluid.core.is_compiled_with_cuda():
            self._places.append(fluid.CUDAPlace(0))

    def test_complex_x(self):
        input = rand([2, 20, 2, 3]).astype(self._dtype) + 1j * rand(
            [2, 20, 2, 3]).astype(self._dtype)
        for place in self._places:
            with dg.guard(place):
                var_x = dg.to_variable(input)
                result = cpx.trace(var_x, offset=1, dim1=0, dim2=2).numpy()
                target = np.trace(input, offset=1, axis1=0, axis2=2)
                self.assertTrue(np.allclose(result, target))


if __name__ == '__main__':
    unittest.main()
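Both complex-layer tests above rest on the fact that sum and trace are linear, so the complex result is just the real-valued op applied to the real and imaginary parts independently. A NumPy-only sketch of that identity (illustrative; it is also why the tests can use plain np.sum/np.trace on complex inputs as targets):

# Linearity identity behind the complex sum/trace tests (NumPy only).
import numpy as np

x = np.random.randn(2, 20, 2, 3) + 1j * np.random.randn(2, 20, 2, 3)
t_parts = (np.trace(x.real, offset=1, axis1=0, axis2=2) +
           1j * np.trace(x.imag, offset=1, axis1=0, axis2=2))
assert np.allclose(t_parts, np.trace(x, offset=1, axis1=0, axis2=2))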
@@ -0,0 +1,89 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
from op_test import OpTest
import paddle.nn.functional as F
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.tensor as tensor


class TestTraceOp(OpTest):
    def setUp(self):
        self.op_type = "trace"
        self.init_config()
        self.outputs = {'Out': self.target}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(['Input'], 'Out')

    def init_config(self):
        self.case = np.random.randn(20, 6).astype('float64')
        self.inputs = {'Input': self.case}
        self.attrs = {'offset': 0, 'dim1': 0, 'dim2': 1}
        self.target = np.trace(self.inputs['Input'])


class TestTraceOpCase1(TestTraceOp):
    def init_config(self):
        self.case = np.random.randn(2, 20, 2, 3).astype('float32')
        self.inputs = {'Input': self.case}
        self.attrs = {'offset': 1, 'dim1': 0, 'dim2': 2}
        self.target = np.trace(
            self.inputs['Input'],
            offset=self.attrs['offset'],
            axis1=self.attrs['dim1'],
            axis2=self.attrs['dim2'])


class TestTraceOpCase2(TestTraceOp):
    def init_config(self):
        self.case = np.random.randn(2, 20, 2, 3).astype('float32')
        self.inputs = {'Input': self.case}
        self.attrs = {'offset': -5, 'dim1': 1, 'dim2': -1}
        self.target = np.trace(
            self.inputs['Input'],
            offset=self.attrs['offset'],
            axis1=self.attrs['dim1'],
            axis2=self.attrs['dim2'])


class TestTraceAPICase(unittest.TestCase):
    def test_case1(self):
        case = np.random.randn(2, 20, 2, 3).astype('float32')
        data1 = fluid.data(name='data1', shape=[2, 20, 2, 3], dtype='float32')
        out1 = tensor.trace(data1)
        out2 = tensor.trace(data1, offset=-5, dim1=1, dim2=-1)

        place = core.CPUPlace()
        exe = fluid.Executor(place)
        results = exe.run(fluid.default_main_program(),
                          feed={"data1": case},
                          fetch_list=[out1, out2],
                          return_numpy=True)
        target1 = np.trace(case)
        target2 = np.trace(case, offset=-5, axis1=1, axis2=-1)
        self.assertTrue(np.allclose(results[0], target1))
        self.assertTrue(np.allclose(results[1], target2))


if __name__ == "__main__":
    unittest.main()
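TestTraceAPICase exercises only the static-graph path. For completeness, a hedged dygraph sketch of the same call, reusing the dg.guard/to_variable pattern from the complex-layer tests above; whether paddle.tensor.trace is dygraph-ready at this commit is an assumption, not something the diff shows:

# Hypothetical dygraph usage of the new API, mirroring TestTraceAPICase.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.tensor as tensor

case = np.random.randn(2, 20, 2, 3).astype('float32')
with dg.guard(fluid.CPUPlace()):
    # assumes tensor.trace accepts dygraph variables at this commit
    out = tensor.trace(dg.to_variable(case), offset=-5, dim1=1, dim2=-1).numpy()
assert np.allclose(out, np.trace(case, offset=-5, axis1=1, axis2=-1))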