parent 1c08a2136e
commit 4773e3f541
@@ -0,0 +1,119 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/dist_op.h"
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

class DistOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Dist");
    OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "Dist");
    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Dist");
    ctx->SetOutputDim("Out", {1});
  }
};

class DistOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input Tensor of Dist Op.");
    AddInput("Y", "The Right-hand-side input Tensor of Dist Op.");
    AddOutput("Out",
              "The output of Dist Op, "
              "which is the p-norm of (X - Y)");
    AddAttr<float>("p", "the norm to be computed.").SetDefault(2.0f);
    AddComment(R"DOC(
Dist Operator.

Given two tensors X and Y whose shapes are broadcastable, this operator computes
the p-norm of Z = X - Y. The result is not a norm in the strict sense, but
serves as a measure of distance.

When p = 0, defining $0^0 = 0$, the zero-norm of Z is simply the number of
non-zero elements of Z:
$$
||Z||_{0} = \lim_{p \rightarrow 0} \sum_{i=1}^{m} |z_i|^p
$$

When p = inf, the inf-norm of Z is the maximum absolute value of its elements:
$$
||Z||_{\infty} = \max_i |z_i|
$$

When p = -inf, the negative-inf-norm of Z is the minimum absolute value of its
elements:
$$
||Z||_{-\infty} = \min_i |z_i|
$$

Otherwise, the p-norm of Z follows the formula
$$
||Z||_{p} = (\sum_{i=1}^{m} |z_i|^p)^{1/p}
$$
)DOC");
  }
};

class DistOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    auto x_dims = ctx->GetInputDim("X");
    auto y_dims = ctx->GetInputDim("Y");
    if (ctx->HasOutput(framework::GradVarName("X"))) {
      ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
    }
    if (ctx->HasOutput(framework::GradVarName("Y"))) {
      ctx->SetOutputDim(framework::GradVarName("Y"), y_dims);
    }
  }
};

template <typename T>
class DistGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetInput("X", this->Input("X"));
    op->SetInput("Y", this->Input("Y"));
    op->SetInput("Out", this->Output("Out"));
    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));

    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
    op->SetOutput(framework::GradVarName("Y"), this->InputGrad("Y"));
    op->SetAttrMap(this->Attrs());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(dist, ops::DistOp, ops::DistOpMaker,
                  ops::DistGradOpMaker<paddle::framework::OpDesc>,
                  ops::DistGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(dist_grad, ops::DistOpGrad);
REGISTER_OP_CPU_KERNEL(
    dist, ops::DistKernel<paddle::platform::CPUDeviceContext, float>,
    ops::DistKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    dist_grad, ops::DistGradKernel<paddle::platform::CPUDeviceContext, float>,
    ops::DistGradKernel<paddle::platform::CPUDeviceContext, double>);
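
Note on the backward pass: the patch registers dist_grad above but does not
state the gradient formula anywhere. For 0 < p < inf it follows from
differentiating the p-norm of Z = X - Y; the hand-computed reference gradient
in the Python test below implements exactly this:

$$
\frac{\partial ||Z||_p}{\partial z_i}
= \frac{1}{p} \Big( \sum_{j=1}^{m} |z_j|^p \Big)^{\frac{1}{p}-1} \cdot p \, |z_i|^{p-1} \, \mathrm{sign}(z_i)
= ||Z||_p^{1-p} \, |z_i|^{p-1} \, \mathrm{sign}(z_i)
$$

Since $z_i = x_i - y_i$, the gradient w.r.t. X is this expression and the
gradient w.r.t. Y is its negation, each summed over broadcast dimensions to
recover the input shapes. For p = inf and p = -inf only the elements attaining
the extremum receive a sign(z_i) gradient; for p = 0 the gradient is zero
everywhere.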
@@ -0,0 +1,23 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/dist_op.h"

namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
    dist, ops::DistKernel<paddle::platform::CUDADeviceContext, float>,
    ops::DistKernel<paddle::platform::CUDADeviceContext, double>);
REGISTER_OP_CUDA_KERNEL(
    dist_grad, ops::DistGradKernel<paddle::platform::CUDADeviceContext, float>,
    ops::DistGradKernel<paddle::platform::CUDADeviceContext, double>);
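
As a quick worked example of the norm definitions in the DOC comment above (a
standalone NumPy sketch, not part of the patch; the test file below uses the
same definitions for its reference implementation):

import numpy as np

z = np.array([1.0, -2.0, 0.0, 3.0])   # stand-in for Z = X - Y

print(np.count_nonzero(z))            # p = 0    -> 3 (number of non-zeros)
print(np.max(np.abs(z)))              # p = inf  -> 3.0 (largest |z_i|)
print(np.min(np.abs(z)))              # p = -inf -> 0.0 (smallest |z_i|)
print(np.sum(np.abs(z) ** 2) ** 0.5)  # p = 2    -> sqrt(14) ~ 3.7417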
File diff suppressed because it is too large
@@ -0,0 +1,165 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


def dist(x, y, p):
    # Reference implementation, mirroring the norm definitions in the
    # operator's DOC comment.
    if p == 0.:
        out = np.count_nonzero(x - y)
    elif p == float("inf"):
        out = np.max(np.abs(x - y))
    elif p == float("-inf"):
        out = np.min(np.abs(x - y))
    else:
        out = np.power(np.sum(np.power(np.abs(x - y), p)), 1.0 / p)
    return np.array(out).astype(x.dtype)


class TestDistOp(OpTest):
    def setUp(self):
        self.op_type = 'dist'
        self.attrs = {}
        self.init_case()
        self.inputs = {
            "X": np.random.random(self.x_shape).astype("float64"),
            "Y": np.random.random(self.y_shape).astype("float64")
        }

        self.attrs["p"] = self.p
        self.outputs = {
            "Out": dist(self.inputs["X"], self.inputs["Y"], self.attrs["p"])
        }
        self.gradient = self.calc_gradient()

    def init_case(self):
        self.x_shape = (120, )
        self.y_shape = (120, )
        self.p = 0.

    def calc_gradient(self):
        x = self.inputs["X"]
        y = self.inputs["Y"]
        p = self.attrs["p"]
        if p == 0:
            # The zero-norm counts non-zero elements, so its gradient
            # vanishes almost everywhere.
            grad = np.zeros(x.shape)
        elif p in [float("inf"), float("-inf")]:
            # Only the elements attaining the extremum receive gradient.
            norm = dist(x, y, p)
            x_minus_y_abs = np.abs(x - y)
            grad = np.sign(x - y)
            grad[x_minus_y_abs != norm] = 0
        else:
            # d||Z||_p / dz_i = ||Z||_p^(1-p) * |z_i|^(p-1) * sign(z_i)
            norm = dist(x, y, p)
            grad = np.power(norm, 1 - p) * np.power(np.abs(x - y), p - 1) \
                * np.sign(x - y)

        def get_reduce_dims(x, y):
            # Dimensions along which the broadcast gradient must be summed
            # to recover the original input shapes.
            x_reduce_dims = []
            y_reduce_dims = []

            if x.ndim >= y.ndim:
                y_reshape = tuple([1] * (x.ndim - y.ndim) + list(y.shape))
                y = y.reshape(y_reshape)
            else:
                x_reshape = tuple([1] * (y.ndim - x.ndim) + list(x.shape))
                x = x.reshape(x_reshape)
            for i in range(x.ndim):
                if x.shape[i] > y.shape[i]:
                    y_reduce_dims.append(i)
                elif x.shape[i] < y.shape[i]:
                    x_reduce_dims.append(i)
            return x_reduce_dims, y_reduce_dims

        x_reduce_dims, y_reduce_dims = get_reduce_dims(x, y)
        if len(x_reduce_dims) != 0:
            x_grad = np.sum(grad, tuple(x_reduce_dims)).reshape(x.shape)
        else:
            x_grad = grad
        if len(y_reduce_dims) != 0:
            y_grad = -np.sum(grad, tuple(y_reduce_dims)).reshape(y.shape)
        else:
            y_grad = -grad

        return x_grad, y_grad

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.check_grad(["X", "Y"], "Out", user_defined_grads=self.gradient)


class TestDistOpCase1(TestDistOp):
    def init_case(self):
        self.x_shape = (3, 5, 5, 6)
        self.y_shape = (5, 5, 6)
        self.p = 1.


class TestDistOpCase2(TestDistOp):
    def init_case(self):
        self.x_shape = (10, 10)
        self.y_shape = (4, 10, 10)
        self.p = 2.


class TestDistOpCase3(TestDistOp):
    def init_case(self):
        self.x_shape = (15, 10)
        self.y_shape = (15, 10)
        self.p = float("inf")


class TestDistOpCase4(TestDistOp):
    def init_case(self):
        self.x_shape = (2, 3, 4, 5, 8)
        self.y_shape = (3, 1, 5, 8)
        self.p = float("-inf")


class TestDistOpCase5(TestDistOp):
    def init_case(self):
        self.x_shape = (4, 1, 4, 8)
        self.y_shape = (2, 2, 1, 4, 4, 8)
        self.p = 1.5


class TestDistAPI(unittest.TestCase):
    def test_api(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            x = fluid.data(name='x', shape=[2, 3, 4, 5], dtype='float64')
            y = fluid.data(name='y', shape=[3, 1, 5], dtype='float64')
            p = 2
            x_i = np.random.random((2, 3, 4, 5)).astype("float64")
            y_i = np.random.random((3, 1, 5)).astype("float64")
            result = paddle.dist(x, y, p)
            place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda() \
                else fluid.CPUPlace()
            exe = fluid.Executor(place)
            out = exe.run(fluid.default_main_program(),
                          feed={'x': x_i, 'y': y_i},
                          fetch_list=[result])
            self.assertTrue(np.allclose(dist(x_i, y_i, p), out[0]))


if __name__ == '__main__':
    unittest.main()
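
For completeness, paddle.dist can also be called eagerly; a minimal sketch
under the 2020-era dygraph API (an assumption of this note, not shown in the
patch itself; the imperative DistGradOpMaker registration above is what enables
it, and the shapes here are illustrative):

import numpy as np
import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.random.random((3, 4)).astype("float64"))
    y = fluid.dygraph.to_variable(np.random.random((1, 4)).astype("float64"))
    out = paddle.dist(x, y, 2)  # p = 2: Euclidean distance between X and Y
    print(out.numpy())          # one-element array holding ||X - Y||_2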