add bilinear tensor product op

8 years ago · 611ee68b78
parent 154e1d0491
commit 611ee68b78
4 changed files with 383 additions and 0 deletions
--- a/paddle/operators/bilinear_tensor_product_op.cc
+++ b/paddle/operators/bilinear_tensor_product_op.cc
@ -0,0 +1,153 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/bilinear_tensor_product_op.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::Tensor;
+
+// Forward operator definition for bilinear_tensor_product.
+//
+// InferShape checks that X and Y are vectors, that Weight is a 3-D tensor with
+// positive extents whose second and third dimensions match X and Y, and that
+// the optional Bias is a vector matching the first dimension of Weight. The
+// output Out is declared as a vector with the first dimension of Weight.
+class BilinearTensorProductOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Weight"),
+                   "Input(Weight) should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null.");
+    auto x_dims = ctx->GetInputDim("X");
+    auto y_dims = ctx->GetInputDim("Y");
+    auto weight_dims = ctx->GetInputDim("Weight");
+
+    PADDLE_ENFORCE_EQ(x_dims.size(), 1, "The input X must be a vector.");
+    PADDLE_ENFORCE_EQ(y_dims.size(), 1, "The input Y must be a vector.");
+    PADDLE_ENFORCE_EQ(weight_dims.size(), 3,
+                      "The input Weight must be a 3D tensor.");
+    PADDLE_ENFORCE_GT(weight_dims[0], 0,
+                      "The first dimension of Weight must be larger than 0.");
+    PADDLE_ENFORCE_GT(weight_dims[1], 0,
+                      "The second dimension of Weight must be larger than 0.");
+    PADDLE_ENFORCE_GT(weight_dims[2], 0,
+                      "The third dimension of Weight must be larger than 0.");
+    PADDLE_ENFORCE_EQ(x_dims[0], weight_dims[1],
+                      "The dimension of X must be equal with the second "
+                      "dimension of the Weight.");
+    PADDLE_ENFORCE_EQ(y_dims[0], weight_dims[2],
+                      "The dimension of Y must be equal with the third "
+                      "dimension of the Weight.");
+
+    // Bias is optional. Probe it through the shape-inference context, the same
+    // way the required inputs are probed above, so that its dimensions are
+    // only requested when the context reports the input as present.
+    if (ctx->HasInput("Bias")) {
+      auto bias_dims = ctx->GetInputDim("Bias");
+      PADDLE_ENFORCE_EQ(bias_dims.size(), 1,
+                        "The input Bias must be a vector.");
+      PADDLE_ENFORCE_EQ(bias_dims[0], weight_dims[0],
+                        "The dimension of Bias must be equal with the first "
+                        "dimension of the Weight.");
+    }
+
+    // One output entry per slice of Weight along its first dimension.
+    ctx->SetOutputDim("Out", {weight_dims[0]});
+  }
+};
+
+// Declares the inputs (X, Y, Weight, Bias), the output (Out) and the
+// user-facing documentation of the bilinear_tensor_product operator.
+class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  BilinearTensorProductOpMaker(framework::OpProto* proto,
+                               framework::OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X", "The first input of tensor op");
+    AddInput("Y", "The second input of tensor op");
+    AddInput("Weight", "The input weight of tensor op");
+    AddInput("Bias", "The input bias of tensor op");
+    AddOutput("Out", "The output of tensor op");
+    // The equation below is stated per output entry: the kernel produces one
+    // entry of Out for each slice W_i and does not sum over i.
+    AddComment(R"DOC(
+Bilinear Tensor Product operator.
+Given input X and Y, a 3D tensor weight, and bias. Each entry of the output is
+computed by one slice i = 1, . . . , k of the tensor: Out_i = X*W_i*Y + Bias_i .
+
+The equation of this operator is:
+
+    Out_i = X*W_i*Y + Bias_i,  i = 1, . . . , k
+
+)DOC");
+  }
+};
+
+// Gradient operator definition for bilinear_tensor_product.
+//
+// InferShape requires X, Y, Weight and Out@GRAD, checks that Out@GRAD is a
+// vector matching the first dimension of Weight, and gives every gradient
+// output that is present the shape of its corresponding forward input.
+class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "Input (Out@GRAD) should not be null");
+    auto x_dims = ctx->GetInputDim("X");
+    auto y_dims = ctx->GetInputDim("Y");
+    auto weight_dims = ctx->GetInputDim("Weight");
+    auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
+
+    // Validate the rank of Weight before indexing into its dimensions.
+    PADDLE_ENFORCE_EQ(weight_dims.size(), 3,
+                      "The input Weight must be a 3D tensor.");
+    PADDLE_ENFORCE_EQ(out_dims.size(), 1, "The Out@GRAD must be a vector.");
+    // Out@GRAD has one entry per slice of Weight, i.e. it is compared against
+    // the FIRST dimension of Weight.
+    PADDLE_ENFORCE_EQ(
+        weight_dims[0], out_dims[0],
+        "The dimension of Out@GRAD must be equal with the first dimension of "
+        "the Weight.");
+
+    // Bias is optional. Probe it through the shape-inference context, the same
+    // way the required inputs are probed above.
+    if (ctx->HasInput("Bias")) {
+      auto bias_dims = ctx->GetInputDim("Bias");
+      PADDLE_ENFORCE_EQ(bias_dims.size(), 1, "Input Bias must be a vector.");
+      PADDLE_ENFORCE_EQ(
+          bias_dims[0], out_dims[0],
+          "The dimension of Bias must be equal with the Out@GRAD ");
+      auto bias_grad_name = framework::GradVarName("Bias");
+      if (ctx->HasOutput(bias_grad_name)) {
+        ctx->SetOutputDim(bias_grad_name, bias_dims);
+      }
+    }
+
+    auto x_grad_name = framework::GradVarName("X");
+    auto y_grad_name = framework::GradVarName("Y");
+    auto weight_grad_name = framework::GradVarName("Weight");
+
+    // Each gradient output is optional and mirrors the shape of its input.
+    if (ctx->HasOutput(x_grad_name)) {
+      ctx->SetOutputDim(x_grad_name, x_dims);
+    }
+    if (ctx->HasOutput(y_grad_name)) {
+      ctx->SetOutputDim(y_grad_name, y_dims);
+    }
+    if (ctx->HasOutput(weight_grad_name)) {
+      ctx->SetOutputDim(weight_grad_name, weight_dims);
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+// Register the operator bilinear_tensor_product with its proto maker, together
+// with its gradient operator bilinear_tensor_product_grad.
+REGISTER_OP(bilinear_tensor_product, ops::BilinearTensorProductOp,
+            ops::BilinearTensorProductOpMaker, bilinear_tensor_product_grad,
+            ops::BilinearTensorProductOpGrad);
+// CPU kernels: only the float instantiation is registered here.
+REGISTER_OP_CPU_KERNEL(
+    bilinear_tensor_product,
+    ops::BilinearTensorProductKernel<paddle::platform::CPUPlace, float>);
+REGISTER_OP_CPU_KERNEL(
+    bilinear_tensor_product_grad,
+    ops::BilinearTensorProductGradKernel<paddle::platform::CPUPlace, float>);
--- a/paddle/operators/bilinear_tensor_product_op.cu
+++ b/paddle/operators/bilinear_tensor_product_op.cu
@ -0,0 +1,24 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/bilinear_tensor_product_op.h"
+
+namespace ops = paddle::operators;
+// GPU kernels for the forward and gradient operators: only the float
+// instantiation is registered here.
+REGISTER_OP_GPU_KERNEL(
+    bilinear_tensor_product,
+    ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, float>);
+REGISTER_OP_GPU_KERNEL(
+    bilinear_tensor_product_grad,
+    ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, float>);
--- a/paddle/operators/bilinear_tensor_product_op.h
+++ b/paddle/operators/bilinear_tensor_product_op.h
@ -0,0 +1,176 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   You may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/math/math_function.h"
+#include "paddle/platform/transform.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using platform::Transform;
+
+// Forward kernel of bilinear_tensor_product.
+//
+// For every slice W_i of Weight along its first dimension, writes
+// Out[i] = (X * W_i) * Y, added on top of Bias[i] when Bias is supplied.
+template <typename Place, typename T>
+class BilinearTensorProductKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const Tensor* in_x = ctx.Input<Tensor>("X");
+    const Tensor* in_y = ctx.Input<Tensor>("Y");
+    const Tensor* in_weight = ctx.Input<Tensor>("Weight");
+    const Tensor* in_bias = ctx.Input<Tensor>("Bias");
+    Tensor* result = ctx.Output<Tensor>("Out");
+    result->mutable_data<T>(ctx.GetPlace());
+
+    auto w_dims = in_weight->dims();
+
+    // Scratch row vector holding the intermediate product X * W_i.
+    Tensor x_times_w;
+    x_times_w.mutable_data<T>(framework::make_ddim({w_dims[2]}),
+                              ctx.GetPlace());
+
+    // With a bias, Out starts as a copy of Bias and each product is
+    // accumulated onto it (beta = 1); without one, each entry is simply
+    // overwritten (beta = 0).
+    const bool has_bias = (in_bias != nullptr);
+    if (has_bias) {
+      result->CopyFrom(*in_bias, ctx.GetPlace(), ctx.device_context());
+    }
+    const T out_beta = has_bias ? static_cast<T>(1) : static_cast<T>(0);
+
+    for (int slice = 0; slice < w_dims[0]; ++slice) {
+      Tensor w_mat = in_weight->Slice(slice, slice + 1)
+                         .Resize(framework::make_ddim({w_dims[1], w_dims[2]}));
+      // x_times_w (1 x dims[2]) = X (1 x dims[1]) * W_slice (dims[1] x dims[2])
+      math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans, 1,
+                           w_dims[2], w_dims[1], 1, in_x->data<T>(),
+                           w_mat.data<T>(), 0, x_times_w.data<T>());
+      // Out[slice] = x_times_w (1 x dims[2]) * Y (dims[2] x 1) + beta * Out[slice]
+      math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans, 1,
+                           1, w_dims[2], 1, x_times_w.data<T>(),
+                           in_y->data<T>(), out_beta,
+                           result->data<T>() + slice);
+    }
+  }
+};
+
+// Unary functor that multiplies its argument by a factor read through a
+// pointer each time it is invoked. The functor stores only the pointer; it
+// does not copy the pointed-to value.
+template <typename T>
+class ScaleFunctor {
+ public:
+  explicit ScaleFunctor(const T* scale) : factor_(scale) {}
+
+  // Returns x multiplied by the value currently stored at the factor pointer.
+  HOSTDEVICE T operator()(const T& x) const { return x * factor_[0]; }
+
+ private:
+  const T* factor_;  // Not owned.
+};
+
+// Gradient kernel of bilinear_tensor_product.
+//
+// Reads X, Y, Weight and Out@GRAD and fills whichever of X@GRAD, Y@GRAD,
+// Weight@GRAD and Bias@GRAD are non-null:
+//   X@GRAD      = sum_i Out@GRAD[i] * (Y * W_i^T)
+//   Y@GRAD      = sum_i Out@GRAD[i] * (W_i^T * X)
+//   Weight@GRAD = per slice i, the outer product (Out@GRAD[i] * X)^T * Y
+//   Bias@GRAD   = copy of Out@GRAD
+template <typename Place, typename T>
+class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const Tensor* x = ctx.Input<Tensor>("X");
+    const Tensor* y = ctx.Input<Tensor>("Y");
+    const Tensor* weight = ctx.Input<Tensor>("Weight");
+    Tensor* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
+    Tensor* d_y = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    Tensor* d_weight = ctx.Output<Tensor>(framework::GradVarName("Weight"));
+    Tensor* d_bias = ctx.Output<Tensor>(framework::GradVarName("Bias"));
+    const Tensor* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
+    auto* d_out_ptr = d_out->data<T>();
+    auto weight_dims = weight->dims();
+
+    // Shape of a single slice of Weight viewed as a matrix.
+    const auto slice_dims =
+        framework::make_ddim({weight_dims[1], weight_dims[2]});
+
+    // The first slice of Weight; it initializes X@GRAD / Y@GRAD (beta = 0).
+    Tensor first_slice = weight->Slice(0, 1).Resize(slice_dims);
+
+    // Scratch buffers holding X and Y scaled by one entry of Out@GRAD.
+    int numel_x = x->numel();
+    int numel_y = y->numel();
+    const T* x_ptr = x->data<T>();
+    const T* y_ptr = y->data<T>();
+    Tensor x_scale;
+    T* x_scale_ptr = x_scale.mutable_data<T>(
+        framework::make_ddim({weight_dims[1]}), ctx.GetPlace());
+    Tensor y_scale;
+    T* y_scale_ptr = y_scale.mutable_data<T>(
+        framework::make_ddim({weight_dims[2]}), ctx.GetPlace());
+    Transform<Place> trans;
+
+    // dst[0..count) = src[0..count) * Out@GRAD[slice]
+    auto scale_by_out_grad = [&](const T* src, int count, T* dst, int slice) {
+      trans(ctx.device_context(), src, src + count, dst,
+            ScaleFunctor<T>(&d_out_ptr[slice]));
+    };
+
+    // X@GRAD = (Y * Out@GRAD[slice]) * W_slice^T + beta * X@GRAD
+    auto update_x_grad = [&](Tensor& w_mat, int slice, T beta) {
+      scale_by_out_grad(y_ptr, numel_y, y_scale_ptr, slice);
+      math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasTrans, 1,
+                           weight_dims[1], weight_dims[2], 1, y_scale.data<T>(),
+                           w_mat.data<T>(), beta, d_x->data<T>());
+    };
+
+    // Y@GRAD = W_slice^T * (X * Out@GRAD[slice]) + beta * Y@GRAD
+    auto update_y_grad = [&](Tensor& w_mat, int slice, T beta) {
+      scale_by_out_grad(x_ptr, numel_x, x_scale_ptr, slice);
+      math::gemm<Place, T>(ctx.device_context(), CblasTrans, CblasNoTrans,
+                           weight_dims[2], 1, weight_dims[1], 1,
+                           w_mat.data<T>(), x_scale.data<T>(), beta,
+                           d_y->data<T>());
+    };
+
+    // Initialize the gradient of X from the first slice of Weight.
+    if (d_x) {
+      d_x->mutable_data<T>(ctx.GetPlace());
+      update_x_grad(first_slice, 0, 0);
+    }
+
+    // Initialize the gradient of Y from the first slice of Weight.
+    if (d_y) {
+      d_y->mutable_data<T>(ctx.GetPlace());
+      update_y_grad(first_slice, 0, 0);
+    }
+
+    // Accumulate the contributions of the remaining slices (beta = 1).
+    const bool wants_input_grad = (d_x != nullptr) || (d_y != nullptr);
+    if (wants_input_grad) {
+      for (int i = 1; i < weight_dims[0]; ++i) {
+        Tensor weight_mat = weight->Slice(i, i + 1).Resize(slice_dims);
+        if (d_x) {
+          update_x_grad(weight_mat, i, 1);
+        }
+        if (d_y) {
+          update_y_grad(weight_mat, i, 1);
+        }
+      }
+    }
+
+    // Calculate the gradient of Weight, one slice at a time.
+    if (d_weight) {
+      d_weight->mutable_data<T>(ctx.GetPlace());
+      for (int i = 0; i < weight_dims[0]; ++i) {
+        Tensor d_weight_mat = d_weight->Slice(i, i + 1).Resize(slice_dims);
+        scale_by_out_grad(x_ptr, numel_x, x_scale_ptr, i);
+        // d_weight_mat (dims[1] x dims[2]) =
+        //     (X * Out@GRAD[i])^T (dims[1] x 1) * Y (1 x dims[2])
+        math::gemm<Place, T>(ctx.device_context(), CblasTrans, CblasNoTrans,
+                             weight_dims[1], weight_dims[2], 1, 1,
+                             x_scale.data<T>(), y->data<T>(), 0,
+                             d_weight_mat.data<T>());
+      }
+    }
+
+    // The gradient of Bias is a copy of Out@GRAD.
+    if (d_bias) {
+      d_bias->mutable_data<T>(ctx.GetPlace());
+      d_bias->CopyFrom(*d_out, ctx.GetPlace(), ctx.device_context());
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
--- a/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py
+++ b/python/paddle/v2/framework/tests/test_bilinear_tensor_product_op.py
@ -0,0 +1,30 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestBilinearTensorProductOp(OpTest):
+    def setUp(self):
+        self.op_type = "bilinear_tensor_product"
+        self.inputs = {
+            'X': np.random.random(3).astype("float32"),
+            'Y': np.random.random(4).astype("float32"),
+            'Weight': np.random.random((5, 3, 4)).astype("float32"),
+            'Bias': np.random.random(5).astype("float32")
+        }
+        self.outputs = {
+            'Out': np.matmul(
+                np.matmul(self.inputs['Weight'], self.inputs['Y']),
+                self.inputs['X']) + self.inputs['Bias']
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(
+            ['X', 'Y', 'Weight', 'Bias'], 'Out', max_relative_error=0.5)
+
+
+# Run the test cases when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()