refine memory transform

mobile_baidu
peterzhang2029 8 years ago
parent f5cb52ca3e
commit 47269273ff

@@ -34,34 +34,34 @@ class BilinearTensorProductOp : public framework::OperatorWithKernel {
     auto y_dims = ctx->GetInputDim("Y");
     auto weight_dims = ctx->GetInputDim("Weight");
-    PADDLE_ENFORCE_EQ(x_dims.size(), 2, "The input X must be a 2D Tensor.");
-    PADDLE_ENFORCE_EQ(y_dims.size(), 2, "The input Y must be a 2D Tensor.");
-    PADDLE_ENFORCE_EQ(weight_dims.size(), 3,
+    PADDLE_ENFORCE_EQ(x_dims.size(), 2UL, "The input X must be a 2D Tensor.");
+    PADDLE_ENFORCE_EQ(y_dims.size(), 2UL, "The input Y must be a 2D Tensor.");
+    PADDLE_ENFORCE_EQ(weight_dims.size(), 3UL,
                       "The input Weight must be a 3D tensor.");
-    PADDLE_ENFORCE_GT(weight_dims[0], 0,
+    PADDLE_ENFORCE(weight_dims[0],
                    "The first dimension of Weight must be larger than 0.");
-    PADDLE_ENFORCE_GT(weight_dims[1], 0,
+    PADDLE_ENFORCE(weight_dims[1],
                    "The second dimension of Weight must be larger than 0.");
-    PADDLE_ENFORCE_GT(weight_dims[2], 0,
+    PADDLE_ENFORCE(weight_dims[2],
                    "The third dimension of Weight must be larger than 0.");
     PADDLE_ENFORCE_EQ(x_dims[0], y_dims[0],
                       "The first dimension(batch_size) of X must be "
-                      "equal with the first dimension of the Y.");
+                      "equal to the first dimension of the Y.");
     PADDLE_ENFORCE_EQ(x_dims[1], weight_dims[1],
-                      "The second dimension of X must be equal with the second "
+                      "The second dimension of X must be equal to the second "
                       "dimension of the Weight.");
     PADDLE_ENFORCE_EQ(y_dims[1], weight_dims[2],
-                      "The second dimension of Y must be equal with the third "
+                      "The second dimension of Y must be equal to the third "
                       "dimension of the Weight.");
     if (ctx->HasInput("Bias")) {
       auto bias_dims = ctx->GetInputDim("Bias");
-      PADDLE_ENFORCE_EQ(bias_dims.size(), 2,
+      PADDLE_ENFORCE_EQ(bias_dims.size(), 2UL,
                         "The input Bias must have 2 dimensions.");
-      PADDLE_ENFORCE_EQ(bias_dims[0], 1,
+      PADDLE_ENFORCE_EQ(bias_dims[0], 1UL,
                         "The first dimension of input Bias must be 1.");
       PADDLE_ENFORCE_EQ(bias_dims[1], weight_dims[0],
-                        "The second dimension of Bias must be equal with the "
+                        "The second dimension of Bias must be equal to the "
                         "first dimension of the Weight.");
     }
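For readers skimming the checks above, this is the shape contract they enforce: X is [batch, d1], Y is [batch, d2], Weight is [k, d1, d2], the optional Bias is [1, k], and Out is [batch, k]. A minimal standalone sketch of the computation (plain C++, not Paddle code; the function name and nested-vector layout are illustrative only):

#include <vector>

// Naive reference: out[b][i] = x[b]^T * W[i] * y[b] (+ bias[i] if present).
std::vector<std::vector<float>> BilinearTensorProduct(
    const std::vector<std::vector<float>>& x,               // [batch, d1]
    const std::vector<std::vector<float>>& y,               // [batch, d2]
    const std::vector<std::vector<std::vector<float>>>& w,  // [k, d1, d2]
    const std::vector<float>& bias) {                       // [k], may be empty
  const size_t batch = x.size(), k = w.size();
  std::vector<std::vector<float>> out(batch, std::vector<float>(k, 0.0f));
  for (size_t b = 0; b < batch; ++b) {
    for (size_t i = 0; i < k; ++i) {
      float acc = bias.empty() ? 0.0f : bias[i];
      for (size_t p = 0; p < w[i].size(); ++p)       // d1, the width of X
        for (size_t q = 0; q < w[i][p].size(); ++q)  // d2, the width of Y
          acc += x[b][p] * w[i][p][q] * y[b][q];
      out[b][i] = acc;
    }
  }
  return out;
}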
@@ -75,12 +75,12 @@ class BilinearTensorProductOpMaker : public framework::OpProtoAndCheckerMaker {
   BilinearTensorProductOpMaker(framework::OpProto* proto,
                                framework::OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "The first input of BilinearTensorProduct op");
-    AddInput("Y", "The second input of BilinearTensorProduct op");
-    AddInput("Weight", "The input weight of BilinearTensorProduct op");
-    AddInput("Bias", "The input bias of BilinearTensorProduct op")
+    AddInput("X", "The first input of BilinearTensorProduct op.");
+    AddInput("Y", "The second input of BilinearTensorProduct op.");
+    AddInput("Weight", "The input weight of BilinearTensorProduct op.");
+    AddInput("Bias", "The input bias of BilinearTensorProduct op.")
         .AsDispensable();
-    AddOutput("Out", "The output of BilinearTensorProduct op");
+    AddOutput("Out", "The output of BilinearTensorProduct op.");
     AddComment(R"DOC(
 Bilinear Tensor Product operator.
 Given input X and Y, a 3D tensor weight, and bias. Each column of the
@@ -99,30 +99,32 @@ class BilinearTensorProductOpGrad : public framework::OperatorWithKernel {
  protected:
   void InferShape(framework::InferShapeContext* ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null");
-    PADDLE_ENFORCE(ctx->HasInput("Weight"), "Input(Weight) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Y"), "Input(Y) should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Weight"),
+                   "Input(Weight) should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-                   "Input (Out@GRAD) should not be null");
+                   "Input (Out@GRAD) should not be null.");
     auto x_dims = ctx->GetInputDim("X");
     auto y_dims = ctx->GetInputDim("Y");
     auto weight_dims = ctx->GetInputDim("Weight");
     auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-    PADDLE_ENFORCE_EQ(out_dims.size(), 2, "The Out@GRAD must be a 2D Tensor.");
+    PADDLE_ENFORCE_EQ(out_dims.size(), 2UL,
+                      "The Out@GRAD must be a 2D Tensor.");
     PADDLE_ENFORCE_EQ(
         x_dims[0], out_dims[0],
-        "The first dimension(batch_size) of Out@GRAD must be equal with "
-        "the first dimension of the X.");
+        "The first dimension(batch_size) of Out@GRAD must be equal to "
+        "the first dimension of the Input(X).");
     PADDLE_ENFORCE_EQ(weight_dims[0], out_dims[1],
-                      "The second dimension of Out@GRAD must be equal with "
-                      "the third dimension of the Weight.");
+                      "The second dimension of Out@GRAD must be equal to "
+                      "the first dimension of the Input(Weight).");
     if (ctx->HasInput("Bias")) {
       auto bias_dims = ctx->GetInputDim("Bias");
       PADDLE_ENFORCE_EQ(bias_dims[1], out_dims[1],
-                        "The second dimension of Bias must be equal with "
-                        "the second dimension of the Out@GRAD.");
+                        "The second dimension of Out@GRAD must be equal to "
+                        "the second dimension of the Input(Bias).");
       auto bias_grad_name = framework::GradVarName("Bias");
       if (ctx->HasOutput(bias_grad_name))
         ctx->SetOutputDim(bias_grad_name, bias_dims);
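For reference (standard bilinear-form calculus, not text from the commit), the gradients whose shapes these checks guard are, writing $W_i$ for the $i$-th slice of Weight:

$$\frac{\partial L}{\partial X_b} = \sum_i \frac{\partial L}{\partial \mathrm{Out}_{b,i}}\, W_i Y_b, \qquad \frac{\partial L}{\partial Y_b} = \sum_i \frac{\partial L}{\partial \mathrm{Out}_{b,i}}\, W_i^\top X_b, \qquad \frac{\partial L}{\partial W_i} = \sum_b \frac{\partial L}{\partial \mathrm{Out}_{b,i}}\, X_b Y_b^\top,$$

and the Bias gradient is the column-wise sum of Out@GRAD, which is why each check ties a dimension of Out@GRAD back to the matching dimension of X, Weight, or Bias.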

@@ -15,85 +15,10 @@
 #define EIGEN_USE_GPU
 #include "paddle/operators/bilinear_tensor_product_op.h"
 
-namespace paddle {
-namespace operators {
-
-template <typename Place, typename T>
-class BilinearTensorProductCUDAKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* weight = ctx.Input<Tensor>("Weight");
-    auto* bias = ctx.Input<Tensor>("Bias");
-    auto* out = ctx.Output<Tensor>("Out");
-    out->mutable_data<T>(ctx.GetPlace());
-
-    auto y_mat = EigenMatrix<T>::From(*y);
-    auto batch_size = x->dims()[0];
-    auto weight_dims = weight->dims();
-    auto place = ctx.GetEigenDevice<Place>();
-    auto cpu_place = ctx.GetEigenDevice<platform::CPUPlace>();
-
-    // Copy the output to cpu.
-    Tensor output_cpu;
-    output_cpu.CopyFrom(*out, platform::CPUPlace(), ctx.device_context());
-    auto* output_cpu_ptr = output_cpu.data<T>();
-    auto output_cpu_mat = EigenMatrix<T>::From(output_cpu);
-
-    // Create the temporary variables.
-    Tensor left_mul;
-    left_mul.mutable_data<T>(framework::make_ddim({batch_size, weight_dims[2]}),
-                             ctx.GetPlace());
-    auto left_mul_mat = EigenMatrix<T>::From(left_mul);
-    Tensor output_col;
-    output_col.mutable_data<T>(framework::make_ddim({batch_size}),
-                               ctx.GetPlace());
-    auto output_col_vec = EigenVector<T>::From(output_col);
-
-    for (size_t i = 0; i < weight_dims[0]; ++i) {
-      Tensor weight_mat = weight->Slice(i, i + 1).Resize(
-          framework::make_ddim({weight_dims[1], weight_dims[2]}));
-      math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
-                           batch_size, weight_dims[2], weight_dims[1], 1,
-                           x->data<T>(), weight_mat.data<T>(), 0,
-                           left_mul.data<T>());
-      output_col_vec.device(place) =
-          (left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
-
-      // Copy the output_col to cpu.
-      Tensor output_col_cpu;
-      output_col_cpu.CopyFrom(output_col, platform::CPUPlace(),
-                              ctx.device_context());
-      auto* output_col_ptr = output_col_cpu.data<T>();
-      for (size_t j = 0; j < batch_size; ++j) {
-        output_cpu_ptr[i + j * weight_dims[0]] = output_col_ptr[j];
-      }
-    }
-
-    if (bias) {
-      // Copy the bias to cpu.
-      Tensor bias_cpu;
-      bias_cpu.CopyFrom(*bias, platform::CPUPlace(), ctx.device_context());
-      auto bias_vec = EigenMatrix<T>::From(bias_cpu);
-      Eigen::DSizes<int, 2> bcast(batch_size, 1);
-      output_cpu_mat.device(cpu_place) =
-          bias_vec.broadcast(bcast) + output_cpu_mat;
-    }
-
-    // Copy the output to gpu.
-    out->CopyFrom(output_cpu, platform::GPUPlace(), ctx.device_context());
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
     bilinear_tensor_product,
-    ops::BilinearTensorProductCUDAKernel<paddle::platform::GPUPlace, float>);
+    ops::BilinearTensorProductKernel<paddle::platform::GPUPlace, float>);
 REGISTER_OP_GPU_KERNEL(
     bilinear_tensor_product_grad,
     ops::BilinearTensorProductGradKernel<paddle::platform::GPUPlace, float>);
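The deleted CUDA kernel staged every intermediate through host memory (CopyFrom to CPUPlace, an element-wise copy loop, then a copy back to GPUPlace). The replacement simply registers the shared, Place-templated BilinearTensorProductKernel for GPUPlace: its Eigen expressions run on whatever device is passed to .device(), so no separate GPU implementation is needed. A minimal standalone sketch of that pattern (the function name is illustrative; DefaultDevice stands in for GpuDevice, which works the same way when compiled with EIGEN_USE_GPU):

#include <unsupported/Eigen/CXX11/Tensor>

// Adds a [1, k] bias row-wise onto a [batch, k] output. The identical
// expression compiles for DefaultDevice, ThreadPoolDevice, or GpuDevice,
// so no device-specific kernel or host round-trip is required.
template <typename Device>
void AddBiasRowwise(const Device& dev, Eigen::Tensor<float, 2>& out,
                    const Eigen::Tensor<float, 2>& bias) {
  Eigen::DSizes<Eigen::Index, 2> bcast(out.dimension(0), 1);
  out.device(dev) = out + bias.broadcast(bcast);
}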

@ -1,7 +1,7 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
@@ -21,7 +21,7 @@
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using framework::Tensor;
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
@@ -49,34 +49,27 @@ class BilinearTensorProductKernel : public framework::OpKernel<T> {
     auto weight_dims = weight->dims();
     auto place = ctx.GetEigenDevice<Place>();
 
-    // Create the temporary variables.
+    // Create the intermediate variables.
     Tensor left_mul;
     left_mul.mutable_data<T>(framework::make_ddim({batch_size, weight_dims[2]}),
                              ctx.GetPlace());
     auto left_mul_mat = EigenMatrix<T>::From(left_mul);
-    Tensor output_col;
-    output_col.mutable_data<T>(framework::make_ddim({weight_dims[0]}),
-                               ctx.GetPlace());
-    auto output_col_vec = EigenVector<T>::From(output_col);
 
     for (size_t i = 0; i < weight_dims[0]; ++i) {
+      auto output_col_vec = output_mat.chip(i, 1);
       Tensor weight_mat = weight->Slice(i, i + 1).Resize(
           framework::make_ddim({weight_dims[1], weight_dims[2]}));
       math::gemm<Place, T>(ctx.device_context(), CblasNoTrans, CblasNoTrans,
                            batch_size, weight_dims[2], weight_dims[1], 1,
                            x->data<T>(), weight_mat.data<T>(), 0,
                            left_mul.data<T>());
-      output_col_vec = (left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
-      for (size_t j = 0; j < batch_size; ++j) {
-        output_mat(j, i) = output_col_vec(j);
-      }
+      output_col_vec.device(place) =
+          (left_mul_mat * y_mat).sum(Eigen::DSizes<int, 1>(1));
     }
 
     if (bias) {
       auto bias_vec = EigenMatrix<T>::From(*bias);
       Eigen::DSizes<int, 2> bcast(batch_size, 1);
       output_mat.device(place) = bias_vec.broadcast(bcast) + output_mat;
-    } else {
-      output_mat.device(place) = output_mat;
     }
   }
 };
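The key change above is Eigen's chip(): chipping output_mat along dimension 1 at offset i yields a writable one-dimensional view of column i, so the per-slice reduction is assigned directly into the output on the target device, replacing both the staging tensor output_col and the element-by-element copy loop (and making the no-op else branch unnecessary). A minimal standalone sketch of the idiom (illustrative names, plain Eigen rather than Paddle's wrappers):

#include <unsupported/Eigen/CXX11/Tensor>

// Writes the row-wise sums of `prod` ([batch, d2]) into column i of `out`
// ([batch, k]) through a chip() view -- no explicit copy loop required.
void WriteColumn(Eigen::Tensor<float, 2>& out,
                 const Eigen::Tensor<float, 2>& prod, int i) {
  Eigen::array<Eigen::Index, 1> sum_dim{1};  // reduce over dimension 1
  out.chip(i, 1) = prod.sum(sum_dim);        // column i <- batch-sized vector
}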
@@ -102,7 +95,7 @@ class BilinearTensorProductGradKernel : public framework::OpKernel<T> {
     auto d_out_mat = EigenMatrix<T>::From(*d_out);
     auto place = ctx.GetEigenDevice<Place>();
 
-    // Create the temporary variables for gradient.
+    // Create the intermediate variables for gradient.
     Tensor x_scale;
     x_scale.mutable_data<T>(framework::make_ddim({batch_size, weight_dims[1]}),
                             ctx.GetPlace());
