Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into seq_expand_op
commit
97f1b98759
@ -0,0 +1,50 @@
|
||||
# Fetches NVIDIA NCCL (tag v1.3.4-1) as an external project and exposes it to
# the rest of the build as the `nccl` target.
#
# Reads:   THIRD_PARTY_PATH, WITH_DSO, EXTERNAL_PROJECT_LOG_ARGS
# Defines: NCCL_SOURCE_DIR, NCCL_INSTALL_DIR, NCCL_BUILD_COMMAND,
#          NCCL_INSTALL_COMMAND, targets `extern_nccl` and `nccl`
# Appends `nccl` to external_project_dependencies.
include(ExternalProject)

set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)

# Headers are consumed directly from the checkout made by ExternalProject
# (<PREFIX>/src/<name>/src).
include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)

if(WITH_DSO)
  # If we use DSO, we do not build nccl, just download the dependencies.
  set(NCCL_BUILD_COMMAND "")
  set(NCCL_INSTALL_COMMAND "")
  set(NCCL_INSTALL_DIR "")
else()
  # Otherwise, we build nccl and link it.
  set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
  # Commands are stored as CMake lists (one element per argv entry). A single
  # quoted string such as "make -j 8" would be handed to the build tool as the
  # name of one executable.
  set(NCCL_BUILD_COMMAND make -j 8)
  # PREFIX redirects the install step into NCCL_INSTALL_DIR, which is where the
  # imported `nccl` target below expects to find the library.
  set(NCCL_INSTALL_COMMAND make install PREFIX=${NCCL_INSTALL_DIR})
endif()

ExternalProject_Add(
  extern_nccl
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/NVIDIA/nccl.git"
  GIT_TAG           "v1.3.4-1"
  PREFIX            "${NCCL_SOURCE_DIR}"
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  # There is no configure step, so make has to run inside the checkout.
  BUILD_IN_SOURCE   1
  # Quoted on purpose: an empty value ("") must survive to disable the step in
  # the DSO case, while a list value is split back into separate arguments when
  # ExternalProject_Add walks its argument list.
  BUILD_COMMAND     "${NCCL_BUILD_COMMAND}"
  INSTALL_COMMAND   "${NCCL_INSTALL_COMMAND}"
  INSTALL_DIR       "${NCCL_INSTALL_DIR}"
  TEST_COMMAND      ""
)

if(WITH_DSO)
  if(CMAKE_VERSION VERSION_LESS "3.3.0")
    # This branch builds a placeholder static library from a generated source
    # file instead of declaring an INTERFACE library.
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_any_dummy.c)
    file(WRITE ${dummyfile} "const char * dummy_any = \"${dummyfile}\";")
    add_library(nccl STATIC ${dummyfile})
  else()
    add_library(nccl INTERFACE)
  endif()
else()
  add_library(nccl STATIC IMPORTED GLOBAL)
  # NOTE(review): confirm that the v1.3.4-1 Makefile installs a static archive
  # under this exact name.
  set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
               ${NCCL_INSTALL_DIR}/lib/libnccl.a)
endif()

# Make sure the checkout (and the build, when enabled) happens before anything
# that depends on `nccl`.
add_dependencies(nccl extern_nccl)

list(APPEND external_project_dependencies nccl)
|
@ -0,0 +1,107 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/conv2dtranspose_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// Validates the inputs and attributes of the conv2dtranspose operator and sets
// the shape of "Output" to
//   {in[0], filter[1], (in[2] - 1) * strides[0] + filter[2],
//                      (in[3] - 1) * strides[1] + filter[3]}
// where `in` and `filter` are the dims of "Input" and "Filter".
// Fails via PADDLE_ENFORCE when an input/output is missing, when any padding is
// non-zero, when a tensor is not 4-D, or when strides does not hold 2 values.
void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE(ctx->HasInput("Input"),
                 "Input(Input) of Conv2DTransposeOp should not be null.");
  PADDLE_ENFORCE(ctx->HasInput("Filter"),
                 "Input(Filter) of Conv2DTransposeOp should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("Output"),
                 "Output(Output) of Conv2DTransposeOp should not be null.");

  auto in_dims = ctx->GetInputDim("Input");
  auto filter_dims = ctx->GetInputDim("Filter");
  std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
  std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");

  // strides[0] and strides[1] are indexed below, so reject any other length
  // instead of reading past the end of the vector.
  PADDLE_ENFORCE_EQ(strides.size(), static_cast<size_t>(2),
                    "Conv2DTransposeOp strides should contain 2 elements.");

  for (size_t i = 0; i < paddings.size(); ++i) {
    PADDLE_ENFORCE_EQ(paddings[i], 0,
                      "No Padding allowed in conv transpose op.");
  }

  PADDLE_ENFORCE_EQ(in_dims.size(), 4,
                    "Conv2DTransposeOp input should be 4-D tensor.");
  PADDLE_ENFORCE_EQ(filter_dims.size(), 4,
                    "Conv2DTransposeOp filter should be 4-D tensor.");
  // The channel dimension of the input must match the first filter dimension.
  PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
                    "input and kernel input dimension should be equal.");

  auto output_height = (in_dims[2] - 1) * strides[0] + filter_dims[2];
  auto output_width = (in_dims[3] - 1) * strides[1] + filter_dims[3];
  ctx->SetOutputDim("Output",
                    {in_dims[0], filter_dims[1], output_height, output_width});
}
|
||||
|
||||
// Declares the inputs, output, attributes and documentation of the
// conv2dtranspose operator.
Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
    framework::OpProto* proto, framework::OpAttrChecker* op_checker)
    : OpProtoAndCheckerMaker(proto, op_checker) {
  AddInput(
      "Input",
      "(Tensor) The input tensor of convolution transpose operator. "
      "The format of input tensor is NCHW. Where N is batch size, C is the "
      "number of input channels, H and W is the height and width of image.");
  // InferShape requires Input.dims[1] == Filter.dims[0] and uses Filter.dims[1]
  // as the channel count of the output, so the leading filter dimension is the
  // number of input channels.
  AddInput("Filter",
           "(Tensor) The filter tensor of convolution transpose operator. "
           "The format of the filter tensor is MCHW, where M is the number of "
           "input image channels, C is the number of output image channels, "
           "H and W is height and width of filter. "
           "We enforce groups number == 1 and padding == 0 in "
           "convolution transpose Scenario.");
  AddOutput("Output",
            "(Tensor) The output tensor of convolution transpose operator. "
            "The format of output tensor is also NCHW.");
  AddAttr<std::vector<int>>("strides",
                            "strides of convolution transpose operator.")
      .SetDefault({1, 1});
  AddAttr<std::vector<int>>("paddings",
                            "paddings of convolution transpose operator.")
      .SetDefault({0, 0});
  AddComment(R"DOC(
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings parameters. The size of each dimension of the
parameters is checked in the infer-shape.
)DOC");
}
|
||||
|
||||
// Gives each requested gradient output the same dims as its forward input:
// Input@GRAD mirrors "Input" and Filter@GRAD mirrors "Filter". Gradient outputs
// that are not present are left untouched.
void Conv2DTransposeOpGrad::InferShape(
    framework::InferShapeContext* ctx) const {
  const auto input_shape = ctx->GetInputDim("Input");
  const auto filter_shape = ctx->GetInputDim("Filter");

  const auto input_grad_name = framework::GradVarName("Input");
  if (ctx->HasOutput(input_grad_name)) {
    ctx->SetOutputDim(input_grad_name, input_shape);
  }

  const auto filter_grad_name = framework::GradVarName("Filter");
  if (ctx->HasOutput(filter_grad_name)) {
    ctx->SetOutputDim(filter_grad_name, filter_shape);
  }
}
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP(conv2dtranspose, ops::Conv2DTransposeOp,
|
||||
ops::Conv2DTransposeOpMaker, conv2dtranspose_grad,
|
||||
ops::Conv2DTransposeOpGrad);
|
||||
|
||||
REGISTER_OP_CPU_KERNEL(
|
||||
conv2dtranspose,
|
||||
ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>);
|
||||
REGISTER_OP_CPU_KERNEL(
|
||||
conv2dtranspose_grad,
|
||||
ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>);
|
@ -0,0 +1,24 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/conv2dtranspose_op.h"
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
|
||||
REGISTER_OP_GPU_KERNEL(
|
||||
conv2dtranspose,
|
||||
ops::GemmConv2DTransposeKernel<paddle::platform::GPUPlace, float>);
|
||||
REGISTER_OP_GPU_KERNEL(
|
||||
conv2dtranspose_grad,
|
||||
ops::GemmConv2DTransposeGradKernel<paddle::platform::GPUPlace, float>);
|
File diff suppressed because it is too large
Load Diff
@ -1,200 +0,0 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/operators/net_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
class FCOp : public NetOp {
|
||||
public:
|
||||
FCOp(const std::string &type, const framework::VariableNameMap &inputs,
|
||||
const framework::VariableNameMap &outputs,
|
||||
const framework::AttributeMap &attrs)
|
||||
: NetOp(type, inputs, outputs, attrs) {
|
||||
PADDLE_ENFORCE(!Inputs("X").empty(),
|
||||
"Inputs(X) of FCOp should not be null.");
|
||||
PADDLE_ENFORCE(!Inputs("W").empty(),
|
||||
"Inputs(W) of FCOp should not be null.");
|
||||
PADDLE_ENFORCE(!Outputs("MulOut").empty(),
|
||||
"Outputs(MulOut) of FCOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Output("Out"), framework::kEmptyVarName,
|
||||
"Output(Out) of FCOp should not be null.");
|
||||
|
||||
auto x = Inputs("X");
|
||||
auto w = Inputs("W");
|
||||
auto mul_out = Outputs("MulOut");
|
||||
PADDLE_ENFORCE_EQ(
|
||||
x.size(), w.size(),
|
||||
"The size of inputs X(%d) should be the same as that of weights W(%d).",
|
||||
x.size(), w.size());
|
||||
PADDLE_ENFORCE_EQ(mul_out.size(), x.size(),
|
||||
"The size of intermediate mul_out(%d) should be the same "
|
||||
"as that of inputs X(%d).",
|
||||
mul_out.size(), x.size());
|
||||
|
||||
size_t n = x.size();
|
||||
PADDLE_ENFORCE_GE(n, static_cast<size_t>(1),
|
||||
"The size of inputs X(%d) should be no less than 1.", n);
|
||||
|
||||
auto x_num_col_dims = Attr<std::vector<int>>("xNumColDims");
|
||||
|
||||
// Set all values or set no values (use the default value)
|
||||
if (!x_num_col_dims.empty()) {
|
||||
PADDLE_ENFORCE_EQ(x_num_col_dims.size(), n,
|
||||
"The size of attribute xNumColDims(%d) should be the "
|
||||
"same as that of inputs X(%d).",
|
||||
x_num_col_dims.size(), n);
|
||||
} else {
|
||||
x_num_col_dims.resize(n);
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
x_num_col_dims[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// mul_out[i] = X[i] * W[i]
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
framework::AttributeMap mul_attr;
|
||||
mul_attr["x_num_col_dims"] = static_cast<int>(x_num_col_dims[i]);
|
||||
mul_attr["y_num_col_dims"] = static_cast<int>(1);
|
||||
AppendOp(
|
||||
framework::OpRegistry::CreateOp("mul", {{"X", {x[i]}}, {"Y", {w[i]}}},
|
||||
{{"Out", {mul_out[i]}}}, mul_attr));
|
||||
}
|
||||
|
||||
// sum_out = X[0] * W[0] + ... + X[n-1] * W[n-1]
|
||||
auto sum_out = mul_out[0];
|
||||
if (n > 1) {
|
||||
PADDLE_ENFORCE_NE(Output("SumOut"), framework::kEmptyVarName,
|
||||
"Output(SumOut) of FCOp should not be null when the "
|
||||
"size of Inputs(X) > 1.");
|
||||
|
||||
sum_out = Output("SumOut");
|
||||
AppendOp(framework::OpRegistry::CreateOp("sum", {{"X", {mul_out}}},
|
||||
{{"Out", {sum_out}}}, {}));
|
||||
} else {
|
||||
if (Output("SumOut") != framework::kEmptyVarName) {
|
||||
this->Rename(Output("SumOut"), framework::kEmptyVarName);
|
||||
}
|
||||
}
|
||||
|
||||
// add_out = sum_out + b
|
||||
auto b = Input("B");
|
||||
auto add_out = sum_out;
|
||||
if (b != framework::kEmptyVarName) {
|
||||
PADDLE_ENFORCE_NE(
|
||||
Output("AddOut"), framework::kEmptyVarName,
|
||||
"Output(AddOut) of FCOp should not be null when Input(B) is set.");
|
||||
|
||||
add_out = Output("AddOut");
|
||||
AppendOp(framework::OpRegistry::CreateOp(
|
||||
"elementwise_add", {{"X", {sum_out}}, {"Y", {Input("B")}}},
|
||||
{{"Out", {add_out}}}, {}));
|
||||
} else {
|
||||
if (Output("AddOut") != framework::kEmptyVarName) {
|
||||
this->Rename(Output("AddOut"), framework::kEmptyVarName);
|
||||
}
|
||||
}
|
||||
|
||||
auto activation = Attr<std::string>("activation");
|
||||
AppendOp(framework::OpRegistry::CreateOp(activation, {{"X", {add_out}}},
|
||||
{{"Y", {Output("Out")}}}, {}));
|
||||
CompleteAddOp(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Declares the inputs, outputs, attributes and documentation of the fc
// operator.
class FCOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  FCOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(A vector of Tensors) each input Tensor can be of arbitrary "
             "dimension, and will be reshaped to a 2-D matrix of size "
             "(minibatch, number_of_input_features) according to attribute "
             "xNumColDims.")
        .AsDuplicable();
    AddInput("W",
             "(A vector of Tensors) the weights of FC operator, a "
             "vector of 2-D matrix of size "
             "(number_of_input_features, number_of_neurons).")
        .AsDuplicable();
    AddInput("B",
             "(Tensor) the bias of FC operator, a 1-D vector of size "
             "number_of_neurons.");

    AddOutput("Out",
              "(Tensor) the activated output matrix of FC operator, a 2-D "
              "matrix of size (minibatch, number_of_neurons).");
    AddOutput("MulOut",
              "(A vector of Tensors) the intermediate outputs of FC operator, "
              "each Tensor saving the product of X_i * W_i.")
        .AsIntermediate()
        .AsDuplicable();
    AddOutput(
        "SumOut",
        "(Tensor) the intermediate output of FC operator, "
        "saving the sum of the products of X and W, that is sum{X_i * W_i}.")
        .AsIntermediate();
    AddOutput("AddOut",
              "(Tensor) the non-activated output of FC operator, "
              "saving sum{X_i * W_i} + B.")
        .AsIntermediate();
    AddAttr<std::string>(
        "activation",
        "(string, default identity) the activation type of FC operator.")
        .SetDefault("identity")
        .InEnum({"identity", "sigmoid", "softmax"});
    // The index ranges in the text below fix which dimensions are meant:
    // dims[0 .. k-1] are the FIRST k dimensions and dims[k .. rank-1] are the
    // LAST rank - k dimensions.
    AddAttr<std::vector<int>>(
        "xNumColDims",
        "(std::vector<int>) The inputs Tensors of FC operator can be of "
        "more than 2 dimensions. In that case, each input Tensor `X_i` will be "
        "reshaped to a 2-D matrix. The matrix's first dimension "
        "(the length of column) will be the product of `X_i`'s first "
        "`xNumColDims_i` dimensions, that is "
        "`X_i.dims[0] x ... x X_i.dims[xNumColDims_i - 1]`. "
        "The matrix's second dimension (the length of row) will be the product "
        "of `X_i`'s last `rank - xNumColDims_i` dimensions, that is "
        "`X_i.dims[xNumColDims_i] x ... x X_i.dims[rank - 1]`)")
        .SetDefault(std::vector<int>{});

    AddComment(R"DOC(
Fully Connected Operator, known as Fully Connected Layer or Inner Product Layer
in Convolutional Neural Networks. Neurons in a fully connected layer have
full connections to all activations in the previous layer.
It computes an inner product of a set of
learned weights with a matrix multiplication followed by a bias offset
(optionally).

Equation:
  Out = Act(sum_n{X_i * W_i} + B)

where X_i is Tensor that will be reshaped to a 2-D matrix of size (M x K),
usually M is the minibatch size and K is the number of input features.
W_i is a 2-D matrix of size (K x N), where N means the number of neurons
in the fully connected layer. B is a 1-D vector of size N.
Thus, the output Out is a 2-D matrix of size (M x N).
Activation type can be set to `identity` (default), `sigmoid` or `softmax`.

All the inputs can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with first input (`X[0]`).
)DOC");
  }
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP_WITHOUT_GRADIENT(fc, ops::FCOp, ops::FCOpMaker);
|
@ -1,63 +0,0 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/net_op.h"
|
||||
#include "paddle/operators/scale_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// The identity operator is an alias of the scale operator. This is also an
|
||||
// example for creating an alias for an existing operator.
|
||||
template <typename AttrType>
|
||||
class IdentityOpMaker : public framework::OpProtoAndCheckerMaker {
|
||||
public:
|
||||
IdentityOpMaker(framework::OpProto *proto,
|
||||
framework::OpAttrChecker *op_checker)
|
||||
: OpProtoAndCheckerMaker(proto, op_checker) {
|
||||
AddInput("X", "The input tensor of identity operator.");
|
||||
AddOutput("Y", "The output tensor of identity operator.");
|
||||
AddComment(R"DOC(
|
||||
The identity operator is an alias of the scale operator
|
||||
with the attribute scale fixed to 1.0.
|
||||
)DOC");
|
||||
}
|
||||
};
|
||||
|
||||
template <typename AttrType>
|
||||
class IdentityOp : public NetOp {
|
||||
public:
|
||||
IdentityOp(const std::string &type, const framework::VariableNameMap &inputs,
|
||||
const framework::VariableNameMap &outputs,
|
||||
const framework::AttributeMap &attrs)
|
||||
: NetOp(type, inputs, outputs, attrs) {
|
||||
PADDLE_ENFORCE_NE(Input("X"), framework::kEmptyVarName,
|
||||
"Input(X) of IdentityOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Output("Y"), framework::kEmptyVarName,
|
||||
"Output(Y) of IdentityOp should not be null.");
|
||||
|
||||
AppendOp(framework::OpRegistry::CreateOp(
|
||||
"scale", {{"X", {Input("X")}}}, {{"Out", {Output("Y")}}},
|
||||
{{"scale", static_cast<AttrType>(1)}}));
|
||||
CompleteAddOp(false);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
|
||||
REGISTER_OP_WITHOUT_GRADIENT(identity, ops::IdentityOp<float>,
|
||||
ops::IdentityOpMaker<float>);
|
@ -1,113 +0,0 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/operators/net_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
class InterpOp : public NetOp {
|
||||
public:
|
||||
InterpOp(const std::string &type, const framework::VariableNameMap &inputs,
|
||||
const framework::VariableNameMap &outputs,
|
||||
const framework::AttributeMap &attrs)
|
||||
: NetOp(type, inputs, outputs, attrs) {
|
||||
PADDLE_ENFORCE_NE(Input("X"), framework::kEmptyVarName,
|
||||
"Input(X) of InterpOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Input("Y"), framework::kEmptyVarName,
|
||||
"Input(Y) of InterpOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Input("W"), framework::kEmptyVarName,
|
||||
"Input(W) of InterpOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Output("SubOut"), framework::kEmptyVarName,
|
||||
"Output(SubOut) of InterpOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Output("MulOut"), framework::kEmptyVarName,
|
||||
"Output(MulOut) of InterpOp should not be null.");
|
||||
PADDLE_ENFORCE_NE(Output("Out"), framework::kEmptyVarName,
|
||||
"Output(Out) of InterpOp should not be null.");
|
||||
|
||||
// SubOut = X - Y
|
||||
auto x = Input("X");
|
||||
auto y = Input("Y");
|
||||
auto sub_out = Output("SubOut");
|
||||
AppendOp(framework::OpRegistry::CreateOp(
|
||||
"elementwise_sub", {{"X", {x}}, {"Y", {y}}}, {{"Out", {sub_out}}}, {}));
|
||||
|
||||
// MulOut = SubOut * W = (X - Y) * W
|
||||
auto w = Input("W");
|
||||
auto mul_out = Output("MulOut");
|
||||
AppendOp(framework::OpRegistry::CreateOp(
|
||||
"elementwise_mul", {{"X", {sub_out}}, {"Y", {w}}}, {{"Out", {mul_out}}},
|
||||
{{"axis", 0}}));
|
||||
|
||||
// Out = MulOut + Y = (X - Y) * W + Y = X * W + Y * (1 - W)
|
||||
AppendOp(framework::OpRegistry::CreateOp("elementwise_add",
|
||||
{{"X", {mul_out}}, {"Y", {y}}},
|
||||
{{"Out", {Output("Out")}}}, {}));
|
||||
|
||||
CompleteAddOp(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Declares the inputs, outputs and documentation of the interp operator.
// Adjacent string literals are concatenated verbatim, so every literal that is
// followed by another one ends with an explicit space.
class InterpOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  InterpOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "(Tensor), 2-D Matrix of shape [batch_size, data_dim] "
             "containing data samples, the first input of interp_op");
    AddInput("Y",
             "(Tensor), 2-D Matrix of shape `[batch_size, data_dim]` "
             "containing data samples, the second input of interp_op");
    AddInput("W",
             "(Tensor), 1-D Vector of shape [batch_size], "
             "the interpolated values in the half-open interval [0.0, 1.0)");
    AddOutput("SubOut",
              "(Tensor), the intermediate subtraction outputs, saving X - Y.")
        .AsIntermediate();
    AddOutput("MulOut",
              "(Tensor), the intermediate multiplication outputs, "
              "saving the elementwise multiplication of (X - Y) and W.")
        .AsIntermediate();
    AddOutput("Out",
              "(Tensor), the output of interp_op, same shape with X, "
              "returns the first-dimensional piecewise linear interpolant "
              "between X and Y");
    AddComment(R"DOC(
    Linear Interpolation with two inputs, used in NEURAL TURING MACHINE.

    Equation:
      Out.row[i] = X.row[i] * W[i] + Y.row[i] * (1 - W[i])
                 = (X.row[i] - Y.row[i]) * W[i] + Y.row[i]

    Example:
      X = [[1,2],[3,4]],
      Y = [[2,1],[4,3]],
      W = [0.3, 0.4]

      Then, Out = [[1.7,1.3],[3.6,3.4]]

      where 1.7 = 1*0.3+2*(1-0.3),
            1.3 = 2*0.3+1*(1-0.3),
            3.6 = 3*0.4+4*(1-0.4),
            3.4 = 4*0.4+3*(1-0.4)
)DOC");
  }
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP_WITHOUT_GRADIENT(interp, ops::InterpOp, ops::InterpOpMaker);
|
@ -0,0 +1,226 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/lstm_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
class LSTMOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||
PADDLE_ENFORCE(ctx->HasInput("Input"),
|
||||
"Input(Input) of LSTM should not be null.");
|
||||
PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
|
||||
"Output(Hidden) of LSTM should not be null.");
|
||||
PADDLE_ENFORCE(ctx->HasOutput("Cell"),
|
||||
"Output(Cell) of LSTM should not be null.");
|
||||
|
||||
auto x_dims = ctx->GetInputDim("Input");
|
||||
PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank must be 2.");
|
||||
|
||||
if (ctx->HasInput("H0")) {
|
||||
PADDLE_ENFORCE(ctx->HasInput("C0"),
|
||||
"Input(Cell) and Input(Hidden) of LSTM should not "
|
||||
"be null at the same time.");
|
||||
auto h_dims = ctx->GetInputDim("H0");
|
||||
auto c_dims = ctx->GetInputDim("C0");
|
||||
PADDLE_ENFORCE(h_dims == c_dims,
|
||||
"The dimension of Input(H0) and Input(C0) "
|
||||
"should be the same.");
|
||||
}
|
||||
|
||||
int frame_size = x_dims[1] / 4;
|
||||
auto w_dims = ctx->GetInputDim("Weight");
|
||||
PADDLE_ENFORCE_EQ(w_dims.size(), 2,
|
||||
"The rank of Input(Weight) should be 2.");
|
||||
PADDLE_ENFORCE_EQ(w_dims[0], frame_size,
|
||||
"The first dimension of Input(Weight) "
|
||||
"should be %d.",
|
||||
frame_size);
|
||||
PADDLE_ENFORCE_EQ(w_dims[1], 4 * frame_size,
|
||||
"The second dimension of Input(Weight) "
|
||||
"should be 4 * %d.",
|
||||
frame_size);
|
||||
auto b_dims = ctx->GetInputDim("Bias");
|
||||
PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2.");
|
||||
PADDLE_ENFORCE_EQ(b_dims[0], 1,
|
||||
"The first dimension of Input(Bias) should be 1.");
|
||||
if (ctx->Attrs().Get<bool>("usePeepholes")) {
|
||||
PADDLE_ENFORCE_EQ(b_dims[1], 7 * frame_size,
|
||||
"The second dimension of Input(Bias) should be "
|
||||
"7 * %d if enable peepholes connection",
|
||||
frame_size);
|
||||
} else {
|
||||
PADDLE_ENFORCE_EQ(b_dims[1], 4 * frame_size,
|
||||
"The second dimension of Input(Bias) should be "
|
||||
"4 * %d if disable peepholes connection",
|
||||
frame_size);
|
||||
}
|
||||
ctx->SetOutputDim("Hidden", {x_dims[0], frame_size});
|
||||
ctx->SetOutputDim("Cell", {x_dims[0], frame_size});
|
||||
ctx->SetOutputDim("BatchGate", x_dims);
|
||||
ctx->ShareLoD("Input", "Hidden");
|
||||
ctx->ShareLoD("Input", "Cell");
|
||||
}
|
||||
};
|
||||
|
||||
// Declares the inputs, outputs, attributes and documentation of the lstm
// operator. Adjacent string literals are concatenated verbatim, so every
// literal that is followed by another one ends with an explicit space.
class LSTMOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LSTMOpMaker(framework::OpProto* proto, framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("Input",
             "(LoDTensor) the first input is a LodTensor, which support "
             "variable-time length input sequence. The underlying tensor in "
             "this LoDTensor is a matrix with shape (T X 4D), where, T is the "
             "total time steps in this mini-batch, D is the hidden size.");
    AddInput("H0",
             "(Tensor, optional) the initial hidden state is an optional "
             "input. This is a tensor with shape (N x D), where N is the "
             "batch size, D is the hidden size.");
    AddInput("C0",
             "(Tensor, optional) the initial cell state is an optional "
             "input. This is a tensor with shape (N x D), where N is the "
             "batch size. `H0` and `C0` can be NULL but only at the same time");
    AddInput("Weight",
             "(Tensor) the learnable hidden-hidden weights. "
             " - The shape is (D x 4D), where D is the hidden size. "
             " - Weight = {W_ch, W_ih, W_fh, W_oh}");
    AddInput("Bias",
             "(Tensor) the learnable weights, which contains two parts: "
             "input-hidden bias weight and peephole connections weight if "
             "setting `usePeepholes` True. "
             "1. `usePeepholes = False` "
             " - The shape is (1 x 4D). "
             " - Bias = {b_c, b_i, b_f, b_o}. "
             "2. `usePeepholes = True` "
             " - The shape is (1 x 7D). "
             " - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}.");
    AddOutput("BatchGate",
              "(LoDTensor) This LoDTensor contains input gate, forget gate "
              "and output gate after the nonlinear computation. This "
              "LoDTensor has the same shape with the reorganized input, which "
              "is also called batch input. The LoD size is 2. The first "
              "LoD is the batch offsets and the second LoD contains the "
              "indexes, which denote the position of reorganized sequence "
              "in the raw input.")
        .AsIntermediate();
    // LSTMOp::InferShape sets Hidden and Cell to {T, D} (a quarter of the
    // width of Input) and shares only the LoD with Input.
    AddOutput("Hidden",
              "(LoDTensor) the hidden state lod tensor of LSTM operator. "
              "The shape is (T x D), and lod is the same with the `Input`.");
    AddOutput("Cell",
              "(LoDTensor) the cell state lod tensor of LSTM operator. "
              "The shape is (T x D), and lod is the same with the `Input`.");
    AddAttr<bool>("usePeepholes",
                  "(bool, default: True) "
                  "whether to enable diagonal/peephole connections.")
        .SetDefault(true);
    AddAttr<bool>("isReverse",
                  "(bool, default: False) "
                  "whether to compute reversed LSTM.")
        .SetDefault(false);
    AddAttr<std::string>(
        "gateActivation",
        "(string, default: sigmoid) "
        "The activation for input gate, forget gate and output "
        "gate, `sigmoid` by default.")
        .SetDefault("sigmoid");
    AddAttr<std::string>("cellActivation",
                         "(string, default: tanh) "
                         "The activation for cell output, `tanh` by default.")
        .SetDefault("tanh");
    AddAttr<std::string>("candidateActivation",
                         "(string, default: tanh) "
                         "The activation for candidate hidden state, "
                         "`tanh` by default.")
        .SetDefault("tanh");
    AddComment(R"DOC(Long-Short Term Memory (LSTM) Operator

The default implementation is diagonal/peephole connection [1], the formula is
as follows

    i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)

    f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)

    \tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

    o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)

    c_t = f_t ⊙ c_{t-1} + i_t ⊙ \tilde{c_t}

    h_t = o_t ⊙ act_h(c_t)

where the W terms denote weight matrices (e.g. \f$W_{ix}\f$ is the matrix
of weights from the input gate to the input), \f$W_{ic}, W_{fc}, W_{oc}\f$
are diagonal weight matrices for peephole connections. In our implementation,
we use vectors to represent these diagonal weight matrices. The b terms
denote bias vectors (\f$b_i\f$ is the input gate bias vector), \f$\sigma\f$
is the non-linear activation, such as logistic sigmoid function, and
\f$i, f, o\f$ and \f$c\f$ are respectively the input gate, forget gate,
output gate and cell activation vectors, all of which are the same size as
the cell output activation vector \f$h\f$.

The ⊙ is the element-wise product of the vectors, \f$act_g\f$ and \f$act_h\f$
are the cell input and cell output activation functions, `tanh` is usually
used for them. \f$\tilde{c_t}\f$ is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.

Set `usePeepholes` False to disable peephole connection [2]. The formula
is omitted here.

@note These \f$W_{ix}x_{t}, W_{fx}x_{t}, W_{cx}x_{t}, W_{ox}x_{t}\f$
operations on the input x_{t} were NOT included in this operator.
Users can choose to use fully-connect operator before LSTM operator.

[1] Hasim Sak, Andrew Senior, and Francoise Beaufays. Long short-term memory
recurrent neural network architectures for large scale acoustic modeling.
INTERSPEECH, 2014.

[2] S. Hochreiter and J. Schmidhuber. Long Short-Term Memory.
Neural Computation, 9(8):1735-1780, 1997.

)DOC");
  }
};
|
||||
|
||||
class LSTMGradOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Hidden")),
|
||||
"Input(Hidden@GRAD) should not be null");
|
||||
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Cell")),
|
||||
"Input(Cell@GRAD) should not be null");
|
||||
ctx->SetOutputDim(framework::GradVarName("Weight"),
|
||||
ctx->GetInputDim("Weight"));
|
||||
ctx->SetOutputDim(framework::GradVarName("Bias"), ctx->GetInputDim("Bias"));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
|
||||
// Register the lstm operator (with its maker) together with lstm_grad.
REGISTER_OP(lstm, ops::LSTMOp, ops::LSTMOpMaker, lstm_grad, ops::LSTMGradOp);
|
||||
// CPU kernels for the forward operator, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(lstm, ops::LSTMKernel<paddle::platform::CPUPlace, float>,
|
||||
ops::LSTMKernel<paddle::platform::CPUPlace, double>);
|
||||
// CPU kernels for the gradient operator, instantiated for float and double.
REGISTER_OP_CPU_KERNEL(lstm_grad,
|
||||
ops::LSTMGradKernel<paddle::platform::CPUPlace, float>,
|
||||
ops::LSTMGradKernel<paddle::platform::CPUPlace, double>);
|
@ -0,0 +1,23 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
#include "paddle/operators/lstm_op.h"
|
||||
|
||||
// Short alias used by the registration macros below.
namespace ops = paddle::operators;
|
||||
// GPU kernels for the forward operator, instantiated for float and double.
REGISTER_OP_GPU_KERNEL(lstm, ops::LSTMKernel<paddle::platform::GPUPlace, float>,
|
||||
ops::LSTMKernel<paddle::platform::GPUPlace, double>);
|
||||
// GPU kernels for the gradient operator, instantiated for float and double.
REGISTER_OP_GPU_KERNEL(lstm_grad,
|
||||
ops::LSTMGradKernel<paddle::platform::GPUPlace, float>,
|
||||
ops::LSTMGradKernel<paddle::platform::GPUPlace, double>);
|
@ -0,0 +1,139 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/operators/math/lstm_compute.h"
|
||||
#include "paddle/operators/math/math_function.h"
|
||||
#include "paddle/operators/math/sequence2batch.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
using framework::LoDTensor;
|
||||
using framework::Tensor;
|
||||
template <typename T, int MajorType = Eigen::RowMajor,
|
||||
typename IndexType = Eigen::DenseIndex>
|
||||
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
|
||||
|
||||
template <typename Place, typename T>
|
||||
class LSTMKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
auto* input = ctx.Input<framework::LoDTensor>("Input");
|
||||
auto* weight = ctx.Input<framework::Tensor>("Weight");
|
||||
auto* bias = ctx.Input<framework::Tensor>("Bias");
|
||||
|
||||
auto* batch_gate = ctx.Output<framework::LoDTensor>("BatchGate");
|
||||
batch_gate->mutable_data<T>(ctx.GetPlace());
|
||||
auto* hidden_out = ctx.Output<framework::LoDTensor>("Hidden");
|
||||
hidden_out->mutable_data<T>(ctx.GetPlace());
|
||||
auto* cell_out = ctx.Output<framework::LoDTensor>("Cell");
|
||||
cell_out->mutable_data<T>(ctx.GetPlace());
|
||||
|
||||
// Now the function ShareLoD in InferShape is not implemented.
|
||||
// So copy LoD here.
|
||||
ctx.ShareLoD("Input", "Hidden");
|
||||
ctx.ShareLoD("Input", "Cell");
|
||||
|
||||
bool is_reverse = ctx.Attr<bool>("isReverse");
|
||||
math::LoDTensor2BatchFunctor<Place, T> to_batch;
|
||||
to_batch(ctx.device_context(), *input, *batch_gate, is_reverse);
|
||||
|
||||
auto in_dims = input->dims();
|
||||
int frame_size = static_cast<int>(in_dims[1] / 4);
|
||||
framework::DDim dims({in_dims[0], frame_size});
|
||||
|
||||
if (bias) {
|
||||
Eigen::array<int, 2> extents({{1, 4 * frame_size}});
|
||||
Eigen::array<int, 2> offsets({{0, 0}});
|
||||
auto b = EigenMatrix<T>::From(*bias);
|
||||
auto gate = EigenMatrix<T>::From(*batch_gate);
|
||||
gate.device(ctx.GetEigenDevice<Place>()) =
|
||||
gate +
|
||||
b.slice(offsets, extents)
|
||||
.reshape(Eigen::array<int, 2>({{1, frame_size * 4}}))
|
||||
.broadcast(
|
||||
Eigen::array<int, 2>({{static_cast<int>(in_dims[0]), 1}}));
|
||||
}
|
||||
|
||||
math::LstmMetaValue<T> lstm_value;
|
||||
T* bias_data = const_cast<T*>(bias->data<T>());
|
||||
// the code style in LstmMetaValue will be updated later.
|
||||
lstm_value.checkIg = bias_data + 4 * frame_size;
|
||||
lstm_value.checkFg = lstm_value.checkIg + frame_size;
|
||||
lstm_value.checkOg = lstm_value.checkFg + frame_size;
|
||||
lstm_value.prevStateValue = nullptr;
|
||||
|
||||
framework::LoDTensor batch_out, batch_cell, batch_cell_pre_act;
|
||||
batch_out.mutable_data<T>(dims, ctx.GetPlace());
|
||||
batch_cell.mutable_data<T>(dims, ctx.GetPlace());
|
||||
batch_cell_pre_act.mutable_data<T>(dims, ctx.GetPlace());
|
||||
|
||||
auto batch_starts = batch_gate->lod()[0];
|
||||
size_t num_batch = batch_starts.size() - 1;
|
||||
auto gate_act = ctx.Attr<std::string>("gateActivation");
|
||||
auto cell_act = ctx.Attr<std::string>("cellActivation");
|
||||
auto cand_act = ctx.Attr<std::string>("candidateActivation");
|
||||
|
||||
for (size_t n = 0; n < num_batch; n++) {
|
||||
int bstart = static_cast<int>(batch_starts[n]);
|
||||
int bend = static_cast<int>(batch_starts[n + 1]);
|
||||
|
||||
Tensor gate_t = batch_gate->Slice(bstart, bend);
|
||||
Tensor out_t = batch_out.Slice(bstart, bend);
|
||||
Tensor cell_t = batch_cell.Slice(bstart, bend);
|
||||
Tensor cell_pre_act_t = batch_cell_pre_act.Slice(bstart, bend);
|
||||
|
||||
int cur_batch_size = bend - bstart;
|
||||
|
||||
if (n != 0) {
|
||||
int pre_h_start = static_cast<int>(batch_starts[n - 1]);
|
||||
int pre_h_end = pre_h_start + cur_batch_size;
|
||||
auto pre_hidden_t = batch_out.Slice(pre_h_start, pre_h_end);
|
||||
math::matmul<Place, T>(ctx.device_context(), pre_hidden_t, false,
|
||||
*weight, false, static_cast<T>(1.0), &gate_t,
|
||||
static_cast<T>(1.0));
|
||||
}
|
||||
// else if : FIXME support the initial hidden and cell
|
||||
|
||||
lstm_value.gateValue = gate_t.data<T>();
|
||||
lstm_value.outputValue = out_t.data<T>();
|
||||
lstm_value.stateValue = cell_t.data<T>();
|
||||
lstm_value.stateActiveValue = cell_pre_act_t.data<T>();
|
||||
math::LstmUnitFunctor<Place, T>::compute(ctx.device_context(), lstm_value,
|
||||
frame_size, cur_batch_size,
|
||||
gate_act, cell_act, cand_act);
|
||||
lstm_value.prevStateValue = lstm_value.stateValue;
|
||||
}
|
||||
|
||||
math::Batch2LoDTensorFunctor<Place, T> to_seq;
|
||||
batch_out.set_lod(batch_gate->lod());
|
||||
// restore the output hidden in LoDTensor from the batch hidden
|
||||
to_seq(ctx.device_context(), batch_out, *hidden_out);
|
||||
|
||||
batch_cell.set_lod(batch_gate->lod());
|
||||
// restore the output cell state in LoDTensor from the batch cell
|
||||
to_seq(ctx.device_context(), batch_cell, *cell_out);
|
||||
}
|
||||
};
|
||||
|
||||
// Gradient kernel of the LSTM operator. Compute() has an empty body in this
// revision, so running it performs no work and writes no outputs.
template <typename Place, typename T>
|
||||
class LSTMGradKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
// No-op: the backward computation is not implemented here.
void Compute(const framework::ExecutionContext& ctx) const override {}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
@ -0,0 +1,5 @@
|
||||
# Build the activation_functions library. The CPU implementation is always
# compiled; the AVX implementation is added only when WITH_AVX is enabled.
set(activation_functions_srcs hl_cpu_functions.cc)
if(WITH_AVX)
  list(APPEND activation_functions_srcs hl_avx_functions.cc)
endif()
cc_library(activation_functions SRCS ${activation_functions_srcs})
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue