Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into add-GRUOp-dev

guosheng 7 years ago
commit c4f7f3a562

@@ -45,8 +45,9 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)

@@ -21,6 +21,7 @@ limitations under the License. */
#include <vector>
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
@@ -70,10 +71,12 @@ static void CreateTensor(Variable* var, VarDesc::VarType var_type) {
var->GetMutable<FeedFetchList>();
} else if (var_type == VarDesc::STEP_SCOPES) {
var->GetMutable<std::vector<framework::Scope>>();
} else if (var_type == VarDesc::LOD_RANK_TABLE) {
var->GetMutable<LoDRankTable>();
} else {
PADDLE_THROW(
"Variable type %d is not in "
"[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST]",
"[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE]",
var_type);
}
}

@@ -116,6 +116,7 @@ message VarDesc {
FEED_MINIBATCH = 3;
FETCH_LIST = 4;
STEP_SCOPES = 5;
LOD_RANK_TABLE = 6;
}
required string name = 1;
required VarType type = 2;

@@ -0,0 +1,43 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
namespace paddle {
namespace framework {
void LoDRankTable::Reset(const LoD& lod, size_t level) {
this->coarse_lod_.clear();
this->items_.clear();
PADDLE_ENFORCE(level < lod.size(),
"Cannot rank lod since the level %d is less than lod size %d",
level, lod.size());
coarse_lod_.reserve(level);
for (size_t i = 0; i < level; ++i) {
coarse_lod_.push_back(lod[i]);
}
auto& vec = lod[level];
for (size_t i = 0; i < vec.size() - 1; ++i) {
TableItem item;
item.index = i;
item.length = vec[i + 1] - vec[i];
items_.emplace_back(item);
}
std::sort(items_.begin(), items_.end(),
[](const TableItem& a, const TableItem& b) {
return a.length > b.length;
});
}
} // namespace framework
} // namespace paddle

@@ -0,0 +1,55 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor.h"
namespace paddle {
namespace framework {
// LoD Rank Table stores the `level` of `lod` which is ordered by sequence
// length in descending order. It is useful when implementing dynamic RNN and
// is shared by the dynamic RNN memory, dynamic RNN slice input, and dynamic
// RNN slice output operators.
//
// Each table item contains two elements: the length of a sequence and the
// index of that sequence in the given level.
//
// LoDRankTable also stores the coarse_lod, which is the lod information whose
// levels are less than the input level, in order to restore the output LoD
// information.
class LoDRankTable {
public:
struct TableItem {
size_t index;
size_t length;
};
LoDRankTable() {}
void Reset(const LoD& lod, size_t level);
const std::vector<TableItem>& items() const { return this->items_; }
const LoD& coarse_lod() const { return this->coarse_lod_; }
size_t level() const { return coarse_lod_.size(); }
private:
LoD coarse_lod_;
std::vector<TableItem> items_;
};
} // namespace framework
} // namespace paddle
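As a worked example of the rank table (editor's illustration, not part of this
diff; the brace-initialization of LoD below is assumed for brevity):

  LoD lod = {{0, 2, 3}, {0, 2, 5, 9}};  // two LoD levels
  LoDRankTable table;
  table.Reset(lod, /*level=*/1);
  // lod[1] describes three sequences of lengths 2, 3, and 4. After sorting
  // by length in descending order, items() holds:
  //   {index=2, length=4}, {index=1, length=3}, {index=0, length=2}
  // and coarse_lod() keeps the levels below the input level: {{0, 2, 3}}.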

@@ -408,7 +408,6 @@ class OperatorWithKernel : public OperatorBase {
// indicate kernel DataType by input data. By default all input data must be
// of the same type.
virtual DataType IndicateDataType(const ExecutionContext& ctx) const {
VLOG(3) << "Default IndicateDataType " << this->Type();
auto& scope = ctx.scope();
int data_type = -1;
for (auto& input : this->inputs_) {
@@ -425,7 +424,6 @@ class OperatorWithKernel : public OperatorBase {
}
if (t != nullptr) {
int tmp = static_cast<int>(ToDataType(t->type()));
VLOG(3) << "Input " << ipt_name << " with data_type " << tmp;
PADDLE_ENFORCE(tmp == data_type || data_type == -1,
"DataType of Paddle Op %s must be the same.",
Type());

@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "glog/logging.h"
#include "paddle/framework/framework.pb.h"
namespace paddle {

@@ -141,6 +141,7 @@ set(DEPS_OPS
pool_with_index_op
nccl_op
sequence_conv_op
lod_rank_table_op
lstm_op
gru_op)
@@ -150,6 +151,7 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(sum_op DEPS net_op selected_rows_functor)
op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling)
op_library(lod_rank_table_op SRCS lod_rank_table_op.cc DEPS lod_rank_table)
if(WITH_GPU)
op_library(nccl_op DEPS nccl_common)
endif()

@@ -33,7 +33,7 @@ class AccuracyOp : public framework::OperatorWithKernel {
auto inference_dim = ctx->GetInputDim("Out");
auto label_dim = ctx->GetInputDim("Label");
// Assume indices has same shape with infernece, because
// Assume indices has same shape as inference, because
// it's the output of topk.
PADDLE_ENFORCE_EQ(label_dim.size(), 2, "label's rank must be 2.");
@@ -60,20 +60,24 @@ class AccuracyOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
// TODO(typhoonzero): support both inference value and indices.
AddInput("Out", "topk (inferences) the network output");
AddInput("Indices", "topk (indices) the network output");
AddInput("Out", "The network output of topk (inferences)");
AddInput("Indices", "The the network output of topk (indices)");
AddInput("Label", "Label of the training data");
// TODO(typhoonzero): AddInput("Weight", ...
AddOutput("Accuracy", "The accuracy of current batch");
AddComment(R"DOC(
Accuracy. It will print accuracy rate for classification.
The accuracy is:
.. math::
accuracy = \\frac{NumOfCorrectPredicts}{NumOfAllSamples})
Accuracy Operator.
It computes the accuracy rate for classification.
The accuracy is calculated as follows:
$$accuracy = \frac{NumOfCorrectPredicts}{NumOfAllSamples}$$
Both the input Out and Label can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD information
with the input Out(Inference).
Both the input `Out` and `Label` can carry the LoD (Level of Details)
information, or not. But the output only shares the LoD with input `Inference`.
)DOC");
}
};
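To make the accuracy formula concrete (editor's illustration): if 8 of the 10
samples in a batch are predicted correctly, then

$$accuracy = \frac{8}{10} = 0.8$$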

@@ -51,6 +51,10 @@ class BatchNormOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("SavedMean"), "");
PADDLE_ENFORCE(ctx->HasOutput("SavedVariance"), "");
const float epsilon = ctx->Attrs().Get<float>("epsilon");
PADDLE_ENFORCE_GE(epsilon, 0.0, "epsilon should be greater than or equal to 0");
PADDLE_ENFORCE_LE(epsilon, 0.001, "epsilon should not be larger than 0.001");
// make sure Mean/MeanOut and Variance/VarianceOut share memory in Python
PADDLE_ENFORCE_EQ(ctx->Inputs("Mean")[0], ctx->Outputs("MeanOut")[0],
"Mean and MeanOut should share the same memory");
@@ -297,7 +301,6 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
framework::DataType IndicateDataType(
const framework::ExecutionContext &ctx) const override {
VLOG(3) << "IndicateDataType " << this->Type();
const auto *var = ctx.InputVar(framework::GradVarName("Y"));
if (var == nullptr) {
PADDLE_THROW("can't find Y@GRAD");

@@ -56,20 +56,24 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ConcatOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "the input tensors of concat operator.").AsDuplicable();
AddOutput("Out", "the output tensor of concat operator.");
AddComment(R"DOC(
Join the input tensors along with the axis.
Examples:
Input[0] = [[1,2],[3,4]]
Input[1] = [[5,6]]
axis = 0
Output = [[1,2],
[3,4],
[5,6]]
)DOC");
AddAttr<int>("axis", "The axis which the inputs will be joined with.")
AddInput("X", "Input tensors of concat operator.").AsDuplicable();
AddOutput("Out", "Output tensor of concat operator.");
AddAttr<int>("axis",
"The axis along which the input tensors will be concatenated.")
.SetDefault(0);
AddComment(R"DOC(
Concat Operator.
Concatenate the input tensors along dimension axis.
Examples:
Input[0] = [[1,2],[3,4]]
Input[1] = [[5,6]]
axis = 0
Output = [[1,2],
[3,4],
[5,6]]
)DOC");
}
};

@@ -216,11 +216,12 @@ class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker {
AddOutput("IndexTensors", "Index Tensors contains indices for true/false");
AddComment(R"DOC(
Sample dependent Cond Operator:
Given Cond[i] as a 1/0 vector to indicate true/false
The equation is:
Out[i] = subnet_t[i], if Cond[i] == true
Out[i] = subnet_t[i], if Cond[i] == false
Sample Dependent Conditional Operator.
Given Cond[i] as a 1/0 vector to indicate true/false:
Out[i] = subnet_true[i], if Cond[i] == true
Out[i] = subnet_false[i], if Cond[i] == false
)DOC");
}
};

@@ -56,17 +56,18 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
AddInput(
"Input",
"The input tensor of convolution operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of channels, H and W is the height and width of image.");
"The format of input tensor is NCHW, where N is batch size, C is the "
"number of channels, H is the height of the image, "
"and W is the width of the image.");
AddInput("Filter",
"The filter tensor of convolution operator."
"The filter tensor of convolution operator. "
"The format of the filter tensor is MCHW, where M is the number of "
"output image channels, C is the number of input image channels, "
"H and W is height and width of filter. "
"If the groups attribute is greater than 1, C equal the number of "
"H is the height of the filter, and W is the width of the filter. "
"If the groups attribute is greater than 1, C equals the number of "
"input image channels divided by the groups.");
AddOutput("Output",
"The output tensor of convolution operator."
"The output tensor of convolution operator. "
"The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
.SetDefault({1, 1});
@@ -74,16 +75,19 @@ Conv2DOpMaker::Conv2DOpMaker(framework::OpProto* proto,
.SetDefault({0, 0});
AddAttr<int>(
"groups",
"group size of convolution operator. "
"Refer to grouped convolution in Alex Krizhevsky's paper: "
"when group=2, the first half of the filters are only connected to the "
"first half of the input channels, and the second half only connected "
"to the second half.")
"Group size of convolution operator. "
"According to grouped convolution in Alex Krizhevsky's Deep CNN paper: "
"when group=2, the first half of the filters is only connected to the "
"first half of the input channels, while the second half of the filters "
"is only connected to the second half of the input channels.")
.SetDefault(1);
AddComment(R"DOC(
The convolution operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Convolution Operator.
The convolution operation calculates the output based on the input, filter,
strides, paddings, and groups parameters. The size of each dimension of the
parameters is checked in the infer-shape method.
)DOC");
}
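The shape check mentioned above follows the usual convolution arithmetic. As a
sketch (editor's illustration, not part of this diff; floor division assumed),
each spatial dimension of the output is

$$H_{out} = \frac{H_{in} + 2 \times padding_h - H_{filter}}{stride_h} + 1$$

so, for example, a 32x32 input with a 3x3 filter, stride 1, and padding 1
produces a 32x32 output.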

@@ -54,15 +54,16 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
AddInput(
"Input",
"(Tensor) The input tensor of convolution transpose operator. "
"The format of input tensor is NCHW. Where N is batch size, C is the "
"number of input channels, H and W is the height and width of image.");
"The format of input tensor is NCHW, where N is batch size, C is the "
"number of input channels, H is the height of the image, and "
"W is the width of the image.");
AddInput("Filter",
"(Tensor) The filter tensor of convolution transpose operator."
"The format of the filter tensor is CMHW, where C is the number of "
"output image channels, M is the number of input image channels, "
"H and W is height and width of filter. "
"H is the height of the filter, and W is the width of the filter. "
"We enforce groups number == 1 and padding == 0 in "
"convolution transpose Scenario.");
"the convolution transpose scenario.");
AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator."
"The format of output tensor is also NCHW.");
@@ -73,9 +74,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
"paddings of convolution transpose operator.")
.SetDefault({0, 0});
AddComment(R"DOC(
The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape.
Convolution Transpose Operator.
The convolution transpose operation calculates the output based on the input,
filter, strides, paddings, and groups parameters. The size of each dimension
of the parameters is checked in the infer-shape method.
)DOC");
}
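As with the forward convolution above, the output shape can be sketched
(editor's illustration; stated with the padding == 0 constraint enforced here):

$$H_{out} = (H_{in} - 1) \times stride_h + H_{filter}$$

e.g. a 16x16 input with a 3x3 filter and stride 2 yields a 33x33 output.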

@@ -29,7 +29,7 @@ class CudnnConvOpMaker : public Conv2DOpMaker {
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
"better hardware. This size should be chosen carefully.")
.SetDefault(4096);
}
};

@@ -96,14 +96,13 @@ as used in the Neural Turing Machine: https://arxiv.org/abs/1410.5401
The equation is:
\f[
Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}
\f]
$$Out[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} X_{i+j} * Y_{j}$$
where X's index is computed modulo M, and b's index is computed modulo N.
where X's index is computed modulo M, and Y's index is computed modulo N.
Both inputs X and Y can carry LoD (Level of Details) information.
However, the output only shares the LoD information with input X.
Both of the input `X` and `Y` can carry LoD (Level of Details) information.
However, the output only shares the LoD information with input `X`.
)DOC");
}
};

@@ -79,15 +79,16 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Cosine Similarity Operator.
The equation is: Out = X^T * Y / (sqrt(X^T * X) * sqrt(Y^T * Y)).
$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$
The input `X` and `Y` must have the same shape, except that the 1st dimension
of input `Y` could be just 1 (different from input `X`), which will be
broadcasted to match the shape of input `X` before computing their cosine
The input X and Y must have the same shape, except that the 1st dimension
of input Y could be just 1 (different from input X), which will be
broadcasted to match the shape of input X before computing their cosine
similarity.
Both the input `X` and `Y` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
Both the input X and Y can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.
)DOC");
}
};
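As a quick numeric check of the cosine similarity formula (editor's
illustration): for X = [1, 0] and Y = [1, 1],

$$Out = \frac{1}{1 \times \sqrt{2}} \approx 0.707$$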

@@ -0,0 +1,136 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/crf_decoding_op.h"
namespace paddle {
namespace operators {
class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CRFDecodingOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Emission",
"(LoDTensor, default: LoDTensor<float>). A LoDTensor with shape "
"[N x D] where N is the size of the mini-batch and D is the total "
"tag number. This input is the unscaled emission weight matrix of "
"the linear_chain_crf operator.");
AddInput(
"Transition",
"(Tensor, default: Tensor<float>). A Tensor with shape [(D + 2) x D]. "
"This input is the transition weights learned by the linear_chain_crf "
"operator, denoted as w. The 1st row of w are transition weights for "
"the start mask. The 2nd row of w are transition weights for the end "
"mask. Transition weights between other tags begin from the 3rd row of "
"w. See more details in comments of the linear_chain_crf operator.");
AddInput(
"Label",
"(LoDTensor, LoDTensor<int>). The ground truth with shape "
"[N x 1]. This input is optional. See more details in the operator's "
"comments.")
.AsDispensable();
AddOutput("ViterbiPath",
"(LoDTensor, LoDTensor<int>). The decoding results. What to "
"return changes depending on whether the Input(Label) (the groud "
"truth) is given. See more details in the operator's comment.");
AddComment(R"DOC(
The crf_decoding operator reads the emission feature weights and the transition
feature weights learned by the linear_chain_crf operator. It implements the
Viterbi algorithm, which is a dynamic programming algorithm for finding the most
likely sequence of hidden states, called the Viterbi path, that results in a
sequence of observed tags.
The output of this operator changes according to whether Input(Label) is given:
1. Input(Label) is given:
This happens in training. This operator is used together with the chunk_eval
operator.
When Input(Label) is given, the crf_decoding operator returns a row vector
with shape [N x 1] whose values are fixed to be 0, indicating an incorrect
prediction, or 1, indicating a tag is correctly predicted. Such an output is
the input to the chunk_eval operator.
2. Input(Label) is not given:
This is the standard decoding process.
The crf_decoding operator returns a row vector with shape [N x 1] whose values
range from 0 to the maximum tag number - 1. Each element indicates the index of
a predicted tag.
)DOC");
}
};
class CRFDecodingOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Emission"),
"Input(Emission) should be not null.");
PADDLE_ENFORCE(ctx->HasInput("Transition"),
"Input(Transition) should be not null.");
PADDLE_ENFORCE(ctx->HasOutput("ViterbiPath"),
"Output(ViterbiPath) should be not null.");
auto emission_dims = ctx->GetInputDim("Emission");
PADDLE_ENFORCE_EQ(emission_dims.size(), 2UL,
"The Input(Emission) should be a 2-D tensor.");
PADDLE_ENFORCE(emission_dims[0], "An empty mini-batch is not allowed.");
auto transition_dims = ctx->GetInputDim("Transition");
PADDLE_ENFORCE_EQ(transition_dims.size(), 2UL,
"The Input(Transition) should be a 2-D tensor.");
PADDLE_ENFORCE_EQ(
transition_dims[0] - 2, transition_dims[1],
"An invalid dimension for the Input(Transition), which should "
"be a 2-D tensor with shape [(D + 2) x D].");
PADDLE_ENFORCE_EQ(
emission_dims[1], transition_dims[1],
"The 2nd dimension of the Input(Emission) and the Input(Transition) "
"should be equal to the tag number.");
if (ctx->HasInput("Label")) {
auto label_dims = ctx->GetInputDim("Label");
PADDLE_ENFORCE(label_dims.size() == 2UL && label_dims[1] == 1UL,
"The Input(Label) should be a 2-D tensor with the 2nd "
"dimensions fixed to 1.");
PADDLE_ENFORCE_EQ(
emission_dims[0], label_dims[0],
"The height of Input(Emission) and the height of Input(Label) "
"should be the same.");
}
ctx->ShareLoD("Emission", /*->*/ "ViterbiPath");
ctx->SetOutputDim("ViterbiPath", {emission_dims[0], 1});
}
protected:
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<LoDTensor>("Emission")->type());
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(crf_decoding, ops::CRFDecodingOp,
ops::CRFDecodingOpMaker);
REGISTER_OP_CPU_KERNEL(
crf_decoding, ops::CRFDecodingOpKernel<paddle::platform::CPUPlace, float>,
ops::CRFDecodingOpKernel<paddle::platform::CPUPlace, double>);

@@ -0,0 +1,127 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/math/math_function.h"
namespace paddle {
namespace operators {
using framework::LoDTensor;
using framework::LoD;
using framework::Tensor;
template <typename Place, typename T>
class CRFDecodingOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"The crf_decoding operator can only run on CPU.");
auto* emission_weights = ctx.Input<LoDTensor>("Emission");
auto* transition_weights = ctx.Input<Tensor>("Transition");
auto* label = ctx.Input<LoDTensor>("Label");
auto* decoded_path = ctx.Output<Tensor>("ViterbiPath");
PADDLE_ENFORCE_EQ(emission_weights->NumLevels(), 1UL,
"The Input(Emission) should be a sequence.");
auto lod = emission_weights->lod();
PADDLE_ENFORCE(lod.size(), "Input(Emission) must be a sequence.");
const size_t level = 0;
const size_t seq_num = lod[level].size() - 1;
int* path = decoded_path->mutable_data<int>(platform::CPUPlace());
math::SetConstant<platform::CPUPlace, int>()(ctx.device_context(),
decoded_path, 0);
for (size_t i = 0; i < seq_num; ++i) {
int start_pos = static_cast<int>(lod[level][i]);
int end_pos = static_cast<int>(lod[level][i + 1]);
Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos);
Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights,
&decoded_path_one_seq);
}
if (label) {
PADDLE_ENFORCE_EQ(label->NumLevels(), 1UL,
"The Input(Label) should be a sequence.");
const int* label_value = label->data<int>();
size_t batch_size = emission_weights->dims()[0];
for (size_t i = 0; i < batch_size; ++i) {
path[i] = label_value[i] == path[i] ? 1 : 0;
}
}
}
private:
void Decode(const Tensor& emission_weights, const Tensor& transition_weights,
Tensor* decoded_path) const {
auto emission_dims = emission_weights.dims();
const size_t seq_len = emission_dims[0];
const size_t tag_num = emission_dims[1];
const size_t state_trans_base_idx = 2;
const T* x = emission_weights.data<T>();
const T* w = transition_weights.data<T>();
int* path = decoded_path->data<int>();
// alpha is a memo table. An element alpha(k, v) records the score of the
// best sequence of tags from position 1 to position k with v being the end
// tag.
Tensor alpha;
T* alpha_value = alpha.mutable_data<T>(emission_dims, platform::CPUPlace());
Tensor track;
int* track_value =
track.mutable_data<int>(emission_dims, platform::CPUPlace());
for (size_t i = 0; i < tag_num; ++i) alpha_value[i] = w[i] + x[i];
for (size_t k = 1; k < seq_len; ++k) {
for (size_t i = 0; i < tag_num; ++i) {
T max_score = -std::numeric_limits<T>::max();
int max_j = 0;
for (size_t j = 0; j < tag_num; ++j) {
T score = alpha_value[(k - 1) * tag_num + j] +
w[(j + state_trans_base_idx) * tag_num + i];
if (score > max_score) {
max_score = score;
max_j = j;
}
}
alpha_value[k * tag_num + i] = max_score + x[k * tag_num + i];
track_value[k * tag_num + i] = max_j;
}
}
T max_score = -std::numeric_limits<T>::max();
int max_i = 0;
for (size_t i = 0; i < tag_num; ++i) {
T score = alpha_value[(seq_len - 1) * tag_num + i] + w[tag_num + i];
if (score > max_score) {
max_score = score;
max_i = i;
}
}
path[seq_len - 1] = max_i;
for (int k = seq_len - 1; k >= 1; --k) {
path[k - 1] = max_i = track_value[k * tag_num + max_i];
}
}
};
} // namespace operators
} // namespace paddle
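For reference, the Decode routine above implements the standard Viterbi
recurrence (editor's summary of the code; w's 1st and 2nd rows hold the start
and end transition weights, as described in the operator comment):

$$\alpha(0, i) = w_{start}(i) + x(0, i)$$
$$\alpha(k, i) = x(k, i) + \max_j \left( \alpha(k-1, j) + w(j, i) \right)$$

The last tag is chosen as $\arg\max_i (\alpha(L-1, i) + w_{end}(i))$, and the
track table records each inner argmax so the full path can be recovered by
walking backwards from it.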

@@ -56,34 +56,35 @@ class CropOpMaker : public framework::OpProtoAndCheckerMaker {
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"The input of pad op. "
"The input should be a k-D tensor(k > 0 and k < 7)");
"The input should be a k-D tensor(k > 0 and k < 7).");
AddInput("Y",
"The input used as reference for cropping"
" with the same dimension as X. ")
"The input used as reference for cropping, "
"which is of the same dimensions as X.")
.AsDispensable();
AddOutput("Out",
"The output of crop op "
"with the same dimension as X.");
"The output of crop op, "
"which is of the same dimensions as X.");
AddAttr<std::vector<int>>("offsets",
"A list<int> describing offsets to be cropped."
"The size of offsets list should be as same as "
"dimension size of input X.");
"A list<int> describing offsets to be cropped. "
"The size of offsets list should be the same as "
"the dimension size of input X.");
AddAttr<std::vector<int>>("shape",
"A list<int> describing the shape of output."
"The size of shape list should be as same as "
"dimension size of input X.")
"A list<int> describing the shape of output. "
"The size of shape list should be the same as "
"the dimension size of input X.")
.SetDefault(std::vector<int>());
AddComment(R"DOC(
Crop Operator.
Crop input into output, as specified by offsets and shape.
There are two ways to set shape:
1. referenc input: crop input X as shape as reference input.
1. reference input: crop input X into the same shape as reference input.
The dimension of reference input should
be as same as input X.
2. shape list: crop input X by shape described by a list<int>.
The size of shape list should be as same as
dimension size of input X.
be the same as the dimension of input X.
2. shape list: crop input X into the shape described by a list<int>.
The size of shape list should be the same as
the dimension size of input X.
The input should be a k-D tensor(k > 0 and k < 7). As an example:
@@ -91,20 +92,20 @@ Given:
X = [[0, 1, 2, 0, 0]
[0, 3, 4, 0, 0]
[0, 0, 0, 0, 0]]
[0, 0, 0, 0, 0]],
and
offsets = [0, 1]
offsets = [0, 1],
and
shape = [2, 2]
shape = [2, 2],
then we get
we get:
Out = [[1, 2],
[3, 4]]
[3, 4]].
)DOC");
}

@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
}
protected:
// Explicitly set that data type of the output of the cross_entropy operator
// Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X".
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
@@ -96,7 +96,8 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
}
protected:
// CrossEntropy's data type just determined by "X"
// Explicitly set that the data type of computation kernel of cross_entropy
// is determined by its input "X".
framework::DataType IndicateDataType(
const framework::ExecutionContext& ctx) const override {
return framework::ToDataType(ctx.Input<Tensor>("X")->type());
@@ -117,9 +118,9 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
"Label",
"(Tensor, default Tensor<int>), the ground truth which is "
"a 2-D tensor. "
"When soft_label is set to false, `Label` is a Tensor<int> with shape "
"When soft_label is set to false, Label is a Tensor<int> with shape "
"[N x 1]. "
"When soft_label is set to true, `Label` is a Tensor<float/double> "
"When soft_label is set to true, Label is a Tensor<float/double> "
"with shape [N x K].");
AddOutput("Y",
"(Tensor, default Tensor<float>), a 2-D tensor "
@@ -137,13 +138,13 @@ computation.
1) One-hot cross-entropy:
soft_label = false, Label[i, 0] indicates the class index for sample i:
Y[i] = -log(X[i, Label[i]])
$Y[i] = -\log(X[i, Label[i]])$
2) Soft-label cross-entropy:
soft_label = true, Label[i, j] indicates the soft label of class j
for sample i:
Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}
$Y[i] = \sum_j{-Label[i, j] * log(X[i, j])}$
Please make sure that in this case the summation of each row of Label
equals one.
@@ -153,8 +154,9 @@ computation.
non-zero element (equals 1), soft-label cross-entropy degenerates to a
one-hot cross-entropy with one-hot label representation.
Both the input `X` and `Label` can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD with input `X`.
Both the input X and Label can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input X.
)DOC");
}
};

@@ -75,11 +75,18 @@ class DecayedAdagradOpMaker : public framework::OpProtoAndCheckerMaker {
"Constant for numerical stability")
.SetDefault(1.0e-6f);
AddComment(R"DOC(
Decayed Adagrad Optimizer.
Decayed Adagrad
The update is done as follows:
moment_out = decay * moment + (1 - decay) * grad * grad
param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
$$
moment\_out = decay * moment + (1 - decay) * grad * grad \\
param\_out = param - \frac{learning\_rate * grad}{\sqrt{moment\_out} + epsilon}
$$
The original paper (http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have an epsilon attribute. It is added here for numerical
stability to avoid division by zero.
)DOC");
}

@@ -43,22 +43,24 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
DropoutOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "Whether in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddInput("X", "The input of dropout op.");
AddOutput("Out", "The output of dropout op.");
AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
AddAttr<float>("dropout_prob", "Probability of setting units to zero.")
.SetDefault(.5f);
AddAttr<bool>("is_training", "True if in training phase.").SetDefault(true);
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
AddComment(R"DOC(
Dropout Operator.
'Dropout' refers to randomly dropping out units in a nerual network. It is a
Dropout refers to randomly dropping out units in a neural network. It is a
regularization technique for reducing overfitting by preventing neuron
co-adaptation during training. The dropout operator randomly sets (according to
the given dropout probability) the outputs of some units to zero, while others
being set to their inputs.
are set equal to their corresponding inputs.
)DOC");
}
};
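In formula form (editor's sketch of the standard formulation; the exact
scaling used by this kernel is not shown in this diff): during training a mask
is sampled per unit,

$$mask_i \sim Bernoulli(1 - dropout\_prob), \quad Out_i = X_i \times mask_i$$

and the sampled mask is exposed through the Mask output.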

@@ -386,12 +386,13 @@ class DynamicRecurrentOpProtoAndCheckerMaker
RNNAlgorithm::kArgNames[RNNAlgorithm::ComputeMode::kForward];
// inputs and outputs stored in proto
AddInput(name.inlinks,
"the inputs that need to be segmented for each step.")
"The inputs that need to be segmented for each step.")
.AsDuplicable();
AddInput(name.initial_states, "variables to initialize states.")
AddInput(name.initial_states, "Variables to initialize the states.")
.AsDuplicable();
AddOutput(name.outlinks, "the outputs that need to concated for all steps.")
AddOutput(name.outlinks,
"The outputs that need to be concatenated for all steps.")
.AsDuplicable();
AddOutput(name.step_scopes, "step scopes");
@@ -399,7 +400,12 @@ class DynamicRecurrentOpProtoAndCheckerMaker
AddAttr<std::vector<std::string>>(name.ex_states, "names of ex_states");
AddAttr<std::vector<std::string>>(name.states, "names of states");
AddComment("This is a RNN operator for varience-length sequences.");
AddComment(R"DOC(
Dynamic Recurrent Operator.
This is an RNN operator for variable-length sequences.
)DOC");
}
};

@@ -22,7 +22,7 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
ElementwiseAddOpMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: ElementwiseOpMaker(proto, op_checker) {
SetComment("add", "Out = X + Y");
SetComment("Add", "$Out = X + Y$");
AddComment(comment_);
}
};

Some files were not shown because too many files have changed in this diff.
