Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_add_axis

Adaptive_data_structure_for_SwitchOrderLayer
fengjiayi 8 years ago
commit 86655cb963

@ -434,9 +434,9 @@ lambda_cost
.. autoclass:: paddle.v2.layer.lambda_cost
:noindex:
mse_cost
square_error_cost
--------
.. autoclass:: paddle.v2.layer.mse_cost
.. autoclass:: paddle.v2.layer.square_error_cost
:noindex:
rank_cost

@ -0,0 +1,51 @@
# Design Doc: Computations as Graphs
A primary goal of the refactorization of PaddlePaddle is a more flexible representation of deep learning computation, in particular, a graph of operators and variables, instead of sequences of layers as before.
This document explains that the construction of a graph as three steps:
- construct the forward part
- construct the backward part
- construct the optimization part
Let us take the problem of image classification as a simple example. The application program that trains the model looks like:
```python
x = layer.data("images")
l = layer.data("label")
y = layer.fc(x)
cost = layer.mse(y, l)
optimize(cost)
train(cost, reader=mnist.train())
```
### Forward Part
The first four lines of above program build the forward part of the graph.
![](images/graph_construction_example_forward_only.png)
In particular, the first line `x = layer.data("images")` creates variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b.
In this example, all operators are created as `OpDesc` protobuf messages, and all variables are `VarDesc`. These protobuf messages are saved in a `BlockDesc` protobuf message.
### Backward Part
The fifth line `optimize(cost)` calls two functions, `ConstructBackwardGraph` and `ConstructOptimizationGraph`.
`ConstructBackwardGraph` traverses the forward graph in the `BlockDesc` protobuf message and builds the backward part.
![](images/graph_construction_example_forward_backward.png)
According to the chain rule of gradient computation, `ConstructBackwardGraph` would
1. create a gradient operator G for each operator F,
1. make all inputs, outputs, and outputs' gradient of F as inputs of G,
1. create gradients for all inputs of F, except for those who don't have gradients, like x and l, and
1. make all these gradients as outputs of G.
### Optimization Part
For each parameter, like W and b created by `layer.fc`, marked as double circles in above graphs, `ConstructOptimizationGraph` creates an optimization operator to apply its gradient. Here results in the complete graph:
![](images/graph_construction_example_all.png)

@ -0,0 +1,59 @@
IfOp should have only one branch. An IfOp operator takes a `cond` variable whose value must be a vector of N boolean elements. Its return value has M (M<=N) instances, each corresponds to a true element in `cond`.
```python
import paddle as pd
x = var()
y = var()
cond = var()
b = pd.create_ifop(inputs=[x], output_num=1)
with b.true_block():
x = b.inputs(0)
z = operator.add(x, y)
b.set_output(0, operator.softmax(z))
out = b(cond)
```
If we want the output still has N instances, we can use IfElseOp with a default value, whose minibatch size must be N:
```python
import paddle as pd
x = var()
y = var()
cond = var()
default_value = var()
b = pd.create_ifelseop(inputs=[x], output_num=1)
with b.true_block():
x = b.inputs(0)
z = operator.add(x, y)
b.set_output(0, operator.softmax(z))
with b.false_block():
x = b.inputs(0)
z = layer.fc(x)
b.set_output(0, operator.softmax(z))
out = b(cond)
```
If only true_block is set in an IfElseOp, we can have a default value for false as:
```python
import paddle as pd
x = var()
y = var()
cond = var()
default_value = var()
b = pd.create_ifelseop(inputs=[x], output_num=1, default_value)
with b.true_block():
x = b.inputs(0)
z = operator.add(x, y)
b.set_output(0, operator.softmax(z))
out = b(cond)
```
where default_value is a list of vars for `cond` == False.

@ -0,0 +1,11 @@
cat ./graph_construction_example.dot | \
sed 's/color=red/color=red, style=invis/g' | \
sed 's/color=green/color=green, style=invis/g' | \
dot -Tpng > graph_construction_example_forward_only.png
cat ./graph_construction_example.dot | \
sed 's/color=green/color=green, style=invis/g' | \
dot -Tpng > graph_construction_example_forward_backward.png
cat ./graph_construction_example.dot | \
dot -Tpng > graph_construction_example_all.png

@ -0,0 +1,65 @@
digraph ImageClassificationGraph {
///////// The forward part /////////
FeedX [label="Feed", color=blue, shape=box];
FeedY [label="Feed", color=blue, shape=box];
FC [label="FC", color=blue, shape=box];
MSE [label="MSE", color=blue, shape=box];
x [label="x", color=blue, shape=oval];
l [label="l", color=blue, shape=oval];
y [label="y", color=blue, shape=oval];
W [label="W", color=blue, shape=doublecircle];
b [label="b", color=blue, shape=doublecircle];
cost [label="cost", color=blue, shape=oval];
FeedX -> x -> FC -> y -> MSE -> cost [color=blue];
FeedY -> l [color=blue];
W -> FC [color=blue];
b -> FC [color=blue];
l -> MSE [color=blue];
////////// The backward part /////////
MSE_Grad [label="MSE_grad", color=red, shape=box];
FC_Grad [label="FC_grad", color=red, shape=box];
d_cost [label="d cost", color=red, shape=oval];
d_y [label="d y", color=red, shape=oval];
d_b [label="d b", color=red, shape=oval];
d_W [label="d W", color=red, shape=oval];
cost -> MSE_Grad [color=red];
d_cost -> MSE_Grad [color=red];
x -> MSE_Grad [color=red];
l -> MSE_Grad [color=red];
y -> MSE_Grad -> d_y [color=red];
x -> FC_Grad [color=red];
y -> FC_Grad [color=red];
d_y -> FC_Grad [color=red];
W -> FC_Grad -> d_W [color=red];
b -> FC_Grad -> d_b [color=red];
////////// The optimizaiton part //////////
OPT_W [label="SGD", color=green, shape=box];
OPT_b [label="SGD", color=green, shape=box];
W -> OPT_W [color=green];
b -> OPT_b [color=green];
d_W -> OPT_W -> W [color=green];
d_b -> OPT_b -> b [color=green];
////////// Groupings //////////
subgraph clusterMSE {
style=invis;
MSE;
MSE_Grad;
}
subgraph clusterFC {
style=invis;
FC;
FC_Grad;
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

@ -55,7 +55,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍
# 线性计算网络层: ȳ = wx + b
ȳ = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
# 计算误差函数,即 ȳ 和真实 y 之间的距离
cost = mse_cost(input= ȳ, label=y)
cost = square_error_cost(input= ȳ, label=y)
outputs(cost)
@ -69,7 +69,7 @@ PaddlePaddle是源于百度的一个深度学习平台。这份简短的介绍
- **数据层**:数据层 `data_layer` 是神经网络的入口,它读入数据并将它们传输到接下来的网络层。这里数据层有两个,分别对应于变量 `x``y`
- **全连接层**:全连接层 `fc_layer` 是基础的计算单元这里利用它建模变量之间的线性关系。计算单元是神经网络的核心PaddlePaddle支持大量的计算单元和任意深度的网络连接从而可以拟合任意的函数来学习复杂的数据关系。
- **回归误差代价层**:回归误差代价层 `mse_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。
- **回归误差代价层**:回归误差代价层 `square_error_cost` 是众多误差代价函数层的一种,它们在训练过程作为网络的出口,用来计算模型的误差,是模型参数优化的目标函数。
定义了网络结构并保存为 `trainer_config.py` 之后,运行以下训练命令:

@ -49,7 +49,7 @@ To recover this relationship between ``X`` and ``Y``, we use a neural network wi
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
cost = mse_cost(input=y_predict, label=y)
cost = square_error_cost(input=y_predict, label=y)
outputs(cost)
Some of the most fundamental usages of PaddlePaddle are demonstrated:

@ -8,7 +8,7 @@ paddle.init(use_gpu=False)
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2))
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)
cost = paddle.layer.square_error_cost(input=y_predict, label=y)
# create parameters
parameters = paddle.parameters.create(cost)

@ -81,9 +81,9 @@ PaddlePaddle支持不同类型的输入数据主要包括四种类型
.. code-block:: bash
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
cost = paddle.layer.mse_cost(input=y_predict, label=y)
cost = paddle.layer.square_error_cost(input=y_predict, label=y)
其中x与y为之前描述的输入层而y_predict是接收x作为输入接上一个全连接层cost接收y_predict与y作为输入接上方误差层。
其中x与y为之前描述的输入层而y_predict是接收x作为输入接上一个全连接层cost接收y_predict与y作为输入接上方误差层。
最后一层cost中记录了神经网络的所有拓扑结构通过组合不同的layer我们即可完成神经网络的搭建。
@ -147,4 +147,4 @@ PaddlePaddle支持不同类型的输入数据主要包括四种类型
.. literalinclude:: src/train.py
:linenos:
有关线性回归的实际应用可以参考PaddlePaddle book的 `第一章节 <http://book.paddlepaddle.org/index.html>`_
有关线性回归的实际应用可以参考PaddlePaddle book的 `第一章节 <http://book.paddlepaddle.org/index.html>`_

@ -178,13 +178,13 @@ class MulKernel : public framework::OpKernel {
```c++
namespace ops = paddle::operators;
REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, ops::MulOpGrad);
REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulOpGrad);
REGISTER_OP_CPU_KERNEL(mul, ops::MulKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(mul_grad,
ops::MulGradKernel<paddle::platform::CPUPlace, float>);
```
- `REGISTER_OP` 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`并且注册`ops::MulOpGrad`为其反向Op。
- `REGISTER_OP` 注册`ops::MulOp`类,类型名为`mul`,该类的`ProtoMaker`为`ops::MulOpMaker`注册`ops::MulOpGrad`,类型名为`mul_grad`
- `REGISTER_OP_WITHOUT_GRADIENT` 用于注册没有反向的Op。
- `REGISTER_OP_CPU_KERNEL` :注册`ops::MulKernel`类,并特化模板参数为`paddle::platform::CPUPlace`和`float`类型,同理,注册`ops::MulKernel`类。

@ -213,7 +213,7 @@ I1116 09:10:17.123440 50 Util.cpp:130] Calling runInitFunctions
I1116 09:10:17.123764 50 Util.cpp:143] Call runInitFunctions done.
[WARNING 2016-11-16 09:10:17,227 default_decorators.py:40] please use keyword arguments in paddle config.
[INFO 2016-11-16 09:10:17,239 networks.py:1282] The input order is [movie_id, title, genres, user_id, gender, age, occupation, rating]
[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__mse_cost_0__]
[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__square_error_cost_0__]
I1116 09:10:17.392917 50 Trainer.cpp:170] trainer mode: Normal
I1116 09:10:17.613910 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
I1116 09:10:17.680917 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process

@ -18,7 +18,7 @@ A backward network is built up with several backward operators. Backward operato
For example, we have got a `mul_op`, and we can register it's information and corresponding backward operator by the following macro:
```cpp
REGISTER_OP(mul, MulOp, MulOpMaker, MulOpGrad);
REGISTER_OP(mul, MulOp, MulOpMaker, mul_grad, MulOpGrad);
```
`mul` is the operator's type. `MulOp` and `MulOpMaker` are the operator class and the operator maker class respectively.

@ -127,8 +127,8 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker {
public:
FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("x", "x");
AddOutput("out", "out");
AddInput("Src", "x");
AddOutput("Dst", "out");
AddComment("");
}
};
@ -138,7 +138,7 @@ class AddOpMaker : public OpProtoAndCheckerMaker {
AddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "x").AsDuplicable();
AddOutput("Y", "y");
AddOutput("Out", "out");
AddComment("");
}
};
@ -148,14 +148,16 @@ class AddOpMaker : public OpProtoAndCheckerMaker {
namespace f = paddle::framework;
namespace ops = paddle::operators;
using EnforceNotMet = paddle::platform::EnforceNotMet;
REGISTER_OP(rowwise_add, f::NOP, f::RowWiseAddOpMaker, f::NOP);
REGISTER_OP(mul, f::NOP, f::MulOpMaker, f::NOP);
REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, f::NOP);
REGISTER_OP(rowwise_add, f::NOP, f::RowWiseAddOpMaker, rowwise_add_grad,
f::NOP);
REGISTER_OP(mul, f::NOP, f::MulOpMaker, mul_grad, f::NOP);
REGISTER_OP(sigmoid, f::NOP, f::SigmoidOpMaker, sigmoid_grad, f::NOP);
REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NOP, f::NoGradOpMaker);
REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NOP, f::FillZeroOpMaker);
REGISTER_OP(add, f::NOP, f::AddOpMaker, f::NOP);
REGISTER_OP(add, f::NOP, f::AddOpMaker, add_grad, f::NOP);
REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, f::NOP);
REGISTER_OP(many_output_op, f::NOP, f::ManyOutputOpMaker, many_output_op_grad,
f::NOP);
TEST(Backward, simple_op_grad) {
auto fwd = f::OpRegistry::CreateOp(

@ -54,8 +54,8 @@ TEST(GradOpBuilder, AddTwo) {
EXPECT_EQ(grad_add_op->Output(f::GradVarName("Y")), f::GradVarName("y"));
}
REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker, f::NOP);
REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker, f::NOP);
REGISTER_OP(mult_io, f::NOP, f::MutiInOutOpMaker, mult_io_grad, f::NOP);
REGISTER_OP(io_ignored, f::NOP, f::IOIgnoredOpMaker, io_ignored_grad, f::NOP);
TEST(GradOpBuilder, MutiInOut) {
std::shared_ptr<f::OperatorBase> test_op(f::OpRegistry::CreateOp(

@ -80,9 +80,19 @@ class OpInfoMap {
}
const OpInfo& Get(const std::string& type) const {
auto op_info_ptr = GetNullable(type);
PADDLE_ENFORCE_NOT_NULL(op_info_ptr, "Operator %s has not been registered",
type);
return *op_info_ptr;
}
const OpInfo* GetNullable(const std::string& type) const {
auto it = map_.find(type);
PADDLE_ENFORCE(it != map_.end(), "Operator %s are not found", type);
return it->second;
if (it == map_.end()) {
return nullptr;
} else {
return &it->second;
}
}
template <typename Callback>

@ -33,7 +33,8 @@ namespace framework {
class OpRegistry {
public:
template <typename OpType, typename ProtoMakerType, typename GradOpType>
static void RegisterOp(const std::string& op_type) {
static void RegisterOp(const std::string& op_type,
const std::string& grad_op_type) {
PADDLE_ENFORCE(!OpInfoMap::Instance().Has(op_type),
"'%s' is registered more than once.", op_type);
OpInfo op_info;
@ -42,9 +43,9 @@ class OpRegistry {
const VariableNameMap& outputs, const AttributeMap& attrs) {
return new OpType(type, inputs, outputs, attrs);
};
op_info.grad_op_type_ = grad_op_type;
if (std::type_index(typeid(ProtoMakerType)) !=
std::type_index(typeid(NOPMaker))) {
op_info.grad_op_type_ = op_type + "_grad";
op_info.proto_ = new OpProto;
op_info.checker_ = new OpAttrChecker;
auto maker = ProtoMakerType(op_info.proto_, op_info.checker_);
@ -54,14 +55,15 @@ class OpRegistry {
op_info.proto_->IsInitialized(),
"Fail to initialize %s's OpProto, because %s is not initialized",
op_type, op_info.proto_->InitializationErrorString());
// register gradient op
RegisterOp<GradOpType, NOPMaker, NOP>(op_info.grad_op_type_);
} else {
op_info.grad_op_type_ = "";
op_info.proto_ = nullptr;
op_info.checker_ = nullptr;
}
OpInfoMap::Instance().Insert(op_type, op_info);
// register gradient op
if (!grad_op_type.empty()) {
RegisterOp<GradOpType, NOPMaker, NOP>(grad_op_type, "");
}
}
static std::unique_ptr<OperatorBase> CreateOp(const std::string& type,
@ -90,8 +92,10 @@ class Registrar {
template <typename OpType, typename ProtoMakerType, typename GradOpType>
class OpRegistrar : public Registrar {
public:
explicit OpRegistrar(const char* op_type) {
OpRegistry::RegisterOp<OpType, ProtoMakerType, GradOpType>(op_type);
explicit OpRegistrar(const char* op_type) { OpRegistrar(op_type, ""); }
OpRegistrar(const char* op_type, const char* grad_op_type) {
OpRegistry::RegisterOp<OpType, ProtoMakerType, GradOpType>(op_type,
grad_op_type);
}
};
@ -117,7 +121,8 @@ class OpKernelRegistrar : public Registrar {
/**
* Macro to register Operator.
*/
#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_class) \
#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op__##op_type, "REGISTER_OP must be called in global namespace"); \
class _OpClass_##op_type##_ : public op_class { \
@ -132,14 +137,14 @@ class OpKernelRegistrar : public Registrar {
}; \
static ::paddle::framework::OpRegistrar< \
_OpClass_##op_type##_, op_maker_class, _OpGradClass_##op_type##_> \
__op_registrar_##op_type##__(#op_type); \
__op_registrar_##op_type##__(#op_type, #grad_op_type); \
int TouchOpRegistrar_##op_type() { \
__op_registrar_##op_type##__.Touch(); \
return 0; \
}
#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \
REGISTER_OP(op_type, op_class, op_maker_class, ::paddle::framework::NOP)
REGISTER_OP(op_type, op_class, op_maker_class, , ::paddle::framework::NOP)
/**
* Macro to register OperatorKernel.

@ -33,12 +33,12 @@ ExecutionContext::GetEigenDevice<platform::GPUPlace, Eigen::GpuDevice>() const {
}
#endif
const std::string& OperatorBase::Input(const std::string& name) const {
std::string OperatorBase::Input(const std::string& name) const {
auto& ins = Inputs(name);
PADDLE_ENFORCE_EQ(ins.size(), 1UL,
PADDLE_ENFORCE_LE(ins.size(), 1UL,
"Op %s input %s should contain only one variable", type_,
name);
return ins[0];
return ins.empty() ? kEmptyVarName : ins[0];
}
const std::vector<std::string>& OperatorBase::Inputs(
@ -49,12 +49,12 @@ const std::vector<std::string>& OperatorBase::Inputs(
return it->second;
}
const std::string& OperatorBase::Output(const std::string& name) const {
std::string OperatorBase::Output(const std::string& name) const {
auto& outs = Outputs(name);
PADDLE_ENFORCE_EQ(outs.size(), 1UL,
PADDLE_ENFORCE_LE(outs.size(), 1UL,
"Op %s output %s should contain only one variable", type_,
name);
return outs[0];
return outs.empty() ? kEmptyVarName : outs[0];
}
const std::vector<std::string>& OperatorBase::Outputs(
@ -119,16 +119,8 @@ OperatorBase::OperatorBase(const std::string& type,
const VariableNameMap& outputs,
const AttributeMap& attrs)
: type_(type), inputs_(inputs), outputs_(outputs), attrs_(attrs) {
static std::atomic<size_t> gUniqId(0UL);
for (auto& output : outputs_) {
for (auto& output_name : output.second) {
if (output_name == kTempVarName) {
output_name += type_;
output_name += "@";
output_name += std::to_string(gUniqId.fetch_add(1));
}
}
}
GenerateTemporaryNames();
CheckAllInputOutputSet();
}
std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
@ -156,6 +148,35 @@ std::vector<std::string> OperatorBase::OutputVars(bool has_intermediate) const {
return ret_val;
}
void OperatorBase::CheckAllInputOutputSet() const {
auto& info_map = OpInfoMap::Instance();
auto* op_info = info_map.GetNullable(Type());
if (op_info == nullptr || op_info->proto_ == nullptr) return;
for (auto& in : op_info->Proto().inputs()) {
PADDLE_ENFORCE(inputs_.find(in.name()) != inputs_.end(),
"Type %s's input %s is not set", Type(), in.name());
}
for (auto& out : op_info->Proto().outputs()) {
PADDLE_ENFORCE(outputs_.find(out.name()) != outputs_.end(),
"Type %s's output %s is not set", Type(), out.name());
}
}
void OperatorBase::GenerateTemporaryNames() {
static std::atomic<size_t> gUniqId(0UL);
for (auto& output : outputs_) {
for (auto& output_name : output.second) {
if (output_name == kTempVarName) {
output_name += type_;
output_name += "@";
output_name += std::to_string(gUniqId.fetch_add(1));
}
}
}
}
void OpProtoAndCheckerMaker::Validate() {
validated_ = true;
CheckNoDuplicatedInOutAttrs();

@ -95,12 +95,12 @@ class OperatorBase {
const VariableNameMap& Inputs() const { return inputs_; }
const VariableNameMap& Outputs() const { return outputs_; }
//! Get a input with argument's name described in `op_proto`
const std::string& Input(const std::string& name) const;
std::string Input(const std::string& name) const;
//! Get a input which has multiple variables.
const std::vector<std::string>& Inputs(const std::string& name) const;
//! Get a output with argument's name described in `op_proto`
const std::string& Output(const std::string& name) const;
std::string Output(const std::string& name) const;
//! Get an output which has multiple variables.
//! TODO add a vector_view to prevent memory copy.
const std::vector<std::string>& Outputs(const std::string& name) const;
@ -127,6 +127,10 @@ class OperatorBase {
// IG (Inputs Gradients)
VariableNameMap outputs_;
AttributeMap attrs_;
private:
void GenerateTemporaryNames();
void CheckAllInputOutputSet() const;
};
// Macro for define a clone method.
@ -229,6 +233,15 @@ class InferShapeContext {
InferShapeContext(const OperatorBase& op, const Scope& scope)
: op_(op), scope_(scope) {}
const OperatorBase& op() const { return op_; }
const Scope& scope() const { return scope_; }
template <typename T>
inline const T& GetAttr(const std::string& name) const {
return op_.GetAttr<T>(name);
}
size_t InputSize(const std::string& name) const {
return op_.Inputs(name).size();
}
@ -238,11 +251,13 @@ class InferShapeContext {
}
const Variable* InputVar(const std::string& name) const {
return scope_.FindVar(op_.Input(name));
auto ipt = op_.Input(name);
return ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
}
Variable* OutputVar(const std::string& name) const {
return scope_.FindVar(op_.Output(name));
auto opt = op_.Output(name);
return opt == kEmptyVarName ? nullptr : scope_.FindVar(opt);
}
const std::vector<const Variable*> MultiInputVar(
@ -250,9 +265,11 @@ class InferShapeContext {
auto names = op_.Inputs(name);
std::vector<const Variable*> res;
res.reserve(names.size());
std::transform(
names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) { return scope_.FindVar(name); });
std::transform(names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) {
return name == kEmptyVarName ? nullptr
: scope_.FindVar(name);
});
return res;
}
@ -260,24 +277,24 @@ class InferShapeContext {
auto names = op_.Outputs(name);
std::vector<const Variable*> res;
res.reserve(names.size());
std::transform(
names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) { return scope_.FindVar(name); });
std::transform(names.begin(), names.end(), std::back_inserter(res),
[this](const std::string& name) {
return name == kEmptyVarName ? nullptr
: scope_.FindVar(name);
});
return res;
}
template <typename T>
const T* Input(const std::string& name) const {
auto* var = InputVar(name);
PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name);
return &var->Get<T>();
return var == nullptr ? nullptr : &var->Get<T>();
}
template <typename T>
T* Output(const std::string& name) const {
auto var = OutputVar(name);
PADDLE_ENFORCE_NOT_NULL(var, "Output(%s) should not be nullptr", name);
return var->GetMutable<T>();
return var == nullptr ? nullptr : var->GetMutable<T>();
}
template <typename T>
@ -288,10 +305,7 @@ class InferShapeContext {
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE_NOT_NULL(
var, "MultiInput(%s:%s) should not be nullptr", name,
sub_name);
return &var->Get<T>();
return var == nullptr ? nullptr : &var->Get<T>();
});
return res;
}
@ -304,14 +318,12 @@ class InferShapeContext {
std::transform(names.begin(), names.end(), std::back_inserter(res),
[&](const std::string& sub_name) {
auto var = scope_.FindVar(sub_name);
PADDLE_ENFORCE_NOT_NULL(
var, "MultiOutput(%s:%s) should not be nullptr.", name,
sub_name);
return var->GetMutable<T>();
return var == nullptr ? nullptr : var->GetMutable<T>();
});
return res;
}
private:
const OperatorBase& op_;
const Scope& scope_;
};

@ -122,10 +122,10 @@ class CPUKernelTest : public OpKernel {
public:
void Compute(const ExecutionContext& ctx) const {
std::cout << "this is cpu kernel" << std::endl;
std::cout << ctx.op_.DebugString() << std::endl;
std::cout << ctx.op().DebugString() << std::endl;
cpu_kernel_run_num++;
ASSERT_EQ(ctx.op_.Input("x"), "IN1");
ASSERT_EQ(ctx.op_.Output("y"), "OUT1");
ASSERT_EQ(ctx.op().Input("x"), "IN1");
ASSERT_EQ(ctx.op().Output("y"), "OUT1");
}
};
@ -148,7 +148,7 @@ class OpKernelTestMultiInputsProtoAndCheckerMaker
class CPUKernalMultiInputsTest : public OpKernel {
public:
void Compute(const ExecutionContext& ctx) const {
auto xs = ctx.op_.Inputs("xs");
auto xs = ctx.op().Inputs("xs");
ASSERT_EQ(xs.size(), 3UL);
ASSERT_EQ(xs[0], "x0");
ASSERT_EQ(xs[1], "x1");
@ -172,10 +172,10 @@ class CPUKernalMultiInputsTest : public OpKernel {
auto outTensor0 = ctx.MultiOutput<Tensor>("ys");
ASSERT_EQ(outTensor0.size(), 2U);
auto k = ctx.op_.Input("k");
auto k = ctx.op().Input("k");
ASSERT_EQ(k, "k0");
auto ys = ctx.op_.Outputs("ys");
auto ys = ctx.op().Outputs("ys");
ASSERT_EQ(ys.size(), 2UL);
ASSERT_EQ(ys[0], "y0");
ASSERT_EQ(ys[1], "y1");

@ -14,18 +14,20 @@ limitations under the License. */
#include "Evaluator.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "paddle/utils/StringUtil.h"
namespace paddle {
/**
* calculate sequence-to-sequence edit distance
*/
class CTCErrorEvaluator : public NotGetableEvaluator {
class CTCErrorEvaluator : public Evaluator {
private:
MatrixPtr outActivations_;
int numTimes_, numClasses_, numSequences_, blank_;
real deletions_, insertions_, substitutions_;
int seqClassficationError_;
mutable std::unordered_map<std::string, real> evalResults_;
std::vector<int> path2String(const std::vector<int>& path) {
std::vector<int> str;
@ -183,6 +185,18 @@ private:
return stringAlignment(gtStr, recogStr);
}
void storeLocalValues() const {
evalResults_["error"] = numSequences_ ? totalScore_ / numSequences_ : 0;
evalResults_["deletion_error"] =
numSequences_ ? deletions_ / numSequences_ : 0;
evalResults_["insertion_error"] =
numSequences_ ? insertions_ / numSequences_ : 0;
evalResults_["substitution_error"] =
numSequences_ ? substitutions_ / numSequences_ : 0;
evalResults_["sequence_error"] =
(real)seqClassficationError_ / numSequences_;
}
public:
CTCErrorEvaluator()
: numTimes_(0),
@ -245,16 +259,12 @@ public:
}
virtual void printStats(std::ostream& os) const {
os << config_.name() << "="
<< (numSequences_ ? totalScore_ / numSequences_ : 0);
os << " deletions error"
<< "=" << (numSequences_ ? deletions_ / numSequences_ : 0);
os << " insertions error"
<< "=" << (numSequences_ ? insertions_ / numSequences_ : 0);
os << " substitutions error"
<< "=" << (numSequences_ ? substitutions_ / numSequences_ : 0);
os << " sequences error"
<< "=" << (real)seqClassficationError_ / numSequences_;
storeLocalValues();
os << config_.name() << " error = " << evalResults_["error"];
os << " deletions error = " << evalResults_["deletion_error"];
os << " insertions error = " << evalResults_["insertion_error"];
os << " substitution error = " << evalResults_["substitution_error"];
os << " sequence error = " << evalResults_["sequence_error"];
}
virtual void distributeEval(ParameterClient2* client) {
@ -272,6 +282,37 @@ public:
seqClassficationError_ = (int)buf[4];
numSequences_ = (int)buf[5];
}
void getNames(std::vector<std::string>* names) {
storeLocalValues();
names->reserve(names->size() + evalResults_.size());
for (auto it = evalResults_.begin(); it != evalResults_.end(); ++it) {
names->push_back(config_.name() + "." + it->first);
}
}
real getValue(const std::string& name, Error* err) const {
storeLocalValues();
std::vector<std::string> buffers;
paddle::str::split(name, '.', &buffers);
auto it = evalResults_.find(buffers[buffers.size() - 1]);
if (it == evalResults_.end()) {
*err = Error("Evaluator does not have the key %s", name.c_str());
return 0.0f;
}
return it->second;
}
std::string getType(const std::string& name, Error* err) const {
this->getValue(name, err);
if (!err->isOK()) {
return "";
}
return "ctc_edit_distance";
}
};
REGISTER_EVALUATOR(ctc_edit_distance, CTCErrorEvaluator);

@ -268,7 +268,13 @@ public:
}
// get type of evaluator
std::string getTypeImpl() const { return "chunk"; }
std::string getType(const std::string& name, Error* err) const {
this->getValue(name, err);
if (!err->isOK()) {
return "";
}
return "chunk";
}
private:
void storeLocalValues() const {

@ -211,6 +211,7 @@ public:
*err = Error("Not implemented");
return .0f;
}
std::string getType(const std::string& name, Error* err) const {
*err = Error("Not implemented");
return "";
@ -331,6 +332,7 @@ private:
protected:
std::string getTypeImpl() const;
};
/**
* @brief precision, recall and f1 score Evaluator
* \f[
@ -358,6 +360,12 @@ public:
virtual void distributeEval(ParameterClient2* client);
void getNames(std::vector<std::string>* names);
real getValue(const std::string& name, Error* err) const;
std::string getType(const std::string& name, Error* err) const;
struct StatsInfo {
/// numbers of true positives
double TP;
@ -428,11 +436,6 @@ private:
mutable std::unordered_map<std::string, real> values_;
void storeLocalValues() const;
// Evaluator interface
public:
void getNames(std::vector<std::string>* names);
real getValue(const std::string& name, Error* err) const;
std::string getType(const std::string& name, Error* err) const;
};
/*

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save