Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop

zchen0211 8 years ago
commit e59ca752fc

@ -0,0 +1,63 @@
# Prune
## Motivation
We want to support running inference, training and checkpointing in one `ProgramDesc`. We implement
a `void Prune(const ProgramDesc* input, ProgramDesc* output)` function, which takes a `ProgramDesc`
and generates a pruned `ProgramDesc`.
## Challenge
Pruning needs to support both variables and operators as evaluation targets. Consider the following
situations.
```python
# Case 1: run forward pass.
cost_np = session.run(target=cost)
# Case 2: run backward pass.
cost_np, _ = session.run(target=[cost, opt])
# Case 3: run checkpointing
_ = session.run(target=checkpoint)
```
## Solution
To support evaluation of operators, we add an `is_target` field to `OpDesc`.
```c++
message OpDesc {
  required string type = 3;
  repeated Var inputs = 1;
  repeated Var outputs = 2;
  repeated Attr attrs = 4;
  optional bool is_target = 5 [ default = false ];
};
```
To support evaluation of variables, we add a [fetch_op](https://github.com/PaddlePaddle/Paddle/pull/4599).
For each variable in `target`, we insert a `fetch_op` into the `ProgramDesc` with that variable as the
`fetch_op`'s input. Then we also mark the `fetch_op` as a target.
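A minimal sketch of this insertion against the generated protobuf API is shown below. The input parameter name `"X"` and the omission of the fetch column attribute are simplifying assumptions for illustration, not the actual `fetch_op` interface.
```c++
#include <string>
#include "paddle/framework/framework.pb.h"

namespace paddle {
namespace framework {

// Hypothetical helper: append a fetch_op for one target variable and mark it
// as a target, so that pruning keeps everything the variable depends on.
void AppendFetchTarget(const std::string& var_name, BlockDesc* block) {
  OpDesc* fetch = block->add_ops();
  fetch->set_type("fetch");
  auto* in = fetch->add_inputs();
  in->set_parameter("X");        // parameter name assumed for illustration
  in->add_arguments(var_name);   // the target variable feeds the fetch_op
  fetch->set_is_target(true);    // the fetch_op itself becomes a target
}

}  // namespace framework
}  // namespace paddle
```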
### Algorithm
If an operator needs to be run, it must fall into one of the following cases:
1. It is the target.
2. It is depended on by some other op, meaning its output is some other op's input.
The first case can be checked by `op_desc.is_target()`. The second case can be implemented as
```c++
bool HasDependentVar(const OpDesc& op_desc,
                     const std::set<std::string>& dependent_vars) {
  for (auto& var : op_desc.outputs()) {
    for (auto& argu : var.arguments()) {
      if (dependent_vars.count(argu) != 0) {
        return true;
      }
    }
  }
  return false;
}
```
Then the whole algorithm can be implemented as the following [code](https://github.com/tonyyang-svail/Paddle/blob/prune_impl/paddle/framework/prune.cc).
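As a usage sketch, a hypothetical convenience wrapper is shown below. It follows the reference signature `Prune(const ProgramDesc&, ProgramDesc&)` used by the linked implementation rather than the pointer form quoted above, and assumes the targets have already been marked (via `is_target` and inserted `fetch_op`s).
```c++
#include "paddle/framework/prune.h"

namespace paddle {
namespace framework {

// Hypothetical wrapper: returns a copy of `program` that keeps only the ops
// needed to compute the ops already marked with is_target.
ProgramDesc PruneForTargets(const ProgramDesc& program) {
  ProgramDesc pruned;
  Prune(program, pruned);
  return pruned;
}

}  // namespace framework
}  // namespace paddle
```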

@ -177,9 +177,6 @@ REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, grad_op_class)
REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class)
```
### USE Macros
Make sure the registration process is executed and linked.
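For example, a hedged sketch of forcing that linkage from a translation unit that uses an op; the exact macro name (`USE_OP`) and op name are assumed here for illustration:
```c++
#include "paddle/framework/op_registry.h"

// Without this, the linker may drop the object file containing the static
// registration of the "mul" op, because nothing else references its symbols.
USE_OP(mul);
```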
---
# Registration Process
1. Write an Op class and its gradient Op class, if required.
@ -188,8 +185,6 @@ Make sure the registration process is executed and linked.
1. Call maker class to complete `proto` and `checker`
2. Using the completed `proto` and `checker`, it will add a new key-value pair to the `OpInfoMap`
4. Invoke the `USE` macro in which the Op is used to make sure that it is linked.
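A concrete, illustrative instance of the registration call, following the `REGISTER_OP` form quoted above; the class names are assumptions, not the exact ones in the repository:
```c++
namespace ops = paddle::operators;

// Registers the forward op, its maker, and its gradient op in OpInfoMap.
REGISTER_OP(mul, ops::MulOp, ops::MulOpMaker, mul_grad, ops::MulGradOp);
```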
---
# Backward Module (1/2)
### Create Backward Operator

[Two file diffs suppressed because they are too large; two binary image files added (142 KiB and 33 KiB) are not shown.]

@ -0,0 +1,100 @@
import gzip
import math
import paddle.v2 as paddle

embsize = 32
hiddensize = 256
N = 5


def wordemb(inlayer):
    wordemb = paddle.layer.embedding(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0,
            sparse_update=True))
    return wordemb


def main():
    # for local training
    cluster_train = False

    if not cluster_train:
        paddle.init(use_gpu=False, trainer_count=1)
    else:
        paddle.init(
            use_gpu=False,
            trainer_count=2,
            port=7164,
            ports_num=1,
            ports_num_for_sparse=1,
            num_gradient_servers=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                with gzip.open("batch-" + str(event.batch_id) + ".tar.gz",
                               'w') as f:
                    trainer.save_parameter_to_tar(f)
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)


if __name__ == '__main__':
    main()

@ -0,0 +1,123 @@
import math
import os
import paddle.v2 as paddle
import pickle

embsize = 32
hiddensize = 256
N = 5
cluster_train_file = "./train_data_dir/train/train.txt"
cluster_test_file = "./test_data_dir/test/test.txt"
node_id = os.getenv("OMPI_COMM_WORLD_RANK")
if not node_id:
    raise EnvironmentError("must provide OMPI_COMM_WORLD_RANK")


def wordemb(inlayer):
    wordemb = paddle.layer.embedding(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0,
            sparse_update=True))
    return wordemb


def cluster_reader_cluster(filename, node_id):
    def cluster_reader():
        with open("-".join([filename, "%05d" % int(node_id)]), "r") as f:
            for l in f:
                csv_data = [int(cell) for cell in l.split(",")]
                yield tuple(csv_data)

    return cluster_reader


def main():
    # get arguments from env
    # for local training
    TRUTH = ["true", "True", "TRUE", "1", "yes", "Yes", "YES"]
    cluster_train = os.getenv('PADDLE_CLUSTER_TRAIN', "False") in TRUTH
    use_gpu = os.getenv('PADDLE_INIT_USE_GPU', "False")

    if not cluster_train:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")))
    else:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")),
            port=int(os.getenv("PADDLE_INIT_PORT", "7164")),
            ports_num=int(os.getenv("PADDLE_INIT_PORTS_NUM", "1")),
            ports_num_for_sparse=int(
                os.getenv("PADDLE_INIT_PORTS_NUM_FOR_SPARSE", "1")),
            num_gradient_servers=int(
                os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1")),
            trainer_id=int(os.getenv("PADDLE_INIT_TRAINER_ID", "0")),
            pservers=os.getenv("PADDLE_INIT_PSERVERS", "127.0.0.1"))
    fn = open("thirdparty/wuyi_train_thdpty/word_dict.pickle", "r")
    word_dict = pickle.load(fn)
    fn.close()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        cluster_reader_cluster(cluster_test_file, node_id), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
    trainer.train(
        paddle.batch(cluster_reader_cluster(cluster_train_file, node_id), 32),
        num_passes=30,
        event_handler=event_handler)


if __name__ == '__main__':
    main()

@ -0,0 +1,41 @@
import paddle.v2 as paddle
import tarfile
import os
import pickle

SPLIT_COUNT = 3
N = 5


def file_len(fd):
    for i, l in enumerate(fd):
        pass
    return i + 1


def split_from_reader_by_line(filename, reader, split_count):
    fn = open(filename, "w")
    for batch_id, batch_data in enumerate(reader()):
        batch_data_str = [str(d) for d in batch_data]
        fn.write(",".join(batch_data_str))
        fn.write("\n")
    fn.close()

    fn = open(filename, "r")
    total_line_count = file_len(fn)
    fn.close()

    per_file_lines = total_line_count / split_count + 1
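    # GNU `split -d -a 5` writes numeric suffixes such as train.txt-00000,
    # matching the "-".join([filename, "%05d" % node_id]) names that
    # cluster_reader_cluster() opens on each training node.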
cmd = "split -d -a 5 -l %d %s %s-" % (per_file_lines, filename, filename)
os.system(cmd)
word_dict = paddle.dataset.imikolov.build_dict()
with open("word_dict.pickle", "w") as dict_f:
pickle.dump(word_dict, dict_f)
split_from_reader_by_line("train.txt",
paddle.dataset.imikolov.train(word_dict, N),
SPLIT_COUNT)
split_from_reader_by_line("test.txt",
paddle.dataset.imikolov.test(word_dict, N),
SPLIT_COUNT)

@ -20,13 +20,14 @@ proto_library(framework_proto SRCS framework.proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope proto_desc glog)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
py_proto_compile(framework_py_proto SRCS framework.proto)
@ -42,7 +43,10 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
cc_library(backward SRCS backward.cc DEPS net_op)
cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog)
cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
cc_library(tensor_array SRCS tensor_array.cc DEPS lod_tensor)
cc_test(tensor_array_test SRCS tensor_array_test.cc DEPS tensor_array place)

@ -107,6 +107,19 @@ BlockDesc *BlockDescBind::Proto() {
Flush();
return desc_;
}
BlockDescBind::BlockDescBind(const BlockDescBind &other, BlockDesc *desc,
ProgramDescBind *prog)
: prog_(prog), desc_(desc) {
need_update_ = true;
for (auto &op : other.ops_) {
ops_.emplace_back(new OpDescBind(*op));
}
for (auto &it : other.vars_) {
auto *var = new VarDescBind(*it.second);
vars_[it.first].reset(var);
}
}
void BlockDescBind::ClearPBOps() {
auto ops = this->desc_->mutable_ops();

@ -16,8 +16,10 @@ limitations under the License. */
#include <deque>
#include <memory>
#include <set>
#include <unordered_map>
#include <vector>
#include "paddle/framework/op_desc.h"
#include "paddle/framework/var_desc.h"
#include "paddle/platform/macros.h"
@ -36,6 +38,9 @@ class BlockDescBind {
BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) {}
BlockDescBind(const BlockDescBind &other, BlockDesc *desc,
ProgramDescBind *prog);
~BlockDescBind() {
this->ClearPBVars();
this->ClearPBOps();
@ -51,6 +56,14 @@ class BlockDescBind {
bool HasVar(const std::string &var_name) const;
std::set<std::string> LocalVarNames() const {
std::set<std::string> var_names;
for (auto &var : vars_) {
var_names.insert(var.first);
}
return var_names;
}
std::vector<VarDescBind *> AllVars() const;
BlockDescBind *ParentBlock() const;

@ -68,9 +68,13 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id) {
for (auto& var : block.vars()) {
if (var.persistable()) {
scope->Var(var.name());
auto* ptr = scope->Var(var.name());
VLOG(3) << "Create Variable " << var.name()
<< " global, which pointer is " << ptr;
} else {
local_scope.Var(var.name());
auto* ptr = local_scope.Var(var.name());
VLOG(3) << "Create Variable " << var.name()
<< " locally, which pointer is " << ptr;
}
}

@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "glog/logging.h"
#include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/variable.h"
@ -24,6 +26,7 @@ void SetFeedVariable(const LoDTensor& input, const std::string& var_name,
size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = GetGlobalScope().Var(var_name);
auto& feed_inputs =
*(g_feed_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
@ -40,10 +43,15 @@ LoDTensor& GetFetchVariable(const std::string& var_name, size_t index) {
// Since we want to fetch LoDTensor from a variable, the variable must
// be created already.
Variable* g_fetch_value = GetGlobalScope().FindVar(var_name);
auto& fetch_outputs =
*(g_fetch_value->GetMutable<std::vector<paddle::framework::LoDTensor>>());
PADDLE_ENFORCE(g_fetch_value->IsType<FeedFetchList>(),
"Only %s can be invoked by GetFetchVariable",
typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index];
VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return fetch_outputs[index];
return tensor;
}
} // namespace framework

@ -55,6 +55,7 @@ message OpDesc {
repeated Var inputs = 1;
repeated Var outputs = 2;
repeated Attr attrs = 4;
optional bool is_target = 5 [ default = false ];
};
// OpProto describes a C++ framework::OperatorBase derived class.
@ -111,6 +112,8 @@ message VarDesc {
enum VarType {
LOD_TENSOR = 1;
SELECTED_ROWS = 2;
FEED_MINIBATCH = 3;
FETCH_LIST = 4;
}
required string name = 1;
required VarType type = 2;

@ -74,12 +74,12 @@ class LoDTensor : public Tensor {
LoD lod() const { return lod_; }
/*
* Get a element from LoD.
* Get the start offset and end offset of an element from LoD.
*/
size_t lod_element(size_t level, size_t elem) const {
std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const {
PADDLE_ENFORCE_LT(level, NumLevels());
PADDLE_ENFORCE_LT(elem, NumElements(level));
return (lod_)[level][elem];
return std::make_pair((lod_)[level][elem], (lod_)[level][elem + 1]);
}
/*

@ -36,8 +36,8 @@ TEST(LoDTensor, LoDInGPU) {
lod_tensor.mutable_data<float>(place);
lod_tensor.set_lod(src_lod);
CHECK_EQ(lod_tensor.lod_element(0, 2), 4UL);
CHECK_EQ(lod_tensor.lod_element(0, 4), 8UL);
CHECK_EQ(lod_tensor.lod_element(0, 2).first, 4UL);
CHECK_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
auto lod = lod_tensor.lod();

@ -20,6 +20,8 @@ limitations under the License. */
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include "glog/logging.h" // For VLOG()
#include "paddle/framework/attribute.h"
#include "paddle/framework/details/op_registry.h"
#include "paddle/framework/framework.pb.h"

@ -20,12 +20,13 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "op_info.h"
#include "glog/logging.h" // For VLOG
#include "paddle/framework/attribute.h"
#include "paddle/framework/block_desc.h"
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_info.h"
#include "paddle/framework/scope.h"
#include "paddle/framework/shape_inference.h"
#include "paddle/framework/tensor.h"
@ -573,6 +574,7 @@ class OperatorWithKernel : public OperatorBase {
void Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const final {
VLOG(3) << "Running operator " << this->Type();
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
this->InferShape(&infer_shape_ctx);

@ -39,5 +39,14 @@ ProgramDescBind::ProgramDescBind() {
block->set_parent_idx(-1);
blocks_.emplace_back(new BlockDescBind(this, block));
}
ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
prog_ = o.prog_;
for (int i = 0; i < prog_.blocks_size(); ++i) {
auto *block = prog_.mutable_blocks(i);
blocks_.emplace_back(new BlockDescBind(*o.blocks_[i], block, this));
}
}
} // namespace framework
} // namespace paddle

@ -28,6 +28,8 @@ class ProgramDescBind {
public:
ProgramDescBind();
ProgramDescBind(const ProgramDescBind &o);
BlockDescBind *AppendBlock(const BlockDescBind &parent);
BlockDescBind *Block(size_t idx) { return blocks_[idx].get(); }
@ -40,8 +42,6 @@ class ProgramDescBind {
ProgramDesc prog_;
std::vector<std::unique_ptr<BlockDescBind>> blocks_;
DISABLE_COPY_AND_ASSIGN(ProgramDescBind);
};
} // namespace framework
} // namespace paddle

@ -0,0 +1,83 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/program_desc.h"
#include "gtest/gtest.h"
#include "paddle/framework/block_desc.h"
namespace paddle {
namespace framework {
TEST(ProgramDesc, copy_ctor) {
ProgramDescBind program;
auto* global_block = program.Block(0);
auto* x = global_block->Var("X");
x->SetType(VarDesc_VarType_LOD_TENSOR);
x->SetLoDLevel(0);
x->SetDataType(FP32);
x->SetShape({1000, 784});
auto* y = global_block->Var("Y");
y->SetType(VarDesc_VarType_LOD_TENSOR);
y->SetLoDLevel(0);
y->SetDataType(FP32);
y->SetShape({784, 100});
auto* op = global_block->AppendOp();
op->SetType("mul");
op->SetInput("X", {x->Name()});
op->SetInput("Y", {y->Name()});
auto* out = global_block->Var("Out");
out->SetType(VarDesc_VarType_LOD_TENSOR);
op->SetOutput("Y", {out->Name()});
ProgramDescBind program_copy(program);
auto* global_block_copy = program_copy.Block(0);
ASSERT_NE(global_block, global_block_copy);
auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) {
ASSERT_TRUE(global_block_copy->HasVar(name));
auto* copy = global_block_copy->Var(name);
ASSERT_NE(copy, var_before);
ASSERT_EQ(copy->Name(), var_before->Name());
ASSERT_EQ(copy->GetType(), var_before->GetType());
ASSERT_EQ(copy->Shape(), var_before->Shape());
ASSERT_EQ(copy->Proto()->SerializeAsString(),
var_before->Proto()->SerializeAsString());
};
ASSERT_EQ(global_block->LocalVarNames(), global_block_copy->LocalVarNames());
ASSERT_EQ(3, global_block_copy->LocalVarNames().size());
assert_same_var("X", x);
assert_same_var("Y", y);
assert_same_var("Out", out);
for (size_t i = 0; i < global_block->OpSize(); ++i) {
auto op_origin = global_block->Op(i);
auto op_copy = global_block_copy->Op(i);
ASSERT_EQ(op_origin->Type(), op_copy->Type());
ASSERT_EQ(op_origin->Inputs(), op_copy->Inputs());
ASSERT_EQ(op_origin->Outputs(), op_copy->Outputs());
ASSERT_EQ(op_copy->Proto()->SerializeAsString(),
op_origin->Proto()->SerializeAsString());
}
// We do not check that the blocks' proto strings are identical, because the
// order of vars could be different while still being correct.
}
} // namespace framework
} // namespace paddle

@ -0,0 +1,109 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/prune.h"
#include <algorithm>
#include <set>
#include <string>
#include <vector>
#include <glog/logging.h>
namespace paddle {
namespace framework {
const std::string kFeedOpType = "feed";
const std::string kFetchOpType = "fetch";
bool HasDependentVar(const OpDesc& op_desc,
const std::set<std::string>& dependent_vars) {
for (auto& var : op_desc.outputs()) {
for (auto& argu : var.arguments()) {
if (dependent_vars.count(argu) != 0) {
return true;
}
}
}
return false;
}
bool IsTarget(const OpDesc& op_desc) {
if (op_desc.has_is_target()) {
return op_desc.is_target();
}
return false;
}
void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
auto& block = input.blocks(block_id);
auto& ops = block.ops();
bool expect_feed = true;
for (auto& op_desc : ops) {
PADDLE_ENFORCE(op_desc.type() != kFeedOpType || expect_feed,
"All FeedOps are at the beginning of the ProgramDesc");
expect_feed = (op_desc.type() == kFeedOpType);
}
bool expect_fetch = true;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
PADDLE_ENFORCE(op_desc.type() != kFetchOpType || expect_fetch,
"All FetchOps must at the end of the ProgramDesc");
expect_fetch = (op_desc.type() == kFetchOpType);
}
std::set<std::string> dependent_vars;
std::vector<bool> should_run;
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
auto& op_desc = *op_iter;
if (IsTarget(op_desc) || HasDependentVar(op_desc, dependent_vars)) {
// insert its input to the dependency graph
for (auto& var : op_desc.inputs()) {
for (auto& argu : var.arguments()) {
dependent_vars.insert(argu);
}
}
should_run.push_back(true);
} else {
should_run.push_back(false);
}
}
// since we are traversing the ProgramDesc in reverse order
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());
output = input;
auto* op_field = output.mutable_blocks(block_id)->mutable_ops();
op_field->Clear();
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
*op_field->Add() = input.blocks(block_id).ops(i);
}
}
}
void Prune(const ProgramDesc& input, ProgramDesc& output) {
prune_impl(input, output, 0);
}
} // namespace framework
} // namespace paddle

@ -0,0 +1,26 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/framework.pb.h"
#include "paddle/platform/enforce.h"
namespace paddle {
namespace framework {
void Prune(const ProgramDesc& input, ProgramDesc& output);
} // namespace framework
} // namespace paddle

Some files were not shown because too many files have changed in this diff.
