Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
commit
e59ca752fc
Binary file not shown.
@ -0,0 +1,63 @@
|
||||
# Prune
|
||||
|
||||
## Motivation
|
||||
|
||||
We want to support running inference, training and checkpointing in one `ProgramDesc`. We implement
|
||||
`void Prune(const ProgramDesc* input, ProgramDesc* output)` function, which takes a `ProgramDesc`
|
||||
and generates a pruned `ProgramDesc`.
|
||||
|
||||
## Challenge
|
||||
|
||||
Pruning needs to support both variables and operators being evaluation targets. Consider the following
|
||||
different situations.
|
||||
|
||||
```python
|
||||
# Case 1: run forward pass.
|
||||
cost_np = session.run(target=cost)
|
||||
# Case 2: run backward pass.
|
||||
opts_np, _ = session.run(target=[cost, opt])
|
||||
# Case 3: run checkpointing
|
||||
_ = session.run(target=checkpoint)
|
||||
```
|
||||
|
||||
## Solution
|
||||
|
||||
To support evaluation of operators, we add `is_target` field in the `OpDesc`.
|
||||
|
||||
```c++
|
||||
message OpDesc {
|
||||
required string type = 3;
|
||||
repeated Var inputs = 1;
|
||||
repeated Var outputs = 2;
|
||||
repeated Attr attrs = 4;
|
||||
optional bool is_target = 5 [ default = false ];
|
||||
};
|
||||
```
|
||||
|
||||
To support evaluation of variables, we add [fetch_op](https://github.com/PaddlePaddle/Paddle/pull/4599).
|
||||
For each variable in the `target`, we insert a `fetch_op` into the `ProgramDesc` with `variable` being
|
||||
`fetch_op`'s input. Then we also mark that `fetch_op` as a target.
|
||||
|
||||
### Algorithm
|
||||
|
||||
If an operator needs to be run, it must fall into one of the following cases:
|
||||
|
||||
1. It is the target.
|
||||
2. It is depended on by some other ops, meaning its output is some other op's input.
|
||||
|
||||
The first case can be checked by `op_desc.is_target()`. The second case can be implemented as
|
||||
|
||||
```c++
|
||||
bool HasDependentVar(const OpDesc& op_desc, const std::set<string>& dependent_vars) {
|
||||
for (auto& var : op_desc.outputs()) {
|
||||
for (auto& argu : var.arguments()) {
|
||||
if (dependent_vars.count(argu) != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
```
|
||||
|
||||
Then the whole algorithm can be implemented as the following [code](https://github.com/tonyyang-svail/Paddle/blob/prune_impl/paddle/framework/prune.cc).
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
After Width: | Height: | Size: 142 KiB |
After Width: | Height: | Size: 33 KiB |
@ -0,0 +1,100 @@
|
||||
import gzip
|
||||
import math
|
||||
|
||||
import paddle.v2 as paddle
|
||||
|
||||
embsize = 32
|
||||
hiddensize = 256
|
||||
N = 5
|
||||
|
||||
|
||||
def wordemb(inlayer):
    """Return an embedding layer of width ``embsize`` applied to *inlayer*.

    Every call passes a parameter attribute named ``"_proj"`` with sparse
    updates enabled (presumably so all word positions share one embedding
    table -- confirm against the paddle.v2 parameter-naming rules).
    """
    projection_attr = paddle.attr.Param(
        name="_proj",
        initial_std=0.001,
        learning_rate=1,
        l2_rate=0,
        sparse_update=True)
    return paddle.layer.embedding(
        input=inlayer, size=embsize, param_attr=projection_attr)
|
||||
|
||||
|
||||
def main():
    """Build and train the word-prediction network on the imikolov dataset.

    Four context words are embedded, concatenated and fed through a sigmoid
    hidden layer and a softmax output layer that predicts the fifth word.
    Every 100 batches the parameters are written to a gzip-compressed tar
    file and the test-set metrics are printed.
    """
    # for local training
    cluster_train = False

    if cluster_train:
        init_kwargs = dict(
            use_gpu=False,
            trainer_count=2,
            port=7164,
            ports_num=1,
            ports_num_for_sparse=1,
            num_gradient_servers=1)
    else:
        init_kwargs = dict(use_gpu=False, trainer_count=1)
    paddle.init(**init_kwargs)

    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    def word_input(layer_name):
        # One integer-valued data layer per word position.
        return paddle.layer.data(
            name=layer_name, type=paddle.data_type.integer_value(dict_size))

    # Layers are created in the same order as before: four context words,
    # then the label word.
    firstword = word_input("firstw")
    secondword = word_input("secondw")
    thirdword = word_input("thirdw")
    fourthword = word_input("fourthw")
    nextword = word_input("fifthw")

    context_embeddings = [
        wordemb(word)
        for word in (firstword, secondword, thirdword, fourthword)
    ]
    contextemb = paddle.layer.concat(input=context_embeddings)

    hidden_act = paddle.activation.Sigmoid()
    hidden_extra = paddle.attr.Extra(drop_rate=0.5)
    hidden_bias = paddle.attr.Param(learning_rate=2)
    hidden_weights = paddle.attr.Param(
        initial_std=1. / math.sqrt(embsize * 8), learning_rate=1)
    hidden1 = paddle.layer.fc(
        input=contextemb,
        size=hiddensize,
        act=hidden_act,
        layer_attr=hidden_extra,
        bias_attr=hidden_bias,
        param_attr=hidden_weights)

    output_bias = paddle.attr.Param(learning_rate=2)
    predictword = paddle.layer.fc(
        input=hidden1,
        size=dict_size,
        bias_attr=output_bias,
        act=paddle.activation.Softmax())

    def event_handler(event):
        # Only act at the end of every 100th training batch.
        if not isinstance(event, paddle.event.EndIteration):
            return
        if event.batch_id % 100 != 0:
            return

        # `trainer` is bound later in main(); the closure resolves it when
        # the handler is actually invoked during training.
        checkpoint_name = "batch-" + str(event.batch_id) + ".tar.gz"
        with gzip.open(checkpoint_name, 'w') as f:
            trainer.save_parameter_to_tar(f)

        test_batches = paddle.batch(
            paddle.dataset.imikolov.test(word_dict, N), 32)
        result = trainer.test(test_batches)
        print("Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
            event.pass_id, event.batch_id, event.cost, event.metrics,
            result.metrics))

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)

    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(
        cost, parameters, adagrad, is_local=not cluster_train)

    train_batches = paddle.batch(
        paddle.dataset.imikolov.train(word_dict, N), 32)
    trainer.train(
        train_batches, num_passes=30, event_handler=event_handler)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -0,0 +1,123 @@
|
||||
import math
|
||||
import os
|
||||
import paddle.v2 as paddle
|
||||
import pickle
|
||||
|
||||
embsize = 32
|
||||
hiddensize = 256
|
||||
N = 5
|
||||
cluster_train_file = "./train_data_dir/train/train.txt"
|
||||
cluster_test_file = "./test_data_dir/test/test.txt"
|
||||
# Index of this node, read from the OMPI_COMM_WORLD_RANK environment
# variable. It is later formatted as a 5-digit suffix to pick this node's
# shard of the train/test files, so the script cannot run without it.
node_id = os.getenv("OMPI_COMM_WORLD_RANK")
if not node_id:
    raise EnvironmentError("must provide OMPI_COMM_WORLD_RANK")
|
||||
|
||||
|
||||
def wordemb(inlayer):
    """Return an embedding layer of width ``embsize`` applied to *inlayer*.

    Every call passes a parameter attribute named ``"_proj"`` with sparse
    updates enabled (presumably so all word positions share one embedding
    table -- confirm against the paddle.v2 parameter-naming rules).
    """
    projection_attr = paddle.attr.Param(
        name="_proj",
        initial_std=0.001,
        learning_rate=1,
        l2_rate=0,
        sparse_update=True)
    return paddle.layer.embedding(
        input=inlayer, size=embsize, param_attr=projection_attr)
|
||||
|
||||
|
||||
def cluster_reader_cluster(filename, node_id):
    """Create a reader over one node's shard of a comma-separated data file.

    Args:
        filename: Base path of the data set; the shard that is read is
            ``"<filename>-<node_id zero-padded to 5 digits>"``.
        node_id: Shard index, as an int or a string convertible by ``int``.

    Returns:
        A zero-argument callable. Each call opens the shard and yields one
        tuple of ints per non-blank line.

    Raises:
        ValueError: (while iterating) if a cell is not an integer.
        IOError: (while iterating) if the shard file cannot be opened.
    """

    def cluster_reader():
        shard_path = "-".join([filename, "%05d" % int(node_id)])
        with open(shard_path, "r") as f:
            for line in f:
                # Blank lines (e.g. a trailing empty line) carry no sample;
                # skip them instead of failing in int('').
                if not line.strip():
                    continue
                yield tuple(int(cell) for cell in line.split(","))

    return cluster_reader
|
||||
|
||||
|
||||
def main():
    """Train the word-prediction network, locally or as one cluster trainer.

    All runtime settings are read from ``PADDLE_*`` environment variables.
    The word dictionary is loaded from a pickle file and the train/test
    samples come from this node's shard of ``cluster_train_file`` /
    ``cluster_test_file``. Every 100 batches the test metrics are printed.
    """
    # get arguments from env

    # for local training
    TRUTH = ["true", "True", "TRUE", "1", "yes", "Yes", "YES"]
    cluster_train = os.getenv('PADDLE_CLUSTER_TRAIN', "False") in TRUTH
    # NOTE(review): use_gpu is forwarded as the raw environment string, not
    # a bool -- confirm paddle.init interprets "False"/"True" as intended.
    use_gpu = os.getenv('PADDLE_INIT_USE_GPU', "False")

    if not cluster_train:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")))
    else:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")),
            port=int(os.getenv("PADDLE_INIT_PORT", "7164")),
            ports_num=int(os.getenv("PADDLE_INIT_PORTS_NUM", "1")),
            ports_num_for_sparse=int(
                os.getenv("PADDLE_INIT_PORTS_NUM_FOR_SPARSE", "1")),
            num_gradient_servers=int(
                os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1")),
            trainer_id=int(os.getenv("PADDLE_INIT_TRAINER_ID", "0")),
            pservers=os.getenv("PADDLE_INIT_PSERVERS", "127.0.0.1"))

    # The handle is closed even if unpickling fails, and the file is opened
    # in binary mode because a pickle stream is binary data.
    # NOTE: pickle.load can execute arbitrary code; this file must come from
    # a trusted source.
    with open("thirdparty/wuyi_train_thdpty/word_dict.pickle", "rb") as fn:
        word_dict = pickle.load(fn)
    dict_size = len(word_dict)

    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        # `trainer` is bound later in main(); the closure resolves it when
        # the handler is invoked during training.
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        cluster_reader_cluster(cluster_test_file, node_id),
                        32))
                print("Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics))

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
    trainer.train(
        paddle.batch(cluster_reader_cluster(cluster_train_file, node_id), 32),
        num_passes=30,
        event_handler=event_handler)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -0,0 +1,41 @@
|
||||
import paddle.v2 as paddle
|
||||
import tarfile
|
||||
import os
|
||||
import pickle
|
||||
|
||||
SPLIT_COUNT = 3
|
||||
N = 5
|
||||
|
||||
|
||||
def file_len(fd):
    """Return the number of items (lines) produced by iterating *fd*.

    An empty iterable yields 0; the previous loop-variable implementation
    raised UnboundLocalError in that case.
    """
    return sum(1 for _ in fd)
|
||||
|
||||
|
||||
def split_from_reader_by_line(filename, reader, split_count):
    """Dump *reader* to a CSV-like file and shard it with ``split``.

    Each sample yielded by ``reader()`` is written to *filename* as one line
    of comma-separated values. The file is then cut by the external
    ``split`` utility into pieces named ``<filename>-00000``,
    ``<filename>-00001``, ... of ``total_lines // split_count + 1`` lines.

    Args:
        filename: Path of the file to write; also the prefix of the shards.
        reader: Zero-argument callable returning an iterable of samples,
            each sample being an iterable of values.
        split_count: Number of shards the line count is divided by.

    Raises:
        OSError: if the ``split`` executable cannot be started.
    """
    import subprocess

    # Count lines while writing instead of re-reading the file afterwards.
    total_line_count = 0
    with open(filename, "w") as fn:
        for batch_data in reader():
            fn.write(",".join(str(d) for d in batch_data))
            fn.write("\n")
            total_line_count += 1

    # Floor division keeps the Python 2 integer semantics explicit.
    per_file_lines = total_line_count // split_count + 1
    # An argument list avoids shell interpretation of `filename`.
    subprocess.call([
        "split", "-d", "-a", "5", "-l", str(per_file_lines), filename,
        filename + "-"
    ])
|
||||
|
||||
|
||||
# Build the imikolov vocabulary and persist it next to the data shards.
word_dict = paddle.dataset.imikolov.build_dict()
# A pickle stream is binary data, so the file is opened in binary mode.
with open("word_dict.pickle", "wb") as dict_f:
    pickle.dump(word_dict, dict_f)

# Dump the train and test sets to text files and cut each into shards.
split_from_reader_by_line("train.txt",
                          paddle.dataset.imikolov.train(word_dict, N),
                          SPLIT_COUNT)
split_from_reader_by_line("test.txt",
                          paddle.dataset.imikolov.test(word_dict, N),
                          SPLIT_COUNT)
|
@ -0,0 +1,83 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/framework/program_desc.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "paddle/framework/block_desc.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
// Verifies that copy-constructing a ProgramDescBind produces an independent
// program whose global block holds equal variables and operators.
TEST(ProgramDesc, copy_ctor) {
  ProgramDescBind program;
  auto* global_block = program.Block(0);
  auto* x = global_block->Var("X");
  x->SetType(VarDesc_VarType_LOD_TENSOR);
  x->SetLoDLevel(0);
  x->SetDataType(FP32);
  x->SetShape({1000, 784});

  auto* y = global_block->Var("Y");
  y->SetType(VarDesc_VarType_LOD_TENSOR);
  y->SetLoDLevel(0);
  y->SetDataType(FP32);
  y->SetShape({784, 100});

  auto* op = global_block->AppendOp();
  op->SetType("mul");
  op->SetInput("X", {x->Name()});
  op->SetInput("Y", {y->Name()});

  auto* out = global_block->Var("Out");
  out->SetType(VarDesc_VarType_LOD_TENSOR);
  op->SetOutput("Y", {out->Name()});

  ProgramDescBind program_copy(program);

  auto* global_block_copy = program_copy.Block(0);
  // The copy must own its own block object, not alias the original one.
  ASSERT_NE(global_block, global_block_copy);

  // Checks that the copied block has a distinct variable object named `name`
  // that is equal to `var_before`.
  auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) {
    ASSERT_TRUE(global_block_copy->HasVar(name));
    auto* copy = global_block_copy->Var(name);
    ASSERT_NE(copy, var_before);
    ASSERT_EQ(copy->Name(), var_before->Name());
    ASSERT_EQ(copy->GetType(), var_before->GetType());
    ASSERT_EQ(copy->Shape(), var_before->Shape());
    ASSERT_EQ(copy->Proto()->SerializeAsString(),
              var_before->Proto()->SerializeAsString());
  };

  ASSERT_EQ(global_block->LocalVarNames(), global_block_copy->LocalVarNames());
  ASSERT_EQ(3, global_block_copy->LocalVarNames().size());
  assert_same_var("X", x);
  assert_same_var("Y", y);
  assert_same_var("Out", out);

  // Both blocks must hold the same number of ops before comparing pairwise.
  ASSERT_EQ(global_block->OpSize(), global_block_copy->OpSize());
  for (size_t i = 0; i < global_block->OpSize(); ++i) {
    auto op_origin = global_block->Op(i);
    // Read from the copied block; reading from `global_block` here would
    // compare every op with itself and verify nothing.
    auto op_copy = global_block_copy->Op(i);

    ASSERT_EQ(op_origin->Type(), op_copy->Type());
    ASSERT_EQ(op_origin->Inputs(), op_copy->Inputs());
    ASSERT_EQ(op_origin->Outputs(), op_copy->Outputs());

    ASSERT_EQ(op_copy->Proto()->SerializeAsString(),
              op_origin->Proto()->SerializeAsString());
  }

  // The serialized blocks are deliberately not compared: the order of the
  // vars may legitimately differ between the original and the copy.
}
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,109 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/framework/prune.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
|
||||
const std::string kFeedOpType = "feed";
|
||||
const std::string kFetchOpType = "fetch";
|
||||
|
||||
bool HasDependentVar(const OpDesc& op_desc,
|
||||
const std::set<std::string>& dependent_vars) {
|
||||
for (auto& var : op_desc.outputs()) {
|
||||
for (auto& argu : var.arguments()) {
|
||||
if (dependent_vars.count(argu) != 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsTarget(const OpDesc& op_desc) {
|
||||
if (op_desc.has_is_target()) {
|
||||
return op_desc.is_target();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
|
||||
// TODO(tonyyang-svail):
|
||||
// - will change to use multiple blocks for RNN op and Cond Op
|
||||
|
||||
auto& block = input.blocks(block_id);
|
||||
auto& ops = block.ops();
|
||||
|
||||
bool expect_feed = true;
|
||||
for (auto& op_desc : ops) {
|
||||
PADDLE_ENFORCE(op_desc.type() != kFeedOpType || expect_feed,
|
||||
"All FeedOps are at the beginning of the ProgramDesc");
|
||||
expect_feed = (op_desc.type() == kFeedOpType);
|
||||
}
|
||||
|
||||
bool expect_fetch = true;
|
||||
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
|
||||
auto& op_desc = *op_iter;
|
||||
PADDLE_ENFORCE(op_desc.type() != kFetchOpType || expect_fetch,
|
||||
"All FetchOps must at the end of the ProgramDesc");
|
||||
expect_fetch = (op_desc.type() == kFetchOpType);
|
||||
}
|
||||
|
||||
std::set<std::string> dependent_vars;
|
||||
std::vector<bool> should_run;
|
||||
for (auto op_iter = ops.rbegin(); op_iter != ops.rend(); ++op_iter) {
|
||||
auto& op_desc = *op_iter;
|
||||
|
||||
if (IsTarget(op_desc) || HasDependentVar(op_desc, dependent_vars)) {
|
||||
// insert its input to the dependency graph
|
||||
for (auto& var : op_desc.inputs()) {
|
||||
for (auto& argu : var.arguments()) {
|
||||
dependent_vars.insert(argu);
|
||||
}
|
||||
}
|
||||
|
||||
should_run.push_back(true);
|
||||
} else {
|
||||
should_run.push_back(false);
|
||||
}
|
||||
}
|
||||
|
||||
// since we are traversing the ProgramDesc in reverse order
|
||||
// we reverse the should_run vector
|
||||
std::reverse(should_run.begin(), should_run.end());
|
||||
|
||||
output = input;
|
||||
auto* op_field = output.mutable_blocks(block_id)->mutable_ops();
|
||||
op_field->Clear();
|
||||
for (size_t i = 0; i < should_run.size(); ++i) {
|
||||
if (should_run[i]) {
|
||||
*op_field->Add() = input.blocks(block_id).ops(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Public entry point: prunes `input` into `output` by running prune_impl on
// block 0 only.
void Prune(const ProgramDesc& input, ProgramDesc& output) {
  const int root_block_id = 0;
  prune_impl(input, output, root_block_id);
}
|
||||
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
@ -0,0 +1,26 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/framework/framework.pb.h"
|
||||
#include "paddle/platform/enforce.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace framework {
|
||||
|
||||
// Writes a pruned copy of `input` into `output`: block 0 keeps only the ops
// marked `is_target` and the ops whose outputs are consumed by a kept op.
void Prune(const ProgramDesc& input, ProgramDesc& output);
|
||||
|
||||
} // namespace framework
|
||||
} // namespace paddle
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue