Merge remote-tracking branch 'upstream/develop' into windows/build

panyx0718-patch-1
peizhilin 6 years ago
commit 30ddc07a7e

@ -43,6 +43,7 @@
| qingqing01 | Qing-Qing Dang |
| reyoung | Yang Yu |
| Superjom | Chun-Wei Yan |
| tensor-tang | Jian Tang |
| tianbingsz | Tian-Bing Xu |
| tpatejko | Tomasz Patejko |
| typhoonzero | Yi Wu |

@ -184,11 +184,10 @@ endif ()
set(module "inference")
copy(inference_lib DEPS ${inference_deps}
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${src_dir}/${module}/api/paddle_inference_api.h
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
)
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${src_dir}/${module}/api/paddle_*.h
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
set(module "platform")
copy(platform_lib DEPS profiler_py_proto
@ -223,12 +222,12 @@ copy(third_party DEPS fluid_lib_dist
DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
)
# only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library
# only need libpaddle_fluid.so/a and paddle_*.h for inference-only library
copy(inference_api_lib DEPS fluid_lib_dist
SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
)
SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_*.h
DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
)
add_custom_target(inference_lib_dist DEPENDS third_party inference_api_lib)

@ -34,4 +34,5 @@ if(TENSORRT_FOUND)
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()

@ -103,7 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode'], varargs=None, keywords=None, defaults=(False, -100, False))
paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
@ -184,6 +184,7 @@ paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name']
paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@ -273,6 +274,7 @@ paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, k
paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.thresholded_relu ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.prior_box ArgSpec(args=['input', 'image', 'min_sizes', 'max_sizes', 'aspect_ratios', 'variance', 'flip', 'clip', 'steps', 'offset', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, [1.0], [0.1, 0.1, 0.2, 0.2], False, False, [0.0, 0.0], 0.5, None, False))
paddle.fluid.layers.density_prior_box ArgSpec(args=['input', 'image', 'densities', 'fixed_sizes', 'fixed_ratios', 'variance', 'clip', 'steps', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, None, [0.1, 0.1, 0.2, 0.2], False, [0.0, 0.0], 0.5, None))
paddle.fluid.layers.multi_box_head ArgSpec(args=['inputs', 'image', 'base_size', 'num_classes', 'aspect_ratios', 'min_ratio', 'max_ratio', 'min_sizes', 'max_sizes', 'steps', 'step_w', 'step_h', 'offset', 'variance', 'flip', 'clip', 'kernel_size', 'pad', 'stride', 'name', 'min_max_aspect_ratios_order'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None, None, 0.5, [0.1, 0.1, 0.2, 0.2], True, False, 1, 0, 1, None, False))
paddle.fluid.layers.bipartite_match ArgSpec(args=['dist_matrix', 'match_type', 'dist_threshold', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.target_assign ArgSpec(args=['input', 'matched_indices', 'negative_indices', 'mismatch_value', 'name'], varargs=None, keywords=None, defaults=(None, None, None))

@ -30,8 +30,8 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor(
local_scopes_(local_scopes),
places_(places),
graph_(std::move(graph)),
pool_(strategy.num_threads_ +
1), // add one more thread for generate op_deps
pool_(strategy.num_threads_),
prepare_pool_(1), // add one more thread for generate op_deps
fetch_ctxs_(places) {
for (auto &op : ir::FilterByNodeWrapper<OpHandleBase>(*graph_)) {
int dep = static_cast<int>(op->NotReadyInputSize());
@ -160,7 +160,7 @@ void FastThreadedSSAGraphExecutor::RunOpAsync(
});
}
void FastThreadedSSAGraphExecutor::PrepareAtomicOpDeps() {
atomic_op_deps_ = pool_.enqueue([&] {
atomic_op_deps_ = prepare_pool_.enqueue([&] {
auto *op_deps = new std::unordered_map<OpHandleBase *, std::atomic<int>>;
for (auto &pair : op_deps_) {
(*op_deps)[pair.first] = pair.second;

@ -46,6 +46,7 @@ class FastThreadedSSAGraphExecutor : public SSAGraphExecutor {
std::vector<OpHandleBase *> bootstrap_ops_;
::ThreadPool pool_;
::ThreadPool prepare_pool_;
platform::DeviceContextPool fetch_ctxs_;
std::atomic<int> remaining_;

@ -359,6 +359,7 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars,
bool keep_kids) {
PADDLE_ENFORCE_NOT_NULL(scope);
Scope* local_scope = scope;
if (create_vars) {
if (create_local_scope) {

@ -5,6 +5,7 @@ file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
# Usage: pass_library(target inference) will append to paddle_inference_pass.h
unset(INFER_IR_PASSES CACHE) # clear the global variable
function(pass_library TARGET DEST)
set(options "")
set(oneValueArgs "")
@ -15,10 +16,11 @@ function(pass_library TARGET DEST)
if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference")
message(STATUS "add pass ${TARGET} ${DEST}")
file(APPEND ${pass_file} "USE_PASS(${TARGET});\n")
set(PASS_LIBRARY ${TARGET} ${PASS_LIBRARY} PARENT_SCOPE)
set(INFER_IR_PASSES ${INFER_IR_PASSES} ${TARGET} CACHE INTERNAL "")
endif()
endfunction()
cc_library(node SRCS node.cc DEPS proto_desc)
cc_library(graph SRCS graph.cc DEPS node pretty_log)
cc_library(graph_helper SRCS graph_helper.cc DEPS graph)

@ -91,10 +91,10 @@ void FindWhileOp(Graph* graph) {
#undef OP_SET_IN
#undef OP_SET_OUT
auto* X = graph->RetriveNode(34);
auto* LSTMOUT = graph->RetriveNode(81);
auto* cell_init = graph->RetriveNode(6);
auto* hidden_init = graph->RetriveNode(8);
auto* X = graph->RetrieveNode(34);
auto* LSTMOUT = graph->RetrieveNode(81);
auto* cell_init = graph->RetrieveNode(6);
auto* hidden_init = graph->RetrieveNode(8);
auto* lstm_op = graph->CreateOpNode(&op_desc);
PrepareParameters(graph, param);
@ -212,11 +212,11 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
float* out_data = out->mutable_data<float>(platform::CPUPlace());
std::array<const float*, 4> tensors{
{W_forget_w0.data<float>(), W_input_w0.data<float>(),
W_output_w0.data<float>(), W_cell_w0.data<float>()}};
W_forget_w0.data<float>(), W_input_w0.data<float>(),
W_output_w0.data<float>(), W_cell_w0.data<float>()};
std::array<const float*, 4> tensors1{
{W_forget_w1.data<float>(), W_input_w1.data<float>(),
W_output_w1.data<float>(), W_cell_w1.data<float>()}};
W_forget_w1.data<float>(), W_input_w1.data<float>(),
W_output_w1.data<float>(), W_cell_w1.data<float>()};
for (int row = 0; row < D; row++) {
for (int col = 0; col < 4; col++) {
@ -239,8 +239,8 @@ void PrepareLSTMBias(const LoDTensor& B_forget, const LoDTensor& B_input,
const LoDTensor& B_output, const LoDTensor& B_cell,
LoDTensor* out) {
std::array<const float*, 4> tensors{
{B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
B_cell.data<float>()}};
B_forget.data<float>(), B_input.data<float>(), B_output.data<float>(),
B_cell.data<float>()};
PADDLE_ENFORCE_EQ(B_forget.dims().size(), 1);
int D = B_forget.dims()[0];

@ -84,8 +84,6 @@ void CheckProgram(const ProgramDesc &program) {
Graph::Graph(const ProgramDesc &program) : program_(program) {
CheckProgram(program_);
// Make the nodes id start from 0.
Node::ResetId();
auto var_nodes = InitFromProgram(program_);
ResolveHazard(var_nodes);
}

@ -116,13 +116,17 @@ class Graph {
// Create a normal variable with non-null VarDesc.
ir::Node *CreateVarNode(VarDesc *var_desc) {
PADDLE_ENFORCE(var_desc);
return AddNode(new ir::Node(var_desc));
auto *x = AddNode(new ir::Node(var_desc));
x->SetId(num_node_created_++);
return x;
}
// Create a normal runnable operator with OpDesc.
ir::Node *CreateOpNode(OpDesc *op_desc) {
PADDLE_ENFORCE(op_desc);
return AddNode(new ir::Node(op_desc));
auto *x = AddNode(new ir::Node(op_desc));
x->SetId(num_node_created_++);
return x;
}
// Create a control dependency var that connects 2 operations. The
@ -132,13 +136,17 @@ class Graph {
// TODO(panyx0718): control var name should be really unique.
const std::string name = string::Sprintf(
"%s@%llu", ir::Node::kControlDepVarName, node_set_.size());
return AddNode(new ir::Node(name, ir::Node::Type::kVariable));
auto *x = AddNode(new ir::Node(name, ir::Node::Type::kVariable));
x->SetId(num_node_created_++);
return x;
}
// A more free style way of creating a graph node. Mostly use for test
// or "copy" from another node. Avoid using it if possible.
ir::Node *CreateEmptyNode(const std::string &name, ir::Node::Type type) {
return AddNode(new ir::Node(name, type));
auto *x = AddNode(new ir::Node(name, type));
x->SetId(num_node_created_++);
return x;
}
// Clear all node information of the graph and return the ownership of the
@ -160,7 +168,7 @@ class Graph {
}
// NOTE low performance, but simple and secure.
Node *RetriveNode(int id) {
Node *RetrieveNode(int id) {
for (auto &node : nodes_) {
if (node.second->id() == id) {
return node.second.get();
@ -169,6 +177,7 @@ class Graph {
return nullptr;
}
const ProgramDesc &program() const { return program_; }
std::map<std::string, std::vector<ir::Node *>> InitFromProgram(
const ProgramDesc &program);
@ -190,6 +199,7 @@ class Graph {
std::map<std::string, std::function<void(void)>> attr_dels_;
std::map<ir::Node *, std::unique_ptr<ir::Node>> nodes_;
std::unordered_set<ir::Node *> node_set_;
size_t num_node_created_{0}; // help to generate a unique node id.
};
bool IsControlDepVar(const ir::Node &var);

@ -167,10 +167,12 @@ struct HitGroup {
bool Match(Node *node, PDNode *pat) {
if (nodes_.count(node)) {
if (!roles.count(pat)) return false;
return roles[pat] == node;
if (roles.count(pat) && roles[pat] == node) return true;
return false;
} else {
if (roles.count(pat) && roles[pat] != node) return false;
return true;
}
return !roles.count(pat) || roles.at(pat) == node;
}
void Register(Node *node, PDNode *pat) {
@ -198,7 +200,6 @@ GraphPatternDetector::DetectPatterns() {
std::vector<GraphPatternDetector::subgraph_t> result;
std::vector<HitGroup> init_groups;
std::array<std::vector<HitGroup>, 2> bi_records;
// PADDLE_ENFORCE(!pattern_.edges().empty(), "At least one edge is needed");
auto *first_pnode = pattern_.edges().empty() ? pattern().nodes().front().get()
: pattern_.edges().front().first;
if (!pdnodes2nodes_.count(first_pnode)) return result;
@ -228,11 +229,12 @@ GraphPatternDetector::DetectPatterns() {
VLOG(80) << "check " << source->id() << " -- " << target->id();
// TODO(Superjomn) add some prune strategies.
for (const auto &group : pre_groups) {
HitGroup new_group = group;
if (IsNodesLink(source, target) &&
new_group.Match(source, edge.first)) {
new_group.Register(source, edge.first);
if (new_group.Match(target, edge.second)) {
if (IsNodesLink(source, target)) {
HitGroup new_group = group;
bool flag = new_group.Match(source, edge.first) &&
new_group.Match(target, edge.second);
if (flag) {
new_group.Register(source, edge.first);
new_group.Register(target, edge.second);
cur_groups.push_back(new_group);
// TODO(Superjomn) need to unique
@ -261,14 +263,16 @@ GraphPatternDetector::DetectPatterns() {
return result;
}
bool GraphItemCMP(const std::pair<PDNode *, Node *> &a,
struct GraphItemLessThan {
bool operator()(const std::pair<PDNode *, Node *> &a,
const std::pair<PDNode *, Node *> &b) {
if (a.first != b.first) {
return a.first < b.first;
} else {
return a.second < b.second;
if (a.first != b.first) {
return a.first < b.first;
} else {
return a.second < b.second;
}
}
}
};
// TODO(Superjomn) enhance the function as it marks unique unique as duplicates
// see https://github.com/PaddlePaddle/Paddle/issues/13550
@ -282,7 +286,7 @@ void GraphPatternDetector::UniquePatterns(
for (auto &g : *subgraphs) {
// Sort the items in the sub-graph, and transform to a string key.
std::vector<std::pair<PDNode *, Node *>> sorted_keys(g.begin(), g.end());
std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemCMP);
std::sort(sorted_keys.begin(), sorted_keys.end(), GraphItemLessThan());
std::stringstream ss;
for (auto &item : sorted_keys) {
ss << item.first << ":" << item.second;

@ -310,8 +310,8 @@ void GraphSafeRemoveNodes(Graph* graph,
const std::unordered_set<const Node*>& nodes);
// Some pre-defined patterns those can be reused in multiple passes.
// The related Fluid Layer or Op should be one pattern here for better reusage
// accross different fusion.
// The related Fluid Layer or Op should be one pattern here for better re-usage
// across different fusion.
namespace patterns {
struct KeyCounter {

@ -35,10 +35,11 @@ std::unique_ptr<Graph> GraphToProgramPass::ApplyImpl(
new proto::ProgramDesc(*program.Proto()));
auto block = program_pb->mutable_blocks(kRootBlockIndex);
block->set_idx(kRootBlockIndex);
block->clear_vars();
std::unordered_set<std::string> visited_vars;
for (ir::Node* n : graph->Nodes()) {
if (n->NodeType() == ir::Node::Type::kVariable) {
if (n->IsVar()) {
if (n->Var() && visited_vars.count(n->Var()->Name()) == 0) {
visited_vars.insert(n->Var()->Name());
block->add_vars()->MergeFrom(*n->Var()->Proto());

@ -66,6 +66,76 @@ NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) {
}
Node *NodesDFSIterator::operator->() { return stack_.top(); }
inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
return node.inputs.size() == n;
}
NodesTSIterator::NodesTSIterator(const std::vector<Node *> &source) {
PADDLE_ENFORCE(!source.empty(),
"Start points of topological sorting should not be empty!");
// CHECK all the inputs' in-degree is 0
for (auto *node : source) {
PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0));
}
std::unordered_set<Node *> visited;
std::unordered_set<Node *> to_visit{source.begin(), source.end()};
std::vector<Node *> inlink_visited;
while (!to_visit.empty()) {
std::vector<Node *> queue(to_visit.begin(), to_visit.end());
for (auto *p : queue) {
inlink_visited.clear();
std::copy_if(p->inputs.begin(), p->inputs.end(),
std::back_inserter(inlink_visited),
[&](Node *x) -> bool { return visited.count(x) != 0; });
if (inlink_visited.size() == p->inputs.size()) {
sorted_.push_back(p);
for (auto *_ : p->outputs) {
if (!visited.count(_)) {
to_visit.insert(_);
}
}
to_visit.erase(p);
visited.insert(p);
}
}
}
}
NodesTSIterator::NodesTSIterator(const NodesTSIterator &other)
: sorted_(other.sorted_), cursor_(other.cursor_) {}
Node &NodesTSIterator::operator*() {
PADDLE_ENFORCE_LT(cursor_, sorted_.size());
return *sorted_[cursor_];
}
NodesTSIterator &NodesTSIterator::operator++() {
if (++cursor_ >= sorted_.size()) {
sorted_.clear();
cursor_ = 0;
}
return *this;
}
NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) {
cursor_ = other.cursor_;
sorted_ = other.sorted_;
return *this;
}
bool NodesTSIterator::operator==(const NodesTSIterator &other) {
return sorted_ == other.sorted_ && cursor_ == other.cursor_;
}
Node *NodesTSIterator::operator->() {
PADDLE_ENFORCE_LT(cursor_, sorted_.size());
return sorted_[cursor_];
}
} // namespace ir
} // namespace framework
} // namespace paddle

@ -62,6 +62,32 @@ struct NodesDFSIterator
std::unordered_set<Node *> visited_;
};
// Topological sorting iterator on nodes.
struct NodesTSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesTSIterator() = default;
NodesTSIterator(const std::vector<Node *> &source);
NodesTSIterator(NodesTSIterator &&other)
: sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
other.cursor_ = 0;
}
NodesTSIterator(const NodesTSIterator &other);
Node &operator*();
NodesTSIterator &operator++();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator &operator=(const NodesTSIterator &other);
bool operator==(const NodesTSIterator &other);
bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
Node *operator->();
private:
std::vector<Node *> sorted_;
size_t cursor_{0};
};
/*
* GraphTraits contains some graph traversal algorithms.
*
@ -76,6 +102,14 @@ struct GraphTraits {
NodesDFSIterator());
}
static iterator_range<NodesTSIterator> TS(const Graph &g) {
auto start_points = ExtractStartPoints(g);
PADDLE_ENFORCE(!start_points.empty());
NodesTSIterator x(start_points);
return iterator_range<NodesTSIterator>(NodesTSIterator(start_points),
NodesTSIterator());
}
private:
// The nodes those have no input will be treated as start points.
static std::vector<Node *> ExtractStartPoints(const Graph &g) {

@ -119,37 +119,30 @@ class Node {
int id_;
private:
// ID can only set by a Graph.
void SetId(int id) { id_ = id; }
friend class Graph;
friend std::unique_ptr<Node> CreateNodeForTest(const std::string& name,
Node::Type type);
explicit Node(const std::string& name, Type type)
: name_(name),
var_desc_(nullptr),
op_desc_(nullptr),
type_(type),
id_(count_++) {}
: name_(name), var_desc_(nullptr), op_desc_(nullptr), type_(type) {}
explicit Node(VarDesc* var_desc)
: name_(var_desc->Name()),
var_desc_(new VarDesc(*var_desc)),
op_desc_(nullptr),
type_(Type::kVariable),
id_(count_++) {}
type_(Type::kVariable) {}
explicit Node(OpDesc* op_desc)
: name_(op_desc->Type()),
var_desc_(nullptr),
op_desc_(new OpDesc(*op_desc, op_desc->Block())),
type_(Type::kOperation),
id_(count_++) {}
type_(Type::kOperation) {}
Node() = delete;
static int count_;
// Please don't use this API or make this public.
static void ResetId() { count_ = 0; }
boost::any wrapper_;
std::function<void(void)> wrapper_deleter_;
std::type_index wrapper_type_ = std::type_index(typeid(void));

@ -93,6 +93,7 @@ class Pass {
protected:
virtual std::unique_ptr<Graph> ApplyImpl(std::unique_ptr<Graph> graph) const {
LOG(FATAL) << "Calling virtual Pass not implemented.";
return graph;
}
private:

@ -57,60 +57,58 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
}
}
void NaiveExecutor::Prepare(Scope *parent_scope,
const ProgramDesc &program_desc, int block_id,
bool with_feed_fetch_ops) {
if (!parent_scope) {
void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,
int block_id, bool with_feed_fetch_ops) {
if (!scope) {
scope_ = new framework::Scope;
} else {
scope_ = &parent_scope->NewScope();
scope_ = scope;
}
CreateVariables(program_desc, scope_, block_id);
VLOG(3) << "NaiveExecutor init with scope " << scope;
CreateOps(program_desc, block_id, with_feed_fetch_ops);
}
void NaiveExecutor::Run() {
for (auto &op : ops_) {
VLOG(40) << "run " << op->Type();
VLOG(3) << std::this_thread::get_id() << " run " << op->Type()
<< " on scope " << scope_;
op->Run(*scope_, place_);
}
}
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
int block_id) {
PADDLE_ENFORCE(scope);
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id,
bool persistable, Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(scope);
auto &global_block = desc.Block(block_id);
const Scope *ancestor_scope = scope;
while (ancestor_scope->parent()) {
ancestor_scope = ancestor_scope->parent();
const auto *anc = scope;
PADDLE_ENFORCE(anc->parent() != anc);
while (anc->parent()) {
anc = anc->parent();
}
if (ancestor_scope != scope) {
for (auto &var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
// Create persistable vars in ancestor scope.
if (var->Persistable()) {
auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else { // Create temporary variables in local scope.
auto *ptr = scope->Var(var->Name());
for (auto &var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
if (persistable == var->Persistable()) {
if (persistable) {
if (!anc->FindVar(var->Name())) {
auto *ptr = const_cast<Scope *>(anc)->Var(var->Name());
VLOG(3) << scope << " Create persistable variable " << var->Name()
<< ", which pointer is " << ptr;
InitializeVariable(ptr, var->GetType());
}
} else {
auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
VLOG(3) << scope << " Create variable " << var->Name()
<< ", which pointer is " << ptr;
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto &var : global_block.AllVars()) {
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}

@ -35,8 +35,14 @@ class NaiveExecutor {
// Create child scope.
// Create variables.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
int block_id, bool with_feed_fetch_ops);
void Prepare(Scope* scope, const ProgramDesc& program_desc, int block_id,
bool with_feed_fetch_ops);
// Create variables before head.
// Create parameters if persistable is ture, or create the temporary variables
// instead.
void CreateVariables(const ProgramDesc& desc, int block_id, bool persistable,
Scope* scope);
// Run all the operators.
void Run();
@ -49,8 +55,6 @@ class NaiveExecutor {
void CleanFeedFetchOps();
protected:
void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);
void CreateOps(const ProgramDesc& desc, int block_id,
bool with_feed_fetch_ops);

@ -39,7 +39,7 @@ TEST(NaiveExecutor, Basic) {
auto place = platform::CPUPlace();
NaiveExecutor exe(place);
exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
exe.Prepare(nullptr, program, 0, false);
auto* a_tensor = exe.FindTensor("a");
auto* b_tensor = exe.FindTensor("b");
auto* c_tensor = exe.FindTensor("c");

@ -15,7 +15,9 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include <memory> // for unique_ptr
#include <queue>
#include <set>
#include <unordered_set>
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/string/printf.h"
@ -36,6 +38,16 @@ DEFINE_double(
"Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0");
// When in inference scenario, the scopes will not be written by two threads in
// a mean time, but a scope may be read by multiple threads concurrently, and
// the mutex will cause serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef ON_INFER
#define SCOPE_LOCK_GUARD
#else
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#endif
namespace paddle {
namespace framework {
@ -49,18 +61,18 @@ int64_t GetEagerDeletionThreshold() {
Scope::~Scope() { DropKids(); }
Scope& Scope::NewScope() const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
kids_.push_back(new Scope(this));
return *kids_.back();
}
Variable* Scope::Var(const std::string& name) {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return VarInternal(name);
}
Variable* Scope::Var(std::string* name) {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) {
*name = new_name;
@ -69,34 +81,34 @@ Variable* Scope::Var(std::string* name) {
}
Variable* Scope::FindVar(const std::string& name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return FindVarInternal(name);
}
Variable* Scope::FindLocalVar(const std::string& name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return FindVarLocally(name);
}
const Scope* Scope::FindScope(const Variable* var) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return FindScopeInternal(var);
}
void Scope::DropKids() {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
for (Scope* s : kids_) delete s;
kids_.clear();
}
bool Scope::HasKid(const Scope* scope) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
return it != this->kids_.end();
}
std::vector<std::string> Scope::LocalVarNames() const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
std::vector<std::string> known_vars;
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
@ -106,9 +118,10 @@ std::vector<std::string> Scope::LocalVarNames() const {
}
void Scope::DeleteScope(Scope* scope) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
this, scope);
this->kids_.erase(it);
// When making memory benchmark on Fluid, we have to delete scope sync.
if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
@ -119,7 +132,7 @@ void Scope::DeleteScope(Scope* scope) const {
}
void Scope::EraseVars(const std::vector<std::string>& var_names) {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
std::set<std::string> var_set(var_names.begin(), var_names.end());
for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) {
@ -132,12 +145,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
@ -189,5 +202,46 @@ Variable* Scope::FindVarLocally(const std::string& name) const {
return nullptr;
}
std::string GenScopeTreeDebugInfo(Scope* root) {
std::stringstream os;
if (!root) return "";
// level traversal
std::queue<Scope*> queue;
queue.push(root);
std::vector<Scope*> scopes;
while (!queue.empty()) {
auto* end = queue.back();
Scope* q = nullptr;
while (q != end) {
q = queue.front();
queue.pop();
os << q << " ";
scopes.push_back(q);
for (auto* c : q->kids()) {
queue.push(c);
}
}
// end of a level
os << "\n------------------------------------------\n";
}
os << "\nDetails:\n\n";
for (Scope* q : scopes) {
os << "====\n";
os << q << ":\n";
for (auto& var : q->LocalVarNames()) {
os << " - " << var << "\n";
}
}
return os.str();
}
} // namespace framework
} // namespace paddle

@ -78,11 +78,11 @@ class Scope {
/// Drop all kids scopes belonged to this scope.
void DropKids();
std::list<Scope*>& kids() const { return kids_; }
/// Find if a scope exists in the kid scopes
bool HasKid(const Scope* scope) const;
const std::list<Scope*>& kids() const { return kids_; }
// enumerate all the variables current contains.
std::vector<std::string> LocalVarNames() const;
@ -118,12 +118,17 @@ class Scope {
// Scope in `kids_` are owned by this class.
mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr};
const Scope* parent_{nullptr};
DISABLE_COPY_AND_ASSIGN(Scope);
private:
mutable std::mutex mutex_;
};
// Generate some debug string about the inherience structure of scope, quite
// naive.
std::string GenScopeTreeDebugInfo(Scope*);
} // namespace framework
} // namespace paddle

@ -63,6 +63,26 @@ struct TensorCopyVisitor {
int64_t size_;
};
struct TensorFillVisitor {
TensorFillVisitor(framework::Tensor* dst, int64_t dst_offset, int64_t size,
float value)
: dst_(dst), dst_offset_(dst_offset), size_(size) {}
template <typename T>
void apply() const {
// TODO(qiao): support other place
platform::CPUPlace cpu;
auto* tensor_data = dst_->mutable_data<T>(cpu);
auto* start = tensor_data + dst_offset_;
auto* end = start + size_;
std::fill(start, end, static_cast<T>(0.0));
}
framework::Tensor* dst_;
int64_t dst_offset_;
int64_t size_;
};
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version
@ -120,7 +140,17 @@ bool SelectedRows::HasKey(int64_t key) const {
: true;
}
int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) {
int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown,
bool is_test) {
if (is_test) {
auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) {
return -1;
} else {
return iter->second;
}
}
rwlock_->RDLock();
auto iter = id_to_index_.find(key);
if (iter == id_to_index_.end()) {
@ -172,7 +202,7 @@ void SelectedRows::SyncIndex() {
}
void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown) {
bool auto_grown, bool is_test) {
PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized.");
if (ids.numel() == 0) {
@ -183,11 +213,19 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
"output tensor should have the same shape with table "
"except the dims[0].");
for (int i = 0; i < ids.numel(); ++i) {
int64_t index = AutoGrownIndex(ids.data<int64_t>()[i], auto_grown);
framework::VisitDataType(
framework::ToDataType(value_->type()),
TensorCopyVisitor(value, i * value_width, *value_.get(),
index * value_width, value_width));
auto id = ids.data<int64_t>()[i];
int64_t index = AutoGrownIndex(id, auto_grown, is_test);
if (index < 0) {
VLOG(5) << "id " << id << " not in the table, return 0";
framework::VisitDataType(
framework::ToDataType(value_->type()),
TensorFillVisitor(value, i * value_width, value_width, 0.0));
} else {
framework::VisitDataType(
framework::ToDataType(value_->type()),
TensorCopyVisitor(value, i * value_width, *value_.get(),
index * value_width, value_width));
}
}
}
}

@ -105,7 +105,7 @@ class SelectedRows {
* the value
*/
void Get(const framework::Tensor& ids, framework::Tensor* value,
bool auto_grown = false);
bool auto_grown = false, bool is_test = false);
/*
* @brief Get the index of the key from id_to_index_ map. If the key not
@ -118,7 +118,7 @@ class SelectedRows {
*
* @return index of the key.
*/
int64_t AutoGrownIndex(int64_t key, bool auto_grown);
int64_t AutoGrownIndex(int64_t key, bool auto_grown, bool is_test = false);
void SyncIndex();

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save