Combine Inference Analysis with IR (#13914)

panyx0718-patch-1
Yan Chunwei 7 years ago committed by GitHub
parent 893c1b01d8
commit 9f252e0032
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -164,7 +164,7 @@ endif()
set(module "inference")
copy(inference_lib DEPS ${inference_deps}
SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${src_dir}/${module}/api/paddle_inference_api.h
${src_dir}/${module}/api/paddle_*.h
${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
)
@ -202,10 +202,10 @@ copy(third_party DEPS fluid_lib_dist
DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR}
)
# only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library
# only need libpaddle_fluid.so/a and paddle_*.h for inference-only library
copy(inference_api_lib DEPS fluid_lib_dist
SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.*
${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h
${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_*.h
DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include
)

@ -34,4 +34,5 @@ if(TENSORRT_FOUND)
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
include_directories(${TENSORRT_INCLUDE_DIR})
list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY})
add_definitions(-DPADDLE_WITH_TENSORRT)
endif()

@ -359,6 +359,7 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars,
bool keep_kids) {
PADDLE_ENFORCE_NOT_NULL(scope);
Scope* local_scope = scope;
if (create_vars) {
if (create_local_scope) {

@ -5,6 +5,7 @@ file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n")
# Usage: pass_library(target inference) will append to paddle_inference_pass.h
unset(INFER_IR_PASSES CACHE) # clear the global variable
function(pass_library TARGET DEST)
set(options "")
set(oneValueArgs "")
@ -15,10 +16,11 @@ function(pass_library TARGET DEST)
if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference")
message(STATUS "add pass ${TARGET} ${DEST}")
file(APPEND ${pass_file} "USE_PASS(${TARGET});\n")
set(PASS_LIBRARY ${TARGET} ${PASS_LIBRARY} PARENT_SCOPE)
set(INFER_IR_PASSES ${INFER_IR_PASSES} ${TARGET} CACHE INTERNAL "")
endif()
endfunction()
cc_library(node SRCS node.cc DEPS proto_desc)
cc_library(graph SRCS graph.cc DEPS node pretty_log)
cc_library(graph_helper SRCS graph_helper.cc DEPS graph)

@ -91,10 +91,10 @@ void FindWhileOp(Graph* graph) {
#undef OP_SET_IN
#undef OP_SET_OUT
auto* X = graph->RetriveNode(34);
auto* LSTMOUT = graph->RetriveNode(81);
auto* cell_init = graph->RetriveNode(6);
auto* hidden_init = graph->RetriveNode(8);
auto* X = graph->RetrieveNode(34);
auto* LSTMOUT = graph->RetrieveNode(81);
auto* cell_init = graph->RetrieveNode(6);
auto* hidden_init = graph->RetrieveNode(8);
auto* lstm_op = graph->CreateOpNode(&op_desc);
PrepareParameters(graph, param);

@ -84,8 +84,6 @@ void CheckProgram(const ProgramDesc &program) {
Graph::Graph(const ProgramDesc &program) : program_(program) {
CheckProgram(program_);
// Make the nodes id start from 0.
Node::ResetId();
auto var_nodes = InitFromProgram(program_);
ResolveHazard(var_nodes);
}

@ -116,13 +116,17 @@ class Graph {
// Create a normal variable with non-null VarDesc.
ir::Node *CreateVarNode(VarDesc *var_desc) {
PADDLE_ENFORCE(var_desc);
return AddNode(new ir::Node(var_desc));
auto *x = AddNode(new ir::Node(var_desc));
x->SetId(num_node_created_++);
return x;
}
// Create a normal runnable operator with OpDesc.
ir::Node *CreateOpNode(OpDesc *op_desc) {
PADDLE_ENFORCE(op_desc);
return AddNode(new ir::Node(op_desc));
auto *x = AddNode(new ir::Node(op_desc));
x->SetId(num_node_created_++);
return x;
}
// Create a control dependency var that connects 2 operations. The
@ -132,13 +136,17 @@ class Graph {
// TODO(panyx0718): control var name should be really unique.
const std::string name = string::Sprintf(
"%s@%llu", ir::Node::kControlDepVarName, node_set_.size());
return AddNode(new ir::Node(name, ir::Node::Type::kVariable));
auto *x = AddNode(new ir::Node(name, ir::Node::Type::kVariable));
x->SetId(num_node_created_++);
return x;
}
// A more free style way of creating a graph node. Mostly use for test
// or "copy" from another node. Avoid using it if possible.
ir::Node *CreateEmptyNode(const std::string &name, ir::Node::Type type) {
return AddNode(new ir::Node(name, type));
auto *x = AddNode(new ir::Node(name, type));
x->SetId(num_node_created_++);
return x;
}
// Clear all node information of the graph and return the ownership of the
@ -160,7 +168,7 @@ class Graph {
}
// NOTE low performance, but simple and secure.
Node *RetriveNode(int id) {
Node *RetrieveNode(int id) {
for (auto &node : nodes_) {
if (node.second->id() == id) {
return node.second.get();
@ -169,6 +177,7 @@ class Graph {
return nullptr;
}
const ProgramDesc &program() const { return program_; }
std::map<std::string, std::vector<ir::Node *>> InitFromProgram(
const ProgramDesc &program);
@ -190,6 +199,7 @@ class Graph {
std::map<std::string, std::function<void(void)>> attr_dels_;
std::map<ir::Node *, std::unique_ptr<ir::Node>> nodes_;
std::unordered_set<ir::Node *> node_set_;
size_t num_node_created_{0}; // help to generate a unique node id.
};
bool IsControlDepVar(const ir::Node &var);

@ -310,8 +310,8 @@ void GraphSafeRemoveNodes(Graph* graph,
const std::unordered_set<const Node*>& nodes);
// Some pre-defined patterns those can be reused in multiple passes.
// The related Fluid Layer or Op should be one pattern here for better reusage
// accross different fusion.
// The related Fluid Layer or Op should be one pattern here for better re-usage
// across different fusion.
namespace patterns {
struct KeyCounter {

@ -35,10 +35,11 @@ std::unique_ptr<Graph> GraphToProgramPass::ApplyImpl(
new proto::ProgramDesc(*program.Proto()));
auto block = program_pb->mutable_blocks(kRootBlockIndex);
block->set_idx(kRootBlockIndex);
block->clear_vars();
std::unordered_set<std::string> visited_vars;
for (ir::Node* n : graph->Nodes()) {
if (n->NodeType() == ir::Node::Type::kVariable) {
if (n->IsVar()) {
if (n->Var() && visited_vars.count(n->Var()->Name()) == 0) {
visited_vars.insert(n->Var()->Name());
block->add_vars()->MergeFrom(*n->Var()->Proto());

@ -66,6 +66,76 @@ NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) {
}
Node *NodesDFSIterator::operator->() { return stack_.top(); }
inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) {
return node.inputs.size() == n;
}
NodesTSIterator::NodesTSIterator(const std::vector<Node *> &source) {
PADDLE_ENFORCE(!source.empty(),
"Start points of topological sorting should not be empty!");
// CHECK all the inputs' in-degree is 0
for (auto *node : source) {
PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0));
}
std::unordered_set<Node *> visited;
std::unordered_set<Node *> to_visit{source.begin(), source.end()};
std::vector<Node *> inlink_visited;
while (!to_visit.empty()) {
std::vector<Node *> queue(to_visit.begin(), to_visit.end());
for (auto *p : queue) {
inlink_visited.clear();
std::copy_if(p->inputs.begin(), p->inputs.end(),
std::back_inserter(inlink_visited),
[&](Node *x) -> bool { return visited.count(x) != 0; });
if (inlink_visited.size() == p->inputs.size()) {
sorted_.push_back(p);
for (auto *_ : p->outputs) {
if (!visited.count(_)) {
to_visit.insert(_);
}
}
to_visit.erase(p);
visited.insert(p);
}
}
}
}
NodesTSIterator::NodesTSIterator(const NodesTSIterator &other)
: sorted_(other.sorted_), cursor_(other.cursor_) {}
Node &NodesTSIterator::operator*() {
PADDLE_ENFORCE_LT(cursor_, sorted_.size());
return *sorted_[cursor_];
}
NodesTSIterator &NodesTSIterator::operator++() {
if (++cursor_ >= sorted_.size()) {
sorted_.clear();
cursor_ = 0;
}
return *this;
}
NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) {
cursor_ = other.cursor_;
sorted_ = other.sorted_;
return *this;
}
bool NodesTSIterator::operator==(const NodesTSIterator &other) {
return sorted_ == other.sorted_ && cursor_ == other.cursor_;
}
Node *NodesTSIterator::operator->() {
PADDLE_ENFORCE_LT(cursor_, sorted_.size());
return sorted_[cursor_];
}
} // namespace ir
} // namespace framework
} // namespace paddle

@ -62,6 +62,32 @@ struct NodesDFSIterator
std::unordered_set<Node *> visited_;
};
// Topological sorting iterator on nodes.
struct NodesTSIterator
: public std::iterator<std::forward_iterator_tag, Node *> {
NodesTSIterator() = default;
NodesTSIterator(const std::vector<Node *> &source);
NodesTSIterator(NodesTSIterator &&other)
: sorted_(std::move(other.sorted_)), cursor_(other.cursor_) {
other.cursor_ = 0;
}
NodesTSIterator(const NodesTSIterator &other);
Node &operator*();
NodesTSIterator &operator++();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator &operator=(const NodesTSIterator &other);
bool operator==(const NodesTSIterator &other);
bool operator!=(const NodesTSIterator &other) { return !(*this == other); }
Node *operator->();
private:
std::vector<Node *> sorted_;
size_t cursor_{0};
};
/*
* GraphTraits contains some graph traversal algorithms.
*
@ -76,6 +102,14 @@ struct GraphTraits {
NodesDFSIterator());
}
static iterator_range<NodesTSIterator> TS(const Graph &g) {
auto start_points = ExtractStartPoints(g);
PADDLE_ENFORCE(!start_points.empty());
NodesTSIterator x(start_points);
return iterator_range<NodesTSIterator>(NodesTSIterator(start_points),
NodesTSIterator());
}
private:
// The nodes those have no input will be treated as start points.
static std::vector<Node *> ExtractStartPoints(const Graph &g) {

@ -18,7 +18,6 @@ namespace paddle {
namespace framework {
namespace ir {
constexpr char Node::kControlDepVarName[];
int Node::count_ = 0;
std::unique_ptr<Node> CreateNodeForTest(const std::string& name,
Node::Type type) {

@ -115,37 +115,30 @@ class Node {
int id_;
private:
// ID can only set by a Graph.
void SetId(int id) { id_ = id; }
friend class Graph;
friend std::unique_ptr<Node> CreateNodeForTest(const std::string& name,
Node::Type type);
explicit Node(const std::string& name, Type type)
: name_(name),
var_desc_(nullptr),
op_desc_(nullptr),
type_(type),
id_(count_++) {}
: name_(name), var_desc_(nullptr), op_desc_(nullptr), type_(type) {}
explicit Node(VarDesc* var_desc)
: name_(var_desc->Name()),
var_desc_(new VarDesc(*var_desc)),
op_desc_(nullptr),
type_(Type::kVariable),
id_(count_++) {}
type_(Type::kVariable) {}
explicit Node(OpDesc* op_desc)
: name_(op_desc->Type()),
var_desc_(nullptr),
op_desc_(new OpDesc(*op_desc, op_desc->Block())),
type_(Type::kOperation),
id_(count_++) {}
type_(Type::kOperation) {}
Node() = delete;
static int count_;
// Please don't use this API or make this public.
static void ResetId() { count_ = 0; }
boost::any wrapper_;
std::function<void(void)> wrapper_deleter_;
std::type_index wrapper_type_ = std::type_index(typeid(void));

@ -93,6 +93,7 @@ class Pass {
protected:
virtual std::unique_ptr<Graph> ApplyImpl(std::unique_ptr<Graph> graph) const {
LOG(FATAL) << "Calling virtual Pass not implemented.";
return graph;
}
private:

@ -57,60 +57,58 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) {
}
}
void NaiveExecutor::Prepare(Scope *parent_scope,
const ProgramDesc &program_desc, int block_id,
bool with_feed_fetch_ops) {
if (!parent_scope) {
void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,
int block_id, bool with_feed_fetch_ops) {
if (!scope) {
scope_ = new framework::Scope;
} else {
scope_ = &parent_scope->NewScope();
scope_ = scope;
}
CreateVariables(program_desc, scope_, block_id);
VLOG(3) << "NaiveExecutor init with scope " << scope;
CreateOps(program_desc, block_id, with_feed_fetch_ops);
}
void NaiveExecutor::Run() {
for (auto &op : ops_) {
VLOG(40) << "run " << op->Type();
VLOG(3) << std::this_thread::get_id() << " run " << op->Type()
<< " on scope " << scope_;
op->Run(*scope_, place_);
}
}
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
int block_id) {
PADDLE_ENFORCE(scope);
void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id,
bool persistable, Scope *scope) {
PADDLE_ENFORCE_NOT_NULL(scope);
auto &global_block = desc.Block(block_id);
const Scope *ancestor_scope = scope;
while (ancestor_scope->parent()) {
ancestor_scope = ancestor_scope->parent();
const auto *anc = scope;
PADDLE_ENFORCE(anc->parent() != anc);
while (anc->parent()) {
anc = anc->parent();
}
if (ancestor_scope != scope) {
for (auto &var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
// Create persistable vars in ancestor scope.
if (var->Persistable()) {
auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else { // Create temporary variables in local scope.
auto *ptr = scope->Var(var->Name());
for (auto &var : global_block.AllVars()) {
if (var->Name() == framework::kEmptyVarName) {
continue;
}
if (persistable == var->Persistable()) {
if (persistable) {
if (!anc->FindVar(var->Name())) {
auto *ptr = const_cast<Scope *>(anc)->Var(var->Name());
VLOG(3) << scope << " Create persistable variable " << var->Name()
<< ", which pointer is " << ptr;
InitializeVariable(ptr, var->GetType());
}
} else {
auto *ptr = const_cast<Scope *>(scope)->Var(var->Name());
VLOG(3) << scope << " Create variable " << var->Name()
<< ", which pointer is " << ptr;
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto &var : global_block.AllVars()) {
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}

@ -35,8 +35,14 @@ class NaiveExecutor {
// Create child scope.
// Create variables.
// @with_feed_fetch_ops: whether to work with the feed and fetch operators.
void Prepare(Scope* parent_scope, const ProgramDesc& program_desc,
int block_id, bool with_feed_fetch_ops);
void Prepare(Scope* scope, const ProgramDesc& program_desc, int block_id,
bool with_feed_fetch_ops);
// Create variables before head.
// Create parameters if persistable is ture, or create the temporary variables
// instead.
void CreateVariables(const ProgramDesc& desc, int block_id, bool persistable,
Scope* scope);
// Run all the operators.
void Run();
@ -49,8 +55,6 @@ class NaiveExecutor {
void CleanFeedFetchOps();
protected:
void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id);
void CreateOps(const ProgramDesc& desc, int block_id,
bool with_feed_fetch_ops);

@ -39,7 +39,7 @@ TEST(NaiveExecutor, Basic) {
auto place = platform::CPUPlace();
NaiveExecutor exe(place);
exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/);
exe.Prepare(nullptr, program, 0, false);
auto* a_tensor = exe.FindTensor("a");
auto* b_tensor = exe.FindTensor("b");
auto* c_tensor = exe.FindTensor("c");

@ -15,7 +15,9 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include <memory> // for unique_ptr
#include <queue>
#include <set>
#include <unordered_set>
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/string/printf.h"
@ -36,6 +38,16 @@ DEFINE_double(
"Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0");
// When in inference scenario, the scopes will not be written by two threads in
// a mean time, but a scope may be read by multiple threads concurrently, and
// the mutex will cause serious performance issue.
// So the mutex is disabled when `ON_INFER`.
#ifdef ON_INFER
#define SCOPE_LOCK_GUARD
#else
#define SCOPE_LOCK_GUARD std::lock_guard<std::mutex> lock(mutex_);
#endif
namespace paddle {
namespace framework {
@ -49,18 +61,18 @@ int64_t GetEagerDeletionThreshold() {
Scope::~Scope() { DropKids(); }
Scope& Scope::NewScope() const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
kids_.push_back(new Scope(this));
return *kids_.back();
}
Variable* Scope::Var(const std::string& name) {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return VarInternal(name);
}
Variable* Scope::Var(std::string* name) {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) {
*name = new_name;
@ -69,34 +81,34 @@ Variable* Scope::Var(std::string* name) {
}
Variable* Scope::FindVar(const std::string& name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return FindVarInternal(name);
}
Variable* Scope::FindLocalVar(const std::string& name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return FindVarLocally(name);
}
const Scope* Scope::FindScope(const Variable* var) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
return FindScopeInternal(var);
}
void Scope::DropKids() {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
for (Scope* s : kids_) delete s;
kids_.clear();
}
bool Scope::HasKid(const Scope* scope) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
return it != this->kids_.end();
}
std::vector<std::string> Scope::LocalVarNames() const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
std::vector<std::string> known_vars;
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
@ -106,9 +118,10 @@ std::vector<std::string> Scope::LocalVarNames() const {
}
void Scope::DeleteScope(Scope* scope) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope",
this, scope);
this->kids_.erase(it);
// When making memory benchmark on Fluid, we have to delete scope sync.
if (FLAGS_benchmark || FLAGS_eager_delete_scope) {
@ -119,7 +132,7 @@ void Scope::DeleteScope(Scope* scope) const {
}
void Scope::EraseVars(const std::vector<std::string>& var_names) {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
std::set<std::string> var_set(var_names.begin(), var_names.end());
for (auto it = vars_.begin(); it != vars_.end();) {
if (var_set.find(it->first) != var_set.end()) {
@ -132,12 +145,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
std::lock_guard<std::mutex> lock(mutex_);
SCOPE_LOCK_GUARD
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
@ -189,5 +202,46 @@ Variable* Scope::FindVarLocally(const std::string& name) const {
return nullptr;
}
std::string GenScopeTreeDebugInfo(Scope* root) {
std::stringstream os;
if (!root) return "";
// level traversal
std::queue<Scope*> queue;
queue.push(root);
std::vector<Scope*> scopes;
while (!queue.empty()) {
auto* end = queue.back();
Scope* q = nullptr;
while (q != end) {
q = queue.front();
queue.pop();
os << q << " ";
scopes.push_back(q);
for (auto* c : q->kids()) {
queue.push(c);
}
}
// end of a level
os << "\n------------------------------------------\n";
}
os << "\nDetails:\n\n";
for (Scope* q : scopes) {
os << "====\n";
os << q << ":\n";
for (auto& var : q->LocalVarNames()) {
os << " - " << var << "\n";
}
}
return os.str();
}
} // namespace framework
} // namespace paddle

@ -78,11 +78,11 @@ class Scope {
/// Drop all kids scopes belonged to this scope.
void DropKids();
std::list<Scope*>& kids() const { return kids_; }
/// Find if a scope exists in the kid scopes
bool HasKid(const Scope* scope) const;
const std::list<Scope*>& kids() const { return kids_; }
// enumerate all the variables current contains.
std::vector<std::string> LocalVarNames() const;
@ -118,12 +118,17 @@ class Scope {
// Scope in `kids_` are owned by this class.
mutable std::list<Scope*> kids_;
Scope const* parent_{nullptr};
const Scope* parent_{nullptr};
DISABLE_COPY_AND_ASSIGN(Scope);
private:
mutable std::mutex mutex_;
};
// Generate some debug string about the inherience structure of scope, quite
// naive.
std::string GenScopeTreeDebugInfo(Scope*);
} // namespace framework
} // namespace paddle

@ -27,13 +27,9 @@ set(SHARED_INFERENCE_SRCS
io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc
${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc)
if (WITH_GPU AND TENSORRT_FOUND)
set(STATIC_INFERENCE_APIS ${STATIC_INFERENCE_APIS} paddle_inference_tensorrt_subgraph_engine)
set(SHARED_INFERENCE_SRCS ${SHARED_INFERENCE_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/api/api_tensorrt_subgraph_engine.cc)
endif()
# Create static library
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder)
if(NOT APPLE)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
@ -43,7 +39,7 @@ endif()
# Create shared library
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder)
set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid)
if(NOT APPLE)

@ -1,24 +1,25 @@
cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass)
set(analysis_deps
framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log)
unset(analysis_deps CACHE)
set(analysis_deps # analysis_deps can be extended accross the project
framework_proto proto_desc graph pass paddle_fluid_api executor pretty_log
ir_pass_manager
CACHE INTERNAL "")
cc_library(analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc
add_subdirectory(ir_passes)
add_subdirectory(passes)
cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES})
cc_library(argument SRCS argument.cc DEPS scope proto_desc)
cc_library(analysis_pass SRCS analysis_pass.cc DEPS proto_desc)
cc_library(analysis SRCS
analyzer.cc
helper.cc
# passes
analysis_pass.cc
fluid_to_data_flow_graph_pass.cc
data_flow_graph_to_fluid_pass.cc
dfg_graphviz_draw_pass.cc
tensorrt_subgraph_pass.cc
tensorrt_subgraph_node_mark_pass.cc
fluid_to_ir_pass.cc
model_store_pass.cc
DEPS ${analysis_deps})
analysis_pass
DEPS ${analysis_deps}
)
cc_test(test_node SRCS node_tester.cc DEPS analysis)
cc_test(test_dot SRCS dot_tester.cc DEPS analysis)
cc_binary(inference_analyzer SRCS analyzer_main.cc DEPS analysis paddle_fluid)
function(inference_analysis_test TARGET)
if(WITH_TESTING)
@ -34,13 +35,3 @@ function(inference_analysis_test TARGET)
endfunction(inference_analysis_test)
inference_analysis_test(test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS paddle_inference_api)
inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc)
inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc)
inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc)
inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc)
inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc)
inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc)
inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc)
inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc)
inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc)
inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc)

@ -19,42 +19,36 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/inference/analysis/argument.h"
#include "paddle/fluid/inference/analysis/data_flow_graph.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/node.h"
namespace paddle {
namespace inference {
namespace analysis {
/*
* AnalysisPass is a pass used to control the IR passes.
*/
class AnalysisPass {
public:
AnalysisPass() = default;
virtual ~AnalysisPass() = default;
// Mutable Pass.
virtual bool Initialize(Argument *argument) { return false; }
// Readonly Pass.
virtual bool Initialize(const Argument &argument) { return false; }
// Virtual method overriden by subclasses to do any necessary clean up after
// all passes have run.
virtual bool Finalize() { return false; }
// Create a debugger Pass that draw the DFG by graphviz toolkit.
virtual AnalysisPass *CreateGraphvizDebugerPass() const { return nullptr; }
// Run on a single DataFlowGraph.
virtual void Run(DataFlowGraph *x) = 0;
// Run on a single Graph.
void Run(Argument* argument) { RunImpl(argument); }
// Human-readable short representation.
virtual std::string repr() const = 0;
// Human-readable long description.
virtual std::string description() const { return "No DOC"; }
};
// GraphPass processes on any GraphType.
class DataFlowGraphPass : public AnalysisPass {};
protected:
// User should implement these.
virtual void RunImpl(Argument* argument) = 0;
Argument* argument_{nullptr};
};
} // namespace analysis
} // namespace inference

@ -15,138 +15,23 @@
#include "paddle/fluid/inference/analysis/analyzer.h"
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"
#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h"
#include "paddle/fluid/inference/analysis/model_store_pass.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h"
#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h"
DEFINE_bool(IA_enable_tensorrt_subgraph_engine, false,
"Enable subgraph to TensorRT engine for acceleration");
DEFINE_bool(IA_enable_ir, false, "Turn on IR support");
DEFINE_string(IA_graphviz_log_root, "./",
"Graphviz debuger for data flow graphs.");
DEFINE_string(IA_output_storage_path, "", "optimized model output path");
#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h"
#include "paddle/fluid/inference/analysis/passes/passes.h"
namespace paddle {
namespace inference {
namespace analysis {
class DfgPassManagerImpl final : public DfgPassManager {
public:
DfgPassManagerImpl() {
// TODO(Superjomn) set the key with pass reprs.
if (!FLAGS_IA_enable_ir) {
AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
} else {
AddPass("fluid-to-ir-pass", new FluidToIrPass);
}
TryAddTensorRtPass();
AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
if (!FLAGS_IA_output_storage_path.empty()) {
AddPass("model-store-pass", new ModelStorePass);
}
}
Analyzer::Analyzer() {}
std::string repr() const override { return "dfg-pass-manager"; }
std::string description() const override { return "DFG pass manager."; }
void Analyzer::Run(Argument *argument) { RunIrAnalysis(argument); }
private:
void AddPass(const std::string& name, AnalysisPass* pass) {
VLOG(30) << "Adding pass " << name;
Register(name, pass);
AddGraphvizDebugerPass(pass);
}
void Analyzer::RunIrAnalysis(Argument *argument) {
std::vector<std::string> passes({"ir_analysis_compose_pass"});
void TryAddTensorRtPass() {
if (FLAGS_IA_enable_tensorrt_subgraph_engine) {
auto trt_teller = [&](const Node* node) {
std::unordered_set<std::string> teller_set(
{"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid",
"depthwise_conv2d", "batch_norm", "concat", "tanh", "pad",
"elementwise_add", "dropout"});
if (!node->IsFunction()) return false;
const auto* func = static_cast<const Function*>(node);
if (teller_set.count(func->func_type())) {
return true;
} else {
return false;
}
};
AddPass("tensorrt-subgraph-marker",
new TensorRTSubgraphNodeMarkPass(trt_teller));
AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller));
}
}
// Add the graphviz debuger pass if the parent pass has one.
void AddGraphvizDebugerPass(AnalysisPass* pass) {
auto* debuger_pass = pass->CreateGraphvizDebugerPass();
if (debuger_pass) {
Register(debuger_pass->repr(), debuger_pass);
}
for (auto &pass : passes) {
PassRegistry::Global().Retreive(pass)->Run(argument);
}
};
Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); }
void Analyzer::Run(Argument* argument) {
std::vector<std::string> passes;
passes.push_back("graph_viz_pass"); // add graphviz for debug.
#ifdef PADDLE_WITH_MKLDNN
if (use_mkldnn_) {
VLOG(30) << "Adding MKL-DNN placement pass";
passes.push_back("mkldnn_placement_pass");
}
#endif
// infer_clean_graph_pass should be the first default pass
// after mkldnn_placement_pass.
passes.push_back("infer_clean_graph_pass");
passes.push_back("graph_viz_pass"); // add graphviz for debug.
for (auto& pass : ir_passes_) {
// skip mkldnn pass when use_mkldnn_ = false;
bool skip_pass = (!use_mkldnn_) && pass.find("mkldnn") != std::string::npos;
if (!disabled_ir_passes_.count(pass) && !skip_pass) {
passes.push_back(pass);
passes.push_back("graph_viz_pass"); // add graphviz for debug.
}
}
argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));
for (auto& x : data_) {
PADDLE_ENFORCE(x->Initialize(argument));
x->RunAll();
PADDLE_ENFORCE(x->Finalize());
}
}
Analyzer& Analyzer::IncludeAllIrPasses() {
ir_passes_ = all_ir_passes_;
return *this;
}
Analyzer& Analyzer::DisableIrPasses(const std::vector<std::string>& passes) {
disabled_ir_passes_.insert(passes.begin(), passes.end());
return *this;
}
Analyzer& Analyzer::IncludeIrPasses(const std::vector<std::string>& passes) {
ir_passes_ = passes;
return *this;
}
Analyzer& Analyzer::SetUseMkldnn(bool use_mkldnn) {
use_mkldnn_ = use_mkldnn;
return *this;
}
} // namespace analysis

@ -40,56 +40,21 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/inference/analysis/analysis_pass.h"
#include "paddle/fluid/inference/analysis/flags.h"
#include "paddle/fluid/inference/analysis/pass_manager.h"
namespace paddle {
namespace inference {
namespace analysis {
class Analyzer : public OrderedRegistry<PassManager> {
class Analyzer final {
public:
// Register all the pass-managers.
Analyzer();
void Run(Argument* argument);
Analyzer& DisableIrPasses(const std::vector<std::string>& passes);
Analyzer& IncludeIrPasses(const std::vector<std::string>& passes);
Analyzer& IncludeAllIrPasses();
Analyzer& SetUseMkldnn(bool use_mkldnn);
DISABLE_COPY_AND_ASSIGN(Analyzer);
private:
// All avaiable IR passes.
// The bigger fuse comes first, so that the small operators prefer to be
// merged in a larger fuse op. The small fusion will not break the pattern of
// larger fusion.
const std::vector<std::string> all_ir_passes_{{
// Manual update the passes here.
"attention_lstm_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
"embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
"fc_gru_fuse_pass", //
"mul_gru_fuse_pass", //
"seq_concat_fc_fuse_pass", //
"fc_fuse_pass", //
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
#ifdef PADDLE_WITH_MKLDNN
"depthwise_conv_mkldnn_pass", //
"conv_bias_mkldnn_fuse_pass", //
"conv_relu_mkldnn_fuse_pass", //
"conv_elementwise_add_mkldnn_fuse_pass", //
#endif
}};
std::unordered_set<std::string> disabled_ir_passes_;
// Ir passes to run
std::vector<std::string> ir_passes_;
bool use_mkldnn_;
protected:
void RunIrAnalysis(Argument* argument);
};
} // namespace analysis

@ -27,21 +27,21 @@ namespace analysis {
using namespace framework; // NOLINT
TEST(Analyzer, analysis_without_tensorrt) {
FLAGS_IA_enable_tensorrt_subgraph_engine = false;
Argument argument;
argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
argument.SetModelDir(FLAGS_inference_model_dir);
argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
Analyzer analyser;
analyser.Run(&argument);
}
TEST(Analyzer, analysis_with_tensorrt) {
FLAGS_IA_enable_tensorrt_subgraph_engine = true;
Argument argument;
argument.Set<int>("minimum_subgraph_size", new int(0));
argument.Set<int>("max_batch_size", new int(3));
argument.Set<int>("workspace_size", new int(1 << 20));
argument.Set<std::string>("precision_mode", new std::string("FP32"));
argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir));
argument.SetTensorRtMaxBatchSize(3);
argument.SetTensorRtWorkspaceSize(1 << 20);
argument.SetModelDir(FLAGS_inference_model_dir);
argument.SetIrAnalysisPasses({"infer_clean_graph_pass"});
Analyzer analyser;
analyser.Run(&argument);
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save