commit 7cd2761736
@@ -0,0 +1,126 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/details/build_strategy.h"

#include "paddle/fluid/framework/details/multi_devices_graph_check_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_print_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"

namespace paddle {
namespace framework {
namespace details {

class ParallelExecutorPassBuilder : public ir::PassBuilder {
 public:
  explicit ParallelExecutorPassBuilder(const BuildStrategy &strategy)
      : ir::PassBuilder(), strategy_(strategy) {
    // Add a graph viz pass to record the original graph.
    if (!strategy_.debug_graphviz_path_.empty()) {
      auto viz_pass = AppendPass("graph_viz_pass");
      const std::string graph_path = string::Sprintf(
          "%s%s", strategy_.debug_graphviz_path_.c_str(), "_original_graph");
      viz_pass->Set<std::string>("graph_viz_path", new std::string(graph_path));
    }

    // Add op fusion.
    if (strategy.fuse_elewise_add_act_ops_) {
      auto fuse_elewise_add_act_pass = AppendPass("fuse_elewise_add_act_pass");
      // Add a graph viz pass to record the fused graph.
      if (!strategy.debug_graphviz_path_.empty()) {
        auto viz_pass = AppendPass("graph_viz_pass");
        const std::string graph_path = string::Sprintf(
            "%s%s", strategy.debug_graphviz_path_.c_str(), "_fused_graph");
        viz_pass->Set<std::string>("graph_viz_path",
                                   new std::string(graph_path));
      }
    }

    // Convert the graph to run on multiple devices.
    auto multi_devices_pass = AppendPass("multi_devices_pass");
    multi_devices_pass->SetNotOwned<const BuildStrategy>("strategy",
                                                         &strategy_);

    // Add a graph print pass to record the graph with device info.
    if (!strategy_.debug_graphviz_path_.empty()) {
      auto multi_devices_print_pass = AppendPass("multi_devices_print_pass");
      multi_devices_print_pass->SetNotOwned<const std::string>(
          "debug_graphviz_path", &strategy_.debug_graphviz_path_);
      multi_devices_print_pass->Set<details::GraphvizSSAGraphPrinter>(
          "graph_printer", new details::GraphvizSSAGraphPrinter);
    }

    // Verify that the graph is correct for the multi-device executor.
    AppendPass("multi_devices_check_pass");
  }

 private:
  BuildStrategy strategy_;
};

std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy()
    const {
  pass_builder_.reset(new ParallelExecutorPassBuilder(*this));
  return pass_builder_;
}

std::unique_ptr<ir::Graph> BuildStrategy::Apply(
    const ProgramDesc &main_program, const std::vector<platform::Place> &places,
    const std::string &loss_var_name,
    const std::unordered_set<std::string> &param_names,
    const std::vector<Scope *> &local_scopes,
#ifdef PADDLE_WITH_CUDA
    const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
#else
    const bool use_cuda) const {
#endif
  // Create a default pass builder if it was not initialized by the user.
  if (!pass_builder_) {
    CreatePassesFromStrategy();
  }

  std::unique_ptr<ir::Graph> graph(new ir::Graph(main_program));

  for (std::shared_ptr<ir::Pass> &pass : pass_builder_->AllPasses()) {
    if (pass->Type() == "multi_devices_pass") {
      pass->Erase("places");
      pass->SetNotOwned<const std::vector<platform::Place>>("places", &places);
      pass->Erase("loss_var_name");
      pass->SetNotOwned<const std::string>("loss_var_name", &loss_var_name);
      pass->Erase("params");
      pass->SetNotOwned<const std::unordered_set<std::string>>("params",
                                                               &param_names);
      pass->Erase("local_scopes");
      pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
                                                    &local_scopes);
#ifdef PADDLE_WITH_CUDA
      platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
      pass->Erase("nccl_ctxs");
      pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
#endif
    }
    graph = pass->Apply(std::move(graph));
  }
  return graph;
}
}  // namespace details
}  // namespace framework
}  // namespace paddle

USE_PASS(fuse_elewise_add_act_pass);
USE_PASS(graph_viz_pass);
USE_PASS(multi_devices_pass);
USE_PASS(multi_devices_check_pass);
USE_PASS(multi_devices_print_pass);
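The two strategy fields used above (debug_graphviz_path_ and fuse_elewise_add_act_ops_) are what decide which optional passes ParallelExecutorPassBuilder appends. A minimal sketch of that effect from the Python side follows; it is not part of this commit, it assumes the two fields are exposed on fluid.BuildStrategy by the pybind layer (as debug_graphviz_path and fuse_elewise_add_act_ops), and it otherwise only calls pass-builder methods exercised by the test at the end of this commit.

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    # Assumed pybind-exposed counterparts of the C++ BuildStrategy fields
    # debug_graphviz_path_ and fuse_elewise_add_act_ops_ used above.
    build_strategy.debug_graphviz_path = "/tmp/debug_graph"
    build_strategy.fuse_elewise_add_act_ops = True

    # Builds the default pipeline assembled by ParallelExecutorPassBuilder:
    # with both options on it should contain graph_viz_pass,
    # fuse_elewise_add_act_pass, a second graph_viz_pass, multi_devices_pass,
    # multi_devices_print_pass and multi_devices_check_pass.
    pass_builder = build_strategy._create_passes_from_strategy()
    print(len(pass_builder.all_passes()))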
@@ -0,0 +1,43 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/pass_builder.h"

namespace paddle {
namespace framework {
namespace ir {

std::shared_ptr<Pass> PassBuilder::AppendPass(const std::string& pass_type) {
  auto pass = ir::PassRegistry::Instance().Get(pass_type);
  passes_.emplace_back(pass.release());
  return passes_.back();
}

void PassBuilder::RemovePass(size_t idx) {
  PADDLE_ENFORCE(passes_.size() > idx);
  passes_.erase(passes_.begin() + idx);
}

std::shared_ptr<Pass> PassBuilder::InsertPass(size_t idx,
                                              const std::string& pass_type) {
  PADDLE_ENFORCE(passes_.size() >= idx);
  std::shared_ptr<Pass> pass(
      ir::PassRegistry::Instance().Get(pass_type).release());
  passes_.insert(passes_.begin() + idx, std::move(pass));
  return passes_[idx];
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,49 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class PassBuilder {
 public:
  PassBuilder() {}

  virtual ~PassBuilder() {}

  // Append a new pass to the end of the pipeline.
  std::shared_ptr<Pass> AppendPass(const std::string& pass_type);

  // Insert a new pass at position `idx`.
  std::shared_ptr<Pass> InsertPass(size_t idx, const std::string& pass_type);

  // Remove the pass at position `idx`.
  void RemovePass(size_t idx);

  // Returns a list of all passes.
  std::vector<std::shared_ptr<Pass>> AllPasses() const { return passes_; }

 protected:
  std::vector<std::shared_ptr<Pass>> passes_;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
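The PassBuilder interface above is what the Python-side pass builder object wraps. The sketch below is not part of this commit; it shows the corresponding Python workflow using only the bindings that the test file below exercises (_create_passes_from_strategy, append_pass, insert_pass, remove_pass, all_passes, and Pass.set_str), with an illustrative output path, so treat it as a sketch rather than documented API.

    import paddle.fluid as fluid

    build_strategy = fluid.BuildStrategy()
    pass_builder = build_strategy._create_passes_from_strategy()

    # AppendPass: add a graph_viz_pass at the end and configure its output path.
    viz_pass = pass_builder.append_pass("graph_viz_pass")
    viz_pass.set_str("graph_viz_path", "/tmp/custom_viz_pass")

    # InsertPass / RemovePass: insert another pass at the end, then drop it.
    idx = len(pass_builder.all_passes())
    pass_builder.insert_pass(idx, "graph_viz_pass")
    pass_builder.remove_pass(len(pass_builder.all_passes()) - 1)

    # AllPasses: the customized pipeline that ParallelExecutor applies when it
    # is constructed with this build_strategy (as in the test below).
    print(len(pass_builder.all_passes()))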
@@ -0,0 +1,121 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
import unittest
import os
import sys
import math


def simple_fc_net():
    img = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = img
    for _ in range(4):
        hidden = fluid.layers.fc(
            hidden,
            size=200,
            act='tanh',
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=1.0)))
    prediction = fluid.layers.fc(hidden, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    loss = fluid.layers.mean(loss)
    return loss


class TestPassBuilder(unittest.TestCase):
    def check_network_convergence(self, use_cuda, build_strategy=None):
        os.environ['CPU_NUM'] = str(4)
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = simple_fc_net()
            test_program = main.clone(for_test=True)

            opt = fluid.optimizer.SGD(learning_rate=0.001)
            opt.minimize(loss)

            batch_size = 32
            image = np.random.normal(size=(batch_size, 784)).astype('float32')
            label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(startup)
            feed_dict = {'image': image, 'label': label}

            train_exe = fluid.ParallelExecutor(
                use_cuda=use_cuda,
                loss_name=loss.name,
                main_program=main,
                build_strategy=build_strategy)

            test_exe = fluid.ParallelExecutor(
                use_cuda=use_cuda,
                main_program=test_program,
                share_vars_from=train_exe,
                build_strategy=build_strategy)

            for i in range(5):
                test_loss, = test_exe.run([loss.name], feed=feed_dict)

                train_loss, = train_exe.run([loss.name], feed=feed_dict)

                avg_test_loss_val = np.array(test_loss).mean()
                if math.isnan(float(avg_test_loss_val)):
                    sys.exit("got NaN loss, testing failed.")

                avg_train_loss_val = np.array(train_loss).mean()
                if math.isnan(float(avg_train_loss_val)):
                    sys.exit("got NaN loss, training failed.")

                self.assertTrue(
                    np.allclose(
                        train_loss, test_loss, atol=1e-8),
                    "Train loss: " + str(train_loss) + "\n Test loss:" +
                    str(test_loss))

    def test_parallel_testing_with_new_strategy(self):
        build_strategy = fluid.BuildStrategy()
        pass_builder = build_strategy._create_passes_from_strategy()
        origin_len = len(pass_builder.all_passes())

        viz_pass = pass_builder.append_pass("graph_viz_pass")
        self.assertEqual(origin_len + 1, len(pass_builder.all_passes()))

        pass_builder.insert_pass(
            len(pass_builder.all_passes()), "graph_viz_pass")
        self.assertEqual(origin_len + 2, len(pass_builder.all_passes()))

        pass_builder.remove_pass(len(pass_builder.all_passes()) - 1)
        self.assertEqual(origin_len + 1, len(pass_builder.all_passes()))
        viz_pass.set_str("graph_viz_path", "/tmp/test_viz_pass")

        self.check_network_convergence(
            use_cuda=core.is_compiled_with_cuda(),
            build_strategy=build_strategy)
        try:
            os.stat("/tmp/test_viz_pass")
        except os.error:
            self.assertFalse(True)


if __name__ == '__main__':
    unittest.main()