ParallelExecutor And dependency engine

helinwang-patch-1
Yu Yang 7 years ago
parent 8f061e43b7
commit baef1124fb

File diff suppressed because it is too large.

@@ -28,32 +28,33 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-struct AllReduceCallBack {
-  void operator()(framework::OperatorBase* op);
-
-  std::unordered_set<std::string> param_grad_names_;
-  platform::DeviceContext dev_ctx;
-};
+class ParallelExecutorPrivate;
+class VarHandle;
+class OpHandle;
 
 class ParallelExecutor {
  public:
   explicit ParallelExecutor(const std::vector<platform::Place>& places,
-                            const std::unordered_set<std::string>& params);
-
-  /* @Brief
-   * Runtime evaluation of the given ProgramDesc under certain Scope
-   *
-   * @param
-   *  ProgramDesc
-   *  Scope
-   */
-  void Run(const ProgramDesc& prog, Scope* scope, int block_id,
-           bool create_local_scope = true, bool create_vars = true);
+                            const std::unordered_set<std::string>& params,
+                            const ProgramDesc& startup_program,
+                            const ProgramDesc& main_program,
+                            const std::string& loss_var_name, Scope* scope);
+
+  std::vector<LoDTensor> Run(const std::vector<std::string>& fetch_tensors);
 
  private:
-  std::vector<framework::Executor> exes_;
-  std::vector<framework::Scope*> scopes_;
-  std::vector<AllReduceCallBack> all_reduce_callbacks_;
-  platform::Communicator nccl_com_;
+  ParallelExecutorPrivate* member_;
+
+  void BCastParamsToGPUs(const ProgramDesc& startup_program) const;
+
+  VarHandle* GetVarHandle(const std::string& each_var_name,
+                          const platform::Place& place) const;
+
+  void GenerateVar(OpHandle* op_handle, const std::string& each_var_name,
+                   const platform::Place& place) const;
+
+  void ConstructDependencyGraph(const std::unordered_set<std::string>& params,
+                                const ProgramDesc& main_program,
+                                const std::string& loss_var_name) const;
 };
 
 }  // namespace framework
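
Note: the rewritten header hides all execution state behind a single ParallelExecutorPrivate pointer (the pimpl idiom); the dependency-engine types VarHandle and OpHandle are only forward-declared here, and their definitions live in the suppressed parallel_executor.cc diff. Below is a minimal sketch of driving the new interface from C++, matching the constructor and Run signatures declared above; the places, parameter names, and loss variable name are placeholders, and the program descs are assumed to be built elsewhere (e.g. transpiled from Python).

#include <string>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"

namespace f = paddle::framework;
namespace p = paddle::platform;

// Hypothetical driver for the new interface; all names are placeholders.
void RunOnTwoGPUs(const f::ProgramDesc& startup_program,
                  const f::ProgramDesc& main_program) {
  std::vector<p::Place> places{p::CUDAPlace(0), p::CUDAPlace(1)};
  std::unordered_set<std::string> params{"fc_0.w_0", "fc_0.b_0"};  // placeholder parameter names
  f::Scope scope;

  // The constructor takes both programs up front; see the private helpers
  // above (BCastParamsToGPUs, ConstructDependencyGraph).
  f::ParallelExecutor exe(places, params, startup_program, main_program,
                          "loss" /* placeholder loss var name */, &scope);

  // Run() returns one LoDTensor per requested fetch name; the initial Python
  // binding further down simply passes an empty fetch list.
  exe.Run({});
}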

@@ -65,6 +65,17 @@ bool is_cpu_place(const Place &);
 bool places_are_same_class(const Place &, const Place &);
 bool is_same_place(const Place &, const Place &);
 
+struct PlaceHash {
+  std::size_t operator()(const Place &p) const {
+    std::hash<int> ihash;
+    size_t dev_id = 0;
+    if (is_gpu_place(p)) {
+      dev_id = boost::get<CUDAPlace>(p).device;
+    }
+    return ihash(dev_id << 2 | p.which());
+  }
+};
+
 std::ostream &operator<<(std::ostream &, const Place &);
 
 template <typename Visitor>
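
PlaceHash makes Place (a boost::variant over the concrete place types) usable as a key in std::unordered_* containers, which is what per-device bookkeeping in the new executor needs. A small illustrative sketch follows; the map and function names are made up, and equality is assumed to come from the variant's own operator==.

#include <unordered_map>

#include "paddle/fluid/platform/place.h"

namespace p = paddle::platform;

// Illustrative only: map each Place to a device index, using PlaceHash as the hasher.
int LookupDeviceIndex(const p::Place& place) {
  static const std::unordered_map<p::Place, int, p::PlaceHash> index{
      {p::CUDAPlace(0), 0}, {p::CUDAPlace(1), 1}, {p::CPUPlace(), 2}};
  auto it = index.find(place);
  return it == index.end() ? -1 : it->second;
}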

@@ -2,6 +2,7 @@ if(WITH_PYTHON)
   cc_library(paddle_pybind SHARED
     SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
     DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
+    parallel_executor
     ${GLOB_OP_LIB})
   if(NOT APPLE AND NOT ANDROID)
     target_link_libraries(paddle_pybind rt)

@@ -25,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/framework/parallel_executor.h"
 #include "paddle/fluid/framework/prune.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
@@ -488,6 +489,19 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("disable_profiler", platform::DisableProfiler);
   m.def("reset_profiler", platform::ResetProfiler);
 
+  py::class_<ParallelExecutor>(m, "ParallelExecutor")
+      .def(
+          "__init__",
+          [](ParallelExecutor &self, const std::vector<platform::Place> &places,
+             const std::unordered_set<std::string> &params,
+             const ProgramDesc &startup_program,
+             const ProgramDesc &main_program, const std::string &loss_var_name,
+             Scope *scope) {
+            new (&self) ParallelExecutor(places, params, startup_program,
+                                         main_program, loss_var_name, scope);
+          })
+      .def("run", [](ParallelExecutor &self) { self.Run({}); });
+
   BindRecordIOWriter(m);
   return m.ptr();
 }
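
The __init__ binding above uses pybind11's placement-new idiom: the lambda receives a reference to uninitialized storage for the instance and constructs the ParallelExecutor in place (newer pybind11 releases express the same thing with py::init). Below is a minimal, self-contained sketch of that idiom with a hypothetical Widget type, not taken from this diff.

#include <new>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Hypothetical type, used only to demonstrate the binding pattern above.
struct Widget {
  explicit Widget(int n) : n(n) {}
  int n;
};

PYBIND11_MODULE(example, m) {
  py::class_<Widget>(m, "Widget")
      // pybind11 hands the lambda raw storage for the instance; placement new
      // constructs the object in that storage, mirroring the ParallelExecutor binding.
      .def("__init__", [](Widget &self, int n) { new (&self) Widget(n); })
      .def("value", [](const Widget &self) { return self.n; });
}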

@@ -0,0 +1,47 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.fluid as fluid
+
+
+class ParallelExecutor(unittest.TestCase):
+    def test_main(self):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            reader = fluid.layers.open_recordio_file(
+                filename='tmp',
+                shapes=[[-1, 784], [-1, 1]],
+                lod_levels=[0, 0],
+                dtypes=['float32', 'int64'])
+            img, label = fluid.layers.read_file(reader)
+            hidden = fluid.layers.fc(img, size=200, act='tanh')
+            prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+            loss = fluid.layers.cross_entropy(input=prediction, label=label)
+            loss = fluid.layers.mean(loss)
+            adam = fluid.optimizer.Adam()
+            adam.minimize(loss)
+
+        act_places = []
+        for each in [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]:
+            p = fluid.core.Place()
+            p.set_place(each)
+            act_places.append(p)
+
+        exe = fluid.core.ParallelExecutor(
+            act_places,
+            set([p.name for p in main.global_block().iter_parameters()]),
+            startup.desc, main.desc, loss.name, fluid.global_scope())
+
+        exe.run()