ParallelExecutor And dependency engine

helinwang-patch-1
Yu Yang 7 years ago
parent 8f061e43b7
commit baef1124fb

File diff suppressed because it is too large.

@@ -28,32 +28,33 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-struct AllReduceCallBack {
-  void operator()(framework::OperatorBase* op);
-
-  std::unordered_set<std::string> param_grad_names_;
-  platform::DeviceContext dev_ctx;
-};
+class ParallelExecutorPrivate;
+class VarHandle;
+class OpHandle;
 
 class ParallelExecutor {
  public:
   explicit ParallelExecutor(const std::vector<platform::Place>& places,
-                            const std::unordered_set<std::string>& params);
-
-  /* @Brief
-   * Runtime evaluation of the given ProgramDesc under certain Scope
-   *
-   * @param
-   *  ProgramDesc
-   *  Scope
-   */
-  void Run(const ProgramDesc& prog, Scope* scope, int block_id,
-           bool create_local_scope = true, bool create_vars = true);
+                            const std::unordered_set<std::string>& params,
+                            const ProgramDesc& startup_program,
+                            const ProgramDesc& main_program,
+                            const std::string& loss_var_name, Scope* scope);
+
+  std::vector<LoDTensor> Run(const std::vector<std::string>& fetch_tensors);
 
  private:
-  std::vector<framework::Executor> exes_;
-  std::vector<framework::Scope*> scopes_;
-  std::vector<AllReduceCallBack> all_reduce_callbacks_;
-  platform::Communicator nccl_com_;
+  ParallelExecutorPrivate* member_;
+
+  void BCastParamsToGPUs(const ProgramDesc& startup_program) const;
+
+  VarHandle* GetVarHandle(const std::string& each_var_name,
+                          const platform::Place& place) const;
+
+  void GenerateVar(OpHandle* op_handle, const std::string& each_var_name,
+                   const platform::Place& place) const;
+
+  void ConstructDependencyGraph(const std::unordered_set<std::string>& params,
+                                const ProgramDesc& main_program,
+                                const std::string& loss_var_name) const;
 };
 
 }  // namespace framework
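
Note: the rewritten header hides all execution state behind a single ParallelExecutorPrivate pointer (the pimpl idiom); the dependency-engine types VarHandle and OpHandle are only forward-declared here, and their definitions live in the suppressed parallel_executor.cc diff. Below is a minimal sketch of driving the new interface from C++, matching the constructor and Run signatures declared above; the places, parameter names, and loss variable name are placeholders, and the program descs are assumed to be built elsewhere (e.g. transpiled from Python).

#include <string>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/parallel_executor.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/place.h"

namespace f = paddle::framework;
namespace p = paddle::platform;

// Hypothetical driver for the new interface; all names are placeholders.
void RunOnTwoGPUs(const f::ProgramDesc& startup_program,
                  const f::ProgramDesc& main_program) {
  std::vector<p::Place> places{p::CUDAPlace(0), p::CUDAPlace(1)};
  std::unordered_set<std::string> params{"fc_0.w_0", "fc_0.b_0"};  // placeholder parameter names
  f::Scope scope;

  // The constructor takes both programs up front; see the private helpers
  // above (BCastParamsToGPUs, ConstructDependencyGraph).
  f::ParallelExecutor exe(places, params, startup_program, main_program,
                          "loss" /* placeholder loss var name */, &scope);

  // Run() returns one LoDTensor per requested fetch name; the initial Python
  // binding further down simply passes an empty fetch list.
  exe.Run({});
}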

@@ -65,6 +65,17 @@ bool is_cpu_place(const Place &);
 bool places_are_same_class(const Place &, const Place &);
 bool is_same_place(const Place &, const Place &);
 
+struct PlaceHash {
+  std::size_t operator()(const Place &p) const {
+    std::hash<int> ihash;
+    size_t dev_id = 0;
+    if (is_gpu_place(p)) {
+      dev_id = boost::get<CUDAPlace>(p).device;
+    }
+    return ihash(dev_id << 2 | p.which());
+  }
+};
+
 std::ostream &operator<<(std::ostream &, const Place &);
 
 template <typename Visitor>
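
PlaceHash makes Place (a boost::variant over the concrete place types) usable as a key in std::unordered_* containers, which is what per-device bookkeeping in the new executor needs. A small illustrative sketch follows; the map and function names are made up, and equality is assumed to come from the variant's own operator==.

#include <unordered_map>

#include "paddle/fluid/platform/place.h"

namespace p = paddle::platform;

// Illustrative only: map each Place to a device index, using PlaceHash as the hasher.
int LookupDeviceIndex(const p::Place& place) {
  static const std::unordered_map<p::Place, int, p::PlaceHash> index{
      {p::CUDAPlace(0), 0}, {p::CUDAPlace(1), 1}, {p::CPUPlace(), 2}};
  auto it = index.find(place);
  return it == index.end() ? -1 : it->second;
}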

@@ -2,6 +2,7 @@ if(WITH_PYTHON)
   cc_library(paddle_pybind SHARED
     SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
     DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
+    parallel_executor
     ${GLOB_OP_LIB})
   if(NOT APPLE AND NOT ANDROID)
     target_link_libraries(paddle_pybind rt)

@@ -25,6 +25,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_rank_table.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/framework/parallel_executor.h"
 #include "paddle/fluid/framework/prune.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
@@ -488,6 +489,19 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("disable_profiler", platform::DisableProfiler);
   m.def("reset_profiler", platform::ResetProfiler);
 
+  py::class_<ParallelExecutor>(m, "ParallelExecutor")
+      .def(
+          "__init__",
+          [](ParallelExecutor &self, const std::vector<platform::Place> &places,
+             const std::unordered_set<std::string> &params,
+             const ProgramDesc &startup_program,
+             const ProgramDesc &main_program, const std::string &loss_var_name,
+             Scope *scope) {
+            new (&self) ParallelExecutor(places, params, startup_program,
+                                         main_program, loss_var_name, scope);
+          })
+      .def("run", [](ParallelExecutor &self) { self.Run({}); });
+
   BindRecordIOWriter(m);
   return m.ptr();
 }
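
The __init__ binding above uses pybind11's placement-new idiom: the lambda receives a reference to uninitialized storage for the instance and constructs the ParallelExecutor in place (newer pybind11 releases express the same thing with py::init). Below is a minimal, self-contained sketch of that idiom with a hypothetical Widget type, not taken from this diff.

#include <new>

#include <pybind11/pybind11.h>

namespace py = pybind11;

// Hypothetical type, used only to demonstrate the binding pattern above.
struct Widget {
  explicit Widget(int n) : n(n) {}
  int n;
};

PYBIND11_MODULE(example, m) {
  py::class_<Widget>(m, "Widget")
      // pybind11 hands the lambda raw storage for the instance; placement new
      // constructs the object in that storage, mirroring the ParallelExecutor binding.
      .def("__init__", [](Widget &self, int n) { new (&self) Widget(n); })
      .def("value", [](const Widget &self) { return self.n; });
}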

@@ -0,0 +1,47 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import paddle.fluid as fluid
+
+
+class ParallelExecutor(unittest.TestCase):
+    def test_main(self):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            reader = fluid.layers.open_recordio_file(
+                filename='tmp',
+                shapes=[[-1, 784], [-1, 1]],
+                lod_levels=[0, 0],
+                dtypes=['float32', 'int64'])
+            img, label = fluid.layers.read_file(reader)
+            hidden = fluid.layers.fc(img, size=200, act='tanh')
+            prediction = fluid.layers.fc(hidden, size=10, act='softmax')
+            loss = fluid.layers.cross_entropy(input=prediction, label=label)
+            loss = fluid.layers.mean(loss)
+            adam = fluid.optimizer.Adam()
+            adam.minimize(loss)
+
+        act_places = []
+        for each in [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]:
+            p = fluid.core.Place()
+            p.set_place(each)
+            act_places.append(p)
+
+        exe = fluid.core.ParallelExecutor(
+            act_places,
+            set([p.name for p in main.global_block().iter_parameters()]),
+            startup.desc, main.desc, loss.name, fluid.global_scope())
+
+        exe.run()