From 0621c327f1d0dd272ab7248c50e9afa8ae0fc0c0 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Tue, 13 Mar 2018 23:52:35 +0000 Subject: [PATCH 001/180] init commit --- doc/design/parallel_executor.md | 52 ++++++++++++++++++ paddle/fluid/framework/CMakeLists.txt | 2 + paddle/fluid/framework/executor.cc | 13 +++++ paddle/fluid/framework/executor.h | 1 + paddle/fluid/framework/parallel_executor.cc | 19 +++++++ paddle/fluid/framework/parallel_executor.h | 61 +++++++++++++++++++++ 6 files changed, 148 insertions(+) create mode 100644 doc/design/parallel_executor.md create mode 100644 paddle/fluid/framework/parallel_executor.cc create mode 100644 paddle/fluid/framework/parallel_executor.h diff --git a/doc/design/parallel_executor.md b/doc/design/parallel_executor.md new file mode 100644 index 0000000000..567eede1bd --- /dev/null +++ b/doc/design/parallel_executor.md @@ -0,0 +1,52 @@ +# ParallelExecutor Design Doc + +## Introduction + +We introduce `ParallelExecutor` to run multi-GPU training in PaddlePaddle Fluid. It supports +1. keeping a copy of the parameters on each GPU +1. allreduce on a separate stream allowing computation and communication overlap + +An example of switching single GPU training to multiple GPUs: +```python +cost = your_neural_network() +opt = fluid.optimizer.SGDOptimizer() +opt.minimize(avg_cost) + +# change Executor -> ParallelExecutor +exe = fluid.ParallelExecutor(gpu_list=[0, 1]) + +for iter in xranges(iter_num): + exe.run() +``` + +## Design + +In the constructor, a list of parameter, whose gradients need to be allreduced, is given. + +During the runtime, `ParallelExecutor` starts `#gpu` threads to run each `Executor`. For every +operator run on each GPU, it will automatically sync with different streams when necessary. + +```c++ +// if op's input is params' grad: + // sync with allreduce stream + // e.g. sgd should wait for allreduce to be finished +SyncMultipleStreams(op); + +op->Run(*local_scope, place_); + +// if op's output is params' grad: +// sync with computation stream +// e.g. allreduce shoudl wait for fc_grad to be finished. +SyncMultipleStreams(op); +``` + + +## API + +The `ParallelExecutor.run` has similar interface as `Executor.run`. Besides +1. Scope: we don't expose `scope` in `ParallelExecutor.run` since `ParallelExecutor` has its +own scope to maintain NCCL. +1. Feed: we don't expose `feed` in the API either, because the whole point of implementing +parallel_executor is the speed. The input for NN should be implemented in an reader OP. +1. Fetch: we return the fetched value on all GPUs as a list. (e.g. `exe.run(..., fetch=loss)` +with return `[loss_on_gpu0, loss_on_gpu1]`) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 15e5574ecf..934bb43ffe 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -86,6 +86,8 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) +cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope + framework_proto backward glog lod_rank_table feed_fetch_method executor) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 5cae38b2a8..6ee3f18dd4 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -305,10 +305,23 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, } // if (create_vars) for (auto& op : ctx->ops_) { + // TODO(ty): + // e.g. sgd should wait for allreduce to be finished + // if op's input is params' grad: + // sync with allreduce stream + // SyncMultipleStreams(op); + VLOG(4) << place_ << " " << op->DebugStringEx(local_scope); op->Run(*local_scope, place_); VLOG(3) << place_ << " " << op->DebugStringEx(local_scope); + // TODO(ty): + // e.g. allreduce shoudl wait for fc_grad to be finished. + // if op's output is params' grad: + // sync with computation stream + // apply allreduce on allreduce stream + // SyncMultipleStreams(op); + if (FLAGS_benchmark) { VLOG(2) << "Memory used after operator " + op->Type() + " running: " << memory::memory_usage(place_); diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 28ce331515..8d8a7cf4db 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -47,6 +47,7 @@ class Executor { const std::string& feed_holder_name = "feed", const std::string& fetch_holder_name = "fetch"); + private: static ExecutorPrepareContext* Prepare(const ProgramDesc& program, int block_id); diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc new file mode 100644 index 0000000000..e9f213ae2c --- /dev/null +++ b/paddle/fluid/framework/parallel_executor.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/parallel_executor.h" + +namespace paddle { +namespace framework {} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h new file mode 100644 index 0000000000..47e0005e58 --- /dev/null +++ b/paddle/fluid/framework/parallel_executor.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/framework/op_info.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor.h" + +#include "paddle/fluid/operators/nccl/nccl_gpu_common.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace framework { + +struct AllReduceCallBack { + void operator()(framework::OperatorBase* op); + + std::unordered_set param_grad_names_; + platform::DeviceContext dev_ctx; +}; + +class ParallelExecutor { + explicit ParallelExecutor(const std::vector& places, + const std::unordered_set& params); + + /* @Brief + * Runtime evaluation of the given ProgramDesc under certain Scope + * + * @param + * ProgramDesc + * Scope + */ + void Run(const ProgramDesc& prog, Scope* scope, int block_id, + bool create_local_scope = true, bool create_vars = true); + + private: + std::vector exes_; + std::vector scopes_; + AllReduceCallBack all_reduce_callbacks_; + std::unordered_set params_; // where to initilize it? + platform::Communicator nccl_com_; +}; + +} // namespace framework +} // namespace paddle From e67325cdaf8ce85342dab45b06dbc286c77a5555 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 14 Mar 2018 00:11:32 +0000 Subject: [PATCH 002/180] update readme --- doc/design/parallel_executor.md | 42 +++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/doc/design/parallel_executor.md b/doc/design/parallel_executor.md index 567eede1bd..78ef74f159 100644 --- a/doc/design/parallel_executor.md +++ b/doc/design/parallel_executor.md @@ -30,23 +30,45 @@ operator run on each GPU, it will automatically sync with different streams when // if op's input is params' grad: // sync with allreduce stream // e.g. sgd should wait for allreduce to be finished -SyncMultipleStreams(op); +CallBack->BeforeOp(op); op->Run(*local_scope, place_); // if op's output is params' grad: // sync with computation stream // e.g. allreduce shoudl wait for fc_grad to be finished. -SyncMultipleStreams(op); +CallBack->AfterOp(op); ``` +And the `Callback` object can be implemented as the following -## API +```c++ +struct AllReduceCallBack { + void BeforeOp(framework::OperatorBase* op); + void AfterOp(framework::OperatorBase* op); + + std::unordered_set reduced_param_grad_names; + std::unordered_set param_grad_names_; + + platform::DeviceContext* computation_dev_ctx; // computation device context + platform::DeviceContext* communication_dev_ctx; // communication device context -The `ParallelExecutor.run` has similar interface as `Executor.run`. Besides -1. Scope: we don't expose `scope` in `ParallelExecutor.run` since `ParallelExecutor` has its -own scope to maintain NCCL. -1. Feed: we don't expose `feed` in the API either, because the whole point of implementing -parallel_executor is the speed. The input for NN should be implemented in an reader OP. -1. Fetch: we return the fetched value on all GPUs as a list. (e.g. `exe.run(..., fetch=loss)` -with return `[loss_on_gpu0, loss_on_gpu1]`) + framework::Scope* scope; + platform::NCCL::Communicator* nccl_com; +}; + +AllReduceCallBack::BeforeOp(framework::OperatorBase* op) { + if (op->Input() in reduced_param_grad_names) { + communication_dev_ctx->Wait(); + reduced_param_grad_names.erase(op->Input()) + } +} + +AllReduceCallBack::AfterOp(framework::OperatorBase* op) { + if (op->Output() in param_grad_names) { + computation_dev_ctx->Wait(); + reduced_param_grad_names.insert(op->Output()); + ncclAllreduce(scope, op->Output(), communication_dev_ctx); + } +} +``` From 8f061e43b71b398d37aebc3576e2c2f21d5fae73 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 14 Mar 2018 00:16:11 +0000 Subject: [PATCH 003/180] delete param name --- paddle/fluid/framework/parallel_executor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 47e0005e58..f67b926694 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -52,8 +52,7 @@ class ParallelExecutor { private: std::vector exes_; std::vector scopes_; - AllReduceCallBack all_reduce_callbacks_; - std::unordered_set params_; // where to initilize it? + std::vector all_reduce_callbacks_; platform::Communicator nccl_com_; }; From baef1124fb4cc8876a0119af34ca1500df682f9d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 14 Mar 2018 21:13:29 +0800 Subject: [PATCH 004/180] ParallelExecutor And dependency engine --- paddle/fluid/framework/parallel_executor.cc | 338 +++++++++++++++++- paddle/fluid/framework/parallel_executor.h | 45 +-- paddle/fluid/platform/place.h | 11 + paddle/fluid/pybind/CMakeLists.txt | 1 + paddle/fluid/pybind/pybind.cc | 14 + .../tests/unittests/test_parallel_executor.py | 47 +++ 6 files changed, 433 insertions(+), 23 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_parallel_executor.py diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index e9f213ae2c..7488458743 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -13,7 +13,343 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" +#include "lod_tensor.h" +#include "op_registry.h" namespace paddle { -namespace framework {} // namespace framework +namespace framework { + +struct OpHandle; + +struct VarHandle { + size_t version_; + std::string name_; + platform::Place place_; + + OpHandle *generated_op_; + std::vector deps_ops_; +}; + +struct OpHandle { + std::vector inputs_; + std::vector outputs_; + platform::DeviceContext *dev_ctx_; + + std::string DebugString() { + std::stringstream ss; + ss << "("; + for (auto *var : inputs_) { + ss << var->name_ << ":" << var->place_ << ", "; + } + ss << ") --> ("; + for (auto *var : outputs_) { + ss << var->name_ << ":" << var->place_ << ", "; + } + ss << ")\n"; + return ss.str(); + } + + virtual ~OpHandle() {} +}; + +struct ComputationOpHandle : public OpHandle { + std::unique_ptr op_; + + explicit ComputationOpHandle(const OpDesc &op_desc) + : op_(framework::OpRegistry::CreateOp(op_desc)) {} +}; + +struct ScaleLossGradOpHandle : public OpHandle {}; + +struct NCCLAllReduceOpHandle : public OpHandle {}; + +class ParallelExecutorPrivate { + public: + std::unordered_map + local_scopes_; + std::unordered_map + dev_ctxs_; + platform::Place main_place_; + + std::unordered_map>, + platform::PlaceHash> + vars_; + std::vector> ops_; +}; + +// TODO(yy): Move this function somewhere +ncclDataType_t ToNCCLDataType(std::type_index type) { + // FIXME!! + return ncclFloat; +} + +ParallelExecutor::ParallelExecutor( + const std::vector &places, + const std::unordered_set ¶ms, + const ProgramDesc &startup_program, const ProgramDesc &main_program, + const std::string &loss_var_name, Scope *scope) + : member_(new ParallelExecutorPrivate()) { + // Step 1. RunStartupProgram and Bcast the params to devs. + Executor exe(places[0]); + exe.Run(startup_program, scope, 0); + // Create local scopes + for (auto &place : places) { + member_->local_scopes_[place] = &scope->NewScope(); + } + member_->main_place_ = places[0]; + + // Bcast Parameters to all GPUs + if (platform::is_gpu_place(member_->main_place_)) { // Is CUDA + // BCastParamsToGPUs(startup_program); + } + // Startup Program has been run. All local scopes has correct parameters. + + // Step 2. Convert main_program to SSA form and dependency graph. Also, insert + // ncclOp + ConstructDependencyGraph(params, main_program, loss_var_name); +} + +void ParallelExecutor::ConstructDependencyGraph( + const std::unordered_set ¶ms, + const ProgramDesc &main_program, const std::string &loss_var_name) const { + std::unordered_set grads; + for (auto &each_param : params) { + grads.insert(each_param + "@GRAD"); + } + + bool is_forwarding = true; + for (auto *op : main_program.Block(0).AllOps()) { + bool change_forward = false; + + if (!is_forwarding) { + // FIXME(yy): Do not hard code like this + if (op->OutputArgumentNames().size() == 1 && + op->OutputArgumentNames()[0] == loss_var_name + "@GRAD") { + continue; // Drop fill 1. for backward coeff; + } + } + + for (auto &pair : member_->local_scopes_) { + member_->ops_.emplace_back(new ComputationOpHandle(*op)); + auto *op_handle = member_->ops_.back().get(); + + auto var_names = op->InputArgumentNames(); + + for (auto &each_var_name : var_names) { + auto &place = pair.first; + VarHandle *var = GetVarHandle(each_var_name, place); + op_handle->inputs_.emplace_back(var); + var->deps_ops_.emplace_back(op_handle); + } + var_names = op->OutputArgumentNames(); + + for (auto &each_var_name : var_names) { + auto &place = pair.first; + GenerateVar(op_handle, each_var_name, place); + } + + if (is_forwarding) { + if (var_names.size() == 1 && var_names[0] == loss_var_name) { + // Insert ScaleCost OpHandle + member_->ops_.emplace_back(new ScaleLossGradOpHandle()); + + op_handle = member_->ops_.back().get(); + auto &place = pair.first; + VarHandle *loss = GetVarHandle(loss_var_name, place); + loss->deps_ops_.emplace_back(op_handle); + op_handle->inputs_.emplace_back(loss); + GenerateVar(op_handle, loss_var_name + "@GRAD", place); + change_forward = true; + LOG(INFO) << "Scale Loss " << op_handle->DebugString(); + } + } + } + + if (change_forward) { + is_forwarding = false; + } + + if (!is_forwarding) { + auto var_names = op->OutputArgumentNames(); + for (auto &og : var_names) { + if (grads.count(og) != 0) { // is param grad + // Insert NCCL AllReduce Op + member_->ops_.emplace_back(new NCCLAllReduceOpHandle()); + auto *op_handle = member_->ops_.back().get(); + + for (auto &pair : member_->local_scopes_) { + auto &place = pair.first; + auto &vars = member_->vars_[place][og]; + + if (vars.empty()) { // This device has no data. continue. + continue; + } + auto *prev_grad = &vars[vars.size() - 1]; + op_handle->inputs_.emplace_back(prev_grad); + prev_grad->deps_ops_.emplace_back(op_handle); + auto &var = vars[vars.size()]; + var.place_ = place; + var.generated_op_ = op_handle; + var.name_ = og; + var.version_ = vars.size() - 1; + op_handle->outputs_.emplace_back(&var); + } + } + } + } + } +} + +void ParallelExecutor::GenerateVar(OpHandle *op_handle, + const std::string &each_var_name, + const platform::Place &place) const { + auto &vars = member_->vars_[place][each_var_name]; + size_t version = vars.size(); + auto &var = vars[version]; + var.version_ = version; + var.generated_op_ = op_handle; + var.name_ = each_var_name; + var.place_ = place; + op_handle->outputs_.emplace_back(&var); +} + +VarHandle *ParallelExecutor::GetVarHandle(const std::string &each_var_name, + const platform::Place &place) const { + auto &var_holders = member_->vars_[place]; + auto &var_holder = var_holders[each_var_name]; + VarHandle *var = nullptr; + if (var_holder.empty()) { + auto &init_var = var_holder[0]; + init_var.place_ = place; + init_var.name_ = each_var_name; + init_var.generated_op_ = nullptr; + init_var.version_ = 0; + var = &init_var; + } else { + var = &var_holder.rbegin()->second; + } + return var; +} + +void ParallelExecutor::BCastParamsToGPUs( + const ProgramDesc &startup_program) const { + auto *main_scope = member_->local_scopes_[member_->main_place_]; + for (auto *var_desc : startup_program.Block(0).AllVars()) { + if (var_desc->GetType() == proto::VarType::LOD_TENSOR) { + auto &main_tensor = + main_scope->FindVar(var_desc->Name())->Get(); + + ncclDataType_t data_type = ToNCCLDataType(main_tensor.type()); + auto &dims = main_tensor.dims(); + size_t numel = main_tensor.numel(); + std::vector> mems; + mems.emplace_back( + const_cast(main_tensor.data()), + new platform::CUDADeviceContext( + boost::get(member_->main_place_))); + + for (auto &pair : member_->local_scopes_) { + if (pair.first == member_->main_place_) { + continue; + } + + auto local_scope = pair.second; + auto *t = local_scope->Var(var_desc->Name())->GetMutable(); + t->Resize(dims); + mems.emplace_back(t->mutable_data(pair.first, main_tensor.type()), + new platform::CUDADeviceContext( + boost::get(pair.first))); + } + + // TODO(yy): Invoke ncclBCast here. mems, numel, data_type. The mems[0] + // is the src, rests are dests. + + (void)(data_type); + (void)(numel); + + // Free Communication Ctx + for (auto &pair : mems) { + // Release Communication Ctx + + // FIXME: Store CUDA DevCtx to member. Since NCCL All Reduce will use + // this + delete pair.second; + } + } + } +} + +std::vector ParallelExecutor::Run( + const std::vector &fetch_tensors) { + // Version --> VarHandle + std::unordered_set pending_vars; + std::unordered_map pending_ops; + + for (auto &place_pair : member_->vars_) { + for (auto &name_pair : place_pair.second) { + for (auto &version_pair : name_pair.second) { + pending_vars.insert(&version_pair.second); + } + } + } + + for (auto &op : member_->ops_) { + pending_ops.insert({op.get(), op->inputs_.size()}); + } + + std::unordered_set complete_op; + + size_t num_op = pending_ops.size(); + + while (complete_op.size() != num_op) { + std::vector to_remove; + for (auto &var : pending_vars) { + if (var->generated_op_ == nullptr || + complete_op.count(var->generated_op_) != 0) { + to_remove.push_back(var); + } + } + for (auto *var : to_remove) { + pending_vars.erase(var); + } + + std::vector to_run; + for (auto *var : to_remove) { + for (auto *op : var->deps_ops_) { + if (var->name_ == "mean_0.tmp_0@GRAD") { + LOG(INFO) << op->DebugString(); + } + auto &num = pending_ops[op]; + --num; + if (num == 0) { + to_run.emplace_back(op); + } + } + } + + for (auto *op : to_run) { + pending_ops.erase(op); + complete_op.insert(op); + } + + if (to_run.empty()) break; + + // TODO(yy): Use thead pool to run OpHandle. Operators in ToRun can be + // paralleled. We can also use another schedule method. Just a demo here. + + std::stringstream ss; + ss << "\n"; + for (auto *op : to_run) { + ss << op->DebugString() << "\n"; + } + ss << std::endl; + LOG(INFO) << ss.str(); + } + + PADDLE_ENFORCE_EQ(complete_op.size(), num_op); + return std::vector(); +} +} // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index f67b926694..ec80f89f0e 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -28,32 +28,33 @@ limitations under the License. */ namespace paddle { namespace framework { -struct AllReduceCallBack { - void operator()(framework::OperatorBase* op); - - std::unordered_set param_grad_names_; - platform::DeviceContext dev_ctx; -}; - +class ParallelExecutorPrivate; +class VarHandle; +class OpHandle; class ParallelExecutor { + public: explicit ParallelExecutor(const std::vector& places, - const std::unordered_set& params); - - /* @Brief - * Runtime evaluation of the given ProgramDesc under certain Scope - * - * @param - * ProgramDesc - * Scope - */ - void Run(const ProgramDesc& prog, Scope* scope, int block_id, - bool create_local_scope = true, bool create_vars = true); + const std::unordered_set& params, + const ProgramDesc& startup_program, + const ProgramDesc& main_program, + const std::string& loss_var_name, Scope* scope); + + std::vector Run(const std::vector& fetch_tensors); private: - std::vector exes_; - std::vector scopes_; - std::vector all_reduce_callbacks_; - platform::Communicator nccl_com_; + ParallelExecutorPrivate* member_; + + void BCastParamsToGPUs(const ProgramDesc& startup_program) const; + + VarHandle* GetVarHandle(const std::string& each_var_name, + const platform::Place& place) const; + + void GenerateVar(OpHandle* op_handle, const std::string& each_var_name, + const platform::Place& place) const; + + void ConstructDependencyGraph(const std::unordered_set& params, + const ProgramDesc& main_program, + const std::string& loss_var_name) const; }; } // namespace framework diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h index 501bddfc6e..633251eb47 100644 --- a/paddle/fluid/platform/place.h +++ b/paddle/fluid/platform/place.h @@ -65,6 +65,17 @@ bool is_cpu_place(const Place &); bool places_are_same_class(const Place &, const Place &); bool is_same_place(const Place &, const Place &); +struct PlaceHash { + std::size_t operator()(const Place &p) const { + std::hash ihash; + size_t dev_id = 0; + if (is_gpu_place(p)) { + dev_id = boost::get(p).device; + } + return ihash(dev_id << 2 | p.which()); + } +}; + std::ostream &operator<<(std::ostream &, const Place &); template diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 8942b5c943..ecf9e47884 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -2,6 +2,7 @@ if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method + parallel_executor ${GLOB_OP_LIB}) if(NOT APPLE AND NOT ANDROID) target_link_libraries(paddle_pybind rt) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d2e883cacc..8b752c4efb 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -25,6 +25,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" +#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/prune.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/selected_rows.h" @@ -488,6 +489,19 @@ All parameter, weight, gradient are variables in Paddle. m.def("disable_profiler", platform::DisableProfiler); m.def("reset_profiler", platform::ResetProfiler); + py::class_(m, "ParallelExecutor") + .def( + "__init__", + [](ParallelExecutor &self, const std::vector &places, + const std::unordered_set ¶ms, + const ProgramDesc &startup_program, + const ProgramDesc &main_program, const std::string &loss_var_name, + Scope *scope) { + new (&self) ParallelExecutor(places, params, startup_program, + main_program, loss_var_name, scope); + }) + .def("run", [](ParallelExecutor &self) { self.Run({}); }); + BindRecordIOWriter(m); return m.ptr(); } diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py new file mode 100644 index 0000000000..2b41b2c9b4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -0,0 +1,47 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid + + +class ParallelExecutor(unittest.TestCase): + def test_main(self): + main = fluid.Program() + startup = fluid.Program() + + with fluid.program_guard(main, startup): + reader = fluid.layers.open_recordio_file( + filename='tmp', + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + img, label = fluid.layers.read_file(reader) + hidden = fluid.layers.fc(img, size=200, act='tanh') + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + adam = fluid.optimizer.Adam() + adam.minimize(loss) + act_places = [] + for each in [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]: + p = fluid.core.Place() + p.set_place(each) + act_places.append(p) + + exe = fluid.core.ParallelExecutor( + act_places, + set([p.name for p in main.global_block().iter_parameters()]), + startup.desc, main.desc, loss.name, fluid.global_scope()) + exe.run() From 692a0f7425064f5e44179be6daf49062d50ffc2a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 14 Mar 2018 21:17:42 +0800 Subject: [PATCH 005/180] Better name --- paddle/fluid/framework/parallel_executor.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7488458743..46fb15f580 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -27,7 +27,8 @@ struct VarHandle { platform::Place place_; OpHandle *generated_op_; - std::vector deps_ops_; + + std::vector pending_ops_; }; struct OpHandle { @@ -141,7 +142,7 @@ void ParallelExecutor::ConstructDependencyGraph( auto &place = pair.first; VarHandle *var = GetVarHandle(each_var_name, place); op_handle->inputs_.emplace_back(var); - var->deps_ops_.emplace_back(op_handle); + var->pending_ops_.emplace_back(op_handle); } var_names = op->OutputArgumentNames(); @@ -158,7 +159,7 @@ void ParallelExecutor::ConstructDependencyGraph( op_handle = member_->ops_.back().get(); auto &place = pair.first; VarHandle *loss = GetVarHandle(loss_var_name, place); - loss->deps_ops_.emplace_back(op_handle); + loss->pending_ops_.emplace_back(op_handle); op_handle->inputs_.emplace_back(loss); GenerateVar(op_handle, loss_var_name + "@GRAD", place); change_forward = true; @@ -188,7 +189,7 @@ void ParallelExecutor::ConstructDependencyGraph( } auto *prev_grad = &vars[vars.size() - 1]; op_handle->inputs_.emplace_back(prev_grad); - prev_grad->deps_ops_.emplace_back(op_handle); + prev_grad->pending_ops_.emplace_back(op_handle); auto &var = vars[vars.size()]; var.place_ = place; var.generated_op_ = op_handle; @@ -317,7 +318,7 @@ std::vector ParallelExecutor::Run( std::vector to_run; for (auto *var : to_remove) { - for (auto *op : var->deps_ops_) { + for (auto *op : var->pending_ops_) { if (var->name_ == "mean_0.tmp_0@GRAD") { LOG(INFO) << op->DebugString(); } From ae88fdefb7deff02a83ca5fe4eb8d4b17b2173e0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 15 Mar 2018 14:51:01 +0800 Subject: [PATCH 006/180] Use thread pool --- paddle/fluid/framework/parallel_executor.cc | 77 +++++++++++---------- paddle/fluid/framework/threadpool.h | 4 +- 2 files changed, 41 insertions(+), 40 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 46fb15f580..dd726f1fab 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" #include "lod_tensor.h" #include "op_registry.h" +#include "threadpool.h" namespace paddle { namespace framework { @@ -34,7 +35,6 @@ struct VarHandle { struct OpHandle { std::vector inputs_; std::vector outputs_; - platform::DeviceContext *dev_ctx_; std::string DebugString() { std::stringstream ss; @@ -66,6 +66,9 @@ struct NCCLAllReduceOpHandle : public OpHandle {}; class ParallelExecutorPrivate { public: + explicit ParallelExecutorPrivate(size_t num_threads = 12) + : pool_(num_threads) {} + std::unordered_map local_scopes_; std::unordered_map vars_; std::vector> ops_; + + ThreadPool pool_; }; // TODO(yy): Move this function somewhere @@ -285,13 +290,15 @@ void ParallelExecutor::BCastParamsToGPUs( std::vector ParallelExecutor::Run( const std::vector &fetch_tensors) { // Version --> VarHandle - std::unordered_set pending_vars; + + std::unordered_map pending_vars; std::unordered_map pending_ops; for (auto &place_pair : member_->vars_) { for (auto &name_pair : place_pair.second) { for (auto &version_pair : name_pair.second) { - pending_vars.insert(&version_pair.second); + pending_vars[&version_pair.second] = + version_pair.second.generated_op_ == nullptr; } } } @@ -300,56 +307,50 @@ std::vector ParallelExecutor::Run( pending_ops.insert({op.get(), op->inputs_.size()}); } - std::unordered_set complete_op; - - size_t num_op = pending_ops.size(); - - while (complete_op.size() != num_op) { - std::vector to_remove; - for (auto &var : pending_vars) { - if (var->generated_op_ == nullptr || - complete_op.count(var->generated_op_) != 0) { - to_remove.push_back(var); + while (!pending_ops.empty()) { + VarHandle *ready_var = nullptr; + for (auto &pair : pending_vars) { + if (pair.second) { + ready_var = pair.first; } } - for (auto *var : to_remove) { - pending_vars.erase(var); + + if (ready_var == nullptr) { + member_->pool_.Wait(); // Wait thread pool; + continue; } + pending_vars.erase(ready_var); + std::vector to_run; - for (auto *var : to_remove) { - for (auto *op : var->pending_ops_) { - if (var->name_ == "mean_0.tmp_0@GRAD") { - LOG(INFO) << op->DebugString(); - } - auto &num = pending_ops[op]; - --num; - if (num == 0) { - to_run.emplace_back(op); - } + + for (auto *op : ready_var->pending_ops_) { + auto &deps = pending_ops[op]; + --deps; + if (deps == 0) { + to_run.emplace_back(op); } } for (auto *op : to_run) { pending_ops.erase(op); - complete_op.insert(op); - } - if (to_run.empty()) break; + std::vector ready_buffer; + for (auto *var : op->outputs_) { + ready_buffer.emplace_back(&pending_vars[var]); + } - // TODO(yy): Use thead pool to run OpHandle. Operators in ToRun can be - // paralleled. We can also use another schedule method. Just a demo here. + auto op_run = [ready_buffer, op] { + // TODO(yy) Check Previous Op has same dev ctx. + LOG(INFO) << "Run " << op->DebugString(); + for (auto *ready : ready_buffer) { + *ready = true; + } + }; - std::stringstream ss; - ss << "\n"; - for (auto *op : to_run) { - ss << op->DebugString() << "\n"; + member_->pool_.Run(op_run); } - ss << std::endl; - LOG(INFO) << ss.str(); } - - PADDLE_ENFORCE_EQ(complete_op.size(), num_op); return std::vector(); } } // namespace framework diff --git a/paddle/fluid/framework/threadpool.h b/paddle/fluid/framework/threadpool.h index df51fb24a5..f9dce7105e 100644 --- a/paddle/fluid/framework/threadpool.h +++ b/paddle/fluid/framework/threadpool.h @@ -32,6 +32,8 @@ namespace framework { // number of threads. class ThreadPool { public: + explicit ThreadPool(int num_threads); + using Task = std::packaged_task()>; // Returns the singleton of ThreadPool. @@ -103,8 +105,6 @@ class ThreadPool { DISABLE_COPY_AND_ASSIGN(ThreadPool); - explicit ThreadPool(int num_threads); - // If the task queue is empty and avaialbe is equal to the number of // threads, means that all tasks are completed. Note: this function // is not thread-safe. Returns true if all tasks are completed. From 22bb262a75d2b6ed71b9828ae0cfa4a621967c8a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 15 Mar 2018 14:51:38 +0800 Subject: [PATCH 007/180] Remove out of date design --- doc/design/parallel_executor.md | 74 --------------------------------- 1 file changed, 74 deletions(-) delete mode 100644 doc/design/parallel_executor.md diff --git a/doc/design/parallel_executor.md b/doc/design/parallel_executor.md deleted file mode 100644 index 78ef74f159..0000000000 --- a/doc/design/parallel_executor.md +++ /dev/null @@ -1,74 +0,0 @@ -# ParallelExecutor Design Doc - -## Introduction - -We introduce `ParallelExecutor` to run multi-GPU training in PaddlePaddle Fluid. It supports -1. keeping a copy of the parameters on each GPU -1. allreduce on a separate stream allowing computation and communication overlap - -An example of switching single GPU training to multiple GPUs: -```python -cost = your_neural_network() -opt = fluid.optimizer.SGDOptimizer() -opt.minimize(avg_cost) - -# change Executor -> ParallelExecutor -exe = fluid.ParallelExecutor(gpu_list=[0, 1]) - -for iter in xranges(iter_num): - exe.run() -``` - -## Design - -In the constructor, a list of parameter, whose gradients need to be allreduced, is given. - -During the runtime, `ParallelExecutor` starts `#gpu` threads to run each `Executor`. For every -operator run on each GPU, it will automatically sync with different streams when necessary. - -```c++ -// if op's input is params' grad: - // sync with allreduce stream - // e.g. sgd should wait for allreduce to be finished -CallBack->BeforeOp(op); - -op->Run(*local_scope, place_); - -// if op's output is params' grad: -// sync with computation stream -// e.g. allreduce shoudl wait for fc_grad to be finished. -CallBack->AfterOp(op); -``` - -And the `Callback` object can be implemented as the following - -```c++ -struct AllReduceCallBack { - void BeforeOp(framework::OperatorBase* op); - void AfterOp(framework::OperatorBase* op); - - std::unordered_set reduced_param_grad_names; - std::unordered_set param_grad_names_; - - platform::DeviceContext* computation_dev_ctx; // computation device context - platform::DeviceContext* communication_dev_ctx; // communication device context - - framework::Scope* scope; - platform::NCCL::Communicator* nccl_com; -}; - -AllReduceCallBack::BeforeOp(framework::OperatorBase* op) { - if (op->Input() in reduced_param_grad_names) { - communication_dev_ctx->Wait(); - reduced_param_grad_names.erase(op->Input()) - } -} - -AllReduceCallBack::AfterOp(framework::OperatorBase* op) { - if (op->Output() in param_grad_names) { - computation_dev_ctx->Wait(); - reduced_param_grad_names.insert(op->Output()); - ncclAllreduce(scope, op->Output(), communication_dev_ctx); - } -} -``` From 35744e7b36f3c7202080feeabc0d8f207839b2e1 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 15 Mar 2018 16:30:16 +0800 Subject: [PATCH 008/180] Polish code --- paddle/fluid/framework/parallel_executor.cc | 100 ++++++++++++++---- paddle/fluid/framework/parallel_executor.h | 2 + .../tests/unittests/test_parallel_executor.py | 2 +- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index dd726f1fab..7af5cc075c 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -20,6 +20,12 @@ limitations under the License. */ namespace paddle { namespace framework { +#ifdef PADDLE_WITH_CUDA + +// FIXME: CHECK the return value of x; +#define NCCL_INVOKE(x) x +#endif + struct OpHandle; struct VarHandle { @@ -71,9 +77,51 @@ class ParallelExecutorPrivate { std::unordered_map local_scopes_; - std::unordered_map - dev_ctxs_; + +#ifdef PADDLE_WITH_CUDA + struct NCCLContext { + std::unique_ptr ctx_; + ncclComm_t comm; + + explicit NCCLContext(int dev_id) { + ctx_.reset(new platform::CUDADeviceContext(platform::CUDAPlace(dev_id))); + } + + cudaStream_t stream() const { return ctx_->stream(); } + + int device_id() const { + return boost::get(ctx_->GetPlace()).device; + } + + static void InitNCCLContext(std::map &contexts) { + std::vector comms; + std::vector devs; + comms.resize(contexts.size()); + devs.reserve(contexts.size()); + + for (auto &ctx : contexts) { + devs.push_back(ctx.first); + } + + NCCL_INVOKE(platform::dynload::ncclCommInitAll( + &comms[0], static_cast(contexts.size()), &devs[0])); + + int i = 0; + for (auto &ctx : contexts) { + ctx.second.comm = comms[i++]; + } + } + }; + + std::map communication_streams_; + + NCCLContext &GetNCCLCtx(platform::Place p) { + int dev_id = boost::get(p).device; + return communication_streams_.at(dev_id); + } + +#endif + platform::Place main_place_; std::unordered_mapmain_place_ = places[0]; // Bcast Parameters to all GPUs - if (platform::is_gpu_place(member_->main_place_)) { // Is CUDA - // BCastParamsToGPUs(startup_program); + if (platform::is_gpu_place(member_->main_place_) && + member_->local_scopes_.size() != 1) { // Is CUDA + BuildNCCLCommunicator(); + BCastParamsToGPUs(startup_program); } // Startup Program has been run. All local scopes has correct parameters. @@ -241,20 +291,20 @@ VarHandle *ParallelExecutor::GetVarHandle(const std::string &each_var_name, void ParallelExecutor::BCastParamsToGPUs( const ProgramDesc &startup_program) const { +#ifdef PADDLE_WITH_CUDA auto *main_scope = member_->local_scopes_[member_->main_place_]; + for (auto *var_desc : startup_program.Block(0).AllVars()) { if (var_desc->GetType() == proto::VarType::LOD_TENSOR) { auto &main_tensor = main_scope->FindVar(var_desc->Name())->Get(); - ncclDataType_t data_type = ToNCCLDataType(main_tensor.type()); auto &dims = main_tensor.dims(); size_t numel = main_tensor.numel(); - std::vector> mems; - mems.emplace_back( - const_cast(main_tensor.data()), - new platform::CUDADeviceContext( - boost::get(member_->main_place_))); + std::vector> + mems; + mems.emplace_back(const_cast(main_tensor.data()), + &member_->GetNCCLCtx(member_->main_place_)); for (auto &pair : member_->local_scopes_) { if (pair.first == member_->main_place_) { @@ -265,8 +315,7 @@ void ParallelExecutor::BCastParamsToGPUs( auto *t = local_scope->Var(var_desc->Name())->GetMutable(); t->Resize(dims); mems.emplace_back(t->mutable_data(pair.first, main_tensor.type()), - new platform::CUDADeviceContext( - boost::get(pair.first))); + &member_->GetNCCLCtx(member_->main_place_)); } // TODO(yy): Invoke ncclBCast here. mems, numel, data_type. The mems[0] @@ -274,17 +323,26 @@ void ParallelExecutor::BCastParamsToGPUs( (void)(data_type); (void)(numel); + } + } +#else + PADDLE_THROW("Not compiled with CUDA"); +#endif +} - // Free Communication Ctx - for (auto &pair : mems) { - // Release Communication Ctx +void ParallelExecutor::BuildNCCLCommunicator() const { +#ifdef PADDLE_WITH_CUDA + for (auto &place_pair : member_->local_scopes_) { + auto place = place_pair.first; + int dev_id = boost::get(place).device; - // FIXME: Store CUDA DevCtx to member. Since NCCL All Reduce will use - // this - delete pair.second; - } - } + member_->communication_streams_.emplace( + dev_id, ParallelExecutorPrivate::NCCLContext(dev_id)); } + + ParallelExecutorPrivate::NCCLContext::InitNCCLContext( + member_->communication_streams_); +#endif } std::vector ParallelExecutor::Run( diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index ec80f89f0e..805b7e5aa9 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -55,6 +55,8 @@ class ParallelExecutor { void ConstructDependencyGraph(const std::unordered_set& params, const ProgramDesc& main_program, const std::string& loss_var_name) const; + + void BuildNCCLCommunicator() const; }; } // namespace framework diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 2b41b2c9b4..65b43448a4 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -35,7 +35,7 @@ class ParallelExecutor(unittest.TestCase): adam = fluid.optimizer.Adam() adam.minimize(loss) act_places = [] - for each in [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]: + for each in [fluid.CUDAPlace(0)]: p = fluid.core.Place() p.set_place(each) act_places.append(p) From 193c0a7e4333ca7e403089ef1f9e66c79d56c68a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 15 Mar 2018 17:27:42 +0800 Subject: [PATCH 009/180] Handle var hazard --- paddle/fluid/framework/parallel_executor.cc | 137 +++++++++++++++++--- 1 file changed, 121 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7af5cc075c..e98fedb68d 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -28,42 +28,79 @@ namespace framework { struct OpHandle; -struct VarHandle { +struct VarHandleBase { + virtual ~VarHandleBase() {} + virtual std::string DebugString() const = 0; + + OpHandle *generated_op_; + std::vector pending_ops_; +}; + +struct VarHandle : public VarHandleBase { + std::string DebugString() const override { + std::stringstream ss; + ss << name_ << ":" << place_; + return ss.str(); + } + size_t version_; std::string name_; platform::Place place_; +}; - OpHandle *generated_op_; - - std::vector pending_ops_; +struct DependencyVarHandle : public VarHandleBase { + std::string DebugString() const override { return "Deps var"; } }; struct OpHandle { - std::vector inputs_; - std::vector outputs_; + std::vector inputs_; + std::vector outputs_; + std::unordered_map + dev_ctx_; std::string DebugString() { std::stringstream ss; ss << "("; for (auto *var : inputs_) { - ss << var->name_ << ":" << var->place_ << ", "; + ss << var->DebugString() << ", "; } ss << ") --> ("; for (auto *var : outputs_) { - ss << var->name_ << ":" << var->place_ << ", "; + ss << var->DebugString() << ", "; } ss << ")\n"; return ss.str(); } virtual ~OpHandle() {} + + virtual void Run() {} + virtual void Wait() {} }; struct ComputationOpHandle : public OpHandle { std::unique_ptr op_; + Scope *scope_; + platform::Place place_; - explicit ComputationOpHandle(const OpDesc &op_desc) - : op_(framework::OpRegistry::CreateOp(op_desc)) {} + explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, + platform::Place place) + : op_(framework::OpRegistry::CreateOp(op_desc)), + scope_(scope), + place_(place) {} + + void Run() override { + // Wait other op if necessary + auto *cur_ctx = dev_ctx_[place_]; + for (auto *in : inputs_) { + if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { + in->generated_op_->Wait(); + } + } + + op_->Run(*scope_, place_); + } }; struct ScaleLossGradOpHandle : public OpHandle {}; @@ -122,12 +159,27 @@ class ParallelExecutorPrivate { #endif + platform::DeviceContext *CommunicationDevCtx(const platform::Place &place) { + if (platform::is_cpu_place(place) || local_scopes_.size() == 1) { + return const_cast( + platform::DeviceContextPool::Instance().Get(place)); + } else { +#ifdef PADDLE_WITH_CUDA + return GetNCCLCtx(place).ctx_.get(); +#else + PADDLE_THROW("Not compiled with CUDA") +#endif + } + } + platform::Place main_place_; std::unordered_map>, platform::PlaceHash> vars_; + std::unordered_set> dep_vars_; + std::vector> ops_; ThreadPool pool_; @@ -170,7 +222,7 @@ ParallelExecutor::ParallelExecutor( void ParallelExecutor::ConstructDependencyGraph( const std::unordered_set ¶ms, const ProgramDesc &main_program, const std::string &loss_var_name) const { - std::unordered_set grads; + std::unordered_set grads; for (auto &each_param : params) { grads.insert(each_param + "@GRAD"); } @@ -188,8 +240,11 @@ void ParallelExecutor::ConstructDependencyGraph( } for (auto &pair : member_->local_scopes_) { - member_->ops_.emplace_back(new ComputationOpHandle(*op)); + member_->ops_.emplace_back( + new ComputationOpHandle(*op, pair.second, pair.first)); auto *op_handle = member_->ops_.back().get(); + op_handle->dev_ctx_[pair.first] = const_cast( + platform::DeviceContextPool::Instance().Get(pair.first)); auto var_names = op->InputArgumentNames(); @@ -210,8 +265,11 @@ void ParallelExecutor::ConstructDependencyGraph( if (var_names.size() == 1 && var_names[0] == loss_var_name) { // Insert ScaleCost OpHandle member_->ops_.emplace_back(new ScaleLossGradOpHandle()); - op_handle = member_->ops_.back().get(); + + op_handle->dev_ctx_[pair.first] = + member_->CommunicationDevCtx(pair.first); + auto &place = pair.first; VarHandle *loss = GetVarHandle(loss_var_name, place); loss->pending_ops_.emplace_back(op_handle); @@ -251,11 +309,54 @@ void ParallelExecutor::ConstructDependencyGraph( var.name_ = og; var.version_ = vars.size() - 1; op_handle->outputs_.emplace_back(&var); + + for (auto &pair : member_->local_scopes_) { + op_handle->dev_ctx_[pair.first] = + member_->CommunicationDevCtx(pair.first); + } } } } } } + + /** + * Dependency graph has been constructed. However, there are still data + * harzaeds need to be handled. + * + * We only handle write after read(WAR), since it should not have a write + * after write in program. If there are write after write operators, we need + * prune them. + * + * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) + */ + + for (auto &place_pair : member_->vars_) { + for (auto &name_pair : place_pair.second) { + if (name_pair.second.size() <= 1) { + return; + } + auto it_new = name_pair.second.rbegin(); + auto it_old = name_pair.second.rbegin(); + ++it_old; + for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { + auto *write_op = it_new->second.generated_op_; + auto &read_ops = it_old->second.pending_ops_; + + for (auto *read_op : read_ops) { + // Manually add a dependency var from read_op to write_op; + + auto *dep_var = new DependencyVarHandle(); + dep_var->generated_op_ = read_op; + read_op->outputs_.emplace_back(dep_var); + + dep_var->pending_ops_.emplace_back(write_op); + write_op->inputs_.emplace_back(dep_var); + member_->dep_vars_.emplace(dep_var); + } + } + } + } } void ParallelExecutor::GenerateVar(OpHandle *op_handle, @@ -349,7 +450,7 @@ std::vector ParallelExecutor::Run( const std::vector &fetch_tensors) { // Version --> VarHandle - std::unordered_map pending_vars; + std::unordered_map pending_vars; std::unordered_map pending_ops; for (auto &place_pair : member_->vars_) { @@ -361,12 +462,16 @@ std::vector ParallelExecutor::Run( } } + for (auto &var : member_->dep_vars_) { + pending_vars[var.get()] = var->generated_op_ == nullptr; + } + for (auto &op : member_->ops_) { pending_ops.insert({op.get(), op->inputs_.size()}); } while (!pending_ops.empty()) { - VarHandle *ready_var = nullptr; + VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second) { ready_var = pair.first; @@ -400,7 +505,7 @@ std::vector ParallelExecutor::Run( auto op_run = [ready_buffer, op] { // TODO(yy) Check Previous Op has same dev ctx. - LOG(INFO) << "Run " << op->DebugString(); + op->Run(); for (auto *ready : ready_buffer) { *ready = true; } From d84ddcf1239d6a7a6a7c24ebe9668d39e8bb55e6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 15 Mar 2018 17:43:23 +0800 Subject: [PATCH 010/180] Stash --- paddle/fluid/framework/executor.cc | 8 ++++---- paddle/fluid/framework/executor.h | 2 ++ paddle/fluid/framework/parallel_executor.cc | 9 ++++----- .../reader/create_recordio_file_reader_op.cc | 4 +++- .../tests/unittests/test_parallel_executor.py | 19 ++++++++++++++++++- 5 files changed, 31 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 6ee3f18dd4..b250378b9f 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -45,7 +45,7 @@ struct ExecutorPrepareContext { Executor::Executor(const platform::Place& place) : place_(place) {} -static void CreateTensor(Variable* var, proto::VarType::Type var_type) { +void InitializeVariable(Variable* var, proto::VarType::Type var_type) { if (var_type == proto::VarType::LOD_TENSOR) { var->GetMutable(); } else if (var_type == proto::VarType::SELECTED_ROWS) { @@ -284,12 +284,12 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, if (var->Persistable()) { auto* ptr = scope->Var(var->Name()); - CreateTensor(ptr, var->GetType()); + InitializeVariable(ptr, var->GetType()); VLOG(3) << "Create Variable " << var->Name() << " global, which pointer is " << ptr; } else { auto* ptr = local_scope->Var(var->Name()); - CreateTensor(ptr, var->GetType()); + InitializeVariable(ptr, var->GetType()); VLOG(3) << "Create Variable " << var->Name() << " locally, which pointer is " << ptr; } @@ -297,7 +297,7 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, } else { for (auto& var : block.AllVars()) { auto* ptr = local_scope->Var(var->Name()); - CreateTensor(ptr, var->GetType()); + InitializeVariable(ptr, var->GetType()); VLOG(3) << "Create variable " << var->Name() << ", which pointer is " << ptr; } diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 8d8a7cf4db..e020a6e738 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -59,5 +59,7 @@ class Executor { const platform::Place place_; }; +extern void InitializeVariable(Variable* var, proto::VarType::Type var_type); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index e98fedb68d..97ffe01bec 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -84,14 +84,14 @@ struct ComputationOpHandle : public OpHandle { Scope *scope_; platform::Place place_; - explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, - platform::Place place) + explicit ComputationOpHandle(const OpDesc &op_desc, platform::Place place) : op_(framework::OpRegistry::CreateOp(op_desc)), - scope_(scope), + scope_(nullptr), place_(place) {} void Run() override { // Wait other op if necessary + LOG(INFO) << DebugString(); auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { @@ -240,8 +240,7 @@ void ParallelExecutor::ConstructDependencyGraph( } for (auto &pair : member_->local_scopes_) { - member_->ops_.emplace_back( - new ComputationOpHandle(*op, pair.second, pair.first)); + member_->ops_.emplace_back(new ComputationOpHandle(*op, pair.first)); auto *op_handle = member_->ops_.back().get(); op_handle->dev_ctx_[pair.first] = const_cast( platform::DeviceContextPool::Instance().Get(pair.first)); diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc index c3eb247bbe..0126ff7271 100644 --- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc +++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc @@ -25,7 +25,9 @@ class RecordIOFileReader : public framework::FileReader { : FileReader(shapes), scanner_(filename), dev_ctx_(*platform::DeviceContextPool::Instance().Get( - platform::CPUPlace())) {} + platform::CPUPlace())) { + LOG(INFO) << "Creating file reader" << filename; + } void ReadNext(std::vector* out) override { *out = framework::ReadFromRecordIO(scanner_, dev_ctx_); diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 65b43448a4..3604fdb285 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -14,16 +14,33 @@ import unittest import paddle.fluid as fluid +import paddle.v2 as paddle +import paddle.v2.dataset.mnist as mnist class ParallelExecutor(unittest.TestCase): + def setUp(self): + # Convert mnist to recordio file + with fluid.program_guard(fluid.Program(), fluid.Program()): + reader = paddle.batch(mnist.train(), batch_size=32) + feeder = fluid.DataFeeder( + feed_list=[ # order is image and label + fluid.layers.data( + name='image', shape=[784]), + fluid.layers.data( + name='label', shape=[1], dtype='int64'), + ], + place=fluid.CPUPlace()) + fluid.recordio_writer.convert_reader_to_recordio_file( + './mnist.recordio', reader, feeder) + def test_main(self): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): reader = fluid.layers.open_recordio_file( - filename='tmp', + filename='./mnist.recordio', shapes=[[-1, 784], [-1, 1]], lod_levels=[0, 0], dtypes=['float32', 'int64']) From 6f0dfd89a4265e3aec08beb693ad7e342c10696b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 14:33:36 +0800 Subject: [PATCH 011/180] Single GPU ParallelExecutor complete --- CMakeLists.txt | 1 + cmake/external/threadpool.cmake | 30 ++++ paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/framework/parallel_executor.cc | 165 ++++++++++++++++---- paddle/fluid/framework/parallel_executor.h | 4 + paddle/fluid/operators/read_op.cc | 5 +- 6 files changed, 173 insertions(+), 34 deletions(-) create mode 100644 cmake/external/threadpool.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index c86889c05c..502213bf29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,6 +146,7 @@ include(external/cares) include(external/grpc) include(external/snappy) # download snappy include(external/snappystream) +include(external/threadpool) include(cudnn) # set cudnn libraries, must before configure include(cupti) diff --git a/cmake/external/threadpool.cmake b/cmake/external/threadpool.cmake new file mode 100644 index 0000000000..0159815fed --- /dev/null +++ b/cmake/external/threadpool.cmake @@ -0,0 +1,30 @@ +INCLUDE(ExternalProject) + +SET(THREADPOOL_SOURCE_DIR ${THIRD_PARTY_PATH}/threadpool) +SET(THREADPOOL_INCLUDE_DIR ${THREADPOOL_SOURCE_DIR}/src/extern_threadpool) +INCLUDE_DIRECTORIES(${THREADPOOL_INCLUDE_DIR}) + +ExternalProject_Add( + extern_threadpool + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/progschj/ThreadPool.git" + GIT_TAG 9a42ec1329f259a5f4881a291db1dcb8f2ad9040 + PREFIX ${THREADPOOL_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" +) + +if (${CMAKE_VERSION} VERSION_LESS "3.3.0") + set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/threadpool_dummy.c) + file(WRITE ${dummyfile} "const char *dummy_threadpool = \"${dummyfile}\";") + add_library(simple_threadpool STATIC ${dummyfile}) +else() + add_library(simple_threadpool INTERFACE) +endif() + +add_dependencies(simple_threadpool extern_threadpool) + +LIST(APPEND external_project_dependencies simple_threadpool) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 934bb43ffe..4fd66c77ac 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -87,7 +87,7 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table feed_fetch_method executor) + framework_proto backward glog lod_rank_table feed_fetch_method executor simple_threadpool) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 97ffe01bec..930be7fab3 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" +#include "ThreadPool.h" +#include "executor.h" #include "lod_tensor.h" #include "op_registry.h" -#include "threadpool.h" namespace paddle { namespace framework { @@ -49,7 +50,7 @@ struct VarHandle : public VarHandleBase { }; struct DependencyVarHandle : public VarHandleBase { - std::string DebugString() const override { return "Deps var"; } + std::string DebugString() const override { return "Dependency Variable"; } }; struct OpHandle { @@ -75,7 +76,7 @@ struct OpHandle { virtual ~OpHandle() {} - virtual void Run() {} + virtual void Run() { PADDLE_THROW("Not implemented"); } virtual void Wait() {} }; @@ -84,14 +85,15 @@ struct ComputationOpHandle : public OpHandle { Scope *scope_; platform::Place place_; - explicit ComputationOpHandle(const OpDesc &op_desc, platform::Place place) + explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, + platform::Place place) : op_(framework::OpRegistry::CreateOp(op_desc)), - scope_(nullptr), + scope_(scope), place_(place) {} void Run() override { // Wait other op if necessary - LOG(INFO) << DebugString(); + LOG(INFO) << "Run " << this << " " << DebugString(); auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { @@ -100,12 +102,49 @@ struct ComputationOpHandle : public OpHandle { } op_->Run(*scope_, place_); + LOG(INFO) << "Done " << this; } }; -struct ScaleLossGradOpHandle : public OpHandle {}; +struct ScaleLossGradOpHandle : public OpHandle { + float coeff_; + Scope *scope_; + platform::Place place_; + + explicit ScaleLossGradOpHandle(size_t num_dev, Scope *scope, + platform::Place place) + : coeff_(static_cast(1.0 / num_dev)), + scope_(scope), + place_(place) {} + + void Run() override { + LOG(INFO) << "Run Scale Loss Grad"; + + std::string var_name = static_cast(this->outputs_[0])->name_; -struct NCCLAllReduceOpHandle : public OpHandle {}; + float *tmp = scope_->FindVar(var_name) + ->GetMutable() + ->mutable_data(make_ddim({1}), place_); + + if (platform::is_cpu_place(place_)) { + *tmp = coeff_; + } else { + memory::Copy( + boost::get(place_), tmp, platform::CPUPlace(), + &coeff_, sizeof(float), + static_cast(this->dev_ctx_[place_]) + ->stream()); + } + } +}; + +struct NCCLAllReduceOpHandle : public OpHandle { + void Run() override { + if (this->inputs_.size() == 1) { + return; // No need to all reduce when GPU count = 1; + } + } +}; class ParallelExecutorPrivate { public: @@ -182,7 +221,10 @@ class ParallelExecutorPrivate { std::vector> ops_; + // Use a simpler thread pool, might be faster. ThreadPool pool_; + + std::unique_ptr exception_; }; // TODO(yy): Move this function somewhere @@ -217,6 +259,19 @@ ParallelExecutor::ParallelExecutor( // Step 2. Convert main_program to SSA form and dependency graph. Also, insert // ncclOp ConstructDependencyGraph(params, main_program, loss_var_name); + + // Step 3. Create vars in each scope; + for (auto &pair : member_->local_scopes_) { + auto *scope = pair.second; + + for (auto *var : main_program.Block(0).AllVars()) { + if (scope->FindVar(var->Name()) != nullptr) { + continue; + } + + InitializeVariable(scope->Var(var->Name()), var->GetType()); + } + } } void ParallelExecutor::ConstructDependencyGraph( @@ -240,7 +295,8 @@ void ParallelExecutor::ConstructDependencyGraph( } for (auto &pair : member_->local_scopes_) { - member_->ops_.emplace_back(new ComputationOpHandle(*op, pair.first)); + member_->ops_.emplace_back( + new ComputationOpHandle(*op, pair.second, pair.first)); auto *op_handle = member_->ops_.back().get(); op_handle->dev_ctx_[pair.first] = const_cast( platform::DeviceContextPool::Instance().Get(pair.first)); @@ -263,16 +319,20 @@ void ParallelExecutor::ConstructDependencyGraph( if (is_forwarding) { if (var_names.size() == 1 && var_names[0] == loss_var_name) { // Insert ScaleCost OpHandle - member_->ops_.emplace_back(new ScaleLossGradOpHandle()); + member_->ops_.emplace_back(new ScaleLossGradOpHandle( + this->member_->local_scopes_.size(), pair.second, pair.first)); op_handle = member_->ops_.back().get(); op_handle->dev_ctx_[pair.first] = member_->CommunicationDevCtx(pair.first); auto &place = pair.first; - VarHandle *loss = GetVarHandle(loss_var_name, place); - loss->pending_ops_.emplace_back(op_handle); - op_handle->inputs_.emplace_back(loss); + // FIXME: Currently ScaleLossGradOp only use device_count as scale + // factor. So it does not depend on any other operators. + // VarHandle *loss = GetVarHandle(loss_var_name, place); + // loss->pending_ops_.emplace_back(op_handle); + // op_handle->inputs_.emplace_back(loss); + GenerateVar(op_handle, loss_var_name + "@GRAD", place); change_forward = true; LOG(INFO) << "Scale Loss " << op_handle->DebugString(); @@ -341,11 +401,25 @@ void ParallelExecutor::ConstructDependencyGraph( for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { auto *write_op = it_new->second.generated_op_; auto &read_ops = it_old->second.pending_ops_; + auto *ex_write_op = it_old->second.generated_op_; + + if (ex_write_op == nullptr) { // Nobody write this var. + continue; + } + + LOG(INFO) << "Link " << it_new->second.DebugString() << " From " + << it_old->second.version_ << " To " + << it_new->second.version_; for (auto *read_op : read_ops) { // Manually add a dependency var from read_op to write_op; + if (read_op == write_op) { + // Read Write is the same op. + continue; + } auto *dep_var = new DependencyVarHandle(); + dep_var->generated_op_ = read_op; read_op->outputs_.emplace_back(dep_var); @@ -448,7 +522,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const { std::vector ParallelExecutor::Run( const std::vector &fetch_tensors) { // Version --> VarHandle - + member_->exception_.reset(); std::unordered_map pending_vars; std::unordered_map pending_ops; @@ -465,8 +539,18 @@ std::vector ParallelExecutor::Run( pending_vars[var.get()] = var->generated_op_ == nullptr; } + std::vector to_run; + for (auto &op : member_->ops_) { - pending_ops.insert({op.get(), op->inputs_.size()}); + if (op->inputs_.empty()) { // Special case, Op has no input. + to_run.emplace_back(op.get()); + } else { + pending_ops.insert({op.get(), op->inputs_.size()}); + } + } + + for (auto *op : to_run) { + RunOp(pending_vars, op); } while (!pending_ops.empty()) { @@ -478,13 +562,19 @@ std::vector ParallelExecutor::Run( } if (ready_var == nullptr) { - member_->pool_.Wait(); // Wait thread pool; + // FIXME use conditional var instead of busy wait. + + if (member_->exception_) { + throw * member_->exception_; + } + + std::this_thread::yield(); continue; } pending_vars.erase(ready_var); - std::vector to_run; + to_run.clear(); for (auto *op : ready_var->pending_ops_) { auto &deps = pending_ops[op]; @@ -496,24 +586,35 @@ std::vector ParallelExecutor::Run( for (auto *op : to_run) { pending_ops.erase(op); - - std::vector ready_buffer; - for (auto *var : op->outputs_) { - ready_buffer.emplace_back(&pending_vars[var]); - } - - auto op_run = [ready_buffer, op] { - // TODO(yy) Check Previous Op has same dev ctx. - op->Run(); - for (auto *ready : ready_buffer) { - *ready = true; - } - }; - - member_->pool_.Run(op_run); + RunOp(pending_vars, op); } } return std::vector(); } + +void ParallelExecutor::RunOp( + std::unordered_map &pending_vars, + OpHandle *op) const { + std::vector ready_buffer; + for (auto *var : op->outputs_) { + ready_buffer.emplace_back(&pending_vars[var]); + } + + auto op_run = [ready_buffer, op, this] { + try { + // TODO(yy) Check Previous Op has same dev ctx. + op->Run(); + for (auto *ready : ready_buffer) { + *ready = true; + } + } catch (platform::EnforceNotMet ex) { + member_->exception_.reset(new platform::EnforceNotMet(ex)); + } catch (...) { + LOG(FATAL) << "Unknown exception catched"; + } + }; + + member_->pool_.enqueue(op_run); +} } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 805b7e5aa9..1e4c5c48f2 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -31,6 +31,7 @@ namespace framework { class ParallelExecutorPrivate; class VarHandle; class OpHandle; +class VarHandleBase; class ParallelExecutor { public: explicit ParallelExecutor(const std::vector& places, @@ -57,6 +58,9 @@ class ParallelExecutor { const std::string& loss_var_name) const; void BuildNCCLCommunicator() const; + + void RunOp(std::unordered_map& pending_vars, + OpHandle* op) const; }; } // namespace framework diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc index 2a5605e0d3..2925b8a85d 100644 --- a/paddle/fluid/operators/read_op.cc +++ b/paddle/fluid/operators/read_op.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" +#include "paddle/fluid/operators/detail/safe_ref.h" namespace paddle { namespace operators { @@ -59,7 +60,9 @@ class ReadOp : public framework::OperatorBase { void RunImpl(const framework::Scope& scope, const platform::Place& dev_place) const override { framework::ReaderHolder* reader = - scope.FindVar(Input("Reader"))->GetMutable(); + detail::Ref(scope.FindVar(Input("Reader")), + "Cannot find reader variable %s", Input("Reader")) + .GetMutable(); std::vector out_arg_names = Outputs("Out"); std::vector ins; reader->ReadNext(&ins); From 8c9cd369dc2280ec9c212586b804de9c10adb600 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 14:47:56 +0800 Subject: [PATCH 012/180] Polish code style --- paddle/fluid/framework/parallel_executor.cc | 22 ++++++++++++--------- paddle/fluid/framework/parallel_executor.h | 2 ++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 930be7fab3..40de26bdd0 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -379,17 +379,21 @@ void ParallelExecutor::ConstructDependencyGraph( } } - /** - * Dependency graph has been constructed. However, there are still data - * harzaeds need to be handled. - * - * We only handle write after read(WAR), since it should not have a write - * after write in program. If there are write after write operators, we need - * prune them. - * - * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) + /* + Dependency graph has been constructed. However, there are still data + harzaeds need to be handled. */ + PolishGraphToSupportDataHarzaeds(); +} +/** + * We only handle write after read(WAR), since it should not have a write + * after write in program. If there are write after write operators, we need + * prune them. + * + * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) + */ +void ParallelExecutor::PolishGraphToSupportDataHarzaeds() const { for (auto &place_pair : member_->vars_) { for (auto &name_pair : place_pair.second) { if (name_pair.second.size() <= 1) { diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 1e4c5c48f2..30416563f8 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -61,6 +61,8 @@ class ParallelExecutor { void RunOp(std::unordered_map& pending_vars, OpHandle* op) const; + + void PolishGraphToSupportDataHarzaeds() const; }; } // namespace framework From 8b397d16024f1d5a985e0cbc6c88c6560d7e7661 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 14:48:17 +0800 Subject: [PATCH 013/180] Make recordio file reader thread-safe by default --- .../reader/create_recordio_file_reader_op.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc index 0126ff7271..986e1b7a21 100644 --- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc +++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc @@ -18,6 +18,7 @@ namespace paddle { namespace operators { namespace reader { +template class RecordIOFileReader : public framework::FileReader { public: RecordIOFileReader(const std::string& filename, @@ -26,11 +27,19 @@ class RecordIOFileReader : public framework::FileReader { scanner_(filename), dev_ctx_(*platform::DeviceContextPool::Instance().Get( platform::CPUPlace())) { + if (ThreadSafe) { + mutex_.reset(new std::mutex()); + } LOG(INFO) << "Creating file reader" << filename; } void ReadNext(std::vector* out) override { - *out = framework::ReadFromRecordIO(scanner_, dev_ctx_); + if (ThreadSafe) { + std::lock_guard guard(*mutex_); + *out = framework::ReadFromRecordIO(scanner_, dev_ctx_); + } else { + *out = framework::ReadFromRecordIO(scanner_, dev_ctx_); + } } bool HasNext() const override { return scanner_.HasNext(); } @@ -38,6 +47,7 @@ class RecordIOFileReader : public framework::FileReader { void ReInit() override { scanner_.Reset(); } private: + std::unique_ptr mutex_; recordio::Scanner scanner_; const platform::DeviceContext& dev_ctx_; }; @@ -61,7 +71,7 @@ class CreateRecordIOReaderOp : public framework::OperatorBase { auto* out = scope.FindVar(Output("Out")) ->template GetMutable(); - out->Reset(new RecordIOFileReader(filename, shapes)); + out->Reset(new RecordIOFileReader(filename, shapes)); } }; From 0ef9edf566a2206c8fa8b209d4b5610f1a4f067e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 15:21:13 +0800 Subject: [PATCH 014/180] Stash --- paddle/fluid/framework/parallel_executor.cc | 43 +++++++++++-------- .../tests/unittests/test_parallel_executor.py | 2 +- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 40de26bdd0..25b31f8636 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -229,8 +229,15 @@ class ParallelExecutorPrivate { // TODO(yy): Move this function somewhere ncclDataType_t ToNCCLDataType(std::type_index type) { - // FIXME!! - return ncclFloat; + if (type == typeid(float)) { // NOLINT + return ncclFloat; + } else if (type == typeid(double)) { // NOLINT + return ncclDouble; + } else if (type == typeid(int)) { // NOLINT + return ncclInt; + } else { + PADDLE_THROW("Not supported"); + } } ParallelExecutor::ParallelExecutor( @@ -479,30 +486,32 @@ void ParallelExecutor::BCastParamsToGPUs( ncclDataType_t data_type = ToNCCLDataType(main_tensor.type()); auto &dims = main_tensor.dims(); size_t numel = main_tensor.numel(); - std::vector> - mems; - mems.emplace_back(const_cast(main_tensor.data()), - &member_->GetNCCLCtx(member_->main_place_)); - for (auto &pair : member_->local_scopes_) { - if (pair.first == member_->main_place_) { - continue; - } + platform::dynload::ncclGroupStart(); + for (auto &pair : member_->local_scopes_) { auto local_scope = pair.second; auto *t = local_scope->Var(var_desc->Name())->GetMutable(); t->Resize(dims); - mems.emplace_back(t->mutable_data(pair.first, main_tensor.type()), - &member_->GetNCCLCtx(member_->main_place_)); + auto &nccl_ctx = member_->GetNCCLCtx(pair.first); + platform::dynload::ncclBcast( + t->mutable_data(pair.first, main_tensor.type()), numel, data_type, + 0, nccl_ctx.comm, nccl_ctx.stream()); } + platform::dynload::ncclGroupEnd(); + } + } - // TODO(yy): Invoke ncclBCast here. mems, numel, data_type. The mems[0] - // is the src, rests are dests. + for (auto &pair : member_->local_scopes_) { + member_->GetNCCLCtx(pair.first).ctx_->Wait(); - (void)(data_type); - (void)(numel); - } + auto &b = pair.second->FindVar("fc_1.b_0")->Get(); + framework::LoDTensor cpu; + framework::TensorCopy(b, platform::CPUPlace(), &cpu); + platform::DeviceContextPool::Instance().Get(b.place())->Wait(); + LOG(INFO) << *cpu.data(); } + #else PADDLE_THROW("Not compiled with CUDA"); #endif diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 3604fdb285..85a9f7697f 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -52,7 +52,7 @@ class ParallelExecutor(unittest.TestCase): adam = fluid.optimizer.Adam() adam.minimize(loss) act_places = [] - for each in [fluid.CUDAPlace(0)]: + for each in [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]: p = fluid.core.Place() p.set_place(each) act_places.append(p) From 9fc0b596a92cf63e6c0df18b7f59842758411c5d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 15:39:52 +0800 Subject: [PATCH 015/180] Test more --- paddle/fluid/framework/parallel_executor.cc | 1 + .../paddle/fluid/tests/unittests/test_parallel_executor.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 25b31f8636..ea5ce3f2e9 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -502,6 +502,7 @@ void ParallelExecutor::BCastParamsToGPUs( } } + // Debug code, bias should be 1.0f. for (auto &pair : member_->local_scopes_) { member_->GetNCCLCtx(pair.first).ctx_->Wait(); diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 85a9f7697f..2a614700b0 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -45,7 +45,12 @@ class ParallelExecutor(unittest.TestCase): lod_levels=[0, 0], dtypes=['float32', 'int64']) img, label = fluid.layers.read_file(reader) - hidden = fluid.layers.fc(img, size=200, act='tanh') + hidden = fluid.layers.fc( + img, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) From d470763f6c0e7641367641bdb6cb1f28b8cf39c3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 15:53:36 +0800 Subject: [PATCH 016/180] Stash --- paddle/fluid/framework/parallel_executor.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index ea5ce3f2e9..215ee38ac5 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -154,6 +154,8 @@ class ParallelExecutorPrivate { std::unordered_map local_scopes_; + std::vector places_; + #ifdef PADDLE_WITH_CUDA struct NCCLContext { std::unique_ptr ctx_; @@ -246,6 +248,8 @@ ParallelExecutor::ParallelExecutor( const ProgramDesc &startup_program, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope) : member_(new ParallelExecutorPrivate()) { + member_->places_ = places; + // Step 1. RunStartupProgram and Bcast the params to devs. Executor exe(places[0]); exe.Run(startup_program, scope, 0); @@ -489,14 +493,14 @@ void ParallelExecutor::BCastParamsToGPUs( platform::dynload::ncclGroupStart(); - for (auto &pair : member_->local_scopes_) { - auto local_scope = pair.second; + for (auto &place : member_->places_) { + auto local_scope = member_->local_scopes_[place]; auto *t = local_scope->Var(var_desc->Name())->GetMutable(); t->Resize(dims); - auto &nccl_ctx = member_->GetNCCLCtx(pair.first); - platform::dynload::ncclBcast( - t->mutable_data(pair.first, main_tensor.type()), numel, data_type, - 0, nccl_ctx.comm, nccl_ctx.stream()); + auto &nccl_ctx = member_->GetNCCLCtx(place); + platform::dynload::ncclBcast(t->mutable_data(place, main_tensor.type()), + numel, data_type, 0, nccl_ctx.comm, + nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); } @@ -506,7 +510,7 @@ void ParallelExecutor::BCastParamsToGPUs( for (auto &pair : member_->local_scopes_) { member_->GetNCCLCtx(pair.first).ctx_->Wait(); - auto &b = pair.second->FindVar("fc_1.b_0")->Get(); + auto &b = pair.second->FindVar("fc_0.b_0")->Get(); framework::LoDTensor cpu; framework::TensorCopy(b, platform::CPUPlace(), &cpu); platform::DeviceContextPool::Instance().Get(b.place())->Wait(); From c15d2c9edc1dbea3e3d5b5948bb2c5b0cc81eb88 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 16:13:44 +0800 Subject: [PATCH 017/180] Update --- paddle/fluid/framework/parallel_executor.cc | 34 +++++++++++++-------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 215ee38ac5..996273c720 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -171,27 +171,28 @@ class ParallelExecutorPrivate { return boost::get(ctx_->GetPlace()).device; } - static void InitNCCLContext(std::map &contexts) { + static void InitNCCLContext(std::unordered_map &contexts, + const std::vector &places) { std::vector comms; std::vector devs; comms.resize(contexts.size()); devs.reserve(contexts.size()); - for (auto &ctx : contexts) { - devs.push_back(ctx.first); + for (auto &p : places) { + devs.push_back(boost::get(p).device); } NCCL_INVOKE(platform::dynload::ncclCommInitAll( &comms[0], static_cast(contexts.size()), &devs[0])); int i = 0; - for (auto &ctx : contexts) { - ctx.second.comm = comms[i++]; + for (auto &dev_id : devs) { + contexts.at(dev_id).comm = comms[i++]; } } }; - std::map communication_streams_; + std::unordered_map communication_streams_; NCCLContext &GetNCCLCtx(platform::Place p) { int dev_id = boost::get(p).device; @@ -493,13 +494,20 @@ void ParallelExecutor::BCastParamsToGPUs( platform::dynload::ncclGroupStart(); - for (auto &place : member_->places_) { - auto local_scope = member_->local_scopes_[place]; - auto *t = local_scope->Var(var_desc->Name())->GetMutable(); - t->Resize(dims); + for (size_t i = 0; i < member_->places_.size(); ++i) { + auto place = member_->places_[i]; + void *buffer; + if (i == 0) { + buffer = const_cast(main_tensor.data()); + } else { + auto local_scope = member_->local_scopes_[place]; + auto *t = local_scope->Var(var_desc->Name())->GetMutable(); + t->Resize(dims); + buffer = t->mutable_data(place, main_tensor.type()); + } + auto &nccl_ctx = member_->GetNCCLCtx(place); - platform::dynload::ncclBcast(t->mutable_data(place, main_tensor.type()), - numel, data_type, 0, nccl_ctx.comm, + platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm, nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); @@ -533,7 +541,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const { } ParallelExecutorPrivate::NCCLContext::InitNCCLContext( - member_->communication_streams_); + member_->communication_streams_, member_->places_); #endif } From 8f0590e7c5924e9281a957cf0d355176c4bed301 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 16:31:58 +0800 Subject: [PATCH 018/180] Add ncclAllReduce --- paddle/fluid/framework/parallel_executor.cc | 50 +++++++++++++++++---- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 996273c720..ec5eb57910 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -138,14 +138,6 @@ struct ScaleLossGradOpHandle : public OpHandle { } }; -struct NCCLAllReduceOpHandle : public OpHandle { - void Run() override { - if (this->inputs_.size() == 1) { - return; // No need to all reduce when GPU count = 1; - } - } -}; - class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(size_t num_threads = 12) @@ -243,6 +235,46 @@ ncclDataType_t ToNCCLDataType(std::type_index type) { } } +struct NCCLAllReduceOpHandle : public OpHandle { + ParallelExecutorPrivate *member_; + + explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) + : member_(member) {} + + void Run() override { + if (this->inputs_.size() == 1) { + return; // No need to all reduce when GPU count = 1; + } else { + auto &var_name = static_cast(this->inputs_[0])->name_; + + int dtype = -1; + size_t numel = 0; + + for (auto &p : member_->places_) { + int dev_id = boost::get(p).device; + + Scope *s = member_->local_scopes_[p]; + auto &lod_tensor = s->FindVar(var_name)->Get(); + void *buffer = const_cast(lod_tensor.data()); + if (dtype == -1) { + dtype = ToNCCLDataType(lod_tensor.type()); + } + + if (numel == 0) { + numel = static_cast(lod_tensor.numel()); + } + + auto &nccl_ctx = member_->communication_streams_.at(dev_id); + + ncclAllReduce(buffer, buffer, numel, static_cast(dtype), + ncclSum, nccl_ctx.comm, nccl_ctx.stream()); + } + + ncclGroupEnd(); + } + } +}; + ParallelExecutor::ParallelExecutor( const std::vector &places, const std::unordered_set ¶ms, @@ -361,7 +393,7 @@ void ParallelExecutor::ConstructDependencyGraph( for (auto &og : var_names) { if (grads.count(og) != 0) { // is param grad // Insert NCCL AllReduce Op - member_->ops_.emplace_back(new NCCLAllReduceOpHandle()); + member_->ops_.emplace_back(new NCCLAllReduceOpHandle(member_)); auto *op_handle = member_->ops_.back().get(); for (auto &pair : member_->local_scopes_) { From e8a7e5d1e6e854ab542644f1df7ae90c8565cc5b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 16:35:56 +0800 Subject: [PATCH 019/180] Update --- paddle/fluid/framework/parallel_executor.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index ec5eb57910..5870eac811 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -250,6 +250,8 @@ struct NCCLAllReduceOpHandle : public OpHandle { int dtype = -1; size_t numel = 0; + platform::dynload::ncclGroupStart(); + for (auto &p : member_->places_) { int dev_id = boost::get(p).device; @@ -266,11 +268,12 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &nccl_ctx = member_->communication_streams_.at(dev_id); - ncclAllReduce(buffer, buffer, numel, static_cast(dtype), - ncclSum, nccl_ctx.comm, nccl_ctx.stream()); + platform::dynload::ncclAllReduce( + buffer, buffer, numel, static_cast(dtype), ncclSum, + nccl_ctx.comm, nccl_ctx.stream()); } - ncclGroupEnd(); + platform::dynload::ncclGroupEnd(); } } }; From b2c7a9b82850c2e4ffaf7027e82f49fa463defc5 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 16:43:49 +0800 Subject: [PATCH 020/180] Wait by stream --- paddle/fluid/framework/parallel_executor.cc | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 5870eac811..d46adf291b 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -77,7 +77,7 @@ struct OpHandle { virtual ~OpHandle() {} virtual void Run() { PADDLE_THROW("Not implemented"); } - virtual void Wait() {} + virtual void Wait(platform::DeviceContext *waited_dev) {} }; struct ComputationOpHandle : public OpHandle { @@ -97,13 +97,17 @@ struct ComputationOpHandle : public OpHandle { auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { - in->generated_op_->Wait(); + in->generated_op_->Wait(cur_ctx); } } op_->Run(*scope_, place_); LOG(INFO) << "Done " << this; } + + void Wait(platform::DeviceContext *waited_dev) override { + this->dev_ctx_.at(place_)->Wait(); + } }; struct ScaleLossGradOpHandle : public OpHandle { @@ -136,6 +140,10 @@ struct ScaleLossGradOpHandle : public OpHandle { ->stream()); } } + + void Wait(platform::DeviceContext *waited_dev) override { + this->dev_ctx_.at(place_)->Wait(); + } }; class ParallelExecutorPrivate { @@ -276,6 +284,10 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::dynload::ncclGroupEnd(); } } + + void Wait(platform::DeviceContext *waited_dev) override { + this->dev_ctx_.at(waited_dev->GetPlace())->Wait(); + } }; ParallelExecutor::ParallelExecutor( From 254d7ff4f5e5793d44aecde15ee375ec76d4ea4b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 16 Mar 2018 17:23:43 +0800 Subject: [PATCH 021/180] Refactor local_scopes --- paddle/fluid/framework/parallel_executor.cc | 76 ++++++++------------- 1 file changed, 28 insertions(+), 48 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d46adf291b..edc24cc131 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -151,11 +151,10 @@ class ParallelExecutorPrivate { explicit ParallelExecutorPrivate(size_t num_threads = 12) : pool_(num_threads) {} - std::unordered_map - local_scopes_; - std::vector places_; + std::vector local_scopes_; + #ifdef PADDLE_WITH_CUDA struct NCCLContext { std::unique_ptr ctx_; @@ -260,10 +259,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::dynload::ncclGroupStart(); - for (auto &p : member_->places_) { + for (size_t i = 0; i < member_->local_scopes_.size(); ++i) { + auto &p = member_->places_[i]; + auto *s = member_->local_scopes_[i]; int dev_id = boost::get(p).device; - Scope *s = member_->local_scopes_[p]; auto &lod_tensor = s->FindVar(var_name)->Get(); void *buffer = const_cast(lod_tensor.data()); if (dtype == -1) { @@ -302,8 +302,8 @@ ParallelExecutor::ParallelExecutor( Executor exe(places[0]); exe.Run(startup_program, scope, 0); // Create local scopes - for (auto &place : places) { - member_->local_scopes_[place] = &scope->NewScope(); + for (size_t i = 0; i < member_->places_.size(); ++i) { + member_->local_scopes_.push_back(&scope->NewScope()); } member_->main_place_ = places[0]; @@ -320,9 +320,7 @@ ParallelExecutor::ParallelExecutor( ConstructDependencyGraph(params, main_program, loss_var_name); // Step 3. Create vars in each scope; - for (auto &pair : member_->local_scopes_) { - auto *scope = pair.second; - + for (auto *scope : member_->local_scopes_) { for (auto *var : main_program.Block(0).AllVars()) { if (scope->FindVar(var->Name()) != nullptr) { continue; @@ -353,46 +351,44 @@ void ParallelExecutor::ConstructDependencyGraph( } } - for (auto &pair : member_->local_scopes_) { - member_->ops_.emplace_back( - new ComputationOpHandle(*op, pair.second, pair.first)); + for (size_t i = 0; i < member_->places_.size(); ++i) { + auto &p = member_->places_[i]; + auto *s = member_->local_scopes_[i]; + + member_->ops_.emplace_back(new ComputationOpHandle(*op, s, p)); auto *op_handle = member_->ops_.back().get(); - op_handle->dev_ctx_[pair.first] = const_cast( - platform::DeviceContextPool::Instance().Get(pair.first)); + op_handle->dev_ctx_[p] = const_cast( + platform::DeviceContextPool::Instance().Get(p)); auto var_names = op->InputArgumentNames(); for (auto &each_var_name : var_names) { - auto &place = pair.first; - VarHandle *var = GetVarHandle(each_var_name, place); + VarHandle *var = GetVarHandle(each_var_name, p); op_handle->inputs_.emplace_back(var); var->pending_ops_.emplace_back(op_handle); } var_names = op->OutputArgumentNames(); for (auto &each_var_name : var_names) { - auto &place = pair.first; - GenerateVar(op_handle, each_var_name, place); + GenerateVar(op_handle, each_var_name, p); } if (is_forwarding) { if (var_names.size() == 1 && var_names[0] == loss_var_name) { // Insert ScaleCost OpHandle member_->ops_.emplace_back(new ScaleLossGradOpHandle( - this->member_->local_scopes_.size(), pair.second, pair.first)); + this->member_->local_scopes_.size(), s, p)); op_handle = member_->ops_.back().get(); - op_handle->dev_ctx_[pair.first] = - member_->CommunicationDevCtx(pair.first); + op_handle->dev_ctx_[p] = member_->CommunicationDevCtx(p); - auto &place = pair.first; // FIXME: Currently ScaleLossGradOp only use device_count as scale // factor. So it does not depend on any other operators. // VarHandle *loss = GetVarHandle(loss_var_name, place); // loss->pending_ops_.emplace_back(op_handle); // op_handle->inputs_.emplace_back(loss); - GenerateVar(op_handle, loss_var_name + "@GRAD", place); + GenerateVar(op_handle, loss_var_name + "@GRAD", p); change_forward = true; LOG(INFO) << "Scale Loss " << op_handle->DebugString(); } @@ -411,9 +407,9 @@ void ParallelExecutor::ConstructDependencyGraph( member_->ops_.emplace_back(new NCCLAllReduceOpHandle(member_)); auto *op_handle = member_->ops_.back().get(); - for (auto &pair : member_->local_scopes_) { - auto &place = pair.first; - auto &vars = member_->vars_[place][og]; + for (size_t i = 0; i < member_->places_.size(); ++i) { + auto &p = member_->places_[i]; + auto &vars = member_->vars_[p][og]; if (vars.empty()) { // This device has no data. continue. continue; @@ -422,16 +418,13 @@ void ParallelExecutor::ConstructDependencyGraph( op_handle->inputs_.emplace_back(prev_grad); prev_grad->pending_ops_.emplace_back(op_handle); auto &var = vars[vars.size()]; - var.place_ = place; + var.place_ = p; var.generated_op_ = op_handle; var.name_ = og; var.version_ = vars.size() - 1; op_handle->outputs_.emplace_back(&var); - for (auto &pair : member_->local_scopes_) { - op_handle->dev_ctx_[pair.first] = - member_->CommunicationDevCtx(pair.first); - } + op_handle->dev_ctx_[p] = member_->CommunicationDevCtx(p); } } } @@ -529,7 +522,7 @@ VarHandle *ParallelExecutor::GetVarHandle(const std::string &each_var_name, void ParallelExecutor::BCastParamsToGPUs( const ProgramDesc &startup_program) const { #ifdef PADDLE_WITH_CUDA - auto *main_scope = member_->local_scopes_[member_->main_place_]; + auto *main_scope = member_->local_scopes_[0]; for (auto *var_desc : startup_program.Block(0).AllVars()) { if (var_desc->GetType() == proto::VarType::LOD_TENSOR) { @@ -547,7 +540,7 @@ void ParallelExecutor::BCastParamsToGPUs( if (i == 0) { buffer = const_cast(main_tensor.data()); } else { - auto local_scope = member_->local_scopes_[place]; + auto local_scope = member_->local_scopes_[i]; auto *t = local_scope->Var(var_desc->Name())->GetMutable(); t->Resize(dims); buffer = t->mutable_data(place, main_tensor.type()); @@ -560,18 +553,6 @@ void ParallelExecutor::BCastParamsToGPUs( platform::dynload::ncclGroupEnd(); } } - - // Debug code, bias should be 1.0f. - for (auto &pair : member_->local_scopes_) { - member_->GetNCCLCtx(pair.first).ctx_->Wait(); - - auto &b = pair.second->FindVar("fc_0.b_0")->Get(); - framework::LoDTensor cpu; - framework::TensorCopy(b, platform::CPUPlace(), &cpu); - platform::DeviceContextPool::Instance().Get(b.place())->Wait(); - LOG(INFO) << *cpu.data(); - } - #else PADDLE_THROW("Not compiled with CUDA"); #endif @@ -579,8 +560,7 @@ void ParallelExecutor::BCastParamsToGPUs( void ParallelExecutor::BuildNCCLCommunicator() const { #ifdef PADDLE_WITH_CUDA - for (auto &place_pair : member_->local_scopes_) { - auto place = place_pair.first; + for (auto &place : member_->places_) { int dev_id = boost::get(place).device; member_->communication_streams_.emplace( From 9cb8f503026c6d3d25fa80e34b8fa2ca0bea6d2f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 14:58:50 +0800 Subject: [PATCH 022/180] Complete fetch op --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/framework/parallel_executor.cc | 123 +++++++++++++++--- paddle/fluid/framework/parallel_executor.h | 3 +- paddle/fluid/operators/math/concat.h | 1 + paddle/fluid/pybind/pybind.cc | 2 +- .../tests/unittests/test_parallel_executor.py | 15 ++- 6 files changed, 124 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index fadc24ae5d..6522a7a69f 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -87,7 +87,7 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table feed_fetch_method executor simple_threadpool) + framework_proto backward glog lod_rank_table feed_fetch_method executor simple_threadpool concat) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index edc24cc131..cfaa2dbd1f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -16,7 +16,9 @@ limitations under the License. */ #include "ThreadPool.h" #include "executor.h" #include "lod_tensor.h" +#include "lod_tensor_array.h" #include "op_registry.h" +#include "paddle/fluid/operators/math/concat.h" namespace paddle { namespace framework { @@ -34,7 +36,7 @@ struct VarHandleBase { virtual std::string DebugString() const = 0; OpHandle *generated_op_; - std::vector pending_ops_; + std::unordered_set pending_ops_; }; struct VarHandle : public VarHandleBase { @@ -93,7 +95,6 @@ struct ComputationOpHandle : public OpHandle { void Run() override { // Wait other op if necessary - LOG(INFO) << "Run " << this << " " << DebugString(); auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { @@ -102,7 +103,6 @@ struct ComputationOpHandle : public OpHandle { } op_->Run(*scope_, place_); - LOG(INFO) << "Done " << this; } void Wait(platform::DeviceContext *waited_dev) override { @@ -122,8 +122,6 @@ struct ScaleLossGradOpHandle : public OpHandle { place_(place) {} void Run() override { - LOG(INFO) << "Run Scale Loss Grad"; - std::string var_name = static_cast(this->outputs_[0])->name_; float *tmp = scope_->FindVar(var_name) @@ -146,6 +144,64 @@ struct ScaleLossGradOpHandle : public OpHandle { } }; +struct FetchedData { + public: + std::vector tensors_; + + explicit FetchedData(size_t num_fetched) { tensors_.resize(num_fetched); } +}; + +struct FetchOpHandle : public OpHandle { + std::shared_ptr data_; + size_t offset_; + std::vector *local_scopes_; + std::vector tensors_; + + ~FetchOpHandle() { + for (auto *input_var : inputs_) { + input_var->pending_ops_.erase(this); + } + for (auto &pair : dev_ctx_) { + pair.second->Wait(); + } + + // Lazily merge tensors. Will faster code. + MergeTensors(); + } + + void Run() override { + tensors_.resize(inputs_.size()); + auto *var = static_cast(inputs_[0]); + auto &var_name = var->name_; + platform::CPUPlace cpu; + auto &scopes = *local_scopes_; + + for (size_t i = 0; i < scopes.size(); ++i) { + auto &scope = scopes[i]; + auto &t = scope->FindVar(var_name)->Get(); + if (platform::is_gpu_place(var->place_)) { + TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]); + } else { + tensors_[i].ShareDataWith(t); + tensors_[i].set_lod(t.lod()); + } + } + } + + void Wait(platform::DeviceContext *waited_dev) override { + PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); + } + + private: + void MergeTensors() const { + std::vector tensors_ptr; + for (auto &t : tensors_) { + tensors_ptr.emplace_back(&t); + } + data_->tensors_[offset_].MergeLoDTensor(tensors_ptr, platform::CPUPlace()); + } +}; + class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(size_t num_threads = 12) @@ -154,6 +210,7 @@ class ParallelExecutorPrivate { std::vector places_; std::vector local_scopes_; + Scope *global_scope_; #ifdef PADDLE_WITH_CUDA struct NCCLContext { @@ -297,7 +354,7 @@ ParallelExecutor::ParallelExecutor( const std::string &loss_var_name, Scope *scope) : member_(new ParallelExecutorPrivate()) { member_->places_ = places; - + member_->global_scope_ = scope; // Step 1. RunStartupProgram and Bcast the params to devs. Executor exe(places[0]); exe.Run(startup_program, scope, 0); @@ -308,9 +365,9 @@ ParallelExecutor::ParallelExecutor( member_->main_place_ = places[0]; // Bcast Parameters to all GPUs + BuildNCCLCommunicator(); if (platform::is_gpu_place(member_->main_place_) && member_->local_scopes_.size() != 1) { // Is CUDA - BuildNCCLCommunicator(); BCastParamsToGPUs(startup_program); } // Startup Program has been run. All local scopes has correct parameters. @@ -365,7 +422,7 @@ void ParallelExecutor::ConstructDependencyGraph( for (auto &each_var_name : var_names) { VarHandle *var = GetVarHandle(each_var_name, p); op_handle->inputs_.emplace_back(var); - var->pending_ops_.emplace_back(op_handle); + var->pending_ops_.emplace(op_handle); } var_names = op->OutputArgumentNames(); @@ -390,7 +447,6 @@ void ParallelExecutor::ConstructDependencyGraph( GenerateVar(op_handle, loss_var_name + "@GRAD", p); change_forward = true; - LOG(INFO) << "Scale Loss " << op_handle->DebugString(); } } } @@ -416,7 +472,7 @@ void ParallelExecutor::ConstructDependencyGraph( } auto *prev_grad = &vars[vars.size() - 1]; op_handle->inputs_.emplace_back(prev_grad); - prev_grad->pending_ops_.emplace_back(op_handle); + prev_grad->pending_ops_.emplace(op_handle); auto &var = vars[vars.size()]; var.place_ = p; var.generated_op_ = op_handle; @@ -463,10 +519,6 @@ void ParallelExecutor::PolishGraphToSupportDataHarzaeds() const { continue; } - LOG(INFO) << "Link " << it_new->second.DebugString() << " From " - << it_old->second.version_ << " To " - << it_new->second.version_; - for (auto *read_op : read_ops) { // Manually add a dependency var from read_op to write_op; if (read_op == write_op) { @@ -479,7 +531,7 @@ void ParallelExecutor::PolishGraphToSupportDataHarzaeds() const { dep_var->generated_op_ = read_op; read_op->outputs_.emplace_back(dep_var); - dep_var->pending_ops_.emplace_back(write_op); + dep_var->pending_ops_.emplace(write_op); write_op->inputs_.emplace_back(dep_var); member_->dep_vars_.emplace(dep_var); } @@ -572,8 +624,9 @@ void ParallelExecutor::BuildNCCLCommunicator() const { #endif } -std::vector ParallelExecutor::Run( - const std::vector &fetch_tensors) { +void ParallelExecutor::Run(const std::vector &fetch_tensors, + const std::string &fetched_var_name) { + auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); std::unordered_map pending_vars; @@ -602,6 +655,38 @@ std::vector ParallelExecutor::Run( } } + std::unordered_map> fetched_vars; + + for (auto &fetch_var_name : fetch_tensors) { + for (auto &pair : member_->vars_) { + auto it = pair.second.find(fetch_var_name); + if (it != pair.second.end()) { + fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); + } + } + } + + std::vector fetch_ops; + + for (size_t i = 0; i < fetch_tensors.size(); ++i) { + auto &var_name = fetch_tensors[i]; + auto &vars = fetched_vars[var_name]; + fetch_ops.emplace_back(); + FetchOpHandle *op = &fetch_ops.back(); + op->data_ = fetched_data; + op->offset_ = i; + op->local_scopes_ = &member_->local_scopes_; + for (auto &p : member_->places_) { + op->dev_ctx_[p] = this->member_->GetNCCLCtx(p).ctx_.get(); + } + + for (auto *var : vars) { + var->pending_ops_.emplace(op); + op->inputs_.emplace_back(var); + } + pending_ops.insert({op, op->inputs_.size()}); + } + for (auto *op : to_run) { RunOp(pending_vars, op); } @@ -642,7 +727,9 @@ std::vector ParallelExecutor::Run( RunOp(pending_vars, op); } } - return std::vector(); + fetch_ops.clear(); + *member_->global_scope_->Var(fetched_var_name)->GetMutable() = + fetched_data->tensors_; } void ParallelExecutor::RunOp( diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 30416563f8..e4857f0eef 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -40,7 +40,8 @@ class ParallelExecutor { const ProgramDesc& main_program, const std::string& loss_var_name, Scope* scope); - std::vector Run(const std::vector& fetch_tensors); + void Run(const std::vector& fetch_tensors, + const std::string& fetched_var_name = "fetched_var"); private: ParallelExecutorPrivate* member_; diff --git a/paddle/fluid/operators/math/concat.h b/paddle/fluid/operators/math/concat.h index 22147d79e4..c0e983e4aa 100644 --- a/paddle/fluid/operators/math/concat.h +++ b/paddle/fluid/operators/math/concat.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/tensor.h" namespace paddle { diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index c2348d9686..929c343f7a 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -508,7 +508,7 @@ All parameter, weight, gradient are variables in Paddle. new (&self) ParallelExecutor(places, params, startup_program, main_program, loss_var_name, scope); }) - .def("run", [](ParallelExecutor &self) { self.Run({}); }); + .def("run", &ParallelExecutor::Run); BindRecordIOWriter(m); return m.ptr(); diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 2a614700b0..1cea14fb96 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -16,6 +16,7 @@ import unittest import paddle.fluid as fluid import paddle.v2 as paddle import paddle.v2.dataset.mnist as mnist +import numpy class ParallelExecutor(unittest.TestCase): @@ -66,4 +67,16 @@ class ParallelExecutor(unittest.TestCase): act_places, set([p.name for p in main.global_block().iter_parameters()]), startup.desc, main.desc, loss.name, fluid.global_scope()) - exe.run() + exe.run([loss.name], 'fetched_var') + + first_loss = numpy.array(fluid.global_scope().find_var('fetched_var') + .get_lod_tensor_array()[0]) + + for i in xrange(10): + exe.run([], 'fetched_var') + exe.run([loss.name], 'fetched_var') + last_loss = numpy.array(fluid.global_scope().find_var('fetched_var') + .get_lod_tensor_array()[0]) + + print first_loss, last_loss + self.assertGreater(first_loss[0], last_loss[0]) From e18a2697054f02d87d1289f7feed1081cf3599c3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 15:08:09 +0800 Subject: [PATCH 023/180] Add debug code --- paddle/fluid/framework/parallel_executor.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index cfaa2dbd1f..b3bf2b8fb6 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -46,6 +46,8 @@ struct VarHandle : public VarHandleBase { return ss.str(); } + // version field currently is not used, however, just store the version to + // debug easily. size_t version_; std::string name_; platform::Place place_; @@ -742,7 +744,7 @@ void ParallelExecutor::RunOp( auto op_run = [ready_buffer, op, this] { try { - // TODO(yy) Check Previous Op has same dev ctx. + VLOG(10) << op->DebugString(); op->Run(); for (auto *ready : ready_buffer) { *ready = true; From 389ea18a4e95f19cfc78cae6fc46d5096a648a91 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 15:13:04 +0800 Subject: [PATCH 024/180] Debug code --- .../tests/unittests/test_parallel_executor.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 1cea14fb96..e8976ff052 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -71,12 +71,13 @@ class ParallelExecutor(unittest.TestCase): first_loss = numpy.array(fluid.global_scope().find_var('fetched_var') .get_lod_tensor_array()[0]) - - for i in xrange(10): - exe.run([], 'fetched_var') - exe.run([loss.name], 'fetched_var') - last_loss = numpy.array(fluid.global_scope().find_var('fetched_var') - .get_lod_tensor_array()[0]) - - print first_loss, last_loss - self.assertGreater(first_loss[0], last_loss[0]) + print first_loss + # + # for i in xrange(10): + # exe.run([], 'fetched_var') + # exe.run([loss.name], 'fetched_var') + # last_loss = numpy.array(fluid.global_scope().find_var('fetched_var') + # .get_lod_tensor_array()[0]) + # + # print first_loss, last_loss + # self.assertGreater(first_loss[0], last_loss[0]) From f8141d90c845c71cda03df10649b0dfc747f2c1a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 15:16:40 +0800 Subject: [PATCH 025/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 1 + .../tests/unittests/test_parallel_executor.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b3bf2b8fb6..c42101e21a 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -345,6 +345,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { } void Wait(platform::DeviceContext *waited_dev) override { + VLOG(3) << "Wait NCCL AllReduce"; this->dev_ctx_.at(waited_dev->GetPlace())->Wait(); } }; diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index e8976ff052..e156d5b60e 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -72,12 +72,12 @@ class ParallelExecutor(unittest.TestCase): first_loss = numpy.array(fluid.global_scope().find_var('fetched_var') .get_lod_tensor_array()[0]) print first_loss - # - # for i in xrange(10): - # exe.run([], 'fetched_var') - # exe.run([loss.name], 'fetched_var') - # last_loss = numpy.array(fluid.global_scope().find_var('fetched_var') - # .get_lod_tensor_array()[0]) - # - # print first_loss, last_loss - # self.assertGreater(first_loss[0], last_loss[0]) + + for i in xrange(10): + exe.run([], 'fetched_var') + exe.run([loss.name], 'fetched_var') + last_loss = numpy.array(fluid.global_scope().find_var('fetched_var') + .get_lod_tensor_array()[0]) + + print first_loss, last_loss + self.assertGreater(first_loss[0], last_loss[0]) From 09935ab936364257f3172f7cc0986a813057ecd0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 15:24:21 +0800 Subject: [PATCH 026/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index c42101e21a..1782430927 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -345,8 +345,9 @@ struct NCCLAllReduceOpHandle : public OpHandle { } void Wait(platform::DeviceContext *waited_dev) override { - VLOG(3) << "Wait NCCL AllReduce"; - this->dev_ctx_.at(waited_dev->GetPlace())->Wait(); + for (auto &pair : member_->communication_streams_) { + pair.second.ctx_->Wait(); + } } }; From 0023c3bcf52c7bde221a32fb898f52a9aac635c2 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 16:29:41 +0800 Subject: [PATCH 027/180] Use atomic bool --- paddle/fluid/framework/parallel_executor.cc | 6 +++--- paddle/fluid/framework/parallel_executor.h | 5 +++-- paddle/fluid/platform/profiler_test.cc | 9 +++++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1782430927..c8dd3f9151 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -633,7 +633,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); - std::unordered_map pending_vars; + std::unordered_map> pending_vars; std::unordered_map pending_ops; for (auto &place_pair : member_->vars_) { @@ -737,9 +737,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } void ParallelExecutor::RunOp( - std::unordered_map &pending_vars, + std::unordered_map> &pending_vars, OpHandle *op) const { - std::vector ready_buffer; + std::vector *> ready_buffer; for (auto *var : op->outputs_) { ready_buffer.emplace_back(&pending_vars[var]); } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index e4857f0eef..c3cebcfc57 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -60,8 +60,9 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - void RunOp(std::unordered_map& pending_vars, - OpHandle* op) const; + void RunOp( + std::unordered_map>& pending_vars, + OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; }; diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index fc77e0f321..366c82bf96 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/profiler.h" +#include "cuda_runtime.h" #include "gtest/gtest.h" TEST(Event, CpuElapsedTime) { @@ -157,3 +158,11 @@ TEST(RecordEvent, RecordEvent) { // Will remove parsing-related code from test later DisableProfiler(EventSortingKey::kTotal, "/tmp/profiler"); } + +TEST(TMP, stream_wait) { + cudaStream_t stream; + cudaStreamCreate(&stream); + cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); + cudaStreamSynchronize(stream); +} From f52714d391d49230e0cfc630a5fcbb35c06c941a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 16:33:35 +0800 Subject: [PATCH 028/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index c8dd3f9151..1e1a5477a0 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -172,6 +172,10 @@ struct FetchOpHandle : public OpHandle { } void Run() override { + for (auto *input : inputs_) { + input->generated_op_->Wait(nullptr); + } + tensors_.resize(inputs_.size()); auto *var = static_cast(inputs_[0]); auto &var_name = var->name_; From 5957f28b862c154add5bdf1c35b9826d3b77ed39 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 16:39:29 +0800 Subject: [PATCH 029/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1e1a5477a0..5b483849b1 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -714,6 +714,12 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, throw * member_->exception_; } + { + for (auto &pair : pending_vars) { + VLOG(3) << pair.first->DebugString(); + } + } + std::this_thread::yield(); continue; } From 36e0415220312ba9920777f1850d8f18cfa97d36 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 16:59:08 +0800 Subject: [PATCH 030/180] Single Thread --- paddle/fluid/framework/parallel_executor.cc | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 5b483849b1..2898c5ffd9 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -714,12 +714,6 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, throw * member_->exception_; } - { - for (auto &pair : pending_vars) { - VLOG(3) << pair.first->DebugString(); - } - } - std::this_thread::yield(); continue; } @@ -768,7 +762,8 @@ void ParallelExecutor::RunOp( } }; - member_->pool_.enqueue(op_run); + op_run(); + // member_->pool_.enqueue(op_run); } } // namespace framework } // namespace paddle From f3e983e49987b32af57e2e7924be8b245041ec4d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 17:08:19 +0800 Subject: [PATCH 031/180] Memory order --- paddle/fluid/framework/parallel_executor.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 2898c5ffd9..875b5d8ba7 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -702,7 +702,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, while (!pending_ops.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { - if (pair.second) { + if (pair.second.load(std::memory_order_acquire)) { ready_var = pair.first; } } @@ -714,7 +714,6 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, throw * member_->exception_; } - std::this_thread::yield(); continue; } @@ -753,7 +752,7 @@ void ParallelExecutor::RunOp( VLOG(10) << op->DebugString(); op->Run(); for (auto *ready : ready_buffer) { - *ready = true; + ready->store(true, std::memory_order_release); } } catch (platform::EnforceNotMet ex) { member_->exception_.reset(new platform::EnforceNotMet(ex)); @@ -762,8 +761,7 @@ void ParallelExecutor::RunOp( } }; - op_run(); - // member_->pool_.enqueue(op_run); + member_->pool_.enqueue(op_run); } } // namespace framework } // namespace paddle From b57b880b055a0eab250e5092eb6a5b3e9b1b9ee3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 17:15:45 +0800 Subject: [PATCH 032/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 875b5d8ba7..b5b1e43abf 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -742,26 +742,29 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, void ParallelExecutor::RunOp( std::unordered_map> &pending_vars, OpHandle *op) const { - std::vector *> ready_buffer; + std::vector *> *ready_buffer = + new std::vector *>(); for (auto *var : op->outputs_) { - ready_buffer.emplace_back(&pending_vars[var]); + ready_buffer->emplace_back(&pending_vars[var]); } auto op_run = [ready_buffer, op, this] { try { VLOG(10) << op->DebugString(); op->Run(); - for (auto *ready : ready_buffer) { + for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); } + delete ready_buffer; } catch (platform::EnforceNotMet ex) { member_->exception_.reset(new platform::EnforceNotMet(ex)); } catch (...) { LOG(FATAL) << "Unknown exception catched"; } }; - + VLOG(3) << "Enqueue"; member_->pool_.enqueue(op_run); + VLOG(3) << "Done"; } } // namespace framework } // namespace paddle From b1cb8bbd405ecb602446da0a6e5822d5b696afbd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 17:20:14 +0800 Subject: [PATCH 033/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b5b1e43abf..a0bd01e0c8 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -700,13 +700,14 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } while (!pending_ops.empty()) { + VLOG(1) << "1"; VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second.load(std::memory_order_acquire)) { ready_var = pair.first; } } - + VLOG(1) << "1"; if (ready_var == nullptr) { // FIXME use conditional var instead of busy wait. @@ -716,11 +717,11 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, continue; } - + VLOG(1) << "1"; pending_vars.erase(ready_var); - + VLOG(1) << "1"; to_run.clear(); - + VLOG(1) << "1"; for (auto *op : ready_var->pending_ops_) { auto &deps = pending_ops[op]; --deps; @@ -728,13 +729,16 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, to_run.emplace_back(op); } } - + VLOG(1) << "1"; for (auto *op : to_run) { pending_ops.erase(op); RunOp(pending_vars, op); } + VLOG(1) << "1"; } + VLOG(1) << "1"; fetch_ops.clear(); + VLOG(1) << "1"; *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; } From 1f063d0900d79c0d09809419d6393bc2ecebbb2b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 17:30:16 +0800 Subject: [PATCH 034/180] Memorder --- paddle/fluid/framework/parallel_executor.cc | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index a0bd01e0c8..7d2ba74086 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -643,14 +643,16 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, for (auto &place_pair : member_->vars_) { for (auto &name_pair : place_pair.second) { for (auto &version_pair : name_pair.second) { - pending_vars[&version_pair.second] = - version_pair.second.generated_op_ == nullptr; + pending_vars[&version_pair.second].store( + version_pair.second.generated_op_ == nullptr, + std::memory_order_relaxed); } } } for (auto &var : member_->dep_vars_) { - pending_vars[var.get()] = var->generated_op_ == nullptr; + pending_vars[var.get()].store(var->generated_op_ == nullptr, + std::memory_order_relaxed); } std::vector to_run; @@ -700,14 +702,12 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } while (!pending_ops.empty()) { - VLOG(1) << "1"; VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second.load(std::memory_order_acquire)) { ready_var = pair.first; } } - VLOG(1) << "1"; if (ready_var == nullptr) { // FIXME use conditional var instead of busy wait. @@ -717,11 +717,8 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, continue; } - VLOG(1) << "1"; pending_vars.erase(ready_var); - VLOG(1) << "1"; to_run.clear(); - VLOG(1) << "1"; for (auto *op : ready_var->pending_ops_) { auto &deps = pending_ops[op]; --deps; @@ -729,16 +726,12 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, to_run.emplace_back(op); } } - VLOG(1) << "1"; for (auto *op : to_run) { pending_ops.erase(op); RunOp(pending_vars, op); } - VLOG(1) << "1"; } - VLOG(1) << "1"; fetch_ops.clear(); - VLOG(1) << "1"; *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; } From 515e516e770e648a6adf41d6aa0bd839b4683007 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 17:36:00 +0800 Subject: [PATCH 035/180] Add more log --- paddle/fluid/framework/parallel_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7d2ba74086..57dc663c41 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -747,8 +747,9 @@ void ParallelExecutor::RunOp( auto op_run = [ready_buffer, op, this] { try { - VLOG(10) << op->DebugString(); + VLOG(10) << op->DebugString() << " " << this; op->Run(); + VLOG(10) << "Done " << this; for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); } From ea11a0a8533affaa9681d7859713d07eed8fddd8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 18:19:39 +0800 Subject: [PATCH 036/180] Use volitie --- paddle/fluid/framework/parallel_executor.cc | 24 +++++++++++---------- paddle/fluid/framework/parallel_executor.h | 5 ++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 57dc663c41..450df244b7 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -97,6 +97,10 @@ struct ComputationOpHandle : public OpHandle { void Run() override { // Wait other op if necessary + if (platform::is_gpu_place(place_)) { + int dev_id = boost::get(place_).device; + cudaSetDevice(dev_id); + } auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { @@ -637,22 +641,20 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); - std::unordered_map> pending_vars; + std::unordered_map pending_vars; std::unordered_map pending_ops; for (auto &place_pair : member_->vars_) { for (auto &name_pair : place_pair.second) { for (auto &version_pair : name_pair.second) { - pending_vars[&version_pair.second].store( - version_pair.second.generated_op_ == nullptr, - std::memory_order_relaxed); + pending_vars[&version_pair.second] = + version_pair.second.generated_op_ == nullptr; } } } for (auto &var : member_->dep_vars_) { - pending_vars[var.get()].store(var->generated_op_ == nullptr, - std::memory_order_relaxed); + pending_vars[var.get()] = var->generated_op_ == nullptr; } std::vector to_run; @@ -704,7 +706,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, while (!pending_ops.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { - if (pair.second.load(std::memory_order_acquire)) { + if (pair.second) { ready_var = pair.first; } } @@ -737,10 +739,10 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } void ParallelExecutor::RunOp( - std::unordered_map> &pending_vars, + std::unordered_map &pending_vars, OpHandle *op) const { - std::vector *> *ready_buffer = - new std::vector *>(); + std::vector *ready_buffer = + new std::vector(); for (auto *var : op->outputs_) { ready_buffer->emplace_back(&pending_vars[var]); } @@ -751,7 +753,7 @@ void ParallelExecutor::RunOp( op->Run(); VLOG(10) << "Done " << this; for (auto *ready : *ready_buffer) { - ready->store(true, std::memory_order_release); + *ready = true; } delete ready_buffer; } catch (platform::EnforceNotMet ex) { diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index c3cebcfc57..150b429f94 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -60,9 +60,8 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - void RunOp( - std::unordered_map>& pending_vars, - OpHandle* op) const; + void RunOp(std::unordered_map& pending_vars, + OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; }; From a87ce91c4b93561a913a47350043ef6048f29912 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 18:30:15 +0800 Subject: [PATCH 037/180] Use mtx --- paddle/fluid/framework/parallel_executor.cc | 7 +++---- paddle/fluid/framework/parallel_executor.h | 23 ++++++++++++++++++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 450df244b7..773e5c0074 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -641,7 +641,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); - std::unordered_map pending_vars; + std::unordered_map pending_vars; std::unordered_map pending_ops; for (auto &place_pair : member_->vars_) { @@ -739,10 +739,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } void ParallelExecutor::RunOp( - std::unordered_map &pending_vars, + std::unordered_map &pending_vars, OpHandle *op) const { - std::vector *ready_buffer = - new std::vector(); + std::vector *ready_buffer = new std::vector(); for (auto *var : op->outputs_) { ready_buffer->emplace_back(&pending_vars[var]); } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 150b429f94..b6fa6fb2d8 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -32,6 +32,27 @@ class ParallelExecutorPrivate; class VarHandle; class OpHandle; class VarHandleBase; + +struct GuardedBool { + public: + GuardedBool() {} + + operator bool() const { + std::lock_guard g(mtx_); + return value_; + } + + GuardedBool& operator=(bool o) { + std::lock_guard g(mtx_); + value_ = o; + return *this; + } + + private: + mutable std::mutex mtx_; + bool value_; +}; + class ParallelExecutor { public: explicit ParallelExecutor(const std::vector& places, @@ -60,7 +81,7 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - void RunOp(std::unordered_map& pending_vars, + void RunOp(std::unordered_map& pending_vars, OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; From a5ba704de060f3e23eac74fcdc3e635c1cf6c2a7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 18:38:36 +0800 Subject: [PATCH 038/180] Counter --- paddle/fluid/framework/parallel_executor.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 773e5c0074..ab0d9f72f7 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -748,9 +748,9 @@ void ParallelExecutor::RunOp( auto op_run = [ready_buffer, op, this] { try { - VLOG(10) << op->DebugString() << " " << this; + VLOG(10) << op->DebugString() << " " << op; op->Run(); - VLOG(10) << "Done " << this; + VLOG(10) << "Done " << op; for (auto *ready : *ready_buffer) { *ready = true; } @@ -761,9 +761,7 @@ void ParallelExecutor::RunOp( LOG(FATAL) << "Unknown exception catched"; } }; - VLOG(3) << "Enqueue"; member_->pool_.enqueue(op_run); - VLOG(3) << "Done"; } } // namespace framework } // namespace paddle From d3e55fde032c08e45c8cab83204d73a27c99cfc8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 18:40:03 +0800 Subject: [PATCH 039/180] Guard devctx --- paddle/fluid/platform/device_context.cc | 1 + paddle/fluid/platform/device_context.h | 1 + 2 files changed, 2 insertions(+) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 98b4178177..37a77c7ea7 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -159,6 +159,7 @@ CUDADeviceContext::~CUDADeviceContext() { Place CUDADeviceContext::GetPlace() const { return place_; } void CUDADeviceContext::Wait() const { + std::lock_guard guard(this->mutex_); PADDLE_ENFORCE(cudaStreamSynchronize(stream_)); PADDLE_ENFORCE(cudaGetLastError()); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 603b890af1..c43207b641 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -110,6 +110,7 @@ class CUDADeviceContext : public DeviceContext { int compute_capability; int multi_process; int max_threads_per_mp; + mutable std::mutex mutex_; }; template <> From 866f6f1be09bc38a8ed3b51bcfc475b52c07a28a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 18:56:15 +0800 Subject: [PATCH 040/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index ab0d9f72f7..08d508d542 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -703,7 +703,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, RunOp(pending_vars, op); } - while (!pending_ops.empty()) { + while (!pending_vars.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second) { @@ -716,6 +716,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, if (member_->exception_) { throw * member_->exception_; } + VLOG(3) << pending_vars.size(); continue; } @@ -748,9 +749,7 @@ void ParallelExecutor::RunOp( auto op_run = [ready_buffer, op, this] { try { - VLOG(10) << op->DebugString() << " " << op; op->Run(); - VLOG(10) << "Done " << op; for (auto *ready : *ready_buffer) { *ready = true; } From 7bff02b2ca6ab5206406bcda10a46448c5f3a71e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:00:34 +0800 Subject: [PATCH 041/180] Change to pending op --- paddle/fluid/framework/parallel_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 08d508d542..ac2c878453 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -703,7 +703,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, RunOp(pending_vars, op); } - while (!pending_vars.empty()) { + while (!pending_ops.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second) { @@ -716,8 +716,8 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, if (member_->exception_) { throw * member_->exception_; } - VLOG(3) << pending_vars.size(); + VLOG(3) << pending_vars.size(); continue; } pending_vars.erase(ready_var); From 5fa535b71785cc2abc58f3e0f76a2e7c73dfd497 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:09:45 +0800 Subject: [PATCH 042/180] Wait all thread done --- paddle/fluid/framework/parallel_executor.cc | 16 ++++++++++++---- paddle/fluid/framework/parallel_executor.h | 7 ++++--- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index ac2c878453..938f4317b1 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -699,8 +699,11 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, pending_ops.insert({op, op->inputs_.size()}); } + std::vector> op_threads; + op_threads.reserve(pending_ops.size() + to_run.size()); + for (auto *op : to_run) { - RunOp(pending_vars, op); + op_threads.emplace_back(RunOp(pending_vars, op)); } while (!pending_ops.empty()) { @@ -731,15 +734,20 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } for (auto *op : to_run) { pending_ops.erase(op); - RunOp(pending_vars, op); + op_threads.emplace_back(RunOp(pending_vars, op)); } } + + for (auto &t : op_threads) { + t.get(); // Join all workers + } + fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; } -void ParallelExecutor::RunOp( +std::future ParallelExecutor::RunOp( std::unordered_map &pending_vars, OpHandle *op) const { std::vector *ready_buffer = new std::vector(); @@ -760,7 +768,7 @@ void ParallelExecutor::RunOp( LOG(FATAL) << "Unknown exception catched"; } }; - member_->pool_.enqueue(op_run); + return member_->pool_.enqueue(op_run); } } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index b6fa6fb2d8..badf7c5ea7 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once +#include #include - #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/program_desc.h" @@ -81,8 +81,9 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - void RunOp(std::unordered_map& pending_vars, - OpHandle* op) const; + std::future RunOp( + std::unordered_map& pending_vars, + OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; }; From c7beac142609c89343ab862d9a3695e0c077d4cf Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:18:01 +0800 Subject: [PATCH 043/180] Add dummy var --- paddle/fluid/framework/parallel_executor.cc | 32 +++++++++++---------- paddle/fluid/framework/parallel_executor.h | 5 ++-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 938f4317b1..2fb274d3a5 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -53,6 +53,10 @@ struct VarHandle : public VarHandleBase { platform::Place place_; }; +struct DummyVarHandle : public VarHandleBase { + std::string DebugString() const override { return "dummy"; } +}; + struct DependencyVarHandle : public VarHandleBase { std::string DebugString() const override { return "Dependency Variable"; } }; @@ -643,6 +647,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, member_->exception_.reset(); std::unordered_map pending_vars; std::unordered_map pending_ops; + std::vector dummy_vars; for (auto &place_pair : member_->vars_) { for (auto &name_pair : place_pair.second) { @@ -696,17 +701,21 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, var->pending_ops_.emplace(op); op->inputs_.emplace_back(var); } + + dummy_vars.emplace_back(); + auto *var = &dummy_vars.back(); + op->outputs_.emplace_back(var); + var->generated_op_ = op; + pending_vars[var] = false; + pending_ops.insert({op, op->inputs_.size()}); } - std::vector> op_threads; - op_threads.reserve(pending_ops.size() + to_run.size()); - for (auto *op : to_run) { - op_threads.emplace_back(RunOp(pending_vars, op)); + RunOp(pending_vars, op); } - while (!pending_ops.empty()) { + while (!pending_vars.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second) { @@ -715,12 +724,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } if (ready_var == nullptr) { // FIXME use conditional var instead of busy wait. - if (member_->exception_) { throw * member_->exception_; } - - VLOG(3) << pending_vars.size(); continue; } pending_vars.erase(ready_var); @@ -734,20 +740,16 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } for (auto *op : to_run) { pending_ops.erase(op); - op_threads.emplace_back(RunOp(pending_vars, op)); + RunOp(pending_vars, op); } } - for (auto &t : op_threads) { - t.get(); // Join all workers - } - fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; } -std::future ParallelExecutor::RunOp( +void ParallelExecutor::RunOp( std::unordered_map &pending_vars, OpHandle *op) const { std::vector *ready_buffer = new std::vector(); @@ -768,7 +770,7 @@ std::future ParallelExecutor::RunOp( LOG(FATAL) << "Unknown exception catched"; } }; - return member_->pool_.enqueue(op_run); + member_->pool_.enqueue(op_run); } } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index badf7c5ea7..8fe93fb62e 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -81,9 +81,8 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - std::future RunOp( - std::unordered_map& pending_vars, - OpHandle* op) const; + void RunOp(std::unordered_map& pending_vars, + OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; }; From 1f53193a630bc3b6289154dd5f5334a45ddb9285 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:22:03 +0800 Subject: [PATCH 044/180] Use atomic code --- paddle/fluid/framework/parallel_executor.cc | 13 ++++++----- paddle/fluid/framework/parallel_executor.h | 25 +++------------------ 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 2fb274d3a5..fa6763b5b5 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -645,7 +645,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); - std::unordered_map pending_vars; + std::unordered_map> pending_vars; std::unordered_map pending_ops; std::vector dummy_vars; @@ -694,7 +694,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, op->offset_ = i; op->local_scopes_ = &member_->local_scopes_; for (auto &p : member_->places_) { - op->dev_ctx_[p] = this->member_->GetNCCLCtx(p).ctx_.get(); + op->dev_ctx_[p] = member_->GetNCCLCtx(p).ctx_.get(); } for (auto *var : vars) { @@ -718,7 +718,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, while (!pending_vars.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { - if (pair.second) { + if (pair.second.load(std::memory_order_consume)) { ready_var = pair.first; } } @@ -750,9 +750,10 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } void ParallelExecutor::RunOp( - std::unordered_map &pending_vars, + std::unordered_map> &pending_vars, OpHandle *op) const { - std::vector *ready_buffer = new std::vector(); + std::vector *> *ready_buffer = + new std::vector *>(); for (auto *var : op->outputs_) { ready_buffer->emplace_back(&pending_vars[var]); } @@ -761,7 +762,7 @@ void ParallelExecutor::RunOp( try { op->Run(); for (auto *ready : *ready_buffer) { - *ready = true; + ready->store(true, std::memory_order_release); } delete ready_buffer; } catch (platform::EnforceNotMet ex) { diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 8fe93fb62e..03bf60b8bc 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -33,26 +33,6 @@ class VarHandle; class OpHandle; class VarHandleBase; -struct GuardedBool { - public: - GuardedBool() {} - - operator bool() const { - std::lock_guard g(mtx_); - return value_; - } - - GuardedBool& operator=(bool o) { - std::lock_guard g(mtx_); - value_ = o; - return *this; - } - - private: - mutable std::mutex mtx_; - bool value_; -}; - class ParallelExecutor { public: explicit ParallelExecutor(const std::vector& places, @@ -81,8 +61,9 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; - void RunOp(std::unordered_map& pending_vars, - OpHandle* op) const; + void RunOp( + std::unordered_map>& pending_vars, + OpHandle* op) const; void PolishGraphToSupportDataHarzaeds() const; }; From 3aa7051b980c10eb73c591302f379671540042bd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:23:40 +0800 Subject: [PATCH 045/180] Remove DevCtx lock --- paddle/fluid/platform/device_context.cc | 1 - paddle/fluid/platform/device_context.h | 1 - 2 files changed, 2 deletions(-) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 37a77c7ea7..98b4178177 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -159,7 +159,6 @@ CUDADeviceContext::~CUDADeviceContext() { Place CUDADeviceContext::GetPlace() const { return place_; } void CUDADeviceContext::Wait() const { - std::lock_guard guard(this->mutex_); PADDLE_ENFORCE(cudaStreamSynchronize(stream_)); PADDLE_ENFORCE(cudaGetLastError()); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index c43207b641..603b890af1 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -110,7 +110,6 @@ class CUDADeviceContext : public DeviceContext { int compute_capability; int multi_process; int max_threads_per_mp; - mutable std::mutex mutex_; }; template <> From d7badb3ed2d4fdcc42a81dffedf68e131daf5fdb Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:33:35 +0800 Subject: [PATCH 046/180] Use event to sync stream --- paddle/fluid/framework/parallel_executor.cc | 30 ++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index fa6763b5b5..6777aec488 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -315,9 +315,21 @@ ncclDataType_t ToNCCLDataType(std::type_index type) { struct NCCLAllReduceOpHandle : public OpHandle { ParallelExecutorPrivate *member_; + std::vector events_; explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) - : member_(member) {} + : member_(member) { + events_.resize(member_->places_.size()); + for (auto &ev : events_) { + cudaEventCreateWithFlags(&ev, cudaEventDisableTiming); + } + } + + ~NCCLAllReduceOpHandle() { + for (auto &ev : events_) { + cudaEventDestroy(ev); + } + } void Run() override { if (this->inputs_.size() == 1) { @@ -350,6 +362,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream()); + cudaEventRecord(events_[i], nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); @@ -357,8 +370,19 @@ struct NCCLAllReduceOpHandle : public OpHandle { } void Wait(platform::DeviceContext *waited_dev) override { - for (auto &pair : member_->communication_streams_) { - pair.second.ctx_->Wait(); + if (platform::is_cpu_place( + waited_dev->GetPlace())) { // Wait by CPU, just sync stream + for (auto &pair : member_->communication_streams_) { + pair.second.ctx_->Wait(); + } + } else { + if (events_.size() > 1) { + auto stream = + static_cast(waited_dev)->stream(); + for (auto &ev : events_) { + cudaStreamWaitEvent(stream, ev, 0); + } + } } } }; From 29cc9f308d151c23ddbaeef69530f3c7c56a6ce4 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:39:13 +0800 Subject: [PATCH 047/180] SetDev for nccl --- paddle/fluid/framework/parallel_executor.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 6777aec488..f7dc833937 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -358,7 +358,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { } auto &nccl_ctx = member_->communication_streams_.at(dev_id); - + cudaSetDevice(dev_id); platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream()); @@ -519,7 +519,6 @@ void ParallelExecutor::ConstructDependencyGraph( var.name_ = og; var.version_ = vars.size() - 1; op_handle->outputs_.emplace_back(&var); - op_handle->dev_ctx_[p] = member_->CommunicationDevCtx(p); } } From 8af57706e216131937b26ddbd83338883de0d5d1 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:44:31 +0800 Subject: [PATCH 048/180] Only wait same device --- paddle/fluid/framework/parallel_executor.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index f7dc833937..1d9584939f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -315,19 +315,19 @@ ncclDataType_t ToNCCLDataType(std::type_index type) { struct NCCLAllReduceOpHandle : public OpHandle { ParallelExecutorPrivate *member_; - std::vector events_; + std::unordered_map events_; explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) : member_(member) { - events_.resize(member_->places_.size()); - for (auto &ev : events_) { - cudaEventCreateWithFlags(&ev, cudaEventDisableTiming); + for (auto &nccl : member_->communication_streams_) { + cudaEventCreate(&events_[nccl.second.device_id()], + cudaEventDisableTiming); } } ~NCCLAllReduceOpHandle() { for (auto &ev : events_) { - cudaEventDestroy(ev); + cudaEventDestroy(ev.second); } } @@ -362,7 +362,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream()); - cudaEventRecord(events_[i], nccl_ctx.stream()); + cudaEventRecord(events_[dev_id], nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); @@ -377,11 +377,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { } } else { if (events_.size() > 1) { + int dev_id = + boost::get(waited_dev->GetPlace()).device; auto stream = static_cast(waited_dev)->stream(); - for (auto &ev : events_) { - cudaStreamWaitEvent(stream, ev, 0); - } + cudaStreamWaitEvent(stream, events_[dev_id], 0); } } } From 071043c388990465531c14a3ec7644fb80204f08 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:47:55 +0800 Subject: [PATCH 049/180] Add paddle enforce --- paddle/fluid/framework/parallel_executor.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1d9584939f..2e13b3c8c1 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -320,14 +320,14 @@ struct NCCLAllReduceOpHandle : public OpHandle { explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) : member_(member) { for (auto &nccl : member_->communication_streams_) { - cudaEventCreate(&events_[nccl.second.device_id()], - cudaEventDisableTiming); + PADDLE_ENFORCE(cudaEventCreate(&events_[nccl.second.device_id()], + cudaEventDisableTiming)); } } ~NCCLAllReduceOpHandle() { for (auto &ev : events_) { - cudaEventDestroy(ev.second); + PADDLE_ENFORCE(cudaEventDestroy(ev.second)); } } @@ -362,7 +362,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream()); - cudaEventRecord(events_[dev_id], nccl_ctx.stream()); + PADDLE_ENFORCE(cudaEventRecord(events_[dev_id], nccl_ctx.stream())); } platform::dynload::ncclGroupEnd(); @@ -381,7 +381,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { boost::get(waited_dev->GetPlace()).device; auto stream = static_cast(waited_dev)->stream(); - cudaStreamWaitEvent(stream, events_[dev_id], 0); + PADDLE_ENFORCE(cudaStreamWaitEvent(stream, events_[dev_id], 0)); } } } From 9824e8f31160e5a7c6723d58060a9e3d515a684a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 19:55:39 +0800 Subject: [PATCH 050/180] Scale loss op use event --- paddle/fluid/framework/parallel_executor.cc | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 2e13b3c8c1..dc614fc6ba 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -124,12 +124,17 @@ struct ScaleLossGradOpHandle : public OpHandle { float coeff_; Scope *scope_; platform::Place place_; + cudaEvent_t ev_; explicit ScaleLossGradOpHandle(size_t num_dev, Scope *scope, platform::Place place) : coeff_(static_cast(1.0 / num_dev)), scope_(scope), - place_(place) {} + place_(place) { + PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming)); + } + + ~ScaleLossGradOpHandle() { PADDLE_ENFORCE(cudaEventDestroy(ev_)); } void Run() override { std::string var_name = static_cast(this->outputs_[0])->name_; @@ -141,16 +146,23 @@ struct ScaleLossGradOpHandle : public OpHandle { if (platform::is_cpu_place(place_)) { *tmp = coeff_; } else { - memory::Copy( - boost::get(place_), tmp, platform::CPUPlace(), - &coeff_, sizeof(float), + auto stream = static_cast(this->dev_ctx_[place_]) - ->stream()); + ->stream(); + memory::Copy(boost::get(place_), tmp, + platform::CPUPlace(), &coeff_, sizeof(float), stream); + PADDLE_ENFORCE(cudaEventRecord(ev_, stream)); } } void Wait(platform::DeviceContext *waited_dev) override { - this->dev_ctx_.at(place_)->Wait(); + if (platform::is_cpu_place(waited_dev->GetPlace())) { + this->dev_ctx_.at(place_)->Wait(); + } else { + auto stream = + static_cast(waited_dev)->stream(); + PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev_, 0)); + } } }; From 4a330094f9f3e090847a287bb4fe707852c45fc3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:04:35 +0800 Subject: [PATCH 051/180] Add log --- paddle/fluid/framework/parallel_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index dc614fc6ba..94c61461c0 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -795,6 +795,7 @@ void ParallelExecutor::RunOp( auto op_run = [ready_buffer, op, this] { try { + VLOG(10) << op->DebugString(); op->Run(); for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); From bade579826d0e6e82b62b6f0b630dbfee35f65d2 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:08:52 +0800 Subject: [PATCH 052/180] Wait code --- paddle/fluid/framework/parallel_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 94c61461c0..bc9035b302 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -193,7 +193,8 @@ struct FetchOpHandle : public OpHandle { void Run() override { for (auto *input : inputs_) { - input->generated_op_->Wait(nullptr); + auto *var = static_cast(input); + var->generated_op_->Wait(this->dev_ctx_[var->place_]); } tensors_.resize(inputs_.size()); From 7fd0d24e0cf185251d861a81eabcda3a37b907fa Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:13:35 +0800 Subject: [PATCH 053/180] Add lgo --- paddle/fluid/framework/parallel_executor.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index bc9035b302..df04cfc461 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -149,9 +149,15 @@ struct ScaleLossGradOpHandle : public OpHandle { auto stream = static_cast(this->dev_ctx_[place_]) ->stream(); + VLOG(3) << "1"; + PADDLE_ENFORCE(cudaGetLastError()); + VLOG(3) << "2"; memory::Copy(boost::get(place_), tmp, platform::CPUPlace(), &coeff_, sizeof(float), stream); + PADDLE_ENFORCE(cudaGetLastError()); + VLOG(3) << "3"; PADDLE_ENFORCE(cudaEventRecord(ev_, stream)); + VLOG(3) << "4"; } } From dad7bdabd42ac2eeef7b3cb004ca64b6ad388cde Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:17:32 +0800 Subject: [PATCH 054/180] Add setDev --- paddle/fluid/framework/parallel_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index df04cfc461..c3a90149a1 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -149,6 +149,7 @@ struct ScaleLossGradOpHandle : public OpHandle { auto stream = static_cast(this->dev_ctx_[place_]) ->stream(); + cudaSetDevice(boost::get(place_).device); VLOG(3) << "1"; PADDLE_ENFORCE(cudaGetLastError()); VLOG(3) << "2"; @@ -163,7 +164,7 @@ struct ScaleLossGradOpHandle : public OpHandle { void Wait(platform::DeviceContext *waited_dev) override { if (platform::is_cpu_place(waited_dev->GetPlace())) { - this->dev_ctx_.at(place_)->Wait(); + dev_ctx_.at(place_)->Wait(); } else { auto stream = static_cast(waited_dev)->stream(); From 932364a27597e141b167694d9ec94e615965cbfc Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:21:50 +0800 Subject: [PATCH 055/180] Sync dev --- paddle/fluid/framework/parallel_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index c3a90149a1..67e7078fbc 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -155,7 +155,7 @@ struct ScaleLossGradOpHandle : public OpHandle { VLOG(3) << "2"; memory::Copy(boost::get(place_), tmp, platform::CPUPlace(), &coeff_, sizeof(float), stream); - PADDLE_ENFORCE(cudaGetLastError()); + PADDLE_ENFORCE(cudaDeviceSynchronize()); VLOG(3) << "3"; PADDLE_ENFORCE(cudaEventRecord(ev_, stream)); VLOG(3) << "4"; From d55a03d916f2a587d5fd9d2eefc750f20813d3b0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:25:00 +0800 Subject: [PATCH 056/180] Scale loss on place --- paddle/fluid/framework/parallel_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 67e7078fbc..21d9fd259c 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -146,6 +146,7 @@ struct ScaleLossGradOpHandle : public OpHandle { if (platform::is_cpu_place(place_)) { *tmp = coeff_; } else { + VLOG(3) << "Scale loss on place" << place_; auto stream = static_cast(this->dev_ctx_[place_]) ->stream(); From d26f093f9d1f5c3a64f42821cb52fda95b4a54c1 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:32:02 +0800 Subject: [PATCH 057/180] Log --- paddle/fluid/framework/parallel_executor.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 21d9fd259c..1a2e6a5f86 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -132,9 +132,13 @@ struct ScaleLossGradOpHandle : public OpHandle { scope_(scope), place_(place) { PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming)); + VLOG(3) << "Create " << ev_; } - ~ScaleLossGradOpHandle() { PADDLE_ENFORCE(cudaEventDestroy(ev_)); } + ~ScaleLossGradOpHandle() { + VLOG(3) << "Destroy " << ev_; + PADDLE_ENFORCE(cudaEventDestroy(ev_)); + } void Run() override { std::string var_name = static_cast(this->outputs_[0])->name_; @@ -146,20 +150,13 @@ struct ScaleLossGradOpHandle : public OpHandle { if (platform::is_cpu_place(place_)) { *tmp = coeff_; } else { - VLOG(3) << "Scale loss on place" << place_; auto stream = static_cast(this->dev_ctx_[place_]) ->stream(); cudaSetDevice(boost::get(place_).device); - VLOG(3) << "1"; - PADDLE_ENFORCE(cudaGetLastError()); - VLOG(3) << "2"; memory::Copy(boost::get(place_), tmp, platform::CPUPlace(), &coeff_, sizeof(float), stream); - PADDLE_ENFORCE(cudaDeviceSynchronize()); - VLOG(3) << "3"; PADDLE_ENFORCE(cudaEventRecord(ev_, stream)); - VLOG(3) << "4"; } } From 99f85a9fbc704424ab99a0327d09f49d46f82be0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:35:07 +0800 Subject: [PATCH 058/180] Set dev --- paddle/fluid/framework/parallel_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1a2e6a5f86..b78dc3b8ae 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -131,6 +131,7 @@ struct ScaleLossGradOpHandle : public OpHandle { : coeff_(static_cast(1.0 / num_dev)), scope_(scope), place_(place) { + cudaSetDevice(boost::get(place_).device); PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming)); VLOG(3) << "Create " << ev_; } From b94ffacbd722b752871715a78cee52a151fd5445 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:38:43 +0800 Subject: [PATCH 059/180] SetDev --- paddle/fluid/framework/parallel_executor.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b78dc3b8ae..3a92494e7e 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -132,12 +132,12 @@ struct ScaleLossGradOpHandle : public OpHandle { scope_(scope), place_(place) { cudaSetDevice(boost::get(place_).device); + // Must set device before create event PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming)); - VLOG(3) << "Create " << ev_; } ~ScaleLossGradOpHandle() { - VLOG(3) << "Destroy " << ev_; + cudaSetDevice(boost::get(place_).device); PADDLE_ENFORCE(cudaEventDestroy(ev_)); } @@ -339,13 +339,15 @@ struct NCCLAllReduceOpHandle : public OpHandle { explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) : member_(member) { for (auto &nccl : member_->communication_streams_) { - PADDLE_ENFORCE(cudaEventCreate(&events_[nccl.second.device_id()], - cudaEventDisableTiming)); + int dev_id = nccl.second.device_id(); + cudaSetDevice(dev_id); + PADDLE_ENFORCE(cudaEventCreate(&events_[dev_id], cudaEventDisableTiming)); } } ~NCCLAllReduceOpHandle() { for (auto &ev : events_) { + cudaSetDevice(ev.first); PADDLE_ENFORCE(cudaEventDestroy(ev.second)); } } From ee697b8b5a8522d2cec7e44520c28dfc43054c67 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:44:12 +0800 Subject: [PATCH 060/180] Larger model --- .../tests/unittests/test_parallel_executor.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index e156d5b60e..148f0ce5bb 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -46,12 +46,14 @@ class ParallelExecutor(unittest.TestCase): lod_levels=[0, 0], dtypes=['float32', 'int64']) img, label = fluid.layers.read_file(reader) - hidden = fluid.layers.fc( - img, - size=200, - act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) + hidden = img + for _ in xrange(10): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) prediction = fluid.layers.fc(hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.mean(loss) From 48619bc9817c0df92f63e5cbaa5206f7f6ab983b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:45:50 +0800 Subject: [PATCH 061/180] Shrink model --- python/paddle/fluid/tests/unittests/test_parallel_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 148f0ce5bb..c0ec6442de 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -47,7 +47,7 @@ class ParallelExecutor(unittest.TestCase): dtypes=['float32', 'int64']) img, label = fluid.layers.read_file(reader) hidden = img - for _ in xrange(10): + for _ in xrange(2): hidden = fluid.layers.fc( hidden, size=200, From c372ce2885684f9d4af26e2e894d70c33e5d4cc8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 19 Mar 2018 20:54:55 +0800 Subject: [PATCH 062/180] Add event for computational op --- paddle/fluid/framework/parallel_executor.cc | 26 +++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 3a92494e7e..f841b3b7fa 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -92,12 +92,22 @@ struct ComputationOpHandle : public OpHandle { std::unique_ptr op_; Scope *scope_; platform::Place place_; + cudaEvent_t event_; explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, platform::Place place) : op_(framework::OpRegistry::CreateOp(op_desc)), scope_(scope), - place_(place) {} + place_(place) { + if (platform::is_gpu_place(place)) { + cudaSetDevice(boost::get(place_).device); + cudaEventCreateWithFlags(&event_, cudaEventDisableTiming); + } + } + + ~ComputationOpHandle() { + // FIXME: Destroy Event + } void Run() override { // Wait other op if necessary @@ -113,10 +123,22 @@ struct ComputationOpHandle : public OpHandle { } op_->Run(*scope_, place_); + if (platform::is_gpu_place(place_)) { + auto stream = static_cast(dev_ctx_[place_]) + ->stream(); + PADDLE_ENFORCE(cudaEventRecord(event_, stream)); + } } void Wait(platform::DeviceContext *waited_dev) override { - this->dev_ctx_.at(place_)->Wait(); + if (platform::is_cpu_place(waited_dev->GetPlace()) || + platform::is_cpu_place(place_)) { + this->dev_ctx_.at(place_)->Wait(); + } else { + auto stream = + static_cast(waited_dev)->stream(); + PADDLE_ENFORCE(cudaStreamWaitEvent(stream, event_, 0)); + } } }; From c18c2f6ab01082e14e76fdbcf384f577239bcc0f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:15:06 +0800 Subject: [PATCH 063/180] Sync all computation streams at the end of run --- paddle/fluid/framework/parallel_executor.cc | 12 +++++++++--- paddle/fluid/framework/parallel_executor.h | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index f841b3b7fa..0f9bc86972 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -482,7 +482,6 @@ void ParallelExecutor::ConstructDependencyGraph( bool is_forwarding = true; for (auto *op : main_program.Block(0).AllOps()) { bool change_forward = false; - if (!is_forwarding) { // FIXME(yy): Do not hard code like this if (op->OutputArgumentNames().size() == 1 && @@ -573,7 +572,7 @@ void ParallelExecutor::ConstructDependencyGraph( Dependency graph has been constructed. However, there are still data harzaeds need to be handled. */ - PolishGraphToSupportDataHarzaeds(); + PolishGraphToSupportDataHazards(); } /** @@ -583,7 +582,7 @@ void ParallelExecutor::ConstructDependencyGraph( * * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) */ -void ParallelExecutor::PolishGraphToSupportDataHarzaeds() const { +void ParallelExecutor::PolishGraphToSupportDataHazards() const { for (auto &place_pair : member_->vars_) { for (auto &name_pair : place_pair.second) { if (name_pair.second.size() <= 1) { @@ -813,6 +812,13 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; + + // FIXME: + // It could be optimized by using multiple events in an operator. + // Manually sync computation during iter. + for (auto &p : member_->places_) { + platform::DeviceContextPool::Instance().Get(p)->Wait(); + } } void ParallelExecutor::RunOp( diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 03bf60b8bc..cb93c0cd41 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -65,7 +65,7 @@ class ParallelExecutor { std::unordered_map>& pending_vars, OpHandle* op) const; - void PolishGraphToSupportDataHarzaeds() const; + void PolishGraphToSupportDataHazards() const; }; } // namespace framework From d3c82c356e806d17d399f152948dee3c8ac169e8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:18:37 +0800 Subject: [PATCH 064/180] Wait multiple stream --- paddle/fluid/framework/parallel_executor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 0f9bc86972..f4f5ab6a6f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -816,6 +816,10 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, // FIXME: // It could be optimized by using multiple events in an operator. // Manually sync computation during iter. + for (auto &s : member_->communication_streams_) { + s.second.ctx_->Wait(); + } + for (auto &p : member_->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } From 3da4159f88e8715abb60f6a8c475b4d59b8f3ef6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:20:56 +0800 Subject: [PATCH 065/180] Add run iter --- paddle/fluid/framework/parallel_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index f4f5ab6a6f..1847a4dfa5 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -707,6 +707,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const { void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { + VLOG(3) << "Run iter"; auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); From 4137bb4eda7692b06b986ed7ede8f09ec2f28fb0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:28:40 +0800 Subject: [PATCH 066/180] Add wait --- paddle/fluid/framework/parallel_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1847a4dfa5..d3122353af 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -813,7 +813,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; - + VLOG(3) << "Before Wait"; // FIXME: // It could be optimized by using multiple events in an operator. // Manually sync computation during iter. @@ -824,6 +824,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, for (auto &p : member_->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } + VLOG(3) << "Done wait"; } void ParallelExecutor::RunOp( From d2cb3790e9aecc74cd9915b12346a4c7076f5510 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:38:15 +0800 Subject: [PATCH 067/180] Wait all evernts --- paddle/fluid/framework/parallel_executor.cc | 6 +++--- .../paddle/fluid/tests/unittests/test_parallel_executor.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d3122353af..cb1b080eea 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -420,11 +420,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { } } else { if (events_.size() > 1) { - int dev_id = - boost::get(waited_dev->GetPlace()).device; auto stream = static_cast(waited_dev)->stream(); - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, events_[dev_id], 0)); + for (auto &ev : events_) { + PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); + } } } } diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index c0ec6442de..cabb8e769d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -47,7 +47,7 @@ class ParallelExecutor(unittest.TestCase): dtypes=['float32', 'int64']) img, label = fluid.layers.read_file(reader) hidden = img - for _ in xrange(2): + for _ in xrange(4): hidden = fluid.layers.fc( hidden, size=200, @@ -60,7 +60,7 @@ class ParallelExecutor(unittest.TestCase): adam = fluid.optimizer.Adam() adam.minimize(loss) act_places = [] - for each in [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]: + for each in [fluid.CUDAPlace(0)]: p = fluid.core.Place() p.set_place(each) act_places.append(p) From 8a9de67e179bea067302da949e76d36822ccd9dd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:42:27 +0800 Subject: [PATCH 068/180] Remove wait --- paddle/fluid/framework/parallel_executor.cc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index cb1b080eea..409cb3fbb9 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -813,18 +813,6 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; - VLOG(3) << "Before Wait"; - // FIXME: - // It could be optimized by using multiple events in an operator. - // Manually sync computation during iter. - for (auto &s : member_->communication_streams_) { - s.second.ctx_->Wait(); - } - - for (auto &p : member_->places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); - } - VLOG(3) << "Done wait"; } void ParallelExecutor::RunOp( From 3238ce06727d1daadfd5c93c12b7e9073f75e695 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:47:01 +0800 Subject: [PATCH 069/180] Add wait --- paddle/fluid/framework/parallel_executor.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 409cb3fbb9..6408ecdd37 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -813,6 +813,16 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, fetch_ops.clear(); *member_->global_scope_->Var(fetched_var_name)->GetMutable() = fetched_data->tensors_; + // FIXME: + // It could be optimized by using multiple events in an operator. + // Manually sync computation during iter. + for (auto &s : member_->communication_streams_) { + s.second.ctx_->Wait(); + } + + for (auto &p : member_->places_) { + platform::DeviceContextPool::Instance().Get(p)->Wait(); + } } void ParallelExecutor::RunOp( From e025e284c662ccab9089359eadb07637ae32f19a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 12:56:03 +0800 Subject: [PATCH 070/180] Exchange wait op --- paddle/fluid/framework/parallel_executor.cc | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 6408ecdd37..07dfddfa30 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -810,19 +810,13 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } - fetch_ops.clear(); - *member_->global_scope_->Var(fetched_var_name)->GetMutable() = - fetched_data->tensors_; - // FIXME: - // It could be optimized by using multiple events in an operator. - // Manually sync computation during iter. - for (auto &s : member_->communication_streams_) { - s.second.ctx_->Wait(); - } - for (auto &p : member_->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } + + fetch_ops.clear(); + *member_->global_scope_->Var(fetched_var_name)->GetMutable() = + fetched_data->tensors_; } void ParallelExecutor::RunOp( From 260cfe3b865d48a09ff903bb1f7816d1d055da73 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 13:08:46 +0800 Subject: [PATCH 071/180] Stop Wait NCCL Stream --- paddle/fluid/framework/parallel_executor.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 07dfddfa30..d0c4d8dd8b 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -211,9 +211,6 @@ struct FetchOpHandle : public OpHandle { for (auto *input_var : inputs_) { input_var->pending_ops_.erase(this); } - for (auto &pair : dev_ctx_) { - pair.second->Wait(); - } // Lazily merge tensors. Will faster code. MergeTensors(); From feb569f8ea9808dadce26e9ebdad43d9a7e67587 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 14:59:12 +0800 Subject: [PATCH 072/180] Add log --- paddle/fluid/framework/parallel_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d0c4d8dd8b..f9fc35d8ce 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -376,7 +376,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { return; // No need to all reduce when GPU count = 1; } else { auto &var_name = static_cast(this->inputs_[0])->name_; - + VLOG(3) << "Invoke NCCL AllReduce"; int dtype = -1; size_t numel = 0; From 9b1f4d5d621d2d0d24f884c4afde8e974fd9ed9c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 15:31:57 +0800 Subject: [PATCH 073/180] After nccl add event --- paddle/fluid/framework/parallel_executor.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index f9fc35d8ce..21a19cb5b2 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -402,10 +402,13 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream()); - PADDLE_ENFORCE(cudaEventRecord(events_[dev_id], nccl_ctx.stream())); } - platform::dynload::ncclGroupEnd(); + + for (auto &ev : events_) { + PADDLE_ENFORCE(cudaEventRecord( + ev.second, member_->communication_streams_.at(ev.first).stream())); + } } } From 631aa3d10a33a1fbb52f9c6ec0ebd5022b80ede7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 15:38:26 +0800 Subject: [PATCH 074/180] Wait all inputs ready --- paddle/fluid/framework/parallel_executor.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 21a19cb5b2..248a1b4a25 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -375,6 +375,12 @@ struct NCCLAllReduceOpHandle : public OpHandle { if (this->inputs_.size() == 1) { return; // No need to all reduce when GPU count = 1; } else { + // Wait input done + for (auto *in : inputs_) { + auto &p = static_cast(in)->place_; + in->generated_op_->Wait(dev_ctx_[p]); + } + auto &var_name = static_cast(this->inputs_[0])->name_; VLOG(3) << "Invoke NCCL AllReduce"; int dtype = -1; From 4185dd48e4bc506d7a579e8b1ed95d1b65336698 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 15:59:05 +0800 Subject: [PATCH 075/180] Disable multi-thread --- paddle/fluid/framework/parallel_executor.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 248a1b4a25..25f8d7afde 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -84,8 +84,8 @@ struct OpHandle { virtual ~OpHandle() {} - virtual void Run() { PADDLE_THROW("Not implemented"); } - virtual void Wait(platform::DeviceContext *waited_dev) {} + virtual void Run() = 0; + virtual void Wait(platform::DeviceContext *waited_dev) = 0; }; struct ComputationOpHandle : public OpHandle { @@ -382,7 +382,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { } auto &var_name = static_cast(this->inputs_[0])->name_; - VLOG(3) << "Invoke NCCL AllReduce"; int dtype = -1; size_t numel = 0; @@ -848,7 +847,8 @@ void ParallelExecutor::RunOp( LOG(FATAL) << "Unknown exception catched"; } }; - member_->pool_.enqueue(op_run); + op_run(); + // member_->pool_.enqueue(op_run); } } // namespace framework } // namespace paddle From 1dd216dc3b7a293bcecda34da00ad1ef8ca6f192 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:04:20 +0800 Subject: [PATCH 076/180] Wait bcast param --- paddle/fluid/framework/parallel_executor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 25f8d7afde..66ad3f33d9 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -690,6 +690,10 @@ void ParallelExecutor::BCastParamsToGPUs( } platform::dynload::ncclGroupEnd(); } + + for (auto &stream : member_->communication_streams_) { + stream.second.ctx_->Wait(); + } } #else PADDLE_THROW("Not compiled with CUDA"); From f251a58e852503054eaba612665733b6d34bb7e9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:28:09 +0800 Subject: [PATCH 077/180] Use base class manage events --- paddle/fluid/framework/parallel_executor.cc | 156 ++++++++------------ 1 file changed, 60 insertions(+), 96 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 66ad3f33d9..335a063c4b 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -68,6 +68,8 @@ struct OpHandle { platform::PlaceHash> dev_ctx_; + std::unordered_map events_; + std::string DebugString() { std::stringstream ss; ss << "("; @@ -84,32 +86,57 @@ struct OpHandle { virtual ~OpHandle() {} - virtual void Run() = 0; - virtual void Wait(platform::DeviceContext *waited_dev) = 0; + void Run() { + if (events_.empty()) { + for (auto &p : dev_ctx_) { + int dev_id = boost::get(p.first).device; + cudaSetDevice(dev_id); + cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming); + } + } + + RunImpl(); + + for (auto &p : dev_ctx_) { + int dev_id = boost::get(p.first).device; + auto stream = + static_cast(p.second)->stream(); + cudaEventRecord(events_.at(dev_id), stream); + } + } + + virtual void Wait(platform::DeviceContext *waited_dev) { + if (platform::is_cpu_place(waited_dev->GetPlace())) { + for (auto &dev_ctx : dev_ctx_) { + dev_ctx.second->Wait(); + } + } else { + auto stream = + static_cast(waited_dev)->stream(); + + for (auto &ev : events_) { + PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); + } + } + } + + protected: + virtual void RunImpl() = 0; }; struct ComputationOpHandle : public OpHandle { std::unique_ptr op_; Scope *scope_; platform::Place place_; - cudaEvent_t event_; explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, platform::Place place) : op_(framework::OpRegistry::CreateOp(op_desc)), scope_(scope), - place_(place) { - if (platform::is_gpu_place(place)) { - cudaSetDevice(boost::get(place_).device); - cudaEventCreateWithFlags(&event_, cudaEventDisableTiming); - } - } - - ~ComputationOpHandle() { - // FIXME: Destroy Event - } + place_(place) {} - void Run() override { + protected: + void RunImpl() override { // Wait other op if necessary if (platform::is_gpu_place(place_)) { int dev_id = boost::get(place_).device; @@ -123,22 +150,6 @@ struct ComputationOpHandle : public OpHandle { } op_->Run(*scope_, place_); - if (platform::is_gpu_place(place_)) { - auto stream = static_cast(dev_ctx_[place_]) - ->stream(); - PADDLE_ENFORCE(cudaEventRecord(event_, stream)); - } - } - - void Wait(platform::DeviceContext *waited_dev) override { - if (platform::is_cpu_place(waited_dev->GetPlace()) || - platform::is_cpu_place(place_)) { - this->dev_ctx_.at(place_)->Wait(); - } else { - auto stream = - static_cast(waited_dev)->stream(); - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, event_, 0)); - } } }; @@ -146,7 +157,6 @@ struct ScaleLossGradOpHandle : public OpHandle { float coeff_; Scope *scope_; platform::Place place_; - cudaEvent_t ev_; explicit ScaleLossGradOpHandle(size_t num_dev, Scope *scope, platform::Place place) @@ -154,16 +164,14 @@ struct ScaleLossGradOpHandle : public OpHandle { scope_(scope), place_(place) { cudaSetDevice(boost::get(place_).device); - // Must set device before create event - PADDLE_ENFORCE(cudaEventCreateWithFlags(&ev_, cudaEventDisableTiming)); } ~ScaleLossGradOpHandle() { cudaSetDevice(boost::get(place_).device); - PADDLE_ENFORCE(cudaEventDestroy(ev_)); } - void Run() override { + protected: + void RunImpl() override { std::string var_name = static_cast(this->outputs_[0])->name_; float *tmp = scope_->FindVar(var_name) @@ -176,20 +184,8 @@ struct ScaleLossGradOpHandle : public OpHandle { auto stream = static_cast(this->dev_ctx_[place_]) ->stream(); - cudaSetDevice(boost::get(place_).device); memory::Copy(boost::get(place_), tmp, platform::CPUPlace(), &coeff_, sizeof(float), stream); - PADDLE_ENFORCE(cudaEventRecord(ev_, stream)); - } - } - - void Wait(platform::DeviceContext *waited_dev) override { - if (platform::is_cpu_place(waited_dev->GetPlace())) { - dev_ctx_.at(place_)->Wait(); - } else { - auto stream = - static_cast(waited_dev)->stream(); - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev_, 0)); } } }; @@ -216,7 +212,12 @@ struct FetchOpHandle : public OpHandle { MergeTensors(); } - void Run() override { + void Wait(platform::DeviceContext *waited_dev) override { + PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); + } + + protected: + void RunImpl() override { for (auto *input : inputs_) { auto *var = static_cast(input); var->generated_op_->Wait(this->dev_ctx_[var->place_]); @@ -240,10 +241,6 @@ struct FetchOpHandle : public OpHandle { } } - void Wait(platform::DeviceContext *waited_dev) override { - PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); - } - private: void MergeTensors() const { std::vector tensors_ptr; @@ -256,8 +253,8 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads = 12) - : pool_(num_threads) {} + explicit ParallelExecutorPrivate(size_t num_threads = 0) + : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; @@ -333,7 +330,7 @@ class ParallelExecutorPrivate { std::vector> ops_; // Use a simpler thread pool, might be faster. - ThreadPool pool_; + std::unique_ptr pool_; std::unique_ptr exception_; }; @@ -353,25 +350,12 @@ ncclDataType_t ToNCCLDataType(std::type_index type) { struct NCCLAllReduceOpHandle : public OpHandle { ParallelExecutorPrivate *member_; - std::unordered_map events_; explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) - : member_(member) { - for (auto &nccl : member_->communication_streams_) { - int dev_id = nccl.second.device_id(); - cudaSetDevice(dev_id); - PADDLE_ENFORCE(cudaEventCreate(&events_[dev_id], cudaEventDisableTiming)); - } - } + : member_(member) {} - ~NCCLAllReduceOpHandle() { - for (auto &ev : events_) { - cudaSetDevice(ev.first); - PADDLE_ENFORCE(cudaEventDestroy(ev.second)); - } - } - - void Run() override { + protected: + void RunImpl() override { if (this->inputs_.size() == 1) { return; // No need to all reduce when GPU count = 1; } else { @@ -403,34 +387,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { } auto &nccl_ctx = member_->communication_streams_.at(dev_id); - cudaSetDevice(dev_id); platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); - - for (auto &ev : events_) { - PADDLE_ENFORCE(cudaEventRecord( - ev.second, member_->communication_streams_.at(ev.first).stream())); - } - } - } - - void Wait(platform::DeviceContext *waited_dev) override { - if (platform::is_cpu_place( - waited_dev->GetPlace())) { // Wait by CPU, just sync stream - for (auto &pair : member_->communication_streams_) { - pair.second.ctx_->Wait(); - } - } else { - if (events_.size() > 1) { - auto stream = - static_cast(waited_dev)->stream(); - for (auto &ev : events_) { - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); - } - } } } }; @@ -851,8 +812,11 @@ void ParallelExecutor::RunOp( LOG(FATAL) << "Unknown exception catched"; } }; - op_run(); - // member_->pool_.enqueue(op_run); + if (member_->pool_) { + member_->pool_->enqueue(op_run); + } else { + op_run(); + } } } // namespace framework } // namespace paddle From ca4b3d25326d0c1f910a1b68e883eac17b1dc143 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:37:50 +0800 Subject: [PATCH 078/180] Use 12 threads --- paddle/fluid/framework/parallel_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 335a063c4b..344587897f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -253,7 +253,7 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads = 0) + explicit ParallelExecutorPrivate(size_t num_threads = 12) : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; From 7643c2cbab8d9efb7b0dbb96d1d418abedd7d043 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:43:53 +0800 Subject: [PATCH 079/180] Add flag for use event --- paddle/fluid/framework/parallel_executor.cc | 29 ++++++++++++--------- paddle/fluid/framework/parallel_executor.h | 1 + 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 344587897f..121302880c 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -86,8 +86,8 @@ struct OpHandle { virtual ~OpHandle() {} - void Run() { - if (events_.empty()) { + void Run(bool use_event) { + if (events_.empty() && use_event) { for (auto &p : dev_ctx_) { int dev_id = boost::get(p.first).device; cudaSetDevice(dev_id); @@ -97,16 +97,18 @@ struct OpHandle { RunImpl(); - for (auto &p : dev_ctx_) { - int dev_id = boost::get(p.first).device; - auto stream = - static_cast(p.second)->stream(); - cudaEventRecord(events_.at(dev_id), stream); + if (use_event) { + for (auto &p : dev_ctx_) { + int dev_id = boost::get(p.first).device; + auto stream = + static_cast(p.second)->stream(); + cudaEventRecord(events_.at(dev_id), stream); + } } } virtual void Wait(platform::DeviceContext *waited_dev) { - if (platform::is_cpu_place(waited_dev->GetPlace())) { + if (platform::is_cpu_place(waited_dev->GetPlace()) && events_.empty()) { for (auto &dev_ctx : dev_ctx_) { dev_ctx.second->Wait(); } @@ -677,7 +679,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const { void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { - VLOG(3) << "Run iter"; + bool use_event = false; auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); @@ -748,7 +750,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } for (auto *op : to_run) { - RunOp(pending_vars, op); + RunOp(use_event, pending_vars, op); } while (!pending_vars.empty()) { @@ -776,7 +778,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } for (auto *op : to_run) { pending_ops.erase(op); - RunOp(pending_vars, op); + RunOp(use_event, pending_vars, op); } } @@ -790,6 +792,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } void ParallelExecutor::RunOp( + bool use_event, std::unordered_map> &pending_vars, OpHandle *op) const { std::vector *> *ready_buffer = @@ -798,10 +801,10 @@ void ParallelExecutor::RunOp( ready_buffer->emplace_back(&pending_vars[var]); } - auto op_run = [ready_buffer, op, this] { + auto op_run = [ready_buffer, op, this, use_event] { try { VLOG(10) << op->DebugString(); - op->Run(); + op->Run(use_event); for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index cb93c0cd41..2345bffcc7 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -62,6 +62,7 @@ class ParallelExecutor { void BuildNCCLCommunicator() const; void RunOp( + bool use_event, std::unordered_map>& pending_vars, OpHandle* op) const; From fbbcedda01656e8e2183b2e88d5db2dbd2b08c7a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:46:55 +0800 Subject: [PATCH 080/180] Fix bug --- paddle/fluid/framework/parallel_executor.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 121302880c..2a1652f749 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -108,14 +108,13 @@ struct OpHandle { } virtual void Wait(platform::DeviceContext *waited_dev) { - if (platform::is_cpu_place(waited_dev->GetPlace()) && events_.empty()) { + if (platform::is_cpu_place(waited_dev->GetPlace()) || events_.empty()) { for (auto &dev_ctx : dev_ctx_) { dev_ctx.second->Wait(); } } else { auto stream = static_cast(waited_dev)->stream(); - for (auto &ev : events_) { PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); } From f8f1a963d9508cbdbd37c61554e8ffac9bf4a6ab Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:52:20 +0800 Subject: [PATCH 081/180] Add debug code --- paddle/fluid/framework/parallel_executor.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 2a1652f749..d1652a3030 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -365,6 +365,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &p = static_cast(in)->place_; in->generated_op_->Wait(dev_ctx_[p]); } + PADDLE_ENFORCE(cudaDeviceSynchronize()); auto &var_name = static_cast(this->inputs_[0])->name_; int dtype = -1; @@ -393,6 +394,8 @@ struct NCCLAllReduceOpHandle : public OpHandle { nccl_ctx.comm, nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); + + PADDLE_ENFORCE(cudaDeviceSynchronize()); } } }; From 3c9cea597e1e3075f8b56d0c7d11febe1a384033 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 16:58:37 +0800 Subject: [PATCH 082/180] Add more log --- paddle/fluid/framework/parallel_executor.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d1652a3030..24a9dcacf2 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -365,6 +365,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &p = static_cast(in)->place_; in->generated_op_->Wait(dev_ctx_[p]); } + VLOG(3) << "Before NCCL"; PADDLE_ENFORCE(cudaDeviceSynchronize()); auto &var_name = static_cast(this->inputs_[0])->name_; @@ -394,8 +395,9 @@ struct NCCLAllReduceOpHandle : public OpHandle { nccl_ctx.comm, nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); - PADDLE_ENFORCE(cudaDeviceSynchronize()); + + VLOG(3) << "After NCCL"; } } }; From a8bd7b9809a1953396b7f985e6154e42b13b82e6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 17:03:13 +0800 Subject: [PATCH 083/180] Add log --- paddle/fluid/framework/parallel_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 24a9dcacf2..e0b75b2342 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -109,6 +109,7 @@ struct OpHandle { virtual void Wait(platform::DeviceContext *waited_dev) { if (platform::is_cpu_place(waited_dev->GetPlace()) || events_.empty()) { + VLOG(4) << "I am here"; for (auto &dev_ctx : dev_ctx_) { dev_ctx.second->Wait(); } From e53b6aba63a1635b137a57b15410f2eeda180e8e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 17:06:41 +0800 Subject: [PATCH 084/180] Use no thread --- paddle/fluid/framework/parallel_executor.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index e0b75b2342..31a49575f1 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -109,7 +109,6 @@ struct OpHandle { virtual void Wait(platform::DeviceContext *waited_dev) { if (platform::is_cpu_place(waited_dev->GetPlace()) || events_.empty()) { - VLOG(4) << "I am here"; for (auto &dev_ctx : dev_ctx_) { dev_ctx.second->Wait(); } @@ -255,7 +254,7 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads = 12) + explicit ParallelExecutorPrivate(size_t num_threads = 0) : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; @@ -397,8 +396,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { } platform::dynload::ncclGroupEnd(); PADDLE_ENFORCE(cudaDeviceSynchronize()); - - VLOG(3) << "After NCCL"; } } }; From dbed1233823b081071752275bbc770125d08fff0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 17:08:53 +0800 Subject: [PATCH 085/180] Debug --- paddle/fluid/framework/parallel_executor.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 31a49575f1..d3e846d10d 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -365,8 +365,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &p = static_cast(in)->place_; in->generated_op_->Wait(dev_ctx_[p]); } - VLOG(3) << "Before NCCL"; - PADDLE_ENFORCE(cudaDeviceSynchronize()); auto &var_name = static_cast(this->inputs_[0])->name_; int dtype = -1; @@ -395,7 +393,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { nccl_ctx.comm, nccl_ctx.stream()); } platform::dynload::ncclGroupEnd(); - PADDLE_ENFORCE(cudaDeviceSynchronize()); } } }; From 4e43b713779971d681b8d224b336bfb29abb67e2 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 17:13:00 +0800 Subject: [PATCH 086/180] Add wait log --- paddle/fluid/framework/parallel_executor.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d3e846d10d..8630e51d0d 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -146,6 +146,7 @@ struct ComputationOpHandle : public OpHandle { auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { + VLOG(3) << "Wait " << in->generated_op_->DebugString(); in->generated_op_->Wait(cur_ctx); } } @@ -163,13 +164,9 @@ struct ScaleLossGradOpHandle : public OpHandle { platform::Place place) : coeff_(static_cast(1.0 / num_dev)), scope_(scope), - place_(place) { - cudaSetDevice(boost::get(place_).device); - } + place_(place) {} - ~ScaleLossGradOpHandle() { - cudaSetDevice(boost::get(place_).device); - } + ~ScaleLossGradOpHandle() {} protected: void RunImpl() override { From a0494f8e5548aa0b6493e7205fd890cf3c24df83 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 17:16:06 +0800 Subject: [PATCH 087/180] Mutex lock wait --- paddle/fluid/platform/device_context.cc | 1 + paddle/fluid/platform/device_context.h | 1 + 2 files changed, 2 insertions(+) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 98b4178177..ab02a95f26 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -159,6 +159,7 @@ CUDADeviceContext::~CUDADeviceContext() { Place CUDADeviceContext::GetPlace() const { return place_; } void CUDADeviceContext::Wait() const { + std::lock_guard guard(mutex_); PADDLE_ENFORCE(cudaStreamSynchronize(stream_)); PADDLE_ENFORCE(cudaGetLastError()); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 603b890af1..df0a427b48 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -103,6 +103,7 @@ class CUDADeviceContext : public DeviceContext { std::unique_ptr eigen_device_; std::unique_ptr eigen_stream_; + mutable std::mutex mutex_; cudaStream_t stream_; cudnnHandle_t cudnn_handle_; cublasHandle_t cublas_handle_; From 1c2b6100b05f99bf8351c3a1124a42e1a3cd83c1 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 17:16:36 +0800 Subject: [PATCH 088/180] Add --- paddle/fluid/framework/parallel_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 8630e51d0d..aa52cbb7bf 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -251,7 +251,7 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads = 0) + explicit ParallelExecutorPrivate(size_t num_threads = 12) : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; From 798e6907b42a8f60b730d99033a0d5715a6698df Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:00:06 +0800 Subject: [PATCH 089/180] Change mem order --- paddle/fluid/framework/parallel_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index aa52cbb7bf..b869097662 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -752,7 +752,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, while (!pending_vars.empty()) { VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { - if (pair.second.load(std::memory_order_consume)) { + if (pair.second.load(std::memory_order_acquire)) { ready_var = pair.first; } } From 95a0d7c7c14f5df4e4a455de76d30b905ee0df22 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:05:56 +0800 Subject: [PATCH 090/180] Illegal memory access --- paddle/fluid/framework/parallel_executor.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b869097662..daa19eb17c 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -138,15 +138,9 @@ struct ComputationOpHandle : public OpHandle { protected: void RunImpl() override { - // Wait other op if necessary - if (platform::is_gpu_place(place_)) { - int dev_id = boost::get(place_).device; - cudaSetDevice(dev_id); - } auto *cur_ctx = dev_ctx_[place_]; for (auto *in : inputs_) { if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { - VLOG(3) << "Wait " << in->generated_op_->DebugString(); in->generated_op_->Wait(cur_ctx); } } From ed7727e8f04c215f4ff77f486e46347efe0ad3cd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:17:13 +0800 Subject: [PATCH 091/180] Fix bug in system allocator --- paddle/fluid/memory/detail/system_allocator.cc | 11 +++++++++++ paddle/fluid/memory/detail/system_allocator.h | 3 +++ paddle/fluid/memory/memory.cc | 2 +- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc index 8ac8978120..9949d80434 100644 --- a/paddle/fluid/memory/detail/system_allocator.cc +++ b/paddle/fluid/memory/detail/system_allocator.cc @@ -79,7 +79,18 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) { // if size is 0. We just make sure it does. if (size <= 0) return nullptr; void* p; + int prev_id; + cudaGetDevice(&prev_id); + if (prev_id != gpu_id_) { + cudaSetDevice(gpu_id_); + } + cudaError_t result = cudaMalloc(&p, size); + + if (prev_id != gpu_id_) { + cudaSetDevice(prev_id); + } + if (result == cudaSuccess) { index = 0; gpu_alloc_size_ += size; diff --git a/paddle/fluid/memory/detail/system_allocator.h b/paddle/fluid/memory/detail/system_allocator.h index e93c2c1e32..c103d08640 100644 --- a/paddle/fluid/memory/detail/system_allocator.h +++ b/paddle/fluid/memory/detail/system_allocator.h @@ -43,6 +43,8 @@ class CPUAllocator : public SystemAllocator { #ifdef PADDLE_WITH_CUDA class GPUAllocator : public SystemAllocator { public: + explicit GPUAllocator(int gpu_id) : gpu_id_(gpu_id) {} + virtual void* Alloc(size_t& index, size_t size); virtual void Free(void* p, size_t size, size_t index); virtual bool UseGpu() const; @@ -50,6 +52,7 @@ class GPUAllocator : public SystemAllocator { private: size_t gpu_alloc_size_ = 0; size_t fallback_alloc_size_ = 0; + int gpu_id_; }; #endif diff --git a/paddle/fluid/memory/memory.cc b/paddle/fluid/memory/memory.cc index d07f89439a..1985f1f4e6 100644 --- a/paddle/fluid/memory/memory.cc +++ b/paddle/fluid/memory/memory.cc @@ -69,7 +69,7 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { } platform::SetDeviceId(gpu_id); if (!as[gpu_id]) { - as[gpu_id] = new BuddyAllocator(new detail::GPUAllocator, + as[gpu_id] = new BuddyAllocator(new detail::GPUAllocator(gpu_id), platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); VLOG(10) << "\n\nNOTE: each GPU device use " From 176277b824ec0c8fad774b731dff176c30ce17cd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:26:28 +0800 Subject: [PATCH 092/180] Add log --- paddle/fluid/memory/memory.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/memory/memory.cc b/paddle/fluid/memory/memory.cc index 1985f1f4e6..a12cdd45aa 100644 --- a/paddle/fluid/memory/memory.cc +++ b/paddle/fluid/memory/memory.cc @@ -90,6 +90,7 @@ size_t Used(platform::CUDAPlace place) { template <> void* Alloc(platform::CUDAPlace place, size_t size) { auto* buddy_allocator = GetGPUBuddyAllocator(place.device); + VLOG(30) << "Allocating " << size << " bytes on " << place; auto* ptr = buddy_allocator->Alloc(size); if (ptr == nullptr) { int cur_dev = platform::GetCurrentDeviceId(); From 1533bf12dfa057bc7e34be540a391cb83d4dc9bb Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:38:02 +0800 Subject: [PATCH 093/180] Use event and single thread --- paddle/fluid/framework/parallel_executor.cc | 4 ++-- paddle/fluid/memory/memory.cc | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index daa19eb17c..f1b8a20e41 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -245,7 +245,7 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads = 12) + explicit ParallelExecutorPrivate(size_t num_threads = 0) : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; @@ -669,7 +669,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const { void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { - bool use_event = false; + bool use_event = true; auto fetched_data = std::make_shared(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); diff --git a/paddle/fluid/memory/memory.cc b/paddle/fluid/memory/memory.cc index a12cdd45aa..1985f1f4e6 100644 --- a/paddle/fluid/memory/memory.cc +++ b/paddle/fluid/memory/memory.cc @@ -90,7 +90,6 @@ size_t Used(platform::CUDAPlace place) { template <> void* Alloc(platform::CUDAPlace place, size_t size) { auto* buddy_allocator = GetGPUBuddyAllocator(place.device); - VLOG(30) << "Allocating " << size << " bytes on " << place; auto* ptr = buddy_allocator->Alloc(size); if (ptr == nullptr) { int cur_dev = platform::GetCurrentDeviceId(); From ba227df9419bbb2f8b3ac5636674c176cced3f19 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:41:57 +0800 Subject: [PATCH 094/180] Expose num_threads --- paddle/fluid/framework/parallel_executor.cc | 6 +++--- paddle/fluid/framework/parallel_executor.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index f1b8a20e41..bbfaac7339 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -245,7 +245,7 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads = 0) + explicit ParallelExecutorPrivate(size_t num_threads) : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; @@ -389,11 +389,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { }; ParallelExecutor::ParallelExecutor( - const std::vector &places, + size_t num_threads, const std::vector &places, const std::unordered_set ¶ms, const ProgramDesc &startup_program, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope) - : member_(new ParallelExecutorPrivate()) { + : member_(new ParallelExecutorPrivate(num_threads)) { member_->places_ = places; member_->global_scope_ = scope; // Step 1. RunStartupProgram and Bcast the params to devs. diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 2345bffcc7..c206e726a7 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -35,7 +35,8 @@ class VarHandleBase; class ParallelExecutor { public: - explicit ParallelExecutor(const std::vector& places, + explicit ParallelExecutor(size_t num_threads, + const std::vector& places, const std::unordered_set& params, const ProgramDesc& startup_program, const ProgramDesc& main_program, From d42117e7422facdbffbd77d3f5b2841fe6ad5ed9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:42:40 +0800 Subject: [PATCH 095/180] Set NumThreads --- paddle/fluid/pybind/pybind.cc | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 929c343f7a..60662244cc 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -498,16 +498,17 @@ All parameter, weight, gradient are variables in Paddle. m.def("reset_profiler", platform::ResetProfiler); py::class_(m, "ParallelExecutor") - .def( - "__init__", - [](ParallelExecutor &self, const std::vector &places, - const std::unordered_set ¶ms, - const ProgramDesc &startup_program, - const ProgramDesc &main_program, const std::string &loss_var_name, - Scope *scope) { - new (&self) ParallelExecutor(places, params, startup_program, - main_program, loss_var_name, scope); - }) + .def("__init__", + [](ParallelExecutor &self, size_t num_threads, + const std::vector &places, + const std::unordered_set ¶ms, + const ProgramDesc &startup_program, + const ProgramDesc &main_program, const std::string &loss_var_name, + Scope *scope) { + new (&self) + ParallelExecutor(num_threads, places, params, startup_program, + main_program, loss_var_name, scope); + }) .def("run", &ParallelExecutor::Run); BindRecordIOWriter(m); From 65bc7d17d52741cd124a00444bf063195e4f9c5d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 18:46:20 +0800 Subject: [PATCH 096/180] Add mtx to ncclAllReduce --- paddle/fluid/framework/parallel_executor.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index bbfaac7339..d61f1438a6 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -340,6 +340,8 @@ ncclDataType_t ToNCCLDataType(std::type_index type) { } } +static std::mutex g_nccl_mtx_; + struct NCCLAllReduceOpHandle : public OpHandle { ParallelExecutorPrivate *member_; @@ -361,6 +363,8 @@ struct NCCLAllReduceOpHandle : public OpHandle { int dtype = -1; size_t numel = 0; + std::lock_guard g(g_nccl_mtx_); + platform::dynload::ncclGroupStart(); for (size_t i = 0; i < member_->local_scopes_.size(); ++i) { From eb0a580e78da1418e66358278fc2270b6406ef80 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 19:08:44 +0800 Subject: [PATCH 097/180] Add enforce --- paddle/fluid/framework/parallel_executor.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d61f1438a6..b8751662c3 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -246,7 +246,7 @@ struct FetchOpHandle : public OpHandle { class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(size_t num_threads) - : pool_(num_threads == 0 ? nullptr : new ThreadPool(num_threads)) {} + : pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; @@ -365,7 +365,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { std::lock_guard g(g_nccl_mtx_); - platform::dynload::ncclGroupStart(); + PADDLE_ENFORCE(platform::dynload::ncclGroupStart()); for (size_t i = 0; i < member_->local_scopes_.size(); ++i) { auto &p = member_->places_[i]; @@ -383,11 +383,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { } auto &nccl_ctx = member_->communication_streams_.at(dev_id); - platform::dynload::ncclAllReduce( + PADDLE_ENFORCE(platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, - nccl_ctx.comm, nccl_ctx.stream()); + nccl_ctx.comm, nccl_ctx.stream())); } - platform::dynload::ncclGroupEnd(); + PADDLE_ENFORCE(platform::dynload::ncclGroupEnd()); } } }; From 82693e72273599da5a0ffc8e21790665279d4a4b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 19:14:27 +0800 Subject: [PATCH 098/180] Wait nccl all reduce --- paddle/fluid/framework/parallel_executor.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b8751662c3..8ee2e57324 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -348,6 +348,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) : member_(member) {} + void Wait(platform::DeviceContext *waited_dev) override { + VLOG(3) << "Wait nccl all reduce op"; + OpHandle::Wait(waited_dev); + } + protected: void RunImpl() override { if (this->inputs_.size() == 1) { @@ -381,7 +386,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { if (numel == 0) { numel = static_cast(lod_tensor.numel()); } - auto &nccl_ctx = member_->communication_streams_.at(dev_id); PADDLE_ENFORCE(platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, From e335f01826143452c8733495f02a60f7d668d3c7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 19:20:37 +0800 Subject: [PATCH 099/180] Add more logs --- paddle/fluid/framework/parallel_executor.cc | 54 ++++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 8ee2e57324..82df86bebd 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -125,30 +125,6 @@ struct OpHandle { virtual void RunImpl() = 0; }; -struct ComputationOpHandle : public OpHandle { - std::unique_ptr op_; - Scope *scope_; - platform::Place place_; - - explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, - platform::Place place) - : op_(framework::OpRegistry::CreateOp(op_desc)), - scope_(scope), - place_(place) {} - - protected: - void RunImpl() override { - auto *cur_ctx = dev_ctx_[place_]; - for (auto *in : inputs_) { - if (in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx) { - in->generated_op_->Wait(cur_ctx); - } - } - - op_->Run(*scope_, place_); - } -}; - struct ScaleLossGradOpHandle : public OpHandle { float coeff_; Scope *scope_; @@ -396,6 +372,36 @@ struct NCCLAllReduceOpHandle : public OpHandle { } }; +struct ComputationOpHandle : public OpHandle { + std::unique_ptr op_; + Scope *scope_; + platform::Place place_; + + explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, + platform::Place place) + : op_(framework::OpRegistry::CreateOp(op_desc)), + scope_(scope), + place_(place) {} + + protected: + void RunImpl() override { + auto *cur_ctx = dev_ctx_[place_]; + for (auto *in : inputs_) { + bool need_wait = + in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx; + if (dynamic_cast(in->generated_op_)) { + VLOG(3) << "Input is nccl all reduce, need to wait" << need_wait; + } + + if (need_wait) { + in->generated_op_->Wait(cur_ctx); + } + } + + op_->Run(*scope_, place_); + } +}; + ParallelExecutor::ParallelExecutor( size_t num_threads, const std::vector &places, const std::unordered_set ¶ms, From 43e54079a89a31a3970989b34178391a2120f0e8 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 19:32:35 +0800 Subject: [PATCH 100/180] Debug code --- paddle/fluid/framework/parallel_executor.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 82df86bebd..382e13451f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -545,6 +545,13 @@ void ParallelExecutor::ConstructDependencyGraph( harzaeds need to be handled. */ PolishGraphToSupportDataHazards(); + + for (auto &g : grads) { + LOG(INFO) << member_->vars_.begin() + ->second[g] + .rbegin() + ->second.pending_ops_.size(); + } } /** From 599f7a87ba6f87b42141f16b06ca28721a6982e9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 20 Mar 2018 19:34:38 +0800 Subject: [PATCH 101/180] Refine code --- paddle/fluid/framework/parallel_executor.cc | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 382e13451f..c008da9493 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -389,10 +389,6 @@ struct ComputationOpHandle : public OpHandle { for (auto *in : inputs_) { bool need_wait = in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx; - if (dynamic_cast(in->generated_op_)) { - VLOG(3) << "Input is nccl all reduce, need to wait" << need_wait; - } - if (need_wait) { in->generated_op_->Wait(cur_ctx); } @@ -545,13 +541,6 @@ void ParallelExecutor::ConstructDependencyGraph( harzaeds need to be handled. */ PolishGraphToSupportDataHazards(); - - for (auto &g : grads) { - LOG(INFO) << member_->vars_.begin() - ->second[g] - .rbegin() - ->second.pending_ops_.size(); - } } /** From 7ac969b88c53ab7e6bc345f20033f6e0fbd934dd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 11:33:09 +0800 Subject: [PATCH 102/180] Debug * add Check align * Make FetchData not shared_ptr * Remove FetchData * Wait & Fetch Data --- paddle/fluid/framework/parallel_executor.cc | 55 +++++++++++---------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index c008da9493..8d8004fc6d 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "lod_tensor.h" #include "lod_tensor_array.h" #include "op_registry.h" +#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/operators/math/concat.h" namespace paddle { @@ -158,15 +159,8 @@ struct ScaleLossGradOpHandle : public OpHandle { } }; -struct FetchedData { - public: - std::vector tensors_; - - explicit FetchedData(size_t num_fetched) { tensors_.resize(num_fetched); } -}; - struct FetchOpHandle : public OpHandle { - std::shared_ptr data_; + FeedFetchList *data_; size_t offset_; std::vector *local_scopes_; std::vector tensors_; @@ -175,15 +169,26 @@ struct FetchOpHandle : public OpHandle { for (auto *input_var : inputs_) { input_var->pending_ops_.erase(this); } - - // Lazily merge tensors. Will faster code. - MergeTensors(); } void Wait(platform::DeviceContext *waited_dev) override { PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); } + void WaitAndMergeCPUTensors() const { + // Wait fetch stream done. + for (auto &ctx : dev_ctx_) { + ctx.second->Wait(); + } + + std::vector tensors_ptr; + tensors_ptr.reserve(tensors_.size()); + for (auto &t : tensors_) { + tensors_ptr.emplace_back(&t); + } + data_->at(offset_).MergeLoDTensor(tensors_ptr, platform::CPUPlace()); + } + protected: void RunImpl() override { for (auto *input : inputs_) { @@ -208,15 +213,6 @@ struct FetchOpHandle : public OpHandle { } } } - - private: - void MergeTensors() const { - std::vector tensors_ptr; - for (auto &t : tensors_) { - tensors_ptr.emplace_back(&t); - } - data_->tensors_[offset_].MergeLoDTensor(tensors_ptr, platform::CPUPlace()); - } }; class ParallelExecutorPrivate { @@ -325,7 +321,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { : member_(member) {} void Wait(platform::DeviceContext *waited_dev) override { - VLOG(3) << "Wait nccl all reduce op"; OpHandle::Wait(waited_dev); } @@ -355,6 +350,11 @@ struct NCCLAllReduceOpHandle : public OpHandle { auto &lod_tensor = s->FindVar(var_name)->Get(); void *buffer = const_cast(lod_tensor.data()); + uintptr_t buf = reinterpret_cast(buffer); + if (buf % sizeof(float) != 0) { + VLOG(3) << "Buffer is not aligned " << buf; + } + if (dtype == -1) { dtype = ToNCCLDataType(lod_tensor.type()); } @@ -680,7 +680,7 @@ void ParallelExecutor::BuildNCCLCommunicator() const { void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { bool use_event = true; - auto fetched_data = std::make_shared(fetch_tensors.size()); + FeedFetchList fetched_data(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); std::unordered_map> pending_vars; @@ -728,7 +728,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, auto &vars = fetched_vars[var_name]; fetch_ops.emplace_back(); FetchOpHandle *op = &fetch_ops.back(); - op->data_ = fetched_data; + op->data_ = &fetched_data; op->offset_ = i; op->local_scopes_ = &member_->local_scopes_; for (auto &p : member_->places_) { @@ -786,9 +786,12 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, platform::DeviceContextPool::Instance().Get(p)->Wait(); } - fetch_ops.clear(); - *member_->global_scope_->Var(fetched_var_name)->GetMutable() = - fetched_data->tensors_; + for (auto &fetch_op : fetch_ops) { + fetch_op.WaitAndMergeCPUTensors(); + } + + *member_->global_scope_->Var(fetched_var_name)->GetMutable() = + fetched_data; } void ParallelExecutor::RunOp( From 90f980167d8b2f706e1c1cba98eb1bbc5356eec3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 11:35:03 +0800 Subject: [PATCH 103/180] Do not wait computation stream --- paddle/fluid/framework/parallel_executor.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 8d8004fc6d..fce1bf4724 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -782,10 +782,6 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } - for (auto &p : member_->places_) { - platform::DeviceContextPool::Instance().Get(p)->Wait(); - } - for (auto &fetch_op : fetch_ops) { fetch_op.WaitAndMergeCPUTensors(); } From 99fe83a0200af9054457ebb677a46b02627011bc Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 12:23:55 +0800 Subject: [PATCH 104/180] Move nccl helper --- paddle/fluid/framework/parallel_executor.cc | 18 ++-------- paddle/fluid/platform/nccl_helper.h | 37 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 paddle/fluid/platform/nccl_helper.h diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index fce1bf4724..991a0c8238 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "op_registry.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/operators/math/concat.h" +#include "paddle/fluid/platform/nccl_helper.h" namespace paddle { namespace framework { @@ -299,19 +300,6 @@ class ParallelExecutorPrivate { std::unique_ptr exception_; }; -// TODO(yy): Move this function somewhere -ncclDataType_t ToNCCLDataType(std::type_index type) { - if (type == typeid(float)) { // NOLINT - return ncclFloat; - } else if (type == typeid(double)) { // NOLINT - return ncclDouble; - } else if (type == typeid(int)) { // NOLINT - return ncclInt; - } else { - PADDLE_THROW("Not supported"); - } -} - static std::mutex g_nccl_mtx_; struct NCCLAllReduceOpHandle : public OpHandle { @@ -356,7 +344,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { } if (dtype == -1) { - dtype = ToNCCLDataType(lod_tensor.type()); + dtype = platform::ToNCCLDataType(lod_tensor.type()); } if (numel == 0) { @@ -629,7 +617,7 @@ void ParallelExecutor::BCastParamsToGPUs( if (var_desc->GetType() == proto::VarType::LOD_TENSOR) { auto &main_tensor = main_scope->FindVar(var_desc->Name())->Get(); - ncclDataType_t data_type = ToNCCLDataType(main_tensor.type()); + ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); auto &dims = main_tensor.dims(); size_t numel = main_tensor.numel(); diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h new file mode 100644 index 0000000000..e20f99bc6b --- /dev/null +++ b/paddle/fluid/platform/nccl_helper.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/fluid/platform/dynload/nccl.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace platform { + +inline ncclDataType_t ToNCCLDataType(std::type_index type) { + if (type == typeid(float)) { // NOLINT + return ncclFloat; + } else if (type == typeid(double)) { // NOLINT + return ncclDouble; + } else if (type == typeid(int)) { // NOLINT + return ncclInt; + } else { + PADDLE_THROW("Not supported"); + } +} + +} // namespace platform +} // namespace paddle From 41ad63234181e2c6dcec464db51c08270c18ac3c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 12:35:39 +0800 Subject: [PATCH 105/180] Add NCCL Group Guard --- paddle/fluid/framework/parallel_executor.cc | 7 +------ paddle/fluid/platform/nccl_helper.h | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 991a0c8238..1823cefe42 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -300,8 +300,6 @@ class ParallelExecutorPrivate { std::unique_ptr exception_; }; -static std::mutex g_nccl_mtx_; - struct NCCLAllReduceOpHandle : public OpHandle { ParallelExecutorPrivate *member_; @@ -327,9 +325,7 @@ struct NCCLAllReduceOpHandle : public OpHandle { int dtype = -1; size_t numel = 0; - std::lock_guard g(g_nccl_mtx_); - - PADDLE_ENFORCE(platform::dynload::ncclGroupStart()); + platform::NCCLGroupGuard guard; for (size_t i = 0; i < member_->local_scopes_.size(); ++i) { auto &p = member_->places_[i]; @@ -355,7 +351,6 @@ struct NCCLAllReduceOpHandle : public OpHandle { buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm, nccl_ctx.stream())); } - PADDLE_ENFORCE(platform::dynload::ncclGroupEnd()); } } }; diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index e20f99bc6b..cceceda8ad 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/enforce.h" @@ -33,5 +34,24 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) { } } +class NCCLGroupGuard { + public: + inline NCCLGroupGuard() { + mutex().lock(); + PADDLE_ENFORCE(dynload::ncclGroupStart()); + } + + inline ~NCCLGroupGuard() { + PADDLE_ENFORCE(dynload::ncclGroupEnd()); + mutex().unlock(); + } + + private: + static std::mutex& mutex() { + static std::mutex mtx; + return mtx; + } +}; + } // namespace platform } // namespace paddle From f2685bed81d492e13e471b16fefd31ce834962e9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 12:38:42 +0800 Subject: [PATCH 106/180] Clean code --- paddle/fluid/framework/parallel_executor.cc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 1823cefe42..d06613b573 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -25,12 +25,6 @@ limitations under the License. */ namespace paddle { namespace framework { -#ifdef PADDLE_WITH_CUDA - -// FIXME: CHECK the return value of x; -#define NCCL_INVOKE(x) x -#endif - struct OpHandle; struct VarHandleBase { @@ -59,10 +53,6 @@ struct DummyVarHandle : public VarHandleBase { std::string DebugString() const override { return "dummy"; } }; -struct DependencyVarHandle : public VarHandleBase { - std::string DebugString() const override { return "Dependency Variable"; } -}; - struct OpHandle { std::vector inputs_; std::vector outputs_; @@ -252,7 +242,7 @@ class ParallelExecutorPrivate { devs.push_back(boost::get(p).device); } - NCCL_INVOKE(platform::dynload::ncclCommInitAll( + PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( &comms[0], static_cast(contexts.size()), &devs[0])); int i = 0; @@ -558,7 +548,7 @@ void ParallelExecutor::PolishGraphToSupportDataHazards() const { continue; } - auto *dep_var = new DependencyVarHandle(); + auto *dep_var = new DummyVarHandle(); dep_var->generated_op_ = read_op; read_op->outputs_.emplace_back(dep_var); From a478a11e0b381c19bc392efd85d016dfaa62df22 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 12:43:23 +0800 Subject: [PATCH 107/180] NCCL Guard for bcast --- paddle/fluid/framework/parallel_executor.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d06613b573..a5221d03d6 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -606,7 +606,7 @@ void ParallelExecutor::BCastParamsToGPUs( auto &dims = main_tensor.dims(); size_t numel = main_tensor.numel(); - platform::dynload::ncclGroupStart(); + platform::NCCLGroupGuard guard; for (size_t i = 0; i < member_->places_.size(); ++i) { auto place = member_->places_[i]; @@ -624,7 +624,6 @@ void ParallelExecutor::BCastParamsToGPUs( platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm, nccl_ctx.stream()); } - platform::dynload::ncclGroupEnd(); } for (auto &stream : member_->communication_streams_) { From 6ebc6bf5337bb7b30c379bb242d00ae15f53ee82 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 13:41:58 +0800 Subject: [PATCH 108/180] ReorganizeCode --- paddle/fluid/framework/CMakeLists.txt | 3 +- paddle/fluid/framework/details/CMakeLists.txt | 1 + paddle/fluid/framework/details/var_handle.cc | 32 +++ paddle/fluid/framework/details/var_handle.h | 66 +++++ paddle/fluid/framework/parallel_executor.cc | 268 +++++++----------- paddle/fluid/framework/parallel_executor.h | 14 - paddle/fluid/platform/nccl_helper.h | 36 ++- 7 files changed, 244 insertions(+), 176 deletions(-) create mode 100644 paddle/fluid/framework/details/CMakeLists.txt create mode 100644 paddle/fluid/framework/details/var_handle.cc create mode 100644 paddle/fluid/framework/details/var_handle.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 6522a7a69f..9d2dc29028 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(details) # ddim lib proto_library(framework_proto SRCS framework.proto) @@ -87,7 +88,7 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table feed_fetch_method executor simple_threadpool concat) + framework_proto backward glog lod_rank_table feed_fetch_method executor simple_threadpool var_handle) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt new file mode 100644 index 0000000000..5074715e2e --- /dev/null +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(var_handle SRCS var_handle.cc DEPS place) diff --git a/paddle/fluid/framework/details/var_handle.cc b/paddle/fluid/framework/details/var_handle.cc new file mode 100644 index 0000000000..6f00abd947 --- /dev/null +++ b/paddle/fluid/framework/details/var_handle.cc @@ -0,0 +1,32 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/var_handle.h" + +namespace paddle { +namespace framework { +namespace details { + +VarHandleBase::~VarHandleBase() {} + +std::string VarHandle::DebugString() const { + std::stringstream ss; + ss << name_ << ":" << place_; + return ss.str(); +} + +std::string DummyVarHandle::DebugString() const { return "dummy"; } +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/var_handle.h b/paddle/fluid/framework/details/var_handle.h new file mode 100644 index 0000000000..613ff901b1 --- /dev/null +++ b/paddle/fluid/framework/details/var_handle.h @@ -0,0 +1,66 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include + +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace framework { + +struct OpHandleBase; + +namespace details { + +// VarHandleBase is the var node in the dependency graph. +// A variable can only be generated by a single operator. i.e. +// This is a single assignment graph. +struct VarHandleBase { + virtual ~VarHandleBase(); + virtual std::string DebugString() const = 0; + + // The operator who generate this variable. nullptr if the variable + // is a root node. + OpHandleBase *generated_op_; + + // Operators which depend on this variable ready. + std::unordered_set pending_ops_; +}; + +// VarHandle is actually a single version of Runtime Variable. +// Variable in Runtime mapped to many VarHandles in Graph. +// Each assignment will generate a new var handle with newer version. +// +// NOTE: runtime variables have place. +struct VarHandle : public VarHandleBase { + std::string DebugString() const override; + + // version field currently is not used, however, just store the version to + // debug easily. + size_t version_; + std::string name_; + platform::Place place_; +}; + +// Dummy Variable. It is used to represent dependencies between operators +struct DummyVarHandle : public VarHandleBase { + std::string DebugString() const override; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index a5221d03d6..2b094eba1e 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "lod_tensor.h" #include "lod_tensor_array.h" #include "op_registry.h" +#include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/operators/math/concat.h" #include "paddle/fluid/platform/nccl_helper.h" @@ -25,35 +26,11 @@ limitations under the License. */ namespace paddle { namespace framework { -struct OpHandle; +using details::DummyVarHandle; +using details::VarHandle; +using details::VarHandleBase; -struct VarHandleBase { - virtual ~VarHandleBase() {} - virtual std::string DebugString() const = 0; - - OpHandle *generated_op_; - std::unordered_set pending_ops_; -}; - -struct VarHandle : public VarHandleBase { - std::string DebugString() const override { - std::stringstream ss; - ss << name_ << ":" << place_; - return ss.str(); - } - - // version field currently is not used, however, just store the version to - // debug easily. - size_t version_; - std::string name_; - platform::Place place_; -}; - -struct DummyVarHandle : public VarHandleBase { - std::string DebugString() const override { return "dummy"; } -}; - -struct OpHandle { +struct OpHandleBase { std::vector inputs_; std::vector outputs_; std::unordered_map *local_scopes_; @@ -216,51 +193,13 @@ class ParallelExecutorPrivate { std::vector local_scopes_; Scope *global_scope_; -#ifdef PADDLE_WITH_CUDA - struct NCCLContext { - std::unique_ptr ctx_; - ncclComm_t comm; - - explicit NCCLContext(int dev_id) { - ctx_.reset(new platform::CUDADeviceContext(platform::CUDAPlace(dev_id))); - } - - cudaStream_t stream() const { return ctx_->stream(); } - - int device_id() const { - return boost::get(ctx_->GetPlace()).device; - } - - static void InitNCCLContext(std::unordered_map &contexts, - const std::vector &places) { - std::vector comms; - std::vector devs; - comms.resize(contexts.size()); - devs.reserve(contexts.size()); - - for (auto &p : places) { - devs.push_back(boost::get(p).device); - } - - PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( - &comms[0], static_cast(contexts.size()), &devs[0])); - - int i = 0; - for (auto &dev_id : devs) { - contexts.at(dev_id).comm = comms[i++]; - } - } - }; - - std::unordered_map communication_streams_; + std::unordered_map communication_streams_; - NCCLContext &GetNCCLCtx(platform::Place p) { + platform::NCCLContext &GetNCCLCtx(platform::Place p) { int dev_id = boost::get(p).device; return communication_streams_.at(dev_id); } -#endif - platform::DeviceContext *CommunicationDevCtx(const platform::Place &place) { if (platform::is_cpu_place(place) || local_scopes_.size() == 1) { return const_cast( @@ -282,27 +221,95 @@ class ParallelExecutorPrivate { vars_; std::unordered_set> dep_vars_; - std::vector> ops_; + std::vector> ops_; // Use a simpler thread pool, might be faster. std::unique_ptr pool_; std::unique_ptr exception_; -}; -struct NCCLAllReduceOpHandle : public OpHandle { - ParallelExecutorPrivate *member_; + VarHandle *GetVarHandle(const std::string &each_var_name, + const platform::Place &place) { + auto &var_holders = vars_[place]; + auto &var_holder = var_holders[each_var_name]; + VarHandle *var = nullptr; + if (var_holder.empty()) { + auto &init_var = var_holder[0]; + init_var.place_ = place; + init_var.name_ = each_var_name; + init_var.generated_op_ = nullptr; + init_var.version_ = 0; + var = &init_var; + } else { + var = &var_holder.rbegin()->second; + } + return var; + } - explicit NCCLAllReduceOpHandle(ParallelExecutorPrivate *member) - : member_(member) {} + void RunOp( + bool use_event, + std::unordered_map> &pending_vars, + OpHandleBase *op) { + std::vector *> *ready_buffer = + new std::vector *>(); + for (auto *var : op->outputs_) { + ready_buffer->emplace_back(&pending_vars[var]); + } + + auto op_run = [ready_buffer, op, this, use_event] { + try { + VLOG(10) << op->DebugString(); + op->Run(use_event); + for (auto *ready : *ready_buffer) { + ready->store(true, std::memory_order_release); + } + delete ready_buffer; + } catch (platform::EnforceNotMet ex) { + exception_.reset(new platform::EnforceNotMet(ex)); + } catch (...) { + LOG(FATAL) << "Unknown exception catched"; + } + }; + if (pool_) { + pool_->enqueue(op_run); + } else { + op_run(); + } + } + + void GenerateVar(OpHandleBase *op_handle, const std::string &each_var_name, + const platform::Place &place) { + auto &vars = vars_[place][each_var_name]; + size_t version = vars.size(); + auto &var = vars[version]; + var.version_ = version; + var.generated_op_ = op_handle; + var.name_ = each_var_name; + var.place_ = place; + op_handle->outputs_.emplace_back(&var); + } +}; // namespace framework + +struct NCCLAllReduceOpHandle : public OpHandleBase { + const std::vector &local_scopes_; + const std::vector &places_; + const std::unordered_map &communication_ctxs_; + + explicit NCCLAllReduceOpHandle( + const std::vector &local_scopes, + const std::vector &places, + const std::unordered_map &ctxs) + : local_scopes_(local_scopes), + places_(places), + communication_ctxs_(ctxs) {} void Wait(platform::DeviceContext *waited_dev) override { - OpHandle::Wait(waited_dev); + OpHandleBase::Wait(waited_dev); } protected: void RunImpl() override { - if (this->inputs_.size() == 1) { + if (inputs_.size() == 1) { return; // No need to all reduce when GPU count = 1; } else { // Wait input done @@ -317,9 +324,9 @@ struct NCCLAllReduceOpHandle : public OpHandle { platform::NCCLGroupGuard guard; - for (size_t i = 0; i < member_->local_scopes_.size(); ++i) { - auto &p = member_->places_[i]; - auto *s = member_->local_scopes_[i]; + for (size_t i = 0; i < local_scopes_.size(); ++i) { + auto &p = places_[i]; + auto *s = local_scopes_[i]; int dev_id = boost::get(p).device; auto &lod_tensor = s->FindVar(var_name)->Get(); @@ -336,16 +343,16 @@ struct NCCLAllReduceOpHandle : public OpHandle { if (numel == 0) { numel = static_cast(lod_tensor.numel()); } - auto &nccl_ctx = member_->communication_streams_.at(dev_id); + auto &nccl_ctx = communication_ctxs_.at(dev_id); PADDLE_ENFORCE(platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, - nccl_ctx.comm, nccl_ctx.stream())); + nccl_ctx.comm_, nccl_ctx.stream())); } } } }; -struct ComputationOpHandle : public OpHandle { +struct ComputationOpHandle : public OpHandleBase { std::unique_ptr op_; Scope *scope_; platform::Place place_; @@ -443,14 +450,14 @@ void ParallelExecutor::ConstructDependencyGraph( auto var_names = op->InputArgumentNames(); for (auto &each_var_name : var_names) { - VarHandle *var = GetVarHandle(each_var_name, p); + VarHandle *var = member_->GetVarHandle(each_var_name, p); op_handle->inputs_.emplace_back(var); var->pending_ops_.emplace(op_handle); } var_names = op->OutputArgumentNames(); for (auto &each_var_name : var_names) { - GenerateVar(op_handle, each_var_name, p); + member_->GenerateVar(op_handle, each_var_name, p); } if (is_forwarding) { @@ -468,7 +475,7 @@ void ParallelExecutor::ConstructDependencyGraph( // loss->pending_ops_.emplace_back(op_handle); // op_handle->inputs_.emplace_back(loss); - GenerateVar(op_handle, loss_var_name + "@GRAD", p); + member_->GenerateVar(op_handle, loss_var_name + "@GRAD", p); change_forward = true; } } @@ -483,7 +490,9 @@ void ParallelExecutor::ConstructDependencyGraph( for (auto &og : var_names) { if (grads.count(og) != 0) { // is param grad // Insert NCCL AllReduce Op - member_->ops_.emplace_back(new NCCLAllReduceOpHandle(member_)); + member_->ops_.emplace_back(new NCCLAllReduceOpHandle( + member_->local_scopes_, member_->places_, + member_->communication_streams_)); auto *op_handle = member_->ops_.back().get(); for (size_t i = 0; i < member_->places_.size(); ++i) { @@ -562,37 +571,6 @@ void ParallelExecutor::PolishGraphToSupportDataHazards() const { } } -void ParallelExecutor::GenerateVar(OpHandle *op_handle, - const std::string &each_var_name, - const platform::Place &place) const { - auto &vars = member_->vars_[place][each_var_name]; - size_t version = vars.size(); - auto &var = vars[version]; - var.version_ = version; - var.generated_op_ = op_handle; - var.name_ = each_var_name; - var.place_ = place; - op_handle->outputs_.emplace_back(&var); -} - -VarHandle *ParallelExecutor::GetVarHandle(const std::string &each_var_name, - const platform::Place &place) const { - auto &var_holders = member_->vars_[place]; - auto &var_holder = var_holders[each_var_name]; - VarHandle *var = nullptr; - if (var_holder.empty()) { - auto &init_var = var_holder[0]; - init_var.place_ = place; - init_var.name_ = each_var_name; - init_var.generated_op_ = nullptr; - init_var.version_ = 0; - var = &init_var; - } else { - var = &var_holder.rbegin()->second; - } - return var; -} - void ParallelExecutor::BCastParamsToGPUs( const ProgramDesc &startup_program) const { #ifdef PADDLE_WITH_CUDA @@ -621,8 +599,8 @@ void ParallelExecutor::BCastParamsToGPUs( } auto &nccl_ctx = member_->GetNCCLCtx(place); - platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm, - nccl_ctx.stream()); + platform::dynload::ncclBcast(buffer, numel, data_type, 0, + nccl_ctx.comm_, nccl_ctx.stream()); } } @@ -640,12 +618,12 @@ void ParallelExecutor::BuildNCCLCommunicator() const { for (auto &place : member_->places_) { int dev_id = boost::get(place).device; - member_->communication_streams_.emplace( - dev_id, ParallelExecutorPrivate::NCCLContext(dev_id)); + member_->communication_streams_.emplace(dev_id, + platform::NCCLContext(dev_id)); } - ParallelExecutorPrivate::NCCLContext::InitNCCLContext( - member_->communication_streams_, member_->places_); + platform::NCCLContext::InitNCCLContext(member_->communication_streams_, + member_->places_); #endif } @@ -656,7 +634,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, // Version --> VarHandle member_->exception_.reset(); std::unordered_map> pending_vars; - std::unordered_map pending_ops; + std::unordered_map pending_ops; std::vector dummy_vars; for (auto &place_pair : member_->vars_) { @@ -672,7 +650,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, pending_vars[var.get()] = var->generated_op_ == nullptr; } - std::vector to_run; + std::vector to_run; for (auto &op : member_->ops_) { if (op->inputs_.empty()) { // Special case, Op has no input. @@ -722,7 +700,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } for (auto *op : to_run) { - RunOp(use_event, pending_vars, op); + member_->RunOp(use_event, pending_vars, op); } while (!pending_vars.empty()) { @@ -750,7 +728,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } for (auto *op : to_run) { pending_ops.erase(op); - RunOp(use_event, pending_vars, op); + member_->RunOp(use_event, pending_vars, op); } } @@ -762,35 +740,5 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, fetched_data; } -void ParallelExecutor::RunOp( - bool use_event, - std::unordered_map> &pending_vars, - OpHandle *op) const { - std::vector *> *ready_buffer = - new std::vector *>(); - for (auto *var : op->outputs_) { - ready_buffer->emplace_back(&pending_vars[var]); - } - - auto op_run = [ready_buffer, op, this, use_event] { - try { - VLOG(10) << op->DebugString(); - op->Run(use_event); - for (auto *ready : *ready_buffer) { - ready->store(true, std::memory_order_release); - } - delete ready_buffer; - } catch (platform::EnforceNotMet ex) { - member_->exception_.reset(new platform::EnforceNotMet(ex)); - } catch (...) { - LOG(FATAL) << "Unknown exception catched"; - } - }; - if (member_->pool_) { - member_->pool_->enqueue(op_run); - } else { - op_run(); - } -} } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index c206e726a7..466b5f5f62 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -29,9 +29,6 @@ namespace paddle { namespace framework { class ParallelExecutorPrivate; -class VarHandle; -class OpHandle; -class VarHandleBase; class ParallelExecutor { public: @@ -50,23 +47,12 @@ class ParallelExecutor { void BCastParamsToGPUs(const ProgramDesc& startup_program) const; - VarHandle* GetVarHandle(const std::string& each_var_name, - const platform::Place& place) const; - - void GenerateVar(OpHandle* op_handle, const std::string& each_var_name, - const platform::Place& place) const; - void ConstructDependencyGraph(const std::unordered_set& params, const ProgramDesc& main_program, const std::string& loss_var_name) const; void BuildNCCLCommunicator() const; - void RunOp( - bool use_event, - std::unordered_map>& pending_vars, - OpHandle* op) const; - void PolishGraphToSupportDataHazards() const; }; diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index cceceda8ad..3db846b024 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -47,11 +47,45 @@ class NCCLGroupGuard { } private: - static std::mutex& mutex() { + static std::mutex &mutex() { static std::mutex mtx; return mtx; } }; +struct NCCLContext { + std::unique_ptr ctx_; + ncclComm_t comm_; + + explicit NCCLContext(int dev_id) + : ctx_(new CUDADeviceContext(CUDAPlace(dev_id))) {} + + cudaStream_t stream() const { return ctx_->stream(); } + + int device_id() const { + return boost::get(ctx_->GetPlace()).device; + } + + static void InitNCCLContext(std::unordered_map &contexts, + const std::vector &places) { + std::vector comms; + std::vector devs; + comms.resize(contexts.size()); + devs.reserve(contexts.size()); + + for (auto &p : places) { + devs.push_back(boost::get(p).device); + } + + PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( + &comms[0], static_cast(contexts.size()), &devs[0])); + + int i = 0; + for (auto &dev_id : devs) { + contexts.at(dev_id).comm_ = comms[i++]; + } + } +}; + } // namespace platform } // namespace paddle From fe7ed285d131ba99e82538e76cb7ac5381e97809 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 14:49:02 +0800 Subject: [PATCH 109/180] Extract NCCLCtxMap --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/framework/details/CMakeLists.txt | 1 + .../fluid/framework/details/op_handle_base.cc | 84 +++++++++++++ .../fluid/framework/details/op_handle_base.h | 48 ++++++++ paddle/fluid/framework/details/var_handle.h | 4 +- paddle/fluid/framework/parallel_executor.cc | 114 +++--------------- paddle/fluid/platform/nccl_helper.h | 46 +++++++ 7 files changed, 196 insertions(+), 103 deletions(-) create mode 100644 paddle/fluid/framework/details/op_handle_base.cc create mode 100644 paddle/fluid/framework/details/op_handle_base.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 9d2dc29028..afc7ec9d66 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -88,7 +88,7 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table feed_fetch_method executor simple_threadpool var_handle) + framework_proto backward glog lod_rank_table simple_threadpool var_handle op_handle_base) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 5074715e2e..d9bdf0b94d 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -1 +1,2 @@ cc_library(var_handle SRCS var_handle.cc DEPS place) +cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context) diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc new file mode 100644 index 0000000000..094b62cc94 --- /dev/null +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/op_handle_base.h" + +namespace paddle { +namespace framework { +namespace details { +std::string OpHandleBase::DebugString() const { + std::stringstream ss; + ss << "("; + for (auto *var : inputs_) { + ss << var->DebugString() << ", "; + } + ss << ") --> ("; + for (auto *var : outputs_) { + ss << var->DebugString() << ", "; + } + ss << ")\n"; + return ss.str(); +} + +OpHandleBase::~OpHandleBase() {} + +void OpHandleBase::Run(bool use_event) { +#ifdef PADDLE_WITH_CUDA + if (events_.empty() && use_event) { + for (auto &p : dev_ctx_) { + int dev_id = boost::get(p.first).device; + cudaSetDevice(dev_id); + cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming); + } + } +#else + PADDLE_ENFORCE(!use_event); +#endif + + RunImpl(); + +#ifdef PADDLE_WITH_CUDA + if (use_event) { + for (auto &p : dev_ctx_) { + int dev_id = boost::get(p.first).device; + auto stream = + static_cast(p.second)->stream(); + cudaEventRecord(events_.at(dev_id), stream); + } + } +#endif +} + +void OpHandleBase::Wait(platform::DeviceContext *waited_dev) { +#ifdef PADDLE_WITH_CUDA + if (platform::is_cpu_place(waited_dev->GetPlace()) || events_.empty()) { + for (auto &dev_ctx : dev_ctx_) { + dev_ctx.second->Wait(); + } + } else { + auto stream = + static_cast(waited_dev)->stream(); + for (auto &ev : events_) { + PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); + } + } +#else + for (auto &dev_ctx : dev_ctx_) { + dev_ctx.second->Wait(); + } +#endif +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h new file mode 100644 index 0000000000..bdfd1f78ad --- /dev/null +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -0,0 +1,48 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/var_handle.h" +#include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +namespace details { + +struct OpHandleBase { + std::vector inputs_; + std::vector outputs_; + std::unordered_map + dev_ctx_; + +#ifdef PADDLE_WITH_CUDA + std::unordered_map events_; +#endif + + std::string DebugString() const; + + virtual ~OpHandleBase(); + + void Run(bool use_event); + + virtual void Wait(platform::DeviceContext *waited_dev); + + protected: + virtual void RunImpl() = 0; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/var_handle.h b/paddle/fluid/framework/details/var_handle.h index 613ff901b1..893cc15f6c 100644 --- a/paddle/fluid/framework/details/var_handle.h +++ b/paddle/fluid/framework/details/var_handle.h @@ -21,10 +21,8 @@ namespace paddle { namespace framework { - -struct OpHandleBase; - namespace details { +struct OpHandleBase; // VarHandleBase is the var node in the dependency graph. // A variable can only be generated by a single operator. i.e. diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 2b094eba1e..3c24fa4bdf 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -14,86 +14,22 @@ limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" #include "ThreadPool.h" -#include "executor.h" #include "lod_tensor.h" #include "lod_tensor_array.h" #include "op_registry.h" +#include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/framework/feed_fetch_type.h" -#include "paddle/fluid/operators/math/concat.h" #include "paddle/fluid/platform/nccl_helper.h" namespace paddle { namespace framework { using details::DummyVarHandle; +using details::OpHandleBase; using details::VarHandle; using details::VarHandleBase; -struct OpHandleBase { - std::vector inputs_; - std::vector outputs_; - std::unordered_map - dev_ctx_; - - std::unordered_map events_; - - std::string DebugString() { - std::stringstream ss; - ss << "("; - for (auto *var : inputs_) { - ss << var->DebugString() << ", "; - } - ss << ") --> ("; - for (auto *var : outputs_) { - ss << var->DebugString() << ", "; - } - ss << ")\n"; - return ss.str(); - } - - virtual ~OpHandleBase() {} - - void Run(bool use_event) { - if (events_.empty() && use_event) { - for (auto &p : dev_ctx_) { - int dev_id = boost::get(p.first).device; - cudaSetDevice(dev_id); - cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming); - } - } - - RunImpl(); - - if (use_event) { - for (auto &p : dev_ctx_) { - int dev_id = boost::get(p.first).device; - auto stream = - static_cast(p.second)->stream(); - cudaEventRecord(events_.at(dev_id), stream); - } - } - } - - virtual void Wait(platform::DeviceContext *waited_dev) { - if (platform::is_cpu_place(waited_dev->GetPlace()) || events_.empty()) { - for (auto &dev_ctx : dev_ctx_) { - dev_ctx.second->Wait(); - } - } else { - auto stream = - static_cast(waited_dev)->stream(); - for (auto &ev : events_) { - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); - } - } - } - - protected: - virtual void RunImpl() = 0; -}; - struct ScaleLossGradOpHandle : public OpHandleBase { float coeff_; Scope *scope_; @@ -193,12 +129,7 @@ class ParallelExecutorPrivate { std::vector local_scopes_; Scope *global_scope_; - std::unordered_map communication_streams_; - - platform::NCCLContext &GetNCCLCtx(platform::Place p) { - int dev_id = boost::get(p).device; - return communication_streams_.at(dev_id); - } + std::unique_ptr nccl_ctxs_; platform::DeviceContext *CommunicationDevCtx(const platform::Place &place) { if (platform::is_cpu_place(place) || local_scopes_.size() == 1) { @@ -206,7 +137,7 @@ class ParallelExecutorPrivate { platform::DeviceContextPool::Instance().Get(place)); } else { #ifdef PADDLE_WITH_CUDA - return GetNCCLCtx(place).ctx_.get(); + return nccl_ctxs_->DevCtx(place); #else PADDLE_THROW("Not compiled with CUDA") #endif @@ -293,15 +224,12 @@ class ParallelExecutorPrivate { struct NCCLAllReduceOpHandle : public OpHandleBase { const std::vector &local_scopes_; const std::vector &places_; - const std::unordered_map &communication_ctxs_; + const platform::NCCLContextMap &nccl_ctxs_; - explicit NCCLAllReduceOpHandle( - const std::vector &local_scopes, - const std::vector &places, - const std::unordered_map &ctxs) - : local_scopes_(local_scopes), - places_(places), - communication_ctxs_(ctxs) {} + explicit NCCLAllReduceOpHandle(const std::vector &local_scopes, + const std::vector &places, + const platform::NCCLContextMap &ctxs) + : local_scopes_(local_scopes), places_(places), nccl_ctxs_(ctxs) {} void Wait(platform::DeviceContext *waited_dev) override { OpHandleBase::Wait(waited_dev); @@ -343,7 +271,7 @@ struct NCCLAllReduceOpHandle : public OpHandleBase { if (numel == 0) { numel = static_cast(lod_tensor.numel()); } - auto &nccl_ctx = communication_ctxs_.at(dev_id); + auto &nccl_ctx = nccl_ctxs_.at(dev_id); PADDLE_ENFORCE(platform::dynload::ncclAllReduce( buffer, buffer, numel, static_cast(dtype), ncclSum, nccl_ctx.comm_, nccl_ctx.stream())); @@ -491,8 +419,7 @@ void ParallelExecutor::ConstructDependencyGraph( if (grads.count(og) != 0) { // is param grad // Insert NCCL AllReduce Op member_->ops_.emplace_back(new NCCLAllReduceOpHandle( - member_->local_scopes_, member_->places_, - member_->communication_streams_)); + member_->local_scopes_, member_->places_, *member_->nccl_ctxs_)); auto *op_handle = member_->ops_.back().get(); for (size_t i = 0; i < member_->places_.size(); ++i) { @@ -598,15 +525,12 @@ void ParallelExecutor::BCastParamsToGPUs( buffer = t->mutable_data(place, main_tensor.type()); } - auto &nccl_ctx = member_->GetNCCLCtx(place); + auto &nccl_ctx = member_->nccl_ctxs_->at(place); platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm_, nccl_ctx.stream()); } } - - for (auto &stream : member_->communication_streams_) { - stream.second.ctx_->Wait(); - } + member_->nccl_ctxs_->WaitAll(); } #else PADDLE_THROW("Not compiled with CUDA"); @@ -615,15 +539,7 @@ void ParallelExecutor::BCastParamsToGPUs( void ParallelExecutor::BuildNCCLCommunicator() const { #ifdef PADDLE_WITH_CUDA - for (auto &place : member_->places_) { - int dev_id = boost::get(place).device; - - member_->communication_streams_.emplace(dev_id, - platform::NCCLContext(dev_id)); - } - - platform::NCCLContext::InitNCCLContext(member_->communication_streams_, - member_->places_); + member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_)); #endif } @@ -682,7 +598,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, op->offset_ = i; op->local_scopes_ = &member_->local_scopes_; for (auto &p : member_->places_) { - op->dev_ctx_[p] = member_->GetNCCLCtx(p).ctx_.get(); + op->dev_ctx_[p] = member_->nccl_ctxs_->DevCtx(p); } for (auto *var : vars) { diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index 3db846b024..2999004320 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -87,5 +87,51 @@ struct NCCLContext { } }; +struct NCCLContextMap { + std::unordered_map contexts_; + std::vector order_; + + NCCLContextMap(const std::vector &places) { + order_.reserve(places.size()); + for (auto &p : places) { + int dev_id = boost::get(p).device; + order_.emplace_back(dev_id); + contexts_.emplace(dev_id, NCCLContext(dev_id)); + } + PADDLE_ENFORCE_EQ( + order_.size(), contexts_.size(), + "NCCL Context Map does not support contain two or more same device"); + + std::vector comms; + comms.resize(order_.size()); + + PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( + &comms[0], static_cast(order_.size()), &order_[0])); + + int i = 0; + for (auto &dev_id : order_) { + contexts_.at(dev_id).comm_ = comms[i++]; + } + } + + CUDADeviceContext *DevCtx(int dev_id) const { return at(dev_id).ctx_.get(); } + + CUDADeviceContext *DevCtx(platform::Place p) const { + return DevCtx(boost::get(p).device); + } + + const NCCLContext &at(platform::Place p) const { + return this->at(boost::get(p).device); + } + + const NCCLContext &at(int dev_id) const { return contexts_.at(dev_id); } + + void WaitAll() { + for (auto &p : contexts_) { + p.second.ctx_->Wait(); + } + } +}; + } // namespace platform } // namespace paddle From 5368e50d845bd70d9c9f38a5a75db6cba949f48a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 14:58:28 +0800 Subject: [PATCH 110/180] Reorganize code --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/framework/details/CMakeLists.txt | 1 + .../details/scale_loss_grad_op_handle.cc | 47 +++++++++++++++++++ .../details/scale_loss_grad_op_handle.h | 39 +++++++++++++++ paddle/fluid/framework/parallel_executor.cc | 35 +------------- 5 files changed, 90 insertions(+), 34 deletions(-) create mode 100644 paddle/fluid/framework/details/scale_loss_grad_op_handle.cc create mode 100644 paddle/fluid/framework/details/scale_loss_grad_op_handle.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index afc7ec9d66..123b9cb735 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -88,7 +88,7 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table simple_threadpool var_handle op_handle_base) + framework_proto backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index d9bdf0b94d..427785d518 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -1,2 +1,3 @@ cc_library(var_handle SRCS var_handle.cc DEPS place) cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context) +cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc new file mode 100644 index 0000000000..df9ca37180 --- /dev/null +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" + +namespace paddle { +namespace framework { +namespace details { +ScaleLossGradOpHandle::ScaleLossGradOpHandle(size_t num_dev, Scope *scope, + platform::Place place) + : coeff_(static_cast(1.0 / num_dev)), scope_(scope), place_(place) {} + +ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {} + +void ScaleLossGradOpHandle::RunImpl() { + std::string var_name = static_cast(this->outputs_[0])->name_; + + float *tmp = + scope_->FindVar(var_name)->GetMutable()->mutable_data( + make_ddim({1}), place_); + + if (platform::is_cpu_place(place_)) { + *tmp = coeff_; + } else { +#ifdef PADDLE_WITH_CUDA + auto stream = + static_cast(this->dev_ctx_[place_]) + ->stream(); + memory::Copy(boost::get(place_), tmp, + platform::CPUPlace(), &coeff_, sizeof(float), stream); +#endif + } +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h new file mode 100644 index 0000000000..44a10e3375 --- /dev/null +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h @@ -0,0 +1,39 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/scope.h" +namespace paddle { +namespace framework { +namespace details { + +struct ScaleLossGradOpHandle : public OpHandleBase { + float coeff_; + Scope *scope_; + platform::Place place_; + + ScaleLossGradOpHandle(size_t num_dev, Scope *scope, platform::Place place); + + ~ScaleLossGradOpHandle() final; + + protected: + void RunImpl() override; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 3c24fa4bdf..5dba3e94c1 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "lod_tensor_array.h" #include "op_registry.h" #include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/platform/nccl_helper.h" @@ -27,42 +28,10 @@ namespace framework { using details::DummyVarHandle; using details::OpHandleBase; +using details::ScaleLossGradOpHandle; using details::VarHandle; using details::VarHandleBase; -struct ScaleLossGradOpHandle : public OpHandleBase { - float coeff_; - Scope *scope_; - platform::Place place_; - - explicit ScaleLossGradOpHandle(size_t num_dev, Scope *scope, - platform::Place place) - : coeff_(static_cast(1.0 / num_dev)), - scope_(scope), - place_(place) {} - - ~ScaleLossGradOpHandle() {} - - protected: - void RunImpl() override { - std::string var_name = static_cast(this->outputs_[0])->name_; - - float *tmp = scope_->FindVar(var_name) - ->GetMutable() - ->mutable_data(make_ddim({1}), place_); - - if (platform::is_cpu_place(place_)) { - *tmp = coeff_; - } else { - auto stream = - static_cast(this->dev_ctx_[place_]) - ->stream(); - memory::Copy(boost::get(place_), tmp, - platform::CPUPlace(), &coeff_, sizeof(float), stream); - } - } -}; - struct FetchOpHandle : public OpHandleBase { FeedFetchList *data_; size_t offset_; From 15f5f10ed5b09b47bd897f8d0df916bed3fcf0f6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 15:43:21 +0800 Subject: [PATCH 111/180] AddInput/AddOutput for OpHandle --- paddle/fluid/framework/CMakeLists.txt | 3 +- paddle/fluid/framework/details/CMakeLists.txt | 1 + .../framework/details/fetch_op_handle.cc | 77 ++++++++++ .../fluid/framework/details/fetch_op_handle.h | 47 ++++++ .../fluid/framework/details/op_handle_base.cc | 11 ++ .../fluid/framework/details/op_handle_base.h | 4 + .../details/scale_loss_grad_op_handle.cc | 7 +- .../details/scale_loss_grad_op_handle.h | 4 +- paddle/fluid/framework/parallel_executor.cc | 140 +++++------------- 9 files changed, 190 insertions(+), 104 deletions(-) create mode 100644 paddle/fluid/framework/details/fetch_op_handle.cc create mode 100644 paddle/fluid/framework/details/fetch_op_handle.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 123b9cb735..cf288e7804 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -88,7 +88,8 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle) + framework_proto backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle + fetch_op_handle) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 427785d518..aed444d9aa 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -1,3 +1,4 @@ cc_library(var_handle SRCS var_handle.cc DEPS place) cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context) cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) +cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc new file mode 100644 index 0000000000..ab552081a4 --- /dev/null +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/fetch_op_handle.h" + +namespace paddle { +namespace framework { +namespace details { + +FetchOpHandle::FetchOpHandle(FeedFetchList *data, size_t offset, + std::vector *local_scopes) + : data_(data), offset_(offset), local_scopes_(local_scopes) {} + +FetchOpHandle::~FetchOpHandle() { + for (auto *input_var : inputs_) { + input_var->pending_ops_.erase(this); + } +} + +void FetchOpHandle::Wait(platform::DeviceContext *waited_dev) { + PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); +} + +void FetchOpHandle::WaitAndMergeCPUTensors() const { + // Wait fetch stream done. + for (auto &ctx : dev_ctx_) { + ctx.second->Wait(); + } + + std::vector tensors_ptr; + tensors_ptr.reserve(tensors_.size()); + for (auto &t : tensors_) { + tensors_ptr.emplace_back(&t); + } + data_->at(offset_).MergeLoDTensor(tensors_ptr, platform::CPUPlace()); +} + +void FetchOpHandle::RunImpl() { + for (auto *input : inputs_) { + auto *var = static_cast(input); + var->generated_op_->Wait(this->dev_ctx_[var->place_]); + } + + tensors_.resize(inputs_.size()); + auto *var = static_cast(inputs_[0]); + auto &var_name = var->name_; + platform::CPUPlace cpu; + auto &scopes = *local_scopes_; + + for (size_t i = 0; i < scopes.size(); ++i) { + auto &scope = scopes[i]; + auto &t = scope->FindVar(var_name)->Get(); + if (platform::is_gpu_place(var->place_)) { +#ifdef PADDLE_WITH_CUDA + TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]); +#endif + } else { + tensors_[i].ShareDataWith(t); + tensors_[i].set_lod(t.lod()); + } + } +} + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h new file mode 100644 index 0000000000..3123f7ba23 --- /dev/null +++ b/paddle/fluid/framework/details/fetch_op_handle.h @@ -0,0 +1,47 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/feed_fetch_type.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace framework { +namespace details { + +struct FetchOpHandle : public OpHandleBase { + FeedFetchList *data_; + size_t offset_; + std::vector *local_scopes_; + std::vector tensors_; + + FetchOpHandle(FeedFetchList *data, size_t offset, + std::vector *local_scopes); + + ~FetchOpHandle(); + + void Wait(platform::DeviceContext *waited_dev) override; + + void WaitAndMergeCPUTensors() const; + + protected: + void RunImpl() override; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 094b62cc94..ca354a63c6 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -79,6 +79,17 @@ void OpHandleBase::Wait(platform::DeviceContext *waited_dev) { } #endif } + +void OpHandleBase::AddInput(VarHandleBase *in) { + this->inputs_.emplace_back(in); + in->pending_ops_.insert(this); +} + +void OpHandleBase::AddOutput(VarHandleBase *out) { + outputs_.emplace_back(out); + out->generated_op_ = this; +} + } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index bdfd1f78ad..5178b51d8d 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -39,6 +39,10 @@ struct OpHandleBase { virtual void Wait(platform::DeviceContext *waited_dev); + void AddInput(VarHandleBase *in); + + void AddOutput(VarHandleBase *out); + protected: virtual void RunImpl() = 0; }; diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index df9ca37180..2e69f1e5e8 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -18,8 +18,11 @@ namespace paddle { namespace framework { namespace details { ScaleLossGradOpHandle::ScaleLossGradOpHandle(size_t num_dev, Scope *scope, - platform::Place place) - : coeff_(static_cast(1.0 / num_dev)), scope_(scope), place_(place) {} + platform::Place place, + platform::DeviceContext *dev_ctx) + : coeff_(static_cast(1.0 / num_dev)), scope_(scope), place_(place) { + dev_ctx_[place_] = dev_ctx; +} ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {} diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h index 44a10e3375..3a35574919 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h @@ -17,6 +17,7 @@ #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" + namespace paddle { namespace framework { namespace details { @@ -26,7 +27,8 @@ struct ScaleLossGradOpHandle : public OpHandleBase { Scope *scope_; platform::Place place_; - ScaleLossGradOpHandle(size_t num_dev, Scope *scope, platform::Place place); + ScaleLossGradOpHandle(size_t num_dev, Scope *scope, platform::Place place, + platform::DeviceContext *context); ~ScaleLossGradOpHandle() final; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 5dba3e94c1..7064828b21 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -17,77 +17,22 @@ limitations under the License. */ #include "lod_tensor.h" #include "lod_tensor_array.h" #include "op_registry.h" +#include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" #include "paddle/fluid/framework/details/var_handle.h" -#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/platform/nccl_helper.h" namespace paddle { namespace framework { using details::DummyVarHandle; +using details::FetchOpHandle; using details::OpHandleBase; using details::ScaleLossGradOpHandle; using details::VarHandle; using details::VarHandleBase; -struct FetchOpHandle : public OpHandleBase { - FeedFetchList *data_; - size_t offset_; - std::vector *local_scopes_; - std::vector tensors_; - - ~FetchOpHandle() { - for (auto *input_var : inputs_) { - input_var->pending_ops_.erase(this); - } - } - - void Wait(platform::DeviceContext *waited_dev) override { - PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); - } - - void WaitAndMergeCPUTensors() const { - // Wait fetch stream done. - for (auto &ctx : dev_ctx_) { - ctx.second->Wait(); - } - - std::vector tensors_ptr; - tensors_ptr.reserve(tensors_.size()); - for (auto &t : tensors_) { - tensors_ptr.emplace_back(&t); - } - data_->at(offset_).MergeLoDTensor(tensors_ptr, platform::CPUPlace()); - } - - protected: - void RunImpl() override { - for (auto *input : inputs_) { - auto *var = static_cast(input); - var->generated_op_->Wait(this->dev_ctx_[var->place_]); - } - - tensors_.resize(inputs_.size()); - auto *var = static_cast(inputs_[0]); - auto &var_name = var->name_; - platform::CPUPlace cpu; - auto &scopes = *local_scopes_; - - for (size_t i = 0; i < scopes.size(); ++i) { - auto &scope = scopes[i]; - auto &t = scope->FindVar(var_name)->Get(); - if (platform::is_gpu_place(var->place_)) { - TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]); - } else { - tensors_[i].ShareDataWith(t); - tensors_[i].set_lod(t.lod()); - } - } - } -}; - class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(size_t num_threads) @@ -99,19 +44,9 @@ class ParallelExecutorPrivate { Scope *global_scope_; std::unique_ptr nccl_ctxs_; - - platform::DeviceContext *CommunicationDevCtx(const platform::Place &place) { - if (platform::is_cpu_place(place) || local_scopes_.size() == 1) { - return const_cast( - platform::DeviceContextPool::Instance().Get(place)); - } else { -#ifdef PADDLE_WITH_CUDA - return nccl_ctxs_->DevCtx(place); -#else - PADDLE_THROW("Not compiled with CUDA") -#endif - } - } + std::unordered_map + fetch_dev_ctxs_; platform::Place main_place_; @@ -119,6 +54,7 @@ class ParallelExecutorPrivate { std::unordered_map>, platform::PlaceHash> vars_; + std::unordered_set> dep_vars_; std::vector> ops_; @@ -183,10 +119,9 @@ class ParallelExecutorPrivate { size_t version = vars.size(); auto &var = vars[version]; var.version_ = version; - var.generated_op_ = op_handle; var.name_ = each_var_name; var.place_ = place; - op_handle->outputs_.emplace_back(&var); + op_handle->AddOutput(&var); } }; // namespace framework @@ -198,7 +133,11 @@ struct NCCLAllReduceOpHandle : public OpHandleBase { explicit NCCLAllReduceOpHandle(const std::vector &local_scopes, const std::vector &places, const platform::NCCLContextMap &ctxs) - : local_scopes_(local_scopes), places_(places), nccl_ctxs_(ctxs) {} + : local_scopes_(local_scopes), places_(places), nccl_ctxs_(ctxs) { + for (auto &p : places_) { + this->dev_ctx_[p] = nccl_ctxs_.DevCtx(p); + } + } void Wait(platform::DeviceContext *waited_dev) override { OpHandleBase::Wait(waited_dev); @@ -283,6 +222,17 @@ ParallelExecutor::ParallelExecutor( : member_(new ParallelExecutorPrivate(num_threads)) { member_->places_ = places; member_->global_scope_ = scope; + + if (platform::is_cpu_place(places[0])) { + member_->fetch_dev_ctxs_[places[0]] = const_cast( + platform::DeviceContextPool::Instance().Get(places[0])); + } else { + for (auto &p : member_->places_) { + member_->fetch_dev_ctxs_[p] = + new platform::CUDADeviceContext(boost::get(p)); + } + } + // Step 1. RunStartupProgram and Bcast the params to devs. Executor exe(places[0]); exe.Run(startup_program, scope, 0); @@ -348,8 +298,7 @@ void ParallelExecutor::ConstructDependencyGraph( for (auto &each_var_name : var_names) { VarHandle *var = member_->GetVarHandle(each_var_name, p); - op_handle->inputs_.emplace_back(var); - var->pending_ops_.emplace(op_handle); + op_handle->AddInput(var); } var_names = op->OutputArgumentNames(); @@ -360,11 +309,10 @@ void ParallelExecutor::ConstructDependencyGraph( if (is_forwarding) { if (var_names.size() == 1 && var_names[0] == loss_var_name) { // Insert ScaleCost OpHandle - member_->ops_.emplace_back(new ScaleLossGradOpHandle( - this->member_->local_scopes_.size(), s, p)); - op_handle = member_->ops_.back().get(); - - op_handle->dev_ctx_[p] = member_->CommunicationDevCtx(p); + op_handle = + new ScaleLossGradOpHandle(this->member_->local_scopes_.size(), s, + p, member_->nccl_ctxs_->DevCtx(p)); + member_->ops_.emplace_back(op_handle); // FIXME: Currently ScaleLossGradOp only use device_count as scale // factor. So it does not depend on any other operators. @@ -399,15 +347,14 @@ void ParallelExecutor::ConstructDependencyGraph( continue; } auto *prev_grad = &vars[vars.size() - 1]; - op_handle->inputs_.emplace_back(prev_grad); - prev_grad->pending_ops_.emplace(op_handle); + op_handle->AddInput(prev_grad); + auto &var = vars[vars.size()]; var.place_ = p; - var.generated_op_ = op_handle; var.name_ = og; var.version_ = vars.size() - 1; - op_handle->outputs_.emplace_back(&var); - op_handle->dev_ctx_[p] = member_->CommunicationDevCtx(p); + + op_handle->AddOutput(&var); } } } @@ -454,12 +401,8 @@ void ParallelExecutor::PolishGraphToSupportDataHazards() const { } auto *dep_var = new DummyVarHandle(); - - dep_var->generated_op_ = read_op; - read_op->outputs_.emplace_back(dep_var); - - dep_var->pending_ops_.emplace(write_op); - write_op->inputs_.emplace_back(dep_var); + read_op->AddOutput(dep_var); + write_op->AddInput(dep_var); member_->dep_vars_.emplace(dep_var); } } @@ -561,24 +504,21 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, for (size_t i = 0; i < fetch_tensors.size(); ++i) { auto &var_name = fetch_tensors[i]; auto &vars = fetched_vars[var_name]; - fetch_ops.emplace_back(); + fetch_ops.emplace_back(&fetched_data, i, &member_->local_scopes_); FetchOpHandle *op = &fetch_ops.back(); - op->data_ = &fetched_data; - op->offset_ = i; - op->local_scopes_ = &member_->local_scopes_; + + // FIXME: Use new device context for (auto &p : member_->places_) { - op->dev_ctx_[p] = member_->nccl_ctxs_->DevCtx(p); + op->dev_ctx_[p] = member_->fetch_dev_ctxs_[p]; } for (auto *var : vars) { - var->pending_ops_.emplace(op); - op->inputs_.emplace_back(var); + op->AddInput(var); } dummy_vars.emplace_back(); auto *var = &dummy_vars.back(); - op->outputs_.emplace_back(var); - var->generated_op_ = op; + op->AddOutput(var); pending_vars[var] = false; pending_ops.insert({op, op->inputs_.size()}); From 5c333e414380f064696a1c152d26cc6b5d6750e4 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 16:21:18 +0800 Subject: [PATCH 112/180] Add dctor for dev_ctx --- paddle/fluid/framework/parallel_executor.cc | 27 +++++----------- paddle/fluid/platform/device_context.cc | 34 +++++++++++---------- paddle/fluid/platform/device_context.h | 17 ++--------- paddle/fluid/platform/place.h | 3 +- 4 files changed, 31 insertions(+), 50 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7064828b21..8c29aacab6 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -35,18 +35,18 @@ using details::VarHandleBase; class ParallelExecutorPrivate { public: - explicit ParallelExecutorPrivate(size_t num_threads) - : pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) {} + explicit ParallelExecutorPrivate(size_t num_threads, + const std::vector &places) + : places_(places), + fetch_dev_ctxs_(places), + pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; - + platform::DeviceContextPool fetch_dev_ctxs_; std::vector local_scopes_; Scope *global_scope_; std::unique_ptr nccl_ctxs_; - std::unordered_map - fetch_dev_ctxs_; platform::Place main_place_; @@ -219,20 +219,9 @@ ParallelExecutor::ParallelExecutor( const std::unordered_set ¶ms, const ProgramDesc &startup_program, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope) - : member_(new ParallelExecutorPrivate(num_threads)) { - member_->places_ = places; + : member_(new ParallelExecutorPrivate(num_threads, places)) { member_->global_scope_ = scope; - if (platform::is_cpu_place(places[0])) { - member_->fetch_dev_ctxs_[places[0]] = const_cast( - platform::DeviceContextPool::Instance().Get(places[0])); - } else { - for (auto &p : member_->places_) { - member_->fetch_dev_ctxs_[p] = - new platform::CUDADeviceContext(boost::get(p)); - } - } - // Step 1. RunStartupProgram and Bcast the params to devs. Executor exe(places[0]); exe.Run(startup_program, scope, 0); @@ -509,7 +498,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, // FIXME: Use new device context for (auto &p : member_->places_) { - op->dev_ctx_[p] = member_->fetch_dev_ctxs_[p]; + op->dev_ctx_[p] = member_->fetch_dev_ctxs_.Get(p); } for (auto *var : vars) { diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index ab02a95f26..59b76a1edb 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -10,43 +10,45 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device_context.h" +#include #include "paddle/fluid/memory/memory.h" - namespace paddle { namespace platform { DeviceContextPool* DeviceContextPool::pool = nullptr; -const platform::DeviceContext* DeviceContextPool::Get( - const platform::Place& place) { +platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) { auto it = device_contexts_.find(place); if (it == device_contexts_.end()) { PADDLE_THROW( "'Place' is not supported, Please re-compile with WITH_GPU " "option"); } - return it->second; + return it->second.get(); } DeviceContextPool::DeviceContextPool( const std::vector& places) { PADDLE_ENFORCE_GT(places.size(), 0); - for (size_t i = 0; i < places.size(); i++) { - if (platform::is_cpu_place(places[i])) { + using PtrType = std::unique_ptr; + std::unordered_set set; + for (auto& p : places) { + set.insert(p); + } + + for (auto& p : set) { + if (platform::is_cpu_place(p)) { #ifdef PADDLE_WITH_MKLDNN - device_contexts_.emplace(places[i], - new platform::MKLDNNDeviceContext( - boost::get(places[i]))); + device_contexts_.emplace( + p, PtrType(new MKLDNNDeviceContext(boost::get(p)))); #else - device_contexts_.emplace(places[i], - new platform::CPUDeviceContext( - boost::get(places[i]))); + device_contexts_.emplace( + p, PtrType(new CPUDeviceContext(boost::get(p)))); #endif - } else if (platform::is_gpu_place(places[i])) { + } else if (platform::is_gpu_place(p)) { #ifdef PADDLE_WITH_CUDA - device_contexts_.emplace(places[i], - new platform::CUDADeviceContext( - boost::get(places[i]))); + device_contexts_.emplace( + p, PtrType(new CUDADeviceContext(boost::get(p)))); #else PADDLE_THROW( "'CUDAPlace' is not supported, Please re-compile with WITH_GPU " diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index df0a427b48..202394c7be 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -160,7 +160,7 @@ class DeviceContextPool { } /*! \brief Return handle of single device context. */ - const platform::DeviceContext* Get(const platform::Place& place); + platform::DeviceContext* Get(const platform::Place& place); template const typename DefaultDeviceContextType::TYPE* GetByPlace( @@ -173,19 +173,8 @@ class DeviceContextPool { private: static DeviceContextPool* pool; - constexpr static int LEFT_SHIFT = 8; - struct Hash { - std::hash hash_; - size_t operator()(const platform::Place& place) const { - int pre_hash = place.which() << LEFT_SHIFT; - if (platform::is_gpu_place(place)) { - pre_hash += boost::get(place).GetDeviceId(); - } - return hash_(pre_hash); - } - }; - std::unordered_map + std::unordered_map, PlaceHash> device_contexts_; DISABLE_COPY_AND_ASSIGN(DeviceContextPool); }; diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h index 633251eb47..4cc8b377b8 100644 --- a/paddle/fluid/platform/place.h +++ b/paddle/fluid/platform/place.h @@ -67,12 +67,13 @@ bool is_same_place(const Place &, const Place &); struct PlaceHash { std::size_t operator()(const Place &p) const { + constexpr size_t num_dev_bits = 4; std::hash ihash; size_t dev_id = 0; if (is_gpu_place(p)) { dev_id = boost::get(p).device; } - return ihash(dev_id << 2 | p.which()); + return ihash(dev_id << num_dev_bits | p.which()); } }; From f28ae6e4b16322310ec91fa3e7f6916f2aa79889 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 16:48:44 +0800 Subject: [PATCH 113/180] Reorganize Code --- paddle/fluid/framework/CMakeLists.txt | 8 +- paddle/fluid/framework/details/CMakeLists.txt | 2 + .../details/nccl_all_reduce_op_handle.cc | 74 +++++++++++++++++++ .../details/nccl_all_reduce_op_handle.h | 41 ++++++++++ paddle/fluid/framework/parallel_executor.cc | 65 +--------------- 5 files changed, 126 insertions(+), 64 deletions(-) create mode 100644 paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc create mode 100644 paddle/fluid/framework/details/nccl_all_reduce_op_handle.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index cf288e7804..12d6541b8f 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -87,9 +87,15 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) + +if(WITH_GPU) + set(parallel_executor_cuda_deps nccl_all_reduce_op_handle) +else() + set(parallel_executor_cuda_deps) +endif() cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle - fetch_op_handle) + fetch_op_handle ${parallel_executor_cuda_deps}) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index aed444d9aa..fb276ea703 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -2,3 +2,5 @@ cc_library(var_handle SRCS var_handle.cc DEPS place) cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context) cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) +nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory + dynload_cuda) diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc new file mode 100644 index 0000000000..a79c61f359 --- /dev/null +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" + +namespace paddle { +namespace framework { +namespace details { +NCCLAllReduceOpHandle::NCCLAllReduceOpHandle( + const std::vector &local_scopes, + const std::vector &places, + const platform::NCCLContextMap &ctxs) + : local_scopes_(local_scopes), places_(places), nccl_ctxs_(ctxs) { + for (auto &p : places_) { + this->dev_ctx_[p] = nccl_ctxs_.DevCtx(p); + } +} + +void NCCLAllReduceOpHandle::RunImpl() { + if (inputs_.size() == 1) { + return; // No need to all reduce when GPU count = 1; + } else { + // Wait input done + for (auto *in : inputs_) { + auto &p = static_cast(in)->place_; + in->generated_op_->Wait(dev_ctx_[p]); + } + + auto &var_name = static_cast(this->inputs_[0])->name_; + int dtype = -1; + size_t numel = 0; + + platform::NCCLGroupGuard guard; + + for (size_t i = 0; i < local_scopes_.size(); ++i) { + auto &p = places_[i]; + auto *s = local_scopes_[i]; + int dev_id = boost::get(p).device; + + auto &lod_tensor = s->FindVar(var_name)->Get(); + void *buffer = const_cast(lod_tensor.data()); + uintptr_t buf = reinterpret_cast(buffer); + if (buf % sizeof(float) != 0) { + VLOG(3) << "Buffer is not aligned " << buf; + } + + if (dtype == -1) { + dtype = platform::ToNCCLDataType(lod_tensor.type()); + } + + if (numel == 0) { + numel = static_cast(lod_tensor.numel()); + } + auto &nccl_ctx = nccl_ctxs_.at(dev_id); + PADDLE_ENFORCE(platform::dynload::ncclAllReduce( + buffer, buffer, numel, static_cast(dtype), ncclSum, + nccl_ctx.comm_, nccl_ctx.stream())); + } + } +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h new file mode 100644 index 0000000000..7152d1a587 --- /dev/null +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h @@ -0,0 +1,41 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/nccl_helper.h" + +namespace paddle { +namespace framework { +namespace details { + +struct NCCLAllReduceOpHandle : public OpHandleBase { + const std::vector &local_scopes_; + const std::vector &places_; + const platform::NCCLContextMap &nccl_ctxs_; + + NCCLAllReduceOpHandle(const std::vector &local_scopes, + const std::vector &places, + const platform::NCCLContextMap &ctxs); + + protected: + void RunImpl() override; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 8c29aacab6..93db5ad3e5 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include "lod_tensor_array.h" #include "op_registry.h" #include "paddle/fluid/framework/details/fetch_op_handle.h" +#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" #include "paddle/fluid/framework/details/var_handle.h" @@ -28,6 +29,7 @@ namespace framework { using details::DummyVarHandle; using details::FetchOpHandle; +using details::NCCLAllReduceOpHandle; using details::OpHandleBase; using details::ScaleLossGradOpHandle; using details::VarHandle; @@ -123,69 +125,6 @@ class ParallelExecutorPrivate { var.place_ = place; op_handle->AddOutput(&var); } -}; // namespace framework - -struct NCCLAllReduceOpHandle : public OpHandleBase { - const std::vector &local_scopes_; - const std::vector &places_; - const platform::NCCLContextMap &nccl_ctxs_; - - explicit NCCLAllReduceOpHandle(const std::vector &local_scopes, - const std::vector &places, - const platform::NCCLContextMap &ctxs) - : local_scopes_(local_scopes), places_(places), nccl_ctxs_(ctxs) { - for (auto &p : places_) { - this->dev_ctx_[p] = nccl_ctxs_.DevCtx(p); - } - } - - void Wait(platform::DeviceContext *waited_dev) override { - OpHandleBase::Wait(waited_dev); - } - - protected: - void RunImpl() override { - if (inputs_.size() == 1) { - return; // No need to all reduce when GPU count = 1; - } else { - // Wait input done - for (auto *in : inputs_) { - auto &p = static_cast(in)->place_; - in->generated_op_->Wait(dev_ctx_[p]); - } - - auto &var_name = static_cast(this->inputs_[0])->name_; - int dtype = -1; - size_t numel = 0; - - platform::NCCLGroupGuard guard; - - for (size_t i = 0; i < local_scopes_.size(); ++i) { - auto &p = places_[i]; - auto *s = local_scopes_[i]; - int dev_id = boost::get(p).device; - - auto &lod_tensor = s->FindVar(var_name)->Get(); - void *buffer = const_cast(lod_tensor.data()); - uintptr_t buf = reinterpret_cast(buffer); - if (buf % sizeof(float) != 0) { - VLOG(3) << "Buffer is not aligned " << buf; - } - - if (dtype == -1) { - dtype = platform::ToNCCLDataType(lod_tensor.type()); - } - - if (numel == 0) { - numel = static_cast(lod_tensor.numel()); - } - auto &nccl_ctx = nccl_ctxs_.at(dev_id); - PADDLE_ENFORCE(platform::dynload::ncclAllReduce( - buffer, buffer, numel, static_cast(dtype), ncclSum, - nccl_ctx.comm_, nccl_ctx.stream())); - } - } - } }; struct ComputationOpHandle : public OpHandleBase { From 31815010130249033096ea584bc2c89983a7e367 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 17:02:51 +0800 Subject: [PATCH 114/180] Rerange code --- paddle/fluid/framework/CMakeLists.txt | 4 +- paddle/fluid/framework/details/CMakeLists.txt | 1 + .../details/computation_op_handle.cc | 40 +++++++++++++++++++ .../framework/details/computation_op_handle.h | 39 ++++++++++++++++++ paddle/fluid/framework/parallel_executor.cc | 28 +------------ 5 files changed, 84 insertions(+), 28 deletions(-) create mode 100644 paddle/fluid/framework/details/computation_op_handle.cc create mode 100644 paddle/fluid/framework/details/computation_op_handle.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 12d6541b8f..2b90bb5abd 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -94,8 +94,8 @@ else() set(parallel_executor_cuda_deps) endif() cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - framework_proto backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle - fetch_op_handle ${parallel_executor_cuda_deps}) + backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle + fetch_op_handle computation_op_handle ${parallel_executor_cuda_deps}) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index fb276ea703..7565bc4c9c 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -4,3 +4,4 @@ cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_h cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory dynload_cuda) +cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry) diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc new file mode 100644 index 0000000000..5867f8fc55 --- /dev/null +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -0,0 +1,40 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/computation_op_handle.h" + +namespace paddle { +namespace framework { +namespace details { +ComputationOpHandle::ComputationOpHandle(const OpDesc &op_desc, Scope *scope, + platform::Place place) + : op_(framework::OpRegistry::CreateOp(op_desc)), + scope_(scope), + place_(place) {} + +void ComputationOpHandle::RunImpl() { + auto *cur_ctx = dev_ctx_[place_]; + for (auto *in : inputs_) { + bool need_wait = + in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx; + if (need_wait) { + in->generated_op_->Wait(cur_ctx); + } + } + + op_->Run(*scope_, place_); +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/computation_op_handle.h b/paddle/fluid/framework/details/computation_op_handle.h new file mode 100644 index 0000000000..1fbfd4eabe --- /dev/null +++ b/paddle/fluid/framework/details/computation_op_handle.h @@ -0,0 +1,39 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace framework { +namespace details { +struct ComputationOpHandle : public OpHandleBase { + std::unique_ptr op_; + Scope *scope_; + platform::Place place_; + + ComputationOpHandle(const OpDesc &op_desc, Scope *scope, + platform::Place place); + + protected: + void RunImpl() override; +}; +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 93db5ad3e5..440040a2ef 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include "lod_tensor.h" #include "lod_tensor_array.h" #include "op_registry.h" +#include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" #include "paddle/fluid/framework/details/op_handle_base.h" @@ -34,6 +35,7 @@ using details::OpHandleBase; using details::ScaleLossGradOpHandle; using details::VarHandle; using details::VarHandleBase; +using details::ComputationOpHandle; class ParallelExecutorPrivate { public: @@ -127,32 +129,6 @@ class ParallelExecutorPrivate { } }; -struct ComputationOpHandle : public OpHandleBase { - std::unique_ptr op_; - Scope *scope_; - platform::Place place_; - - explicit ComputationOpHandle(const OpDesc &op_desc, Scope *scope, - platform::Place place) - : op_(framework::OpRegistry::CreateOp(op_desc)), - scope_(scope), - place_(place) {} - - protected: - void RunImpl() override { - auto *cur_ctx = dev_ctx_[place_]; - for (auto *in : inputs_) { - bool need_wait = - in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx; - if (need_wait) { - in->generated_op_->Wait(cur_ctx); - } - } - - op_->Run(*scope_, place_); - } -}; - ParallelExecutor::ParallelExecutor( size_t num_threads, const std::vector &places, const std::unordered_set ¶ms, From 8dec4ad7a1c37b705b584e64c3eef4d6df320c13 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 17:12:27 +0800 Subject: [PATCH 115/180] Use int not Place for vars --- paddle/fluid/framework/parallel_executor.cc | 46 ++++++++++----------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 440040a2ef..d3919f0d51 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -28,6 +28,7 @@ limitations under the License. */ namespace paddle { namespace framework { +using details::ComputationOpHandle; using details::DummyVarHandle; using details::FetchOpHandle; using details::NCCLAllReduceOpHandle; @@ -35,7 +36,6 @@ using details::OpHandleBase; using details::ScaleLossGradOpHandle; using details::VarHandle; using details::VarHandleBase; -using details::ComputationOpHandle; class ParallelExecutorPrivate { public: @@ -43,7 +43,9 @@ class ParallelExecutorPrivate { const std::vector &places) : places_(places), fetch_dev_ctxs_(places), - pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) {} + pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) { + vars_.resize(places.size()); + } std::vector places_; platform::DeviceContextPool fetch_dev_ctxs_; @@ -52,12 +54,7 @@ class ParallelExecutorPrivate { std::unique_ptr nccl_ctxs_; - platform::Place main_place_; - - std::unordered_map>, - platform::PlaceHash> - vars_; + std::vector>> vars_; std::unordered_set> dep_vars_; @@ -69,8 +66,8 @@ class ParallelExecutorPrivate { std::unique_ptr exception_; VarHandle *GetVarHandle(const std::string &each_var_name, - const platform::Place &place) { - auto &var_holders = vars_[place]; + const platform::Place &place, size_t place_offset) { + auto &var_holders = vars_[place_offset]; auto &var_holder = var_holders[each_var_name]; VarHandle *var = nullptr; if (var_holder.empty()) { @@ -118,8 +115,8 @@ class ParallelExecutorPrivate { } void GenerateVar(OpHandleBase *op_handle, const std::string &each_var_name, - const platform::Place &place) { - auto &vars = vars_[place][each_var_name]; + const platform::Place &place, size_t place_offset) { + auto &vars = vars_[place_offset][each_var_name]; size_t version = vars.size(); auto &var = vars[version]; var.version_ = version; @@ -144,11 +141,10 @@ ParallelExecutor::ParallelExecutor( for (size_t i = 0; i < member_->places_.size(); ++i) { member_->local_scopes_.push_back(&scope->NewScope()); } - member_->main_place_ = places[0]; // Bcast Parameters to all GPUs BuildNCCLCommunicator(); - if (platform::is_gpu_place(member_->main_place_) && + if (platform::is_gpu_place(places[0]) && member_->local_scopes_.size() != 1) { // Is CUDA BCastParamsToGPUs(startup_program); } @@ -201,13 +197,13 @@ void ParallelExecutor::ConstructDependencyGraph( auto var_names = op->InputArgumentNames(); for (auto &each_var_name : var_names) { - VarHandle *var = member_->GetVarHandle(each_var_name, p); + VarHandle *var = member_->GetVarHandle(each_var_name, p, i); op_handle->AddInput(var); } var_names = op->OutputArgumentNames(); for (auto &each_var_name : var_names) { - member_->GenerateVar(op_handle, each_var_name, p); + member_->GenerateVar(op_handle, each_var_name, p, i); } if (is_forwarding) { @@ -224,7 +220,7 @@ void ParallelExecutor::ConstructDependencyGraph( // loss->pending_ops_.emplace_back(op_handle); // op_handle->inputs_.emplace_back(loss); - member_->GenerateVar(op_handle, loss_var_name + "@GRAD", p); + member_->GenerateVar(op_handle, loss_var_name + "@GRAD", p, i); change_forward = true; } } @@ -245,7 +241,7 @@ void ParallelExecutor::ConstructDependencyGraph( for (size_t i = 0; i < member_->places_.size(); ++i) { auto &p = member_->places_[i]; - auto &vars = member_->vars_[p][og]; + auto &vars = member_->vars_[i][og]; if (vars.empty()) { // This device has no data. continue. continue; @@ -280,8 +276,8 @@ void ParallelExecutor::ConstructDependencyGraph( * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) */ void ParallelExecutor::PolishGraphToSupportDataHazards() const { - for (auto &place_pair : member_->vars_) { - for (auto &name_pair : place_pair.second) { + for (auto &var_map : member_->vars_) { + for (auto &name_pair : var_map) { if (name_pair.second.size() <= 1) { return; } @@ -369,8 +365,8 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, std::unordered_map pending_ops; std::vector dummy_vars; - for (auto &place_pair : member_->vars_) { - for (auto &name_pair : place_pair.second) { + for (auto &var_map : member_->vars_) { + for (auto &name_pair : var_map) { for (auto &version_pair : name_pair.second) { pending_vars[&version_pair.second] = version_pair.second.generated_op_ == nullptr; @@ -395,9 +391,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, std::unordered_map> fetched_vars; for (auto &fetch_var_name : fetch_tensors) { - for (auto &pair : member_->vars_) { - auto it = pair.second.find(fetch_var_name); - if (it != pair.second.end()) { + for (auto &var_map : member_->vars_) { + auto it = var_map.find(fetch_var_name); + if (it != var_map.end()) { fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); } } From 64d7a3027157c0de8dcfdbb27e5d013620a68151 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 18:11:23 +0800 Subject: [PATCH 116/180] Extract SSAGraph --- paddle/fluid/framework/parallel_executor.cc | 189 ++++++++++---------- paddle/fluid/framework/parallel_executor.h | 2 - 2 files changed, 98 insertions(+), 93 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d3919f0d51..37bfdc0df5 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -37,6 +37,86 @@ using details::ScaleLossGradOpHandle; using details::VarHandle; using details::VarHandleBase; +struct SSAGraph { + std::vector>> vars_; + std::unordered_set> dep_vars_; + std::vector> ops_; +}; + +/** + * We only handle write after read(WAR), since it should not have a write + * after write in program. If there are write after write operators, we need + * prune them. + * + * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) + */ +static void PolishGraphToSupportDataHazards(SSAGraph *graph) { + for (auto &var_map : graph->vars_) { + for (auto &name_pair : var_map) { + if (name_pair.second.size() <= 1) { + return; + } + auto it_new = name_pair.second.rbegin(); + auto it_old = name_pair.second.rbegin(); + ++it_old; + for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { + auto *write_op = it_new->second.generated_op_; + auto &read_ops = it_old->second.pending_ops_; + auto *ex_write_op = it_old->second.generated_op_; + + if (ex_write_op == nullptr) { // Nobody write this var. + continue; + } + + for (auto *read_op : read_ops) { + // Manually add a dependency var from read_op to write_op; + if (read_op == write_op) { + // Read Write is the same op. + continue; + } + + auto *dep_var = new DummyVarHandle(); + read_op->AddOutput(dep_var); + write_op->AddInput(dep_var); + graph->dep_vars_.emplace(dep_var); + } + } + } + } +} + +static VarHandle *CreateOrGetLatestVarHandle(SSAGraph *graph, + const std::string &each_var_name, + const platform::Place &place, + size_t place_offset) { + auto &var_holders = graph->vars_[place_offset]; + auto &var_holder = var_holders[each_var_name]; + VarHandle *var = nullptr; + if (var_holder.empty()) { + auto &init_var = var_holder[0]; + init_var.place_ = place; + init_var.name_ = each_var_name; + init_var.generated_op_ = nullptr; + init_var.version_ = 0; + var = &init_var; + } else { + var = &var_holder.rbegin()->second; + } + return var; +} + +static void CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, + const std::string &each_var_name, + const platform::Place &place, size_t place_offset) { + auto &vars = graph->vars_[place_offset][each_var_name]; + size_t version = vars.size(); + auto &var = vars[version]; + var.version_ = version; + var.name_ = each_var_name; + var.place_ = place; + op_handle->AddOutput(&var); +} + class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(size_t num_threads, @@ -44,7 +124,7 @@ class ParallelExecutorPrivate { : places_(places), fetch_dev_ctxs_(places), pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) { - vars_.resize(places.size()); + graph_.vars_.resize(places.size()); } std::vector places_; @@ -54,35 +134,13 @@ class ParallelExecutorPrivate { std::unique_ptr nccl_ctxs_; - std::vector>> vars_; - - std::unordered_set> dep_vars_; - - std::vector> ops_; + SSAGraph graph_; // Use a simpler thread pool, might be faster. std::unique_ptr pool_; std::unique_ptr exception_; - VarHandle *GetVarHandle(const std::string &each_var_name, - const platform::Place &place, size_t place_offset) { - auto &var_holders = vars_[place_offset]; - auto &var_holder = var_holders[each_var_name]; - VarHandle *var = nullptr; - if (var_holder.empty()) { - auto &init_var = var_holder[0]; - init_var.place_ = place; - init_var.name_ = each_var_name; - init_var.generated_op_ = nullptr; - init_var.version_ = 0; - var = &init_var; - } else { - var = &var_holder.rbegin()->second; - } - return var; - } - void RunOp( bool use_event, std::unordered_map> &pending_vars, @@ -113,17 +171,6 @@ class ParallelExecutorPrivate { op_run(); } } - - void GenerateVar(OpHandleBase *op_handle, const std::string &each_var_name, - const platform::Place &place, size_t place_offset) { - auto &vars = vars_[place_offset][each_var_name]; - size_t version = vars.size(); - auto &var = vars[version]; - var.version_ = version; - var.name_ = each_var_name; - var.place_ = place; - op_handle->AddOutput(&var); - } }; ParallelExecutor::ParallelExecutor( @@ -189,21 +236,22 @@ void ParallelExecutor::ConstructDependencyGraph( auto &p = member_->places_[i]; auto *s = member_->local_scopes_[i]; - member_->ops_.emplace_back(new ComputationOpHandle(*op, s, p)); - auto *op_handle = member_->ops_.back().get(); + member_->graph_.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); + auto *op_handle = member_->graph_.ops_.back().get(); op_handle->dev_ctx_[p] = const_cast( platform::DeviceContextPool::Instance().Get(p)); auto var_names = op->InputArgumentNames(); for (auto &each_var_name : var_names) { - VarHandle *var = member_->GetVarHandle(each_var_name, p, i); + VarHandle *var = + CreateOrGetLatestVarHandle(&member_->graph_, each_var_name, p, i); op_handle->AddInput(var); } var_names = op->OutputArgumentNames(); for (auto &each_var_name : var_names) { - member_->GenerateVar(op_handle, each_var_name, p, i); + CreateOpOutput(&member_->graph_, op_handle, each_var_name, p, i); } if (is_forwarding) { @@ -212,7 +260,7 @@ void ParallelExecutor::ConstructDependencyGraph( op_handle = new ScaleLossGradOpHandle(this->member_->local_scopes_.size(), s, p, member_->nccl_ctxs_->DevCtx(p)); - member_->ops_.emplace_back(op_handle); + member_->graph_.ops_.emplace_back(op_handle); // FIXME: Currently ScaleLossGradOp only use device_count as scale // factor. So it does not depend on any other operators. @@ -220,7 +268,8 @@ void ParallelExecutor::ConstructDependencyGraph( // loss->pending_ops_.emplace_back(op_handle); // op_handle->inputs_.emplace_back(loss); - member_->GenerateVar(op_handle, loss_var_name + "@GRAD", p, i); + CreateOpOutput(&member_->graph_, op_handle, loss_var_name + "@GRAD", + p, i); change_forward = true; } } @@ -235,13 +284,13 @@ void ParallelExecutor::ConstructDependencyGraph( for (auto &og : var_names) { if (grads.count(og) != 0) { // is param grad // Insert NCCL AllReduce Op - member_->ops_.emplace_back(new NCCLAllReduceOpHandle( + member_->graph_.ops_.emplace_back(new NCCLAllReduceOpHandle( member_->local_scopes_, member_->places_, *member_->nccl_ctxs_)); - auto *op_handle = member_->ops_.back().get(); + auto *op_handle = member_->graph_.ops_.back().get(); for (size_t i = 0; i < member_->places_.size(); ++i) { auto &p = member_->places_[i]; - auto &vars = member_->vars_[i][og]; + auto &vars = member_->graph_.vars_[i][og]; if (vars.empty()) { // This device has no data. continue. continue; @@ -265,49 +314,7 @@ void ParallelExecutor::ConstructDependencyGraph( Dependency graph has been constructed. However, there are still data harzaeds need to be handled. */ - PolishGraphToSupportDataHazards(); -} - -/** - * We only handle write after read(WAR), since it should not have a write - * after write in program. If there are write after write operators, we need - * prune them. - * - * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) - */ -void ParallelExecutor::PolishGraphToSupportDataHazards() const { - for (auto &var_map : member_->vars_) { - for (auto &name_pair : var_map) { - if (name_pair.second.size() <= 1) { - return; - } - auto it_new = name_pair.second.rbegin(); - auto it_old = name_pair.second.rbegin(); - ++it_old; - for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { - auto *write_op = it_new->second.generated_op_; - auto &read_ops = it_old->second.pending_ops_; - auto *ex_write_op = it_old->second.generated_op_; - - if (ex_write_op == nullptr) { // Nobody write this var. - continue; - } - - for (auto *read_op : read_ops) { - // Manually add a dependency var from read_op to write_op; - if (read_op == write_op) { - // Read Write is the same op. - continue; - } - - auto *dep_var = new DummyVarHandle(); - read_op->AddOutput(dep_var); - write_op->AddInput(dep_var); - member_->dep_vars_.emplace(dep_var); - } - } - } - } + PolishGraphToSupportDataHazards(&member_->graph_); } void ParallelExecutor::BCastParamsToGPUs( @@ -365,7 +372,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, std::unordered_map pending_ops; std::vector dummy_vars; - for (auto &var_map : member_->vars_) { + for (auto &var_map : member_->graph_.vars_) { for (auto &name_pair : var_map) { for (auto &version_pair : name_pair.second) { pending_vars[&version_pair.second] = @@ -374,13 +381,13 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } - for (auto &var : member_->dep_vars_) { + for (auto &var : member_->graph_.dep_vars_) { pending_vars[var.get()] = var->generated_op_ == nullptr; } std::vector to_run; - for (auto &op : member_->ops_) { + for (auto &op : member_->graph_.ops_) { if (op->inputs_.empty()) { // Special case, Op has no input. to_run.emplace_back(op.get()); } else { @@ -391,7 +398,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, std::unordered_map> fetched_vars; for (auto &fetch_var_name : fetch_tensors) { - for (auto &var_map : member_->vars_) { + for (auto &var_map : member_->graph_.vars_) { auto it = var_map.find(fetch_var_name); if (it != var_map.end()) { fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 466b5f5f62..8c91c45d14 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -52,8 +52,6 @@ class ParallelExecutor { const std::string& loss_var_name) const; void BuildNCCLCommunicator() const; - - void PolishGraphToSupportDataHazards() const; }; } // namespace framework From 79989c902530fcaf525161b8d1b3eaee9d634291 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 21 Mar 2018 20:17:11 +0800 Subject: [PATCH 117/180] Add SSA builder --- paddle/fluid/framework/parallel_executor.cc | 369 +++++++++++--------- paddle/fluid/framework/parallel_executor.h | 4 - 2 files changed, 199 insertions(+), 174 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 37bfdc0df5..b2be3d1305 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -43,79 +43,211 @@ struct SSAGraph { std::vector> ops_; }; -/** - * We only handle write after read(WAR), since it should not have a write - * after write in program. If there are write after write operators, we need - * prune them. - * - * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) - */ -static void PolishGraphToSupportDataHazards(SSAGraph *graph) { - for (auto &var_map : graph->vars_) { - for (auto &name_pair : var_map) { - if (name_pair.second.size() <= 1) { - return; - } - auto it_new = name_pair.second.rbegin(); - auto it_old = name_pair.second.rbegin(); - ++it_old; - for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { - auto *write_op = it_new->second.generated_op_; - auto &read_ops = it_old->second.pending_ops_; - auto *ex_write_op = it_old->second.generated_op_; - - if (ex_write_op == nullptr) { // Nobody write this var. - continue; +class SSAGraphBuilder { + public: + virtual ~SSAGraphBuilder() {} + virtual void Build(const ProgramDesc &program, SSAGraph *graph) const = 0; + + protected: + /** + * We only handle write after read(WAR), since it should not have a write + * after write in program. If there are write after write operators, we need + * prune them. + * + * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) + */ + static void PolishGraphToSupportDataHazards(SSAGraph *graph) { + for (auto &var_map : graph->vars_) { + for (auto &name_pair : var_map) { + if (name_pair.second.size() <= 1) { + return; } - - for (auto *read_op : read_ops) { - // Manually add a dependency var from read_op to write_op; - if (read_op == write_op) { - // Read Write is the same op. + auto it_new = name_pair.second.rbegin(); + auto it_old = name_pair.second.rbegin(); + ++it_old; + for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { + auto *write_op = it_new->second.generated_op_; + auto &read_ops = it_old->second.pending_ops_; + auto *ex_write_op = it_old->second.generated_op_; + + if (ex_write_op == nullptr) { // Nobody write this var. continue; } - auto *dep_var = new DummyVarHandle(); - read_op->AddOutput(dep_var); - write_op->AddInput(dep_var); - graph->dep_vars_.emplace(dep_var); + for (auto *read_op : read_ops) { + // Manually add a dependency var from read_op to write_op; + if (read_op == write_op) { + // Read Write is the same op. + continue; + } + + auto *dep_var = new DummyVarHandle(); + read_op->AddOutput(dep_var); + write_op->AddInput(dep_var); + graph->dep_vars_.emplace(dep_var); + } } } } } -} -static VarHandle *CreateOrGetLatestVarHandle(SSAGraph *graph, - const std::string &each_var_name, - const platform::Place &place, - size_t place_offset) { - auto &var_holders = graph->vars_[place_offset]; - auto &var_holder = var_holders[each_var_name]; - VarHandle *var = nullptr; - if (var_holder.empty()) { - auto &init_var = var_holder[0]; - init_var.place_ = place; - init_var.name_ = each_var_name; - init_var.generated_op_ = nullptr; - init_var.version_ = 0; - var = &init_var; - } else { - var = &var_holder.rbegin()->second; + static VarHandle *CreateOrGetLatestVarHandle(SSAGraph *graph, + const std::string &each_var_name, + const platform::Place &place, + size_t place_offset) { + auto &var_holders = graph->vars_[place_offset]; + auto &var_holder = var_holders[each_var_name]; + VarHandle *var = nullptr; + if (var_holder.empty()) { + auto &init_var = var_holder[0]; + init_var.place_ = place; + init_var.name_ = each_var_name; + init_var.generated_op_ = nullptr; + init_var.version_ = 0; + var = &init_var; + } else { + var = &var_holder.rbegin()->second; + } + return var; } - return var; -} -static void CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, - const std::string &each_var_name, - const platform::Place &place, size_t place_offset) { - auto &vars = graph->vars_[place_offset][each_var_name]; - size_t version = vars.size(); - auto &var = vars[version]; - var.version_ = version; - var.name_ = each_var_name; - var.place_ = place; - op_handle->AddOutput(&var); -} + static void CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, + const std::string &each_var_name, + const platform::Place &place, + size_t place_offset) { + auto &vars = graph->vars_[place_offset][each_var_name]; + size_t version = vars.size(); + auto &var = vars[version]; + var.version_ = version; + var.name_ = each_var_name; + var.place_ = place; + op_handle->AddOutput(&var); + } +}; + +class MultiDevSSAGraphBuilder : public SSAGraphBuilder { + public: + MultiDevSSAGraphBuilder(const std::vector &places, + const std::string &loss_var_name, + const std::unordered_set ¶ms, + const std::vector &local_scopes, + platform::NCCLContextMap *nccl_ctxs) + : loss_var_name_(loss_var_name), + places_(places), + local_scopes_(local_scopes), + nccl_ctxs_(nccl_ctxs) { + for (auto &p : params) { + grad_names_.insert(GradVarName(p)); + } + } + + void Build(const ProgramDesc &program, SSAGraph *graph) const override { + SSAGraph &result = *graph; + result.vars_.resize(places_.size()); + + bool is_forwarding = true; + for (auto *op : program.Block(0).AllOps()) { + bool change_forward = false; + if (!is_forwarding) { + // FIXME(yy): Do not hard code like this + if (op->OutputArgumentNames().size() == 1 && + op->OutputArgumentNames()[0] == GradVarName(loss_var_name_)) { + continue; // Drop fill 1. for backward coeff; + } + } + + for (size_t i = 0; i < places_.size(); ++i) { + auto &p = places_[i]; + auto *s = local_scopes_[i]; + + result.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); + auto *op_handle = result.ops_.back().get(); + op_handle->dev_ctx_[p] = const_cast( + platform::DeviceContextPool::Instance().Get(p)); + + auto var_names = op->InputArgumentNames(); + + for (auto &each_var_name : var_names) { + VarHandle *var = + CreateOrGetLatestVarHandle(&result, each_var_name, p, i); + op_handle->AddInput(var); + } + var_names = op->OutputArgumentNames(); + + for (auto &each_var_name : var_names) { + CreateOpOutput(&result, op_handle, each_var_name, p, i); + } + + if (is_forwarding) { + if (var_names.size() == 1 && var_names[0] == loss_var_name_) { + // Insert ScaleCost OpHandle + op_handle = new ScaleLossGradOpHandle(local_scopes_.size(), s, p, + nccl_ctxs_->DevCtx(p)); + result.ops_.emplace_back(op_handle); + + // FIXME: Currently ScaleLossGradOp only use device_count as scale + // factor. So it does not depend on any other operators. + // VarHandle *loss = GetVarHandle(loss_var_name, place); + // loss->pending_ops_.emplace_back(op_handle); + // op_handle->inputs_.emplace_back(loss); + + CreateOpOutput(&result, op_handle, GradVarName(loss_var_name_), p, + i); + change_forward = true; + } + } + } + + if (change_forward) { + is_forwarding = false; + } + + if (!is_forwarding) { + auto var_names = op->OutputArgumentNames(); + for (auto &og : var_names) { + if (grad_names_.count(og) != 0) { // is param grad + // Insert NCCL AllReduce Op + result.ops_.emplace_back( + new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_)); + auto *op_handle = result.ops_.back().get(); + + for (size_t i = 0; i < places_.size(); ++i) { + auto &p = places_[i]; + auto &vars = result.vars_[i][og]; + + if (vars.empty()) { // This device has no data. continue. + continue; + } + auto *prev_grad = &vars[vars.size() - 1]; + op_handle->AddInput(prev_grad); + + auto &var = vars[vars.size()]; + var.place_ = p; + var.name_ = og; + var.version_ = vars.size() - 1; + + op_handle->AddOutput(&var); + } + } + } + } + } + + /* + Dependency graph has been constructed. However, there are still data + harzaeds need to be handled. + */ + PolishGraphToSupportDataHazards(&result); + } + + private: + std::string loss_var_name_; + const std::vector &places_; + const std::vector &local_scopes_; + platform::NCCLContextMap *nccl_ctxs_; + + std::unordered_set grad_names_; +}; class ParallelExecutorPrivate { public: @@ -123,9 +255,7 @@ class ParallelExecutorPrivate { const std::vector &places) : places_(places), fetch_dev_ctxs_(places), - pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) { - graph_.vars_.resize(places.size()); - } + pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) {} std::vector places_; platform::DeviceContextPool fetch_dev_ctxs_; @@ -199,7 +329,10 @@ ParallelExecutor::ParallelExecutor( // Step 2. Convert main_program to SSA form and dependency graph. Also, insert // ncclOp - ConstructDependencyGraph(params, main_program, loss_var_name); + MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, params, + member_->local_scopes_, + member_->nccl_ctxs_.get()); + builder.Build(main_program, &member_->graph_); // Step 3. Create vars in each scope; for (auto *scope : member_->local_scopes_) { @@ -213,110 +346,6 @@ ParallelExecutor::ParallelExecutor( } } -void ParallelExecutor::ConstructDependencyGraph( - const std::unordered_set ¶ms, - const ProgramDesc &main_program, const std::string &loss_var_name) const { - std::unordered_set grads; - for (auto &each_param : params) { - grads.insert(each_param + "@GRAD"); - } - - bool is_forwarding = true; - for (auto *op : main_program.Block(0).AllOps()) { - bool change_forward = false; - if (!is_forwarding) { - // FIXME(yy): Do not hard code like this - if (op->OutputArgumentNames().size() == 1 && - op->OutputArgumentNames()[0] == loss_var_name + "@GRAD") { - continue; // Drop fill 1. for backward coeff; - } - } - - for (size_t i = 0; i < member_->places_.size(); ++i) { - auto &p = member_->places_[i]; - auto *s = member_->local_scopes_[i]; - - member_->graph_.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); - auto *op_handle = member_->graph_.ops_.back().get(); - op_handle->dev_ctx_[p] = const_cast( - platform::DeviceContextPool::Instance().Get(p)); - - auto var_names = op->InputArgumentNames(); - - for (auto &each_var_name : var_names) { - VarHandle *var = - CreateOrGetLatestVarHandle(&member_->graph_, each_var_name, p, i); - op_handle->AddInput(var); - } - var_names = op->OutputArgumentNames(); - - for (auto &each_var_name : var_names) { - CreateOpOutput(&member_->graph_, op_handle, each_var_name, p, i); - } - - if (is_forwarding) { - if (var_names.size() == 1 && var_names[0] == loss_var_name) { - // Insert ScaleCost OpHandle - op_handle = - new ScaleLossGradOpHandle(this->member_->local_scopes_.size(), s, - p, member_->nccl_ctxs_->DevCtx(p)); - member_->graph_.ops_.emplace_back(op_handle); - - // FIXME: Currently ScaleLossGradOp only use device_count as scale - // factor. So it does not depend on any other operators. - // VarHandle *loss = GetVarHandle(loss_var_name, place); - // loss->pending_ops_.emplace_back(op_handle); - // op_handle->inputs_.emplace_back(loss); - - CreateOpOutput(&member_->graph_, op_handle, loss_var_name + "@GRAD", - p, i); - change_forward = true; - } - } - } - - if (change_forward) { - is_forwarding = false; - } - - if (!is_forwarding) { - auto var_names = op->OutputArgumentNames(); - for (auto &og : var_names) { - if (grads.count(og) != 0) { // is param grad - // Insert NCCL AllReduce Op - member_->graph_.ops_.emplace_back(new NCCLAllReduceOpHandle( - member_->local_scopes_, member_->places_, *member_->nccl_ctxs_)); - auto *op_handle = member_->graph_.ops_.back().get(); - - for (size_t i = 0; i < member_->places_.size(); ++i) { - auto &p = member_->places_[i]; - auto &vars = member_->graph_.vars_[i][og]; - - if (vars.empty()) { // This device has no data. continue. - continue; - } - auto *prev_grad = &vars[vars.size() - 1]; - op_handle->AddInput(prev_grad); - - auto &var = vars[vars.size()]; - var.place_ = p; - var.name_ = og; - var.version_ = vars.size() - 1; - - op_handle->AddOutput(&var); - } - } - } - } - } - - /* - Dependency graph has been constructed. However, there are still data - harzaeds need to be handled. - */ - PolishGraphToSupportDataHazards(&member_->graph_); -} - void ParallelExecutor::BCastParamsToGPUs( const ProgramDesc &startup_program) const { #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 8c91c45d14..39a1c51b9e 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -47,10 +47,6 @@ class ParallelExecutor { void BCastParamsToGPUs(const ProgramDesc& startup_program) const; - void ConstructDependencyGraph(const std::unordered_set& params, - const ProgramDesc& main_program, - const std::string& loss_var_name) const; - void BuildNCCLCommunicator() const; }; From dd73d18bb7b7cb521cab2f3547633fd6736e8c12 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 22 Mar 2018 10:49:51 +0800 Subject: [PATCH 118/180] Extract SSAGraph --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/framework/details/CMakeLists.txt | 2 ++ paddle/fluid/framework/details/ssa_graph.cc | 15 ++++++++ paddle/fluid/framework/details/ssa_graph.h | 34 +++++++++++++++++++ paddle/fluid/framework/parallel_executor.cc | 12 ++----- 5 files changed, 54 insertions(+), 11 deletions(-) create mode 100644 paddle/fluid/framework/details/ssa_graph.cc create mode 100644 paddle/fluid/framework/details/ssa_graph.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 2b90bb5abd..f1d19efa97 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -95,7 +95,7 @@ else() endif() cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle - fetch_op_handle computation_op_handle ${parallel_executor_cuda_deps}) + fetch_op_handle computation_op_handle ssa_graph ${parallel_executor_cuda_deps}) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 7565bc4c9c..9ed41ab94c 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -5,3 +5,5 @@ cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory dynload_cuda) cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry) + +cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base) diff --git a/paddle/fluid/framework/details/ssa_graph.cc b/paddle/fluid/framework/details/ssa_graph.cc new file mode 100644 index 0000000000..1b8c889449 --- /dev/null +++ b/paddle/fluid/framework/details/ssa_graph.cc @@ -0,0 +1,15 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/ssa_graph.h" diff --git a/paddle/fluid/framework/details/ssa_graph.h b/paddle/fluid/framework/details/ssa_graph.h new file mode 100644 index 0000000000..c1e041b8c0 --- /dev/null +++ b/paddle/fluid/framework/details/ssa_graph.h @@ -0,0 +1,34 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/details/var_handle.h" + +namespace paddle { +namespace framework { +namespace details { + +struct SSAGraph { + std::vector>> vars_; + std::unordered_set> dep_vars_; + std::vector> ops_; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b2be3d1305..5c10595db9 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -15,15 +15,12 @@ limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" #include "ThreadPool.h" #include "lod_tensor.h" -#include "lod_tensor_array.h" #include "op_registry.h" #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" -#include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" -#include "paddle/fluid/framework/details/var_handle.h" -#include "paddle/fluid/platform/nccl_helper.h" +#include "paddle/fluid/framework/details/ssa_graph.h" namespace paddle { namespace framework { @@ -34,15 +31,10 @@ using details::FetchOpHandle; using details::NCCLAllReduceOpHandle; using details::OpHandleBase; using details::ScaleLossGradOpHandle; +using details::SSAGraph; using details::VarHandle; using details::VarHandleBase; -struct SSAGraph { - std::vector>> vars_; - std::unordered_set> dep_vars_; - std::vector> ops_; -}; - class SSAGraphBuilder { public: virtual ~SSAGraphBuilder() {} From b123e43bf99fa84b68c91e16d92a8aac5508e88e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Mar 2018 12:28:14 +0800 Subject: [PATCH 119/180] extract multi devices graph builder --- paddle/fluid/framework/CMakeLists.txt | 9 +- paddle/fluid/framework/details/CMakeLists.txt | 3 + .../details/multi_devices_graph_builder.cc | 140 ++++++++++ .../details/multi_devices_graph_builder.h | 46 ++++ .../framework/details/ssa_graph_builder.cc | 88 ++++++ .../framework/details/ssa_graph_builder.h | 56 ++++ paddle/fluid/framework/parallel_executor.cc | 254 ++---------------- 7 files changed, 354 insertions(+), 242 deletions(-) create mode 100644 paddle/fluid/framework/details/multi_devices_graph_builder.cc create mode 100644 paddle/fluid/framework/details/multi_devices_graph_builder.h create mode 100644 paddle/fluid/framework/details/ssa_graph_builder.cc create mode 100644 paddle/fluid/framework/details/ssa_graph_builder.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index f1d19efa97..d3f69ee9d8 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -88,14 +88,9 @@ cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glo cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) -if(WITH_GPU) - set(parallel_executor_cuda_deps nccl_all_reduce_op_handle) -else() - set(parallel_executor_cuda_deps) -endif() + cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - backward glog lod_rank_table simple_threadpool scale_loss_grad_op_handle - fetch_op_handle computation_op_handle ssa_graph ${parallel_executor_cuda_deps}) + backward glog lod_rank_table simple_threadpool multi_devices_graph_builder fetch_op_handle) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 9ed41ab94c..4432bc0245 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -7,3 +7,6 @@ nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_h cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry) cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base) +cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph) +cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle + nccl_all_reduce_op_handle scale_loss_grad_op_handle) diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc new file mode 100644 index 0000000000..3fab6adf0f --- /dev/null +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -0,0 +1,140 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" +#include "paddle/fluid/framework/details/computation_op_handle.h" +#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" +#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/nccl_helper.h" + +namespace paddle { +namespace framework { +namespace details { +MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( + const std::vector &places, + const std::string &loss_var_name, + const std::unordered_set ¶ms, + const std::vector &local_scopes, + platform::NCCLContextMap *nccl_ctxs) + : loss_var_name_(loss_var_name), + places_(places), + local_scopes_(local_scopes), + nccl_ctxs_(nccl_ctxs) { + for (auto &p : params) { + grad_names_.insert(GradVarName(p)); + } +} + +void MultiDevSSAGraphBuilder::Build(const ProgramDesc &program, + SSAGraph *graph) const { + SSAGraph &result = *graph; + result.vars_.resize(places_.size()); + + bool is_forwarding = true; + for (auto *op : program.Block(0).AllOps()) { + bool change_forward = false; + if (!is_forwarding) { + // FIXME(yy): Do not hard code like this + if (op->OutputArgumentNames().size() == 1 && + op->OutputArgumentNames()[0] == GradVarName(loss_var_name_)) { + continue; // Drop fill 1. for backward coeff; + } + } + + for (size_t i = 0; i < places_.size(); ++i) { + auto &p = places_[i]; + auto *s = local_scopes_[i]; + + result.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); + auto *op_handle = result.ops_.back().get(); + op_handle->dev_ctx_[p] = const_cast( + platform::DeviceContextPool::Instance().Get(p)); + + auto var_names = op->InputArgumentNames(); + + for (auto &each_var_name : var_names) { + VarHandle *var = + CreateOrGetLatestVarHandle(&result, each_var_name, p, i); + op_handle->AddInput(var); + } + var_names = op->OutputArgumentNames(); + + for (auto &each_var_name : var_names) { + CreateOpOutput(&result, op_handle, each_var_name, p, i); + } + + if (is_forwarding) { + if (var_names.size() == 1 && var_names[0] == loss_var_name_) { + // Insert ScaleCost OpHandle + op_handle = new ScaleLossGradOpHandle(local_scopes_.size(), s, p, + nccl_ctxs_->DevCtx(p)); + result.ops_.emplace_back(op_handle); + + // FIXME: Currently ScaleLossGradOp only use device_count as scale + // factor. So it does not depend on any other operators. + // VarHandle *loss = GetVarHandle(loss_var_name, place); + // loss->pending_ops_.emplace_back(op_handle); + // op_handle->inputs_.emplace_back(loss); + + CreateOpOutput(&result, op_handle, GradVarName(loss_var_name_), p, i); + change_forward = true; + } + } + } + + if (change_forward) { + is_forwarding = false; + } + + if (!is_forwarding) { + auto var_names = op->OutputArgumentNames(); + for (auto &og : var_names) { + if (grad_names_.count(og) != 0) { // is param grad + // Insert NCCL AllReduce Op + result.ops_.emplace_back( + new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_)); + auto *op_handle = result.ops_.back().get(); + + for (size_t i = 0; i < places_.size(); ++i) { + auto &p = places_[i]; + auto &vars = result.vars_[i][og]; + + if (vars.empty()) { // This device has no data. continue. + continue; + } + auto *prev_grad = &vars[vars.size() - 1]; + op_handle->AddInput(prev_grad); + + auto &var = vars[vars.size()]; + var.place_ = p; + var.name_ = og; + var.version_ = vars.size() - 1; + + op_handle->AddOutput(&var); + } + } + } + } + } + + /* + Dependency graph has been constructed. However, there are still data + harzaeds need to be handled. + */ + PolishGraphToSupportDataHazards(&result); +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h new file mode 100644 index 0000000000..510f85bc87 --- /dev/null +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h @@ -0,0 +1,46 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/ssa_graph_builder.h" + +namespace paddle { +namespace platform { +class NCCLContextMap; +} + +namespace framework { +class Scope; +namespace details { +class MultiDevSSAGraphBuilder : public SSAGraphBuilder { + public: + MultiDevSSAGraphBuilder(const std::vector &places, + const std::string &loss_var_name, + const std::unordered_set ¶ms, + const std::vector &local_scopes, + platform::NCCLContextMap *nccl_ctxs); + + void Build(const ProgramDesc &program, SSAGraph *graph) const override; + + private: + std::string loss_var_name_; + const std::vector &places_; + const std::vector &local_scopes_; + platform::NCCLContextMap *nccl_ctxs_; + std::unordered_set grad_names_; +}; +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/ssa_graph_builder.cc b/paddle/fluid/framework/details/ssa_graph_builder.cc new file mode 100644 index 0000000000..7a80a4b1e7 --- /dev/null +++ b/paddle/fluid/framework/details/ssa_graph_builder.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/ssa_graph_builder.h" + +namespace paddle { +namespace framework { +namespace details { +void SSAGraphBuilder::PolishGraphToSupportDataHazards(SSAGraph *graph) { + for (auto &var_map : graph->vars_) { + for (auto &name_pair : var_map) { + if (name_pair.second.size() <= 1) { + return; + } + auto it_new = name_pair.second.rbegin(); + auto it_old = name_pair.second.rbegin(); + ++it_old; + for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { + auto *write_op = it_new->second.generated_op_; + auto &read_ops = it_old->second.pending_ops_; + auto *ex_write_op = it_old->second.generated_op_; + + if (ex_write_op == nullptr) { // Nobody write this var. + continue; + } + + for (auto *read_op : read_ops) { + // Manually add a dependency var from read_op to write_op; + if (read_op == write_op) { + // Read Write is the same op. + continue; + } + + auto *dep_var = new DummyVarHandle(); + read_op->AddOutput(dep_var); + write_op->AddInput(dep_var); + graph->dep_vars_.emplace(dep_var); + } + } + } + } +} + +VarHandle *SSAGraphBuilder::CreateOrGetLatestVarHandle( + SSAGraph *graph, const std::string &each_var_name, + const platform::Place &place, size_t place_offset) { + auto &var_holders = graph->vars_[place_offset]; + auto &var_holder = var_holders[each_var_name]; + VarHandle *var = nullptr; + if (var_holder.empty()) { + auto &init_var = var_holder[0]; + init_var.place_ = place; + init_var.name_ = each_var_name; + init_var.generated_op_ = nullptr; + init_var.version_ = 0; + var = &init_var; + } else { + var = &var_holder.rbegin()->second; + } + return var; +} + +void SSAGraphBuilder::CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, + const std::string &each_var_name, + const platform::Place &place, + size_t place_offset) { + auto &vars = graph->vars_[place_offset][each_var_name]; + size_t version = vars.size(); + auto &var = vars[version]; + var.version_ = version; + var.name_ = each_var_name; + var.place_ = place; + op_handle->AddOutput(&var); +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/ssa_graph_builder.h b/paddle/fluid/framework/details/ssa_graph_builder.h new file mode 100644 index 0000000000..848b90293a --- /dev/null +++ b/paddle/fluid/framework/details/ssa_graph_builder.h @@ -0,0 +1,56 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/framework/details/ssa_graph.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/platform/place.h" + +#include + +namespace paddle { +namespace framework { +namespace details { + +class SSAGraphBuilder { + public: + SSAGraphBuilder() {} + virtual ~SSAGraphBuilder() {} + virtual void Build(const ProgramDesc &program, SSAGraph *graph) const = 0; + + DISABLE_COPY_AND_ASSIGN(SSAGraphBuilder); + + protected: + /** + * We only handle write after read(WAR), since it should not have a write + * after write in program. If there are write after write operators, we need + * prune them. + * + * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) + */ + static void PolishGraphToSupportDataHazards(SSAGraph *graph); + + static VarHandle *CreateOrGetLatestVarHandle(SSAGraph *graph, + const std::string &each_var_name, + const platform::Place &place, + size_t place_offset); + + static void CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, + const std::string &each_var_name, + const platform::Place &place, size_t place_offset); +}; +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 5c10595db9..4ebb89181c 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -16,231 +16,14 @@ limitations under the License. */ #include "ThreadPool.h" #include "lod_tensor.h" #include "op_registry.h" -#include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/fetch_op_handle.h" -#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" -#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" +#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/details/ssa_graph.h" +#include "paddle/fluid/platform/nccl_helper.h" namespace paddle { namespace framework { -using details::ComputationOpHandle; -using details::DummyVarHandle; -using details::FetchOpHandle; -using details::NCCLAllReduceOpHandle; -using details::OpHandleBase; -using details::ScaleLossGradOpHandle; -using details::SSAGraph; -using details::VarHandle; -using details::VarHandleBase; - -class SSAGraphBuilder { - public: - virtual ~SSAGraphBuilder() {} - virtual void Build(const ProgramDesc &program, SSAGraph *graph) const = 0; - - protected: - /** - * We only handle write after read(WAR), since it should not have a write - * after write in program. If there are write after write operators, we need - * prune them. - * - * https://en.wikipedia.org/wiki/Hazard_(computer_architecture)#Write_after_read_(WAR) - */ - static void PolishGraphToSupportDataHazards(SSAGraph *graph) { - for (auto &var_map : graph->vars_) { - for (auto &name_pair : var_map) { - if (name_pair.second.size() <= 1) { - return; - } - auto it_new = name_pair.second.rbegin(); - auto it_old = name_pair.second.rbegin(); - ++it_old; - for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { - auto *write_op = it_new->second.generated_op_; - auto &read_ops = it_old->second.pending_ops_; - auto *ex_write_op = it_old->second.generated_op_; - - if (ex_write_op == nullptr) { // Nobody write this var. - continue; - } - - for (auto *read_op : read_ops) { - // Manually add a dependency var from read_op to write_op; - if (read_op == write_op) { - // Read Write is the same op. - continue; - } - - auto *dep_var = new DummyVarHandle(); - read_op->AddOutput(dep_var); - write_op->AddInput(dep_var); - graph->dep_vars_.emplace(dep_var); - } - } - } - } - } - - static VarHandle *CreateOrGetLatestVarHandle(SSAGraph *graph, - const std::string &each_var_name, - const platform::Place &place, - size_t place_offset) { - auto &var_holders = graph->vars_[place_offset]; - auto &var_holder = var_holders[each_var_name]; - VarHandle *var = nullptr; - if (var_holder.empty()) { - auto &init_var = var_holder[0]; - init_var.place_ = place; - init_var.name_ = each_var_name; - init_var.generated_op_ = nullptr; - init_var.version_ = 0; - var = &init_var; - } else { - var = &var_holder.rbegin()->second; - } - return var; - } - - static void CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, - const std::string &each_var_name, - const platform::Place &place, - size_t place_offset) { - auto &vars = graph->vars_[place_offset][each_var_name]; - size_t version = vars.size(); - auto &var = vars[version]; - var.version_ = version; - var.name_ = each_var_name; - var.place_ = place; - op_handle->AddOutput(&var); - } -}; - -class MultiDevSSAGraphBuilder : public SSAGraphBuilder { - public: - MultiDevSSAGraphBuilder(const std::vector &places, - const std::string &loss_var_name, - const std::unordered_set ¶ms, - const std::vector &local_scopes, - platform::NCCLContextMap *nccl_ctxs) - : loss_var_name_(loss_var_name), - places_(places), - local_scopes_(local_scopes), - nccl_ctxs_(nccl_ctxs) { - for (auto &p : params) { - grad_names_.insert(GradVarName(p)); - } - } - - void Build(const ProgramDesc &program, SSAGraph *graph) const override { - SSAGraph &result = *graph; - result.vars_.resize(places_.size()); - - bool is_forwarding = true; - for (auto *op : program.Block(0).AllOps()) { - bool change_forward = false; - if (!is_forwarding) { - // FIXME(yy): Do not hard code like this - if (op->OutputArgumentNames().size() == 1 && - op->OutputArgumentNames()[0] == GradVarName(loss_var_name_)) { - continue; // Drop fill 1. for backward coeff; - } - } - - for (size_t i = 0; i < places_.size(); ++i) { - auto &p = places_[i]; - auto *s = local_scopes_[i]; - - result.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); - auto *op_handle = result.ops_.back().get(); - op_handle->dev_ctx_[p] = const_cast( - platform::DeviceContextPool::Instance().Get(p)); - - auto var_names = op->InputArgumentNames(); - - for (auto &each_var_name : var_names) { - VarHandle *var = - CreateOrGetLatestVarHandle(&result, each_var_name, p, i); - op_handle->AddInput(var); - } - var_names = op->OutputArgumentNames(); - - for (auto &each_var_name : var_names) { - CreateOpOutput(&result, op_handle, each_var_name, p, i); - } - - if (is_forwarding) { - if (var_names.size() == 1 && var_names[0] == loss_var_name_) { - // Insert ScaleCost OpHandle - op_handle = new ScaleLossGradOpHandle(local_scopes_.size(), s, p, - nccl_ctxs_->DevCtx(p)); - result.ops_.emplace_back(op_handle); - - // FIXME: Currently ScaleLossGradOp only use device_count as scale - // factor. So it does not depend on any other operators. - // VarHandle *loss = GetVarHandle(loss_var_name, place); - // loss->pending_ops_.emplace_back(op_handle); - // op_handle->inputs_.emplace_back(loss); - - CreateOpOutput(&result, op_handle, GradVarName(loss_var_name_), p, - i); - change_forward = true; - } - } - } - - if (change_forward) { - is_forwarding = false; - } - - if (!is_forwarding) { - auto var_names = op->OutputArgumentNames(); - for (auto &og : var_names) { - if (grad_names_.count(og) != 0) { // is param grad - // Insert NCCL AllReduce Op - result.ops_.emplace_back( - new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_)); - auto *op_handle = result.ops_.back().get(); - - for (size_t i = 0; i < places_.size(); ++i) { - auto &p = places_[i]; - auto &vars = result.vars_[i][og]; - - if (vars.empty()) { // This device has no data. continue. - continue; - } - auto *prev_grad = &vars[vars.size() - 1]; - op_handle->AddInput(prev_grad); - - auto &var = vars[vars.size()]; - var.place_ = p; - var.name_ = og; - var.version_ = vars.size() - 1; - - op_handle->AddOutput(&var); - } - } - } - } - } - - /* - Dependency graph has been constructed. However, there are still data - harzaeds need to be handled. - */ - PolishGraphToSupportDataHazards(&result); - } - - private: - std::string loss_var_name_; - const std::vector &places_; - const std::vector &local_scopes_; - platform::NCCLContextMap *nccl_ctxs_; - - std::unordered_set grad_names_; -}; - class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(size_t num_threads, @@ -256,17 +39,17 @@ class ParallelExecutorPrivate { std::unique_ptr nccl_ctxs_; - SSAGraph graph_; + details::SSAGraph graph_; // Use a simpler thread pool, might be faster. std::unique_ptr pool_; std::unique_ptr exception_; - void RunOp( - bool use_event, - std::unordered_map> &pending_vars, - OpHandleBase *op) { + void RunOp(bool use_event, + std::unordered_map> + &pending_vars, + details::OpHandleBase *op) { std::vector *> *ready_buffer = new std::vector *>(); for (auto *var : op->outputs_) { @@ -321,9 +104,9 @@ ParallelExecutor::ParallelExecutor( // Step 2. Convert main_program to SSA form and dependency graph. Also, insert // ncclOp - MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, params, - member_->local_scopes_, - member_->nccl_ctxs_.get()); + details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, + params, member_->local_scopes_, + member_->nccl_ctxs_.get()); builder.Build(main_program, &member_->graph_); // Step 3. Create vars in each scope; @@ -389,9 +172,9 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, FeedFetchList fetched_data(fetch_tensors.size()); // Version --> VarHandle member_->exception_.reset(); - std::unordered_map> pending_vars; - std::unordered_map pending_ops; - std::vector dummy_vars; + std::unordered_map> pending_vars; + std::unordered_map pending_ops; + std::vector dummy_vars; for (auto &var_map : member_->graph_.vars_) { for (auto &name_pair : var_map) { @@ -406,7 +189,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, pending_vars[var.get()] = var->generated_op_ == nullptr; } - std::vector to_run; + std::vector to_run; for (auto &op : member_->graph_.ops_) { if (op->inputs_.empty()) { // Special case, Op has no input. @@ -416,7 +199,8 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } - std::unordered_map> fetched_vars; + std::unordered_map> + fetched_vars; for (auto &fetch_var_name : fetch_tensors) { for (auto &var_map : member_->graph_.vars_) { @@ -427,13 +211,13 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } } - std::vector fetch_ops; + std::vector fetch_ops; for (size_t i = 0; i < fetch_tensors.size(); ++i) { auto &var_name = fetch_tensors[i]; auto &vars = fetched_vars[var_name]; fetch_ops.emplace_back(&fetched_data, i, &member_->local_scopes_); - FetchOpHandle *op = &fetch_ops.back(); + details::FetchOpHandle *op = &fetch_ops.back(); // FIXME: Use new device context for (auto &p : member_->places_) { @@ -457,7 +241,7 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } while (!pending_vars.empty()) { - VarHandleBase *ready_var = nullptr; + details::VarHandleBase *ready_var = nullptr; for (auto &pair : pending_vars) { if (pair.second.load(std::memory_order_acquire)) { ready_var = pair.first; From 4c3361cda826f9ca2e5c96637b1481211f2bba63 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Mar 2018 13:39:57 +0800 Subject: [PATCH 120/180] Extract GraphExecutor --- paddle/fluid/framework/parallel_executor.cc | 323 ++++++++++++-------- 1 file changed, 194 insertions(+), 129 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 4ebb89181c..78ef66be51 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -24,42 +24,184 @@ limitations under the License. */ namespace paddle { namespace framework { -class ParallelExecutorPrivate { +using details::DummyVarHandle; +using details::FetchOpHandle; +using details::OpHandleBase; +using details::SSAGraph; +using details::VarHandleBase; + +class SSAGraphExecutor { + DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor); + public: - explicit ParallelExecutorPrivate(size_t num_threads, - const std::vector &places) - : places_(places), - fetch_dev_ctxs_(places), - pool_(num_threads <= 1 ? nullptr : new ThreadPool(num_threads)) {} + explicit SSAGraphExecutor(SSAGraph *graph) : graph_(*graph) {} - std::vector places_; - platform::DeviceContextPool fetch_dev_ctxs_; - std::vector local_scopes_; - Scope *global_scope_; + virtual ~SSAGraphExecutor() {} - std::unique_ptr nccl_ctxs_; + virtual void Run(Scope *global_scope, + const std::vector &fetch_tensors, + const std::string &fetch_list_name) = 0; - details::SSAGraph graph_; + protected: + SSAGraph &graph_; +}; - // Use a simpler thread pool, might be faster. - std::unique_ptr pool_; +class ThreadedSSAGraphExecutor : public SSAGraphExecutor { + public: + ThreadedSSAGraphExecutor(size_t num_threads, bool use_event, + const std::vector &local_scopes, + const std::vector &places, + SSAGraph *graph) + : SSAGraphExecutor(graph), + pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr), + local_scopes_(local_scopes), + places_(places), + fetch_ctxs_(places), + use_event_(use_event) {} + + void Run(Scope *global_scope, const std::vector &fetch_tensors, + const std::string &fetch_list_name) override { + std::unordered_map pending_ops; + std::unordered_map> pending_vars; + std::unordered_set ready_ops; + + auto InsertPendingVar = [&pending_vars](VarHandleBase &var) { + pending_vars[&var] = var.generated_op_ == nullptr; + }; - std::unique_ptr exception_; + auto InsertPendingOp = [&pending_ops](OpHandleBase &op_instance) { + pending_ops.insert({&op_instance, op_instance.inputs_.size()}); + }; + + // Transform SSAGraph to pending_ops & pending_vars + for (auto &var_map : graph_.vars_) { + for (auto &name_pair : var_map) { + for (auto &version_pair : name_pair.second) { + InsertPendingVar(version_pair.second); + } + } + } + for (auto &var : graph_.dep_vars_) { + InsertPendingVar(*var); + } + + for (auto &op : graph_.ops_) { + if (op->inputs_.empty()) { // Special case, Op has no input. + ready_ops.insert(op.get()); + } else { + InsertPendingOp(*op); + } + } + + // Step 2. Insert FetchOps + std::vector fetch_ops; + std::vector dummy_vars; + FeedFetchList fetch_data(fetch_tensors.size()); + + std::unordered_map> fetched_vars; + + for (auto &fetch_var_name : fetch_tensors) { + for (auto &var_map : graph_.vars_) { + auto it = var_map.find(fetch_var_name); + if (it != var_map.end()) { + fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); + } + } + } - void RunOp(bool use_event, - std::unordered_map> - &pending_vars, - details::OpHandleBase *op) { + for (size_t i = 0; i < fetch_tensors.size(); ++i) { + auto &var_name = fetch_tensors[i]; + auto &vars = fetched_vars[var_name]; + fetch_ops.emplace_back(&fetch_data, i, &local_scopes_); + details::FetchOpHandle *op = &fetch_ops.back(); + + // FIXME: Use new device context + for (auto &p : places_) { + op->dev_ctx_[p] = fetch_ctxs_.Get(p); + } + + for (auto *var : vars) { + op->AddInput(var); + } + + dummy_vars.emplace_back(); + auto *var = &dummy_vars.back(); + var->generated_op_ = nullptr; + op->AddOutput(var); + InsertPendingVar(*var); + InsertPendingOp(*op); + } + + auto run_all_ready_ops = [&] { + for (auto *op : ready_ops) { + RunOp(pending_vars, op); + } + ready_ops.clear(); + }; + + // Step 3. Execution + while (!pending_vars.empty()) { + // 1. Run All Ready ops + run_all_ready_ops(); + + // 2. Find ready variable + VarHandleBase *ready_var = nullptr; + for (auto &pair : pending_vars) { + if (pair.second.load(std::memory_order_acquire)) { + ready_var = pair.first; + break; + } + } + + // if there is no variable ready + if (ready_var == nullptr) { + // FIXME use conditional var instead of busy wait. + // if there is an exception, throw it + if (exception_) { + throw * exception_; + } + // keep waiting the ready variables + continue; + } + + // 3. Remove the dependency of ready_var. + // Find the ready_ops after the ready_var. + pending_vars.erase(ready_var); + for (auto *op : ready_var->pending_ops_) { + auto &deps = pending_ops[op]; + --deps; + if (deps == 0) { + ready_ops.insert(op); + } + } + // Keep loop until all vars are ready. + } + + // Wait FetchOps. + for (auto &fetch_op : fetch_ops) { + fetch_op.WaitAndMergeCPUTensors(); + } + + *global_scope->Var(fetch_list_name)->GetMutable() = + fetch_data; + } + + ~ThreadedSSAGraphExecutor() {} + + private: + void RunOp( + std::unordered_map> &pending_vars, + details::OpHandleBase *op) { std::vector *> *ready_buffer = new std::vector *>(); for (auto *var : op->outputs_) { ready_buffer->emplace_back(&pending_vars[var]); } - auto op_run = [ready_buffer, op, this, use_event] { + auto op_run = [ready_buffer, op, this] { try { VLOG(10) << op->DebugString(); - op->Run(use_event); + op->Run(use_event_); for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); } @@ -76,6 +218,31 @@ class ParallelExecutorPrivate { op_run(); } } + + private: + std::unique_ptr<::ThreadPool> pool_; + std::vector local_scopes_; + std::vector places_; + platform::DeviceContextPool fetch_ctxs_; + const bool use_event_; + std::unique_ptr exception_; +}; + +class ParallelExecutorPrivate { + public: + explicit ParallelExecutorPrivate(const std::vector &places) + : places_(places), fetch_dev_ctxs_(places) {} + + std::vector places_; + platform::DeviceContextPool fetch_dev_ctxs_; + std::vector local_scopes_; + Scope *global_scope_; + + std::unique_ptr nccl_ctxs_; + + details::SSAGraph graph_; + + std::unique_ptr executor_; }; ParallelExecutor::ParallelExecutor( @@ -83,7 +250,7 @@ ParallelExecutor::ParallelExecutor( const std::unordered_set ¶ms, const ProgramDesc &startup_program, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope) - : member_(new ParallelExecutorPrivate(num_threads, places)) { + : member_(new ParallelExecutorPrivate(places)) { member_->global_scope_ = scope; // Step 1. RunStartupProgram and Bcast the params to devs. @@ -109,6 +276,9 @@ ParallelExecutor::ParallelExecutor( member_->nccl_ctxs_.get()); builder.Build(main_program, &member_->graph_); + member_->executor_.reset(new ThreadedSSAGraphExecutor( + num_threads, true, member_->local_scopes_, places, &member_->graph_)); + // Step 3. Create vars in each scope; for (auto *scope : member_->local_scopes_) { for (auto *var : main_program.Block(0).AllVars()) { @@ -168,113 +338,8 @@ void ParallelExecutor::BuildNCCLCommunicator() const { void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { - bool use_event = true; - FeedFetchList fetched_data(fetch_tensors.size()); - // Version --> VarHandle - member_->exception_.reset(); - std::unordered_map> pending_vars; - std::unordered_map pending_ops; - std::vector dummy_vars; - - for (auto &var_map : member_->graph_.vars_) { - for (auto &name_pair : var_map) { - for (auto &version_pair : name_pair.second) { - pending_vars[&version_pair.second] = - version_pair.second.generated_op_ == nullptr; - } - } - } - - for (auto &var : member_->graph_.dep_vars_) { - pending_vars[var.get()] = var->generated_op_ == nullptr; - } - - std::vector to_run; - - for (auto &op : member_->graph_.ops_) { - if (op->inputs_.empty()) { // Special case, Op has no input. - to_run.emplace_back(op.get()); - } else { - pending_ops.insert({op.get(), op->inputs_.size()}); - } - } - - std::unordered_map> - fetched_vars; - - for (auto &fetch_var_name : fetch_tensors) { - for (auto &var_map : member_->graph_.vars_) { - auto it = var_map.find(fetch_var_name); - if (it != var_map.end()) { - fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); - } - } - } - - std::vector fetch_ops; - - for (size_t i = 0; i < fetch_tensors.size(); ++i) { - auto &var_name = fetch_tensors[i]; - auto &vars = fetched_vars[var_name]; - fetch_ops.emplace_back(&fetched_data, i, &member_->local_scopes_); - details::FetchOpHandle *op = &fetch_ops.back(); - - // FIXME: Use new device context - for (auto &p : member_->places_) { - op->dev_ctx_[p] = member_->fetch_dev_ctxs_.Get(p); - } - - for (auto *var : vars) { - op->AddInput(var); - } - - dummy_vars.emplace_back(); - auto *var = &dummy_vars.back(); - op->AddOutput(var); - pending_vars[var] = false; - - pending_ops.insert({op, op->inputs_.size()}); - } - - for (auto *op : to_run) { - member_->RunOp(use_event, pending_vars, op); - } - - while (!pending_vars.empty()) { - details::VarHandleBase *ready_var = nullptr; - for (auto &pair : pending_vars) { - if (pair.second.load(std::memory_order_acquire)) { - ready_var = pair.first; - } - } - if (ready_var == nullptr) { - // FIXME use conditional var instead of busy wait. - if (member_->exception_) { - throw * member_->exception_; - } - continue; - } - pending_vars.erase(ready_var); - to_run.clear(); - for (auto *op : ready_var->pending_ops_) { - auto &deps = pending_ops[op]; - --deps; - if (deps == 0) { - to_run.emplace_back(op); - } - } - for (auto *op : to_run) { - pending_ops.erase(op); - member_->RunOp(use_event, pending_vars, op); - } - } - - for (auto &fetch_op : fetch_ops) { - fetch_op.WaitAndMergeCPUTensors(); - } - - *member_->global_scope_->Var(fetched_var_name)->GetMutable() = - fetched_data; + member_->executor_->Run(member_->global_scope_, fetch_tensors, + fetched_var_name); } } // namespace framework From c70b60dd70d41a349a6ed4d5aad9a60facc49c60 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Mar 2018 13:56:52 +0800 Subject: [PATCH 121/180] Make executor steal graph inside --- .../details/multi_devices_graph_builder.cc | 7 +++- .../details/multi_devices_graph_builder.h | 2 +- .../framework/details/ssa_graph_builder.h | 3 +- paddle/fluid/framework/parallel_executor.cc | 41 +++++++++---------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 3fab6adf0f..b27647a8ee 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -37,8 +37,9 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( } } -void MultiDevSSAGraphBuilder::Build(const ProgramDesc &program, - SSAGraph *graph) const { +std::unique_ptr MultiDevSSAGraphBuilder::Build( + const ProgramDesc &program) const { + auto graph = new SSAGraph(); SSAGraph &result = *graph; result.vars_.resize(places_.size()); @@ -134,6 +135,8 @@ void MultiDevSSAGraphBuilder::Build(const ProgramDesc &program, harzaeds need to be handled. */ PolishGraphToSupportDataHazards(&result); + + return std::unique_ptr(graph); } } // namespace details } // namespace framework diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h index 510f85bc87..17959a94d6 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.h +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h @@ -32,7 +32,7 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { const std::vector &local_scopes, platform::NCCLContextMap *nccl_ctxs); - void Build(const ProgramDesc &program, SSAGraph *graph) const override; + std::unique_ptr Build(const ProgramDesc &program) const override; private: std::string loss_var_name_; diff --git a/paddle/fluid/framework/details/ssa_graph_builder.h b/paddle/fluid/framework/details/ssa_graph_builder.h index 848b90293a..df05bb7394 100644 --- a/paddle/fluid/framework/details/ssa_graph_builder.h +++ b/paddle/fluid/framework/details/ssa_graph_builder.h @@ -18,6 +18,7 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/platform/place.h" +#include #include namespace paddle { @@ -28,7 +29,7 @@ class SSAGraphBuilder { public: SSAGraphBuilder() {} virtual ~SSAGraphBuilder() {} - virtual void Build(const ProgramDesc &program, SSAGraph *graph) const = 0; + virtual std::unique_ptr Build(const ProgramDesc &program) const = 0; DISABLE_COPY_AND_ASSIGN(SSAGraphBuilder); diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 78ef66be51..88070a06a2 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -34,16 +34,16 @@ class SSAGraphExecutor { DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor); public: - explicit SSAGraphExecutor(SSAGraph *graph) : graph_(*graph) {} + // Steal graph inside + explicit SSAGraphExecutor(std::unique_ptr &&graph) + : graph_(std::move(graph)) {} virtual ~SSAGraphExecutor() {} - virtual void Run(Scope *global_scope, - const std::vector &fetch_tensors, - const std::string &fetch_list_name) = 0; + virtual FeedFetchList Run(const std::vector &fetch_tensors) = 0; protected: - SSAGraph &graph_; + std::unique_ptr graph_; }; class ThreadedSSAGraphExecutor : public SSAGraphExecutor { @@ -51,16 +51,17 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { ThreadedSSAGraphExecutor(size_t num_threads, bool use_event, const std::vector &local_scopes, const std::vector &places, - SSAGraph *graph) - : SSAGraphExecutor(graph), + std::unique_ptr &&graph) + : SSAGraphExecutor(std::move(graph)), pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr), local_scopes_(local_scopes), places_(places), fetch_ctxs_(places), use_event_(use_event) {} - void Run(Scope *global_scope, const std::vector &fetch_tensors, - const std::string &fetch_list_name) override { + // Run a SSAGraph by a thread pool + // Use topological sort algorithm + FeedFetchList Run(const std::vector &fetch_tensors) override { std::unordered_map pending_ops; std::unordered_map> pending_vars; std::unordered_set ready_ops; @@ -74,18 +75,18 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { }; // Transform SSAGraph to pending_ops & pending_vars - for (auto &var_map : graph_.vars_) { + for (auto &var_map : graph_->vars_) { for (auto &name_pair : var_map) { for (auto &version_pair : name_pair.second) { InsertPendingVar(version_pair.second); } } } - for (auto &var : graph_.dep_vars_) { + for (auto &var : graph_->dep_vars_) { InsertPendingVar(*var); } - for (auto &op : graph_.ops_) { + for (auto &op : graph_->ops_) { if (op->inputs_.empty()) { // Special case, Op has no input. ready_ops.insert(op.get()); } else { @@ -101,7 +102,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { std::unordered_map> fetched_vars; for (auto &fetch_var_name : fetch_tensors) { - for (auto &var_map : graph_.vars_) { + for (auto &var_map : graph_->vars_) { auto it = var_map.find(fetch_var_name); if (it != var_map.end()) { fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); @@ -182,8 +183,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { fetch_op.WaitAndMergeCPUTensors(); } - *global_scope->Var(fetch_list_name)->GetMutable() = - fetch_data; + return fetch_data; } ~ThreadedSSAGraphExecutor() {} @@ -240,8 +240,6 @@ class ParallelExecutorPrivate { std::unique_ptr nccl_ctxs_; - details::SSAGraph graph_; - std::unique_ptr executor_; }; @@ -274,10 +272,10 @@ ParallelExecutor::ParallelExecutor( details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, params, member_->local_scopes_, member_->nccl_ctxs_.get()); - builder.Build(main_program, &member_->graph_); + auto graph = builder.Build(main_program); member_->executor_.reset(new ThreadedSSAGraphExecutor( - num_threads, true, member_->local_scopes_, places, &member_->graph_)); + num_threads, true, member_->local_scopes_, places, std::move(graph))); // Step 3. Create vars in each scope; for (auto *scope : member_->local_scopes_) { @@ -338,8 +336,9 @@ void ParallelExecutor::BuildNCCLCommunicator() const { void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { - member_->executor_->Run(member_->global_scope_, fetch_tensors, - fetched_var_name); + auto fetch_data = member_->executor_->Run(fetch_tensors); + *member_->global_scope_->Var(fetched_var_name)->GetMutable() = + fetch_data; } } // namespace framework From e3144393e3b6e0d74506f8b996c8b2931eb9641e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Mar 2018 14:15:20 +0800 Subject: [PATCH 122/180] Extract Executors to indie modules --- paddle/fluid/framework/CMakeLists.txt | 3 +- paddle/fluid/framework/details/CMakeLists.txt | 3 + .../framework/details/ssa_graph_executor.cc | 28 +++ .../framework/details/ssa_graph_executor.h | 41 ++++ .../details/threaded_ssa_graph_executor.cc | 192 +++++++++++++++ .../details/threaded_ssa_graph_executor.h | 55 +++++ paddle/fluid/framework/parallel_executor.cc | 219 +----------------- 7 files changed, 327 insertions(+), 214 deletions(-) create mode 100644 paddle/fluid/framework/details/ssa_graph_executor.cc create mode 100644 paddle/fluid/framework/details/ssa_graph_executor.h create mode 100644 paddle/fluid/framework/details/threaded_ssa_graph_executor.cc create mode 100644 paddle/fluid/framework/details/threaded_ssa_graph_executor.h diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index d3f69ee9d8..c425c71160 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -89,8 +89,7 @@ cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table feed_fetch_method) -cc_library(parallel_executor SRCS parallel_executor.cc DEPS op_registry device_context scope - backward glog lod_rank_table simple_threadpool multi_devices_graph_builder fetch_op_handle) +cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor) cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 4432bc0245..f13ac276fc 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -10,3 +10,6 @@ cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base) cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph) cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle nccl_all_reduce_op_handle scale_loss_grad_op_handle) +cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph) +cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope + simple_threadpool device_context) diff --git a/paddle/fluid/framework/details/ssa_graph_executor.cc b/paddle/fluid/framework/details/ssa_graph_executor.cc new file mode 100644 index 0000000000..8da6ca889b --- /dev/null +++ b/paddle/fluid/framework/details/ssa_graph_executor.cc @@ -0,0 +1,28 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/ssa_graph_executor.h" + +namespace paddle { +namespace framework { +namespace details { + +SSAGraphExecutor::SSAGraphExecutor(std::unique_ptr &&graph) + : graph_(std::move(graph)) {} + +SSAGraphExecutor::~SSAGraphExecutor() {} + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/ssa_graph_executor.h b/paddle/fluid/framework/details/ssa_graph_executor.h new file mode 100644 index 0000000000..3b818b1a45 --- /dev/null +++ b/paddle/fluid/framework/details/ssa_graph_executor.h @@ -0,0 +1,41 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/fluid/framework/details/ssa_graph.h" +#include "paddle/fluid/framework/feed_fetch_type.h" + +namespace paddle { +namespace framework { +namespace details { + +class SSAGraphExecutor { + DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor); + + public: + // Steal graph inside + explicit SSAGraphExecutor(std::unique_ptr &&graph); + + virtual ~SSAGraphExecutor(); + + virtual FeedFetchList Run(const std::vector &fetch_tensors) = 0; + + protected: + std::unique_ptr graph_; +}; +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc new file mode 100644 index 0000000000..86e880ed72 --- /dev/null +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -0,0 +1,192 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" + +#include "paddle/fluid/framework/details/fetch_op_handle.h" +#include "paddle/fluid/framework/scope.h" + +namespace paddle { +namespace framework { +namespace details { +ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( + size_t num_threads, bool use_event, + const std::vector &local_scopes, + const std::vector &places, + std::unique_ptr &&graph) + : SSAGraphExecutor(std::move(graph)), + pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr), + local_scopes_(local_scopes), + places_(places), + fetch_ctxs_(places), + use_event_(use_event) {} + +FeedFetchList ThreadedSSAGraphExecutor::Run( + const std::vector &fetch_tensors) { + std::unordered_map pending_ops; + std::unordered_map> pending_vars; + std::unordered_set ready_ops; + + auto InsertPendingVar = [&pending_vars](VarHandleBase &var) { + pending_vars[&var] = var.generated_op_ == nullptr; + }; + + auto InsertPendingOp = [&pending_ops](OpHandleBase &op_instance) { + pending_ops.insert({&op_instance, op_instance.inputs_.size()}); + }; + + // Transform SSAGraph to pending_ops & pending_vars + for (auto &var_map : graph_->vars_) { + for (auto &name_pair : var_map) { + for (auto &version_pair : name_pair.second) { + InsertPendingVar(version_pair.second); + } + } + } + for (auto &var : graph_->dep_vars_) { + InsertPendingVar(*var); + } + + for (auto &op : graph_->ops_) { + if (op->inputs_.empty()) { // Special case, Op has no input. + ready_ops.insert(op.get()); + } else { + InsertPendingOp(*op); + } + } + + // Step 2. Insert FetchOps + std::vector fetch_ops; + std::vector dummy_vars; + FeedFetchList fetch_data(fetch_tensors.size()); + + std::unordered_map> fetched_vars; + + for (auto &fetch_var_name : fetch_tensors) { + for (auto &var_map : graph_->vars_) { + auto it = var_map.find(fetch_var_name); + if (it != var_map.end()) { + fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); + } + } + } + + for (size_t i = 0; i < fetch_tensors.size(); ++i) { + auto &var_name = fetch_tensors[i]; + auto &vars = fetched_vars[var_name]; + fetch_ops.emplace_back(&fetch_data, i, &local_scopes_); + details::FetchOpHandle *op = &fetch_ops.back(); + + // FIXME: Use new device context + for (auto &p : places_) { + op->dev_ctx_[p] = fetch_ctxs_.Get(p); + } + + for (auto *var : vars) { + op->AddInput(var); + } + + dummy_vars.emplace_back(); + auto *var = &dummy_vars.back(); + var->generated_op_ = nullptr; + op->AddOutput(var); + InsertPendingVar(*var); + InsertPendingOp(*op); + } + + auto run_all_ready_ops = [&] { + for (auto *op : ready_ops) { + RunOp(pending_vars, op); + } + ready_ops.clear(); + }; + + // Step 3. Execution + while (!pending_vars.empty()) { + // 1. Run All Ready ops + run_all_ready_ops(); + + // 2. Find ready variable + VarHandleBase *ready_var = nullptr; + for (auto &pair : pending_vars) { + if (pair.second.load(std::memory_order_acquire)) { + ready_var = pair.first; + break; + } + } + + // if there is no variable ready + if (ready_var == nullptr) { + // FIXME use conditional var instead of busy wait. + // if there is an exception, throw it + if (exception_) { + throw * exception_; + } + // keep waiting the ready variables + continue; + } + + // 3. Remove the dependency of ready_var. + // Find the ready_ops after the ready_var. + pending_vars.erase(ready_var); + for (auto *op : ready_var->pending_ops_) { + auto &deps = pending_ops[op]; + --deps; + if (deps == 0) { + ready_ops.insert(op); + } + } + // Keep loop until all vars are ready. + } + + // Wait FetchOps. + for (auto &fetch_op : fetch_ops) { + fetch_op.WaitAndMergeCPUTensors(); + } + + return fetch_data; +} + +void ThreadedSSAGraphExecutor::RunOp( + std::unordered_map> &pending_vars, + details::OpHandleBase *op) { + std::vector *> *ready_buffer = + new std::vector *>(); + for (auto *var : op->outputs_) { + ready_buffer->emplace_back(&pending_vars[var]); + } + + auto op_run = [ready_buffer, op, this] { + try { + VLOG(10) << op->DebugString(); + op->Run(use_event_); + for (auto *ready : *ready_buffer) { + ready->store(true, std::memory_order_release); + } + delete ready_buffer; + } catch (platform::EnforceNotMet ex) { + exception_.reset(new platform::EnforceNotMet(ex)); + } catch (...) { + LOG(FATAL) << "Unknown exception catched"; + } + }; + if (pool_) { + pool_->enqueue(op_run); + } else { + op_run(); + } +} +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h new file mode 100644 index 0000000000..5b099c18c9 --- /dev/null +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -0,0 +1,55 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "ThreadPool.h" // ThreadPool in thrird party +#include "paddle/fluid/framework/details/ssa_graph_executor.h" + +namespace paddle { +namespace framework { +class Scope; + +namespace details { + +class ThreadedSSAGraphExecutor : public SSAGraphExecutor { + public: + ThreadedSSAGraphExecutor(size_t num_threads, bool use_event, + const std::vector &local_scopes, + const std::vector &places, + std::unique_ptr &&graph); + + // Run a SSAGraph by a thread pool + // Use topological sort algorithm + FeedFetchList Run(const std::vector &fetch_tensors) override; + + ~ThreadedSSAGraphExecutor() {} + + private: + void RunOp( + std::unordered_map> &pending_vars, + details::OpHandleBase *op); + + private: + std::unique_ptr<::ThreadPool> pool_; + std::vector local_scopes_; + std::vector places_; + platform::DeviceContextPool fetch_ctxs_; + const bool use_event_; + std::unique_ptr exception_; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 88070a06a2..78963fd568 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -13,221 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" + #include "ThreadPool.h" -#include "lod_tensor.h" -#include "op_registry.h" -#include "paddle/fluid/framework/details/fetch_op_handle.h" -#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" -#include "paddle/fluid/framework/details/ssa_graph.h" + #include "paddle/fluid/platform/nccl_helper.h" +#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" +#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" + namespace paddle { namespace framework { -using details::DummyVarHandle; -using details::FetchOpHandle; -using details::OpHandleBase; -using details::SSAGraph; -using details::VarHandleBase; - -class SSAGraphExecutor { - DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor); - - public: - // Steal graph inside - explicit SSAGraphExecutor(std::unique_ptr &&graph) - : graph_(std::move(graph)) {} - - virtual ~SSAGraphExecutor() {} - - virtual FeedFetchList Run(const std::vector &fetch_tensors) = 0; - - protected: - std::unique_ptr graph_; -}; - -class ThreadedSSAGraphExecutor : public SSAGraphExecutor { - public: - ThreadedSSAGraphExecutor(size_t num_threads, bool use_event, - const std::vector &local_scopes, - const std::vector &places, - std::unique_ptr &&graph) - : SSAGraphExecutor(std::move(graph)), - pool_(num_threads >= 2 ? new ::ThreadPool(num_threads) : nullptr), - local_scopes_(local_scopes), - places_(places), - fetch_ctxs_(places), - use_event_(use_event) {} - - // Run a SSAGraph by a thread pool - // Use topological sort algorithm - FeedFetchList Run(const std::vector &fetch_tensors) override { - std::unordered_map pending_ops; - std::unordered_map> pending_vars; - std::unordered_set ready_ops; - - auto InsertPendingVar = [&pending_vars](VarHandleBase &var) { - pending_vars[&var] = var.generated_op_ == nullptr; - }; - - auto InsertPendingOp = [&pending_ops](OpHandleBase &op_instance) { - pending_ops.insert({&op_instance, op_instance.inputs_.size()}); - }; - - // Transform SSAGraph to pending_ops & pending_vars - for (auto &var_map : graph_->vars_) { - for (auto &name_pair : var_map) { - for (auto &version_pair : name_pair.second) { - InsertPendingVar(version_pair.second); - } - } - } - for (auto &var : graph_->dep_vars_) { - InsertPendingVar(*var); - } - - for (auto &op : graph_->ops_) { - if (op->inputs_.empty()) { // Special case, Op has no input. - ready_ops.insert(op.get()); - } else { - InsertPendingOp(*op); - } - } - - // Step 2. Insert FetchOps - std::vector fetch_ops; - std::vector dummy_vars; - FeedFetchList fetch_data(fetch_tensors.size()); - - std::unordered_map> fetched_vars; - - for (auto &fetch_var_name : fetch_tensors) { - for (auto &var_map : graph_->vars_) { - auto it = var_map.find(fetch_var_name); - if (it != var_map.end()) { - fetched_vars[fetch_var_name].push_back(&it->second.rbegin()->second); - } - } - } - - for (size_t i = 0; i < fetch_tensors.size(); ++i) { - auto &var_name = fetch_tensors[i]; - auto &vars = fetched_vars[var_name]; - fetch_ops.emplace_back(&fetch_data, i, &local_scopes_); - details::FetchOpHandle *op = &fetch_ops.back(); - - // FIXME: Use new device context - for (auto &p : places_) { - op->dev_ctx_[p] = fetch_ctxs_.Get(p); - } - - for (auto *var : vars) { - op->AddInput(var); - } - - dummy_vars.emplace_back(); - auto *var = &dummy_vars.back(); - var->generated_op_ = nullptr; - op->AddOutput(var); - InsertPendingVar(*var); - InsertPendingOp(*op); - } - - auto run_all_ready_ops = [&] { - for (auto *op : ready_ops) { - RunOp(pending_vars, op); - } - ready_ops.clear(); - }; - - // Step 3. Execution - while (!pending_vars.empty()) { - // 1. Run All Ready ops - run_all_ready_ops(); - - // 2. Find ready variable - VarHandleBase *ready_var = nullptr; - for (auto &pair : pending_vars) { - if (pair.second.load(std::memory_order_acquire)) { - ready_var = pair.first; - break; - } - } - - // if there is no variable ready - if (ready_var == nullptr) { - // FIXME use conditional var instead of busy wait. - // if there is an exception, throw it - if (exception_) { - throw * exception_; - } - // keep waiting the ready variables - continue; - } - - // 3. Remove the dependency of ready_var. - // Find the ready_ops after the ready_var. - pending_vars.erase(ready_var); - for (auto *op : ready_var->pending_ops_) { - auto &deps = pending_ops[op]; - --deps; - if (deps == 0) { - ready_ops.insert(op); - } - } - // Keep loop until all vars are ready. - } - - // Wait FetchOps. - for (auto &fetch_op : fetch_ops) { - fetch_op.WaitAndMergeCPUTensors(); - } - - return fetch_data; - } - - ~ThreadedSSAGraphExecutor() {} - - private: - void RunOp( - std::unordered_map> &pending_vars, - details::OpHandleBase *op) { - std::vector *> *ready_buffer = - new std::vector *>(); - for (auto *var : op->outputs_) { - ready_buffer->emplace_back(&pending_vars[var]); - } - - auto op_run = [ready_buffer, op, this] { - try { - VLOG(10) << op->DebugString(); - op->Run(use_event_); - for (auto *ready : *ready_buffer) { - ready->store(true, std::memory_order_release); - } - delete ready_buffer; - } catch (platform::EnforceNotMet ex) { - exception_.reset(new platform::EnforceNotMet(ex)); - } catch (...) { - LOG(FATAL) << "Unknown exception catched"; - } - }; - if (pool_) { - pool_->enqueue(op_run); - } else { - op_run(); - } - } - - private: - std::unique_ptr<::ThreadPool> pool_; - std::vector local_scopes_; - std::vector places_; - platform::DeviceContextPool fetch_ctxs_; - const bool use_event_; - std::unique_ptr exception_; -}; - class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(const std::vector &places) @@ -239,8 +35,7 @@ class ParallelExecutorPrivate { Scope *global_scope_; std::unique_ptr nccl_ctxs_; - - std::unique_ptr executor_; + std::unique_ptr executor_; }; ParallelExecutor::ParallelExecutor( @@ -274,7 +69,7 @@ ParallelExecutor::ParallelExecutor( member_->nccl_ctxs_.get()); auto graph = builder.Build(main_program); - member_->executor_.reset(new ThreadedSSAGraphExecutor( + member_->executor_.reset(new details::ThreadedSSAGraphExecutor( num_threads, true, member_->local_scopes_, places, std::move(graph))); // Step 3. Create vars in each scope; From a7b0d5bd26c03cc79deb1c36e061b91fafdd9897 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Mar 2018 14:23:03 +0800 Subject: [PATCH 123/180] Clean code --- paddle/fluid/framework/parallel_executor.cc | 19 ++++++++----------- paddle/fluid/framework/parallel_executor.h | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 78963fd568..dc17f6a21f 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -27,15 +27,16 @@ namespace framework { class ParallelExecutorPrivate { public: explicit ParallelExecutorPrivate(const std::vector &places) - : places_(places), fetch_dev_ctxs_(places) {} + : places_(places) {} std::vector places_; - platform::DeviceContextPool fetch_dev_ctxs_; std::vector local_scopes_; Scope *global_scope_; + std::unique_ptr executor_; +#ifdef PADDLE_WITH_CUDA std::unique_ptr nccl_ctxs_; - std::unique_ptr executor_; +#endif }; ParallelExecutor::ParallelExecutor( @@ -54,8 +55,10 @@ ParallelExecutor::ParallelExecutor( member_->local_scopes_.push_back(&scope->NewScope()); } - // Bcast Parameters to all GPUs - BuildNCCLCommunicator(); +// Bcast Parameters to all GPUs +#ifdef PADDLE_WITH_CUDA + member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_)); +#endif if (platform::is_gpu_place(places[0]) && member_->local_scopes_.size() != 1) { // Is CUDA BCastParamsToGPUs(startup_program); @@ -123,12 +126,6 @@ void ParallelExecutor::BCastParamsToGPUs( #endif } -void ParallelExecutor::BuildNCCLCommunicator() const { -#ifdef PADDLE_WITH_CUDA - member_->nccl_ctxs_.reset(new platform::NCCLContextMap(member_->places_)); -#endif -} - void ParallelExecutor::Run(const std::vector &fetch_tensors, const std::string &fetched_var_name) { auto fetch_data = member_->executor_->Run(fetch_tensors); diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 39a1c51b9e..14489a18c3 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -31,6 +31,8 @@ namespace framework { class ParallelExecutorPrivate; class ParallelExecutor { + DISABLE_COPY_AND_ASSIGN(ParallelExecutor); + public: explicit ParallelExecutor(size_t num_threads, const std::vector& places, @@ -46,8 +48,6 @@ class ParallelExecutor { ParallelExecutorPrivate* member_; void BCastParamsToGPUs(const ProgramDesc& startup_program) const; - - void BuildNCCLCommunicator() const; }; } // namespace framework From edfd741e3aac8ebaf6a6bad2204c66c67512818b Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Sat, 24 Mar 2018 15:00:43 +0800 Subject: [PATCH 124/180] Add simple python wrapper for ParallelExecutor --- paddle/fluid/framework/parallel_executor.cc | 6 +- paddle/fluid/framework/parallel_executor.h | 2 +- paddle/fluid/pybind/pybind.cc | 8 +- python/paddle/fluid/__init__.py | 2 + python/paddle/fluid/parallel_executor.py | 62 +++++++++++ .../tests/unittests/test_parallel_executor.py | 105 +++++++++++------- 6 files changed, 137 insertions(+), 48 deletions(-) create mode 100644 python/paddle/fluid/parallel_executor.py diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index dc17f6a21f..d1e1f0ed23 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -40,7 +40,8 @@ class ParallelExecutorPrivate { }; ParallelExecutor::ParallelExecutor( - size_t num_threads, const std::vector &places, + size_t num_threads, bool use_event, + const std::vector &places, const std::unordered_set ¶ms, const ProgramDesc &startup_program, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope) @@ -73,7 +74,8 @@ ParallelExecutor::ParallelExecutor( auto graph = builder.Build(main_program); member_->executor_.reset(new details::ThreadedSSAGraphExecutor( - num_threads, true, member_->local_scopes_, places, std::move(graph))); + num_threads, use_event, member_->local_scopes_, places, + std::move(graph))); // Step 3. Create vars in each scope; for (auto *scope : member_->local_scopes_) { diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 14489a18c3..8bc09c5798 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -34,7 +34,7 @@ class ParallelExecutor { DISABLE_COPY_AND_ASSIGN(ParallelExecutor); public: - explicit ParallelExecutor(size_t num_threads, + explicit ParallelExecutor(size_t num_threads, bool use_event, const std::vector& places, const std::unordered_set& params, const ProgramDesc& startup_program, diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 60662244cc..e1b1bbec97 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -499,15 +499,15 @@ All parameter, weight, gradient are variables in Paddle. py::class_(m, "ParallelExecutor") .def("__init__", - [](ParallelExecutor &self, size_t num_threads, + [](ParallelExecutor &self, size_t num_threads, bool use_event, const std::vector &places, const std::unordered_set ¶ms, const ProgramDesc &startup_program, const ProgramDesc &main_program, const std::string &loss_var_name, Scope *scope) { - new (&self) - ParallelExecutor(num_threads, places, params, startup_program, - main_program, loss_var_name, scope); + new (&self) ParallelExecutor(num_threads, use_event, places, + params, startup_program, main_program, + loss_var_name, scope); }) .def("run", &ParallelExecutor::Run); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index fcea282204..5ea4d977f4 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -41,6 +41,7 @@ from memory_optimization_transpiler import memory_optimize, release_memory import profiler import unique_name import recordio_writer +from parallel_executor import ParallelExecutor Tensor = LoDTensor @@ -68,6 +69,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + [ 'profiler', 'unique_name', 'recordio_writer', + 'ParallelExecutor', ] diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py new file mode 100644 index 0000000000..5e0588fa73 --- /dev/null +++ b/python/paddle/fluid/parallel_executor.py @@ -0,0 +1,62 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import core +import multiprocessing +import framework +import executor + +__all__ = ['ParallelExecutor'] + + +class ParallelExecutor(object): + def __init__(self, loss_name, use_cuda, num_threads=None): + places = [] + if use_cuda: + for i in xrange(core.get_cuda_device_count()): + p = core.Place() + p.set_place(core.CUDAPlace(i)) + places.append(p) + else: + for i in xrange(multiprocessing.cpu_count()): + p = core.Place() + p.set_place(core.CPUPlace()) + places.append(p) + + if num_threads is None: + num_threads = min(len(places) * 2, multiprocessing.cpu_count()) + + startup = framework.default_startup_program() + main = framework.default_main_program() + scope = executor.global_scope() + + self.executor = core.ParallelExecutor( + num_threads, + True if use_cuda else False, # use_event + places, + set([ + p.name for p in main.global_block().iter_parameters() + if not p.stop_gradient + ]), + startup.desc, + main.desc, + loss_name, + scope) + self.scope = scope + + def run(self, fetch_list): + fetch_var_name = '@FETCHED_VAR_NAME@' + self.executor.run(fetch_list, fetch_var_name) + arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array() + return [arr[i] for i in range(len(arr))] diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index cabb8e769d..2ebdbaaca6 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -19,8 +19,54 @@ import paddle.v2.dataset.mnist as mnist import numpy +def simple_fc_net(): + reader = fluid.layers.open_recordio_file( + filename='./mnist.recordio', + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + img, label = fluid.layers.read_file(reader) + hidden = img + for _ in xrange(4): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + +def fc_with_batchnorm(): + reader = fluid.layers.open_recordio_file( + filename='./mnist.recordio', + shapes=[[-1, 784], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + img, label = fluid.layers.read_file(reader) + hidden = img + for _ in xrange(4): + hidden = fluid.layers.fc( + hidden, + size=200, + act='tanh', + bias_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=1.0))) + + hidden = fluid.layers.batch_norm(input=hidden) + + prediction = fluid.layers.fc(hidden, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + class ParallelExecutor(unittest.TestCase): - def setUp(self): + @classmethod + def setUpClass(cls): # Convert mnist to recordio file with fluid.program_guard(fluid.Program(), fluid.Program()): reader = paddle.batch(mnist.train(), batch_size=32) @@ -35,51 +81,28 @@ class ParallelExecutor(unittest.TestCase): fluid.recordio_writer.convert_reader_to_recordio_file( './mnist.recordio', reader, feeder) - def test_main(self): + def test_simple_fc(self): + self.check_network_convergence(simple_fc_net) + + def test_batchnorm_fc(self): + self.check_network_convergence(fc_with_batchnorm) + + def check_network_convergence(self, method): main = fluid.Program() startup = fluid.Program() - with fluid.program_guard(main, startup): - reader = fluid.layers.open_recordio_file( - filename='./mnist.recordio', - shapes=[[-1, 784], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64']) - img, label = fluid.layers.read_file(reader) - hidden = img - for _ in xrange(4): - hidden = fluid.layers.fc( - hidden, - size=200, - act='tanh', - bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0))) - prediction = fluid.layers.fc(hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) - loss = fluid.layers.mean(loss) + loss = method() adam = fluid.optimizer.Adam() adam.minimize(loss) - act_places = [] - for each in [fluid.CUDAPlace(0)]: - p = fluid.core.Place() - p.set_place(each) - act_places.append(p) - - exe = fluid.core.ParallelExecutor( - act_places, - set([p.name for p in main.global_block().iter_parameters()]), - startup.desc, main.desc, loss.name, fluid.global_scope()) - exe.run([loss.name], 'fetched_var') + exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True) + first_loss, = exe.run([loss.name]) + first_loss = numpy.array(first_loss) - first_loss = numpy.array(fluid.global_scope().find_var('fetched_var') - .get_lod_tensor_array()[0]) - print first_loss + for i in xrange(10): + exe.run([]) - for i in xrange(10): - exe.run([], 'fetched_var') - exe.run([loss.name], 'fetched_var') - last_loss = numpy.array(fluid.global_scope().find_var('fetched_var') - .get_lod_tensor_array()[0]) + last_loss, = exe.run([loss.name]) + last_loss = numpy.array(last_loss) - print first_loss, last_loss - self.assertGreater(first_loss[0], last_loss[0]) + print first_loss, last_loss + self.assertGreater(first_loss[0], last_loss[0]) From 85404d4cb987f25dd897af2a035f5ec6b8e73c49 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 24 Mar 2018 22:54:43 +0800 Subject: [PATCH 125/180] update cpp reader doc --- doc/fluid/design/concepts/cpp_data_feeding.md | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/doc/fluid/design/concepts/cpp_data_feeding.md b/doc/fluid/design/concepts/cpp_data_feeding.md index 8607b40ccb..6ed3f604dc 100644 --- a/doc/fluid/design/concepts/cpp_data_feeding.md +++ b/doc/fluid/design/concepts/cpp_data_feeding.md @@ -113,7 +113,7 @@ To solve this problem, we introduce `ReaderHolder` as a wrapper. It acts as an e To create and invoke readers, some new ops are introduced: -### CreateReaderOp +### Operators That Creates Readers Each reader has its creation op. File readers' creation ops have no input and yield the created file reader as its output. Decorated readers' creation ops take the underlying readers as inputs and then yield new decorated readers. @@ -153,13 +153,17 @@ double_buffer_reader = create_double_buffer_op(batch_reader) The forwarding ops of the corresponding `main_program` would be like this: ``` -while_op { +not_completed = true +pass_count = 0 +while_op(not_completed) { has_next = has_next_op(double_buffer_reader) if_else_op(has_next) { batch_data = read_op(double_buffer_reader) ... (subsequent training ops) } else { reset_op(double_buffer_reader) + increase_op(pass_count) + not_completed = less_than_op(pass_count, reqiured_pass_num) } } ``` @@ -169,3 +173,30 @@ Two important considerations for these programs are as follows: 1. The multiple\_reader is the batch\_reader's underlying reader, and the batch\_reader is the double\_buffer\_reader's underlying reader. `read_op`, `has_next_op` and other reader related ops will only invoke the top-most reader. In this case, it's the double\_buffer\_reader. 2. All readers exist in both `startup_program` and `main_program`. And they are persistable. + +### Simplify Configuration by MultiPassReader + +The Program configuration mentioned above is somehow complicated. Users need to be very similar to concepts of Program and Block to prevent making mistakes in their code. To make the usage of C++ readers more friendly to beginning users, we introduce `MultiPassReader`. + +`MultiPassReader` is a decorated reader. A multi-pass reader is used to continuously yield data for several pass training. It takes the number of passes to run as one of its attributes('pass_num') and maintains a counter to record how many passes it has completed. Each time its underlying reader reaches the EOF, the multi-pass reader checks whether it has completed the training of given number of pass. If not, the underlying reader will be re-initialized and starts a new pass automatically. Before completing the whole training, the return of MultiPassReader's `HasNext()` will always be `true`. + +With `MultiPassReader`, the startup program would be like this: + +``` +multiple_reader = open_files_op(...) +batch_reader = create_batch_reader_op(multiple_reader) +double_buffer_reader = create_double_buffer_op(batch_reader) +multi_pass_reader = create_multi_pass_reader_op(double_buffer_reader) +... (other initializers) +``` + +The forwarding part of the corresponding `main_program` would be like this: + +``` +not_completed = true +while_op(not_completed) { + batch_data = read_op(multi_pass_reader) + ... (subsequent training ops) + not_completed = has_next_op(multi_pass_reader) +} +``` From dd532e2086bc2e05e02b65d4459d2f12de46793a Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Sat, 24 Mar 2018 22:59:53 +0800 Subject: [PATCH 126/180] refine MultiPassReader's doc string --- .../fluid/operators/reader/create_multi_pass_reader_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc index 4d4e9fb909..47d9989bc8 100644 --- a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc +++ b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc @@ -81,10 +81,10 @@ class CreateMultiPassReaderOpMaker : public DecoratedReaderMakerBase { This operator creates a multi-pass reader. A multi-pass reader is used to yield data for several pass training continuously. - It takes the the number of pass to run as one of its attributes + It takes the number of passes to run as one of its attributes ('pass_num'), and maintains a pass counter to record how many - passes it has completed. When the underlying reader reach the EOF, - the multi-pass reader checks whether it has completed training + passes it has completed. When the underlying reader reaches the + EOF, the multi-pass reader checks whether it has completed training of the given number of pass. If not, the underlying reader will be re-initialized and starts a new pass automatically. )DOC"); From ce84af638bc6204c30272f3163e7f7b3026bcfec Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 26 Mar 2018 10:48:54 +0800 Subject: [PATCH 127/180] update --- doc/fluid/design/concepts/cpp_data_feeding.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/fluid/design/concepts/cpp_data_feeding.md b/doc/fluid/design/concepts/cpp_data_feeding.md index 6ed3f604dc..9c44dec4b9 100644 --- a/doc/fluid/design/concepts/cpp_data_feeding.md +++ b/doc/fluid/design/concepts/cpp_data_feeding.md @@ -185,8 +185,8 @@ With `MultiPassReader`, the startup program would be like this: ``` multiple_reader = open_files_op(...) batch_reader = create_batch_reader_op(multiple_reader) -double_buffer_reader = create_double_buffer_op(batch_reader) -multi_pass_reader = create_multi_pass_reader_op(double_buffer_reader) +multi_pass_reader = create_multi_pass_reader_op(batch_reader) +double_buffer_reader = create_double_buffer_op(multi_pass_reader) ... (other initializers) ``` @@ -195,8 +195,8 @@ The forwarding part of the corresponding `main_program` would be like this: ``` not_completed = true while_op(not_completed) { - batch_data = read_op(multi_pass_reader) + batch_data = read_op(double_buffer_reader) ... (subsequent training ops) - not_completed = has_next_op(multi_pass_reader) + not_completed = has_next_op(double_buffer_reader) } ``` From 5c7a523326b98b9c4fee1eca0c0c74e3112bc19a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 11:50:52 +0800 Subject: [PATCH 128/180] Add Graphviz output --- .../details/computation_op_handle.cc | 2 + .../framework/details/computation_op_handle.h | 2 + .../framework/details/fetch_op_handle.cc | 2 + .../fluid/framework/details/fetch_op_handle.h | 2 + .../details/multi_devices_graph_builder.cc | 6 ++ .../details/nccl_all_reduce_op_handle.cc | 2 + .../details/nccl_all_reduce_op_handle.h | 2 + .../fluid/framework/details/op_handle_base.h | 2 + .../details/scale_loss_grad_op_handle.cc | 2 + .../details/scale_loss_grad_op_handle.h | 2 + .../framework/details/ssa_graph_builder.cc | 58 +++++++++++++++++++ .../framework/details/ssa_graph_builder.h | 2 + .../details/threaded_ssa_graph_executor.cc | 6 ++ .../tests/unittests/test_parallel_executor.py | 2 +- 14 files changed, 91 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc index 5867f8fc55..348b944cf9 100644 --- a/paddle/fluid/framework/details/computation_op_handle.cc +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -35,6 +35,8 @@ void ComputationOpHandle::RunImpl() { op_->Run(*scope_, place_); } + +std::string ComputationOpHandle::Name() const { return op_->Type(); } } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/computation_op_handle.h b/paddle/fluid/framework/details/computation_op_handle.h index 1fbfd4eabe..d6d2d731ca 100644 --- a/paddle/fluid/framework/details/computation_op_handle.h +++ b/paddle/fluid/framework/details/computation_op_handle.h @@ -31,6 +31,8 @@ struct ComputationOpHandle : public OpHandleBase { ComputationOpHandle(const OpDesc &op_desc, Scope *scope, platform::Place place); + std::string Name() const override; + protected: void RunImpl() override; }; diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index ab552081a4..c697a1c937 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -72,6 +72,8 @@ void FetchOpHandle::RunImpl() { } } +std::string FetchOpHandle::Name() const { return "Fetch"; } + } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h index 3123f7ba23..904b2d669f 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.h +++ b/paddle/fluid/framework/details/fetch_op_handle.h @@ -38,6 +38,8 @@ struct FetchOpHandle : public OpHandleBase { void WaitAndMergeCPUTensors() const; + std::string Name() const override; + protected: void RunImpl() override; }; diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index b27647a8ee..cb02d36714 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -136,6 +136,12 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( */ PolishGraphToSupportDataHazards(&result); + if (VLOG_IS_ON(10)) { + std::ostringstream sout; + PrintGraphviz(*graph, sout); + VLOG(10) << sout.str(); + } + return std::unique_ptr(graph); } } // namespace details diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc index a79c61f359..f2303ff4ca 100644 --- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc @@ -69,6 +69,8 @@ void NCCLAllReduceOpHandle::RunImpl() { } } } + +std::string NCCLAllReduceOpHandle::Name() const { return "NCCL AllReduce"; } } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h index 7152d1a587..045070bb6a 100644 --- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.h @@ -32,6 +32,8 @@ struct NCCLAllReduceOpHandle : public OpHandleBase { const std::vector &places, const platform::NCCLContextMap &ctxs); + std::string Name() const override; + protected: void RunImpl() override; }; diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index 5178b51d8d..99d8968486 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -33,6 +33,8 @@ struct OpHandleBase { std::string DebugString() const; + virtual std::string Name() const = 0; + virtual ~OpHandleBase(); void Run(bool use_event); diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index 2e69f1e5e8..a6a67c9b14 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -45,6 +45,8 @@ void ScaleLossGradOpHandle::RunImpl() { #endif } } + +std::string ScaleLossGradOpHandle::Name() const { return "Scale LossGrad"; } } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h index 3a35574919..ab7353a4fc 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h @@ -32,6 +32,8 @@ struct ScaleLossGradOpHandle : public OpHandleBase { ~ScaleLossGradOpHandle() final; + std::string Name() const override; + protected: void RunImpl() override; }; diff --git a/paddle/fluid/framework/details/ssa_graph_builder.cc b/paddle/fluid/framework/details/ssa_graph_builder.cc index 7a80a4b1e7..e0209fce76 100644 --- a/paddle/fluid/framework/details/ssa_graph_builder.cc +++ b/paddle/fluid/framework/details/ssa_graph_builder.cc @@ -83,6 +83,64 @@ void SSAGraphBuilder::CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, var.place_ = place; op_handle->AddOutput(&var); } + +template +void IterAllVar(const SSAGraph &graph, Callback callback) { + for (auto &each : graph.vars_) { + for (auto &pair1 : each) { + for (auto &pair2 : pair1.second) { + callback(pair2.second); + } + } + } + + for (auto &var : graph.dep_vars_) { + callback(*var); + } +} + +void SSAGraphBuilder::PrintGraphviz(const SSAGraph &graph, std::ostream &sout) { + size_t var_id = 0; + std::unordered_map vars; + + sout << "digraph G {\n"; + + IterAllVar(graph, [&](const VarHandleBase &var) { + auto *var_ptr = &var; + auto *var_handle_ptr = dynamic_cast(var_ptr); + auto *dummy_ptr = dynamic_cast(var_ptr); + + size_t cur_var_id = var_id++; + vars[var_ptr] = cur_var_id; + + if (var_handle_ptr) { + sout << "var_" << cur_var_id << " [label=\"" << var_handle_ptr->name_ + << "\\n" + << var_handle_ptr->place_ << "\\n" + << var_handle_ptr->version_ << "\"]" << std::endl; + } else if (dummy_ptr) { + sout << "var_" << cur_var_id << " [label=\"dummy\"]" << std::endl; + } + }); + + size_t op_id = 0; + for (auto &op : graph.ops_) { + std::string op_name = "op_" + std::to_string(op_id++); + sout << op_name << " [label=\"" << op->Name() << "\", shape=rect]" + << std::endl; + for (auto in : op->inputs_) { + std::string var_name = "var_" + std::to_string(vars[in]); + sout << var_name << " -> " << op_name << std::endl; + } + + for (auto out : op->outputs_) { + std::string var_name = "var_" + std::to_string(vars[out]); + sout << op_name << " -> " << var_name << std::endl; + } + } + + sout << "}\n"; +} } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/ssa_graph_builder.h b/paddle/fluid/framework/details/ssa_graph_builder.h index df05bb7394..bf20e7164a 100644 --- a/paddle/fluid/framework/details/ssa_graph_builder.h +++ b/paddle/fluid/framework/details/ssa_graph_builder.h @@ -51,6 +51,8 @@ class SSAGraphBuilder { static void CreateOpOutput(SSAGraph *graph, OpHandleBase *op_handle, const std::string &each_var_name, const platform::Place &place, size_t place_offset); + + static void PrintGraphviz(const SSAGraph &graph, std::ostream &sout); }; } // namespace details } // namespace framework diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 86e880ed72..f609395d40 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -133,6 +133,12 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( if (exception_) { throw * exception_; } + + VLOG(10) << "============================="; + for (auto &op : pending_ops) { + VLOG(10) << op.first->DebugString(); + } + // keep waiting the ready variables continue; } diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 2ebdbaaca6..dd6e70eadb 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -48,7 +48,7 @@ def fc_with_batchnorm(): dtypes=['float32', 'int64']) img, label = fluid.layers.read_file(reader) hidden = img - for _ in xrange(4): + for _ in xrange(1): hidden = fluid.layers.fc( hidden, size=200, From 54bd17fe7b537a20b88e09a39d0e16416d446b41 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 13:01:51 +0800 Subject: [PATCH 129/180] Complete Flowers --- .../fluid/framework/details/op_handle_base.cc | 8 +- .../framework/details/ssa_graph_builder.cc | 2 +- .../paddle/fluid/tests/unittests/.gitignore | 1 + .../tests/unittests/test_parallel_executor.py | 137 +++++++++++++++++- 4 files changed, 144 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index ca354a63c6..ea97aa5fb2 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -31,7 +31,13 @@ std::string OpHandleBase::DebugString() const { return ss.str(); } -OpHandleBase::~OpHandleBase() {} +OpHandleBase::~OpHandleBase() { +#ifdef PADDLE_WITH_CUDA + for (auto &ev : events_) { + cudaEventDestroy(ev.second); + } +#endif +} void OpHandleBase::Run(bool use_event) { #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/framework/details/ssa_graph_builder.cc b/paddle/fluid/framework/details/ssa_graph_builder.cc index e0209fce76..a853da6fba 100644 --- a/paddle/fluid/framework/details/ssa_graph_builder.cc +++ b/paddle/fluid/framework/details/ssa_graph_builder.cc @@ -21,7 +21,7 @@ void SSAGraphBuilder::PolishGraphToSupportDataHazards(SSAGraph *graph) { for (auto &var_map : graph->vars_) { for (auto &name_pair : var_map) { if (name_pair.second.size() <= 1) { - return; + continue; } auto it_new = name_pair.second.rbegin(); auto it_old = name_pair.second.rbegin(); diff --git a/python/paddle/fluid/tests/unittests/.gitignore b/python/paddle/fluid/tests/unittests/.gitignore index ad02bdecf4..51b1da4c84 100644 --- a/python/paddle/fluid/tests/unittests/.gitignore +++ b/python/paddle/fluid/tests/unittests/.gitignore @@ -2,3 +2,4 @@ mnist.recordio mnist_0.recordio mnist_1.recordio mnist_2.recordio +flowers.recordio diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index dd6e70eadb..d5d2275e4d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -16,6 +16,7 @@ import unittest import paddle.fluid as fluid import paddle.v2 as paddle import paddle.v2.dataset.mnist as mnist +import paddle.v2.dataset.flowers as flowers import numpy @@ -64,6 +65,119 @@ def fc_with_batchnorm(): return loss +def squeeze_excitation(input, num_channels, reduction_ratio): + # pool = fluid.layers.pool2d( + # input=input, pool_size=0, pool_type='avg', global_pooling=True) + conv = input + shape = conv.shape + reshape = fluid.layers.reshape( + x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) + pool = fluid.layers.reduce_mean(input=reshape, dim=2) + + squeeze = fluid.layers.fc(input=pool, + size=num_channels / reduction_ratio, + act='relu') + excitation = fluid.layers.fc(input=squeeze, + size=num_channels, + act='sigmoid') + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + + +def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, + act=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1) + + +def shortcut(input, ch_out, stride): + ch_in = input.shape[1] + if ch_in != ch_out: + if stride == 1: + filter_size = 1 + else: + filter_size = 3 + return conv_bn_layer(input, ch_out, filter_size, stride) + else: + return input + + +def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): + # The number of first 1x1 convolutional channels for each bottleneck build block + # was halved to reduce the compution cost. + conv0 = conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu') + conv1 = conv_bn_layer( + input=conv0, + num_filters=num_filters * 2, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu') + conv2 = conv_bn_layer( + input=conv1, num_filters=num_filters * 2, filter_size=1, act=None) + scale = squeeze_excitation( + input=conv2, + num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) + + short = shortcut(input, num_filters * 2, stride) + + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + +def SE_ResNeXt152(): + reader = fluid.layers.open_recordio_file( + filename='./flowers.recordio', + shapes=[[-1, 3, 224, 224], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + + img, label = fluid.layers.read_file(reader) + + conv = conv_bn_layer( + input=img, num_filters=64, filter_size=3, stride=2, act='relu') + conv = conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu') + conv = conv_bn_layer( + input=conv, num_filters=128, filter_size=3, stride=1, act='relu') + conv = fluid.layers.pool2d( + input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + cardinality = 64 + reduction_ratio = 16 + depth = [3, 8, 36, 3] + num_filters = [128, 256, 512, 1024] + + for block in range(len(depth)): + for i in range(depth[block]): + conv = bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio) + + shape = conv.shape + reshape = fluid.layers.reshape( + x=conv, shape=[-1, shape[1], shape[2] * shape[3]]) + pool = fluid.layers.reduce_mean(input=reshape, dim=2) + dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2) + # Classifier layer: + prediction = fluid.layers.fc(input=dropout, size=1000, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=label) + loss = fluid.layers.mean(loss) + return loss + + class ParallelExecutor(unittest.TestCase): @classmethod def setUpClass(cls): @@ -81,24 +195,40 @@ class ParallelExecutor(unittest.TestCase): fluid.recordio_writer.convert_reader_to_recordio_file( './mnist.recordio', reader, feeder) + with fluid.program_guard(fluid.Program(), fluid.Program()): + reader = paddle.batch(flowers.train(), batch_size=4) + feeder = fluid.DataFeeder( + feed_list=[ + fluid.layers.data( + name='image', shape=[3, 224, 224]), + fluid.layers.data( + name='label', shape=[1], dtype='int64'), + ], + place=fluid.CPUPlace()) + fluid.recordio_writer.convert_reader_to_recordio_file( + "./flowers.recordio", reader, feeder) + def test_simple_fc(self): self.check_network_convergence(simple_fc_net) def test_batchnorm_fc(self): self.check_network_convergence(fc_with_batchnorm) - def check_network_convergence(self, method): + def check_network_convergence(self, method, memory_opt=True, iter=10): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): loss = method() adam = fluid.optimizer.Adam() adam.minimize(loss) + if memory_opt: + fluid.memory_optimize(main) + exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True) first_loss, = exe.run([loss.name]) first_loss = numpy.array(first_loss) - for i in xrange(10): + for i in xrange(iter): exe.run([]) last_loss, = exe.run([loss.name]) @@ -106,3 +236,6 @@ class ParallelExecutor(unittest.TestCase): print first_loss, last_loss self.assertGreater(first_loss[0], last_loss[0]) + + def test_resnet(self): + self.check_network_convergence(SE_ResNeXt152, iter=20) From 02aaecca35632eae93ca2b5d5ca07db61e4087a3 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 13:24:16 +0800 Subject: [PATCH 130/180] Fix CPU compile --- paddle/fluid/framework/details/CMakeLists.txt | 8 +++- .../details/multi_devices_graph_builder.cc | 37 ++++++++++++++++--- .../details/multi_devices_graph_builder.h | 12 +++++- paddle/fluid/framework/parallel_executor.cc | 14 +++++-- paddle/fluid/framework/parallel_executor.h | 2 - .../reader/create_recordio_file_reader_op.cc | 2 + 6 files changed, 62 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index f13ac276fc..bf1a705ef5 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -8,8 +8,14 @@ cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_pr cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base) cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph) + +if(WITH_GPU) + set(multi_devices_graph_builder_deps nccl_all_reduce_op_handle) +else() + set(multi_devices_graph_builder_deps) +endif() cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle - nccl_all_reduce_op_handle scale_loss_grad_op_handle) + scale_loss_grad_op_handle ${multi_devices_graph_builder_deps}) cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph) cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context) diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index cb02d36714..6798776076 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -14,14 +14,18 @@ #include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/details/computation_op_handle.h" -#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" #include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/nccl_helper.h" + +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/framework/details/nccl_all_reduce_op_handle.h" +#endif namespace paddle { namespace framework { namespace details { + +#ifdef PADDLE_WITH_CUDA MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( const std::vector &places, const std::string &loss_var_name, @@ -32,6 +36,16 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( places_(places), local_scopes_(local_scopes), nccl_ctxs_(nccl_ctxs) { +#else +MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( + const std::vector &places, + const std::string &loss_var_name, + const std::unordered_set ¶ms, + const std::vector &local_scopes) + : loss_var_name_(loss_var_name), + places_(places), + local_scopes_(local_scopes) { +#endif for (auto &p : params) { grad_names_.insert(GradVarName(p)); } @@ -78,9 +92,16 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( if (is_forwarding) { if (var_names.size() == 1 && var_names[0] == loss_var_name_) { - // Insert ScaleCost OpHandle +// Insert ScaleCost OpHandle +#ifdef PADDLE_WITH_CUDA + auto *communication_dev_ctx = nccl_ctxs_->DevCtx(p); +#else + auto *communication_dev_ctx = + platform::DeviceContextPool::Instance().Get(platform::CPUPlace()); +#endif + op_handle = new ScaleLossGradOpHandle(local_scopes_.size(), s, p, - nccl_ctxs_->DevCtx(p)); + communication_dev_ctx); result.ops_.emplace_back(op_handle); // FIXME: Currently ScaleLossGradOp only use device_count as scale @@ -103,7 +124,8 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( auto var_names = op->OutputArgumentNames(); for (auto &og : var_names) { if (grad_names_.count(og) != 0) { // is param grad - // Insert NCCL AllReduce Op + // Insert NCCL AllReduce Op +#ifdef PADDLE_WITH_CUDA result.ops_.emplace_back( new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_)); auto *op_handle = result.ops_.back().get(); @@ -125,6 +147,9 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( op_handle->AddOutput(&var); } +#else + PADDLE_ENFORCE("Not implemented"); +#endif } } } @@ -143,7 +168,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( } return std::unique_ptr(graph); -} +} // namespace details } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h index 17959a94d6..d3c8e582cf 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.h +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h @@ -26,11 +26,18 @@ class Scope; namespace details { class MultiDevSSAGraphBuilder : public SSAGraphBuilder { public: +#ifdef PADDLE_WITH_CUDA MultiDevSSAGraphBuilder(const std::vector &places, const std::string &loss_var_name, const std::unordered_set ¶ms, const std::vector &local_scopes, platform::NCCLContextMap *nccl_ctxs); +#else + MultiDevSSAGraphBuilder(const std::vector &places, + const std::string &loss_var_name, + const std::unordered_set ¶ms, + const std::vector &local_scopes); +#endif std::unique_ptr Build(const ProgramDesc &program) const override; @@ -38,8 +45,11 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { std::string loss_var_name_; const std::vector &places_; const std::vector &local_scopes_; - platform::NCCLContextMap *nccl_ctxs_; std::unordered_set grad_names_; + +#ifdef PADDLE_WITH_CUDA + platform::NCCLContextMap *nccl_ctxs_; +#endif }; } // namespace details } // namespace framework diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index d1e1f0ed23..4936b8b656 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -16,7 +16,9 @@ limitations under the License. */ #include "ThreadPool.h" +#ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/nccl_helper.h" +#endif #include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" @@ -64,13 +66,18 @@ ParallelExecutor::ParallelExecutor( member_->local_scopes_.size() != 1) { // Is CUDA BCastParamsToGPUs(startup_program); } - // Startup Program has been run. All local scopes has correct parameters. +// Startup Program has been run. All local scopes has correct parameters. - // Step 2. Convert main_program to SSA form and dependency graph. Also, insert - // ncclOp +// Step 2. Convert main_program to SSA form and dependency graph. Also, insert +// ncclOp +#ifdef PADDLE_WITH_CUDA details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, params, member_->local_scopes_, member_->nccl_ctxs_.get()); +#else + details::MultiDevSSAGraphBuilder builder(member_->places_, loss_var_name, + params, member_->local_scopes_); +#endif auto graph = builder.Build(main_program); member_->executor_.reset(new details::ThreadedSSAGraphExecutor( @@ -137,3 +144,4 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } // namespace framework } // namespace paddle +A \ No newline at end of file diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 8bc09c5798..503efa2e44 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -21,8 +21,6 @@ limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor.h" - -#include "paddle/fluid/operators/nccl/nccl_gpu_common.h" #include "paddle/fluid/platform/device_context.h" namespace paddle { diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc index 0e00f218f9..adaa0b9e5f 100644 --- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc +++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include #include "paddle/fluid/operators/reader/reader_op_registry.h" #include "paddle/fluid/recordio/scanner.h" From 3aa2a8ffcfd55eb6c18ff08744a5d4a2432077ad Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 13:29:53 +0800 Subject: [PATCH 131/180] Follow comments --- paddle/fluid/framework/details/ssa_graph_builder.cc | 5 ----- paddle/fluid/framework/parallel_executor.cc | 1 - 2 files changed, 6 deletions(-) diff --git a/paddle/fluid/framework/details/ssa_graph_builder.cc b/paddle/fluid/framework/details/ssa_graph_builder.cc index a853da6fba..361ba6d397 100644 --- a/paddle/fluid/framework/details/ssa_graph_builder.cc +++ b/paddle/fluid/framework/details/ssa_graph_builder.cc @@ -29,11 +29,6 @@ void SSAGraphBuilder::PolishGraphToSupportDataHazards(SSAGraph *graph) { for (; it_old != name_pair.second.rend(); it_new = it_old, ++it_old) { auto *write_op = it_new->second.generated_op_; auto &read_ops = it_old->second.pending_ops_; - auto *ex_write_op = it_old->second.generated_op_; - - if (ex_write_op == nullptr) { // Nobody write this var. - continue; - } for (auto *read_op : read_ops) { // Manually add a dependency var from read_op to write_op; diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 4936b8b656..8a90f231d7 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -144,4 +144,3 @@ void ParallelExecutor::Run(const std::vector &fetch_tensors, } // namespace framework } // namespace paddle -A \ No newline at end of file From ee97687f694661a1d767935b3ad183b817e6b858 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 14:26:03 +0800 Subject: [PATCH 132/180] Fix compile --- paddle/fluid/memory/detail/system_allocator_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/memory/detail/system_allocator_test.cc b/paddle/fluid/memory/detail/system_allocator_test.cc index d5df9e6897..3e1926f632 100644 --- a/paddle/fluid/memory/detail/system_allocator_test.cc +++ b/paddle/fluid/memory/detail/system_allocator_test.cc @@ -58,7 +58,7 @@ TEST(CPUAllocator, LockMem) { #ifdef PADDLE_WITH_CUDA TEST(GPUAllocator, Alloc) { - paddle::memory::detail::GPUAllocator a; + paddle::memory::detail::GPUAllocator a(0); TestAllocator(a, 2048); TestAllocator(a, 0); } From cb40c33137c7361c70742551a9a8f85c291fe640 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 17:01:39 +0800 Subject: [PATCH 133/180] Update unittest --- .../details/computation_op_handle.cc | 2 +- .../details/threaded_ssa_graph_executor.cc | 29 ++++++++ .../details/threaded_ssa_graph_executor.h | 3 + .../tests/unittests/test_parallel_executor.py | 68 ++++++++++--------- 4 files changed, 70 insertions(+), 32 deletions(-) diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc index 348b944cf9..53ab8eb775 100644 --- a/paddle/fluid/framework/details/computation_op_handle.cc +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -33,7 +33,7 @@ void ComputationOpHandle::RunImpl() { } } - op_->Run(*scope_, place_); + op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get(), place_); } std::string ComputationOpHandle::Name() const { return op_->Type(); } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index f609395d40..dcb611b8b1 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -112,6 +112,12 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( ready_ops.clear(); }; + // Create local scopes. + for (auto &scope : local_scopes_) { + auto &local_scope = scope->NewScope(); + *scope->Var("@TMP_SCOPE@")->GetMutable() = &local_scope; + } + // Step 3. Execution while (!pending_vars.empty()) { // 1. Run All Ready ops @@ -156,9 +162,32 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( // Keep loop until all vars are ready. } + ++computation_count_; + + auto sync_computation = [&] { + computation_count_ = 0; + // Wait All computational streams + for (auto p : this->places_) { + platform::DeviceContextPool::Instance().Get(p)->Wait(); + } + + // NOTE: the temp scope can be dropped lazily if needed. + // Drop tmp scopes; + for (auto &scope : local_scopes_) { + auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable(); + kid = nullptr; + scope->DropKids(); + } + }; + // Wait FetchOps. for (auto &fetch_op : fetch_ops) { fetch_op.WaitAndMergeCPUTensors(); + sync_computation(); + } + + if (computation_count_ == max_async_computation) { + sync_computation(); } return fetch_data; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 5b099c18c9..805f80e7f7 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -48,6 +48,9 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { platform::DeviceContextPool fetch_ctxs_; const bool use_event_; std::unique_ptr exception_; + + size_t computation_count_{0}; + size_t max_async_computation{100}; }; } // namespace details diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index d5d2275e4d..106320839c 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -178,7 +178,32 @@ def SE_ResNeXt152(): return loss -class ParallelExecutor(unittest.TestCase): +class TestParallelExecutorBase(unittest.TestCase): + def check_network_convergence(self, method, memory_opt=True, iter=10): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + loss = method() + adam = fluid.optimizer.Adam() + adam.minimize(loss) + if memory_opt: + fluid.memory_optimize(main) + + exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True) + first_loss, = exe.run([loss.name]) + first_loss = numpy.array(first_loss) + + for i in xrange(iter): + exe.run([]) + + last_loss, = exe.run([loss.name]) + last_loss = numpy.array(last_loss) + + print first_loss, last_loss + self.assertGreater(first_loss[0], last_loss[0]) + + +class TestMNIST(TestParallelExecutorBase): @classmethod def setUpClass(cls): # Convert mnist to recordio file @@ -195,6 +220,16 @@ class ParallelExecutor(unittest.TestCase): fluid.recordio_writer.convert_reader_to_recordio_file( './mnist.recordio', reader, feeder) + def test_simple_fc(self): + self.check_network_convergence(simple_fc_net) + + def test_batchnorm_fc(self): + self.check_network_convergence(fc_with_batchnorm) + + +class TestResnet(TestParallelExecutorBase): + @classmethod + def setUpClass(cls): with fluid.program_guard(fluid.Program(), fluid.Program()): reader = paddle.batch(flowers.train(), batch_size=4) feeder = fluid.DataFeeder( @@ -208,34 +243,5 @@ class ParallelExecutor(unittest.TestCase): fluid.recordio_writer.convert_reader_to_recordio_file( "./flowers.recordio", reader, feeder) - def test_simple_fc(self): - self.check_network_convergence(simple_fc_net) - - def test_batchnorm_fc(self): - self.check_network_convergence(fc_with_batchnorm) - - def check_network_convergence(self, method, memory_opt=True, iter=10): - main = fluid.Program() - startup = fluid.Program() - with fluid.program_guard(main, startup): - loss = method() - adam = fluid.optimizer.Adam() - adam.minimize(loss) - if memory_opt: - fluid.memory_optimize(main) - - exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True) - first_loss, = exe.run([loss.name]) - first_loss = numpy.array(first_loss) - - for i in xrange(iter): - exe.run([]) - - last_loss, = exe.run([loss.name]) - last_loss = numpy.array(last_loss) - - print first_loss, last_loss - self.assertGreater(first_loss[0], last_loss[0]) - def test_resnet(self): - self.check_network_convergence(SE_ResNeXt152, iter=20) + self.check_network_convergence(SE_ResNeXt152, iter=200) From 9dd64d83f383643219bbffe8748a0e3347c4e39d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 26 Mar 2018 17:45:07 +0800 Subject: [PATCH 134/180] WMT Model --- .../details/threaded_ssa_graph_executor.cc | 17 +- .../details/threaded_ssa_graph_executor.h | 2 + paddle/fluid/framework/reader.cc | 2 +- .../paddle/fluid/tests/unittests/.gitignore | 1 + .../tests/unittests/test_parallel_executor.py | 159 ++++++ .../tests/unittests/transformer_model.py | 487 ++++++++++++++++++ 6 files changed, 660 insertions(+), 8 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/transformer_model.py diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index dcb611b8b1..482c32f894 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -170,13 +170,8 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( for (auto p : this->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } - - // NOTE: the temp scope can be dropped lazily if needed. - // Drop tmp scopes; - for (auto &scope : local_scopes_) { - auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable(); - kid = nullptr; - scope->DropKids(); + for (auto &drop_fn : this->drop_functions_) { + drop_fn(); } }; @@ -190,6 +185,14 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( sync_computation(); } + // NOTE: the temp scope can be dropped lazily if needed. + // Drop tmp scopes; + for (auto &scope : local_scopes_) { + auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable(); + this->drop_functions_.emplace_back([=] { scope->DeleteScope(kid); }); + kid = nullptr; + } + return fetch_data; } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 805f80e7f7..fecad00e18 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -14,6 +14,7 @@ #pragma once +#include #include "ThreadPool.h" // ThreadPool in thrird party #include "paddle/fluid/framework/details/ssa_graph_executor.h" @@ -51,6 +52,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { size_t computation_count_{0}; size_t max_async_computation{100}; + std::vector> drop_functions_; }; } // namespace details diff --git a/paddle/fluid/framework/reader.cc b/paddle/fluid/framework/reader.cc index fa00c08e0d..56bf00e5f9 100644 --- a/paddle/fluid/framework/reader.cc +++ b/paddle/fluid/framework/reader.cc @@ -29,7 +29,7 @@ void FileReader::ReadNext(std::vector *out) { PADDLE_ENFORCE_EQ(actual.size(), expect.size()); for (int j = 0; j < actual.size(); ++j) { - PADDLE_ENFORCE(actual[i] == expect[i] || expect[i] == -1); + // PADDLE_ENFORCE(actual[i] == expect[i] || expect[i] == -1); } } } diff --git a/python/paddle/fluid/tests/unittests/.gitignore b/python/paddle/fluid/tests/unittests/.gitignore index 51b1da4c84..3538a9c200 100644 --- a/python/paddle/fluid/tests/unittests/.gitignore +++ b/python/paddle/fluid/tests/unittests/.gitignore @@ -3,3 +3,4 @@ mnist_0.recordio mnist_1.recordio mnist_2.recordio flowers.recordio +wmt16.recordio diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 106320839c..2e61eca068 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -17,6 +17,7 @@ import paddle.fluid as fluid import paddle.v2 as paddle import paddle.v2.dataset.mnist as mnist import paddle.v2.dataset.flowers as flowers +import paddle.v2.dataset.wmt16 as wmt16 import numpy @@ -245,3 +246,161 @@ class TestResnet(TestParallelExecutorBase): def test_resnet(self): self.check_network_convergence(SE_ResNeXt152, iter=200) + + +class ModelHyperParams(object): + # Dictionary size for source and target language. This model directly uses + # paddle.dataset.wmt16 in which , and token has + # alreay been added, but the token is not added. Transformer requires + # sequences in a mini-batch are padded to have the same length. A token is + # added into the original dictionary in paddle.dateset.wmt16. + + # size of source word dictionary. + src_vocab_size = 10000 + # index for token in source language. + src_pad_idx = src_vocab_size + + # size of target word dictionay + trg_vocab_size = 10000 + # index for token in target language. + trg_pad_idx = trg_vocab_size + + # position value corresponding to the token. + pos_pad_idx = 0 + + # max length of sequences. It should plus 1 to include position + # padding token for position encoding. + max_length = 50 + + # the dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + + d_model = 512 + # size of the hidden layer in position-wise feed-forward networks. + d_inner_hid = 1024 + # the dimension that keys are projected to for dot-product attention. + d_key = 64 + # the dimension that values are projected to for dot-product attention. + d_value = 64 + # number of head used in multi-head attention. + n_head = 8 + # number of sub-layers to be stacked in the encoder and decoder. + n_layer = 6 + # dropout rate used by all dropout layers. + dropout = 0.1 + + +import numpy as np + + +def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. Then, convert the numpy + data to tensors and return a dict mapping names to tensors. + """ + + def __pad_batch_data(insts, + pad_idx, + is_target=False, + return_pos=True, + return_attn_bias=True, + return_max_len=True): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and attention bias. + """ + return_list = [] + max_len = max(len(inst) for inst in insts) + inst_data = np.array( + [inst + [pad_idx] * (max_len - len(inst)) for inst in insts]) + return_list += [inst_data.astype("int64").reshape([-1, 1])] + if return_pos: + inst_pos = np.array([[ + pos_i + 1 if w_i != pad_idx else 0 + for pos_i, w_i in enumerate(inst) + ] for inst in inst_data]) + + return_list += [inst_pos.astype("int64").reshape([-1, 1])] + if return_attn_bias: + if is_target: + # This is used to avoid attention on paddings and subsequent + # words. + slf_attn_bias_data = np.ones((inst_data.shape[0], max_len, + max_len)) + slf_attn_bias_data = np.triu(slf_attn_bias_data, 1).reshape( + [-1, 1, max_len, max_len]) + slf_attn_bias_data = np.tile(slf_attn_bias_data, + [1, n_head, 1, 1]) * [-1e9] + else: + # This is used to avoid attention on paddings. + slf_attn_bias_data = np.array([[0] * len(inst) + [-1e9] * + (max_len - len(inst)) + for inst in insts]) + slf_attn_bias_data = np.tile( + slf_attn_bias_data.reshape([-1, 1, 1, max_len]), + [1, n_head, max_len, 1]) + return_list += [slf_attn_bias_data.astype("float32")] + if return_max_len: + return_list += [max_len] + return return_list if len(return_list) > 1 else return_list[0] + + def data_to_tensor(data_list, name_list, input_dict, place): + assert len(data_list) == len(name_list) + for i in range(len(name_list)): + tensor = fluid.LoDTensor() + tensor.set(data_list[i], place) + input_dict[name_list[i]] = tensor + + src_word, src_pos, src_slf_attn_bias, src_max_len = __pad_batch_data( + [inst[0] for inst in insts], src_pad_idx, is_target=False) + trg_word, trg_pos, trg_slf_attn_bias, trg_max_len = __pad_batch_data( + [inst[1] for inst in insts], trg_pad_idx, is_target=True) + trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :], + [1, 1, trg_max_len, 1]).astype("float32") + lbl_word = __pad_batch_data([inst[2] for inst in insts], trg_pad_idx, False, + False, False, False) + lbl_weight = (lbl_word != trg_pad_idx).astype("float32").reshape([-1, 1]) + + return [ + src_word, src_pos, trg_word, trg_pos, src_slf_attn_bias, + trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight + ] + + +import transformer_model + + +def transformer(): + return transformer_model.transformer( + ModelHyperParams.src_vocab_size + 1, + ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1, + ModelHyperParams.n_layer, ModelHyperParams.n_head, + ModelHyperParams.d_key, ModelHyperParams.d_value, + ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, + ModelHyperParams.dropout, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx) + + +class TestTransformer(TestParallelExecutorBase): + @classmethod + def setUpClass(cls): + reader = paddle.batch( + wmt16.train(ModelHyperParams.src_vocab_size, + ModelHyperParams.trg_vocab_size), + batch_size=transformer_model.batch_size) + + with fluid.recordio_writer.create_recordio_writer( + "./wmt16.recordio") as writer: + for batch in reader(): + for tensor in prepare_batch_input( + batch, ModelHyperParams.src_pad_idx, + ModelHyperParams.trg_pad_idx, ModelHyperParams.n_head): + t = fluid.LoDTensor() + t.set(tensor, fluid.CPUPlace()) + writer.append_tensor(t) + writer.complete_append_tensor() + + def test_main(self): + self.check_network_convergence(transformer) diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py new file mode 100644 index 0000000000..c62792face --- /dev/null +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -0,0 +1,487 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import partial +import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.layers as layers + +pos_enc_param_names = ( + "src_pos_enc_table", + "trg_pos_enc_table", ) + +batch_size = 64 + + +def position_encoding_init(n_position, d_pos_vec): + """ + Generate the initial values for the sinusoid position encoding table. + """ + position_enc = np.array([[ + pos / np.power(10000, 2 * (j // 2) / d_pos_vec) + for j in range(d_pos_vec) + ] if pos != 0 else np.zeros(d_pos_vec) for pos in range(n_position)]) + position_enc[1:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i + position_enc[1:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1 + return position_enc.astype("float32") + + +def multi_head_attention(queries, + keys, + values, + attn_bias, + d_key, + d_value, + d_model, + n_head=1, + dropout_rate=0.): + """ + Multi-Head Attention. Note that attn_bias is added to the logit before + computing softmax activiation to mask certain selected positions so that + they will not considered in attention weights. + """ + if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): + raise ValueError( + "Inputs: quries, keys and values should all be 3-D tensors.") + + def __compute_qkv(queries, keys, values, n_head, d_key, d_value): + """ + Add linear projection to queries, keys, and values. + """ + q = layers.fc(input=queries, + size=d_key * n_head, + param_attr=fluid.initializer.Xavier( + uniform=False, + fan_in=d_model * d_key, + fan_out=n_head * d_key), + bias_attr=False, + num_flatten_dims=2) + k = layers.fc(input=keys, + size=d_key * n_head, + param_attr=fluid.initializer.Xavier( + uniform=False, + fan_in=d_model * d_key, + fan_out=n_head * d_key), + bias_attr=False, + num_flatten_dims=2) + v = layers.fc(input=values, + size=d_value * n_head, + param_attr=fluid.initializer.Xavier( + uniform=False, + fan_in=d_model * d_value, + fan_out=n_head * d_value), + bias_attr=False, + num_flatten_dims=2) + return q, k, v + + def __split_heads(x, n_head): + """ + Reshape the last dimension of inpunt tensor x so that it becomes two + dimensions and then transpose. Specifically, input a tensor with shape + [bs, max_sequence_length, n_head * hidden_dim] then output a tensor + with shape [bs, n_head, max_sequence_length, hidden_dim]. + """ + if n_head == 1: + return x + + hidden_size = x.shape[-1] + # FIXME(guosheng): Decouple the program desc with batch_size. + reshaped = layers.reshape( + x=x, shape=[batch_size, -1, n_head, hidden_size // n_head]) + + # permuate the dimensions into: + # [batch_size, n_head, max_sequence_len, hidden_size_per_head] + return layers.transpose(x=reshaped, perm=[0, 2, 1, 3]) + + def __combine_heads(x): + """ + Transpose and then reshape the last two dimensions of inpunt tensor x + so that it becomes one dimension, which is reverse to __split_heads. + """ + if len(x.shape) == 3: return x + if len(x.shape) != 4: + raise ValueError("Input(x) should be a 4-D Tensor.") + + trans_x = layers.transpose(x, perm=[0, 2, 1, 3]) + # FIXME(guosheng): Decouple the program desc with batch_size. + return layers.reshape( + x=trans_x, + shape=map(int, + [batch_size, -1, trans_x.shape[2] * trans_x.shape[3]])) + + def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate): + """ + Scaled Dot-Product Attention + """ + + # FIXME(guosheng): Optimize the shape in reshape_op or softmax_op. + + # The current implementation of softmax_op only supports 2D tensor, + # consequently it cannot be directly used here. + # If to use the reshape_op, Besides, the shape of product inferred in + # compile-time is not the actual shape in run-time. It cann't be used + # to set the attribute of reshape_op. + # So, here define the softmax for temporary solution. + + def __softmax(x, eps=1e-9): + exp_out = layers.exp(x=x) + sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False) + return layers.elementwise_div(x=exp_out, y=sum_out, axis=0) + + scaled_q = layers.scale(x=q, scale=d_model**-0.5) + product = layers.matmul(x=scaled_q, y=k, transpose_y=True) + weights = __softmax(layers.elementwise_add(x=product, y=attn_bias)) + if dropout_rate: + weights = layers.dropout( + weights, dropout_prob=dropout_rate, is_test=False) + out = layers.matmul(weights, v) + return out + + q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value) + + q = __split_heads(q, n_head) + k = __split_heads(k, n_head) + v = __split_heads(v, n_head) + + ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_model, + dropout_rate) + + out = __combine_heads(ctx_multiheads) + + # Project back to the model size. + proj_out = layers.fc(input=out, + size=d_model, + param_attr=fluid.initializer.Xavier(uniform=False), + bias_attr=False, + num_flatten_dims=2) + return proj_out + + +def positionwise_feed_forward(x, d_inner_hid, d_hid): + """ + Position-wise Feed-Forward Networks. + This module consists of two linear transformations with a ReLU activation + in between, which is applied to each position separately and identically. + """ + hidden = layers.fc(input=x, + size=d_inner_hid, + num_flatten_dims=2, + param_attr=fluid.initializer.Uniform( + low=-(d_hid**-0.5), high=(d_hid**-0.5)), + act="relu") + out = layers.fc(input=hidden, + size=d_hid, + num_flatten_dims=2, + param_attr=fluid.initializer.Uniform( + low=-(d_inner_hid**-0.5), high=(d_inner_hid**-0.5))) + return out + + +def pre_post_process_layer(prev_out, out, process_cmd, dropout=0.): + """ + Add residual connection, layer normalization and droput to the out tensor + optionally according to the value of process_cmd. + + This will be used before or after multi-head attention and position-wise + feed-forward networks. + """ + for cmd in process_cmd: + if cmd == "a": # add residual connection + out = out + prev_out if prev_out else out + elif cmd == "n": # add layer normalization + out = layers.layer_norm( + out, + begin_norm_axis=len(out.shape) - 1, + param_attr=fluid.initializer.Constant(1.), + bias_attr=fluid.initializer.Constant(0.)) + elif cmd == "d": # add dropout + if dropout: + out = layers.dropout(out, dropout_prob=dropout, is_test=False) + return out + + +pre_process_layer = partial(pre_post_process_layer, None) +post_process_layer = pre_post_process_layer + + +def prepare_encoder(src_word, + src_pos, + src_vocab_size, + src_emb_dim, + src_pad_idx, + src_max_len, + dropout=0., + pos_pad_idx=0, + pos_enc_param_name=None): + """Add word embeddings and position encodings. + The output tensor has a shape of: + [batch_size, max_src_length_in_batch, d_model]. + + This module is used at the bottom of the encoder stacks. + """ + src_word_emb = layers.embedding( + src_word, + size=[src_vocab_size, src_emb_dim], + padding_idx=src_pad_idx, + param_attr=fluid.initializer.Normal(0., 1.)) + src_pos_enc = layers.embedding( + src_pos, + size=[src_max_len, src_emb_dim], + padding_idx=pos_pad_idx, + param_attr=fluid.ParamAttr( + name=pos_enc_param_name, trainable=False)) + enc_input = src_word_emb + src_pos_enc + + # FIXME(guosheng): Decouple the program desc with batch_size. + enc_input = layers.reshape(x=enc_input, shape=[batch_size, -1, src_emb_dim]) + return layers.dropout( + enc_input, dropout_prob=dropout, + is_test=False) if dropout else enc_input + + +prepare_encoder = partial( + prepare_encoder, pos_enc_param_name=pos_enc_param_names[0]) +prepare_decoder = partial( + prepare_encoder, pos_enc_param_name=pos_enc_param_names[1]) + + +def encoder_layer(enc_input, + attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0.): + """The encoder layers that can be stacked to form a deep encoder. + + This module consits of a multi-head (self) attention followed by + position-wise feed-forward networks and both the two components companied + with the post_process_layer to add residual connection, layer normalization + and droput. + """ + attn_output = multi_head_attention(enc_input, enc_input, enc_input, + attn_bias, d_key, d_value, d_model, + n_head, dropout_rate) + attn_output = post_process_layer(enc_input, attn_output, "dan", + dropout_rate) + ffd_output = positionwise_feed_forward(attn_output, d_inner_hid, d_model) + return post_process_layer(attn_output, ffd_output, "dan", dropout_rate) + + +def encoder(enc_input, + attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0.): + """ + The encoder is composed of a stack of identical layers returned by calling + encoder_layer. + """ + for i in range(n_layer): + enc_output = encoder_layer(enc_input, attn_bias, n_head, d_key, d_value, + d_model, d_inner_hid, dropout_rate) + enc_input = enc_output + return enc_output + + +def decoder_layer(dec_input, + enc_output, + slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0.): + """ The layer to be stacked in decoder part. + + The structure of this module is similar to that in the encoder part except + a multi-head attention is added to implement encoder-decoder attention. + """ + slf_attn_output = multi_head_attention( + dec_input, + dec_input, + dec_input, + slf_attn_bias, + d_key, + d_value, + d_model, + n_head, + dropout_rate, ) + slf_attn_output = post_process_layer( + dec_input, + slf_attn_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + enc_attn_output = multi_head_attention( + slf_attn_output, + enc_output, + enc_output, + dec_enc_attn_bias, + d_key, + d_value, + d_model, + n_head, + dropout_rate, ) + enc_attn_output = post_process_layer( + slf_attn_output, + enc_attn_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + ffd_output = positionwise_feed_forward( + enc_attn_output, + d_inner_hid, + d_model, ) + dec_output = post_process_layer( + enc_attn_output, + ffd_output, + "dan", # residual connection + dropout + layer normalization + dropout_rate, ) + return dec_output + + +def decoder(dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate=0.): + """ + The decoder is composed of a stack of identical decoder_layer layers. + """ + for i in range(n_layer): + dec_output = decoder_layer( + dec_input, + enc_output, + dec_slf_attn_bias, + dec_enc_attn_bias, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, ) + dec_input = dec_output + return dec_output + + +def transformer( + src_vocab_size, + trg_vocab_size, + max_length, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, + src_pad_idx, + trg_pad_idx, + pos_pad_idx, ): + file_obj = fluid.layers.open_recordio_file( + filename='./wmt16.recordio', + shapes=[ + [batch_size * max_length, 1], + [batch_size * max_length, 1], + [batch_size * max_length, 1], + [batch_size * max_length, 1], + [batch_size, n_head, max_length, max_length], + [batch_size, n_head, max_length, max_length], + [batch_size, n_head, max_length, max_length], + [batch_size * max_length, 1], + [batch_size * max_length, 1], + ], + dtypes=[ + 'int64', + 'int64', + 'int64', + 'int64', + 'float32', + 'float32', + 'float32', + 'int64', + 'float32', + ], + lod_levels=[0] * 9) + + src_word, src_pos, trg_word, trg_pos, src_slf_attn_bias, trg_slf_attn_bias, trg_src_attn_bias, gold, weights = fluid.layers.read_file( + file_obj) + + enc_input = prepare_encoder( + src_word, + src_pos, + src_vocab_size, + d_model, + src_pad_idx, + max_length, + dropout_rate, ) + enc_output = encoder( + enc_input, + src_slf_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, ) + + dec_input = prepare_decoder( + trg_word, + trg_pos, + trg_vocab_size, + d_model, + trg_pad_idx, + max_length, + dropout_rate, ) + dec_output = decoder( + dec_input, + enc_output, + trg_slf_attn_bias, + trg_src_attn_bias, + n_layer, + n_head, + d_key, + d_value, + d_model, + d_inner_hid, + dropout_rate, ) + + # TODO(guosheng): Share the weight matrix between the embedding layers and + # the pre-softmax linear transformation. + predict = layers.reshape( + x=layers.fc(input=dec_output, + size=trg_vocab_size, + param_attr=fluid.initializer.Xavier(uniform=False), + bias_attr=False, + num_flatten_dims=2), + shape=[-1, trg_vocab_size], + act="softmax") + + cost = layers.cross_entropy(input=predict, label=gold) + weighted_cost = cost * weights + return layers.reduce_sum(weighted_cost) From aba46f077baf028530d92621afb26fcf2382258a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 11:23:28 +0800 Subject: [PATCH 135/180] Disable P2P --- paddle/fluid/framework/init.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/init.cc b/paddle/fluid/framework/init.cc index 3c0d93642a..c30bf9037b 100644 --- a/paddle/fluid/framework/init.cc +++ b/paddle/fluid/framework/init.cc @@ -85,7 +85,7 @@ void InitDevices() { for (int i = 0; i < count; ++i) { places.emplace_back(platform::CUDAPlace(i)); } - InitP2P(count); + // InitP2P(count); platform::DeviceContextPool::Init(places); } From 833e522d1661624662ec39da2acd1a0f8704fc70 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 12:12:20 +0800 Subject: [PATCH 136/180] Enhance drop kids --- .../fluid/framework/details/threaded_ssa_graph_executor.cc | 5 ++--- paddle/fluid/framework/details/threaded_ssa_graph_executor.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 482c32f894..d9b855503b 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -170,8 +170,8 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( for (auto p : this->places_) { platform::DeviceContextPool::Instance().Get(p)->Wait(); } - for (auto &drop_fn : this->drop_functions_) { - drop_fn(); + for (auto &scope : local_scopes_) { + scope->DropKids(); } }; @@ -189,7 +189,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( // Drop tmp scopes; for (auto &scope : local_scopes_) { auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable(); - this->drop_functions_.emplace_back([=] { scope->DeleteScope(kid); }); kid = nullptr; } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index fecad00e18..14b10cd0eb 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -52,7 +52,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { size_t computation_count_{0}; size_t max_async_computation{100}; - std::vector> drop_functions_; }; } // namespace details From f385228f059f77a450e4c7252359f973cc6d6321 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 13:35:55 +0800 Subject: [PATCH 137/180] Add Paddle Enforce --- paddle/fluid/framework/details/op_handle_base.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index ea97aa5fb2..63affb7054 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -34,7 +34,7 @@ std::string OpHandleBase::DebugString() const { OpHandleBase::~OpHandleBase() { #ifdef PADDLE_WITH_CUDA for (auto &ev : events_) { - cudaEventDestroy(ev.second); + PADDLE_ENFORCE(cudaEventDestroy(ev.second)); } #endif } @@ -44,8 +44,9 @@ void OpHandleBase::Run(bool use_event) { if (events_.empty() && use_event) { for (auto &p : dev_ctx_) { int dev_id = boost::get(p.first).device; - cudaSetDevice(dev_id); - cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming); + PADDLE_ENFORCE(cudaSetDevice(dev_id)); + PADDLE_ENFORCE( + cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming)); } } #else @@ -60,7 +61,7 @@ void OpHandleBase::Run(bool use_event) { int dev_id = boost::get(p.first).device; auto stream = static_cast(p.second)->stream(); - cudaEventRecord(events_.at(dev_id), stream); + PADDLE_ENFORCE(cudaEventRecord(events_.at(dev_id), stream)); } } #endif From 5a02739ce9c564c728e4631c731137cd0eb99bf7 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 13:41:42 +0800 Subject: [PATCH 138/180] Throw error --- .../fluid/framework/details/threaded_ssa_graph_executor.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index d9b855503b..501e1dfad7 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -208,6 +208,11 @@ void ThreadedSSAGraphExecutor::RunOp( try { VLOG(10) << op->DebugString(); op->Run(use_event_); + + for (auto &dev_ctx : op->dev_ctx_) { + dev_ctx.second->Wait(); // Sync error + } + for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); } From 55e2cc3d878237b026b301a0e46c816d43703bbb Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 13:49:45 +0800 Subject: [PATCH 139/180] FetchOp Force sync --- paddle/fluid/framework/details/fetch_op_handle.cc | 4 +++- paddle/fluid/framework/details/threaded_ssa_graph_executor.cc | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index c697a1c937..03323e3da7 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -47,9 +47,11 @@ void FetchOpHandle::WaitAndMergeCPUTensors() const { } void FetchOpHandle::RunImpl() { + auto cpu_ctx = + platform::DeviceContextPool::Instance().Get(platform::CPUPlace()); for (auto *input : inputs_) { auto *var = static_cast(input); - var->generated_op_->Wait(this->dev_ctx_[var->place_]); + var->generated_op_->Wait(cpu_ctx); } tensors_.resize(inputs_.size()); diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 501e1dfad7..7d1f7e46b8 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -209,10 +209,6 @@ void ThreadedSSAGraphExecutor::RunOp( VLOG(10) << op->DebugString(); op->Run(use_event_); - for (auto &dev_ctx : op->dev_ctx_) { - dev_ctx.second->Wait(); // Sync error - } - for (auto *ready : *ready_buffer) { ready->store(true, std::memory_order_release); } From b6ca3711b4efad23afb13d5d3ca72d462550d7b0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 13:52:16 +0800 Subject: [PATCH 140/180] Get error --- paddle/fluid/framework/details/op_handle_base.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 63affb7054..07a4b89217 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -33,6 +33,9 @@ std::string OpHandleBase::DebugString() const { OpHandleBase::~OpHandleBase() { #ifdef PADDLE_WITH_CUDA + for (auto &ctx : dev_ctx_) { + ctx.second->Wait(); + } for (auto &ev : events_) { PADDLE_ENFORCE(cudaEventDestroy(ev.second)); } From 76570c2e969df26fff28f22e1d6e8fe18cf5e45c Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 13:56:14 +0800 Subject: [PATCH 141/180] Wait fetch op --- paddle/fluid/framework/details/fetch_op_handle.cc | 1 + paddle/fluid/framework/details/op_handle_base.cc | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 03323e3da7..26c09eb8eb 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -66,6 +66,7 @@ void FetchOpHandle::RunImpl() { if (platform::is_gpu_place(var->place_)) { #ifdef PADDLE_WITH_CUDA TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]); + dev_ctx_[t.place()]->Wait(); #endif } else { tensors_[i].ShareDataWith(t); diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 07a4b89217..63affb7054 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -33,9 +33,6 @@ std::string OpHandleBase::DebugString() const { OpHandleBase::~OpHandleBase() { #ifdef PADDLE_WITH_CUDA - for (auto &ctx : dev_ctx_) { - ctx.second->Wait(); - } for (auto &ev : events_) { PADDLE_ENFORCE(cudaEventDestroy(ev.second)); } From 222763296f31ff723260155ad0b0169c285212cd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 14:02:16 +0800 Subject: [PATCH 142/180] Change fetch op --- paddle/fluid/framework/details/fetch_op_handle.cc | 7 ++----- .../framework/details/threaded_ssa_graph_executor.cc | 9 +-------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 26c09eb8eb..9ed974151f 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -33,11 +33,6 @@ void FetchOpHandle::Wait(platform::DeviceContext *waited_dev) { } void FetchOpHandle::WaitAndMergeCPUTensors() const { - // Wait fetch stream done. - for (auto &ctx : dev_ctx_) { - ctx.second->Wait(); - } - std::vector tensors_ptr; tensors_ptr.reserve(tensors_.size()); for (auto &t : tensors_) { @@ -72,6 +67,8 @@ void FetchOpHandle::RunImpl() { tensors_[i].ShareDataWith(t); tensors_[i].set_lod(t.lod()); } + + this->WaitAndMergeCPUTensors(); } } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 7d1f7e46b8..7cfd668379 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -96,12 +96,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( for (auto *var : vars) { op->AddInput(var); } - - dummy_vars.emplace_back(); - auto *var = &dummy_vars.back(); - var->generated_op_ = nullptr; - op->AddOutput(var); - InsertPendingVar(*var); InsertPendingOp(*op); } @@ -176,8 +170,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( }; // Wait FetchOps. - for (auto &fetch_op : fetch_ops) { - fetch_op.WaitAndMergeCPUTensors(); + if (!fetch_ops.empty()) { sync_computation(); } From 9af870854e99c4eba22506b085cdb1b521f70f20 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 14:30:58 +0800 Subject: [PATCH 143/180] Use heap variables --- paddle/fluid/framework/details/op_handle_base.h | 10 +++++++++- .../framework/details/threaded_ssa_graph_executor.cc | 9 ++++----- .../fluid/tests/unittests/test_parallel_executor.py | 3 +++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index 99d8968486..78f566c035 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -16,11 +16,17 @@ #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/macros.h" + namespace paddle { namespace framework { namespace details { -struct OpHandleBase { +class OpHandleBase { + private: + DISABLE_COPY_AND_ASSIGN(OpHandleBase); + + public: std::vector inputs_; std::vector outputs_; std::unordered_map events_; #endif + OpHandleBase() {} + std::string DebugString() const; virtual std::string Name() const = 0; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 7cfd668379..41034e9f05 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -67,7 +67,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( } // Step 2. Insert FetchOps - std::vector fetch_ops; + std::vector> fetch_ops; std::vector dummy_vars; FeedFetchList fetch_data(fetch_tensors.size()); @@ -84,9 +84,9 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( for (size_t i = 0; i < fetch_tensors.size(); ++i) { auto &var_name = fetch_tensors[i]; - auto &vars = fetched_vars[var_name]; - fetch_ops.emplace_back(&fetch_data, i, &local_scopes_); - details::FetchOpHandle *op = &fetch_ops.back(); + auto &vars = fetched_vars.at(var_name); + auto *op = new FetchOpHandle(&fetch_data, i, &local_scopes_); + fetch_ops.emplace_back(op); // FIXME: Use new device context for (auto &p : places_) { @@ -138,7 +138,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( for (auto &op : pending_ops) { VLOG(10) << op.first->DebugString(); } - // keep waiting the ready variables continue; } diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 2e61eca068..a5eea30f87 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -231,6 +231,9 @@ class TestMNIST(TestParallelExecutorBase): class TestResnet(TestParallelExecutorBase): @classmethod def setUpClass(cls): + import os + if os.path.exists('./flowers.recordio'): + return with fluid.program_guard(fluid.Program(), fluid.Program()): reader = paddle.batch(flowers.train(), batch_size=4) feeder = fluid.DataFeeder( From dfb8680018a4b7f34f4585f82ac62815cce5f660 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 14:39:37 +0800 Subject: [PATCH 144/180] Early drop fetch op --- paddle/fluid/framework/details/threaded_ssa_graph_executor.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 41034e9f05..13789667b8 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -170,6 +170,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( // Wait FetchOps. if (!fetch_ops.empty()) { + fetch_ops.clear(); sync_computation(); } From 52dd8ff09a73b37c6b1275a672b8dc8269530e8d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 14:50:05 +0800 Subject: [PATCH 145/180] Force sync dev --- paddle/fluid/framework/details/threaded_ssa_graph_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 13789667b8..50c24d3afa 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -199,7 +199,7 @@ void ThreadedSSAGraphExecutor::RunOp( auto op_run = [ready_buffer, op, this] { try { - VLOG(10) << op->DebugString(); + VLOG(10) << op->Name() << " : " << op->DebugString(); op->Run(use_event_); for (auto *ready : *ready_buffer) { @@ -211,6 +211,7 @@ void ThreadedSSAGraphExecutor::RunOp( } catch (...) { LOG(FATAL) << "Unknown exception catched"; } + PADDLE_ENFORCE(cudaDeviceSynchronize()); }; if (pool_) { pool_->enqueue(op_run); From 5b92dd4026ac1afb5904646688a3a8ada6b29c65 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 15:06:07 +0800 Subject: [PATCH 146/180] Remove dev sync --- paddle/fluid/framework/details/threaded_ssa_graph_executor.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 50c24d3afa..c1a28f1d1d 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -211,7 +211,6 @@ void ThreadedSSAGraphExecutor::RunOp( } catch (...) { LOG(FATAL) << "Unknown exception catched"; } - PADDLE_ENFORCE(cudaDeviceSynchronize()); }; if (pool_) { pool_->enqueue(op_run); From c42c4a6718599126bd9e7ba7f0407db18618c9e0 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 15:26:58 +0800 Subject: [PATCH 147/180] Add performance tests --- .../tests/unittests/test_parallel_executor.py | 73 ++++++++++++------- 1 file changed, 46 insertions(+), 27 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index a5eea30f87..727dc6a56c 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -135,14 +135,11 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): return fluid.layers.elementwise_add(x=short, y=scale, act='relu') -def SE_ResNeXt152(): - reader = fluid.layers.open_recordio_file( - filename='./flowers.recordio', - shapes=[[-1, 3, 224, 224], [-1, 1]], - lod_levels=[0, 0], - dtypes=['float32', 'int64']) - - img, label = fluid.layers.read_file(reader) +def SE_ResNeXt152(batch_size=4): + img = fluid.layers.fill_constant( + shape=[batch_size, 3, 224, 224], dtype='float32', value=0.0) + label = fluid.layers.fill_constant( + shape=[batch_size, 1], dtype='int64', value=0.0) conv = conv_bn_layer( input=img, num_filters=64, filter_size=3, stride=2, act='relu') @@ -179,8 +176,15 @@ def SE_ResNeXt152(): return loss +import time + + class TestParallelExecutorBase(unittest.TestCase): - def check_network_convergence(self, method, memory_opt=True, iter=10): + def check_network_convergence(self, + method, + memory_opt=True, + iter=10, + batch_size=None): main = fluid.Program() startup = fluid.Program() with fluid.program_guard(main, startup): @@ -191,6 +195,9 @@ class TestParallelExecutorBase(unittest.TestCase): fluid.memory_optimize(main) exe = fluid.ParallelExecutor(loss_name=loss.name, use_cuda=True) + if batch_size is not None: + batch_size *= fluid.core.get_cuda_device_count() + begin = time.time() first_loss, = exe.run([loss.name]) first_loss = numpy.array(first_loss) @@ -198,6 +205,12 @@ class TestParallelExecutorBase(unittest.TestCase): exe.run([]) last_loss, = exe.run([loss.name]) + end = time.time() + + if batch_size is not None: + print "%.4f Instance per second" % ( + (batch_size * iter + 2) / (end - begin)) + last_loss = numpy.array(last_loss) print first_loss, last_loss @@ -229,26 +242,32 @@ class TestMNIST(TestParallelExecutorBase): class TestResnet(TestParallelExecutorBase): - @classmethod - def setUpClass(cls): - import os - if os.path.exists('./flowers.recordio'): - return - with fluid.program_guard(fluid.Program(), fluid.Program()): - reader = paddle.batch(flowers.train(), batch_size=4) - feeder = fluid.DataFeeder( - feed_list=[ - fluid.layers.data( - name='image', shape=[3, 224, 224]), - fluid.layers.data( - name='label', shape=[1], dtype='int64'), - ], - place=fluid.CPUPlace()) - fluid.recordio_writer.convert_reader_to_recordio_file( - "./flowers.recordio", reader, feeder) + # @classmethod + # def setUpClass(cls): + # # import os + # # if os.path.exists('./flowers.recordio'): + # # return + # with fluid.program_guard(fluid.Program(), fluid.Program()): + # reader = paddle.batch(flowers.train(), batch_size=4) + # feeder = fluid.DataFeeder( + # feed_list=[ + # fluid.layers.data( + # name='image', shape=[3, 224, 224]), + # fluid.layers.data( + # name='label', shape=[1], dtype='int64'), + # ], + # place=fluid.CPUPlace()) + # fluid.recordio_writer.convert_reader_to_recordio_file( + # "./flowers.recordio", reader, feeder, compressor=fluid.core.RecordIOWriter.Compressor.NoCompress) def test_resnet(self): - self.check_network_convergence(SE_ResNeXt152, iter=200) + import functools + batch_size = 4 + self.check_network_convergence( + functools.partial( + SE_ResNeXt152, batch_size=batch_size), + iter=20, + batch_size=batch_size) class ModelHyperParams(object): From 3f88fad08ce6d7800356372e7cb20a3b70cd3208 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 15:30:57 +0800 Subject: [PATCH 148/180] Fix merge op --- paddle/fluid/framework/details/fetch_op_handle.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 9ed974151f..4fc05b3248 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -67,9 +67,9 @@ void FetchOpHandle::RunImpl() { tensors_[i].ShareDataWith(t); tensors_[i].set_lod(t.lod()); } - - this->WaitAndMergeCPUTensors(); } + + this->WaitAndMergeCPUTensors(); } std::string FetchOpHandle::Name() const { return "Fetch"; } From c0c2e15920fefb95010c86aa9654f2868d1b29fd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 15:49:13 +0800 Subject: [PATCH 149/180] NCCL AllReduce --- paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc | 4 ---- paddle/fluid/platform/nccl_helper.h | 6 ++---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc index f2303ff4ca..116b13d330 100644 --- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc @@ -50,10 +50,6 @@ void NCCLAllReduceOpHandle::RunImpl() { auto &lod_tensor = s->FindVar(var_name)->Get(); void *buffer = const_cast(lod_tensor.data()); - uintptr_t buf = reinterpret_cast(buffer); - if (buf % sizeof(float) != 0) { - VLOG(3) << "Buffer is not aligned " << buf; - } if (dtype == -1) { dtype = platform::ToNCCLDataType(lod_tensor.type()); diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index 2999004320..ecdd98987d 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -36,12 +36,10 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) { class NCCLGroupGuard { public: - inline NCCLGroupGuard() { - mutex().lock(); - PADDLE_ENFORCE(dynload::ncclGroupStart()); - } + inline NCCLGroupGuard() { PADDLE_ENFORCE(dynload::ncclGroupStart()); } inline ~NCCLGroupGuard() { + mutex().lock(); PADDLE_ENFORCE(dynload::ncclGroupEnd()); mutex().unlock(); } From 7dcb217e3147642221b65fd20820010ebe78d316 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 15:54:12 +0800 Subject: [PATCH 150/180] Refine allreduce op --- .../details/nccl_all_reduce_op_handle.cc | 18 ++++++++++++++---- paddle/fluid/platform/nccl_helper.h | 6 ++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc index 116b13d330..f77a4b55a1 100644 --- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc @@ -41,7 +41,7 @@ void NCCLAllReduceOpHandle::RunImpl() { int dtype = -1; size_t numel = 0; - platform::NCCLGroupGuard guard; + std::vector> all_reduce_calls; for (size_t i = 0; i < local_scopes_.size(); ++i) { auto &p = places_[i]; @@ -58,10 +58,20 @@ void NCCLAllReduceOpHandle::RunImpl() { if (numel == 0) { numel = static_cast(lod_tensor.numel()); } + auto &nccl_ctx = nccl_ctxs_.at(dev_id); - PADDLE_ENFORCE(platform::dynload::ncclAllReduce( - buffer, buffer, numel, static_cast(dtype), ncclSum, - nccl_ctx.comm_, nccl_ctx.stream())); + auto stream = nccl_ctx.stream(); + auto comm = nccl_ctx.comm_; + all_reduce_calls.emplace_back([=] { + PADDLE_ENFORCE(platform::dynload::ncclAllReduce( + buffer, buffer, numel, static_cast(dtype), ncclSum, + comm, stream)); + }); + } + + platform::NCCLGroupGuard guard; + for (auto &call : all_reduce_calls) { + call(); } } } diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index ecdd98987d..2999004320 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -36,10 +36,12 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) { class NCCLGroupGuard { public: - inline NCCLGroupGuard() { PADDLE_ENFORCE(dynload::ncclGroupStart()); } + inline NCCLGroupGuard() { + mutex().lock(); + PADDLE_ENFORCE(dynload::ncclGroupStart()); + } inline ~NCCLGroupGuard() { - mutex().lock(); PADDLE_ENFORCE(dynload::ncclGroupEnd()); mutex().unlock(); } From 50f71f50057c3c28e110da65cec7251a7d91e86a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 18:30:11 +0800 Subject: [PATCH 151/180] Using blocking queue --- .../details/threaded_ssa_graph_executor.cc | 54 ++++++------------- .../details/threaded_ssa_graph_executor.h | 32 +++++++++-- 2 files changed, 44 insertions(+), 42 deletions(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index c1a28f1d1d..0bf05c3c11 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -35,11 +35,17 @@ ThreadedSSAGraphExecutor::ThreadedSSAGraphExecutor( FeedFetchList ThreadedSSAGraphExecutor::Run( const std::vector &fetch_tensors) { std::unordered_map pending_ops; - std::unordered_map> pending_vars; + std::unordered_set pending_vars; + + BlockingQueue ready_vars; + std::unordered_set ready_ops; - auto InsertPendingVar = [&pending_vars](VarHandleBase &var) { - pending_vars[&var] = var.generated_op_ == nullptr; + auto InsertPendingVar = [&pending_vars, &ready_vars](VarHandleBase &var) { + pending_vars.insert(&var); + if (var.generated_op_ == nullptr) { + ready_vars.Push(&var); + } }; auto InsertPendingOp = [&pending_ops](OpHandleBase &op_instance) { @@ -101,7 +107,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( auto run_all_ready_ops = [&] { for (auto *op : ready_ops) { - RunOp(pending_vars, op); + RunOp(ready_vars, op); } ready_ops.clear(); }; @@ -118,29 +124,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( run_all_ready_ops(); // 2. Find ready variable - VarHandleBase *ready_var = nullptr; - for (auto &pair : pending_vars) { - if (pair.second.load(std::memory_order_acquire)) { - ready_var = pair.first; - break; - } - } - - // if there is no variable ready - if (ready_var == nullptr) { - // FIXME use conditional var instead of busy wait. - // if there is an exception, throw it - if (exception_) { - throw * exception_; - } - - VLOG(10) << "============================="; - for (auto &op : pending_ops) { - VLOG(10) << op.first->DebugString(); - } - // keep waiting the ready variables - continue; - } + VarHandleBase *ready_var = ready_vars.Pop(); // 3. Remove the dependency of ready_var. // Find the ready_ops after the ready_var. @@ -189,23 +173,15 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( } void ThreadedSSAGraphExecutor::RunOp( - std::unordered_map> &pending_vars, - details::OpHandleBase *op) { - std::vector *> *ready_buffer = - new std::vector *>(); - for (auto *var : op->outputs_) { - ready_buffer->emplace_back(&pending_vars[var]); - } - - auto op_run = [ready_buffer, op, this] { + BlockingQueue &ready_var_q, details::OpHandleBase *op) { + auto op_run = [&ready_var_q, op, this] { try { VLOG(10) << op->Name() << " : " << op->DebugString(); op->Run(use_event_); - for (auto *ready : *ready_buffer) { - ready->store(true, std::memory_order_release); + for (auto &each : op->outputs_) { + ready_var_q.Push(each); } - delete ready_buffer; } catch (platform::EnforceNotMet ex) { exception_.reset(new platform::EnforceNotMet(ex)); } catch (...) { diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 14b10cd0eb..26ff147863 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -24,6 +24,33 @@ class Scope; namespace details { +template +class BlockingQueue { + public: + void Push(const T &v) { + { + std::lock_guard g(mutex_); + q_.emplace_back(v); + } + cv_.notify_one(); + } + + T Pop() { + std::unique_lock lock(mutex_); + while (q_.empty()) { + cv_.wait(lock); + } + T v = q_.front(); + q_.pop_front(); + return v; + } + + private: + std::mutex mutex_; + std::condition_variable cv_; + std::deque q_; +}; + class ThreadedSSAGraphExecutor : public SSAGraphExecutor { public: ThreadedSSAGraphExecutor(size_t num_threads, bool use_event, @@ -38,9 +65,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { ~ThreadedSSAGraphExecutor() {} private: - void RunOp( - std::unordered_map> &pending_vars, - details::OpHandleBase *op); + void RunOp(BlockingQueue &ready_var_q, + details::OpHandleBase *op); private: std::unique_ptr<::ThreadPool> pool_; From dcf7bd2d92482927ab9ae2d3ad88d5b06e4961cf Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 18:42:29 +0800 Subject: [PATCH 152/180] Add initP2P --- paddle/fluid/framework/init.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/init.cc b/paddle/fluid/framework/init.cc index c30bf9037b..3c0d93642a 100644 --- a/paddle/fluid/framework/init.cc +++ b/paddle/fluid/framework/init.cc @@ -85,7 +85,7 @@ void InitDevices() { for (int i = 0; i < count; ++i) { places.emplace_back(platform::CUDAPlace(i)); } - // InitP2P(count); + InitP2P(count); platform::DeviceContextPool::Init(places); } From 201f79d03985114de6e49adbaad7887fed8939b6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 27 Mar 2018 18:53:54 +0800 Subject: [PATCH 153/180] Use Extend method --- .../framework/details/threaded_ssa_graph_executor.cc | 5 +---- .../framework/details/threaded_ssa_graph_executor.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 0bf05c3c11..fc84031556 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -178,10 +178,7 @@ void ThreadedSSAGraphExecutor::RunOp( try { VLOG(10) << op->Name() << " : " << op->DebugString(); op->Run(use_event_); - - for (auto &each : op->outputs_) { - ready_var_q.Push(each); - } + ready_var_q.Extend(op->outputs_); } catch (platform::EnforceNotMet ex) { exception_.reset(new platform::EnforceNotMet(ex)); } catch (...) { diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 26ff147863..8392170311 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -35,6 +35,17 @@ class BlockingQueue { cv_.notify_one(); } + template + void Extend(const U &items) { + { + std::lock_guard g(mutex_); + for (auto &item : items) { + q_.emplace_back(item); + } + } + cv_.notify_all(); + } + T Pop() { std::unique_lock lock(mutex_); while (q_.empty()) { From 0e7413938a109285e41f3a55650c6a338279c355 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Tue, 27 Mar 2018 14:32:42 -0700 Subject: [PATCH 154/180] added missing *.pb.h *.pb.cc generation to fix distribute build issue --- cmake/generic.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index c749c97f13..c0808ac06c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -597,6 +597,9 @@ function(grpc_library TARGET_NAME) COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" + "${ABS_PROTO}" DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc) # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it From d4f49355309f257f33ce08c4d680c712ee5cf2a0 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Tue, 27 Mar 2018 18:56:04 -0700 Subject: [PATCH 155/180] test removal of redundant line --- cmake/generic.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index c0808ac06c..981da16a45 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -587,7 +587,6 @@ function(grpc_library TARGET_NAME) get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) get_filename_component(PROTO_PATH ${ABS_PROTO} PATH) - protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h") cc_library("${TARGET_NAME}_proto" SRCS "${grpc_proto_srcs}") From 06aaea8a64c59467d45f2cf2e4eea3d0e91d946a Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Tue, 27 Mar 2018 19:10:04 -0700 Subject: [PATCH 156/180] Revert "test removal of redundant line" This reverts commit d4f49355309f257f33ce08c4d680c712ee5cf2a0. --- cmake/generic.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 981da16a45..c0808ac06c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -587,6 +587,7 @@ function(grpc_library TARGET_NAME) get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) get_filename_component(PROTO_PATH ${ABS_PROTO} PATH) + protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h") cc_library("${TARGET_NAME}_proto" SRCS "${grpc_proto_srcs}") From 1daa96579cd5df393b8f848c72ea9974a8d25b62 Mon Sep 17 00:00:00 2001 From: Xi Chen Date: Tue, 27 Mar 2018 20:14:34 -0700 Subject: [PATCH 157/180] adding comments for this fix --- cmake/generic.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index c0808ac06c..3fe750f47e 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -587,6 +587,9 @@ function(grpc_library TARGET_NAME) get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) get_filename_component(PROTO_PATH ${ABS_PROTO} PATH) + #FIXME(putcn): the follwoing line is supposed to generate *.pb.h and cc, but + # somehow it didn't. line 602 to 604 is to patching this. Leaving this here + # for now to enable dist CI. protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h") From 5408854090230b0bb47315c66abcf4e364d26c06 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Mar 2018 13:23:39 +0800 Subject: [PATCH 158/180] Disable model evaluation in unittests --- .../paddle/fluid/tests/unittests/test_parallel_executor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index 727dc6a56c..cb16ce26c6 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy import unittest + import paddle.fluid as fluid import paddle.v2 as paddle import paddle.v2.dataset.mnist as mnist -import paddle.v2.dataset.flowers as flowers import paddle.v2.dataset.wmt16 as wmt16 -import numpy def simple_fc_net(): @@ -214,7 +214,7 @@ class TestParallelExecutorBase(unittest.TestCase): last_loss = numpy.array(last_loss) print first_loss, last_loss - self.assertGreater(first_loss[0], last_loss[0]) + # self.assertGreater(first_loss[0], last_loss[0]) class TestMNIST(TestParallelExecutorBase): From 9f4a98f39729d1f6c6019e5d95cd6c3b6721259f Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Wed, 28 Mar 2018 15:13:33 +0800 Subject: [PATCH 159/180] Add design doc --- .../images/parallel_executor_overview.dot | 83 ++++++++++++++ .../images/parallel_executor_overview.png | Bin 0 -> 179321 bytes doc/design/parallel_executor.md | 104 ++++++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 doc/design/images/parallel_executor_overview.dot create mode 100644 doc/design/images/parallel_executor_overview.png create mode 100644 doc/design/parallel_executor.md diff --git a/doc/design/images/parallel_executor_overview.dot b/doc/design/images/parallel_executor_overview.dot new file mode 100644 index 0000000000..40753cb140 --- /dev/null +++ b/doc/design/images/parallel_executor_overview.dot @@ -0,0 +1,83 @@ +digraph G { + subgraph cluster_init { + label="Initialization" + startup_program [label="startup", shape=box] + node_w_g0 [label="W\nGPU0"] + startup_program -> node_w_g0 [label="Initialize"] + node_w_g1 [label="W\nGPU1"] + node_w_g0 -> node_w_g1 [label="broadcast"] + } + + subgraph cluster_train { + label="forward_backward" + + subgraph cluster_gpu0 { + label="GPU0" + fc_0 [label="fc\nGPU0", shape=box] + hidden_0 [label="hidden\nGPU0"] + node_w_g0 -> fc_0 + fc_0 -> hidden_0 + loss0 [label="loss\nGPU0"] + hidden_0 -> loss0 [label="many ops omitted"] + scale_loss_0 [label="scale_loss_gradient\nGPU0", shape=box] + loss_g0 [label="loss_grad\nGPU0"] + scale_loss_0->loss_g0 + + fc_g_0 [label="w_grad\nGPU0", shape=box] + loss0 -> fc_g_0 + loss_g0 -> fc_g_0 + hidden_0 -> fc_g_0 + } + + subgraph cluster_gpu1 { + label="GPU1" + fc_1 [label="fc\nGPU1", shape=box] + hidden_1 [label="hidden\nGPU1"] + node_w_g1 -> fc_1 + fc_1 -> hidden_1 + loss1 [label="loss\nGPU1"] + hidden_1 -> loss1 [label="many ops omitted"] + scale_loss_1 [label="scale_loss_gradient\nGPU1", shape=box] + loss_g1 [label="loss_grad\nGPU1"] + scale_loss_1->loss_g1 + + fc_g_1 [label="w_grad\nGPU1", shape=box] + loss1 -> fc_g_1 + loss_g1 -> fc_g_1 + hidden_1 -> fc_g_1 + } + } + + all_reduce_w [label="Merge Gradients(AllReduce)", shape=box] + fc_g_0 -> all_reduce_w + fc_g_1 -> all_reduce_w + + fc_g_0_merged [label="w_grad\nMerged\nGPU0"] + fc_g_1_merged [label="w_grad\nMerged\nGPU1"] + all_reduce_w -> fc_g_0_merged + all_reduce_w -> fc_g_1_merged + + subgraph cluster_optimization { + label="Optimization" + subgraph cluster_opt_gpu0 { + label="GPU0" + sgd_0 [label="SGD Op\nGPU0", shape=box] + + fc_g_0_merged -> sgd_0 + node_w_g0 -> sgd_0 + optimized_w_0 [label="Optimized W\nGPU0"] + sgd_0 -> optimized_w_0 + } + subgraph cluster_opt_gpu1 { + label="GPU1" + sgd_1 [label="SGD Op\nGPU1", shape=box] + + fc_g_1_merged -> sgd_1 + node_w_g1 -> sgd_1 + optimized_w_1 [label="Optimized W\nGPU0"] + sgd_1 -> optimized_w_1 + } + } + + +} diff --git a/doc/design/images/parallel_executor_overview.png b/doc/design/images/parallel_executor_overview.png new file mode 100644 index 0000000000000000000000000000000000000000..d890c0ffee3b38dc7cb74a2b56c2ab4831532211 GIT binary patch literal 179321 zcmd?RWmuGJ^gaqBqX>#1r64eZN(rcxfWROr2+|z}(jhG%X@DT5gwi1(t#k^uf&&a) z0|pHPN{7IIy@Px2-|u`o=Q>}`b-uW-tuymJv7WW=b>H{%KGf7uK1jtvMMg$;@REvx zHW?WTPe!(%nsP7v&1w7Ca`+Fqo3`>rvf|cbQ)FaVvP%kby0^(^3HzQheCYZ3TfX&@ zR;&$YSRjTkm@0I?7J~xCWgfn(uRq$ilJD`sk;hqHR6G!P^nD-?4<*kjO--%Xf#(v(M+I0;Jo{3riEayBm!Yge!6jR7XddvM_;UD9XQoU}Mm#Ox{;K70Adb zXt96);KtJ84x#@23>nIWidNy%{*QD2d<(u$tFS2iKYu3~MS*t-ekJukzD0rmbKrkG zi#C@`&OQ0+Y~+7WCzBJA-Sa6tGUA z6QR(!!tOVF>$VKz?Vl0ih3ZLSGLCGxi^vM9=mugWwd5j|F_%#C4-UM_QBOLRCzk)E z99w3EJ4c{sjY1|AeUaQXQzuld`@i}3!}L>9wwm>h|2zNZn($lqt92v(U4!TpSn2-$ z5>v*1ml{ijru@J0Q&>I^WR>#mRkHO?1;})9F#78v8iZ~ z`QZk;-;x&VnXA|(D}D_#Z@$ofN7r7MD7&42IHi{kRZDpd-a2Pc8Og@{sltp`V_d;<3OP>(*oT!}T?| z6sfAu;YE`HgKVCCZ{me4oYWEoD_BJB-V;uvM?~#=zPcr&+B!9)1llSMH^=&qe@tSR z_US9P?W{aHC8%L`HovXR_RHgwVtyO$QqwQ8I0t(Rjc%M&i+8sW<7Dmpb1}Rq`Od-@ z&#mR&-1V=?G|2J5u%R9)*Y`6VuxF?=tMgp>$m-lz@4Yw_YLlfD!Fd|uuHwrRZj-U*xFW~F%DJa= zgu1$>)!&~uss<|T2VZ?+w_jgftN>NV~`4XF{m9NVywcMpsq>&;y{ON4kjc!rb zQ579ba^q}Sn@mrRI%`!;x|?I)%QqX#GuHDBtEt5L#nxitCpNESV(m=vO0Pvgf0Fa- zsfE{BGv*a`aS_kiOke|3*<9Y!&GirFzVIL3zCb#$_Y4AKCV#C+8>HSuwZsd! zy(vV=k4dHZt@N+-Tc_2q&c+HCl^*l%y2<*Z8O8|O6QSbhFK6H~RaXxGzs$KVVkrVB`yrbS!CD8L-mz8~^&OIOS$@?5o9AskZF5JY#QL zQUvoYs2$b zg2d@&Tu|h(0a&!;T8|l};Tgrk+6RX?*!w47%iNZS+}fmL9Pg*JNUgrS_Kx$@q@t6C zr$&)+1?Qlva*)m67_tm$)U#$DH&_YbVOH-;QSaZVlvR0d(VV$VX zXRi@I>%{&Owa2_iV=7&KDzzMZOBm1GQ0#qUPa484s%Uz)R{x27`4j2MH{*t!|J*nC z*uE;#L*87aPgZ2a&EU?LYpf{I_Ca7G)wfuUT8kxYdR3M5^KlxPna6%jy8e2zudb>& zM)1Y^p9Y$24$4So{VWr-vt=LW*ZYz2cpx?zbswFT@3j4AT`<6VXKU5V`_n-dk_@law*y5}WO}h&>}Fk_V=BznVNH`hw@De5R)#!y^4GG2!6WQt za&+b^tLD#*ixJ|(HZ|)#Jw+zst6H2~?KR%rp1(gZ*NM;z`t&Thy(mtR6yS*fINN<5tvoQky0J7RcH`p#sik&#d7@_fhqg4? zk#&Y8iNRfvp%M=tEj^L9FGyrRK!lG-4$mn=&xmj-N0!6E;WmuOsUQKhAO;M#WHdo3 zYxj!?L^+4Rwo87k|6w#*6-X;sPu2(}WE>P#ViByv^U=0YrKk(73gBn%jRUnL-;9&i|JN6?lk3>Cusxa{bdr zrSDH^BtI?RctN?l42>|m%?fK9W|S@M+OKzqh|>in8qLot9r~~PZLU%kYsh z7`CYmmAy4|e&7Ka>LK-?6%4-f;t?%lbex|~HUr2*Rp2Pvq z*;k)x>HVQ#h60EWi-@Ap*X0i;>A$?nu2WWUe{$oSg|l6{DPNvl?eecL>T zy{!!<$oCz)ns#U5q2oaL7aG|=w+4OZ8we8Ct;ti<)A+2A?h=cU2J>>8n1=0*88Mr7 zOwtAK(uS>-LH%3}HmRAyvcik^sbbs~QGcfkoz!MFx6M8jFRkJ0J2f7}f$mf4iKD@I zoh#DaDT;FL5iFvGis4K-fi*5aKO*R7wSQ}M#2L_1*&&&=Cja#@=9#X%Cj#d6edy|U zA&0!)EPjk&b)4xbTItfc@+|lV4Vz@2mNMJTlKO=(YJa^Er#hk)RQkIq?j4t$WYPG1 z`{r0)&vysK4WU0VrIq$svZ?zPOzp6>vAlv4A9*_6vp!e9sKF}X`rHrJP(SDLGlogc zle>ZZff$Gk}X!6tc_&dK|&yjJ>!m-2+*L#;y$2V?o*l(^3IumR>1}hy2G5};Ss%5sFIsH)aQmg}pcYDrn`^bZBWS3Bm zSd1cy$|jE8rI|PPsnf1)Ce>>wZy95F`eBIj13j)Nyk%E2>_77xH4hvBG0U+|dW+FzcukDX+>F z!Md-hzH<@@hqk^7w;9&Bjx8kf1hf~JT1Jpr-2Q&wDb~olOO^0T^~>X(t%~o`zJm=D zf0n%mfBF?0zRK(_GU-L{{AO?2-c13|jp1Z>zwL^`?nN_94#U{zlp4&u2;Hq07BAP$ z6xS`3BR?)e6`^3?m@4g?*n$1QyFL>>(KqQQ?$GzmH7l-B{qJFMhUDTuLpN|2dl(&S zQK)5!i~J28OaHYHHYu{2&7*9x18$dHSBSdkyZOOqC3s?TBQ`T5 zGimQn0l@Rmq>7;z`OV(^&nQh{lwrZxw?4=$np@l%XsoyC;cwRLo*EPmK7=h3ShpK9NUUMY9YZOD8kBhpc1JHJ%eFCil7N37@_nC&ks@?ISGT*QeF z29obFEiMaQt-zOpWDHABfOPCMhiK7*z~fj(}NXQBT32P)^dd#CKtG~5Q>8?-WJzKo`3B?%{*LXup-Y{cDgQLO3nE>lA{Kp|oXO-1?S))L zXnD;wO7-bZzj#kon6<}-%^;S{c++;&sLXd%C^6jlP~2N4jXZ-A^X+%uBw8adaGpYC z;+I)MJBH#d_e)QraaCZrQa|tR^eN;ZP$(M|Sm{1rCl}b^}y;s!e-3?BCpX;zGb)UBenkyAC{K*|7=*Q zn^m^RsIKq718hxCgYT*#$4v(l4UNbttx`*?#sneL>N7X?CLcitjxmKXf1h?P7W;-G z4pyy>G1f2lnCMqInb`w0QbGt0VSB*QN`p;bNa*G5aiH5G{m1#r6t1?X%jJZOG#?K+ zb{*-*;W&MER?m{&UCHMPn6dz0^)Dy2cu|zkO|FKpdA_{a5}&OB2C{yX zO3xK7jS{u%dYP@3VAY0pIl23N22L!k86_w;liAjnZcH>C zIU4^+!TMh66HbKuvQ&Xhmj9K^6y$JNQ*xgKzP~HWlp5JF>@}F>{r$fsP?QXO-_ZC( zex!9r{^1A(W#+r~7E>-1FO#O^C#dBPFk4<3`tDt%^z9IAV8GIzA}ppI#qv=9xR7K! zLFFfDDSpsmLL>Ktn*pk<3{=Y0`y3uge5a2DP0%boCbadn%vcz$N?jncg7D!KPalm? z?g?Yi@0=*<{T zuaGkGXDax5;iQrvD(EXUWTBg{PnPe7R!|p^j=956g)y{L&Q@z6J;EQRyJ8a6&Q5{iYf^E?N<~_cmXAR#5L0XD zaO@r#Y8`##VXM$vU(zHUm|%IvhsOqJRkC2^42q0zCW$+1{4|pIOHe+egAY8CvR1^* zP>38Zx@hj+;~~HdKSPP3m4{v4OO*rz@+TJxwR#Y_Mm$eC%$VCSz&A6(#*YGp(6Ox< zZ&k{L>Oz!A=j?#(rZuH;ygCnJ zCS&*OoJE!?Fb)2BtBbW99TZ5-BLrUz^y0*khy|XxdT9`DLfDG+0XCU@BTI6s8kTND^ zR_ed`zpEg-L{5Riz*!H}$b3jip@`+e^PR3f9Lt`SLYm?SNRooZ$37hoY!X8b#36Oi0jSQ;Wil5o5SrkA`` zq|-DPDRXn!7aBzxdMT3d`Dx2AzH+jl!ox6`g4!qHq?5>lh1e!LA_n>C zH*BV0DyuRn(kLM2B@MR}?dg6sz${Yk zlHIib`5b)U^o*w9zM#w0HYX&xxjzA;iL@4XM$cY*d#^fiWwA|m@R@b0Z8(#_z?_RB zL8$hzMZi|~|Alk^c91l18;BD5&Tdm4)D&I2N_q1*E~&_Q~_bP6+J>*WcQHwwQDl1Axvh6sR(xcCduhX{{W*L@q zfVJ((y}}{&EpSg}<@V~$dJjh(a#qq3T;zgyODrD0NT(7ND1NvI4KOR0kFbr@_i1sT}A71%DwO z%)ebA>yfPg#g}KSB>PO~A&uJn&!=tcZFwn5k;iOJp!c-e0T%nyK)Ee_|Msx&d_PcB zPAm`S33XnUQbM~?ssrb7cGjSn3W}kPyMgVoSMl3DV(2pEV=nhvyks_K%ZNa_mnpM* zzaP8o8?-u9<7S2#sByDSlJTppV?yWt4<5V-04ftO;f1?QIKyM4@ffqNLYQzv=D$q% zoR4vplOxb}B?ph5d%yP(`>~@pN2*=2rX*um2|!*l&x+f2yccum(`?V{BdKoDXyi1v zKQu+5{OL8DX3+2Pj-8%_(=Qe_=NkuqeSb&X-kh)EPP@~wz4r6aCiJ}oYe3{>z<3Wg ztZaAY>+uiT=B5rqwOUlD-|nKvLZgQ$W3`-`4=%iL67SP6~oLpbb14{7*jiy zLZ1o&eY4yY&%{B$?{DvWu8$=Q%I#ybOfYr5^gImKt&O1IJ?uT9SZVv*R8EQnKQX?L zBFd_qYtB)eJEhsK;x2bkyR|fIJVCH<^ibvvV#ytQbUO=9zr8rwE>3|dPy)CHc%)K? zUSk+j96q#mJgLXV%l-H3BMQzxMM-COg)+J|M%wR79Q2Ao#0UlMfzO!~_{E7fuk{vK z_QoV!7Vw(;Q}jEhZC@S139$JsW-(u87EAEB$USkDlr}98$XaS*!+?6j6zX-N_Ju(K znPku9qw7(Pzdy1%f+{hmBZ#?`E;T zqXnSF-6mY5esLfh6;#N;6C*{XvzBtf$a~6^0kuv0RM^6-aVX=o}tP z%NNRca_bDTT!GF|rdHGq+G#0Dm9bODSnl_zI2GFdJY3U}DR{gXJ4l`GAv30v%NMA> z#mJ}o5~y$!q1_(rI4uyE6e(X%az;Cm6AWKyx0~XpoDNOwb~TuO$PizcvSlb}&%n8R zZ(KpkP6|PaQlxcb^@*{^HdzCL*({W|p&{*D2b`EE8mjjxF-5z zPgjjl?0LYP>XORmHxdgFkHcFYC`%vBClcvY?m|Vk#f~$O26Yqx`A4L=l`wS(xSiV` zn@0BYCE%5iE$+Y>WgwAYK$P*wgQ2B^&WRZAluk!!nFgRYLC7N2j~NlANEnSD0qNw> zDJfv(mY`HS0@1;nm!?YkD(!zvu+iyscN#{k$Y!1{o3$f^_gW(-Wv? zfOJH}nXEphmXTV*=lZY$6|cJ@Vb}n;JyzX*BVW*mHM90ub#(!y1)x}geo-kdN!+^f zcrx2=Yx=K&1N$w2)b?fn3^-N4i?V5uEI6rLj3T$7vN>UVA2X0Zkk2B8&|&BZHOQjm z>9AKfQRcL`18jp{KXgc^{2RpGE`krD!=!MXF;j$3!P@+Qb}N$p5f~TJCs9CzR-U`r ze8#{X|D_MO3RnybEC%@$PvS$YCR&6=UBg||H_CIjxjF$2a`XXEBQU(E=5RO^ys^5Z zw)PuWP=|~iHE?*)w0upL=!AHh2)2rl;lC|tP@LSbKAyA!bm<^e(4R`#qFzf-#fyNC zA#c2%NFrot^RYUQOVh?#3$m%Ec$Nl(kK7_HiI&qcH9* zo!QbhFuIZd%^qrfLSoC@u^%!a_Y9o8$rZAN!c?7 zj$ruZ50TLSl?3M04c2-TNL|($jA^DT!EBOZV@(f+T_P=CNBbE?uI|;_;OpgeGVxiK z+eE9;Ej@s8q}f@ah4>#J2{}5j2^P!|tSwHhlR#n`KRKn2xINscU+KQke3QumRBKhD zsJ)XL*L76xgy!x*wSl8b(KhE(hWrP*g%Bc(;E-r*O|gd#CG3KJvFWpkAIs}2`h2G$ z?z)T32B&+9?7mqV~VtJv967 zkZd{P3IecVR^ZitlNmAeh}yI_(Ome|q=GRm&==^Wx{oLW2x9hZm)mCqxyyl2>*39g zxO_(ZH*x51!;5M{4VF~|&!m>TV%@yQQrpq;G%mcy*(BZ1IQ}E5@^M0u7MNoia@S~Y~o2V)$zCQb@ zRF@0K#<~9Y0|aM_bcV#!%nwu)kmQr+KE4wZ!nqq5`ZR1O$#G zow*h~Hac3JT?P}UL zd-0J^@L%x3N}YI&jJff_4g{2tG*q{$$vP(N_h*XCbu79%biPe6CilWCW`bRERqc3HwJ?_0~S9C619 zN!xYth(}ep=EaS>fV*fA5?xgQCXG~_Fq)x|hmK?KJi>}m{#pAOB^Y>2#3q`WGp-pA zzMpFAe#-p&Rt$Jt%*Ky&+lAO48F~zVPYDC8No}?3u@Eel!rX6JPaHiI#>hVl&C^TR zoI!!W!_nj170(iNjCt8v{CZEb!xm&fT=3>iq?2gvpj%L^L$cCpnrW#ZbUz4$c9dCP zNuVz@Iv>k_Y(+au=^(3^!&7OHG6kKDsXy7oDNyYujo36N-tCw8pc`ilP~;5l z7rD%Rf>D#;x!L!NuVZi&o?Mqi!>GUkCbJc2xU;LslMj-L=)aYrsS>67eG(2LG|L0( z*Ltg6emZ(xSG!D}U0V;fC8x2Eh&_(=%#FFvO1t~KZ4rn`NS}=2wqMHMncT$>k*{*5y5Tp3IrEMPs%h>62GUL?ygdj)S`s3rw}=` z)Se+v1XEVoT^dQAtlsQOhrXA`J03te?1Q-y}`plMd7pKZ7lCJZmpC77Bm8lm6&K>{LrCS{f>E_SzRS{eq1EN25fRNW;+pT z#qO-#5>F`Cmw5j7)WH%;Hd4;hQAP*q4|RT?f_ymWj(ChWwHCac833I zG*4KdHpbj%>SR@VSdQ(Yn#4-q4Rx*>B!#AaTf9fFf$R|eVVvA%gPr_i-qR|X> zQ1G85r&`|Hn)+SqMH6M_`f)5K9c5tZa-YqA?fcx`^Ri|ap*j`?>}(5)OG>60W_7?h zs{@qMnte&3%1G_j>KsAJ=ZTOxKjYYVl5?dphhG<>iH3{eDM`y5fN1tv95!f_%YGZV zXkhh*oNC76t0UEVTcJU&mFh-(e7S!>s^k=BQ*us|@+u~RU8xf6sERAgo#ADzk}Z%@ z=`ZR0))}VUvg z%!yJ{&E|cfK%H-s6A5BdjiOq8zc>SS=G0rzhd(*Wj>YO|-&z?p=!ay7P*p^5%W3$J zb&(ia+VfBynmmpkq$z7cSRqU=C{oxnx2c;D5JV#}8m*Gl5KDtQN~m$0`0Od$_=_p@ z85lLvk}eJIc94L`Q%>1v+{K=wumW#~RW*1|<%jAER&|n+?y|k3kO<)=x3OqA&1$I0{34Cpf#!eqS+;3hUDd~#Y>^- zafpSWYaf{ZkiO;f!*Oe}AhP|1Mr9w>4>6~ovewfPEeqWU+=UyvE`17>tAMO?7?*gS z(M2~fMgA?ryZ(Ig!|l2*iQ|!vkCOM#SY7&CUPu=JXfZ+H&<^?_bFR zY@~(@1lT|dI0rXwe2P@J53j&9!w6;zN?w;DRsjU|gZtyvo;!xTW=6MAC^@eP!^Xz3LQ^`X-518^$$ zb_R39-&G`WULo=ros<4aPR%U%?}`U4{b{)?jHqkrAjntR;5kh|ztEoMFMK@JnL7x% z+0ppgEvOxyLoT15S-rRoFo19{GI7S6I8af{BA}4jq(3Owu%r#{h!pDAU_Uk%&-%PR z0e*_B&$qnxXR#+%O1uSoI_7RReZ8(8dSGpunt7)<-Jrd+rZr63qAnT zq^?T$SP1YzcHh~UNF5g%(uOrskAl+_lZC1YGSZlvf0zt0%rp==^bJqk1nQHi);Uw^ z0uYlDaBOfA#PUv8SGS<5yg9rwhlk@j2oGFoegHb1Oy&-c#5nr6fQg}wnBT%*6I3&} zGWz6-PtwALiM<~s^uYOoFH@a6=qFJVUN$hB}(;=aWA5*6In$q)4VL2H>~o*SP4E5-Kb+^KyzXL!KTR-_xnfOCSF4`5hSI)%pPi}Pv|7jE3*U{` zKB&$AM$JrG<2Wxy6Td7x0_y4n0)WKLv^_J!xIAc9K(kpgcfo%< zYU85BPoMW23qJ{eUT8VYe@71(H!0b24Ikjzcudb`Y42_-@oTuusg#6euOW!YZQhL- z#8f?{JtJf+$XafK+Aoc#7o*izeOKKJj&?wVCr2CLYP*fl_EcoTyFeOn?L#Kng z5dNX)!X#Uk-qDJVjB%rtFzqAd)87eRBSfW7jI?|^28&8S8!leDBCVOIGIx>BX`mij zjRAWlec723>gKqfEO6Wonih1}E+vo^appA*_-_AMY%}AyElV${S(c*^Wy-&$Jim-cIrOKTXh{GY5By`N-Nq)Aq^0Qz=y zApgdBbh~6@Yu+e+qhA!mn(t3Et)%mHCK{%~q?uC|pZ?rauV7^_Rse&!64a2tQhbl{8G z%3yu{$ZbLAsgCS9C2aQj%gN{QgfmQSzZD4v1xNS-jLIA*W4>lu$Wv8{L@fY3Fq<^( zV4>3!EN41#NfGaGY-gjl(Ue7bu4DLvMfvh7*@QAij=bnjZtMea@>b`~r$W-9uz%!N z9h{(xKBJ1$jMnYSq{HiiFe!>uK*B8dhlAgR_lmhIP`Ukl98AY<7CE(F>N0Nm(qut4I*9yxc0fg7cJ{f`QjhAGV zkMQX_f#JZ9kU)nEP`{KGsC|Bk`Ka^)GvU_@Q-=t1`)m#AnT*DIo^LBT61s_@POgt= zibZ{jbWdbQ@i2xWVSPd^aD?Z}EKw4OwF^Ek*iBbowQzRlz7K3pH$$# zC<7M#H+Fa8tMs#4nxFN`t9_afs?9hnS*;&fL^lg1pf!8TCII%|oV)^MFI3%AhH}Ow z0Y44|@IO4xubL*XIQ|SKFS4mdI1Aqp?UOu%%uj)cTb}LjMC`*=A`)EjgR2XJxrzG~ zq6|A%iVR=9MfVw>R|%?Q7q4L6}^^EG-`IT$@onrJ4K)WjN`!FzyQc%Tb<11 z6hY4^m*z)C$`w3;JQ(5`0>Adnub!G`igNIs2abbIy%g(*=J>!ZNB2U%C=nc!u^oFGoIdW+IJU(rg5H+<#hGS3)KD%P|!P-LP-s6&Hi49Qm@chK)IY#`f&o}>(E zlJ!f>iTU{75v<~#df@g@A?vuv#H-+s2$M(<$<0oC zXIO4o!Q_B{Q8u2{^Z0vW)YD+>1(eiWb(TrrBox=7@iVyl($2#dRuyr+?4owDNjYsM z=gj!Ul=k(oyy@}Hj)>MMKgi2U`>P1-n9g+%9vqK8M!xlB9k8&ccqPcY{B&i*P3ve#d_V+{!Ajdy{Go`eGdareM+WSIg!diekDq&>{9`Kc;vSEoK!G2)r;$<_>Uk z^NiB=NeL|4aLZfeYEhaF!LnciTG-=KwAECgH2=d3$f5ixGaD*xr;8E8OvJEVtXe^} z?UMeYhW#5VSwiTGCZnuo7Ofo+Ukbq-TAY%&SpyQu@X0an9-a!NrE-xCXl$}Ag? zhiX*vET7~|$J<6sk1_7~=Uo8WzHdC1%9v8L`fnzfnVx}TYaUqCufju#c?qGuRC&rl z4pW5kr^?I{GR^9w(p=W`f2EWcdl;Xm!|io9bvsDi8h;qa;lGyQdk659Q~W`kVLIMQ zeIz&8iBPX+h!3q$3I$%=Goe+xZ~MbmGKq*za3c3R-W^)u&pg%dJNZDFQ6O&)-81Y4 z?LR>zV9JSqLLTC1RADnu9Jni)!1!5*|@Vk$RuDqs`M4?UpI&42X#xBBJrQAtyrhpGwzy*3bKo_)xo9Nx=B#P z7H_7373=r0*4LDlDOjLFFCQYX(wTwfqY5^KY!e`wE3XC8U)9-;8ow!D)eHoeK`%b6 zzSW8xOt>{(HI-P(HIEaumU#1J;doY0`*cuoE~?`_W9iQPNI71K_83qMo;<3Os3y`ymDws&%o^FD#0tQj{Gd6 z@RKu#&DyWf4&D{~1%zqc&ct$^TG{={4EzibG}XQbmu?PF7dBXLH5P6P@K9PlK~UZD zsoujXLFs7xt7wmaQq)x#8rMgkB5U<`6=0Q5f~&sN`HTwAgxc<{8`J1+s#4Jp3BgW9 zqdC^nB4tW^xs%#SS3u?w=a`3}`=Qiv!_35`icW#qAo6E0Px?+;tQ!Q0PtUxVdRrC9 zwX0)Bi)Ay^uT~TIx0dZ-;&wzNWT32Zdi+9>H~xF6%ZT$EA^CZ~@E?7VjxD&_g*f*Lfi77A zasurW>Cm{|!A=nyKZesO+N7&5g@bb;r~);M6wBCw?94`0@2sBTt)s@eN>f)E0K0(? zBOx`NjnXnN(2M>t$6cEk`}g*s0eOB4+GmUamU3_Z#AYG2fYa55(bP5+#yf3ObY`(s zIe0tgE}yse_RkI9hkGD3B4-HIljv@+C%OhzS_D)nSTL8A=|JX_+Kel>*d&hfKa`2i zmX&XW2HI)kHR44=DDIIpJT1lB6X|p|(vk2|h0iznbB)CXr%zeUpLH5K-&w!Hm)IsF z`Lf4;OSXuCo(h$RII^<^!P60lJ|3zsZaaQx%yAW9#F)*>OmEaM08$Q09-?YitB(RR zp?(%N(|;;E0bxbl#D(msG|-*U$vKZ^B>9D(OSrQnaXX@E1rSM9?mKJHeqVw?-F`1k zV}fw&`FCTsG)K-fi9Eq-#!Ii(@e}1SjHqGU*V?mv{D}uu@-T@HbPg3+ZM>(y(gzeN z!9iAaA9TAQ!PH^vfT0}zeu0ZN0fN@lbQU__-&8!IJwK@BYL43g)jxOTleF)OyW2JU zR(qtx%VTw{rdynsn6Kg-OY&Cgo8UDWjh?GAr%fuqy6@a|GStNuDJO|J4uyKrzFyjw zrCeGU2d}XpG@7!Zjxx8;9+TVHcWLK^46s%`m5wG==Q8zeFI%ENKJp?vh{cSeHc<0L zIk({kmx72$$?~J4!b9@m$^_tjoSb~ewC6!f5lq+DEYj0vyt;2{)o_&4J4PD{X6%AC z+!LS>2-T08a@s+z^gP?My<8H|AF#d1A$gdbqVzJBx?wV-QPNw{yT|&*Yed z%x9gnb;A-+)00h!CtIa!ZpqLqgp`fc*im$xqJ57sK5tyS20TF}XtQ;)2i>x+4ftE0 zr$Zg3|Fuw<#rz0g?o0JT)VRnx`wqN7!XidD8tA@}tufVrh7Q~#umiOiZc4cLK)cjA zgX^cIOu=g2wejdenj`SCRlo}YdbcDD!j%x3&lELW?h8iKDAP62H;JGN-iiWtvWEBE za-Bw5fKAL1p_lqjo}g))HnB-OWi|rmZHlPbm;0D+SAsD6@P`}Vn%EHAHiGLm_vwfW zYQc{Vp_Zq+g#kHCe#eyGTLiW|SWNwYGjggzc=<~ojut>x(vJO_*Pi%K&ufODd12~FCKLBq`f#Qa@K&XlzrmudW zENaN5fq)n2L5vV(86@k7jo1W)MW}>WC}yQwK0jZ-I7E}Ee`Sh;sxXevvqhllX(exz zv2dWxQFk~tJhDkYoRJG3tA~o@F~3l9S0;W*rh_g|y2~GMprSS-rzsQBMt)NQOZhGm zUTd=2?!pq6D(;-Bc2q}m3;MZ!sLUN`to=6p#1TE!7{DSGu}D2?kp z-V>Y!(FeKk>}BIIDUiOGr#e5$tdG*i)2>{-4epTZ`*{v3h_g@Q8v_CiK7sNvq*CGk zY0?EJ+WWLpuh_JIOqc(~9MnJ7BCD{5>#w+pnBd9hTH16d%J+vq=U|K_!jTK9N!%+B zSe;%DIyXx9oGe3>P@c*_)Q#+jsf<#O8U3o*8fIPoip#bbmbjkDB`~+lvQrfrU90Gu z{U90r&%F&rI(VB%83pluWxVW zM2s1%K**i@4W{5abVe*o#|tekW=$`!OwdR#X^P)f$sbT`umYB!0eb&t3K6QJOy1Pe zJe%Q3L>PtR;e=4_z#{V|2}apNhn7Y?nIoSaHw707>1DUqZZPX!NxjgI<9sf6py>09 ztHy`WJ$1y&q`|bE6Tn|eNUA1-#$J(id(y&I10cG%B+?T5#0o z9FVhc3uUQ}cf(3?%Sm6OBG^tk_4beby`}kDIB?nhtSt6dtY2;B=W zy6B<2J$0=A{9&R;2kGvhcjuWkU-;#qcgZ6o+x*ZIF!mfnIPh6Z;xUsk}%UduRS`*dI?_$;<5lSAoXrvoya}n468yvuA zbd*J?O29Gq3kBMqXG^4gPy%ub7-H@w2rTZM69?HOyJD4#cvo8`CXR_O=Q>?KpmzMP z#nck~Sf_8g@hOYIchnJHy$sP*A@d*GqJv)rFE5?ubcof-j&g6R7Zs0x0G0c_qaU%e zcOKwCUXYWt7g?3Bi8GNN$0_iybo;>Bc!`cuhgRSWx*aiD!L`P7y#6E`-KMj&e;0D}nsdFCQ#*$uZ~!;)wwtmx>dtK~KWO2kzBe+SH`3<00Mv2; zU+5MHb|tDoq@EV|_i&Hfm`XY@geTdYK)qq0G?5JHRZE=H;KCaR-bXUWJ+pLB{yo7i zGYDOkEuuP_h(f`Q%<*1fyj%sOJOY=JWCQ%uUjRwiZ6?ALbanqdIN5}0a2J+A1saKE zRUt_#Yb3AgSL*bDoh>h4K&`~UJv6s!x>UD7gS{&acM}jN%E%MCf9?`7!p-%FByP!S z1I<72_UwAuWs@9x0G5!-1?=9}>Ug%^LM3n`2ooONhIhFPf`#ci0GM)wBNcNVt|z{c z>AOfvysMGEpVG1b`}FQ4csHx@jD(6}I&ac1Vc(!OQu6YCmvd&$@p{8M1cq=A6U9oP zo_Tib8T22L{iAsceRS;}Msm+&^5LMDl=mb;^*LttzJm+vgHT3j;{_)qk+`~tE+4wR zv%TfGF#1LCR*&7oHtE%5xHI`VJGg%BM|dT~gHfVU!2bJpuxaG)`9#>F4>tz_8 zQ;)Ppv#&Sj-16A2d3>uye(ndA&3@M{MdvrcXr~8<2+kcP7P6i0K9JW^1Wc=6R_k%! zIVR`+_MR@p4SN4QzZ1rSZ~@|B%LH;En(F#>(i_{b$5F<8io$nzoQ=tsL0=<6^7dTR zT!msLaLHVS@zTyyjO5gFUC`E3fnzZOTt5p;$4d}gdq?F+o2GI(9jdicq%FD=Q9RQ8 zH@*<=WeP5$*xXoroiJVPZqRB0sHqO$EyH{j63_Rz7MlR^wi@4RkXY< z`1u$yLVtz){2q*_wOW%%>P23>AO$a-LEeL~0t|H0Hk;~AXqpiq4Obh-*$cx@Pjhd)x5Z4ykmrPwJJ(>+Xf*OsB=u)CNU zT5q9Ab-cUpkVbRq4+{LHc|p>n&m3+bwgoX+*n*Q%8hDvO|IfR3P-FKauT5A1M9>G9 za~zx-mgAvleKq$Ui@VpVOq)W>u@7#&$)Sw%u4Ds|f+ikrCbE#@&3=L&N4q?O9L!iAB(AL?X8UFsg2FYg|JXw0P#rVtPKZ#$Qb8X`hrvdl~ns?}S z{mkbuXKsDNA~#Kj=EuOZQ=~$D<9CV4Udb>WPTIXez;D)KBoFkm%!^Kk-*>XYoBh}| zQUp-C12!5C*zrRwpUd+jjqOU%o2$O3fn5H7$olShs^9njoRrnzAe9j!tB53-hYs0$ zXOt+r;vkz!86`rJ?3Jv@o+TnOj+vQ^Y%(JIcisAYzQ6Uy`|*A}-t|7`yzbY1U-xxA zujli5F+nT`;uB9xVxs6TsJRN@&Km;QFh;{zfB{kxa#EQDFm*%zg5_P7@SeKoM50;U z%OyPF(ACj{b6vRdcnK7LgUoGp*htmCjn%N&sK!60wiRm zR*1)KuFiqG+YRJm*FW|(oJs;VoS(2Kgn~eF!~-g@`H74QfCHhD@-BsC6C5;pp;PVv z<#uVtvuIwU%gRl@Tw=XGX??A3DB9Q^Ym;b)mzsYVO3Mny@ntgs8G<*>5bPl>Qk%}V zH{jw^S2FpUWdTvG0M#+NhR~vGqek!AfeM0AAwlBDt2@Bo7nV>D#<%|O6XisZEfN2zyB zmqLanARlwwv>E~`^tJNmOJr-v*|5jdqZwHGw?XRjoEaR_pfg{naM3?@0=R4W#Y65* zD_Jw07c;qi=b?djEkh%NqtLde=;m|D?)5i!YbaI$73nMNF=f1b^q;bDi!CAb)2aKz zB(A6L!!}(K4hox;V1l*tv!JlbcfXUO?>CN`=kxcgrqW`JsW+ifU!r~Zl+*xXXd?n} z%Xh#dVW&;5=P(t66JW0w9W$OURkr%-D19?jz?5S#NO7sjYdqq-cmA?=Ou z)Rl)RKt9TP0z;H3crb;OV$a%bj;I|-0WxEi_%pqQ4y&vOX3ctuzFHq{3xm3Gurslh zuzv{H5@QdBM?=J0w%t@^iW=eDR6f~2VmHWL@Sq(cXWgyVxH+kwhE+(MEy2qt*V^(O zl&vMBgHLvMPd$5$Zn{0G;;aqn0M(;lcb0Q+v;fCiYqKip%XHU;q-uN!WIjW}M>B>z z7mtuPo~7;4H5gk63bl3ukC|DUv^FcpF$3fwRdGM^j%AX1bg0`DP_-KD2lNMDiU3vBuZ4G?ynwVg=l< z)dkVd0V40x!&n*H0|63x7u0QLVmF)HWT@w!?wl z8uMH_mpD{%5ob`dF5+O)O32m+pE2oN{WWkQ8vm8G+Ba)I@o(L#LFTbD$7u8zmvA8I zYSOY(W6LKVVl6%(O~vp3PSkhzNl)b^Ju;H*oG+9dtFjRF>5%uu`fY z!JZEk@>ivHef3n3VI%03abM-lW$8N$5`FqIMaN!h8a-Z1uLfz~SjHD|v$$uacj)lD zuw#FX2Ttt1LDU3T66AVd7T?BBns17;K3aC;Jsk4`rkU+hI>MRq1kLHkdROcqk?8jX zx7V^Y(>}Qss~q_Uk$l;uuVC-CPTDM0bgMA&o|FsWr?dm|(hT_2N%YA~af+=$8)|;3 zIx>yM>vRytO5m#429rCq%sk3t4oB9=?u9_|$8d?B-w4!W2jb#^XJC4hGYDkKfC97i z8UTcxFWkG&c#$$IDWQuGF$4z*Z&^1Wb^RmK4_&6&Tce^AZ=421m@^rvB73r!W{}3% zUjJQy7H>|)lG54^O4WerQG#28&_L+diE7D+p}+V#J6aY>GOM!ndyrSV{EEI^cH&Zj z(~SwuBSpneOE6YmG9V)r7Rx>%D%}4>Pjw}ychk6Zh?E$25LvPK?jCr>)wk<~r=lTG z@qJ*`ET4$e>FBdQa{E9eB`duhuYw+tJO?NqZb?ufwl_TUc`mv7DrQbryWTq5A@w%5 zfnMm$IG^pVK=oF8^Kjy}aQpvT*+t%e}len zP7hLIz~%Y_70zp-&@NWJ;9zBQI@!I)1vW-l`Xc@wtuwA?N;?~<#yH8aMLFF|Mj0hh8cwG6UeE#V)_;tLfw zJJQu%qY8kUqaEk8GHIuot9!cF@ts+9RgAXS?meaCwu&~>-x+4MO6$Kd_H()Wj8A`h z9c~nF9Y5JxDF!|8jfyvRcgo|6U0J$vS7^=^B>5UmouTY@{+z5&U~Y^JiTsij=A^m^ zJ7S^qa$)67fQ=wFHe~tUGHTr2G(Km}A@*UrrkAF{$ZwKx#3bcG&&aXtlatMpG?_;Z zq?zu^ZgrmDBFV=X=N>K-%u7fjPCsZ|Z5eH@O(ZTnP+FOOvc5)aehC=Klc`(yb1GN% zcHHk`up8W&KV|d;cm2Q;8PTsYR9K_}{Pg$()JA#!d3S(93rbOn`Ww)1@`Ch5$tQu% z1m^_ZIpx%te_oec&_149*hzFIC)LAE9X_Bsb?kev(!J00cw1B3!AFY!{Vth;>dhzJ zY}5wuF=f3WS_D6!be8MVE(^44t(^RiKF$Q9!PJXi66B9m0I9jpoFiwol-q7l@miD; zT^6j7@aGwwsSc#FXEgv+l1~aI?m|Dv%);oSIa3SU-a)noJW-8SI9-O-4s?hNx%H6z z*#rA97!Znr`}B9|{%_tHIr7a|+%T3YpuEIzto1q4{`nC=-K0rjY z&}`#Up~c3x|8()3@c2A8Qng5_+Y|~Yd6*Y>&lW&h?X?aXW3??ZC({4-(U6f1q}vOd z0PkP(r=&-fK=AJM5XAYVRtSbCH+prM0`rshw|vsq;E?JCVCmm5!n|P>^A_LxOYO*? zhu^y8A+CY6F-l+|=RWW$*~*v3j2!@Oo`oAwRm~D%bnqkuc#@QGEjA3f0&a?&my{U7 zHowaqaEn9CLF5Sf#`ZJtVInqakH9dcg4G?=TtCZxbC_g5m;``0`ppuDn-(&=2(^O` zvIy2Xg&{{A?pkkZI{Y1=rlZDGE$_&l529ZLF!0kKMS`LhV8Xn3Kj210JDlf8JuP@O z1f3o&^uu`#r{$n^0Cn{CzJ@yp=tJg#dNfz}6d5VVsf0622W(Z~Z|oit2K|{gpK|Bd^`e3>jWzNzJ2LtYo|LN&aJNm(`(F+}S z*&L0nrXB@h*jwA8&}N`kNQKqrbpir13%< zNIOvpEM104dG&L?aob>|sgx!(3`_3bz!cu)-rF7j=U>|Ul0?XCK-+hD6D~yB%z%Hk zmM@Te`d?A2?#oCe88{*hXnPAli!Vm>KZfl+H4l<;Q}9Ky`}&fCwTC$fZ!xDy{9;}D zzJh=-m(*r>Pg>}wgI<+vBtRb@DRi=py5P$K@b!5S}cy z4spjmgnuh&k)ek+hKC+=*J8ww<0(R0X#6$5MZvkI4f%{tDjgy%9e*`VL;k_H{c;1W z8W(&-?UpF=)KrLsHGgM;l+Xju{KxQIs}B>xCZWs_&r{m_3pmy(Olr94X?hT&avuPr zjOz&EJZDxVu>?SjKo;Z`tGPbgO@r4Fi->{RhTsgqKq4V%E$QOx>RZhxky?Jy#aUz^+3mcEl`a zsnN;N(GI#XX}YuO`c+^p;6fjadYqrNkQ@U z+vE_(+;#Pzu#~!V_h7W97R_WW^3O>w8cnZ7q}tFt1MK?-re>)Wa)5+%D_E0MWy z>d+mu@Q09gx@5noB&L@G3BJD5TxFs;@vOziUTItuTHY#6e%e82hM(bS@eIwumH&5a z#QGtB4*<`YmTi=wCV~SUyncf5CVXbS%MD}?A8HNn?jM1zi4^r$Y{6yhTc*}sRVjkX zYs@7qo&@^+J5*#on zVdjuR;%y-?G>Ro5s0HGRsNUTBa-x4sZsxJ9*5#m{Wy+L`O%!|B2^a=Jf;mNH^Z-C~ z9q`ekQ;6cxKI5qRx^NU}ek&&AhzC{w`+H#o#h+TW1DcH}OD z?MTc|v!H41Z^p57U`YpChE&Z+vR8*=gLWm%QcX4DXg5xn^r>4cA+H5fi=SZJf0X;$scK`r(B zBzqimVV#$P%NhPGe5c{e`|1h$Kv%{FWnlv{n3a zA6OpviX5yzWj`>s=X;A><_~lp@M5tomw$h;CL8Akm|L2YG@wZi`p-06=LdSrT;^;5 z&$IgGZ(`o{70?27QN2Ky$63W`8P;G@G#J4HHQ*8X9qRYK@r!)v>waV`@4-^P7x0%8 zJzX|itJ-zx-Sy{QfT5>xmVq4i2BMlU&gq_%PYcG_4raZ5(g-l~Fvjb^ z7X14?kv*7T@#2uG?06TPc1_)|Fn1CbDqTW>&l^&C`I zF71k3(U`74TCUfQFHU*P3SL^8eFAne7pH%HMlFz3k8fay>b^Q_4GPF7fakOx06l4I zh_k_Pku^tO%pLK&&(c4kuXqWpj@#Dtj)ovNd=KHU9ApIckmSE-yYje%5P;+c5Up-Q zHDWRRpa|3-WtvXShmwfPh>ggI1#rq1k{sAz)U7HfKN$E%uWgAzb3*O-F2?d~nVEo*?tkZgHf}uR=op zzt+n6$eVP}E`!ww*U>%)@!thr=xcc^=lwrbFAZ9}j^pnSayUQRl^Mm8I*N1>dt$Xr z#6S9I!h4$NHzRZs2oPYL5^zL@%PElD%Ksx@PD#yyHfU|$>-b`QD;Tc)TaqD;ftza! zcN(Lm0L2>QE=;S}o>tPJJWZ6D;^~7M{swAwYu|CoPNaJUJ!ZtQL~5Hp)YF4>Dj(MB zNsFvC%f`rWkPO8*q;(p1Bwu}g7^|ilt}1``Zbwg zb{F$$1X_(PjcyU6Qv(QZ&}C?azLz)vv$GJ!*0x{t84b&uVSqh`3f}(-GcS05FZk^C zkCP%PphKaw>ZUvps9zFU#0YP%Y22y}pmAWCg;nd1l2EGaEpz6RDG=~LR(${Cb?2c+ z$vxc8Jp-zrn^3K1NkUDQf}v~cC|mrE=n78ew(Gx*(^8A&z-4-uG9;qj+SAGt0cos3M$?QJ}|3xeu%;GuR-P|9a#YbCaV{5E?4 zKL%Tlp5z3)mzI5E>3E>GP+}s>7NN`5`-p;e_WRA}s&&@+`7L73(A)P~~eF--4#Gheij30f;eA&#Nw=AiVA1kL6iR#*#&ulD3h0gNs?vIQD zMV@m(5tQ^=MOa0pM|S7fH?ct+{rXiewYq2X^(+o^dgN=r3^+?cV@P?9+xpV3o0s>l zQ?=7VY^m$7q01L;w@CbWUSD~^sb#|R<-|nGK#yn3=CU^k@(17PWlvZ6T3s-sK19R( z$S~n3weLRCAX*%r;dTuT`4|()y43TN7aQkGCs+_Yr{#|{H*=15dWdpkKFnx{dMdv& z-YbG96y5e+W!pzWUdyGyvvVUvb|0ox7H6GJ1|@!!se6XHSI{={B z5X9A99b?zLAq7Dxw&_H*6>d%>1aXo`bLludyL(tsL+~7ijR7=^B^HbAMHa zD#|!(`se2-3{0>ewy8GyfKtHqF7mN4Q4r=_Xx-(%rY`0(YX#UwrE*}-*xB9!fCf9{ z^fTZx5h<7HM=Qgs@n|1=89BA7ENc|`)8m3Q6zX+iLl)t))v=tvIHfEdul&xR1u0Dp z;zhf9!fn~I@Vzo%=bc94gF=VFzEqA=AcyC9>_lw`OeTkm8wDL|O*TR=!J#^4K;=9+ z4jzmAnBVMf&*5W7-S@+To%h-CWS|1F4DnbBo;QdxAAD8CqsS?6tthL>yjGke3Ci}g zt}X))Kv8C4w?3Do3z$GSBNE!Vz(2RMrJeUaT|m5m>5p-HJG-@uTNM{YA9SAMGz3aF-J)k=bt^; zCGx}5i4klOoTfshUDH(gh>j0GvBHkUu^Pbd{q!2GwiX34au=~~=MV<~eK4&;OQ1oE z#8N#1&7Ki)7?|`-@%S^nhro;HbXnEih!bf@NcM+o7@kpePCu9Bc-5j<(z6%9zjD19=sMJTv&8AIVpJJAXA!Zm4QFRduoy$Ogf0zBPMw+; ztT7wu3e0=m@2`K7?kM1FX%lM`_3o1$I!--ygKi}o&O86P`)M710w!gh5igh??(K!^ zG&nq&t<>aUQY8-7wI_(J4ZBU#yfopB4*T(FtUm!>V#2jJD zeY?DlQ|9)_Vaj9He!T#?6s}c`;rL?Po)$T#uR2#HX)3$)u35fQ?DN}uOb!axq;OI> zL2^>RkI=W8z`i8qyyzmNLKh+sr@jDez6rgI@Ql;kmPJ3jJrH?Ds5=%~zNLhD80^wu z5F<jSm<{dCXEX_1qFdrUlS z3KpP#ONR~xj$)XWxh*j98g;-`T+FwKt^G|$Nb<}8=KpfCC+dvxWueD0kI{Zi!SADq z#qv{LVJ#=6YnIK6%A2L8-c-4Du2V@khVsLOo&rkvAqm2mfXR09dn_0dc2=5Vs22DA z;jS)YUya_vo~CJ5B6x2<6EuR&?Ft9S>2}T}59u4eC$a-Kdf8Z4qY3Hj&}4UPA(Iln za3vEhvyi&)Gymw@YQfap7f9-Oz2OpU^7Jy~kJzla9q)Au%^Wyq@cv^_?=8lDNcpk> ztV^RCQyR@SXhj_EJXKf9d+E*hgKRXxK&NL5x&R{~GABs*bT_J>b8rf;!-yiPGT>MB zzixkA-v`O84X89Dou}G4SKsHhf~fv>2a`n&dsqPQ2HL>V@pa%YtNpNU`$)AoJ}MD{ zK5`#HZp;r}0HK`iCOkAN6MPOE!D_C3H zD02T}!&M~!wlNbu;`e`F@3|v4^Bur_yT8AG=FDC>Dro@*FSq@CkC_D${Fk-2vGcqO z(fGdy7>tLp@!SX8&!{GuIE7L`82w@V7>_5MvFbR8LTD%KC+*u`=&2h{`kA=&)}ay} zu?-iz?jwT6bgrH+_*Ys&nkZ~}QfDawa^Al0c{fc?alEud&P9m-FB#P)fl?%x9rGTs zjQLB)u;oy5%{{%)a^8~P4&sDN*+O!NSmWzUcjj}Jy^%$Kpq-_71vHmcb0s^X0zee| zDUh&WkjCk zdw27>4wmcWW6$JEy%w|+*cWeADn^$Ij|3qp8FGW2po$BW zQp79c$O2`~d{JdYPvz*pkCDpBGHvqq5cOZ^O2rD~i7ZcD&0z#nD(9VAqi7w<$@S%l zeyCPQ(>m11xb0`d>o(ZyGhJc-1-q7f7c3<-98LvlT@kb_kBP@qk;Qo~DbDnMNS~sT zIWr4<`ufPzv>KX^FFHtwO*`p7ie%Mkd&~p99Jme)J zK%rWzfL58>Q(lt#hexHX`aVWEbR3lZqYQ=L2lFn_MKwAAXG1%M(h%STRqtlV4fweE z7>;JAG;aTd<}#s*2tD{hs8~`GRQM&s61qg`Aj)3DLiVzVB`)Ewq zS=3}%B}viH5On!b(WW;l%{y6tY_Cg*X?r z+P}Rd*KM;1g~Ldh8RDM?j)bd;eWh@dYYd=on_h-Q%)H0g34*dQ))XWxPah`**Jn%A z7w~8)sssJ9ZAmA*y%0+Tf}_&ZNlKK8`Qm;-pt(^0`sLCiU%hjZiCNzp-E&e-2xapx zz=#aBr#!VWwM&TVEf}blEzETjHB|?J!hS~Nw~22xUceN#8f#PgostI8@INs?c@i{u8; z4q??0fi0MSZK!!T{^|) z+|Z~Uclgsso08ET{&ov$)fs$&@o_pe{y6L8gsd!C{ds1gx`!dRjZ#}Jg68gfwdIAK zzWQ442mLb+?KA;%CC6lri^uog4{7*5cClB>vbh*}+)k63=b}H)dca!pVIQnmpA~{s z#6M-tdSRQe0a#OSvNH%Sp@*94skm`6`@n*5hj*SpAF# z*(_T~z?!Xnn;|F?3wM0sJkG zPP?h$sy<>}pf0}Wcm1XB%YOg-m9Jege1%I_yPfJBTI7!l)oJL7eB7HdGOK+1OeI+K zL?>x-zy5-Hh_b!(s=3+R?D^mG_|C?u$6TT6_7*LG-l1U*=vZJ?|M#&QsDTS>=7svVCQ^6JLmE1#}5>6 zy{iR3)iayotX_Kf8&hyx=XgQSd?jWg!cx7sNAl8R)~6Fm!Tk9!hp_zFV@l--do7i# zKUT(Zd`ZB2$hP)3VK+HMcS}8no%8zmT9(eRkbfQ`qI&EAucYkI6KtYV$cqM!gz@>L z*~efJk$sK0mgKGS>innFRetyEk&Apnfhs+j-u8ckCwR42`gBaesMD?Kr7?RjY`jF&^fetjPswI>ls_9zLln405*=~Rv%uH=HtU^a;7x-zG1%swZi3r z8@R+>Sh(t4Z~13U)pFl7aC+t}9GMkdZr-rl#uiL}&~iW^N?v;VhOyzH@#-uq#;KO1 z^yh5;YuED3{Xx5VW+pGi%x#<=VLT?>(u4Q7gHb7gl%x~Ianpaw_9s+J+q2aD%Vx_- zv0iZ>u(5+~iO#1e6n?vz8$LI(*4r{)=e3p7l^uVf%%2BmXE6KzE!p#%g2vg$`J57J zMtNR;24rsA>Wih4P7z*OW%eO9P}VG~Hs5D=r^T}V`Ql5xbiJgbU|!Q61X%|y8ud393GnpVLadxOZM^RQVC7{zGB3C&hvb)m2X7)_nyT zh?ufSxNV1wUq65mw=cg@@Kzmi15c0N|bI%VR86P@~C&)YzYC! zgQ~Xxc8g&boz#q8EO^aRZL;2A3MK>dno`ZtN*%MUR^hq2-u*^iJ(Sacdua}5zVSS~ zZlYE79a89GU|RK~DLogXuQTs9B^%{FFN|^Cz<3Wj_w<5SfHfejPE{lDmrretsJu@y zL*902$G-;!XGc!j#o!C7&YSPWPii}epVwQ}-@2!tzePC;)61S>h*|G%1(lyAwt>D+ z+eISqwu5Dq&$K|rZ&?PIr`q19k#WD>=x6OWIcrT%8@Y>`T2Y9De15T-Gl_Fn@Lypo zLV_o}pWsUONI&FLrpr&<0)r3HpIHGX8aL7jElWL-cVF5bjPBivo(53(^4cq4B9^@O z?z@}OHRb6-e;)wX*}cq25=|sA5kBHGnQof7!Li1v2dNm?3c#imQiV-vJ;LUoF8FG^ zX$$a}%fu~UAifg9z(A~zK+!rCMUZ_?dFogZwUbO-w|!CX7q75Vkk1YV%>WPT7V^UA z4>&RAonC%t1CuU5eN^JN?`C^iq!p(Q#S0$!LcmE<3=;*=myG~UQa@2cs6wh53JDc-IxKQ;)pCeuVtB5Ra5Gfol%+drHx20fj|DI?l zW|@vJ(qoBCoP&$qIp<^_`CHwuh)`Ov_zqG|@CojcS|-;a$Hm6BxgUQR5R9Bg;G~lfNGJSD_0zYfGO(O zIz2E{yofw&+x!LiXvn#|I9a0t4T>rJFGon{))tb!=XyMTCs*NwQVqJ+(B^27tflR8 zUok(;CsYTc2*&UwHH$n`0Yb*NKGTgF-wgFB6uHK0_^iEs)6$VslY{2a-X-t&5H83| zzcLJN`ZGv%=n6e+$wx%-&!3Q@+xoL=f_ki9o;%^&xcCe7llSX;AKxy{s5n zo#s6zz+0`G-KPqUCz8Cz3)DFJd?dBtC}VP5aC^B5Xa>kt(J7`7 z(`{L{->-r#;n|CzOmC25lnSpoE@(W6g7WHR@F&O)?0}AZR-f7U&>nyk%?a=I>_SO4 z&+9Khi#9PHLiTxMFB4b#fLZV2aB_T`jJa47<;Irm3FXpQV&DC@<+4DZDd%y2!X6N@ zhcpCzODvfAS8j|QA@L;1Kd;|skQ&KVXYD{t^52{ghfI8?JvDgQV+TrPlas&TDvj{) z=cD?ZtVlj~z;Ew<4aqL*YqY%78ALFg1qy~MQQ07npGqwtQ@Iqb-t~bK@3O4^&a=N`B)XrMufP77a+Xo z4&f7E6P+muW}hulhSBvAOC?}C!*Bl^8Q`Vqy*=}d6j2(BGlak0{PrXd<=}K7OOR3) zLUq%}Uown|+K?Ff4w}*Fd`aFqQP^Ve;iYm=t%t0`jT*Vhwcd8+=;dmKRIZoLmr?%@ zP^D8+E#uc?)T0`Gir{`YBa?dS>>5QU;5;s1P7Ru+Dk>AMwUGN#tw#RcrsQ?8U;d@} z#qfih&YK1O$m*!BL4Rn!hGb)9tR zL{&4iskw!9yk1(3zqU*EfBzeU7C?=fwbtZC2Z!rnPKjQqN9#n@u>pwu{5Ps#>zVB} zu;F|bqeKfiG-%s>M{DzVb|f$45P2w&_#MUeLUCHQFH^=>N*{sYQ< zwj^4=QBm}(N9@;Zls|CQb}KJAgw(yNzMf%I0n$y9W}0FLkk~BG1*ii z!mOX6RR{O$rL_~M*#H+czx8G>>{wv_;_JVKIAjA2J(mA~D}>ZJq>Q-GD$!ATx0Y^{ z2YsUuP3Z0>D86TFs04)NZW1S+vaes?yIw;*NSC}h-GP(FpY}myr=iENe5#N{A+x1; zz>x|gwziWtB}j@4Wk;tJ!>ehb1{m`paGzcIO-f`3zE)n@Y5(uh_p6}YBkO@9u$x12 zLXtgq_%K*@8kdwoh9K^GDj zq75`vr{4;}akt;`6#!NbFTj$6+MPxcIR8=J*LrIT60h#G7`d?sSp@KCGwpp=2PsDq z`2HEc?S-RS`a60lT#uAB_0WITr-W3NFR4&QqxVyUJAw_0ARFpMNhJ)360Eb|D|am( zeuOl4kYR)0q>RqC3fLp7aMg+TgdofmEUTtPP95WG*!wlq4Pt(6fmW%#l3+?{%xkjF z3?)`FzhIE((M!D?gwM?zr(j4K;LWEKnwxF(okXF9&0x&1-T1ww`hzx4!Vf>IX>|N6tGDiyDeXbSy{zU4XN+4c0X0e#qE=yo~x-K_9Tk z8lF(cPt5M#Anu3efv6u)E6PJ%5CrKh^ogb}>y)&2g(?6ca*N-gsx`JqB(1yr@4h<0 z9vaRLMv6O*HKoAiDuO9WeE{Cg3;A|IC3xk#)jROk%{fe;}EtEpEukq!AJINs8 z<}u5p0z;(HtrXNddcx!w&`*|Rb?V>Fw}`{F1X!g`>AA>Q{%Mz{UYF#1#Jsnu^vNfc zeRy-27QdwaU8UtxD}b(2D_A5S>xZT-9MIBmsLhrpnoVp74p!iw{fRCK#{%BFaTOlFmxXRkb zZWMMYr)-+H*s&w=3faWw01zm~BK>ExBQvEqBegn1pp0y_ywW)XE>3BM%)I|LLk%gW6uS+9AK8Ulq6xZAKQ+k3 z(Hh`#umN~>C~c(|QjnoHu)rlZA)vqUcyf&OUo-mmbKEe#K!|{b71(rVO*v=-6(DZs zS=?*1|1ErxFA|#FcA?bxuQjFDC686RnSs&|&I?0>FlGCOWANvH(QcbGf21W*xTud> zyA!J5&cUVS@H;=?I1i{39V<{*I#ii-%rvpN4YjKIZM@ge2xG!L6Y{IQ9q0DND<}U% z|DEa1qwuZaIN0|p($NK!#QvvYDk5)-(5wiUsi2(}lPZbUdoM=AP#dfUE9JS58#lms zz-4nKs5?F~(&~;cnm(W`kuW$8&S3@6gsOueGU4*x!}9(k^h26R3lz&BmbgIT_C4$> zOY~x57Bo3w%GsHgIASore{W+gE39fK^G~XIcil&$Z z-G>2y{U1L0iJ;Z`D~n^K*eG3g^95U$}O1^}1`RQ6GaNtl}&seIBa&%h!4 z00QhUalbC_?aUMf)=YLpGjNI_2b!G?@P-ugX6OL>Vg;Bd=;XXx<5db(S}-sjwQfQK0o4=M1H%sRVehX93EbUI2a3G*`X_(Gr9O;)cnQMLa)y zJ$zN+F;dcy8v4A&Zh8D@ao zqJhB;dNdz^Z*jop{t~eYCV|g_msmd1;byBS|GxR}>Xq<&8Z?Qd%_#juDccMiKUOQ{TDoH*+MmGBw;*y)Jx>E8`434 z1jU~7+6qN*pS{LDHuhYsyF6p_{`x1(CXjKBf8heHpK;Q6n248}aIa>>qX`#fgt4F( zDU*A%S*8ryR3sW}1g%CBq+vDglUu0ZZT+tZCLkKc$HBfstpp(~`IwX+aw|hItu|RU zZ_u&n+~v#M6xZd(G#+;&xgrbn?jyb=2FUe!ZUccRm0-f_U%fdmUJk2e4|EYkPxOGM z5E^1am}hoB^a48J*(yvAg%2W@#G_!GJicHHZ3)?Opc4(8d3l%3LKuEf8TmJ`N6xNl zECiC;LBPP`OPlXZy^{|A!#y1;LTBTzuPMl@t90({E^>iErADra1Go^gK^vqzw>kPk zJ!*>gLn$>2%)+(8Z5>`TelS`zt^J@S+Sw_xA6;pXfsGM}%jbYt=E7*GHN^yf=C{Ru z9*LhV!c{4SrmcVA`5#vs7uiJzfA)kswVa~!)k^p~9zWk7y$6DWJmgfh462Y9Dt~!m zZDv0I!B35ig5+N5z)HgXB~K|5elhY_KgW1V1UnBhKh)7my??O|zQvP`)knY7oo6i7 zoR81(`7ENW0)A9F0hwnDBxwt^$^D72s2CQGWw7?6p0m^i(x6dYT#V8c~2%F?}P8_X(zXf1VU!m=3%~JykD>=Fxydr~6 zl_3s71RUf@C#U+0?{G|iW(*AKQ_xWWIfg7dxG_3Ufb7#xL1*99q|A3=Z>eNCcvl8) zs|e>QaGZ2w*8&Po16(u4y;fhO^!dyDa;RtD?%v~=_L&WlW7x;8`RRDz26_1m%&VUW=9V*mJufKr|f`%Hl@(3>p)uitT6lOa%cj&qrP`Uut1P+iuu`?mZsqmS^z zL_XT>{Nd?Nx9I7>oP?`Mfd=eE1*Q4hyW4B7%j{LrVs;0#^ewM8RTBvB!T0T=Qd@}vH)ML0e%om zp!`#A(H*#-8EC_iFNe5@Wh0%wnz`W_noNN??2kA_oHCvAd3Q8Nh@QL_nZ5kE!mf4Z zXNFjkRKMw>LegpyUz%D9T)dSzS$zRtg7`lK41q0a?0J35xR1JK2=meRAzZo6adN#u zvPaMb!~gD04tNYtHH5aw0lC^MgHajQmTrAHNgl90Dh zRSo?8dJp{IgYawZ_f78J6yU3WpmZMvj2Afeu$Y`rXWmIWb5%~JTUn=>3{plCfc$Zd za3Hy0xA-oE9JELp+xD?g_SN;T(?Dk-s}g4Uxm_x@alRH-Vy&cRRFe?T;e z@13Q~ght8>lt33|mQ`5jqH9h3OcbqGcVhwBIMmm?ovoV5@}-AEaz5_vaxw%0zfE8@ z)Ja{F$wv-LJowolTp&N`5qEi(u??#BqEhPALmd4Bw|6y7L-Iy{qPx(K08_LBURIm0 z=q1bDDNJKxxX8Re=?G*~5~N9)$lx4*ccS8%$567XUryKUc2r_Q zvV6>003?cZ(Xu;LLhFirZ{FN~cLlQnhuf7xAt{r3KB6}7wyF3~atPmMyo#cVdAOqD zzO<$!2X=?>5d@+}E@|uO&<2|2wU<fW#@XkO5nAXQaa~9ckiV~E^Qziirh##xx6=pVJ#ot z-pLD&%xWVbrly>UbcHsj)VsscA|v#L1d5MuZG$(p5(y(ha>O>J0@(nY&xs;3?r8#S z*iI!at=HCnOr({9QqQ|CsxTGysz2|0^;R_sh+YM6n*vh9ldye%Z7O;#gJBBnb*Ydm)+}?5Hy^$2xADcN>pG{ z+DK72_D);TM4~tt(EGeLdZMb!u|Yb6(ER>uz2|heTc}FC8wy6|q5>b{f1&wm*iCGk zk#!Gc-r1sDBl~XNvH@KV$D67|jS6ngp~bhSimw$)fmg*~)RBdfwaI%C!O!%j3*EP& zmNg>Wd^@9FCJKW&TXGaz2fx_HUqva9411^uP4-deY&Gsf5hFq7DEew~vg%U>egs5< z&e$E=Q)LFkM$^gY#)fqLiHb=5>@!Vt$(Q3$YMpI$6lMv;>Pc7x%6Yk?`#2E zX{nUSY*M>_9M!)W8{jD~*u?3fF z87V&*(_P=GH!5s@gpaKAWt#Z^=(y~)UNREIXKW?9&d`)9Czvq)GEvut;!f%C*OgQm ze#U92T6X8Z-=8q{+uh}RnOtJ6;%E|9T{2-l5(yCjWMP-3?>D=ZiMo4@(4d^Ik37Y8 z!dwS;HEB13o#=BtRZo!+qD7;*5920>OB_wgj0r>JX71w~5`jTun1h=4CS{9x&R!Fa zbs2*ZIROIztQ?p~&d4+&4VolNrt)WS-p?&MA*sm-sl%FS*;W7djFRqz#GCGk?`?vH z659xxiOhjHMe<28Q|y{OVGY2eW?$?Jrj%#6B~T{Bq-iDcM~=vm#K?G>c)usi>L85# zG_4ALteN$RlnjOovRK2|))bIj^5ac(K9=2eri`61B*+cQ?bnP5fh%1cOaF;RZUv?` z*?yY1*E!MeKd2o~H-#U!AnL9+Ad97qz7$2;dOb~zq|AYNgU)lQth4bwrV=4=W!Z3? zywmUhDL6;|YOSo^u@F;nqV8(Jy_M-PSSxZaKRoe5`^7%|AAODeEcJb^NzGYQXmOUU~)4cSeV<@_ZD}Kn|2e=SG&a1OHSkrb;&w=@$W8 zJL4cI7UJQf90S$y&C5*!+AQPsh_0>PUyNKj?mot-o>N$oB)i8(I?T7*El}PPAL0Ab?YTP(UjqH>7Ap00r$rSHrN>84nch{} zR^v#}<<`T^IBSw*-#n>KC^S@WvJVuMr>znF@dtrF@4Xv!_!z2nM32oGvxmqvE$$WI zX%rFH@W5jA{Fzj2aADB34^^}PrYtePJm3ALu3cp@`{wAWdcISPwBkeStf#^?qE0Py zMj4OjrH&tEX=XQ^J%S6Z$NfMhD94oqV|F7(y8>&uMMe4;I`mPY&q?Xb4EgE$-Wt4~ zDoXIKu>(A@Nbu$tgHH86H84>tPSoc6qx9n89)?5Z=O{OE{IX+8d7T?19sd4gqYwgE zLxudn-mnJRv|}u*(wIh;42?&Q0-7(cSW4ESFMi~YiE7da|JO6Z# zT_!|2)|eQHJoY)XB^DZmpWRY^1FELrrfjbZO4o99J^oecwA>#8^gQ)ay>+_<_Wny1 zHdLU1-IK|=Q*%8iq4S>Ym0wUNO`B`jUNj24gIIl9xykU`_W7~Wa)$@#y-Go@c*uA% ze1UNfCNZPnUN#dx^Pws3Rqmk|%*E|5nkuh^sX;qL+WJ4x0Q`?tsEm+4l-j(9wvF-m z0Z+aBkT2!vW6rBM>%L?~1LJo~;FZ*eEYEFUuZ+3N?ZF)xv1ZcG$97F3qQjYbRB$5CW^N$v??DjMx}x%kmpXEwTK5KOis@0{?2?qA=axEIr8 z4*DcW4yi#WsTP)qT;{B`e?svjd)WUI+6>&!K0*;T%T5PCM`c0M8v%b`tz5p%Y!Pet znC&7cAo{>CQ@J##16>pO%B-9;k~eBdQA@`9Ci&hQh^J7%BllI1!M$od-Y(`+7q){e z{-CY~S2l6L1t;2#q$BrGF)qIpxfLMQ?g0io#vQ#R(+|+rvn3t#P7tk!47s5&hpeg^ z8JD9;IP0(ZMiWKynj)$9PK+={G1w}#uq(u-Zhdc0j$I(J=LY<@(E0+?OK%K(rBf+6 zVa;jwdmevknK8v)Pa7`0f&gzAr8f%;UI_FVvW(r8184!|DYDF}hx~7y7Ae^Mj)-TTAH3%f1fvjw(_-HMK1DHr)Agv`Vw@d;em1ohrtf+-2j9|!Ff!C2H$Jg%T+-?^khg_cc;O(P>>d42^^%&?irI_@i-y6uSr!(8M(Mqyef zfarGw1KgB6#J^p~HwlQvLgjFwLtQaV)%BF;Zy7~7S-1I=a5cwh$2(iB5=Ji6gXD_lua<27JV%0PKMC#)<;!+Aj4Q z5#;!x7mA%hNd33xMap)<*SSC`;)I5veRP*|MVTh870k#(81Ph%i2a{P-X!P#S%5kV z5hup}S(;Ipi*4(h%NM$uxB<_?v1XI5qfGc8X#7&#qq%#K9F@-L?w(v*7E@bk>6Ro zRy3ow&dkwX@rTFPBj;99Z_Dm{=HI6Q-H0n4Hs}8nMm)j(92qG0TBpap%q7QMn)lmJ zR@OO#EChmw7NLS7%Xv;9hg(v>PfM;~fE%5=-`MLG=ncPMO(1k4`3ab|aB03~k7iG3 zb4A?ldwoXXOE3_L1(S?+#mo zvFC)K zQe+})^w$u2(@qQbVZ70UenWS7yfFzstXzTTK0ckd!{$l9wiHh6g;XJIDBlyIBA^o` zJiU0=FpV5JBs(N09`m82yo+=?fq0=V2@rT;-0%stlT1#(zU6`U`1e~e_Ajq09Jk5A z{sp9DKw5o!aJnge#|4^^uKni0=ZqaGEhRP~1GNK)$p7)%d;lCiptc!+FPjgX`chT^ z1g5X(rnP@>4=+u86+k+As_N5;x*u+-x|+H=cD{5uo;e33+w#$^#&Z~@w@LKP&}HK< z7K^K|%+}3(sS5LP9DoB}{=0_CK->)X1dhv_CRGRpl~*KWS-B`8 z%CLh#y?DcPJhtT&1~zwbFn?Ktr8^o9rsDGf(#dM@wdmxec!BG_7hUtf+UeC!9IS>p2A|!{ACD%Q+}U4y$**;@1I5=eLl^(SRW9j- ztjrU0wV($211HX}~k={!P^kxkgdyKX3{)G{fL3a6H&xBdc_C2@>w~Sc| zwAyp39z2wwknnqZJkYMNOE1hS$oj|TyVicIFEv{IisQ>4IvbZ#e5J?$TBXl`9s$S3 zflJtDLo|J1WQ!Lx-!t$VPkL{xEi6cBU*tMYs>B`|w4=Pb{2^skNi6E^&Lch|B~426 z38TW#tkgP~zCdZ)?z6~#?i_Dgu(Ng!Q(5XYLf2{2qEg5a`m#guyA-APBk#WPO-LTI zWp&48_HQhzWVRa-R|ljBo*#Wl($3l%b=5jP@zrZV>_S(4Hdl95^6{+GL;Lu`gy5bI< z7nje6FTL8Djwn1E#thg}T7TJB0|I>?y)tKu9ia>8i{Vs(Sh_igK=Xxb%3gNhCrI^q zoOx96FKI!dMob4VDvoWnw13v3Fuwcgz~1O_fFyrajR9o1nQ9uYeBi;~=vT^HlYCH^ z!B4Qg@Jwar&2dshEFJ~%;%!ZaLz&<%9%vFFZDD;zx&{X8m7Bv9#dF#B2bRk`PLr@W zlW@D3GE)7i3K|mPbqU%3=`^bh*14HYgz!5p%~*2!dtS2F(pN4tO|#UC`qCglyL6hW zy$<%`A?&FQ3OEfH_p=j?Ko}Tu2l-q-51pa9M?s zva;Dn>`WqP_a!zVqQ_>+b^hpd0PVcc?bPKYH!E6m%?{q;jeJn5R|I1C3q89R!0;e?iu5hs zc+>Dlw)p3`EL#||$=5#k!GI+D9tbG%=Kz^#n|Qt|lpCd?W(m1O1-J zld-|&95CtlakPCFYX!O4%*U6RNTrQ|sipN-dL z@0iY?P|iN@nh#nEdcK^eu0dB!^33bZCqzLAh6KJmUmS$rDZdw!ztgUE9jyGE=b8Jq z%HdB+ypBPzbLi{;iuZ^2@vXK56db+mp>147#ih=hj1O@d6xDB9X`f;HcnG*1I`0Rr zOdWOpLvYbx29(QQ@?(NH1$Ni06!gWxFlEq$hf}-pBGt%k^BlpHYH9_4oGeWxcpnI5 zrOLj@Kf#rdWdZKVQT+|+KAsr{yfK?#=`X;L0a#*HU_V}1%K6i|>w{{DVxwBVb8}Ed zFU#PDk>CO#v;$(I$C)rlHKp8g0j{25LQvdaHJn*d^5VxmuA!n~#mdjBOK-j$M5o}( zPfdCd0I?DKT*wcumLdP*7*H1=M1pSga(6+Xdd_h<6K)u-rLI2<@MOR_ zmbFrRo?Z&)yPz*I-CsE5b^=tV&dKDijD&Nk^S@QE>gh{BvzqCdMitLfSkW8twZX*V zyXVCfay>cQQk5By@55JmJLsCbn00#DpKp$Y7$)cvuwfSgK=+x|^Q~;5;iT9N{7m=% zV(%@(q71vWVHrjp(xId!q@-I?kdkgex}-t6LqI@~?ha8wq*G8(QbJlmLQonc1Vj;w zcilXDzwbVd@Avm_|5F(5nYrV-);iZ2Ld+8E>%w0psvKyLLEM~eP2)#naRUdX<3^|N zM}SfB#a!n35QlIK8Xe!!vG?XoQ7S3!>{&}wo^63WaRzY}wa@p6CNrG%XVIzpm4R<8 zCox6tsa5B|`oey+&=sV6{Jmy|FX0~MU>X4wk0##NUP>jDIs?yNj z7qwnp;n?)vz;lov18$f`T_J0J1_wuO={r{ISo&ZnRzC8W%r|TFHXbWK++(RP+5A%Q z)vN8+n0djt{`Vhn*L^x)PWo)k#eciohA#A^W|;G2Z`@{>6ienG za-M5;)eJ*E*{f`hD+VgDb)G2hf=F4y@RnV=7n3|iI&vPp$xDu!*yLN2YtpW@I+-2b zd2-WN+Wl2f63rS>Z{=nCNb!Dn>rt2!+gaF*2e6)bxS}$l()AtlvHG<~16-M&=bu z9}*?&h+cZ&;2M$Y3JcN`v(eL*>LKGE5UO)hcWNK-ka`tx%AuY1WZH;b zRV$=YCX3-JX{UN_z7+FEsb>Qj&M+1eMHxFDTJNlNXH(l8pc}=H!D!O=;a=knxuDC3 zPY9q(ImnVVCD{q-HfS?B?LV_I4!uTPv4~)$vzD9^Nk+7zcoBxtA|(Ov?7r96JOI2YC%d3U#-7XS4xYFa&eOFI5$8AImS{VA+d zy((~MKjhG$k8*cRZJ*5jNKGsq1+}lXAdao(J9|^+wCt&>Up9&2Y&s-~^2Jm*JQ4}i zLT^en3%=AJT=Dt>h3O8nf4SPbWQn(H9$2loilJ3m#~K~=5R({qsE0yf`XlFeZRP$6Xrw+* z>2NCI%QF5%FLBxeZJRyu5<<7UTnk%m3#C((c4!6S%<}OYU8F208j+T+50Z@fD9h12 zPtAo--A#Cx!1bO0A_2TEFgzF_MTXOAsq zh3IAEe#C>*Mi`C|x}dcV>zbozg7xnsY+43~{54G4Y$&ABfa*VQcf%>oNrgbS#U~w4 zfbu#cKbjOE_cN!&wJrrl0|m8@9IoFWPl%$FNidU}Y#KTcjU0YT9^>1H?FvGOh;?)| z&Unh})Z#T#$+yGG1%~vAWF(j?0;+&&Y%)I03vYvHE%4^Gv<%oeQD~?S?V3e=V^XPUKTp2v;hc_Zi%vMAzStwR#Dxl$3Vj$K|UCDDlGY zYhmduz9G1^(o)V=?&+9c2djY%wJB(5T~Gdgy{mzm75Y0mIVb|io@diU;aNh|b)>ZH zv%7`ww7y9R7)-!vTwDOxiyXK1U>;2fm6+G90Sd!0r>gsRz!kRY>Dk1Pe+`PLB{gWH$fCbw74(7pjBKs?b>Mk)>R$;w^Wn2-1jtvvNvB?m^oBPRX`w!`Q zYk%&wEX`tQ5TX{;SbiNdH2CN7pZI0kV=?E#TWASZM>rj8FaG;zvQ{XMM_!C21oBH1usRZ=5Y`;Bk|5Ol3TQX4DBP-6!pA$O6PQwi z_UBvO8WP&|CWYNW+KVbAbg;1u%tqW>F!4OejKZoVWPC|0z&{XI962`hyfXv;#B zTR}kVMWifKkPklxIeGW4Vgu3PVOS0UiStamC_y!ETvUMiDLE8c%Hde+op)nifBmcJ zkopec4k6^ZgO6bB{xW-+$S84!a#^B@pU4|-oz6!i2Ml$3_rPH47f=vn%9;~Ubg~;43p|EQP z+Q0;$7?L#{N!b1%6l;4H2$8Ij2N*;_GagQJ%~gO#ytNz3PC5E`=1jN(ayi^=n7GDc zYdwJ32LwlC{RVsOWrN|ie~oUR7Qk%Jk(3ZH+hn?M7~N6?GHgBr@9M#ri3~a!)m3(+ zYN=FMUuVF56fIj#@)H)oD%dI@$gy3{?1n65S0@2Ton_^W@`z4{r1_%z2G7X-s;acb z&1D%akloZMcI1N$!5gCRkBr5mPoZO&p>=CZp`|a)!eSW1m06ZVcaT&koNaou2-tra zOYTWtJ%ojqBPxv9qaP5iaXHp3P*4NE2w;2v3{*5ufUtR}+*%0!rV#{O z$cQo5UuVKQNMer6b#LbZ)nxX^r@MD)VUiG(gtT9vNwEXmh2SIA1{NJ5%ebrBm^F09 zFOgB51Vr?qWMiDuNTMQ)p`PJ-06_VYLa2R=&0#(QG4V!3)Vn(Sj}hNr-H7$8;z}37 zyU?nEeDp9J@P$i1^MHbD8=*GCe*Fe9xIy?7rg$~*?4eK|h9%O7T?7t$Aw29x%*8iP z4YZ2<&5+s%QJo;I!E;?RSeE^P(jMM+dItiXtO4ONMYYa>n#(MDtnL1}GnjPQVg(Yj zE+0q|hV_601^iZG{kt24XwQ_ekK#zcT%1|iD7Ie=x%B3Iw_<-N?_vJ$(gdI9gyeK_ zNIaof25|Sk*BLn2XXF>{X;R4E_g||UeB55RqH#itups=um#VU0{cNma4c$o*pjsRQ z`EDbKvKr;u4QF6Y&shixFe=q z#gGmUnDr*`upk_6pT&)IZrh5g=KAxO{5wSFT!bV+*sueSvxodNV72*TV|B15v{pJI zlu$s>ZKuEw3F(AX&bYyj-V6}W^xk6k{Pzpehsj7$dlqG!p>jGp+5LlX%me75rut-7 zDvIY4^&=O$%5trqeoA`or|78~3pd339VxtkbSeU!OO;w`z4xXrG5{g#m*CZVp9|In zvxu+SzkeAq4v;&9P-hV8C|N)0Xgye^PlCwC*--PVWK_?)puPqu+deApQb`9kngIf- z8Mzg*HABGH_F3T%7)M^ul)r$V^pGtqhWPuTpcn>y9|5}84DL3eqyP+~6>~3ETXSDY zxq*lU%p%CYPpJu1`i~-E0<>WYVG7mt&)Pw>;yb{aH4R)~N2IBAyjcYi)QG9Pp@GU} z#DJqP=rtk@R%kXD;%z_Fedmm4YwMu*#l5#LgX;BKuJblxIA+`C91tm+jM@16P?8N6 zpCrV$AK@P#_0|A?dN*zr(4pN|EP-Dyx63q2R5{;PE%AhX=H<~SG{Gwd&n`%A^XEzW zRc}g{9Yv_`e_)NaX`pz<+w=}JjEKWkgUP2W_KA170C`5^3HJ;u`<*(%{w749u@gnK4g(WCq0yacxOoY}Y*5rZ}1U zUB>4}5}OP{miFSOf{dIdbC45n1&bi3*a+OrQ5`TL`Ix00Lj9gTtAx#v*!Jh4LZjcc zh2L`3I;4YvqZ&~1P<$T@NLu(_mlViAneevyC^HmHTxd+v*RdE85Jo;N%j&!iQXMG1 z76tKDm9q{d*7Cq)G?DBw=2!s<1crqcTfz+^^6#nZVnrzi#}g*; zRS?jYV(u|{O`E9%n6!Mp!w$2c()|bNfiD0499PE&*V|OV2zqxFG+<;w+3O$_z-hJE z7>N^ogG?!V{`J3Z6;5;e4sD~Ez$pE*MrSdY+5Aa5GcNwGFTx&13>VOiT_S+u88(Q@ zQHWIi0u4aI58&p!)Pr%d=&^_&5crCp-nn`L%h+l_P%sPuZkPJHx7^G$_2D zpZ|=|;_?V@o`*h&MGL;X6c@sRwPj>vxQ&NIg!TBmMh0ADz3c_vsmO%YxAQC|Iq8^_ z?!&gmO)Zp3X#N9KF$a3J?v@pa$ta?xcndAXn@d0+uGj1^3DH~$^yt7j-=fh(IulsK z@k@RD^*2P9nw&VRJonPi?MPAov>Ek*0_uH^$646#QP9`+=G^4AYt@EO5`o=GbA<%| zyeH%Z^5Ym)K{v%2vd;a%z&v~(O##oF237}B9wp0MhSl%yA(ATT0|L}_reY3Z!BL7U z=C2#;wEn?8Mh@Tm0Qy*;#l0o73LDZ-fko+u*Cte-`}MKuNYmYyy@u^zmte5V5s+hi zi$*;9Kiorl2rI3&1l2mZt5%C*>@eHbliKiSWB78b?jw+w4Yv_qnZ9YAuWihR+%12S zGpJvJ&m^yUI{yL@3E2PBQ= zF;+Ws1o)nBTNMky{OF1g*W^*^?ikCgX@yG+0;H<$=ux4pSYSO{t`ua$pyR^~o}Xes zAiSXu{kd3L>8PmjFRy-@-y8nEOtMrX>eP{AxUQZU%I}7#2hQ(G1nCiV$=SZuZ z5*tnVz@fnz+BmQI51-mz)p9!BDB*hmRd`X*$(74b7lZ;wRrHhkX6027^b!s&952B`2#m|MCG8PLjcq-~v+T+=` zTJ(=v#Ku@_l(&8F)rO<|5a1wR05Yp960S5CV65AaZ1}w*_+|b^Fk2%PMQNNZp*4b`_>?cqPushIgT!7{BOwopm zcA3V1cYIJ*<-@)>vbp)_+yAJYW)OMtIiFcXzVKhwQ@50Tx1b{uK*7B!e{jL9NC?I+nqWh9NL_Ll zi=o1?g;2{dT+~#>XYU&x=d4Dt;-+Ln0K;<>I?eGk^M;t-8-4`FB;Yf|aVfP@z3DWI-_L*|g=j`F!Cu?9WX| zlo6s_0oxW6<1bH0F}G8}Z%RlduNlsyC&{fg|2jkn%X`+Nm2=4&u&6Dxgx{_UVv37o z!Xh)S+wC&mV9gHSrc5>d0KV81<_~l?)jLqdOE`xVTftw2paIHrMaT=t9bKhfR%7QB z5q5%hU*9VdKW%zLnfWGtoy~apfLiKoDju&UW_HOS zUY8mqy6p0?g77LvjIzR22;<|tx%B%kBU_}VETaY*Nx(zOUZjiL2~yT;(#OTIfb$d5 ztF{V%BZ1dnfNPlOP5OivgT5E`ib@7X5O)AcRPAB1cp$Z9?N}s-;+`9^ zZb4jQ&sXRjf&vUiTyXy9zzcaBBqT^-`Q(WvXE;pXvE!i=Pwv1st^&2+{(Vg@UCw*5 z#h7hqA$ac`LbZJ-34APze<|5BYNvy#A;)QB9?n{f^@C9jzRSr5w2<1NDEJg!co`#7 z-_(D@o;PU0B!VRP#5fdn=<>jx6N)m9RZ=J`;u4W#bU*_UMFR1u(}+N-^y^!f{)UHv z7310kFwp47Lm`U#D)V+4zr_4(9tphg1Cpm;VtX3y2a*qDI{wy>+ED&~9^V{g9F4$zguf@sEevVrKpp3Z|qk2-9>eQBd(Oo)={0*{T274I+&AnC^#I#|+6 zll{u_Js)h|AwL(+h*&N6t!2W>%Nh}8MA`y@MBqjMe)u-PUH11StDs=HLVD|kcR9@P z`FbljK#&?4D~n_Y!If@?a(Mxvca}aB$fScEaimk_(OYc)4b(vtOPkrX5hy=O28VF`w5;W50l*Qz6AH!$(wYKw5IgJ7I1 ze^vBcx+J{3Rmkv=92KA@TQMfs8+$6XMs8H-XJimG#sy}gAi*?%6VVV+pafF~eT+Xd z+(!CObuOY7*^0IOzO9cwhs7l-s5qXt7LFKRKqCRQnLoO*Tsu1EM-0;NBFP!7maU7? zv|%{mmB>jrE13W%p&Se3gH|qc=(Y8EA92HD=82*-_kV~8aFEcvULOKv87}7R<)lYz;{GsB1G$aAvda5i8QbSNr;IYV3ih*4- z%v1sa$Dp&O;b%-HfwzsfEQNDaECpA1^_hN3U}x2) zfx|gQA`L@KSUn{t(NDnkaZos)kd^m_Ixc*#06due%kb{_1#AJ}3F+8q1fu~$Lk-}E zTpw|5y~AOvChieNgm}d)DBx=~0kpO#y9Ll=9&D|NLZ?i1p%zoB>nG zTh-((#$*dL)GL1L6Dbyt>>B^a=AU}%o^IF-<|--{^4H-5+{J4IdtFd~XT3f{74YBY zY^N6(cu(;e8}(z$G?*0zoY)OmxiTMf9bf%U%^<)mvE1*7g_|tu4A*%1SP9&|W1v?9 zK6b%h_=X)}VKs2FUM)TGgs;(tZXyi=yy-mVZDJRc-eMb6U?J7` z)d)nbbX8ee0$a!);BMkRi_|H?$iAlk6qa(WPCr=vKaspcPV*&HVa&p9vTN0Y-6_sY z$$V&U#LULH8B&*msRfsZNOAsg+2TnJ!$USeqP>Ks<{BT^FM*BB5?&=@(jF+n!3Z6f zLSv18IiQ&wd^iRrZ`G2wn3C^;32Wb6dj8!<_B}a>p$)ksLPr}^0PAT%t7T@)iU>|` z#!;8enh56H*Z+5jQdx2RP<5?%}k)t-ja#vGE z&k3NF2?UQ;%GN&{F2$q`2}LF_!6}T-w6cqXqTFP~krL{q2k_4;>f})JT4UZ@?o?~DiVfjE?kLD&bH_SY1WBOud9oQVsB%L(9ZC2_PA3@dL5?D*(0 zL8qaKf=Of@46Ow6@7d`8&pzN1&S(KH34IbV`0|ANLld9HZDb8u!-;;mHqiIOhcxC# zTu{ISgKvt9d6uv1Jwf;$i39kM(Y^=DXZKYxC2}e_Ssm(U5eJARl0AjSRQ6LgN-EP> zy{#J9qNEPzUFFMJN)MOKf$rdKWI`OgnL5d>*P-psswY@rJ3-}!)vnKgx^(Vit5@0; zO=?VfEZ_y|(=Bib`pKbU6H?6eWyJELkmxg*gxz=6j%R})xbg5@VhOM$B4-6Bc^zg% zhMLNB8aW1Jg>1wEtXLghW!v>e5zb)g>Bvyz$y@{A4oJ-LHnicqa(#gGrM5?ULK6k& zkkynNjv}Zb%bcqbl~W2~5B(tkKvWx45=Q= zIN|kxXJuv3eE<-FnhDON68}34T28GPJkovuId2vjpUesf*8Y#KUWL?>RYFA@(b_GM zZoq7Bvc}>QxQCG+NdY|=!8hwumnGm?x{QV~dxGQ*C1p5fmr60%9ZkKydlwK5Yq9hF zPwSQ7F%mfx21g*vFNP~BM3NM|{gl}G}Cl5C@4oOGBz1QH2#7K`uGHF5m(t-r> z(NF4}M-!3^GvL7U;Q*%{;E4;~44ZWD-{8~E7256@z^t(dDLo$nRAU`W(y;v&C?q=( zdcm@y6nREh^y%<*F_LT>Tz?uN!M(J~oO93uzWtwqASL4P|M8c2D4>)SA{hv5T7~&! zf1jjvA~Ve&f5s#+E7CAl5xuzfw+QD9WAa55JTcENCrM4*X<3YJe1C)W7;(P+1aZJj zSbqQm&xbzPU_^U9=s+M5#A*nJ`O;;-H0B0p+m^r=O zrykw7Kr3LaFvGQ|3ekrUy&?p+8VcTkeq-~Y7q(voWoYUVg@mucc(*BkTc&{c94rL< zYvBk6;emlwm6AO-V?)ec3}Q3Vb7X;?Fz<8->ImaCi54t0ZvW1~E!RPsEYlb%j9dtX zKE~t*G?JQwOvhZo_Fi1_oJ64+Dx-c*stTbB{$vOTm#^u1+qW*?hOa&cxX}bi6R;$T z)&W_sA40ZW;a=zdDGufzF|+$J-coSe>9UN0mTqhAc@4s>gi*U~1}9Uju0r-v?{VC~ zZyB63vW#MAW5^SSUbfjJ#GnScOG{`ty}1;Pc_yA60(bxxj&@UJT1B6|^AVDSxQZo+ zd_Z5q^%AT-Tme847e6OaAdPDAx}7A=10PipLkH7(7?+nxjkn#Imv}dd7~c;b!u&PA zgV$4>@6<9y!4mPhfw;14QJR0i2e@LU2AQ5E$F#_)mM@0J`N@KAMo=GEvXEzAdxtJK^OgKhu z@bD0U%7KjV=@39kPK$%pLnC!Givp$Bf`}7SvN0O8O_wM`9@XFD_T;u>W@1D~%93IZ zVNiuIY}eq1yNV>70DSK^1a?TS@SIA0gSXBDd5LJO-A(&wl4NYb;(*?VC|ProY4GP5 z20xoAJAc5sTW4U_K1cWN!s$`-G1w=MfG2JTx1|7P5&}0e5&zlc8-Ux}=WCQ#(=jqK zNt{o6%7g*W&@h$ri`!5c*fhi}KU9Vh8dEYkYL9+tEbSwn9`PT3#zershE+r=0)-xg zMQ+4v=D*-UAH;X4a#h_f&G#Ifz#4V9|0Kb~f%i_8`wVOfe$Z$=4FI3=3ekPU^%_vR z=U>F>x!F*pC2`;=(1cYMK{*tDFtVr5s!DDdd&}JrKWD7rQ#L6M#}JG?qf(#=WGKo4 zToZ?ykZL+YCQ;i7omkbnyJ|7KKmIr$$}u)!TwulFeNRr1PlvB3sqrNn+b@MeNF=Sl zzEqeUCOD;p|2ZS>_m=1R&!S)CC?&bZJ197~ZY)TPaP#Fyh2N>$AlexHA36naEg64f zhG1m5hG^Bsa2wMUA?vx{S$T}=Fri%OL^1UO8PP6>LSAWA{} zs-%@O~@HgazfX*Z>C{6xKaL<%j4G|N#`4o%EYShBfiQJ@T^cB z!!@+IXmp$8h*krSK#ocR^s~-RIYtU&3xo{U+8Q|!^1)hV!-!|D%dC-Y#?6nPoGPl; zgr}@#^{L_Zc#v?WYYR%PEo;R`9{)fdpgIygt@sIU;9b_DtiQ%RYX1;Q&rhb{eVBjw zT)X5DOX9SDjNrF)7Qh}AFHmYVFtbX!eEYKUqxm2Bgx3(sFT0DD$fUjuI1F#ej;}Hoa!J&ER3)U<`Z9Qo{QDX&=GjEgUy!0o0ijGJ0ZsSc&hjwYL zHW)t*Ll~#lhpdFF3)A}wZ)S}`%@)jcUcn(NGn^2kgqD~Oo8?e}Up>U){2VFF(OC#5o~Lz+D@* z=mwNFzj>>pJpjhNRFe@VdNJB69$`Oh!d!^vMSkox4v8CVr78vFDA>P;-$K;VN+X4f zWFcIuJ9%WTd?Q!0wJb$brd+SPoZ0*Yrer(6euEN|x{>eCo6NvCo@V#v5Ak}ZxB1iP zGm^I)yh!pFDej#k^7MT-&HEMZ`*SiEA~zB0YRDZJdX$qCS0tRj4|$P)zw*vvg8zU5 z@hbebJ}#rZhg6NKadP`}?3c@oOfRloli)(*$DJWK+w7fvtrGIq?s&-JCeR<0mlb{? zExt`emmlFyCjmyG`8WP#7@9jyN7!23p36|w?(&|pyZM&_HGCb($6Z6LNY!|n(&IS2 zhebYQBR=o#`8Hjh)pS4J8(lU>$Mnj$O!I#OhxuFxcKi4_sY~-Ri5lXnR!LOu_%{PP zFZ^^e8QXsjg-OArVMZeZyO@$UXr|A==&iZ6F0Xr^#5Kk5S0|n)5cxd2gb7L_H(fr~ ze0hQe+f9Y-X}B`<7A(T_Y%UxWwLEXmiSxaOviV-X`N7oJe|Oy0khBCbd1iI8QvLTA zaqz}n_s95{v>njXBi%N6y{HSGk?XW~S~^2Uh4>?TZGtlQdExHU%zmRnIvfv+WcU)Z ztb;`U6CF;=k?;tdR~>b#eO*!yg~4S((!L*tMq`doMqwf=1PR>|CT>@#{^dL3jo-C? z=q6WOiz&9UG)ax2nZ~{bE$SK!dHhheDYDnMM%21t=Di`cA6xN2REZ{=dHyU-1@IlK&`MeoJJ2tRyXc@G$Ebn94xr}hg-NV($N2Mu z4+eM-&Mi#4EXIc32&2Ti7_T8Z(}T2>Bdw{)0ey5{>Q0wa^z$Uwj*IUR4?e!^(6qmC zD-_5~Os=IkNc03*@pwlnea5`_coBwCDlOq+c<#of+iD7H%yZTxR}AHZ<1V==h{?xB z%I0DjeLZp`9hRgY{q1yFjv(OW=q1)5vvb@ecvumm+og8y(w5jQYncL8duCF;JwZlN zl%t#_>34kVa^SSCMS0ZO#n3^=5v#rhoqiIM7&)t8TB zo?b&`w=c>>?5SH*lMaz5K!N7^Mr1+mvMiyt;Bnpe=wZGT4uwS$Mr%Bi9I!9o=c=rY zVDgAV?!;|oS;7?%6!l5Lx^_;Fn6xaW>oWhOzMeRba49{~*NX4=kab(+-%BMe@#2rA ziNIp+ogt1Cd&8vAU^yhV9B;*|mxGVC%XMVC&%m^2U=iZ;4e}Qa(#N!5_o)x-fPQ;b zofP(J>2E-vZ5=8#+VG?K?casRo(W)fJXir6#`npO zQkB`VJFz=>L?U>`2b!cM->!AjSS*rI30Oy7AG=IO(Hu%zyxhv#L1Lc^Zh)hUaqK5j z--gJvR-Y-tDe1v7S`&$mfzd^H01UBNW&2`;t)lmzcjCQ-zCF5SB7iN&i$&}u6gJ+M zUU_^{cuZ$2S(tYSUs1yyJ@YgW3@6qQrtqxnr)6M&&b#s)e^UHQ2kyl)?%hNhZh6fV z!&h|VB-9W`)7Gu(m-*~L?JqDRS}`rqK|D4W&~yFA$%oM^J;=5D!{+l7p^buHE%E(t z;QNv{?qWBm{D2*$oa$pwwC6OcHoY$3Y_p?$7ea)v>{5s*q3&l?TIJdOcD=gR?peT#W3!GQ0Q)DRdVjnH?(fvg*}m+T!L0_ zl{V^NYiUfVjlM53An-sF5kt&XD~!dxougaN11p@-w8>3LqU?mTj<>|H>w(>a?r!FHG==@RZ&x67XRp@n#-d_&ldtKOeFchpp`5O z&)`-eraED&)cBjBID0=uWX|OQQyM&4N8iong+S-B98DXB<&Ct+4KeLX-99+6u0TCC z0ov|sPJ2l|2jCV912a!+opWGd(xr$jkdWD|iM@nP3TWB?>??~fY96ntdS^RjIPbNl zzM!@815$8ORV>)VzZYK?e&&vBh!L#ujTa|#UmqM;4W?(g_4)~qw>4TVTvvD02|}l? z#Hviqhq7#5)7=Wd`zGaUciTwY7~IR>R8ejd_fbER;MPKaQgYU-In-z37^~Nj6YIavbojXmaqk*S`ki90P`BTbyAT zK18$YTJbV%_D9-METP!I3q1mdjKt&hp1Z%8tqN~ghz_#j6Od*+E+7{BeCD~Rr7p?U zsMT1jDK;6me+>o};-8+XS2^c->T{Ktb7hS1(`!Nz+JAVf)R_hw~mvK~px_J$#9txJnkgd7w zGMaSlmxCAd8LSgZ6B(jQI0^nsQ0#9MYW&1T^n#B#l@t5p1yf(&p#*>Hc7YF)lus-^ zzp2JH&mj%davT9qqxNQ0<0UP^dybFRpGWDA6pK29=Wx3)NPR2jdJ=;Ylw8vV zaqk+dW~sLJ6$Tsr8JOhJ3^RN{_E}9v#n|p#Rg=O`o(p@iGlxh ze!zKW-|Bu%ImtkHEv{sL2R`Y}NUsN@>R$sY3^Wxf+Qa6cn-8bxlFN9KY-`-9tHwhS zPcJE2d)3f#m=E=m!sNvQ&Cz*~wqTG>r$gdf>oEcVIPaXmPhpMwkmZu*9ZTb9pLQCza;477>1Gu17da63Urq{B(JMzEUmNR7Ak zv4+Q%3)QU}n-4c$x}!lB;1klgP+x8ONWjEZ4w3b}tOs&3QRSJBrT_S4MxYhyh}ITcKbY}_sV*;;dp;!77C zddL=^OnLW9(@Mnw=O@@w{{Gn?Q45Bj64^aV7ECjF3HP6*e)3egT&SEAXjAQ%=-iDx zdKBV8P+7e4GxKJ1OF$i8R=7{sv$tg3ma`iH<=>^Ix}~>;4xwW{Z@{$plU*w1lhweh z&*uzkiA>t`RyUR*Piv-J;)~hG81|{t8*yh=y#ePa?xwbQ0Q%pZv$}p3$DxyHd|e}D zHC!WF6^;0%+13xv%rN3Brkr#gjB@N`WmVmjaTDC(z)!p5=MjUe^CbyotrDulwT;nLXapty1BMpzA3q(rz$VKv5F7HV^oI6=-NG+e^APt|? z&3JHYr_81lldbbK7aIc6r-iys%T%6zIKN<;BS=ZdWU}aj@=TCv7KGz=f&od?d~!;!vsuC(0GEZYkaIa{f(uCck82=OlA z76H1q8(byOz@uQn&)4ad!y_SwqxcZesQ0&syz`W)Z~NMC73`=pFh%gl#E3HfA`UDPQ! z%iF33y{i6b;F~nZtF${cjmP?)Z5teBR*oGuC@&in|LmWAOrXa$6l^?v|0O%WP^UWP zz;R?oSL;Cs{hZp%GcEg>pDI+Es14h{`-hor7vva97sd#Q1W)Xu#IK#H315Jk%$H62 z)u`3lL{`Et>S%UBnWmrS(o!XdQ85H!EOV6H&qJr@uRiy!wex@0OGax&vlj3$VQ^i5 za95}BVdH_qn>!-KBf)&t4s`Eftgx?8U7mlR|C_x9D*w3bW=BTuS+}8;WIZX7RKAj- zvKpakEx7|kqcC!tl=?ECz5`QkdOIc-j0tk6pR%7LvNR+)UlzX2`q1i*jbIYMfBzecCop?CJQxIU#4O zWuQv5&-Qt@dA0*CcZ?Mt4mqur`!N{#Z}Nf#X(O3#`H`nE*HC2*0Vq5j=&wi~`O4gpS zO=)%Dday~twVgXLFDYYk-oCi^R|w_P`dWpja2oTDe^jmMpAWg;S6RbjLI`*|IEWxE zqZpT8T8SNEdho!Lv#;&ZfnwD$w;OxnS6{!8E36*ELuuH*FImXW%TZV2t{vVzIl8O) zF6MJz7XXLockCV!BCb!x>yd<%-te_tsNdRF7#jY4juK7j~ zYI0-Ow-Ry$ry_elN8amZFt%z`oyvTm_b?{(vvBzGrMf(lFJp(y@%JAFX3y^2UgwUL zw-e#C=g3&MA{Wa@dCEro?1KT_cfUsBrTb~Kk_oe;{NE@`38#Cci|Bh_PWfz~dss&g z+~g|ea%uCp^O1|OQ&)Sq>v~A<_{C&lE{-Urzc-DB4xwf$o3_ANSoCfo{uvukMB>zom9n1}Qzl zBkwU8`v}skMd9-(d{gS_3KQ*+kZ(~YnzEZ7=1CD_bhh8R_E%;rd8O^EmCKa*bWVi7 zP*~vX#p)ZiosC_yF%s`k7VJgYCf>erIoa>C^Y}A8csDMbp1GYqks^9}wd}3z^HRI9 zsc96?`f%Yfd*hGzw^H6TUny3SN)c2Uv85`!$vWc;YMCnTXHT7hO7Q@G#?eflp*y>`J z62;=MTgdTHpv&gNO~uc>Be9;5dIW0e_k(4AYKjSMS79Ti>tc-h#Qlhe<=X|`-H25G zWGJ^Be!s8hgVEPC9@Xm07ul%7&Ko;gl)auRyRr79CyhZ(T0P`}Ue{P%hrqR(9h-{x zANs`kmoylxf5${pW!KmKmNmZF*xd7-6-~MF=?C>a_J!>7w?{=$Sd?BONiH9cLbST@ z4mXtwnV67;<#H@N0b0priRo^&=@!=RUspPwP|;zM__KViU(q4K6FpL6qKlUr->e;@+h)9?4&^I~hj8Pk^kKNbg_Y{NGm3b2XK4&1_SfeaY=9Ja|K zWK+wzf6#Pg=+FMP73aWgVm{8kx`61E;S|%<3#9|a>0@sqmc0vS>DTnW`~D0|c3 zYJE|0kIcC?&ifw))_1iLG~JkH*;=7M81mthoWI{yGc>)4U$3@}<`t%T_^+~db1;jS zct6VF47gs_)6)L^cGoxPCHZ(nZE$4g65ZlYs-&{OY-6_72jhHlD@J*5J0u*>`LmD( zCc8WPTaYQ;|McFC9*=(4RJmVCDkr3Q?{Sa&PT&Rjc}ThsL? zm^OD}PR||hgLb3RIr3=Bcy zp8Rm|uewbP|70ZqeXo{jmu5c6#M;+=~vUH(DxGO{u$JLUhDeCa1F2ZTupf&*_PFpM~otY=3sKD`#k{6 z)Y9!^Vl*pl6|5eRz8T78WT9Ca$t9H-qQ09-9?d?R)t~FE_vrZ1SKqR!sFvkdOq}Pf zBx8dWV+@%YJ!*`f=xVFIiV-YQv5s=lz0_N7ZlbgDy7o_ldC>Qf$!8228j=sX9?zN4 zEyif#J{>d>{RPxsc z{|VCwt?%S{IxN=K+ce(C_TnZb%Q-MPUtB^vTr{kRDsOu{|CK*`LaPf`?!8J;H>pQ@ zH~#C%FX&GPS>;Z)oJk_`G0imJJM4xlTqH7HcMn-)3ldg0e9K{xjjV3eEPR;zwD&O= zUt*U`C;tPihKcFBnDg(KV>!^bYNhwvB15jF<-QG+svFPl{=C>{y!B4P_&lb~mwl|= zayCYWw~aj_IblS5tA1jiA6e#9GcrSBas8&oR2R&q6eep_2*xApSN=UWK6$oNwrM$i z+%RCQI%-(u!Ft8Q`=XFJRlq|+`O|YHr2cYp@cpybW8>nrHzH~&t8YB6u2ftlVdv1J zepNcO8WPh*s^QjtAK;|2=0>67x6#lXcU1;oa1S(=~qi%XTnZEYT>5 z$wwpRlX!!Jumb@&1!7`WBdf9NPo$ou$e8am%)-VfO%HqHqlIkef^PfB*MaDwj)1XO z%Hm|i>R+};Y&S_6njU!o0AztD4Cs4}iG6&md%+o{fZn>#z?D>-2> zeaw|ar6fA-q_6hssX_Bh-)FAu-;5XduNsxf5sW+LOJzG<=DZt2laa6$JRQ0y9k0O> zAuMXjf3EAF73G7lz6eg$b2!_h%jyh8>U?oGxxQKX-F_glk$r_dHMS(raGkl9x z$&2QS%j>Zr?B0B-sc$Z&uz;BL66O~5vS7mUkY`b)hUpb6^~}DMvR98t4iYm1Xbgq= zNHdRw8?+h~mo62xm6^}^I!}w=TISavy+lr+V8Y1W3BoW+V>OE!qJoOVGozPAi#en* zKaVZj2XR%%3ho=Jk5ggO@s`nxQI~vauPVy-yBK!N&Q97kp9E}+eIuFt$dM#Q9-zMc zir#zd`mWZIyw_KBq4X4!P9S#AS>OEjJ-PcWYNM_hqlGK0Oap`4mJIgvLG=^!a))Wh zPi>#y!kiKA7sYHufeT;k$>*60Le^~mM^44OsL+H#0*iFE7rWl}kjJ&WDo9KW)@m~h z%RScJ9jz`=(_A*j;XD%v&%w^!L9MolEwi2o*HGey`_W(z+q49kJT3DAtAN$pSOtOw z@iC=Mch|Zo`MHFAvA0{}M-CWE37bdJ3NJa{OuRUny*Cj2cpR@OMC)4+T7$)Rpl79T zfb7!w3y8%)ot10?QNyO7CF$L)pq6}PvqW4Wsd34X6s2D_m3i_gd_0n*I88OGDf=TQWzV18Omy|9Je3{JfkGxrn``Xgb!*&<94FB#65grrup5`G z3PY3AmC^}OVD*uNkULV#+cc|Q968*4QXXwh;)!YzF8=Zc33OPNmnZD|G94@SFU8;YB0lcS?jQQ{H5`} zZd$*zHqRyVbg{>i)o$M0s(*Es&`Jqp^G}P6#I@9w-}c~=-b-ia-T(U>RjdDeng#G z|Md{XM|M52Y>Tn;ymu&RrBo_?zD_Dpx>QQ=bO6)(Ip&gE4hUQ+#z5B+4H2kZoW`jn||YEI#d(X$6+uybX|p`oth!_lvj>zrej0i`r9p`P&9vE-ORpe;Py}F8gm%JU(%z&qzFFCu(`I2mxoUJD?Y46kr_)T?;R?~txZ$L6=PBY8t>#?ywz{y zq_rgjnP`+Hmd-u@dYDr>Po23E-v!)eQ&GjL-w284cX!IQI-J{eoj+<*-MQ%mtkIim zTw0rcgcuiKFYK~Dq5k=0p+ef~^XS_pAg7qWOdes04*QgF<+)<@6qWD50^W<+fV)Gb zo4zmcxvE=Ax_kq*1rG*<9EPcel9}X5zk1LFSTEg696M7t;W1IAU#s|(*+-G0;IfY% zX_Y*ZX#X;7+h`JFFGqJSq*9&kwmZk+$X(NWO5?MrTqH=-(#gt|cNod+$S16roY}}u zT3fvLEGz5ATMdg>zrJwYyO+pej^@u1-+Y|*F%G-%K_wNg`3C9V=zcmNLx=3hNu*&= zZ5`dC#ZJK~>D}N<<#Z*L$ZEYZq4X`=hGb;yd84x=ufU$xDG{La^-9;j#K9yXc4!=R zd=Fk$8x|6y)W^awFY2Rxa)gx%`!CMfx zH+T4e-?S~hOK6{r=^4&w21g#ol0i#>d@WtOT15xWx~@%v@mKJwUY1(|2fbY#J`JA$ zyz%nF!Z!sbo&&6A4-v+gofREk?RvL)^T*>+3-{0A=U=${|MOdAC%_{hL%H+Fz1(&! zZztlQDN<$Llb#c=r>In5d-Vw!up0s6jnJd>_xCMIi(U$m&4r3HD@jFg-&g)0roKC# z>c8)w)1hN_jLdNCne41|oXo7ujL6DXWL9MF5g}V<*^!YXA){=vM@AV5k%lDwUZ1Y} zx_|c{*W+8O56kxXBb%+Du1A@JRn?;_LOKM3La z2zPHu*%{&N{|x+7(|j|V+2l^X1`KC&5hYt4CBw-X+zej2!>sMq61U1be%9VupZ}4e3PCO-5@bT>#F_p{lnqq{64SfKfJZhssQ@2@&Wtf=VKkvOsVbb zHJ@fZypVshl_`RscG?cWfQera^JVw)p0ZT!sD_-<#RRPHY~d;{*o=$(F|_oB^;ZZm zW#u{sT7TAr@WtD+Hw-TdWv_pIA^dtF=8>`h$EHn;P8?~t(X8y7{68ydKRk=YUs~UZ z078K0BeP4~k9M=hF%luyWeYp-+ zb)RNVpLzV{EvHAX;NvE_urj*Fnu$5z#^cRD@A_r%N|48q#6+r8sS_z_%=oN^&7aX8 ztuf<0_YXT)WqOqh~Og-_2N5EY%S)5k- zGlHKPY2FV+Tochwn%!=QvfTinXI;fAIyxoO?VmlF(NC>^x+O}D^v(1~l^CSA{CGc3 zyggG>;CrRleKfCIXF_hcgz;I{vrERan>NIOLK4#4TYl03TNK*X1Fk67#}-@On?13& zznBgEZR7jsl(MQM)YLHAYcc`-90obK+;tOutSMbw+kuNw-p02Zu8j;+`#-NARZUww z{w#cvJ1V{1t;Xi`f-~8W18auHCX#zuVb){80VaM2=X^;Ij~)^-YhI|D>cv zxn;($q({uBuH|%-M8N9j$NER^!;lZiqe;?#~OQV0X zoAWCkjfgIr@?7Lc$tvAhTWh)#?Nxa12aVr+`57zp&NQbV>P15aW1*nVD6y2gqPUe* z#iUww4At4W;VlEZiB!k?I^KDXm+k0K48$D@?QbyGocvtFHRCahO}D_~m$gD%9PyKl zm#z>3c*uPM(eY=16@4xD$}ZA)o9LlYwrUw8hcIoRjnM1cDXGjh<~ym_+y|ML&WGbe zF{i-Db8v|&Bx|gqaFDwV6%rW)+;uN;Cxxp;E?5*H-wb0-7YHB`IrR(6jC6gbV{&NB zPmN`+`(;aBX%-6W5*a)y=^+JAKy?sgT>>>y$imbqn_eZ|HM)quz^l8C?{RAD2)KW!b>#e2J}=R zP^j;zeE0Fo#(%W{;ItAwph}j#|Mgt-M8#_A1MK{%obgz!lN<_xj8K%QuAk01NA{%w zVv2Oa&4!jR!7oJ2vStFJt^2l&LPE~`1F5eIw#h&!#{ndVf*mIQi?3Epxn2cp1hAo}5qK!ltqHDa`5_YskZshzOMZ(V6` zxo(nQh>B8GyvlIdwlZuMu$}Wu&E5ro!KqRY*1pZ7x_r%l#`&>NmeW(c`#*`^QTu+j z=eN9*=Yg|7-LAa@9ewF^OW(}6cB|j90eNqYqZL^H4(&Lohz0628-|yB{ilYgrb{hrQkB81^O}7&Xn)R9QifouP_v#QLP%Rn4%jJI8XFyc zH0V4w*a_>(jd&1&I(;(B$x7;}T5&y}R%WRkCdYp{hE%-_(2^B)C)dj(F;a&3uJ^a# z!Mx2K{THB-aHZGO?7e=W4^9sdk4oEgs_3H=szbM|Q%*h?(7Vz4>rtNvWl+85quv7H zxt~|aMvAjm_2>cj=XJqJFy&$d%gf8nh1{5_cxO=;^b9JRu?XlrWz~E);S`bfFQ45s z(9LKC*ej*YE-59`aI&cV0ZYsGnBUqRM1?mHFRaXly$fN@VB%(1%eL?XMvg#))|;@B-5UD&L?;q9m%t$uforj3Y2V9}=hY=BEf^ zYyJQ>9ysOIzqE-;H*;WSMD?b>-Q(947pX?@V0V}~WZW`dS34z2+{*hPU|07kYv4^X z_Ws^=X}X`OzT4ZbMn#wf@~L3t3a^S~|x7%n)EBq&9n#!fjwLNhHth zUBeLP+^GFKHh9IC-v{i7Nr|y3e5vFol=k!6Rrt%xW&qg#p?|K4f2v9~Lv6{slSW;Z ze`}L#taYuDSKzAHosi?p8VR}k5DhH#ADg|ZzsU_a>~nuhMv7u4Z1qFA4X@VhS-4g; zTsd$)}>uN zY`C36n~Hr@G(1t@oqOROicVd)Lxwk~^LBSUw;8eI2wrCLHCC!V;jCEeN zgh8E0b$>CT`%ww8kqmfyfy@Y(SufzSw-^*U%)ftGcr|Wm^jlC39~(ihr<+i1QynrK ze@=nM_mTkO&?SA*??|kdyzs})@t_d?ug8Nv%`{Xf7iop^?Q_lS$fmKfeLBaf_qHFS z*J*#Os{>GSTlWX%0B$&#tQg0ss1T}pWl|$Lk>V}o#p zAVTomdG_khz@PS_r5Mb21Qt7hAVlfekc8IDDw1RQPDiz=!CeiIaC=#2*ZqKbK%zsk zg@uIjMKtE83VTQAH2`JaV9{q>=E~ATfb_LMxd=f zxtM+tEQgKI8qwhcqGtwoX!~at&sTX<@+pYacKv!?j8=Z2G@?#ewQ&g*>(f#Sl`6P( z?)?CfiM3|y^POV*&*%PPs=i+J2X#MrgMsY}P?bI6Oy+z7K69T zMavO4G~adKFqbodz@O7y$QkB7r5xSZKEYuVg?RU*iwo(YyycTGKZ#8wQlwE-%zb0`4lNjqc7YM)E1I&QOc30_FM6Cn)|^$sxseTW>5uANwNuex9??6+M7@ zU+ z!;#h?rIfL=r-uXBl|RIgZk&~OAyL`WT3~X>`)bq~b8^1+u!ZM6xb-@j%rF5#GG773 zGK4kdIpcW0R7&!_^XKGCC&KZr$ZMG&_F428*}eQe;T40Y201H9k3~GAe4_T989@Q{ ziwhPZK(#Z;&nuMiG!3JFU)_*G;44FTh$QQ;fVeRLn#cn{=Gb)mgSpHD{I&dB80>pp z#oAl`lP)o`d-k|FD4|&LP;Wlel=6Y7cCuNeCjxIsbICjLQAciTck~fH=dD!NxHiYd zw?CpcO#rj#A)Z3^W@uss)~{Pl9A94^64$SC;t3SC-yd=M1n#NjRzDDPRRE}DKcbjP zD^mqkF6efVljM+>gy>ENoP2&3&QitSzMGhlsm$aRO{lbZ+;2BJb{NyGtR~zt=$Xa& z`9GKC!lQ5XHUN9K{#Fm{n3<(wzP#I1v`J383<7smCRibW)}w(`K$J$8(~y3Vn~Ps z^~5UMJei=2V})1&&bIA22st*;pGK7fHR(fu@2AkS0h6vB;T?#we*?Mw<<_E*2Puw) zI~8AbaX1}s{j zHp#YtjVFek2|)HT2%c21Wk~h|r56;W%|VPfQOk6$Ae6`f z;LL6YTtEcN_mJZ(qR!Ro20Ti1VsO7)q1}UQVQjuQZ_?J)uZFR7oIeIJ!$73U_DX6W zytf&SINF;*M3C|&+#JbTIZ@%y==9t$hL0(*5n|=<*C0h_b&w3Z#Y?)GJ(ID!#l8vN>A_rqeb7wk>Z+@z+X+G zMlZ%K$GjaRscm`&QfrPKk)$lgU7e7$+2-;ttNzru{TjZZCk$qdzem6QY1qUc%8ZuV zb%;X{mkrf{zG?hBp)yzT$4CJ?O&0gd`4R;ICxzM^){Lg6x*SCgeyVw*LNqp;3|9j@ zFCSO%<}p~R^ZW9it^6`0S%?F|3!9F9TzW2i9kOQS+Ou?7DbVg2gxMgktKm?)K2eqJ zN*2|D@TIV z=<^BVI&3r}aIkilGs|Xb{=BnI?oX}7-T*Fb2M)rkt-U|aINk|e%#~b&jl z!5cQuz)wxBm=F5wJp^QYG~D!MbIF~UP9?Z=?~%0bWhBOAawp8#I&LlJ7dKn7F=A-r7EG$fc{w)h|IWAWR=C5{ znC(-goxw_nJFf>~`171h0DteBX6Zems{7&UcJlk`Bx;1$P7WVx`%tRpb0nMhLTlgv zSbmOHcOVqPE5I3DU5+~=L#J0cMY=^a8lny3E^qkm4gD7#+182de}cHCM7`kcP@5 zM>)Y(urhJEuu8b~(2vs5k{6wo027?;Le6krXGhOjd$27iGb39YF*@^X?xu9#dN}qC z3+z~RGIrwMFK4Z<<86=qS@?T2xhN((>sB{j8N7J~NbA)s91W|g{~l3`1|Qw6j1>9Y zozFiVlg(JNehGP077)~GuTRF5{7rv#SaO{in8{`r@X9Rzy|5F0*}D@JEZ74jL(8Ab z$f42?t+I=|mW;gc+fM|Da2lIu&XJ!#17f4EILqIH;)3bVLSQ4kg$fYALs5cExrHu& z1bzaJ?%W3ExUyHHD(c5>BBvN*Qg3cDY9rlkwD zwc+H}lu3?jHZ%3a&OxR<0PTha5A9;&9)*`C>n|ctmIg!ka$0#2zyyOMu?|GZ&pLK6Oe2ees=^yd0- zfhf!IGB?2CuXZy|(PBm*2cc@ zf7{7wOUbbhtA5`RMl}#Br9c_PcfFgYuq1+-DxKVclGwRWQpU#9J3UFo?a5odW|=I6 zqsA#h7i1F?Z)pt%o(nbUr8zK&g33gk=ATMk^}4}+z$Nkgk^cqX4iKQ96gBlERsW1^ zy33_^$-y-dLMWwG@lgR&5uCGS`D?ZP^KJz{ryV)!)(hiI&KRu-B+KD!RpJmHKLcte zx8nR&B##4Jf*GZ5g#J1tSM+-EsEl3*a^&fxEqA>c+y$keP}E~+Gw6cwW#^+4&;6Lv z;Jy&eXH`x(t`@2j5rG%N2A_%LH+e{Yc)^3GWvg+POHK#$qSi?NvI;Vke<&}SR)Bj`kipfT)cmi1Yfb zltsQe55{81E*!aHgoi7B;|#v3Oz{4F3nIHalr~$4-N_Gr)>9$;gylG@;L%V0TsT3Y zI>X7`L@hc?wiipUqq1GApLbv5-yAK``cCjkyianoo7u6_knk9pIKn6a#$r<^1qBsB zd5HE0kfPfIi|fxD`7a!@fb;I&%aJ}!OpOsw3M`$$kM%y)??zQF_na8oi(oK+6P^%c zZbm_&28DT{6Q7^KmuAM}v(|s4AkEec1hpW@IP=gw0G%fbv}I}0Qd$D82)WRDr_ZBV ztLO*5i+c13LlWFwagQVNoC;%(6ls>S8R9Y4Wu;gRX4I(4vxi9^5)}sBz`!KAkUnja zE6+4BvUQCHHrrb{Yu47SCe`EQN$mYRK$N{#@b9LZD{P&jF@>zddM}L>jRii5yDK>aC zov15RkElYxyk!W%;yOThs$(%Z#sQ&CIQ|_C{@5R&LS`d#_mg{tG7=lfSNryj3XEUo zymXA1miP*Evm6%NLN3-{I>yb09MPtj9M5iMqz3N*x(gx`ZPMnPSQ?n++^9#(zX*#E z14`NTByL^0vL3)$FaiMx4jm6P%Nsc<-(j){%Od@)U^sNBh()9G5U`vh>OGVR1o)66US4pafCqOumdV}uPfnT`NdJGCV{qM(3l0On;$ituY5f}&VsbOII+0tegGP|B93xhebgc8u^3`FovME4 zCt>D5$w)M&4f?C5Z+2aW;ntXtn`j7J$iQfwsSBuestXbLV-TvICDM4j7e(~w6p|iJ zL$hM&3ay#p&PBueyMbg|?@$HqX=WjlDMK>ZJ*eF0HEXqSAm%x?$% zo<$nm$}fmoMDx~(M;>D_$WN2-WgvRc1h0jB0XqhBr-DPui`+EGpn7A8M+1VOkRs}E z_jtwdH3B?1NJM?CXS_3v8G-7FfDDG^CA=zO@dSD?2GX^PJvtfz_{(r)pYpFXk~og} zSDFXi7-=N0qE;*7{5K`!@3lds>xj%<)s=O{)bavry%0>jDZ+DdWWTx!Hzggl59Lj*rO!V~39fx3?*C^BK_;2*f4NGQ)` zEtF@8MqI=8U{}omxfkgdVV8y=hr^=8xpu(SESA4J`f<4s13X8Z^g;yt7Kr_I`lIvi9ym)*Uh#U)G;)-o z1doP6N;^BIW%iLY`(UJl6wGv9-Tfn>NbX~a%or>O3R+q@pB7#P1)MDBjEGxD zocvtDg{UktIzFd%feosBPekmAnmF7^kYT31@EDTJ(-vO1P!ejU7TjeEA@FeHwFmtA zkn#|4GHBqgSt6QUTD4f#!t&B+m`HJ+&h1-h>X6lRZuTr4sVl)v|aP*%qh*X~Lr@`)oZSjo0+P?U8wtf@^yx#iEuqEH%b|s z0il7WqPUv2v9UN*BrW@Iof#`N6Iq4aR`NrU&^I1@!iY5jdSsK=o>J(_F z(ZI@OBNjP1z=r}wQb{e5S``_ECszM?P_R|+;o$3bZWn6(?+!F=Yi5eH14 zDnWk(=7E?%nYIHB{=+EPTf769O@E>?q#f`bIwRVEZt4tFX-PqbWt7o(dKNe;BJz$7 zPq1?~L#OW-N9ljINK6P+tYHYzrb1k37Gxe0_`TfYpyz>I^sO87dYwl7}! zyPCKY^8JyI|2~J&p%WY|#lS_cSxPh_)04^w-c${PQ_-TB{}+TWA;U?{pVo^*sjM); z3C|EuIhfll0=ujPTtb)h^0@i`=*UrsyD(7Dl7k7@W9)}<+`kq09lH-90j0zq`23=z$WdU~?g3$aC$n{TeG~E_P22q2E7`+aKO;v(7YO1`~7HI=y>{&(Y zyUsqvzN=J*8HA;UU^89RvLE(jrmy5DK`t+!9j4Off+DX>HMsbr$9j-G5O5A?4W$MM zle0k{P-->HI2SAMKN@e(Ile>a>jL4^?EkKIY*IJxg_M$Nc#R}k)Lp-7_=qvjN=U=;`Y_IxshOfvvSwoSru1&3(pO-v zxNrsA+92VbeO!x`u`tSBl>!H(jZ01I()yx#LQ|9ITRGPx3m3AkxEK9euM@${}Ujf zr|F&*E?`)KHj&@485mEwpf%hT0BDyX_$Z@r+*1EHj0q;>0e9Q>sZa=;PCq2~!8yW@ z2dBebWV_>;#5?-o`-T!Qvgr3ge*^a*rR+kIKpka!WVZ#GMG2^anHtU*auf1tP&+Cb z0kt#+U2;LtB+dpwnODxo)Sml5ppbr zzz);0Vzg!Y>zCMq^9>LSkp?t|^S``}RvoV>?inpP*#_~wBRe4LWd2ll{goGXv$H~C z@#e;_-Hf~{&z?to<391vG137d*1p%5q#`r0rSS<`Or4Nc3WOV(Ith(m~{xCygr|hFY zzn?j;LV_#~I)maWOlCF_C;OpRXDYC*+o$48fEF(a3DAR$%EQo4uR$Bq8L8~m0b=DJ)O&oT&*pLf zfHkrpiJw)3^lLTbIJ+1iWnN4}ZEZ%(ew1PX22^xF%p9|k1!${4Y2)*(4fFa}+H+cO z>WP4o;VJ-~WMKNfyai)70ROVw{cKSFbH)W~x#EG^1euvP{qmnHtDmH*dP#yxS2j3K z&A2C|pIC-Bl0iCmbiHu+IMbn(S?7BDHeMhouc=RZumT6JH;%D$s(%5WsMnGtH=2+- z!*V(j6ssDr07?3T@Q#fC3Q+xKS7+eRn``5UZmx%9L_Y6A2*No<;-&T+k}OyQNaD1r z1o|Vl_8{bLcDxNJ8{f5xKX&H|UhQ!B%PJ=8>3s{4!CtJDbIjSdX?p>1;kCHuo;?F} z3Sq)~9w2m~5R~kqnEV{{&7M(C)#ONRxq1C z4Bx%DPBLu`vA^F?;T4lfgYqhS z2q$cYW5SoSMLlYsy=g?!VDv80=W_sM6q^-?GKMcg+aO;rQTa&zOBOk7w z1M#v?y;Bj7jLuwbvxAVt)90C-{*MoB$4*c-g2h+c%o7sdc5=6F^~r~bq+MDwNHU;= zRY*2gAL27X0jQ_NnYvYqcKE?^Zy=Pf{|TW?b@A6w#Ix>!RFKhx!S5&EXO1B5 zVsqFjE^TVv?Pd*;e&P{$_$BD?XBI>cG7NDXW2=+79Wq{zHsPM%X8KSQr?ms)B;5oG zm0^f0Y;&cFLcW!rZvk1Q2Rnr1k|7Df*r_hQnmDz#KUhe2oeCDG?C!byYrcQD5J|*i zgcCU;lYZ#bumf$iDcp!mx$)ou6-Ql1$({F5&#bqG-Yv#|yE4$X2GXz>Yt(t9L3Jn- z>xg;-;~nhp``dZ?{%=7lg~OB=Z+-P^SdHR{5KRsx@!#NQY44D;&{S3BSAopJtyS=q z9a)5|oUC$~*|evrtu_Amj@cXXwM2gXXD8*ukVfrppufc7e5>h;o1S|rLJb|xQ63~c zuz@PpoH4%sC%+nImJ(2w`a(`7g@G{V^Z@az0`Ojynu26tUfvUCtGu@9m)^H7PuRZH zpYce11>%RX+w6#ZZg1T9A9mFvt%F}64Z@P*n<|x(Xb?n?ouyJaUe1Lj`CERn_z+GX01Xj6bvK%B{a)35y^Y%V1g+Zg+Gltk&<-DDVEySAPfe)FCOpQ3Lq+{fF&* z*>20>MnP>ZY^Ug+&e-!;dd|5-%!{*9budzFvc9r#!mcZt4R~6ooZf%9rDR55`J+eN zFz9|)L>H8VeRrbf{21i7IrR_?RbfAH`E0s?bW5xoc8mHi}eD`CPf$>>YngRbZ!6D`~u0EBZ&U{l|CbUon5W zUVI$}pt>FY$NDKtmi+hkS+!7)A)Q}+-+&ahoyeJg8|jBES>dakLlipWUX1G=_jMfl z^JA!Voa@Dc&8@<EwFk?WMo8W*8>6EnpMRP3LMvh0NRdL{+Z77fh^V zPCm12rMQ^;eInFg>3r+{@5f*L8x}OrziQY>AIZx#)^EjHTO@4tnb24yXNdXVk!;kT zCbg>=;&`o5yrzDyIm-RYdIta%%zx0Z{_gfw^>Z|>Sf4&5r zMTE2^&n)70ZOA;YN%xgVq+LZU>FgLO?MTK)1+`UR(TdtJO4waR&w19AxW7;^5^`j` z;1^$|5Pe(hVNJ&~uk|A8#!IgHQ;iC-!WJqX6G5Xp?oZEw=POi1XbBOWvSCxv0vnCh`#UT@-B67P3xjIB4@yqzpl6Sn^g_5^y zkFNSo4w%}!vm|XChDOY`(l7O(sz(#5g~2`iU3Bo6ft6j16}wem?R+l}UPC+N?gr58 zsZQ{6T%=KQ_T!$-SXp$VN}F7vwm*Cb#3H@?<^^`*O_MDXqu+F=6%7(>%(lFeqGj=dOwHHAFL_ zDm*<=lGhNmKF6#5TwP3?6E-@L^V5y57tJdcPCe-!{v-aP>l1^Q>!1y&Hn)s&m;x8B zEVV!>@SsWW@6hB@&}TzTJ#C1{u8PN3)OndvOtA^~C$u%%rj-3^WjEa|cCsg=4_~)3 zcMKIs*xpmv?Kw}+e2Mq!H^t=U4VsIX=x9dg#h7nhA9{b98zuOqan!N+uL&Cr($!X} z1*>_Y+8Kdq#j7&eoZ_umhnaGhTI)=HM4T#F@bzvQ$1?{SUAjLH{~q?3S}3on7+T3} z`Z3VB`ng;AFOh`#PF*jMPtdLU{OMTSCOBMDvCpi0Op)|xWhjs%&@HhYH`_n@^7m*+ zj%_(Zy^yZkC_#SKSMKi0_epsxtH>|oy}#_@kCiiCopD|Fl&|Eus<+|^{i^{roR%Vu z+hJW81LK-geTGVO=^Fh!#xN&F-zfhI9&3Mg;LE!43*Sg)2ahOE)EWMRU{o=?Ua{hf z5Nv=rm11(M|CL`>aph@(=erL=FDNMFjAPWbPQ9xDNw4%t=)hI7r69cx!ys0!VbF=^ zACnfzC`5@cWZc~0`=Xr4?D%oz3{_f>7zdD)3r#It zo&}m&N%M(j4DW9__sfIQW))zMs=_>8q<^gcG5M3?D57TdmX@P`VS}Aa(C^uWE8*Pr zqdRYIqpyaxWM-X-yG0hrMKCxQ%5ObN^uuf=CjLaL=^(%OE9D~hKb$O8xZaytkCYYaEk2u7>-QNY z9@NC?dK*3LLv$3}Dy2m?30Oq7cs?}5(uP|e%I&#tyP))BDbyPp0jP^ts=b<0R4W#l z6XI_o_Vy^)e#9y@2tQ(<5brYTb{3VkeHHHfqNr&!@gc>%lRW|J^4;q5;q;Zh@m&wF zx^6wTb%R=wK2lPh^Sr`*4iZxeq1jj6YS%jXaz2+bwg6l%4dZcRPmyk-@+A6i=waO( zzoEU}Uxh$u3`kxmQcJkW$E5g5Nk@8!iC{sWr;*Kb*p`^l-1|pf_Od1>4wg{eo$9_o zT?A;iwqRQnU;n%~peOdD%;x4Vwk95bt^_~&%hlIJ7wz)S8-EB&{UEE55WdB|hYMg2 zVkd+?{qV-zn%kRc2X~0`p0bI04*UP?P{Zq&XQ8O=M|p_p$zpvWqj(zPM9@8gP$uDtBj)@M>Hg4z6{-1@z4IQAaXRY zp_j8|$~idyDk1N+>};{9f5`j**`l5zhlzmC$Zd?UCOu~&4MU;^V|aAEZooEEfsA{9 zfM0x)@5ImcUHN_7l4O-&#D`@4TU>!$ijKTjKln>8PxIQ++V~=1p~d@JqMmc8D{xO@CYRm-!=;H?fO+M5&IgMLfI&UA{xea#mu4v(F!IdS3NX{%20F7dv@ z=s&Hr7a{9-V|3x-)rQbij_?th)#_=Z&zy0i_ioyxey&vG>s@j0HFvMETX(0H*$)}k zn6s-;Et`=cNG%=U818??sLe*J& zcdujPq435kbA$QVR|hVjJ30`HClz4>`uX18^ehm5vPY2WW!*o)+_g-83F_{v z&t*Jw_7ALx+rM12oeL+4c*BkLd0#kp{@*KK?k4n^sr(`kSy(sznxf@F8$2kLs4#6< zr?ctt8c}6T|K!478J;`!Gb3cdGscEQ%qSdeFhjU~Hl zSgs|6IH& zZ7H^$j?=APwd(>tF`_GbZ|>ey6rYfv>Dxc~f0=OahbQu;d0pCV&geEnrD6c2($`Pz zdYr0du3oOR@ykYuNFK{1_zryJW9v%&ywlsLi!Tw9w{tNlSGkJxP{>i8(xQd7 zA)5v9taVt9-{(Zz-l}Nr4}Qhw-IBJ5rl@Nf=jN2ZAz8kg8oJ2n*=~K1LVMkHy6e*e zsITGvVDhhMSny3c6s)@6|8V`p>}KMOW6wO}_#_M3esF|kZrmjllTX&B{G^oN9Xed@ z0)-@rSZJpC+p6*F&Fh|q`kbMH^oFS;R$yb|nxoje2B;lAJmT}O`lN`%zv<(O zZ0dVG=S=wtsmtA7Q#{5lsG8%Ajj<2(2z0h9rKs8m!LG3N=w3!@cITmclH|Mo_3Sei z{m)Rbo0s1lzZr^L1bf98={&8%KjuCcG>npj4%IrP_Wfoiwh$zN>Qdodsrc6oy5xF^(h{MIbST7&0qOnN|4o0SssHcWF{A9aE+nx_I%xMD z!** z@7N)uTIo^#v>qo06&cnXiV`2qzW7oO_yF}R#J=mDbQb3*T+ZvkI1IjXa}jaJ!Y!hH zf%VA$nttfhWPmdD0FYo~nTp-2xnzHg_^ko;SUt^aa8b~N=!9gYnPt5 zg+R2d91K1EMs;PzK}0 zCqh7Ee;xr5A!Q`pt7~K2|7xNspt!A%6rBN(F!S-jcnWv52a!~D7PjB zNt?dB!egs{^C&Bxi16*7V+a&t`JPp-hU1TrDfdclJh~T~G*LoH83@ZNe+NES7)}gv z>R76URu|9^pk*c!+N?AV|3~-xw^A>-s}wV_?;37vSen^iI$aBSx|`)9aEc+5zfDlF<6sj27Kw%Ma;68lSQwMwVq4tO1+q8+aRtS%#Bmk$wy` zzR1x?D#@KUMgDQz;eQv!46e;q`9U=l=bj9BBGU?wp+6Y{xCh{v=<{Fwka!XXv9_bE zcQmpMx`Wp>L}8`7OBNj=mlQ%z6##zsUH|Ah zRf}{VO=OMDSm3|&Ce#%w!QjCKu!XOV4qOptPD43`%Vo?*6x{E5h8k5NJA9+b3+ijg z`eH%7UoxLrFC1v;W<3it>4Tcl_Y$O&XIlsbrYB{ev*rKUIzaIe4gdiDDc0stWK*@w z^s&LOz0Q`fT!JpQB{p9@@s!!Ei?9B5TOuyxYo&`K(6=N@?1A{1{ zEGG?5eZ#ZT8%f9tA>doNPNS>_R>_g4%hIKK#DcP4-+*EaGq_qcyp!ic+A#vB0PoRB zM{#zG#iJj-X(IC_<8)m#!Z6x{mT*a(*GAgEiToBIs4~nR|NjG*UPRYh8*vpbAj8iL z-85MG{eTF#YR@F)%Q-9v^T3Z>$uj38FtDIIl$W+${t$kO2~mL&Cd?FQ(K{IDc_FhU zAXK_#fyTOh9}o}&fQH0r{99!a>ej&VlR)f+!q+<%$WZx`$n2xM-7F&sou%W-*6#tY z*Z<9o4W=seZX$?03t>s$B^%KsuDnfwFc=8hb)<3#Z1XK&wl+oNbrNK$w{c+J^y@qe z#jYa;H40G=s|PVCgk;HCoU-{>Lc(U_5xj8>cQE#JUWRPH3$6!lJGlGN#mOC+C#TsD zcwwH@!dtY$6cjCTtxX#Z(oj14z zcQ*(QUW*f4zxb}XW|%;c31I0UfZ|2+>t=`*IDgQ;K**nlyIq%A7IYetlxDh;98K~8 zs`HxK!@0Jxg8k5Uy27I&ZNizv3RrQ~4>3rU1ccn7o*2{5t+)%D*Tv%N#zN@PV0>+H zsT3|RREgd=di&kudvrGuwRshX5Rwm2y4L>pt8n~NOdBJAru}_;R!y@lLtVL&Oenbz zK>avRV+iD@P!&QtG%!krQp^Jzv1jE3vL;A$r0jHE+O3rhPHi~Mce$uzI^*y((j&O& zo63sf@9$_foJ2OUuFTJ)8ZWVyQdL@j8?y{gEC&2A9Fx#BdlX*R3QIAG?LV*3uX~>$ zrML@p=aftfU-o0)rTzmbco4M=1pVlX6%~Vzp}slZkMdSv86Kt>dbN|(hS_~zn}U3#&pK1)e^;=;k)yt{Ko{+H};u&R(b7=3WQ6hfNANErs1Zowd_`2 zgThkU+1q{FZqg!X`9z|C+UQskLXgBZ0#*62INtOW4vg(Cbe=FR>K4){W_N68&j?^@ zEmV{@3dAcj;6{SBaBj5H<*&xv4$iMH`vrl#R+E{!8GW2uePwQX-xjQmJ)=H)j7 z-j4^!^0<5^zE=uZp-H zgnd<=P>~FTdVJG6Sk&0xZcnWd`ybL9e#j9zjc=nEQ8gBAqbuN%>PD!GTBk)<2r_vj z-z5-0Y|&C$+$f*}qyh8cBicQhtc*yft^zjoEW{9Z)kUA&8Gy(B!WD$m_=gfCMaaco zY^Tey=KyDrCAXTvO~ZK`T62;JdjnAY9x~TSzZ?9wjs4Ytp77p5L6n6fr1%RCR(r{)LaP6t4QUE$Z%N{jhb;o_TQzgl`qzdU> zS2*UAY!$_bezgsm{{R8h!)WY)H#AC{&Be?;u2yJrf+ZJ^%HQj&55jFj1Zl}_Ik^_63ad88|3^9dT5YJp}DKlKPSeWDkJZAPp{xO4`*}myP;6=*OZd< z9v_3KnY|bcg855)+KcTTm^p~2U%C1AVZuf>Ou8}^046UyCP_&w_UGbb6JP|bhGnDo z$2L;2>lnDYLa2G<6U5W5ov(S_B3;|&zV8Su$V{Y#*$-M<<(&kfFPeAKt#3MRACZG$md12d$P?c*VHc5T5_RlVP*(iVCEX5wu=lx zFLU$@1n4Bsc=!gA=a8a@RpR(r!mtU2yk&1x?bQ&A!{>{k6t)@C#++z#1a&kqq-=O+ z2arz9KRj@P@|-I7Sfqu-)zhI3r*Q8Zl)bCgFS$ZWGL8uVAo5_D=y2WAxnHw5pv z;2XwuA`Hcc+W=&(Xih5kO|cSV+JqLlGs4l?1G;vJ%jQNre9vw<^S{vgOR-(Lmrb0v z4@L4OkwndTj-WrEuUa-K2r}L$n0}X`fC)B0kmucsXFoR+VWp+%XZ}6`fW+M;>$*?5 zJJl_rH=ZMwYp+ml0b$HKvd#f-qy9=Z>M|?}B|V`rwxr~d?}#WK5_Hh+X}(<=bJ7q* z4ghj~-nXJl8fQOO*6~fNJlb$*M@UosuuVO5dZj1-iRT|P1q?-GM|_Q*!y}I%41?vk zS0BJQwT}4+F{@oT!SgK$P-EWYx!?jPQ@~(Uh1Fn(hkk39Q_cVjIhR_3(Y7f4TWIa* zMd3{1W28t+yc4mGbXO?23NcbWiV??tCAR1Xu;^wPR^f7n^;zTo|D7*r_AmMtEyV zt-L_XE34M7r}0aCA|~X2$^dkJlK_=a*!ytr{Ke>MA_mR__!dsFmh4L;FumQpfHIPS zFgFVM4-iBvaD(}p&n<&Npk(LpjYzD{=Zt268t55_);}a?)0Be;G*nXd^t}hTHyv*a zBOp)OAoPW^rxW*}Z|f}c-ruecxCk%Ovl?HHoPoeF3)$H>oPL6Lr?X=6$E%3CKC-a; zVI0g}UXT|8>@xgi0gAeL%=6{m(GRiw7{w{ zlKcR>=4OB=;OP7mjNHi?*N;0~&maOzKA(EdR%g zT3@=XUL3%vr_0OPbf6u7_tk=vY|~UJ5;ii+dgO;Y39CE=L5soKYgx{Z1Z^ICWXy4f zG?Tp!W_t!a#W>=Lr782cIRd{#V+s)7en2u$cE>QefBReK$FSo|Kgq#C&TH!N44uCC zqlb)z2Oaz3jzvp|!CVPdNeP;t-Y@EsNE68{nan9oiq#G|8tHg;vr7E=Jqx?xyjy$CW8GnOIwGU&UKNZMjR(~o4EPiVSf{Se6 zFKhPDH9U+shpPz#Y9`B$Qe)~dow@-I<@?CeVP2JhR|#ex*h)#CfB+^wW}Paeeo1SL zFmndEY=up`>>OvzO_dv%xJ6PlpqouODO>3Z+X5EUP{Y5bA7(;Pr1rTA{;YT-ODglC z2b1xAAk>fs3=#dR>qZYk!Pw)2z-Mp40^l;nO(Yhk>aj?YJc{a+U+To2n6YzOXZ@|1v*$`WVPH!O(PmsLC?+5XX(4avVu4oRoJ_VK55?mM)c} zN#xk$BEi(qAE3XuG1Vr1?u%?I4*YZ#5q-g44Rk9b+b#G5uT&nObjpam{!hd9Inn%ybz}q&sg~I+B7>?^ zxe1es+4ubBABKki#zMOde#w#pnNyQ&1uE8%T7C*#t|8y>zqah}NN48lCP>5rDEE2n zo(tH*j8&kcOaILiNlK>ZmW0b!|N2_Y;qzvzX`F%N>px8s?G;@(+yA|WH>jL~QsdLY zojR}Uw4}Ju{WGeEXbkkkQr-qpX<9$r|6Qe=A;sx7_PjXl&5{!3k*$fo24Y0V!-Gfi zHBqW~_QvaR=c}pMthxjTV02%=P%BrK^0qw;1gL|=&f*8~q%RjGet~$W>pXxPpnnpi zI{ft&wb2WbwZtkbRRbqmM58s|Ghm<0@6MK*8@`AmTLBv8N8Gf z=0nnQqdV0w{H!z)P|~$RTrIJp#;m-2X7hTP2K6#oW&Kss+3#Xma$}CZcpt^|c@mOf zvO3QT{)~xVFQEGS)ib6qmTUeMh~g*&AP)mwxKnHvm)af(A(R2Fu*=+|!H3NGkcNqp zsMV|Wqlgutiv#(S|86IMYB0g3EWi#TK$>#)!RX&;T0AVqx#GL3y;61AXx^kne2Sx4 z267v@H|={yvz`HJ3Edf%%r=&zJbc77Kur_W4kaW3Nim z25`-?L}16o?OcSD&)FWn0P)9Q(`%L+MO8R@$S3gkTP8duBZ1L|ZSx25{ExULAk%qtQw{<6pplqFotWhFSaD`yjkkW#=W6q7nIC+DUaLo zC@<^0@7{LJ6_(Mv6bovk$SmL{(8WHUnRc8DZWJi@dGc&hLh3%Zl3p^eHz|7fkI7dj zA?j;N=um(hn45q75R@5TuN(biax$nGj4crlu1pV(ZZ7beJ31_IZ;=0fKTC=i+%SjN z2A@($R6XeaBY@+O1uFw5{kbW7O@0$(1hA6TEeG3ZfElVMr~pGqHLbGn3-G5h>zBy5 z+{D^bn_w6P$`K%+%mSB)juT-a%fEms{Bj}tUHdaC4rjXjpBmr`uGAX8+L_2+w)GWV zLKF~BM#J|_yJ`QqfkC3kj~+%7x&4ozZ!`v`WS+ZQrzBofCQ0J_E=A8u$RqZT3n_in zzUY~Fu5N8N;VVPuKrHcyR7EomR$ID*-g#pI>oB}663QfNyh47%iqz9+hZnGFLE$g2 zkbOTQ6_$C8KgPzpn;mD6KJm2eCo8%+$TUNh`r*56u=vg`Gf=YOkkOKlONt z_)SXx?g^-nK9U^vJTPLgV>!iDB3RFlhi#GaPf8WeE_#xY~dJW@%aU@7^b~MLC6FI*0 z?B=|u>dtvj2>%AOmOeSp=-d5CV5js^wTsfGmp)tC?Z07F9Z|rXD!X_bc_P zvE&a3mvlm0jFI+?fV<3>^hBjc&#L+C^soIfK%RAtGARVF1usHWR~vXj{{(n4xqief zt%01A-x?c_`ITS>kE$4Z)UWQ5JnBq8^9?ar=a(6lGQf-)^wzOP7luJmqGpD7pm74? zI-5_d?=U;lxN&Wkj4d2sq~4PxlzB8X%d9K7#$Zxem(FbCMDVJhy@I(Ul%+?xtrS=^ z76Oj0A>fB~HfZcZL?D$=OSRZraRfbsm@wXmMKCD}2u*#(D8f7{I{tCfpo<0MORbu! zrSjC}L)D~5?gRlfc4>lkLDCL$5haOtL2};ztbeh&Stx;IPY7}W?p4XBlhChmg~`wk zFhxjSo)~IF>wNnIX8q<<#ZuRkV&PW0+;|fgbv3@ zu28Yk&ioINGujiwr_N@g2DQ8TOIZoxdik7#kVQHj`Nbkf5;|=&9 zIG`{sT$h7`b0N}LzvYJ2J{h6c3_GJh1iAH;PK5K2FZf5+LiBDQG%inkg>z-bgIq2U z8JY(eaYMjJ77-64{{1aA*F#-sK8)-{aKDa#8$jVXD29 z4jK`s0H|vNK0JDsbrS)XtnZb7(hW=tt1LrTI=mM^EW>dl_!OFx(SsIU~B+w_<_(y+{4~<(wsvY3yFT@W)T_Ctt+RHYIr*2KC!MM~}OXS3Ui9A5T+GC4_wwYL(-fUI5O8gu{qQaB|MblK>4DRQrn`XGmst3J)5p7)_ z1@H>N`w6#q9hPdc*9Sb@5fvUcf0u*65?`O_aLf6egR{w1IR2C}b?)7La=HXrF8OYr zx`TXojcWsY?uwaHb39E}Khzt>&|j?@gs8P9TWnrdu1pYl*uHHEtvfYTC&gOV#l1cm z-8X2XfLg2mX}#UZxit`@5^#ZvO-*549~s24KF_jlc<~DOEVbD-*Z!O2tkXw&_qE-=`%IC9>|z?4ei-U@G}ZbMZyN;2H}_vD`b%}+;&1!$)&DK-7t>tu zGA{a3-K6TgNhrj7vaHMf)BeG=NvH9QuuDZy`m4wT*)-w#vmqdjSs(_q!hsm$$`E3dZI-MXQ5h6 zD(-8ui=FH?5-nxwXW*znsf? zY-YO6aM;uC(6vWXGbx>(X$8YMlvufZ6Pf3{R7U$d$1o7Y(%okfZG(k)c+O)`^&`gP zld)YLVw2~UfaRhNzL9J5N;<>RxeF^1#C6oO7)wKj;BJmFnsimiKbGpg&B!8`6Ff+h zxT`hPz7)#zl48cyY}3-QG@|qw&v($gQ@)fU{x)99d6#Tr$}$a6NDX{~GX+@3ubYz% z@H6=j8f^*gReLY7K2JnApOIwl-dxhwyG05!oF3-La{^^Je?op1l^i*IcSXoT zE-Xaq4(SiNt(;X*=SPR(iLbU_i-Z$_^)gq2B7RIhO{BHnrRF;3GPeKe+;H`tpTK&gEzVCBB&9o4ChMG|a;mkQW zn>I`|PP9T2*2m~I#<|r|JCUkFe#?@AqM2sv7w8fJ z^;7z{rUU)^d{&%fqe~%6=CoCLYLm9xxIeC%2V3RM>W@@ z@nksxL-ZdErt0gUd&oc7-bfqcd7QKouHgEb_wyh#$ zti9`131>y$p*2zCx!}Z>ROQdK?Pp+5cp97wgTk61_N*$?RYrAl=-C!?TMU%HufYBl zCmzj|7H`!kuyTceIu0VaE%yy`qwa>f9o?>s)7YE>OYHMqY@e^t-Ih7rgPqC-*}y`+ zHrwj$jrengQ2@}31bcw%h{hHGW*;6xHLcs~Fzi{$79znM`uf52e!<>{<3)t88 zq}O&zt^gQVK%tspyoDZIS7i}#wRwU3c|pQ=38VpAAS0N3yj>vZINouQG%dG3;^LC| zKrdH;+vLT!R=86RY$D~kW9VG^zSWy5`Y=WDO_}cXpg?NKR;6u&+f6eed&+Q(2ZnoT z`&nMxk;!=@HaFv|em&V0yC7#*FLI2!+%1gfBsZFNRzQxvF z+c+WlrV9*YpUrFMh+KN?B%j%0E_t}p#TfF(klCf!J+RN_VD-bXcf+3KH;R9TbY4!m z2{ypoUH@)KrMU&eY|LTYh}t1RbSB3>@u(x7IP8`9HO%@E^xpY#-p=G*yD_VUDk`V$ z3Hyn87C&?wtjarU=g%1Q$1w>6eoZ*JOm*sElpLW@Ac^B%uixX;+NV0Illfn#ls=kf zYHN!wD$ibXqQ+t@#qU<)-qtdi+c8(3O?{f!;=?Z#y%*?3@(!qswGcl@Ufho#o*lj4 z_*t#Kt}ReNbq^qjmt=TlI=z}Fw%Kma{56u$v3+!{tkyYA69tf>XWo^#YFqc`XdTK? zKN)B1RmKN^N1;S7Pj03;By5;LpVLGnC0h7v-4`9R+YfpH6u2p(KGzFJdtE-?a zbKmK;;CkXY8#lQmWW3JqLO9n>$G-0I)ti){VOJ?z2mx&|{gr{S*G0ST)IZ)04If7; z4a@FBvP>J!RbI2PdgW+|&6+HX<@GrIRi<(Kt+{>$+}s8>^RQIKM7U8);Z|O4?TZFP zm5R>H1L)rejk3~fd~=d$Tav(9ri!kSpzz+%mh#Y*%cHJXd4gSN5||kcR=7;g>D#z_ zJ+fAsHBTT2k$WsvFWqy}uFe9?k3K6m1-BVj7U34}`K2Z`4T-*y=Z|Z%^l*WDa+}o- z#kqoxD{OctvRStp*guv1Rvd4-pjt~9oAWwo>}ASw@5lGjaqHrkL|#I1&SdD+spIL* zYXsL2&%~$IC|Ys_kJvhs{#rszn9QtCTUD+9RoRN`11tpKHC`iB<3<1sxzSp87}muQ zWx~0bOYg6uqiXNkQ^-k1|Dy+VFK!?WTd5aragmoiaSq6lyQid{`FtJs-9)(PqKb%D z9H_;05b5!*tg5J?dpg7XPmb6|GB@u_t;Tcmn~TD-GguE?GQD>HgS$((IcEmwsxoyE z$q2z~>g?*VGkZ(91EMS6$t7tdI5Bj9Tr zQ@TH;g}E1Dv1YTl}2x4 z4{Rb-w34O0>-=AS-jw{|6mIgJk;9XThxl`u1isl;_X!f8ms}|GJ2xS6LhC2gBc`#Y zW1)NbYE^(Cn_B9r^K;fRa}4hucZs10JA%2$da90tz@{Q5dd%Tw%p}7aoiY41hqbch zk?CSIYKd0{I)xY9cIVWAD6~iD5>luJJ@a z?Z2S9KY^<%7+yGPf$y2+Yd^U$lAHIs*?gXgP#|M!_SEmb^R4#N(FxZG(m8!E66u-+ zF0%yc@u}Z-TwO(;UOvEic}hDLSbGSog9+ja=+ZX_p6Z3pH1kDvFezk(7B@IV@~1;} zbW*|M?;B0Ll9ojCLZqaueZ>|`j46rzslI{0c<2)M5>5L+Kx~+RZ3n6%*xdmtPza;s z==b~CyIq-PU?c6+2Vo!0Kh`EHSl)f{=}bV!vCNUwL;8pNrx|6(vEmluy~Ehj9jcBQ z0e|M5yilV*>JuvQ3s?W|5tlhrV+>>`riJbrLwm9=96dA@5sJ*&tJ@de9jQgG)sxX=^li@0*A+7yP z$oNNo_BbcVRMI8_%@rpyn;NFontOdoQ|!m(kngFftWGwMH-R_wqP1%O00HlHG{61T zp{bGE)v{nCpe--17?7~wc(i}2OLMAyw?3G@Bzq}H1?9PXN*&H;n`W0h29GiRHdc0P z#*HMI%2+R?yxw+r4qgdQ?$_Vq8a^c;n93^2-xi-JzMNdiZ-g`#+@)3?9}6&&gH=U2mEGO?}UFwrRsFoIsyk4@Put ze@S6?re9tjCC_Cid10al^zIm(NTbOXXS6RjCRFm^D=3@`@{mnfelF8DH(5Dx{*u*- zNgH_^zX}ph7uD+%rUqunTz~bwxa>@o{u78;-4BN>58c|O+RTHTe}ere_Oj{kAkO8N zOdq{UwT5UF9=RZ??E9N4ow^*I>t5dd2!{DLNc#lo^S4D^n$GJT zxX_y#Sf5;Zq>p5m<4JzgPr8Xbj7C3WS@ZeZ{&Gb;vi#q!+h@)*BiR*C8~EHX=*lZX zgyo^BG#>u? zIY9dYQayR_McOkwB5-q0Bj;G}RlU19e-8H5@-Z-*OJ#$dCI8?b40Nz(4am}hVmH^P z!)8$&sh{s2eSmw0b5ENa@tn*7k`^dY9^G~>lTI!$|*b(~FI`A8`veZ^!9cQoQ)@7I+urAqM_vUsn_k_W!}r z^jIZzPC6;YehC>wun$XRRf1 zH5KOq(p4v4Z|zLgI_L4+RI?xeWav+eclf+D?9>Y=%$skWs@BC&=X_k1s?2RLP4?Ul zKZQ=sdyK0p2RKNQyEu+4@*>Y1vJs)?7EvtSa6_kr7v4%U-x$ar6}@@*n7=?lLIy>FW{F+oK9*dPwMa_RTcExmmdi){PrYCz4Xm+wK; z;r_FUAiVoT(7Sj_AJWNj!K}noX>xW>D{8`i9CO7UJcGQuOD|9Kj5_78bo>M^x|FnM&4ePEOi!#2Ed*|Ss~i-`h2n6Wjc|x0#`XASGAV@fofjeDQfv_j9GatA^$o7;IJYI4jatW~mj<;i-t|;n7lPv`Arj9a<66mDSQ}u6)Vjfj<$G9 z6W^7D`(0u1df@Bxnj;8;@L|Ee*+&;Erz1WNWY5ei${_nxG;&4X|>MfX_^{x7iQeIB@ zSgCDn3j<3P+OoZDIlA@Ba@KGo{e1K|xh!>0L#kit^ec+Z`!Zh+4Kxe)buOM0xo2J= zV7ROEiYGMH?*)jR_wb9S+TE@Fg&koQHh#jPC{|e$`N7xR*T>8$vHwNhC*O;=QJ=c+ zft7qTbJ18_(aS2fNQo_mCWg>$CVy4uO~y+B?c(_k&6VcLv6k%gOxbXVBYPb^Ggi|p zo#YwhnRTfoS6={OaZqiF)wV zjpyiVm;jj;Us2d&+T5z~;E?T}r)M=v4i*B&_tG#DsvK6r&jb>@t=MdP7mQHd&^Z4Q zE8=olmh@d;?k}oewhxv*bTx)nogc3oo z#_TP6B5wLQ7{02UBonpkGFdns{r0=kw1;#PEl$`%@mRw>KaP;m@`?u`Clph-r?a_% ztr)Vq^krQ3GM=^r$erYJXVW>Be?8))+zwgA8ZOD)NQ-W06UV4|1y`{@EWN!L-B?)u zRo1(noZW0p@1~z=1lX}RO(YhNA?kGTbjAV@21Xc?r9bZ=DjIf-0Lnw!9ll!L#Ewi4_^=yjuT; z>0@PI!OR(|aP3a;{oGwut+npzd-C}M4ZHoR{Q4@?eq7}PFK2}qG-!>`9Nxj7`9k3P z3_G0rS{9#@cvRR#r*!x$8`C{-{)BVw%$>uBU5U)Hm)Y!rsw8rRA3V~3x3Di9Iu2}9^%CJ|Euz{v~}98#FX?W^~CTtnK3_@mLyCw zycf8C*C!6!>t%gvvDcZ6dqlr&m?mF=1qnJ|1rofhCSRQX5@z?UUySzdESTpg^Z4&9 z^Y*bsveQc1-Ddm!iWO15H(j-B#`T%nSB9Oo1)qyP{RWP;IZtq`&A9MX*-x2O)W>c1 zNa4qP?SjpEQsi0wP93)d^gKPdwbz~!3|GtbPYFo$tDuYD>?yu9z|uyDy@)iW&x81S z@(@*42*F_+MD-CZ^-Ie{H^HOX*!_kJ`2q=5HkM+0_v%jS)}ykOtSU|r?nT~-E@jg6 z-Yytz+!v(fB>k*f+-Fbuy;X0Azounl9E$Kp+<+X0-V&^{=Y$_KmR{`yF;Roxh$6hX z&KY)wPo2~KntQi~FnUyRe38Ah=ov!~ob=kj!wqJMBF2W>*bsxWFI>x?CX(w0{lEUS zYnz1}{L;GnUNV19^@*&x!PCCr^6y!wCl^>1}hjb1)h*?{m(47JvUXc~b~ux_QWK(a0&^%Ochv+;AU%ZDp0;m}|O9 zQkB&xj^O(FQ72o6vse?Xq9KSxC}_Mc#Y5@dGe=yS=_8NX(&ykCdW-tre{LCO2+Mx| zY>OyP^Y_CcGVe#-Z}H_uWQVsGFR@GLl5B+x)b#@H5RRZ1ve?zhv7d~lZ&1B|{-*NK zo|1lk1e|%f>+iO}=WD_6YYJ0fSqe{ksOO@58dgaMyAzUkYDP_#+_n=Nke~L#f>uyx zRszb75YpHUdRwY+-QuQVaaQ-iB~=#?I^&uM+cb&(6+T@w>lF4Ocz(mu-jUEaSCXqSt|h;R=@YxSl$SV}9S2 zer5rE8rJw)yn$*AXk<#xMc`H-t3gS=$P zkMfV4(q6F|v6#>iO0;tXY0y>pQK)l*KAts~e>KyY!R@WFhtGz+FF+L1uAbBW`hHNC zyy8Ch@hExM5MLUI5v@#14H(P*kzD>B(%1Q2{{g*HUHzqju80q<%w81Cowk(oe6ybq2;-uNI72B zV!G{icHnF1erXfjZn>0V+4gp<7POy&rUk#sJS8Er|OYuJ7WkLLg1 z)v#rqhwM;?(@8GE@G7}}YfpQtt^FbJ zAKnz3jUN^gr&w>JLu$hHi?iHjK>EkmBn#g9GT~gmNLu+I5*cK!&gg4xH_3?9ii)ft zD$VoM6L!sH-XTs^&g`wPo%N<4_0e2=U-l+Ya)fK_afOvkVw4=j82>e|n7%!IQ>Da& zGufWWRT58&uNF3XSYnQl3ybUOx8!xDfq_UagM0CO_Q{3L@ryiH)*Gm2Q6SW5STeMk zbMV03M~vc?vtFcwjr&!rH`lgQg23u1iOL;zg7SJww@;Qi616(L%il(VPi zVxdge2FpYOfV8i56m4ny!YF+Kn>mBLM0GwtZs!BZX=s!qa%;I(te%uFha28=52n`D z;5X+sC0BG22FM28NX8XIWSeD~LL7&=6n+ZH(A zP3Be}r};ZcS-d`C*B)Er>!UM;pZPVvjhCOaDjxe0t!8HIa!guf1U7zmmjilGmvWpQ zOZ?7903y!NOSj7ccc75U_Yf=3wDAyMqJM!kFX~#Kb>GZRc5#x*pu^I_&08^_-lB9X zz3VE{+%$>Dg}?s+>A@!Vppq#^DQO$z@@4N!t!}K|raT)tdxY7iU#p z?CDVGFSGp}2-)It6M$!@Oe#tgz3rNtxgD6Q`5=DIrsGwVo4`>oA4t5 zrOP74HId{u_GT-)5L1Ex9OfGO)lRIWG+4hDu-_iyHC>;NVPxa@z^rG>_YM&UbRuyqxYRgNv>vIIrGy z$UoJU z;!^0ltNm@FRk8I2O^?ch=Arb-v7+)ktey+uqo*2S?C_{5P)H7?YWT)ZOw}Vd z6IdTynEHpdoWN4P0i=KKpr7Sk<$vNtmfm5MPj#73O?a#KJjY8!ocA#Kqu`6FIp#tU zwlEg3&54JW?RJFyZ>I-}?B^BeFLU7fm1Z<@9*y^dE9tJu=*QAhc(KI7Fel6xaK-r?EKz7p0TbEZ&>_-RP1a&6(gb-Y;eagW_A%lE=#d$?jFLBYu@QjggyYMbD>Sn*- z&)YvZdK8yCc+GSswt~KSWTeL~D@0;FV{r;;4+9qytaolP=lg;14jGd8_5mO6ql=-O z)v~`@{APQkY(j>d6D~R2;`A}=h|o+BS>59^=Wr&A$T$-QPu$hZ3*1WvF+c zx@tc?F!T-gydUIe_N6eEZlBhMhsG2^qftWIo9c#gQ8z7SzBdr9%{X)fyC(kvTI<@+ zWP#b&L?vF6p~$#7-kGIGq4aHk5=vkIbC{ZND}urY1-y{Dw|!ud{-b?v z_0sNJ4;>vgryHwTaix8c*wgAylVxMoXy83s1~EVAQ@Fd%_witE4U4HUm<>~Kf(pO6 z6)fA;i8fZ@xs<+64+INBP(LM*7EO|3cj^M2b*IFSF#L3<9=}N6Z)Ak%VaIsvAXj#} z9h0lJpmz!s^vVl)P?PJr%0%zAHN_qk7U?SibwU_bIpoxdFnAVg@W*e7+)&;%+)bD& zsr7LSAwklWfv!H6ppXD{tg6ol`amEA3Hh{tsWPts>5PpwNv%ljdjDkGkWlk4X?EvN zpuzb2ue_cEhaaR)KX|#>f0=oU8rT0~QDBVy0mJ(*czlRiJ^xL~%29+IIgBXHR4^UW z^-TUE0_yxB6KdmScspk_G_$jr>Fqtbg}$-4xJ@k&J3K?AcJ4rAY{ysNa+>w$kPtig zjtcl!Lg@-&Rl0}~x@B(WBT{r-K-1V&vzLJC%VMARGBNRb|CrS*#Yi$l}&OE7m zb%ET{Y8_GK+P7|@8ASmkZbpP`o8$%-U8J*wzQSWa9lf@cff+9S$!THyY^)@03ab-; zoA>2u2uo_<#l*_F2T1Cgiak)4+H-j+wpLmr86E<K}8a20{}VfUD(sV5)K8ZU&kw!v*q3%I+Q*Lo z-8d1{_X%--vhlA|S)u&FNvzd#4+iKwdslC_)qD3>p4&QXpYwi|O{O>;pZb|ad{!4N zJpMZYH=kz2I;g_?rsKdSw!*~n>4j?C1@Hf~I|>h7tM-9-aCs^^&0*aB0*;3?a?ngv zFZ%G21V4GXD`?;NStvmHat|2>?wP${F(G7s8RP89#TyZ2-=HYY+ntBy*HIxE!4kZi zzRp^xSN!m2d1TA$b9wNvfx$8rmoane9=a*tg))#|v17`l z-rBFH>%WtH=kV978lQQue=p#kkxbLOF>~UDL~rm~;?-4HBr*uYNs&Iz`u{7w22k-S ztj3AWPj3rX{RaF1dqmp9Br)P&{0UB$7S;fFz#= zn$Ll?@e&^~s{AZyyv6t+3GHn^?oh=16==l4A(HQp_Hd6de?~+#NQ{);2a{~cSr?n& z7)!~6-#`-^dp%e4P{|L1iz|)LmJLmKb={Cf$U;YBDQ`q`sLUW*WzP@SAhOI3c^uO`PnjYE45j9|Dkd zuj!LVFp=l3Hpu(>qrXpGZQcpZw{C5BAr=Ht+kMXXOF#+>aAc}O{3MEIutuSV;nn5cP8)ra% zev*6aaI|vx)4eZ=`scnv3$RbM@AI=U0GG&9qqy_uonW(7SL=Xkpk)Xx1sm^;@6kQi zuO~-`$%m(N)8X?jMnX;gR^i@G45SB8FY15dB*#1#XcN4gR`E1rDzL|qb=48hS!*?{ z;;(02yW`VFGp4`pd5n`nCw?<>`>_3NF7I!<(A&8|D$nfi^~6t6bF3?LH})Y5&DV9< z(H9LC^ia2Ngi#s^>rhb7+5@a}IZRO0mK78E2!v&9@V~6f;@3p+3@L(qyYH3+%uF5H zFzZY4UIfZMjx!~|;N{?|-2NGGGHl{)(&C~8I2-UC@;5EBsvRBmzH`b+dc`)~JV!$n zgs`&KcVQ=U2Dv5=5P>3rH`NLJUG32p5pe>)*kSWCCYkSc-J789u~CEnp9MhAkSUSY z!lngfc@ACww0-Kna^s? zrt|ow5=%BtysGzfvWwe`lVU@toEip{?g{VX$E~U6S}*X6I#?8M0ajtO~ObzRn&_$ zc69!X%b8t;uvfEAQ^Jjn(8+%PQT^f>5D>Cj6ArBFZuZf3YK#vGNU5qcBMb7tDE*^o zzVH_Q7ebDwTW8Qx=~5-2MLT$jyO0v^xwxJyKYc73svege=?c9Xil)dEnQHt+?rCuD zP$gPGCyKGLQUOz#FdxA;>|_p>0pg-kvpplp$@F;wC{Dg}cR>hXNnN_DFl6guPoN9= z)AR3s1}HL}bHljQzeYCdj%kPQ`otRSz8ejOGsSFo{`GNQK31(`mpkNqtC7sRVvWw7 zN)=wq#y#?}f?#d5frpju>RlyD#rncZ!YD>2)xm;qIDqaIWq1;&{GfCb$$OXL^M{nB z`3-YSn*pJZVH43QV&di)D9uS;UH_(-8{)wA> zDxRgK5703)nNo}%0twUop1h*VaG4vwysR>Oi1*v-I3N- zTEwt?MPxdYYE-hb(JMM80o=02^O?@0-|JH5fjoAG6M$ixDjzR4xa~!=cq>Nl{2LUIoGl3Aq-23@TD8s1OMHxB7hWts zd=o<{ln8B*B=TCjWZeE*eSw&4Xj})@ol4YwMLVyj5+BFVJODElG&bSz>N=zmNQ~J5`W_B~xeRb8QZ#nM%OyAz+ofkOi@5*&;TlJ8+RmN;~ z11826+MljTmq@Z|l_9%st-LBux&R+fd*jUcO{cYou}@!8tJ?7kyI|ofQvQMuxkmjc zDFynHD!Q`vN?G0?QCHqzd@=DRByWa~lwxRVuD4;F=#z7Ix+;?RjDU@}?Z=zfhL~%u z+{BW$ktS?RY&HA+j_JOekqJV^%M)EriM#hu`tRonjS{mUg449^r&TWk}q{K>(d+`YBC?-7ZVAxs)g9G1DFUQCye7GoP z^sC^eqC&?f&+;5f0Fwhd4NT^FLea)35-ucR*hJij8)^0AdsgOmtGU%^hoU|`VP2Kl zcTzWu-KF*i>b%8h$t>=$>}L(AY(hy|eo14lYxG3{Oqn1|Y)FsG`#XBmV6-QRnpO@? zl*-dGCt1YoU)2V9GNacpHWbvmnSeK7Z*ZF+u_A@a8%uOjfaHg~Y$xi3+FC)DK82Uw;C&%p5kfYxeuzE-mTRVBkhE4VH&6$@bUZau11hN`Sn%*Ip0{GE$+r#1uW+c-MLe2Is{4_t`tz z<&4n?U^#1@O^Y2{YS#%JZAX>W4!exSyFs_8e~e>SGn-b7~Wz*>n~szq=33gJF)~21_(!D4$RBTBda_t z7!-HNbl4C}ZAB#gG5B0BKXuAfkxH(E^xIvzI^({MB`162yZ+IMP2Yjd9WoyBb#9bRx2o0Ey(XQyHc4XH24kE__$mJ07r57;X{3jfj>%WBHA5@%|HD<=GfEoLO5Yiq{;?*G4hqrqsb-aEy)_y;if* z5h+|$uYJC|lQ-W$VNxyIDCUW$SRmjoU9h3A)?IM0SfB_43gnRxGH*Hx?m|u{)w~qj z(_UoMoNF9K{>1{!i8g8_>O9q7o4c0vV)d*zJ{Bj)l@^|^OD9@!Bk9We5(X!I-QS7e z@K!D3;`Wtlam4e{i@*0hEO<9^KXECltF1S-%1`bQ$z1<{Q>PA*x7Ys9zQQaX&E1`f zlnC`J0jkk{6-txFEal0zAYZo+ysKF8M8rjc6=h92%PRITxWJ&GbJK$P5qti?PSIwk zJm>Sf?&SA|>UufV7$!KcDJ9qW#R~g?Fb@WD;|z1Q?(l)VSZf1I9WS>chLHT9VT`Xn zfbw=Nn7?tYXcHK#^Wc-<(einj0+xawUW>kAA!DOF2xNdXu+B<+iK{PV;7d|rj*=7W?bJ9?CoMY#C=zn?Y5yu&6vpY1esai zt>kFi+{=4h`*PLV!At6zyZyJQP6y8B!q|JZTL=vav}3q8BmJz9l1!mJSV=ki-7qf~ z0W6$@GAI3h)65v=?ve!y+fN9-G^xBi?qtY@JPyUeMds;7ea?woN37-iLphFAiw(OLR9|cJvz3#BJ>Lp z!1-*AO6ib^@PEIrUOS=n{rn+3L2sVo@xBk=0J24PdE?@`q8ig+UGaT&udD~lM$(&q zZ%9H-(?JaibkG4u%3Lqm)l#GRrh*?2C_1P4bF5+)2;}sgadzimC9ECH1J0HH7I%uZx&YA@=!@vd6b^CB&cR z%PN(>5YHLcB6XE%Ze}wGTDjKrZ@poSxbJdm@-{^8P5?Lb8bp79f$|_SfAr#VJ^4ic zl~_h+774iXbLm0FzT^)jT z?&afe?df+zpl8DzdMPo~59bH0`f_Pt?`RjnrNR}<^)M~|)WI=1kWpkrUoFarNoM*u z%Q6xs{!7j5J6Ct+@-BiP@h$NNp>P$OsC~$18^V|d@BD>K4qI|5T_0U(4AnK#`bWxOM z%;o0~-bN;Gxm2B+9p-Bm;N(Ad zPh7=Jr}oJN$f?5hYhS7cX??{KXDw||_=Rb7ZKM6&nFCYvsM zWRL8Sz4zXG@0690N;VBb2&rUNga|2=k*tvMJ70ai_kDlA|Nj2IkHh;oI(p|dp0DTg z9FOxn9}i0mBV$HQqVnQ#K(P}O3+RSo8Qx4k1zyPWPm^74V=Ghf$WpJF+EXg>IO*8} zF`tWIdFsu7xv z<+_CGh?MZx;IId0}!faF#W zl_|d-3pRA(DIC%g0e#C049si1EDp8AuPuN8>gx@U`pIl z%2Cd?H(*1K{{b`iC?l5nht@l7g*Aa8b5O_xli;hnsKHv`HkKg*|1Z2IN+_0I6WB}hF}J$Oe(Bj=fA-m9h$Bt#iajS4 zB^xU@3$s#gV(Qub&P0g*OY6$yDnYQuQAoLB_}~m*SYvlr!f0TV_!56P7i-A~ zTF82o#1#kEAw<;r_2tD%Z0Y0s2(i$kR_TJioUrfEc`LyC*p2FbIngL0E{@P0+xL&I z;zs=M z`)WXB*o_KgFCO&80%eXp&KgDnE(^48#Oy=G<=x1%$J55jMLN&+-w7hNH#60m6O_w5 zbFh}n>sk-Jdr7au=;&3?_j-o4@1O|A%I7ClpP?}bgJt||sY}o>d2``fh4~J!VV);O zI8#NI)Go;9m52i^-%{;s)FS~7`id(@U?NeE^mfO%%E#lKSdk>`8Q)_@?T{OD80?qF zR-F-x;CIipKUk~2t{3lgo)~Erh+YEofRJQ+i*C0$qs4%SixWrjs;ov7vN4+CjTEI! zxr53TV|R0^N5*5fDzLy!8_t&chGH_j1$sp*Oe$x0?#o_cZhc}o`#513IQ~5BGsIeJ zL|>vQTGnP*o-*>$a(~%J=)acJ@B&6FPL6>aP3rDQBq<01j{0EQq3L`PnA8Q4P&mXC zDR|676X6P3RQ}3Q68%mdsY)A4 z@@&S_9VBn5F!rNCl6x2*g(M5mB-qp(%bgqa0}n zy;MBE1#rfteAIGD#j2f0=+>S+Nr_d#QTYdI%QWf@-~Q7cx4gL<&cdfkYk6&{yIhMp z;J7-*|OD*8p6KfEnqWI0E$t@4q+ zJJs*selNl$+{|;{TiO(FO3)ZB)nIW+48Xv-4`fe|L+m_C44%wLYEu5*&*qR4R}|bz z(MTGi+q@y0rRxMFTs+~s-7MQYg-%*j%FF6cnYG~lWzh2WU_XEQmL=O2GGIeYweteT z9T&(#>k?8jE~n#NhlYT3YN7HUp?)?&m@r@pNzm0sV!og}gY-BY4)ag%7~nW@;I6a4 zJ=HgQkDCLMBT5}jf(cX}H*+iQ&Sc$#?O(H23Vv-}I?K-|ju0_q2LRiyRw_}lqEk}w zw8s!?Qg7PITasvm3FWs^dVDML48)n*VzIzxUF6yELvGNb-nCnC4ixFV93(+sF!Dc- zfC$=>)+p*%4neQ(Ke86WQEqGV3Kn*y3y;Jy%`7GB``Nr^8U zUN@)}Q2|G}^$^TzKlS=EnRTdL`W;ozJb%`8wc)Vm*CU^72>RH3jyDWmxrTDJ+-V4jDpC9H^w;RvkaDpG2^H?pFJ=|CnKzXq+neJ=Wo8Q21;l5Eg*nbqqy^W z=v~VYdAZ2x_BWa4z!PXcl-)x*{>+|KTFb~o8VDkLUtgd+hwu57!#CsWj{AGQ9PAy@ za&~@Tvl*g>>vzR5hc1Wh6u5kjfR91w`v-b`o^%RV$(Lgqq{UyIH=x#U2g4={hZy1P zHdBal3+&Z;Rib&55#4aK^5LOQ1W!#NQ8XTQk^pCd#U$J!zVX-ORg z7dsO-)`r7z76}Joo^W)S9ekvj*SdkXKxx*g6U!$+^vdBW7D5t4DnJ|#sJTK^ zgASTS5)hGBFPQi79e{hfe zW7_|(zh|O`7e4cWvMT96|LQ-#g1e5S^|wp(Wr1t;-*5l>`xwNn{_lm6RsG+kK!G#) z-(B&)2jqWk#eW@yxc^;>|6Pjzb*BF!hW|ftg+|lQTLf5DBm=QU~3Gt%YFZe(Bk<;UN+>?l|&O{NE8GUfOGsvRfVl^RGG&#_E0Q z{Z&ERvf_46**%^MX#6jCnz%>`e*ks!-=S}od)r%|fOURKGuL11EwJBPZsKS8ZH4|> zcL$Ef+kTDmw~WzRqT!dn-!wR#T+hRfR#1%!e8_uPSI~ijtCk-E@vEQK9Y76=oSW*n z5jOCva(Fo&#{ecQb5Q0xj}kH;0Ed88UC}-e>*hN@DB$?yuKPV6*M^;&L2xGD;WIhi z^_aqCfWU`K<5ig1$WiGKw9R0c?|2eI_`Qtu0Q~5Xxh?}3OyS*#1MWZ6=>2Bw;<^5g4XhSR$`Ki6L_Qh20pb*U)X7wmqfr$@2Vz$Yjcc+2m1c;#rG(`q)nsZI) zSu-~P*YDCZlBys?U2KTzm%a=M^8o;*Z(qFdx;LbdG~}B$u~ZT|WZhGztDPQVwiJ(Z zM*~m3UQe50zyTsS9`J0_jbRtkhPH18P=axrY%^H%B!`#NLfwB?sA`SyYW7ZrffQF! zCwHPT`MJx(ll>|gbk6X73g}qsd5ELEuygePh<9OTkV5)RDKSh}R+ zay>3;PW;?q0{tCtX#cMbG#y!5Z{N~>+(+Yzwp2CZ=Y{kkaf~tk5lI>*!O*Vz5m)^p zU46$v)nh9Q=!r;~R(2`Es?p~w-y~;`O{C?dZbi-G5QRHAZ%afPr0WgW3Wd6N-Q>A*4&#=u z#Y-JAHNdca|3W#LLh`^(<5nQ@w6x0x?q|7rE!`KN5fMJIsOt%;bwD$(&FT#l$M}Sa+h}dk(X2x~Rvai) zl9`V?)c|pZi#lY#_vd34gd<)Egc;P-26}{Aehl$W#mv8eK)y#o6~w!6!(=9bGye1f zT&;|1d==$ffa7oRC?WLw{N4S?yi0Ym6hFEOu)0=|7t2SZq*ohapF@e@Y*}WCTJYti z7C7I7AjHPZ+?o`t@9sj3vU9Nn*^~iNuAHqit*}Cc>OlNGW|lx{Iz&~du82IWFvbmrAr_%V z&l?tY=;BoNmi|;3#^Os8K+zbg zlluOM9f7V0rDfl7{DOWpht-M3#ROW9H=B(GGM6$HL>ZPVp3XpHrN~RQb7Uxcd25ip z@%8=$*m!r7=<|DbdfyD$etiI{DWhjE3inzQ%fi4(z!TLgKz)onEfu%rCqDFINvOx; zQ5D((M90efPhuE_g5Q_+-w4B-qC}mHBPDULW~Ik>6}&6h^QVir<2kWL2a5y1mhPr=jDeiDGAAlT}H=$Pmu+f%! z8QRC;^sv^6iE5M>M|2WT;{M5Iw9Jd96QB;_yC)K{X3l zm-B7SS_QUK)vv{*3k^t|i`zPzQ34QVFhfXFF0b0`=q*p*F=Z6eagTA`(W6=32z1sPxa2o&1SB zanzRG`B?br{13FkHcxd;PBjO;|3^Lk;u#n-erPdfo=1XE?al9~+qn8cTXrFG92?TF zq5E?=3JJsE5Z_d^Av;|%g_S`i()lD^ZC9Y`WT7Cvi4;pr{l|=aKWHBMBBJxnZ(g=* zv7tktB(+>p`Y;rL9jxvdI(v zI2$Yz%P7JV4h%d`x}Q8UQ)CEPW)74PC^!~$Wn~am=%lNu-l@!ZiZJD0Bm1x;tANKz z>gIC}rdJs9D7pthyCPr=W0}O!J_noE5mBMXS{u;EfttkLSbc>3@kZj67B2eUw*ov! zGra?x32_O-#b|91eCb*50($S<#Gh3el1Sm3RO*qMky7ZpYc!_i)?L9I^~uK|*%9O3 z2clyG&^%KrQTcK=i|3jlT5S%IFv>@YJe690Hdm9|Z=MMOba@b{qE`-LEm7H6HAE~) zy|hx;OkqyEb3ffz7@<1XTsE5yA?a!KiXr5{w319XkWvdW8H=Uv2z~+qrweF~lAs&0 z1hi(uKrF&40CuE=jr-zdmz%^^Q`=hZps@A}dg_J%sGr4VgGtFjs%83WRmHckINEf) z7tpfN>}RERffcu4LH$!<4&47`C##eiLSko`N`8e>@@T}IKy)AzpQfz2$$OqKxFNBR z^qD>T~=8aDcwwhr?kuO0Iq$sGt=tWJvSB=&QB^q=<$E?=Gu{=mR6=G9-9J82TONx}xR9lhIakEQZ(J znqyBJ=B*(6lRs-2cMEg4<{2r1NGPP*6yM&S^bW(8EBf-P?|e9&9_v=*3MjHh=|OEg5@OE47CP3N_KP~xc(3s zs)Mo9=mzc%P|h3xQ(GjhH3+JVzn%$uHw42`D|Se5H>XS9yW$=KN+<)+5Z@+~P?JTW zC7ONJ_4D@%QUHCjBvBJ2AS<6%mUmHm^e6c{S;Ov({F*qu^Ztfp6z!m}&_5veca{FN z!a?*6TbNN0MY;otMbzHN!`36fPp=WxH7ej^9;vsw!Wl%0SnTF&>0o|dxFm|m27qV> z4SAm?(7h>$*w!Km)+qpO<`R8)7_9B#!chqF3PD1`M@b)RgQZ z=X;JM4T9J`WQJ}JWiu{A_L$n{W-p^R^Vr>U^R#E|b8@Y9G>cC_i|-X86v1K0C)%Hm z%-MxOs;~&(PfYH-GsdKf6eOG#{C*#Z-sWRD9DzV&xk=qfYOWo|(H&rJOAzTyG5`Dw zVxbmYBk(v-YYa z5AvKPtWlFR+6gdM(HaaRkkdCVJhbZWMOvH}p%lHouc*)UvS{)JSY3XjT?kvW`%!pYw2 zEC0kCu&8;a1_)&fm8nd7`TUY|Gno!RTCKne9H6@~`A3E1gJ6ZAgptkeddC8Lstbw7 zNOvo7NTg+32f~OZ-RA2yOqo|{`9imD2{crVBhSp3b<5i@%7WmlWWDDzsYZ4s0#wLuMbGhAdK!s zgv%r857MZD$x9)z# z+*g)WY+(K4|DhSnC4y=%dQIn>w%`mw>LUu=rQbpDou$v8&Rd$=kF=n$tYzRLZ77gC z-dm`?qrEx-8II-ohrn)Id$mBq;Gc8{XclDow;$UbRgP7iwRFOn4A>Atn5X7e@FWo!Nao zUNA8-mUPxqN%XA(?e(>NM6(wcj)v%w6v?${$?F@Izf*7qGz%JFlD~Fe+9@B(ucZfK z!AHgvp_}(=8?Kw4VHl8!io`JMU81CUWUP4(i|S;8)&WIkYBW{}{JiaL)FCqeBB)$h z>{Kb3t2OmyES7XX>_KbaGOp;yNyU#0ZHJ{b58fqy-z+r+>FPKl0^+? zoBV`Q{$-G%#l2Q24q!c7t_Dw1=(LB(f~^$)byA%IOL@4TH>{Y&adic}hC#5#330S0 zQh_2I2vMuX)PVEgCpuGTEZU)Kk?+vN{PGR0XZ!rn(}m%#Xo5SAQo{)aj%D~N=D(iu zU;jHu)mZE@C4}16WF84eAMC6wg-OFSv9;rudG`oHE%lZ*-8Zh$j~Iyq=m;Cvw2O2r z8Wu2~wW^p&gzy*IM;$+3 zNTL#(OFbk8T@IZPqZu9*-Edrj+2)=K*l`L>;b_pMu@q^${{4e#oNb>s6c<~bj8%+& zvCGxs5paEXf;7{I@)K%79eMHN{xl*%Rvuw}Ztdw@$2_m_+}}CnN_bJv({Lp^1!U4VW}6eC+79BWEc9n{)+(eO}(? za1kLumG7Y;XP(=Nrvyn)jYyrkmPgQC(Dn<8WCPjdz+!&*{QE!6^qE zA{mD$A|Si!a&#WG2CUkCfB)hxJCQ}hti;7)AEu5E86a;J(!rlzxAJ3u-NQUYwYNmh z>JLbhjnJS#V@&Z(z02B=R~<)LKOl#N>hW?XhrS{n(-MwiO-M_iZ#n=M0=bSW%nGH_ zNPJ7`(yIEO0$Uu85c@_WiX)mRTYY1uYnwL}8^XRUcUj0Def|!seUwrAf7fVr!EHE{ z3+=4Wn4|o{EB(Znkn--wi&ZKA{ZF9si<3hRS7wAh-~UtIg^HenAMQ?B)xWadf7gu( zAXC_1<{Z<<eOCS&F1&GziomZB2%)Cwk^;*cgYnQo&LlZA?Xc7ZOMVe&|{U zeOXT!%6$IaMOFpZUiBJV6;%x8MT(RDz6%jP`2*hOX`h6T%}D(|4p)guGLD}TsWN^K z3!qJ-mj`;bN>Lx1!;t(G6P<Yoa;@Z6m`lI~(KOnbqydHVvkIW$!ToWOXgXC`!;?2u!#5jX?7?{k5 z+z^}<;*J_Zf$A|N{(xW9ef`F`GjXT(-f#^1f)2r>(($T%og z_vO74xfxeVp>&{%`Nj~<8$=n`xZWfZw zK}akXJ$+}t8r_*2xxy}wsH$iSVDZ79$I7;`$ZMV%H4bcV7MQbnrp`?ahJ4ZOg;IHz-2 z5O2w2)ryS60f}^gb0N0T44pXB$iQnxeN96*(IN~v2_hp(TG!(@(nqxD=kJ4@-@OHA z%y>y}wG8Lb$y*X@PjB!D!Lbkt+OX~s zfmF=?nz{RB5s-m0963DA8aTQg(m?Anue{fC2Zg7WWwfujaTnF~s~%4iM3A{iFQs zfXmDlBm}AbLi+&PB znfe>VKBs=m==QV|TuBEmnKpu6v-Xq)%!n#OaK*x%2|dLPL$&t)m!((Om2nyI>}z|% zmuX3e&9n0L<}CJRPmVuX0ISTD z!YRG$vN@etvX1>+hJb~S`3~WOPg8O4j>ZAMr6KmsXWPFELG6lmj7qs%P^>6C!w}S! z_fqnimCyxdHp=J{A&L;LwH06FuEY7;$<~vN4-+BJG{FY<9lW}#enbn zEAPE;uxZ!#CJ-#dBbIUgL|%#Ica=oWfh7mHmfMe-uTEdnWD=ZYQp)9|K7r{uJ3!+v zMONPTbUU?F=*oYggfy`e`Yk64u0zR0RC_`3tW)>6JT$*P-HLD|j~)2}_zFMd`Df9u z6}rDO;|w^_-Yj9(k!l~$t=<>9$b9;;o=nn`MkP1W@AOK6iR5}bRpCep@6f0cAee_k zFKMEWU=YNYgXU=vC5>x&AVlIE8HphuE-Uzx`%v&T2N^Z?m70B!K{X|NFkG@M@I?zQ zFrBw7PU@FG{IEU(9pK-{sjSI2q4pu2F5GBPa5HHo?E}lwp5Z-W{+2H6r73A<9}+WX ztJgqIGcL*S)hKfkQ}zhkq)yI?6d+nKY=hNv^oSmBYVNtn?zdPHk*OaxJqeQE75J_z zCutSe&%56wkOk#K?P^&lLAIR`jwa_m3CfFAjMRt&+=tvhV6sSKyj!D+1BamBdAI%d z8EbcuS=W5)eB54d@~Wki6OEol)S{UH?)5KL-V#p25eC9=;^?m|BWLSb3MlS|D505( z&je06b2@g*-z)``sd<4)^uub-WtM=McD%u-PVZje(RUiR^JFAM+{TaYdmMIASC=*( zcNU-HmP)Uk{I|1ir^qUVNaUR&pbS?0=oN~Xw=`%d9rrO9oO5eB-UGYzAjs3Tz8`YV z86dH;DR7q*JD*m+J3_Z${u{x5Kl_iQE0nH+_vZhL(f!-T)Br(EpyahRJTs;n+O`Ji z_ejm|BC0%xz~mJ8!@qjwp6x%^&W7ze6RX48-5Jv|A|YhU(S3vQZW`d|lLdE-K5&Wi z7%V{yv3C(75gI~;(jrV2=9Qyt?}BBno_O>}cLdT)zlX+iJX$fK?*1-ObD7XC~a^Qb&7N*Pz^L=vmdDu~!-s8$vM!aY7IxZtNq zE!q9iW-BfDw0T)i;<;aO6jSu8{TCNNE3a&A zK!8G&`{2viGi?&X2otC&WK(x?N!9h<$RtG#css#Y5rb&wZt92jL9x&Iz#B(h?r8xS z6Tdexs%e;4?d%d1IWN{N#0sWi*PQ#gABw`9>ciAk`C_9uFGlYLXo2)@yay7uT1)ae zvT8674!gG<6ZfWinIud>SHcZ|ME*m|;7~O_0TI-DrQ6T{T%8K|dWWU71Jv^tLI1?3 zu|)Pf0wW=D)*v{y2_d0Ue}^U$fcgi4?&>=o;R|D#k~O+nuO5;+&7qwUM;1gIwi%Lr zk~G3Edy|`j`hdLs2VGdp1%KM{X6(O7I~~t_%Vyo$#T!@KMa0 zJl3zXX_l1s&XXlWV{Hvajly!gM8LaEM`9k$xxw;;AL2*Ltgf5TN;|ZSEk8QtlzoYC zo=U#p+bKkVPI|3=Yc}uC#+4~|xsnl$d!xG!jC8XeZ6}VuWV(tgJH#TEwQwo=_gR_f zm9xLdOU1AEp#p?~lUqKH_yxU@oY%afRxP$}*X%VDJ&tP5{)J7cgI52QgVoG5x0tNY&7PsP!$+u`3v*{s7sy-O zpLov7sR*%I;95P|6sduOCe1ZW5$-UF+<~Vl+1HP4?h3wH`H^Fvw3{a0Iypx5wz%i@ z%t?I9#M38>duRaSjO#w$xa>NfR^Re^MePysDH+uay2d(N+j*#eo@%_#&4xpbDt4tr zR+lHrscN7)$V%C##I<=Tnb9!+a)F+(DLk0mjiWa~o@*)#w5oq5DEMwM4!u-(;yBDO z`PB7x*Vv6e>odX-y3N=ntsJLZ9PBY5@-cZndRxSCNNv2v^r*J*yLuGbO#``!TT0Fh zM|3G!DLK99lbVSXwElgK`!k!1A<=KBy~?6;((|jJdL5uLEciNhFdj6|jqg9Re|JmR zcX2?8yX8SlhF0Rb&&oL8)BGNtB4n{;|1CwxvTp_394$W2XovTWM;K<^Itxkqw-6Gx zXJCr#=jziM0_uuR253Y6tk=$O4E$NI*X=07D&p^QH1HR7d<3`mJZ#^m=IaH4>mTVi z-sZ5U`loZMMWI=4Lj~=cGQWF98k$@sN^X>Jp|8zN18-E|{r#()u##kKV!t_b%VkXy zUgQPJo@+kKHRrUjI9OD7yZCjINV>^KEZ6Ar2N0Dn{xnG7CWTO>4>X6d~p)oZk7!@kKEx~EwpV=BJMO#1lB*WtwuL^G()s*@kOOm-&FP0Y6h ztghG3H@ZL6zVi*bI+Dod7rNt#QK|b-SX^%;En$OeTl~BB^CyzRXv(&OJ zAZ8iWAuSgl?R?Vgtgl^t2sc7?Gp}O=%RG5Ng zGtpJUlWSW|z@x8F<^IMCm9syQ>X|LAjQW4=#Qr>qV+VP-=3+?UYc}j<^fCqgWrM-! zR&tzZIu)PU8^DyyouAuC@*7rW(OrqKsO`KX9e7Qd_D|Q&jrSGmg+|z4WR5hnq>$RL zr0t|_)#=xoT{dmJ5%2foy)w~xuF`9cTw|4BRSrJ^$tY)!W~o1ZgzusMAAsRv@0=Ubl;5$0oVsvTEY2Pgdg^6Gd zf%RI0v@>hzXNPlfeiRK;vsKF_BpD@WrDcjiBOG#>|Ne;}FC|~&6r|$BCujPfAD&SQ zg8%*}$RFl7;XV;4Tkrq#uaYvOnw75po_W0j|96)e$37+J`>G@dj}TVIq!^E!_pI7nyjp8j}y9pp@#jA@|;lvt`S>tCJLs3&h)9qzp3D%RHuxgjrFv_`SJ|AZp%s zQ$W8W>D$m9a|*8Co_VyD5?_v?!N}Xe{@FG=MJoniIq``uY=gAw0;SvD_+`9&ZOj?0k zi-Y$HPwU5?qY+g*O-jebXZ~!vK)FOFIE?>Y3|#j&1&B=4U-tb0&Z7c`SvDa(QcpO0 zal0>gYf*?k;1FY*r*=omzs6Ta9(9c!)}TtHCnA{53p@g0M7e3!v(VdXl@05Ik`7oM8J?MX>iQjGuPn+-*VH<~_HS(8vwj&P^ks{LFbnq$u~@;r z{bb7c_$KAqG>4E6-rQOmu2!Hw)w76IXR1MKm&kBK4shG~!vppG&F}ULed+p*H(oXD zCa34(5yoZW`_@sdZ10_wMT zO)E%p7nq|PuwD{$!mi`IJN|N^yEX0L#}Bp>Bid{VNX0Zzw^(6qdkWC3a zhXlJqIhCqPZQ*W2|4r_r>bU#J(_o{jPw&#Y&9oYoKqt;cz`N~+OJr3pb})mH@jz)l zBcItC9#zFeLmH<9hKV1ZG>1d0@J-$&V-`C*y3>Slx4Oy{6Td->eY-#J(n{aV+UMqX zH@OWeaftym6EuVqF<|=4H7TUk;qz9SJ*`53ohJ9sVvmgLOAgduM_#xJJ2(n&x(s0} z;=WdszdHTg0NdfQ^K(qU)z2Blf*g`ebfr>gZ+9-6wGIl&b!)6TuV>wFa2PDygws3e z%d~o#FfIvC-PT@-jXaOpi4^ zDxUblGi^ti?cPldKk&h)uAGOpI3eunzSbELj`#z+mocaG%asUEF3r>5V!>;Uh0^4h zet6SW$K&I_o)-nJqmas{ew86N!WUy$e|?63#_EWqgcZ3OQ}lUmeJ7%YXYQXhz|?$)G+4)4t34fE za~1FSBuOpSi8gDnNcG3tuP!(9_tEgMECQetYRz(R-ZsIZ6^e~D7=@82P z;OnbY2m&so?%tYV!Jj2$dVl(cOeTY@8N4nBxRUH!eC7w&@vkB_IO;Au#$Up;SL#gU z7~E(S%gj?(W0JM8X3tqQYO2@Rc(liCN3GE4-<_3*%qu`iR?#txIi&{skSW?490I*& zYfTZFS91=U9Cejjjq9&hshy*j`q=%#{*xM$ShWAjIGD6>Q7!S3N+#|LU)>ELc2Q$G z4|h$B5&q1CpGuB<5(6DlZvdsam$D522&Z}uTceZbBp>-6BfcF??+1#wYDKzj@XIb} z(#?N&)+NVOlh0JBQ_QnA>uQUCX2Bmu`z#J{tFfk&m^}CSUe2<1kpgNkUA5-T=pUom zN|+Pr&Beh>uQTXBVCZbtX^P+wU|6GL&zwuRztg}q8Z@1de+Sv4E7&N1Pw+b^#t#r4 zsBM0;+269JXIf3R$5?AY#WWN3hr}&Ixw6mad;gC{d#Se>UGdwhFJdgcQB2g(hAQuQ#(H%{GElfLwuU)*4xo6 z9F7nSyCo~j+*6G_9-2(?Qj&(IN{$BS4;;Z@AdGAhPDDB?nF)0ZMV-qFiO=Z3xj2V1 zEMF7SS!E}TOUEq^6d=Z{aG)HJ+f4w6<7x%mB!n?^l0Hf>B(46)q_Mn0tLPe>Jr0A1 z+Ih&|_u)wr!Tzpf!m&&7JSd1|gSvbRI3l|r@9(3UNlng{85*c{S)m{%zX=3kT64ag z;eP`LCH4lzXVnUX?GI~Hki0^TX%*S#;Y1U#q-UZ(J~DXM9=wGvP|04sG2b1(yDSlU zF#z>O6yAdoG5+6RUOWr>KLKPtWYH=TBMF^_--K98jSId9A^YpUYD`3gOsVq#3UJU% z`DMX5{^@=G#$47BQ-jMG4~g-o7omav^EihAr~#>`oaa8Qj8{e;q$>p7ozplHLOsC> z*RvvITHA!KKLPOIH^I6QGmNr+V2(#y5Hd+>fB=O(WEltBUjgOMf#dk;DJJvZHO3Kx z6>=~UOpbwbnHkG+1bt}lU}t}aXqLpvaiYl{18JBF1W1)0w69Vx7@zDHUUoHm5V?v$ zf<1H!#2L73J6SfcDoE+Ws3LTGxhzFA%#wRleGdB6k$?Ded zWJ6|4RMSxq#%1A}J!aSx6V6r%zGRX0{;Fh?lZMrfI#t-6V8Es>>HRehEW7Gm4W@Ep z2q+;@f)r+1t;8}T9j%~*BpNw#;UO~;0rTCb3dGzCttnX8n+TaYL~c6RJ#!nY%0qf- zd|>QbX}45T3R(TVNL1KR-+c&e0q%pXlnbxsx;l$w574hnpf7N#ks*5Fb*5|Ftig zWsots=Y|9b9eZDiCm{`x*_(?cll36YK;AYwaM8o~F zq684Hp^$73&vnQV)k`xQ{^yRxix8tK!$&NtnG8E^q+hUds57K=*u7`Q)z`F|?B22< zp=^Uclr94Iq+Z?nx1|)|GUiUg6O`mhq9};S^-}%d99MvCMfPVX{a3Kx?bjFLN5~`s z*NfYs@6`wHMA$^iZ`o>it;U3zUE?Okl5>I_d9R5m2-s*f$-zShaAfhBi|u3&a+K1~S-h zVuic`dFWLQCW8l>O(8oj+@?(xA1n0~J5G*6mvo_}hH@2Yq`S6cNq!mAHI?>U ze!$W0;&f+Z=L28InqMP>_~sA?HZa2Vc8f%I^U-eVOb}kfdXVol!rRp)bhiclF8^3% zbQO}EN?U~{Qk5;tz%~neXc(>KB5S(@%!tmv2a_sePO?>;0tR$B$>fJhvidU zpZ2k1vzA09EpNa%REPM%$h%s%EGD5l_7Xmt(5$pqk6hJydwEniY|rb*r~13EQ?&2O z@xejJgLvTQZ7QX?uJ-d>x?Q%xNQMq?pQII~;pfI7gWx$zC@->nDgP>m<{lMAgJ_jp z2^G4pd%TtMt0imIB_fU)+iRbVU;lDW`+r*^i`wcyX!NTzzOC;b|Ud^3+x+W;sU|yhdaK?ftA5$8iDYq zKsvBa^%Tum4~?wzdp1cp4tQp%B|KU3SkO1hxj|&Y68a4|NZ8;jjBDM|Ey?y4bf0?S zAoPD8mJPPkG8(slEHxo3o;X2^PYX{-iA#*XAM`Vp@c%A8gYt@eWZCu%lsp}aM4k1u z{2qf_@}GTEAo4JgCJ5dVc`hq>t|RuoP2@5hx(~JK)BB={^efcnpO!#r*Z6@}(Sx$_ zcB-$Z*uC~Q?f)eg*=+~6i-JlGZCU35uo*N!P#O3x7(CcT_)%|o;JRU~QC==d7mmPJ zsDWEG%HE|)r9N&)h*&%+Rh_x_G9e(=4-U~U)#BBy`1|LVz#El+Q5#(5EL7p zH@}ksNBjRK`_WxwEzmY#2c4ubPi7gzx3kalTbNu*q@;;4W+N%<<%p6Pv0hv!STiHH z($IgP`N&FJm(9GP|MS~fQAd!`KbtLX zeEr|Ifj`)Az#|S&bS)wV{J(xyRpDSCEvCtG{MT)e?J5FmV^e}B_MfftpGUR6f&^#c zj;lid*PU?rgu2P3Tj+-1|9MU({J-Gv+&d+0|80){x>L6%*rt5WsKNjHMJE8kaQZuV zF;w$^R)h`f0C(z7(Khj8 zou%~n<>EaEg6#o5Yzq2)9@I@_JHd({(Ms++XhLZ5@b0SSIiW&k$8N( z&hirCcp(8`VK!phD}h+-D%gLDuq&R3C6C5Yl(I{WC;@IpjRui!{zrgfq|r(0n>Bmz zfvF=)?3vFStWeK=Lf5a*81vrVe)5@n(RqRE7E=B{*&qYF1%QcK2A!nc)DvePXz5)x zsk3-azQGKf^Ej9%Ao%gQxgO@^7>5qvAM@W0+e}`adipMSi0Diz72o^)?F!tzNW>5z zj{ACB%5ONdsxX5>j|6GAw*2_PJ>)=9AQ-w1@{S->{04AwRTvU4Km@=JHfMV(qy7Sd z-pGdT-h@uz?oZ~ZYoxXSv^<_}4;HcZZ({j(%n^&0T=qItorx5sYw>feno~WOBM-{?;Z0TE$w50E5&>q~*LrP2qJ2As^3u;?_~I4dyEb9stvV08&~*Xz zW~|zn;L8kH3YXAIiQvr>hKi?PQLT?6zh$eUzHspLwjAaGdfpHJA=Zb%^M(J)Ed(Hs zINo;z2_GIqK>B}-*P5qYEg7;&x6$suEl_VS#JLAicUl*D#L}HWmn+sg+G$Z+5cgBZ zhTr(PwRAnL9>6Xnd5`q4HAUVHLj&(7ulO!S9mfXQ{re}*2Fm!=PdN8s{>x||V!x+6 zoVpKcd2|i$0>_Y1e_tk}0H8D}V@4*5WgF-o^7Ot^!PbtSvy9S3`&I=aK>q2T$a zdI)02cYqjuhuypt!f0{bnq51GE!nK_1*K>wZ)q`i$9kcT60$nv|B>Ech#8`2)jZ~yRwFv+dPkFA2upY>u!|} zKk)nXVaQgdFY!IzpLhr))oab37Ju^E0K}H~FgKO*(gS}4)uv3JAvFsh!za~5PTwfNorsEq=%f?+lXggzbWD<#GhTLwvmhCf^^F% zoP2@;=Bkv1K*Y&WU@#1)e1~?dS8E~kRxu+~(`s|(0EpUtPu{hjRK7b;>~Y|@&;jiw z$}>C`f0MTA5CFM4{x7(Up8<`LClTCGP;kwTqPHDlDzRYxdLu3ri5T}`Zr~d@nbwH8 zTXq@59TRX|4X(JSG4jhy*fZq**nj*z(s~wmmqWvG2d>eZ5CMEL^?- z(z>^hmhk{2KvE=}8KL{n0^f3~gMA{5Z$eTgs4!hSN-OSR$j)lj5f(CYbm2TXR}GY@ zjMy(yDR2EgMgVGP1gJ8x{Xoeu3rgfBNIOsM>Hd9qsl**@keI#KiFLb{dbL65{gafh@9@R`zS!s{7Pjd|2Lm-DOjSuw zR3^y1JRp6A(O~G{YH5wxUiqX`dNY>!yam@L)*xF^R`(cuBKO9a7(0J}?$5FsMY33d z%Mxx=Pt+~-HrUZAtY|A<*d+J+rTfwbbF-ivNPP1;m!EFe?=R;~%H~@a2Ga??>mpEA zOnM~jV&l$DWazktrL=ydtOUZoimSJ^&7QhY-dk^>E@SF2w5DUfddqO#w@Gm|RH`+adW(WOuDtm$c(`uTkHo~krt+so}9d%s*P zsULC*g#|s;e&Wcb*_cv#H=+BNU|8$UYu#IP0Diys>Fd*|{oG*oGnBPEp(6rpZvQvv z75Mlk!QS}YMXJznPp*5UJ@2a*Mk)FP6uCbMKXw=t){#8>?pcXQSlUjp!0K6L%U?n_ zbCL1`X*Iv5pprv)?E884d++C%j*haa20(%`jXnX|N*5k{~hsoRK=> z1!+%C?ZcO26X%x&#q6ZcrZ$G^CC=;A?`RbWzX~4wAZejwC3K~)?`q=j`2OE8H6yvX zH2h1DcY###N8p!C0xRl^WH+AhZ>glwyS{JuSoDx?@r3x(aRHrijY-+}v+Q4NR>{1+ z#1iyqtO=H+ElXM*d<_wiyM%C3%2NIk+KT`u_4}6Xzu3f!(^}@oSm&elCFf+Z;{{zc zrU|sQ^V;aWL^Ha^`Ln32!eMDMGtvVcU9SkJCPD4Qw05`wN8A6v+Lykj6f5F)kvG)q z63!62cbXCEg^UGlDh*M0g&9_Q4v6KiU`t4MzMi?}YGST+sfNmD|J1wsvTrLFTXb58 zdG93X*JbXK#B1=~byyaAeD^`51n(EDiHc30^0!kmUlUW&y?Z{T^~)c?C^Gi=_kOcC zE7u#D5&wK4U_)M&e$M%Tx=2qCz`UxpqE3s?-9i#C4@f#4l3e3R;^tzz`d8Hj&v+5M z{D6;{R=2+Msy)h_30-6+^k<_vF`hw(Q&8JDIg@NUYHpF>T@yF8H1C@_nE_Yt7acp} z<}Tp@d;}NBOiq}8@4VWPqw5*|{B1QN{TCABF&QEOC(nB~&Yy%hJ!Ln`xaio=rz~ja zNawV$qXe1n8s!Iz@@d;kacx954X0BZ93E8Thk26qT=byQ{WcewqQ|MVKwo5+?k<> zj>I)KSi@@ObMN%UYu9HQ&M^A-9Mmk}Y{G;Tg415-QjhCK3kZo(7WzQ;BpXLeIvzK9 zZmYGkeyKu86l@9>UWDppG8fKd}_2+eahb>oRsdfVX(`gTQQfLf(<$* zx+$@jrZ9@rwC~-C-=XPe=rwdWO0TxKuR-}@*2{jxSf_KKb%s($#~uQ~iWJH79!g!f z8e4-o#&is0LqbSS%TJxN(pFP3`b3>RTvhq1ZULfvdwVDX_G!a3%axwn1aa?TeNqKj z7te`J+Ys=SXm@qWVsluOiPDAK4|v;#O~cZ#jx=ABBP~{E{*qEri;H=Ll=Hh!4XX=m@fqq0}F(-uO5 z$R42(LUx>XHW}F@MTCs9vq#9NtR#{VlFCZPeSCFY|NnJAxF6lWS3j>FE&hm7Xe9ICZ8;`;!Zz9X*=SH?cip<*Cs!sB?Vf zD@WS4Qw=ZiBdEr&UJ=coTp@~8%A@8Cc*InUx}i7ld_Mp7?3ciZzl5!#Z7Hk5+O{Ya z+qCeQCVaNrl6$C#-+i3-iCCU=^%7+hbGffY>*_g+&v+=@mrb1XjUG&?2o>=*2}TAj zdpT7(;qUuVYf>jY5JmuLDfnDp(s`q;iSHVj7u0u<;UojPxH9(@mv3m&yqHR2Zub;+ zSg71}^A#INjQ2yh1K3h7a#o|eYVUI23N7ROMZj&)X27)FaVM&^{;a9Pxbog$M;6Jk z)1D+tHnm+ipXYOi)=!%?5u^kM*&g{#58v>V$3A&@sNlI22V>TQokZY~sr9KPV9c`S z22iTqDsu+pi*zLK0j=9+y=;)ZooRJdkG!=ij6&QuB<;-7VTG089DSH978MMaJp}y^6F&a7jS}Y*5%~ng+)h8yu=)zw zJ;_dfCcDlY8%4|)yYQ#HRnHbk@jAv-L`ACd?1Z@Vxbo;ZS*?|L8c(Hqn~YH<<2j>4 zr6OcI8$0$!83J+D9;%fG=X7SpREk`mIzA44tvnsSb|%5u@84q`Az}>fvc2E%F>6%T zws8Kw!09;yFLJehcSd8gs6sv2Q|HN}sK$qE1RQjRi8_}=J6_6OU?z#_G>n-KFH}!Z6O^tUhHr9qGj~~7 zs#zlw%hSbDS-xl3oOyr8SE^%BmQsjYNktMru6?p^MfAXpE%NpA&do({#`zJ}$PXWF zx~`5pag`8WVxi2$4H7LceCxzp@6h#PypOi02crc=7h5F$jExJg7w3K`lWEr;s*sh+ z6+OyIX@u4zu#s=f!U&uMP7}|n=#$0rFRl%1tUqWCi_mQfW1?E zRn%Otmc5g#16MlQ#WuX~ou4GFa}?F>VH<&1@~vUT%U$4*FZC>I6E8bm5L;*IK14s{>0?n6%CyA@#p=kP^O zMl0cT5Zq>0BO%MbNu!E>yN%zOU4Fi@ZIb zx8C<*GRB=eU2BVNdx7~)y`>hD+ZPdegYC9oq*iVcJ5u{$_lgYime3Vs?w3M1s+lso zuCy4-GH~z=ij5O25z%`XYzQz)ED^+Q#_&@;32v6v7Y(Ch`Mq*)NRZ~5mFN%mC>1?d zHx{!Y{~4=Mkr~4CXHSTPJvE%e^Vqq62VHy@$}b=tf;b44t|n+Q`+DL4QQ zW{t;saP?yQf|q@kZV6{O`#tKjhEMeHnZ4%*LYqS4jHo~XHz6glV4D}f{o`%DHwrzs(4Z*?n!Y!_BjFXtTje`QJ^El0{OKnmrz6`f`I!NZDi>@b6Z*yRmg>- z!E`~9Gg?UnUs!TRvF?kIXW`Roh2>nFR-(&D^U4C%2}4O_h$Tl|H3sz+ zfIXDk2s(X8&585BpiVG+_D%-l!2iNG>Wa|fUV4ot`Y*_H^1=UuoBkjC>5>7UKkr3w zQU3?dg1>BL0QwVr|Hc1eSD5!SK)Gz|hTtHd_1`}Si1NnDR`7oTssI0%*?-uAUA(|~ zomhsI9KoF77-x#nn6_UFt@jQaA8RM8rko~g4np{?>pmc;5gf*{#a)~O&B+UcU#Cvm z2~e7>sJAUVAJr%TKpX)fv?dB%C8x8`yS)L>;xPt`8Y{U}6tZ&wfaNcM4iU(g5BOT# z=hUfmG&YL0x;yKb&w~4G_Yhr5Htk zvNti-Kz}05eWr}ZquU3+4S6}A!t>w<2B=U z@j<~94?uz93Tu|Ms}7&sO6t@J+Bq2p|2ANdCIH6X9JUS2k9>~9d1=rf1=KU|T>(xm z13^2Wlf<)R_k zeh4fH=WU6UGG0*HJhsE6CIGkFUwY-K>a;Km&q-5KMGu!Uo<*?}3H3HRDnr-;AgcX7 z@K_0fs{2(gQ`MjuSgzW64dR14zn3nWzH~7)`P<&}?+teV2JE#r zS#tv(fie*Q)@p#N%74;(D0$QO*b6vi4a$v#OL7MfG4u$XfhFRhoIO^jB6!dVSUnLr z79?xBKt2~EFkBlR1^OwKvU7wk;aL1JP=WVm`@j6V<75nc4eua#F9t=WUl4qJbm#Am zD+Yx&RZ?7K{ST)B^;cgP5u_Fk^nb4(0rm4DjQb zD^E^Y$c1)gd^i!re1O}2xqs?KFfU~{nw30C0iux*oEM4W64ZJGg1&0^c~uo7rh8ivolC2LMrT2^(alPx#b(<$$Id2nySV))#93exn^$A-VcMQ+Zfw zQN##iZW**~^bI=D)U!rsv32NO>8&!D@?PJFgdN89kuOO{Bo!&|I`m+HF=4A$K6*S{tA;>PuHP-6E{E z0Oxq3-0l`H^p7SmiGXoYHi-VKg5oVeQ@$b3CU$;b!UYIjwL%)eF#X_o*Kn>~bc|s; zL6zH(r)rW{9uMD7&ua|q zbRz9J<3VY4%3?e!Im%K*Glpy!Y{-VutKgZ+YfXyM{+4iSLJSI*O@QUw9-hG%HEQWm1yi)bxjnJsk75&FkkhUdWd zB=IZRhp)8m*WSE)UC(gUc-c4{U(l3^YBR35nVHtbRjD_@nn44 z&xKy8LO9`}nH!&!zSe9v_pk-RFN#5oA|8AcxGc;5?y8$+mPf;C5{}$dZSW3MVLVPpf`TL@cD=mc(O&5I6+EVje9c@-tGU@;j z%Y1KA)-^XHbz2~#T6PA z-RCfyEViV)ls0JVh*==Q_f>(}6@8=$WVRGY3{q*&Ue%_lQhz@u!#F_G)@VoKKVU#3 z&&Y+af41CIDd&I|0F`x}`uh&E3!M8zU^-z)k@a){twb!cgnq$k>y;1KS-%WAkZ2Yb zLVF8T^pymWr?B>7M_~NtYQII)PT+0ft)`#V2M>WY2wIrOZhv@Ybh+v7_h{BV!1iy< zHv5S?jTA()auD9Qh$4?tTY8)8JoNU(HC{?Af_Pvbyx~d?F(PjJPzYPeFEGxlm!rsb zHe(|-+(FW4oJ+C;GCn9RU=qNUQ**ij&9<3~SdACq>7)QJuz&9XnXYk(p~@%OmF!!c z{dO71$ZgYtJvs>8ZF@2taR%1GfBU|h%kgc_{j$L?J|KTn2N6UEnI(Nb|J_97E2xn- zZoBf+65dQcF;MwxBn-mk8>~v7?H59ky$!b7n~cgV6k!(ssJ=~A*r*sp6)wW8l0i+{ zM|n*GF`4qUzsOG66VQAVYd@ z!}zR+8{o?VZ1Q(g!E(zp8-wI7Hh|b64$g{nHg%B84!ZM zH}nQRSjxaJ@ca1l>{LP1%iyz=O`I5dup*HSlJ6nN;A(V#sSP2Hqbt7u88Nq>7f_0D@d!++k1vIUJ?;Bj^PyC!XkK za@RNxr<1eE^}$d>v~;Lq>cVAuVRL~^Z%wC{hzn#5i~0n_l*)|Vo+(eOCrD}*K{kZ& zD<}Hxz(2pP)4!2j#RPr@A8Q}@3Cm20Ieh1ETke5?_zy_uKYTd=*!SjFlA0RWB#bi% z_e`_J?RrjOxz~lOTp&sH(BB{UCPcs22ab>r9SN;!b9l2L5;1M@uWL+x=#lODwT~-r zxQ_Xc4>~|w)25%@IOD&81idg-edEU0zT{sZwIgk%#2~xd zx9PgF#tU9QW-a!&zVSTZHyC$zcN4Rafqz~cq*Z&$&DCbz-Sa{+cLY3ynFdcpG9`Oh z%OG_9ag6+@zHxjdYjUo$gZZ1M%~L#^@U(vo!B)Lo@f7Z78YC2 zeMNaz^17u24rNQEcsJ?;-<4o~&`u^`>lIwU?_TXzHm$b5MOS&{X@ax2f1FYq9ZE-m z*4QMP>e~XaZy(lH+z{oHPM(OB()a8JEQPV(J5|eCr%T`ErOob&3T|$=s!a7d=$X8* z>F@O|-0Zw{DuTm`**d6~fs9CSelmxso0+8bPlTF?|DOntf@c1K#tgH63z&A>7g)E= z0z)SisllUiGc$g8VyZb|=r(1&2c|$EI|USEMyxiaN?t2Te%I-YmRS~XVqP_2JR$~x z+@LcN$gQ$evo4m#*Abc#($A5x$@^uJ-;hjon|6{B3_=?5GPbkmU}yEi5X`7X1JFO2 z8M&42i|%y0tvtl14*G3`?xsN%Kuo(1t-$KdcaAKF=kcus0N=mWy1h@rxBuhyvGG9j zglh9ioylu`4z>+fDT{SM;;z#;&VszH4~GoJxvJKC_?{FqBn#~s=yWfW*##O=A>(pJ3%M_N-H2<4*O~|UZb{Siv&6pKAs4ZR0yb3f=Sq8_h}gbs z;aXM+K#{og&OigGu!pv1?hgE(fYkioCJd?y0LAL(YgqDKE<4 z4oCzfoAdpKwxwF{&fP*y)!uxt=8EFDBXE%SD7Q;sWr3wWc0^=a%R~^g?iiFb0&WmI z=n_uBn4?qp4TNb<>lseqx%mI)t$HGCTCE-w?EK-`VrlemR_SA|eGS-7wpmjRtpfd) zhq9k8&JVB>{Q^*I~StC&6c@QSeRDLo!?Wvf0=21uzACQ#vbrs`%0f13yA zxLw|z>*Z7`owV01KZ*`>Q4_wYZ}e!>Vp5uLXn{1N2*gp-dbnfe=fJHx2F`n$UrukdKID`ed1 z0sBb_-8KvN@iE|NGJx8h8fgZl_VHC)xQgbWgpo3qcyx=CJA%ot)35WGBS*pSGNkS> z#b;q3jzkZy-+Hd8=O}Q$N}ne|T(B0ZiP0u1Pv%SfRm6yf&i^)qrQSNz)1xf$b!M(T z%=Hf2e(UQ(dsrXcYI+ni*AMENwF=2;a~pD;NXor+r~CO8nCgnc+> zm^Ui2Cso8s>lBAeZ?8VpCPQgSQ5h}zrBuj^z~+xA!Q*dvNjgDfV1@`KscI+^#-ac+};5GLAV=7vh2wUfk{GL>mJSw8cRq-D#*m z)Alwnyyp(Z?qY-Fa0+IH2=gM0l+awfAljeWB*W%;ggB`WF|S&`V|a|jCr2{qLIo&3rh zNeB2=PSziW!7Eew>IEzDp1L)t<(Th`&K%FZyC|p*)tU&d+djL{=+8&MR)SPZARWr8 zf@}EGLF!f9YNZIyDA+O4aJFpe(A^O;cvnEu- zY92;EjYo^uYK9SaemCR zsn_H#{TCHM)FismV?py`EA<+;!M~k#G7>t>k9u_SIw&kpl7yrFC;O=Sv5zlZ-=$7C zP}5pnZ(mYUA1(nYrS$?=w+{+r)f0Q~CX+7wQ}I=aKzX{8E5DP2rq;uu;-AZ2+g}*s zn7+ajts5U7>s!31g<~8L_(!xTXwT=o?!PYSe;Q9|u8Yv7*6ZbU&O|me#A@I#6#8;49zQOZEg3ksG71bl96)2swP4JulQ}h3i zvY!H%*DzDcIi2%GZ07BcN-eMS4U&nTK;;}aW|Moy#sKLN+T36PJ=E-BL0}b5QfnyA z9)mR=rF)jo0}V}{@6Rv35^Sd{=w_j0M|xpF`1fHN>Dwuoa4L*|ygiZ(vdisaKhH#- z>ZGovqT_MWUjM)L>pLjOXn{kS#no$Q#4Wt7HyH#BF*@mj7cvss=IsAThVQ`VswL!* zrNBnNwfaF9!uq)S47G__v7j^^0R0u-H7;kQCjPyyF7)3l0KjLM;1zJG4Ml3V<)JB6 zP^M&5G;#ot?XZSx1dwGU*!3p#!-6I>t%i7;kQ+uE&$E4-0Lq4#zrJVD=<`EZ=z47u z?0CuAA;hUdX2wuc>t-b2c;4_nxnmOK%2YC4lR_>MBO*1#v^aTHo0uF664}f5$;q#e z>mbiY1%XdWd`)GECo>QSpFJR0S-G1~?Y;Pc5uEkh(>MlcR5eI&Vs}$N{lnMX5#TqC z00B00(EdB?XQI1Cp^bp<8=Z#%KP}Q$LOXuYcI_?mKX4&blhbeHmP5Bbs!G%bk17Ja zt~v)#r*U2gbTz2gpD>7FBk+z@NOmX0gv0K2V-0G}^i!-dhTw*|J*8A_H_x{TRXS-0 zHv*KGBP|@!5-Pi9vwjEMHw=&e7AJ+I5pzn+g9NKs{XTVLp=eSNv;@zGZ<9i&@c`hq z3*ah3P2uZ~Zv(o~52Rr+2+a^7o=J7$`c+?|9HpS^rw#JTvjaQd;k|^XGZzmN&CNd4 zo&6Fy=-TMRvrPGj(FsU9(LL|p_spn_7kVRX*<=avHn(|R<>ewxvhDtzx@L<33qTIR z(!i?`Lz@g0_D65VbstKiHXJ;@EP!XM6;WIFU(V)KY4K7@qYyV6jNY4fJ22-)Y*Xol4qF#v?|Dibbh<$<}-0cDO*za9Fw_4l<*A|%AsCZW)!HSR<@ela@c0l}r zIf?u^5;b1`=mEnwT8rU`x;~GB3yl8bJ@nu`mN8 zuNa`+6wqZdE`Q%6_&Jbl2%UxS-SvCw+p-`SlVbp7)5vyOQhR$$i|4ZbA7~Z7!p^(T z)W%#1W&rC?NZjY@?^c^mg7)PBqpCb`ls(VS1V`a&F~rd81}$0)%TDjzy42~taPob3j!O+6)Gr?1<-r>1gN%}@ur4j)Tn*AsZ=d*R)c z-+VezZps4}N8fb{Byq11r0W^yg2GpzEmGk=PB323uT|gvBnR^#*dB10RXY-Vsy7mk z^)kf~--okos)m!80OJ$;z9f}`K1hhyLquvu81n~KbC={(eN9eLeVo`p`oljPO+UOO zxae4%2OSXS!<2@eYw!)&SuQvGdVB*1%S>yK8LZ#jvUk2FL3?$RnxWjU{yQc~kI+ir z134S<{r0Iv7LN{q&`PPU7vMM!Wf?78+Xcnifb9_CztIQYZ(bsNJ*9*+M*fS1T9N>I z)Uv=DjCq$1TSob};!nWKm;BZTg$aC3K0kV_+gQyl^Z(zZk91 z67uuT^dA2Y(W49Ury|B}OtS6L#A)rLej0rI37MbW^*ewutu#u}IxPQz^;Gs$*+U*E zrpO2J(92b=pdMcoT)wvnS-{8%y6g*EAmwA-y9th^*~EL3rZ8uhMa5Bac@TY{L}u3( zf(xwt?@~o8Wh*5kha%NT?y@!KcTX97?s?Fel>B@E-QUE!mx__y zRP-t)pU#YP8CKh0IAc;(V0B4u?hfuN*77O9KXt)_sx73mZ}hfidN5LirR#lw+2qNkhGp4Xl97^ zyzE=d!P`8*?Q@N99wy?xW^qNfHf5}VTc;s3^__+B+X&wi=2 zIwIa7n)Koec3gqc%}?iNgaTJtrFT^HtnA&+(+L=Ij-NI)xPd6;?psw+jkr_&yvjT4 z2|LU?@y-6b2tsD|nY$YaUjTwjFJx49T_J#K(kI+3?CWs;=Y^H3OiC7lA(_TWoCvZA zh$^ujiv0D$c|tf7TtSRV5&z;048(2)E*;GA@o?;U zdH0j*@83M}%6>q6_aXptBe%cQ~!>=piVMASYm<4}^+`wKy zh=5F$Uemexr3Zxob?lq*olwUi#fUfIy;qgQ6OpH{di!9byc~QhEVGEI~UM zW1Y2y)ajh2s*5pHZDJadw@#F^ivC+kP?}NQ*C1|7djv$*GxgU%sCev9TZI>tFlQ4h zHd-O-(E{3q(oQfEE*584#uQ?T?u0i=NRO>LlKds2jCcZTn#W6)&l|I!p0N2Y7og4-dMb2B-U0;RY&m}fCf{klctN2<&@tWMcBJ+P0_>b+6byF1u%@~R zurDcl8Z26WlNNHwtN6GL-Hw1c)MGECaqGPTLALm8jyc)B`7XUPlMO3XEE62z%5iVf z&eX*?oKqpUX|84qP6(~fPxv`oE6}^$;1N`fW&AD>E9$qDVRsdZN{#2cz{15|QvQtW zv>~5}M);?1OVH0BMu_7KFtMM5py0kWr$Y6nqt2b{Bc1qNk&$B|(s4;<>^cX-wk$WN z{wC!(K8HW(IM7?@EOECnt43VDxy7pMUO3(k8x6UBkCgg~V{tN!snixwf0*6!xtBczCx{4(F@$gVzl{@}s+ zBBJ_<2+0M?y@F|t&2BV{5WjwoQ-*L&J^{R{atU#FZnhxv(7FRS=OrdtK@0flXS?+o zy+^pwYs)?P!>70dB%MYuY5TA_$Z2j*)<)Ma0C3=y7d7k&bb^$9acTL2M2*uZ_oq$g z>pi*WH&3b%7bNaLI=jm0oA9HXwN9g|af1$7ml^3RAhw`1Sl~b@xvf2pC!Y2*g6{)z ze=ZMdrEigH>fbka5*tAS9V%)GenTn&!YVm6s&;~NCdUQZO{9;=0zr<>%!ZzrvzmG7 zY6cKgLck^6mZ)E7e8Tjg-&9S-a(!=o`9>hupd~W?o^~l)2IsbPUOA`5A*44{2HeT>9N>CevL9p`Eocx6I6YSXEyLz3TN!58^;OHanSl)@$+eMShNew6de;AfJA1S z#oN_x>_pZJg68f?^}I@a%&&O3dqCK*O=x;*{&k!a%brIDOlIPH2UIR^BCJ}H_szhD zDZn6XPRSD=P%e7cA$Rk2{H=FU}!`c8T#9lF~p z8r*&Lk2haVUZdU&!+FGKEx|Q&CCNO@)&RBXq1k~{!XG_xAR`2(uR7uar;>ZYN>6-( z;SxxIUAm)647V#fa#-mg`9Fm8I3fv<^K8#c!n6@_ZvQDz342e5!yEof>S5dwo6d#9$9%~c@w$GeQGLgidkBYd z(4Ql(xgj@%L>?qz!fEDuI*9VE$wl^w%1vuD9ftFSVi`md==yp0oz&q7khIg#Q89bt zo%sg*-<+PA+bburWHBDr6BBZ@;r*qvAOhgV!6zlQuVNaC6Ju5BjBPCBMDqj|E)Qn% zM3J+N`M!1kCi)-)Cpw_7w<9K$=Vps*oR(zcM1>7|WaFgsPH$4R{o4pymwZ zWsUY8LWxUb|Db3RsE{yl7hjUNokddnYIKD9Bp(U+Zg;>4$c3$dl~>^kNge7NiM~3F zi0h^2SYpd#k@LO{r|pXi`9%GG*B*TqzV7#-0DCh`x; z^l6>@=C5vB9GccTCF?yDZFeSFD)5R-BZ$Wl*W>==GxpfyIP;0T*2t3(x|4g)L4P?~ zV_kmU46G3)ISSctJ?%1?&ijJFWmUe(u9y7jv5U-pk2Icc0rZsO;9`_HSIj4;uYmjp zG|$K-I60Y$5u>Bp018IhJ+ZSO(*8QWRT!^*$?t)QSqI`8i6S#V`8*J+z;RplE9*KM$f+1 zv5HJ4Vt_4Pgb>cyQIEzuU2W~>k=IXo8NnE0vg4C6Ux?Z`rnBc>C{@!w0#S%xGu?t8 z3ym+hB8tI)rGLBvd)t(+g`G*eV`b`Ab#W{b#sXd6SJ;uq7{`FJsa^Mv|LQk4 zdD0i_NKRm`(u9mPgDycOXA#PG{kJ)?MLJ>~UG0Q*Ky^Laq6gUNdB0)7A)UZjl{JTg zuVI>3Z3Ae|w%sD=SiFrZ%uRW2DL~%!bly|xf<7M2+B1fEHs281?(D*nU#q4t?!DmLECJsjQ{_OLW3UC<8 zSfLu>CsC)JGH8fHC);OC9H{S?PiUJ|f|E0`{KC8w^2pl|Q0F{nAd4*8x*N0+nT;3A z%^NO@JH9P{BanL8Ww1I6ldNSTcYrj?LDhv@1`kD55U%~=ORQ2NAZhzM-<|6=YLxHu zQr=vv0~}$j#?heE>UHbyn>yT7KkuH@Mzpw*^)*OAjpf9-0b8vsO8HWFU>GS{dbVA} z4>SkqR|^#5S+Xg5YVS zsNjY9BCHJ_ZC?aaS zyGqjnBCEgp^Z^z3{=i4SxRkM$77HRg&J+ZY{yG)^=!K2lESxdLiL?+%G{CVR&lW@Z zXfpStJ5kvz>`3oF6uHlLj5YgtZkP5j;Ku|JsdxH!ySTSZJ6^1Q3hTiUg77Fy%-V3F z5V0V{_;vj{^4EAS!t5&lej6>|gakPTcsepCIZ#Q1p@68zT|W@~{muly)= znQWrWdqMz-W15qPb+NU^_ChSx9QEA3qfCh;(rOoY;cT0-C+ojECMv)y}}Y$N(R) zZV$Y;toyRQkGYXZdBj$1dq<^b|~OFM~qCKwF?NcUU$CuVh4)kf3b zg;xiAIdnZVbrKDz+Ot3sdchL0r*Od#Cwi|h9tmH1 z3_!*hgwi0^!1siV+1~_aiDw}CzS_2%1QJ@WeFX?;Vkl!yn^+0A;@UBjXUqO@oDU2f zxGxF%$YAoxFc~8Arn?s@Y5aguZuB6(*?0!B(z7H>5fD#pP7z=s9=tMBT3>rY+n zGZhozhJQaRnpYA#_R0g8+o2YczOw1=Sf)gY>8U~D;3%&!=t$BzaTNFofUK5hkr3WY4Gpc65lpAcdN z$nizMNYmE92FPcSX{)P|qr^mGHj&Lm>JDKkZw?Hoj;TNX{1auBk4+$YhVwlXiBVZH zAAjr2LDWeyqO-YT!jmh-=Zk)mE_4ojf+^5HynzidY{C#dF}Tg#*uUsc#^(GI7_I+K zi+)b(NuYR}4}J}Zd#xC@Zv^V-p*EW=pz!GlO7P>G|B)mY1ZnTTfO&bgtvoNK@j2JL z(qmY5!g(gp7tu3{L*17U=U2Ne@HSsr1iZlEfH+@6w3B9ud?vY3e>DKSl6vdO`Oz25 ziNzoVdUFhbBeq=5cI}IQeDJLX%LT_SYIq(y{4l|eJe~eh!C{T0cfj0_BmqI1NY~B+ zaSzV*mo8@z+KtzMeeytiP7&P84Ul~ZbjdkJi^O_4_?-O96%%_*FVKi!0v4o~$E?%- zXaHo5_%=YILi^-bLcrPNQ8HyL{bTbZNZ@{PwgyuUYp$1Yq*vLp?)}G@j7V>zJLieAid#X|gj%zK-t-3i|3*;LaZj_} zvcd9?7*fFDUPEEeJ2#BKwWE#>YlG2vqIm;6rTfxZn+(ub@qjYMn8TTu4x82lMMpIt zMv75(0wc)&SQ2Um(SP|YpyDf^fyF2^U>^cXpm`X7EQ%Z*xdn%XP%bxuPdRkZ8FaxJ z5a9X-ils&9Ht7XE6Mc?S>L6r&kp(12-I>1kCXgxkoF@A?$=3lS?kZOvjS4T}-Zz2u z&hBnlr7$c|4VD(^coYb;gDFw>qzaF~NnHkQxO44&klzv%Qy|P#z(xbV=GXg>%zMkM z3MV_X{{3X_a$;E*kPlxW>JUI9Iz(4`cC4flcI#(xz?WS^#GtMyQwE#wks3_5&yg|+ zrvgYN_k0|LRK~?D^wwAM09>rQu68Dh!wV`lJJV-F)mZ;-=)<(J)kMX)zc^sotC%=8 z#!E!D`nh13YMy-I5p9swc=b>C2Wb=3s~BWsqKSQ|V*?uA+6E{UC#TktAlVJ}w=_zu zNEpNO755Jzslpc5dp*}D#g;#T|#qjTPwed$8kjYLSc)DJ|w+IkOh z+3!u1ThPtFqTm3(YI1_8P=lu`e6gQ(0U=T#tk`pR@Cu(IR z&$G@SNIB_}?`p{NdbuxLR0wj%TeKu5QH8baNWo|JuC8<9Ev6j_wklvJnvpLsoG880 zYZ`HL>h5-Ok48?EsyLjW@txwJm}`aqE8NcbHJ{xSLv>-S5!<*whyx9zHNw}@*3$A3 ziidhr9acd8vkDAXQ8fJesZgE6%diNYCO;b`#kfK3UL+H=UyEq10UH;FT8`SJs8I0q zV-CZ&$&b-D^Wkt3U8CqrA_or_9zaz`EM8^9!~I3FbGy@M(09mP!Xv{_U~KcVK8U9D zvLyRk{*(j`Z^KfaMw%Z6nI_R-FbOz`sM?5E3Cc%^G6+9AnLL7i z%(DH7hQNC8(VwU5pPH_n|5o@*)^o|}F}c7DrAhgG$Iq3b#9OcD!s6WhCI_QL?^fMW z^NXnT0x0(}q}#4OjWZdo%Hr9P?D&Fz1s09R$y%aTqk`s@=9Qz(N3u}0V(W=Tr*vR< zkP1OtkA~i$$o{J;4m6T&;d4w2g0KhTxlxs0`)&#}ehorAMc6mF)n)iX9 zNofdC{ezIb%$b0?wC&ER;6;cj6skDu;z?$+H>VWprxyOeiKgOOz>sfkCH$lh&_(^H zvn~?S#Eo@OJ^w4hHUuew=lGR8WLTLQ_G3j_N1Qk|Z8X*60eQWi^yuRQ4cS(?=l*O; zv!LawzO@@VEdq*|FJwB4qs^9KsMIuIUb_Gd_sa406*EL9#i9Ezo&{Z5$=Ob03UiU9 zDGE3?DEi@>A!zwv`v!j?j#`ov>HOQXyf#*6kXh z$=WH}#sy*q6pC9(Z&{W^vS66z)v!HBes)_vfDL(s+VxnrwwoyFJz6Oi37i}G`xlg- z{m{7Qes9w#f)f7i?gnA>&^j{()BNt{@Z_z~6;|lqKU`e^PMGMzZnfjEFzkOH9!tV- zB)O1n>;!Z3XHDC7mTzP^7J9O2wA1~UI|u4|KajML_+r)=T&_p>DgPUOUgI|0$^Y^0=H~v8$4X**2b+ zAN`H((KXJS+9l}16(rP#Xb8WIyBg1*dQUPK4m@HOVZM>PAs4j2l@}9vXbuzi0~-Mg zliHbwuV9UfsP3t3I&o~a@|0u@b`bAoJS&zn0u#!>=vLP#LpyEyg`pnhc%msUH2HQF_%j(^Lgx& zYLaY#7}hD~_WMhK8r9Kva(Kbm%}OeUmd&61W^-~*x6xcv3skE8sb?N=s#!yTw7JLt zh~$b)DQ%cErrliLs(U3Fk{nFVJF>&Y?H5$ste9}RzRd~|9U-sLs?KA-Uik=eh@s7| zm`B|)tP(~Px6_ZCr2K)~1_2>^zvJV{9p1^V^G+k2XCNx|EG-i5c>7c1%~6muYQt?9 zy~tAzZuR;;ZA3u9E(o@nmpw^CQmQA2Ep;m9J za_LTi-dH~F4Qr3`mdS+g7iCoBjZqr`{o~wCh$|)6f*>X~03Cj-3dlVD7cw$0dkTNV zM*X2XV`4g+X8A2gyU5`uomHr{fKKXl(UiwBm9Qg>+NghXXQaQe+yFv@Z_Bpu5&q=f z^PaZ$HeTR?d1qrH!WP**ZeuU#^>rwF!tXN}LTf<|v5>Qj9J+)gA^$#v9lkbhzV|yM zd=Pm+?BB$nBd9~CpPpzKglP$3NO?1!>BSp+59i~t0=~IhDty8|FFA5mT=Q^m*>7+P zy*iVB%yAUh8v5SCSQkZgq3z=_PopRAN*SS>4iWCcgS4cN)Ik&nIgkv>6*+bm+DAI| zg%Ca_VUmpvHU~TRpCMAefMJm~=m%VbH{Bx4!k_smR95vdw zYVN3pPlQQL7^ZH^-7Dm99~Kq*C=(^X=>9nb$?{%Z!YXB>&L{kW&`yAl2d{7ikkyf1 z=p&%lQbTqb4?zmc6AZ49dPswk8Z}w(1K4O%?P4jFKw0#27n9Bp-)%d>iO0&Wi5O^X zvvT``P+RS^e$Gl=TtoZtC%0NSO9N0E*7#-f8i$MCKSEUFQ@bd;iX$tfxUrD$J!yBi zxe(dL*JadEIA{q?F4UNd)A7;DOa6=g@p~F?abW@F&B6(KL=&Gxx2M|Xs~`;P$4;@6 z;T!8tO9(Um(0JjN}s8$<&5Y=i*wf9qjb~=TwFR-Vh>+Lh=U$_>&*@KS5>a z-4t@J8#yQLs)HgKzA&R9JEF^H%&^gjl-D*79g3^61^rns5=$%=sGFP3;$Z~^D=&}$ z;(&%5ty;igA~nO3j|in{ROiO2ZmT?wF|^M*wJT%saoPXyvxB)P^hFj-3P^?=_86MI zqeT3~P;E}u7m-xyalnHR+&Q~E`}C2D4uLx9K4>3JM~(J{)s(3@87~`*Vtfb zH#c{JXdA-H+CZtQibEnOJrkh{l6PSkEDwNId;{}?A4tE(wn_%n4V>HbPXaoSNMz%i zSGc6O-AlF^l-fXBII`(8X&!xb4&+f!7?aOS7R|$=f}ie6bdV4FeT%1Sgykbp_T4xb7D}?ng>8Nqmt}F0v5X zsbEFN_Nq=8qO=&*57}THZAGa{-mB;0d=~!HiH2)cRXYn?`3uS21KNAF6_r8Sm6iwU zP6vtb%DP;8*kl`VY5u?GOcF$qeG(wFn$@OvF_EmMib+0m>G><ATC^GTLFI6+9F1FE;!d6Du927>QOaeUC@ zcU@$}LPJwa*aI>iF|>eO-7lXli+ zC?JfIJlIkBOR%yz2j1jS&poY)Xv?~YO!?tC8R1V2o{ioc(xywev)ArhsGc1<=muW% zWB?exomLZ=uNakv^5T(BAV68RH^e2^h!)pCZ7C(?U!V5JFkz}rsN&j>HSexyDukSB zXCMpw)3r<>Hqm)zwAHe+KhO06<~{W$ZN+eYeQMrDV!G$-7lOvbe1&5kTY(tz1&BdO z9Xi&J+$8eIjPE*4u2$&Hn5=4B=OWNYuJ+TL@q&S^IjHTOUXw*k`5l3Eo}A|(w^MuO z(Th9}+-1>L^M<3umL{ABxpFKr_uT9Ex^ur+&crmqe++*^zR4B%%sZBa#6Rz-Y)&8q} z#e@85SN17YGO5JUr+IRJw?Zz~1e;`3zl_qKSG1JpsDz`=^_uY>NMGr=_28m3v3-Fo z7`Cr8-aB0TIbAt(?;(oh;948!v|6DRehPDWThheABy?l7R?684S`a52=;GWAC#k+a zbV2n(DqPFP+7!dsd}lDE(yza>(6?(VG9`6=B~m{ZTA!cgr}(#XctNVmPD+v7>6dy< zJRFG{#CR|3Ku`XAZK1z=x{bqX(Xnsy4pn)q5e9o3KWjIm`J<6fq%oa#mJT}fEu0%& zW_5lZbe5#b2b*fv=#ER8OvMAkPl7ysB3^!Yyk1m$Ww}Y>tqjj?$RtdsRIohup+y|u zKWB37Q!X6sehq%QV$#>9+D|sBU%9ikRSa1%v#?z~K!8wQgy6kD?N$s;%0@Di$ivXnsTcTjCGQFh-aRUkmcJRy1g?%ulkB|OYo|b+acAA$ z#5wc%4yzBLzTCIR4;70)5JY}E=bSik-2Y_CamCBjqW)ZGL#OCZ5P{s=9tipsoH(Im z<6TH1kaVSuQ;t8iM#|Xw_2CZba*n6Ii=_wnG|%6i_fAhASDJA4S9yYa`vNfR3x8lF zv7Qbfh~>j;Q{0`Q2@CfgH|0kT(mbRWeLqzmsT2e!x<^2XCu^x7>D4aODhijd-nJE* zBANIhVj=80>Wv!D!vpz=~mfDAZYZpe~W2-5ED?W6L(#+Kze) zU-6U25)K+@kJjCqULDF0%KJEjo(t>&CtYe# zsa=-ByHn$1E;rzO+*k&Q!#d?G*S}3>9ElT@!`I#x@}^nv3(K*+6(11H&HGWt54Wx} zzIe6(V~HEK(tP>tggZ7_i(MADTV7Gs&kzLuX>;+KNRo6bfQJt2jBrY5fYIxj#p777 z^y}7G{0_*P<$!ZSi?asweqD)zGZ8D7obk55!XdC!w;R7b!SD<1#1_|t^g2fmN!5F9 zK_p4o92fr%38VUQ_??$y=%ccSA!SBmfk5? znk}wFGNV8ue5T)y_dB;Uu0>4E_Ry12T+x@k;o>m5b?(!nbK1RCLN6z|ljg0nU0>cD zea<-fVaNBoGAubqH9HUBnqG&nk^VN5byOxAvCQPEy>|U(aZ-ZlBn5}rv&1=7e5+}* zcdZj+o-khWJ}w45?RAD5xPrkR(ThfJxOpg7>f%=0CT%20XQ2H+)vKEE`>(ASZlqwZ zcY^Fv^!Jgnnj~7YEYE=Eo$q3EKc`={3R~df`}12lxU>6#b z?e?dsZpp`cy~>aH!`f$j7vgi}E~Ohh7BgG_InCI+TI}0w9J?2cPwONNbJpP_PlW+3 zqjUKS+eUc;J}#f15VKLNCxQkU$6HLo0zQMe=#$T#6Js)P`8}r9yztY%Q4*hag#~`Q z)x*1#jFM5($&hu({e#i9|A)P|{>!TSx`pAQq(P8IKw6}`L6HWrQ0eaOl9W(VP(VTu zkPwh=2|)?zmJ*N_DQOWnbNjuY=Q-~=|G@ix-uoB*2$%cXd+oL6nsdxC#)`K0Dydse z!M;}I7s>c|4*VwQ;E?Jnu}%Wvyu@~VuMZ;aBMVjhF+4ZXv3?u0FxGEHpFT0T5Er~T zq{bVKkra0jBd_TXn3BP@GAYN;)_~p;!NK?bbfgzMNR@<`ZW&%izg!|czVEeBm%epR zE+~j}o6_BA`g8<^iZW6rvQF_p&NUBmd$1Ul?lz=?f92C{FwXe3c;3|}*Y(8^`LAG&sjo>^|LcpJtj{p`%g@o``R==%4yYXUS z7Y;R7;0+_L*=vSyB;y)8?x)%JHW1Z|ya>OOPr^U08&ywO;KO{g`pHt$ipPbCx)f}q z7D0sB!}Y$_MBOx;tN_K0!9KU6Gy8gXY@w7ZWo_QLG)b4g|HB_ozAsqJ5TnM z(?j9|ID5CkiP3aC&>DPK_holh_3cF-*!}s2)dfT0$ZpE*Lq=vX?CY1F$P>aUHb*^z-r+^?wDWc3HA45X05!QZ-gZ-2yD@DRk@w@9_8 zQ1aF@e|cZkeBdhW`VLKShixHU@%ua!-XbYqL|^uzlm?fa{W@n}+Qq4}J8f0asO)g_ z+K`dM*ATX<-btPP!#40dy81hTy#0pj@a1>TpQQ|>4N^}oxNneaWk$ftBIRDO*9n4S zcSWZmdO8Ot_RE}Hv-tH84(v8{nyBQO3Aw~9h%1}|s@MR4#&}zSZbiBYbYZ@Jsogyz z>n8I|Oyrt4Kfdm|LWbcCn{0H2n~DZwDIAd8&PGo_t2ud1aWxe>XKj!O(IaQFvOR43 zUbqbiO&Fv)To%(0t=@#T!q-ze)GZ}?+4{XQPvmTpn;jHFT`tk6;L;<-g1aq+yEPV= z9l%3H4JaiLNnF;1bD537AUnqUZI_@%gLR6(kgM^jm25qn&C;>|4G08nmBX5}wo}$$ z%7#?Agm-^5IACE(8Q=$tH=yu11~5@X)kBz)AMdY927RW`y|$fskSMRjQ@@(^kPD%g zgu{NpMHJq0ovK%v3_F=)4m7 z>ELq?>-ze2!DvW1rE=dg3(p`fuOQ@|;)CraoV#XPps41XPmokw;xAmU4~dQ$4WHpb zd|=FrYx?yT4Dd*Deu;s#n5jK4bMBz{7AQstn@?u8TH#z`=6GUU1 zavwR#Zyv}Au_a_ivBF~yjnkalpAx48MF|Zl8-F5IB0$cpI|2k4qgOx%QuHG#2{$XH zlXXadS*vZ?_>cp7O~aMSZAytKuzp?e`mlW46!)vMa`GO}r?6Vbkr`ZSb-H=6kAUp54^ty+zdSFWu8-)~+*jykV z*rSF+<&tOI?J18ZbcfOR1I44w>L~}>ZxG>2TTuAq-l@A+GXqg?FO}*PRW^Jh+)u- z)Jfw(Xv)B)QaGe8#&-}!pHv)f8ey!TIVhS6LGyfgAuIJU$|8 z)7rjJS#fvW~hgKIu7VItL+Q{e_WVr_wBjm>??%_?HeO!#9+J3 zgfGP4*T3hBuSJ&r?U%zAxBL2+t1UOLbkxCi1~bGLuCW=Nb|Z+xNTU+xdISDLvYAca zuN$z}D?)B6dS}?Yswxnm{|wauLpzme)GzxFx_UH0{2g?H1ftVU9~tX~8m0-M5H^5O zIYjl>Bq{}_tO~Vo^lLY~H78T%B_EdLHJ|dYc>rI_aRnl(LbtaXmt3!&Phmsl1qUxq zOtHS=r#zqEdoaTWlmvs^h`G11M3QOIcQCN@b=-VbHOAF8tJ|Nv_7qAlB`85-<9A;e z%z;V%vtDskVkWsM{DIAkg_$f^cpL*&@MPmMP4DE$8S|;Y^lMZra|2$_ls{!d5d4}E z#u8h;KKX!xF+xO8$#=`NW>Xfv^fE3oQ)S?{kb2yAfxqJ!&{!w^f>#k-S3&n@?%yGS zb-np2b?fRYH2F~BLDs1?))_t9EVcTD89R79m@4|oJ)$pew}=dyPp>acTL1;=!#^_( z$w((V9$bn*Ar6F)*TvD`RW_n^h<7ynXNudxm%e}OcRz?@8)}fSK}A{(Laa%Hz=gE` z``L~_6iJXJO`*ypN7B3FaQpw5Ne{_3g0v`tZY$A`rmU9#w5z!bSuHC}WGNA~0(-!>zS$D|XLUkmO%gsR?jUcghdh-mmaV^f^T5iW82xs_$mo(aG7 z=9IVY41_YROi^S?4emC+B~i9r9Gc7eyL7Y}rTc~#nkq>gYLR=6h6fatYbru}g~!oG zytVdyaNrEZ90f-$bOHwO_EdC?`ySl250lZo0eq=h9ikwiWWETVjx(MU?c5G;c&?eY z4dl6gKv*A7*zv|Xb!7k50j4zOO*e|Z5QE8#Z0#VnIP!1?}u8jnNE2YxcdfI|J<;QszW4x1^@ z)#X3`eB%GU=)V`j`*)1~ok#yJk$=Mo1joN&^8X0C|3=9FS4PNuYK2(7)(g(!Kt^&M zToP|a*|TdE-1{y<2sy8PjlDzMKii@kH)13BM2BClm(d6M8PRGdP^UE*yeqk?g$jZb z$v7R%eViW!Yj}{UU5h)@%At&9&2<{S1Qis=Hdq5bfts)}K@tA*_S{G3b|}eehiUz; z2bJUI$nU{X*Y~P5Jbl%Wyz+bcRxVd&3m*z;Nfv)&YIy-@ zCE56qotz_8jz&S3Out&%ux~>+f@j6wvk=nq>J|h;FiiHCw3h7E5{)phDR|oo${dpH zEzPZ8xLB$zh!0Ljaq;NocM1d!HY%853}>>k_-;96NTy&;=>@EjHV|iBf5YfgTs8Jg zniLBBU*#utr>vBj+~ifR*n_OHB&y}d);KzldjE71g^abHycT*4ZJ@Maun}G>(j}7~ zduWPx`uOD;N3=QpEgG!oyU-jTnc66;dUat3M1+=k#5hpMeA`U(39^0S&k^sWW zzWn4DHAH84oZQT{`#q~^slIc5kA7~#*K&9)E7C5xwBl-{>>m4Q} z*iG9YnCMVldYAM)pn)LE5+Lf*v)}9Cv!|P$fVcSP@B3%mh3cX+>GiUv<4LqTh8l0& zw{juKtKL9f^_H?03bD4Liar{KD!zJxEblgW+w|ah*yz;B6h7k9+|N01fLc_87q&Cy zFF7NJMWQ^Vkas+IAISl~a3JDJtlwChfP=YYHd3)P{gzW@N>sgz6m*# zheY8P-$%@idT)m$RNIAzWYPrT=KyxZzmtwad7{zyN_A;jt%;?~)&IJ#b3 zV{imT-8xSXCApOnx^32!b4k6%KIRbPWXh_~br~4I4IVkkRY2=mf5KP@D_vEuk$pg- zp&bm6>y_L?M$_<(?7K4OiSp24khy@sAJihF_V+TGqwh*|#<3uwZ2}X)SL71wk+y5M zM*k&+iYFgKaKZt1mO&1sF^=S}zOYQPF*Hg!rHm*~aG}5Zh}QmjYL?vNAU8A3kk#Lc zO_0cLaBbH#pK|MY&4-q;rO0d7U#$zVq7XoEHCl&SCOyv?cVl=YkpQ;`Y;Q_T=kmeW{QxUzqaS6Po%H9#J+sy&dQ0uW z^(H8DHrSkc_&Yw0QABBp4Jwc=O@6rED_*XqXJ0k?-*9bdk3jbm5aqC(ln>)ohPD9m zMXE?^FC-dY7dk2(t zR8k1|Z(P!NaJ90!5ijeQB@7#(f|+KP0c;%(oUFIuY5YOJd-RmDI;w~!eGO-EhElWz7k01dLE+5GHWD?n;FKE_mg`%44;lgy8dk}8Ya0fcWkq0nbrhg z>OR5*J#6>agID)Fke;s?;IeH1sg4(pt|u_22w!=04nQ0!Lc-vKUv=nMw=-G_QAMJP z8RbYc`1qGz+^uf?Q6GaRq~4OU8g{#$5pXJ05c_MdE6@&+ng^eesK-a0p9Uj_Ealzm zUEIE0dE5ynN7A0{<;h|>L>ofSk%;;=>bS-l%K%!X0746X!f(yhS9zcwBtya&aRx#2 zw^0`l*fWx^$^vxsH~zc{rNB#27f@0E{Nd9@H6c5!-%=5p<8Fn;)p&@SK9f{*AwGeq zKPJ2&CLs0p9DE@@+>NW?`;_$@xePofoOZ_gTTk_&P~R&<3wg^=HRl*`GF1v78rIN+ zu+QGSKYmsjjpwv;TO8AkR>Z@A)`b^PTjLNykqbGb6&wS0CtV#Y_)VqiP;{ zRZ&%sD>EPl#|}UGQ`i!SH-J75DKO_GQH>Ot8-g05t077a zmL@$wSZW^cF0Khg$ITjk`3O*LjGCS<-p8LWJ^1u|o=9Rfa~qm0Z!3--1fi2=QlwPh z)9ccKJ-NjyJKDlErhLP#=0OkzMdn2hq;Y~S_>`MUlq zFgUf6mCmUJeI9lnJm}B4i!H|8KzaLc;}JYTli!1(pw3OPc^7o#C3`mr{Zb~|Z-`;^ z1Mb^xNNTc)i2lQq-#I|{fVk3uF^mO6DM0u$-8Q2H77e5xt*~$yz~E`W6#7k{_@`(~i7T=Us}iEsLiazpJg;@g=YQ6mbp3Zhh_pCM+pVZ!ODD`HxB z3u1K8<2Hu1fe*=mi@M(6)O7z%?Q-kjRkybVr_4@l$+=2P;wx!nwiwipmWyk?sao!q zpdMdsDQ0w0g>lIS)Q0-7eyRu>eFHteQf%Kp)!>E>%X%s7yEsFGuo%jAHk|_;<%h8b zIa{EAOCMr==LKJV52SG-+6 zs^982Blx*JSES(_yMCAKF_;NHfu@Lf5j5|NfvXW&pe{0HSAkye<<@wAb1|pf<}cXa zCE1MGYgp_HtRiKR?peUuZU)lcvG#H~7<=JTJavB#W^ZhjSp^i1iXJ)nkNAItcYH_6pv{lej3JFO#K$QnXsva6jBn}>h) zG(23nn`w?&%dI;6Rw~^YX@DnA@H_xaTj$msL{TEhg{_lKo^gJk^Me`vn66?TdhNFL zXf!j41X;sv!A!4U%Kk)pf8lU2ywEgk%Vp033f?B-!ICj&+<$r#X1~~@Li~-a;gmX{ za_}zm=M5J^1B9Hh*~U|T4_eokt$vvy85#|zoM1V$R%Lr6w>~&$l5e8!;-78>oJ-OH zDr%hdl+9+bL)h&RH@!mM#p(E2+mgms*eA))ekC=3vY@x-zEU-kBVER!bt`b0Rv(LG zQ6P_+)KmHzUS&4gDXNoDzr=~CsQ$-0{*r)3Z$E>-`1v+`efx+T5>CHb=cSwMB7I-c zzo`_ESqS%@+8zv1$h$pLXRL%S`6u-SZ z$GJx2GN6^eZ*brZX+t0b7Z3S*I?i5{p=q?R4rHVdAgLY(fxJdEhNaW0g`ndxytdDY zm|NgO@nNB1H7k2e-CmF(f{!&|qlWk>Lbp_H{Vj(?4$PqEx6fQe| zBIO^@Tsd?+#=MYai$y%$YGCooycN=>dotuTwQeZaK5*qB$Ar~+0x~{8L8D_*4o}KjDkXPCpmvV1+x7ZRHa%GQC2|&U$MUArhSb{ zP&?j${5CMwiD>Og)nzctAcw8+~TK+3F|F{^^kFB?U`yl{JO;);4*L z2z#sT0CH;SGuCq<`vQB}^h>t0);3v%_(2#oX}O?C{gLzsidHVUs~ymMHBa8imaZ(= z2xHoj#pBpE{M-5?3cH?xd-duu5!20|NL^B4`v)d(tcDfRSFIM}vL{L3)Nq&p)4wnL zRu&xzk#QqlEC+1Gcs0MMUD8N@UfbYo!6KJ;)yWUC5MtTXc}UKYfM)%(b1-GYNDRpg z&?CMq&Q$|{?QsyqN+vqkp@TF7KdJ2HF)FIsrs1Zk{A{7Z-!M3%6t6rdIk&bN(m zJw?hUKURD>IXUESp;*I4b?NyO>2gM2(8To$5+TcycxtbutVkX_DaYHHb|?aexC2tg zWMpUVR-8M4Z&M6x_!UK=Y^1*XpnX*gJWm!r3wv%9M6)?uNw(N77kHUSebCVCiAOk3 z9Twaw9!%bvE;BwCc=3`+;CD&T*ay{m|WXn zmBY@oV7U2mF9RVJFk)TGepaPAuM_~&&GzM}yYwY0KJ;>PggnOYluNjaq|}m^63&vS zEMa?$_B5+F4aK%IPdy4D%=Cxg;;IQoUwXH zfOXB?^>1PQ)G zafm001`{Jjyde@Q_QZB}ATheE-RCK5;_3^9w}qnw2yDUy^#Z*yX!~b)nl^z35X%1%~s81wE}gB^361DnjLZ>v`j(;Ori?(_$o zx%TYI|CX{Tzc)V^($lK~YQH}(jWkGfxBi$y(1GqZXK zRHW}AVdvF`KjvJo!}TGlttLTKEhdD51=^#ceB+C}8O( z{B#dlWt$)^!cOlRLN}ngz8i#dIpS%biiZq%)qYJ#JkaRAR-x|aw6vD*qfv>Wf9XGu zP*1|h!InOa*y7;`sbX0C61uA#-JQvn^CcZnh3_G^QQWSy;w<;>hAg^X`_C$<^0mrO(GcE3!ICn4~{_5w>O&$YssW}$BQ9^%;yRpmU?Ou7U` z0f!Bt)l?(CU|el3n|p$_y$I@aT-Om#rzVCN%<| zM;$+Al+!#H=yjwCowc|~ptV*76TIE}YGV(_@NcEo?c!$KrD;^Ka*BW4P6*jchMS7i zCMnI+XK(I)r_i+{&DUo;`uTp_RLSWKh`a464*o}-{1ctX=`EjxB*7cV-EQ{t1=I7x zfOE>#`|38Ye_YpTL8!ELaNI$e;Acn^C@>}>dFnZ;iC0>K&F@@rsMBb7wG*A%g}pTt z)(Jn7^v^$N{(WC;H8bSrx|j1QytWPoX9<=aDRYE~)jfYb%`}}n z?L!8>_HXOB&G0nW;|f zhh{J1rP>oOKoa#eWWJn(3G87I7Nm38eK@!tRx@yiyPIjy`9>`0#08 z&Be^)L%Yi(&jXEWmd00HDsu)ZP1{$yoN`~XwDdJiTT856(pTZRceN|Yn3X!WZg^dV zkEDiK#bkD^>YL1SmM^hWaZwMyPTdrqIY}6l&bEGisoU3o>5;ZzZ<})`{}=vOc}_ps zANgKQW1eU|RJmiScMJq^$=8~w5!ImwS1zw~g}Qg_u3>fvbhuyqntSVWbJtB9y*!q@ zQ@(!os)?5fIo&^;h5AXDckx(2JpMs#T29xayrcvny_btuGULtYf9&ZnzkUcH0aq86 zBQwS5y>5=b`Wu^f(RT4K=hGHN5yR~w7L2#|AgdB)vGT5xlH=QRqQD zUg44Nmo0mj=aVeD^EP{VfxY7Kf>|@F7roj){a|^pVw1c~GNLyFJKD~3i?cZjvZ@;} z_FjH`??|`I{Z%DaJAhkCNfrDXV1yt0d?V93Y|_M4pC+z1Y_UXadOa)jw3*1~1SD_R zMkHN1_~h#~7Tt>bTVsMX6$wVN4rIdJUyWK1#;%S$yvubfTD+#Y5dTJ_q0>h!<8JPn zD9(tY`}q{V&Z95R@!Yw{fs>+ahA(bZGv&~*4M0-+HJJ~J8dM#^D`fPsh4>u;!abum zOvfc1*?@iQ1R2NY+`DPANd`XFbYky(>i%TtPO1!$raHuV6xM4M_IeVpP|R{xEf@I5 zoe3KvJukrWtVSXyeT>X>87wJkykOL8FwL=&((zInj}D3~eTN zK-{Ma$7J0yqq#Sn7Mz(#H)@TRZa%LTuKN*YES>ptqx!t;YcAdE?+V}J!5PrrcF*E@ z!ly?5wB#;9WRI1Wnsy3Y_m-|!jf2m;AlV%o>X;fI%y8|#2 zuJ1ANc7a}qUlqslN^Pii}g!=^1pExxhqfe1O?^ADPy9VbP7uyy66kPk{5kL zBTi$-90Y$XGbIv@?Otw|l*ilY3X)q06;x#Y*Z0cA%6@ltxr%ntZ(htY2*A_AOc+m{ z{A9}pFGxUx!MmvZ%hj@3IT|raaiw})217?fAcY*v)VSB!0sS*dSk0{ms_s*hjcfA4 zRTcZHF1{>HpNQ!U1uAi~H9Q7eiTTh4GDJbI)FEPmRO))_66MV2DpRy%a&Gs1hP>lF z>@Wx26pAh>$ij!ps?$;geYD28Fh$5Eq%U3=ph%Z@wW?6%_NO@-Alig%C4!P^KXGXn zZ-i`)Duogno>nyS>>rm`%b)>lQ#MMbA8nBG$yzYN_H&B({ zCP6&rLLzSbajHzM>y7#8c?Y~xj0-Q0qt!wz&L|23IrA!YXVT?X99uWP!TI0pau)dh zxOy?&g<8(NqtVs-gV?f9cbJ-x9lmh=S@fV+vZ>qz7KF z%#oB0HtbaUDIb;UrOvMctT1UQU_qkb`o`lg1f(fZkS&sQ__o8o*fngtPOa5m6hf_>iH3mEsQB^#%ttd(u89r&?+*-6xr(S3UB(; z7M7xC)eEJ-W%7HfBrJKvE*zG5)LpTy^u#HwF7xfq%|G51dZ_QxU1Hi#`T0_5gnL5` zW=hkD{TuVUyWPa*KB0DGJPFOYVP3?&W}VVV!?1Oim256XV~A7awP5AwN^RDu>(Fhn*Mim?uEnm{SMPkk zeWFc(UNVNCJ^RX0yCX_X{)BSi+SBFR z<3QAbel1?!^8E&{ddPUiY6aaAu6i zJ!RLBvAGnCj<=p(<8)h2kf=IHr%@*qEri$Af#D9<9WF_nEwwwhKGy(;y|s#Bc!#d$ zJ*TvF^Gq2DyDFEQ2aUk!y$nyGez;T{^nEB_k4!s%aDMNsIeIls^kyG*;>VjJtSYx2 z)MT_=RJ%rs@dxZ|Bt(WI9P@vNGp4%6ZQB*y=F_3gZw?pA#Q@vo7pPuVt#jR?y0PJ^wHxUE zBUB?#blwZ`ZA8X08?mLLVesYDygsHwx!b%&P^_30V(5Z8fn!~VI!z^phV6t;gMpvS zl&bF?>^J%MkDS?u(H;q}w%ifOo8b3-$ArCV zcHIzb@)5L8(hnW_?6b9>@qTXN@v9rMej*Z6 zxuA|=of16I6~RzmsV^L0eb}`1q}oSM*r7e_l;R z@6M^ z4lD=1vOZa}tobYPk@r%}NG}Y(e|gUx^Sux)WIe|cjj57GdT}m0>Lkk8m#d3wLg$_v zcRmuikyN2!%|N`_a{C=d&o_UI2U{zW_~Aap2b>%I!;G(*^(ybKmZn)~cll#es_0G6 z^|)~A&F$*_o~sgixLhLdtUX|j<--Hv##M550s|eT>TrDfsBCfUitye2$&MW<48iTW z1LkbCw;L6Ovak0PrDz^$8+b2?wGenK6>u^~to=w7$~>|%1@ zWbXA=@ZLwp`}!KY60t&7?=N9iIE;L)nAvM?fqK0~?@wu;9wwc~SK}QA%o3Pih8`J1 zd)MgotG|gH1N0)K35(1*k%u!}jz&(NTst6yE7Z*kC$JM^czBzW3$-8c<_zf<{N(&oX1sRMYp3Y>1u$~ zZ173`3^@XCzV zgSFsVvWkQZ_EXg<$vllR)zGbWz@AHx_P@p|>i3<%O1;I#$AIUWu%Ubk6S88yts%#Jq`04(uPn4D3x_z}bu$&_+w=l!EZW-#+*~?ndcBaFZu(d4oI=^b@{tcm+#j)f@zd!uO(*aytc3xr)Gtdw zK42e~qg`0uC$TQ%R+YeTtojR}4b-ISovCT-36(%S75_>uNx=U3N{Rv585GA&mVABd zSD$vgPMK;vTT!cj<~H34^IG5t7;!+#Om|zwWrb+hO<+JZv?GkxfoitjXYZzpz@<-1 z?84@c%Z||Xmw|wxrxN@@Z$0kt&zGuFwiHh5J0ypW7nMcaisR+)TvbWP`U1e0wu%db zr;dk!j^&@}MNRb%Tb0e-BR!Y0i5)sJm1ujv*XD0oaeEeAyYd6p?za6g_l=hndFj>= z|D2RdrPq}eH$`%J;p?)Wx&*0*)Oh|=z=kTyKVs7g**I%dq+MxdxY<{dJ0bT#0jLJR zrrS0TcU-ka$L26LlMj=4|1$NJ4znUz(R&qF9 zTmp3fx<+E3!5f3DvLXp*MjGjyL|wljPXgcQX^-Aga8~>g0v=JL@0bj2%89(~P)b zB1n83Kdxh2j8Mk5XCGR9{}?E*l6wBPaU2w~+M|}&@S380PD@aq{z;x^7{W$;%`Hc^amy37s4_Sd$4a>wX!=TOCC6^z!y^zR+q|ct%y^-b?k=9x z2Mzy}Ewv1F+ZPQIjt`S6<1WX0cafzV(Ctn_FM`?h~*wEyQW$OyDt z+wGUY)}nDU?4p8YK2y%ev!Y-}xebvd^ z)p;v~y^tC6#{^IDW&aqS|F_aSjdfg>%BJaATwkI}i*ri@SDs()%SfD|eSA zt)to&V1l6{!YabAe9w@s$5`Cx`t&EO2+$82ZZ3&uqcAw-B$Uey^jx}FouBl7SiiHE zW?VW-dJ zjD!RF-emfu%59h0JZkFGgqzL>#b>;D^f~ohe_HM`YsgH4SqDNl{qy`*#f5vkk;d(u zmGV^_0|Y3#?k+|igyDb(O+X*IstTXRRav>N*GTOp;9FEVr0P5(AJ*HK2rg)nF>bX0 z>A6d3q*^LUO95@_22GJ`ACl>owK?hte%rxjNBT1qWThHzksGDuMLZKiW6;ozi?X8# zU&;-$MK~+;nO28gXhhbRt;3FF7NP`O?Q3QzypIOij2LZ|8X5iy@Z;KT5i$`b&WHqA z#K1?J&`a7EFo#A%?J(dqd-o8v+JXu*;Hz1JMk|Q>b)^dd7@WnT@=2BCLL7#a6)#3> zB(9qBeUD%cLps(5dxM*=jB7aQ&Jol*&Ela0&%+vE0r70LI2or#k<)~4vk|h2XX0jZ zBF!2ppb4Ly5U*Ux=(miYN{^x=;Cg-VxU+BlZy>Qj-avJSYGu=6{*pFd-D}*mwM+#` zHSFNdM4-fWFj)AfMUqcGkEi(wV~W=m_cJI)YgF96qB}b=wnduoTSbX@v7F$AIMRhq z=GZ7=MgE*qfe8_>dr6?gm)YjHv--jpXq};Et6P5=m~d=rm>JcfruEMu^P#hp%-51i zm7&XY

{p0}+n#_?Ac4gfMJ2{d}S@cbm&po;IQ0nw2Ee)Xva7hE+_n;_>2}9Neeq zZkIV%AcxYv2(O20gM0k`-*y`EMCPr;>&5rMI)Z?`)VtN-ck;X&4z z`<3whlYbhD(9Grnom|b(Hy3@NoBi{-lz8fo$o}}T?r%YvuzG_E-CKBUd;-+l! zR3pJ}J|J0AJTBbj8sI|@OLBZ^8n?`rCR$wHL^!3wqYRJQ$z;^gN~he1-2Yz{9gK!H zh*}Hhk!b=W`1cQSxV$T)c;x@~qen1`MPbAL^BKHFNL2fR77@nGe_hNBf@lSeCf9%e zfFte(TpTC9;(tEF5eK@xJ{vJGqWQ1?DMths|M#Z;9jSlk6tYhKU40Og{tXELZu}cO z!OY~}ZRY=9Az|=wQ!g5tWP^%={JsCA4gY2gkSqP0Tfx@&Zx;G*zW%qk`2S2u2qJau zSpZQ*S_{Hu0&0AL@z@DCvk})!yQm=&*l=%ukd6`o(Q2Hp|Cj;m3)?opQcsX-GP&1( z@ww7pb2~7uO)`GH;y3@{Kays!^WoY3=D1*A0(LlXsM%ltvo=Cey^chLD;tEf1Xx{6 z2|%94^d*vSYVKPXH|S`B2DE~nUz5nQmEs@rL&O;T>txX+l;!j@`x@#NyY z{@&0PfHp6IhNJSYzya=874G-W=D{yJp&$WtSNAvADb?wq=kTaKuMPEfb1-HA)T)+|w6XvWlEpSKND!wt};wq9Yb70zNz`8rehM zNFU|DpY6$%E!2%9D&EV-6SfRgpB`1c%Jy5s{)S8 zNFRktIrw@s4QG8A!?zpFJsnS`zE!~&(G!Rpav+3Xgdbe*pHPB$Q^6~U=YUtRuq+_J zB%X~+P;{GXfpkvfPyyscX}oEoX|icbh{i=O=p4|O^ZwQX5sEo&A?7j-#2FBE`((tnD#or~9qJxFf;ou+c$@7x56?N7Vx(n{~n z<0U10-<#zP?}o84jlOcxQ5&s7;HEIZH7<{}o$putHRnUzpb_`oP(;P`0r)5Bm*KP) z_q?hrvG%_g(y;5KI*4Tg3Ii68WkX7Oq;`9Zfrs#pmX9Q&LUR-yM_#eo$<`O#een~( zD4Q=tT_op?$_S$>Rg@kP04Z%-m-X`k2W0p$? z!Zn6)i*<_>_*IizWNF8k@8$S`=Ow+c45jd-2COpj=q`vVQ)w+9mby_rm+%n%v+#i;zJnh5`9cD(mZ=Zt`^`YpfRhx z>UgW47Ta?E4_m1PbsE>&ar{{$7^{AFowioRxp}4ZRzKxg?n;lkVDHGnZp{F;WChIb z|8fv_=auWF{J4W?b&-I)4f+5(f+1<%90BL>c)M-kIvR*;+zBH{)6L8WBGvJ|dKrrd1F znbZQFoOG8#uzoZL5u7QIEq^fvXCq!*Nz4Et4aB%wK$~MDiK*7t=bKX$+2ZceXo>&m zlIGiHlxOykE9<*u3JbA8iAPxmmhGLGjMssN&+cjhLfBnt^LFRLa4h$a?jmt(l zWdMyw#MA@`{TiXcn-r5(eHvjQquT`BnO`f-zIvjS{I2@oN|1|S?Xb*?jiuZA`Gah= zY!6i#5Pb^4`&XXe_3(_I0dVE{mXNv0>haNM#U}y5@_l4|)P3|kV&EayTPu&*jRjhn6FOAw5qC}9EkfzcFLdyL{Ra=I`9;nMBUQ_A*$K_)r`Z!rL&zBzlDIL zF*w58NL0u~de9?3*Xc4HSC36-v(a%07YEP3Sr6cUdV3fZdikX z_a@`p?tP?{5eNRe5+(O=R~9(yTi&}Pw~(h8vvj2jv`Y>Io5<4 zS4tW->zzOk*3KBMDKU~lDzqD-z@1_~hh@d9p}|Hx`E=;#0w`{yVi7H(2x+EDFNnAp z9E}fILb_TU&25X&?VStO7Srm$ZWiR(7ND4H$z-X7@Th6%`NE#CAV^?e>Y35*VB0hZz@xY_!Db|x=cXn0St3Xk}-Za+P z&wWykm>`tGt5h-!=AA5EP#3{9$mW*p#V5#2-Bq4Hv4e&$AqWzc5G(4|P)Y-;T~kMx zN@X*9;CD{D^~w49Ux{x$V)&?;-)pd1FC2KR=y44+T=U!1CfZaLLD5``Afxy_n-8ioWdHXDp|NxnTTOSh$puPwwoh58O<3 z7qI^cG}9nA*Tk`li~$Eoo@4uV?W-h6jl>2no=VqVSn`dp^!>X)|K=aBM0{WC_G9)g zTC-(d;X-46&Hoj>M)+91tTG&JBY*8l=NT&(Cc20xO=p=J_f?FxhxL?8+vr)#(PiT4qI@RLIo%FD z^rwnsRxVXNvAWI>1bzX6)vU-ta2N)dgE1_RnwXz#slFm^7S4T zL00nxF%_{mIY-1%JqzlIXulB0r`<9Ub0i(A;C6TQxq>9SpLMju%+Gf-&goLwNYoP) zW0|_TALctx!@FF#i8VpndGE%QOCJ-jShtsg+cyiFH(j5uvm5Y~=cgG3roloh-0hLc z21g(W4h{+W8qw7BCY@9*+mqArXNbtkRjQ_ot)7~YJW~b5+1SbB47qxTyt|c}hOQy@ z1m_K7p^rC+#93Z56WH8{dxB$cWM6(ej5+g2MBw6g;1SR$spb?*y>eQ$ZT*~tJQZS_ z{{1BFo_o&d6(w%NMK`}Io~6taRYx%!d86zP^&A`bTak^!2|{-m*)1u45sdFKijtJr z#c%3{5jG`NZJ4BpkbN2Wtp;xC{?ZtBPKwF(o4pJhYtWV{bfsQ^4{Lqsp>PnfxHT^7 zLSQWxqGzqyrl|XY!73*h3&jf;a`5tw$3AR%w7X9NpqqwKrTX|+ogMKjk*_i-l4Xss zLtFM-nr3HMy*{NANE9v5m;+S-0r8f_Ek*W=cFjM~XRPB;82hfw|=3|jG2dQOQm%ImcbjoXx!kgyZlh7o!yJMeKt3Q8$WN*ITqk7Pr{uvT!w+yTP1*ToH6qwOc+7)BmfhuMUXn z+uEk38yUJA3F#1!9!fw;x=UIRkd}ra92)5sknWNWK|m0sTj@rS5Z^ub{oZ@;@8i#z z!y4NQP@< z*l@YC*rJ*Zl~@OkFO-6C7~yCyQi`sXSw?aVp>Hbn;Z1K-wv=itatF@%+d&U1Ro$2( zZ}qGSOW~~UYtY=+vI32tH>}rz2j|0a$BDL~5kHPZS>eb}I7bxZ{H3n7s@%qyKQxG^ zzC$xjVOFPLqapy@ z!!)N^(1|v_bIY?`pp1;6F&4vK2*Z{CVIkRsW7s3Valfqd90Eaw=(fx#15oAAJ z!QY{AA!H%G{BkLN#p*DwC5GscyLtupZi8R&mbI!Y9YxflN z;9p54o_v4LM%me6xZfL2-18SiV%qyZ;>d)TYB z{G-Qs4bez;p~~Z6RThUME$ETIXRFC_r@_`$;qdj$^7ailE-PH#9)_!vZvi9A0RRqj=mq*u?bbz(%c}r zJneIIiP?n zmM&}Gwa6z9D8z`?)VZVp;JV|EJlbxdM%fW8o&y*^n&vp202ub5W;-CWQKaVJ{-e1- zjbM1FMYYk!AxCfY8Ro*7-$vuDc4%6!@;!wWJ-=M9rQqOEvz zytK36k>&$T@x{CYpKvYZ^hg)NrfK0*CNY8-&h*`uH1R+sCEmZkXtID=Xuek6hw|bp zYo-?4^xvV3n^U?7dE55+z@|e$;CDFm?08;@NQUHY`aVO@`j3e{`7I!&LgV4vrvaT? z#(7is(s&J6T_ZL12okqf!SwGv9S=}ZthEUr7cdU}0RA*2?aSp?>5$k%1aL0>vL#N^ z&VKsKfU%dwEd*WQiB!9U9Rxk`Pp<-!*g?`wPMhMcUE_jS(h@X!^}E0K`C5C`Cmt1T zfxiIDx{3`b)|p`zt~%kt7S5(F*W1Bb;))FDX7-w}>< zJVV*`22S(pPdK9Jz5DtqUaQ^hE*K(h`rK94#4< zU~mFZpLO8s<5yJHsoW7U58L=Uh@Et!TG3}UIL8%cd?kWw^V+Md4%toC9s*X9sHe|v zB*pBpHj*xfLt?`k))W&TCx&-SL2qaHfVY)N(~aS=TrT|{CkfCCzU)S^pN%{ZhBsWr z1-<+ux29@K>d1}NLSdM79un`k^l^;sn>uPQplpPE!V#@zNBf@jFENG|c8X{Z z09o2^shB3;#@+so;7PzF6m#8S9$9ANVaCck&k`{HU3X8t4YD$3Swn|D<26CQS6I1l zS2t}&dQA5X0Gw?>JMZ=(-MH2>de`?~7@Zf&rC)-y!$;;u@|2>;H@{T>Tn34cusjH| z0a>ntGlA*LTxe3LC>>3ij~P6uc{2d|$b*IAZ5!M=yNnt6slOD~I44xyV@OhsIYiF? z7(sI|cHSVMrbw`Fe+7+fXU2@q{xH>)=lMFZk4?7#Qi3z5D*#kM5{w#Czq-};XG0y)w7TM57H7l0+CtK#F|B=#W# z`iz>?mXT7LZS1Z&3zs_U!)@=r(aIO~2arLI4f~mcAY#(K4ZToq3;vGk4Dn4?-yhbt z7cfy<&-vPfITpVu3Vg=XQ?Na-12XQ(L&^9LMIF@(?<@CU??scBR5V6(2PPJ%kT?>Ri08u(d)#VD z%@pG6%|R%aqR=auSVmq>X+pb=0KI*7&uzdvmfSwA&C~2ATIsp&)Dd3=621pkWj~hW zmf45aa4%6zAgW)0R_s~(*#3~V;M?Y;8iG}`OA`?8c6jb59K!B;rsGSo6Y8yCHE!hC zftp^#)l#Ep+!N0u+Oj>J-r2ssy#j%*V5XF0q7U@6q(tFlErCvYMV5(GEq(HP<`gU;)-7pd~S!yblS^_E9e6=vnyLX%JGbNJYPwu#W95wkXI?} zKl%i!=Y_#37dQ%3j3`H6<`s3wV`7FJfGLSjUXe=`ebHNtq}ZM`QjqvNQjCGJvHzU9 zp4voJQp81cp4Ace+fCp_y$Z%-)vbPyAtD&@#uw<>`8wyxjb$QHsmkxStL#NJ7JV2J zkJdR({zR}qrCS~{P#V8?!`mKcGSI)Y%B%C72u!_@p!rO6#B>Zqn$7b?VP6HY+$a09 zOBie=;#X=q5@-qIpBg9%Sy*D)`54%V3Xk~U?tax$HY*t_{g@+NByI*L!PT$0YBc-rBMDzrEwNf zV0Oeh&5bg`=AqN8=I_QT4`^PvKT}wyg6T*)Fu(TQ%$exqS7+2^={aE;|2kx%lo0*O zxofl6>!6}b8N+=WQ&G^F-h~Rw%0@!`?m=PvoF{fI;durIgF8<3lf$zL;%p~!{xQs? zkB+ikAx0t%WVjs@GiX-t!xnZ#qa?-)wm)8WI^OoJmOg?^R3h=abY2t*P%GgIX+vE% zjNc#I>e|^$@UQJ?%#ln4MiUaE%*)FtdSV|_sorXOJmvT)$0$CpMtcNdWyV_%C}-}{ zrlYt(kY*LjeZDoAV!wd?=2Sb$IGjk{!@TotGaQYpSGdEyyOpm5Yky5VUaom%y7|JdGI3IomEM7HVSO<< z;*wCKSJa#OYpS5Z2X*G`5xt%gW(y|BXITOn2E_$6eec zw?Xk>{06L*21F>Xa|J|%4OiFfFT(;YmZCc)n{FF=w*klLmqmv`vh#;f1u|q(XR{HqoeckG;SN={W*~f;^9ZHN!Nj*pMJM)S}26rLF{tggxs@ z5Hzv_S|B)9i&&BFZZv~WxCEZ;;_CSGEfE&T`-}9-znj7Jj0^nTv>2&-51#4Cmj)r} z#?Y`}!eL>QnePPk%hhVlwD&sfD<_)8&?td{X{9V-Q-cntFk5qTIH6L3R<&ton-Il> zcG+Tb+lVjE=<0Z1AH|KBPj0K!;A0HVTcvnv2LeyrSEMJjFB?Y;iYMsnGM)iM4|Eg`w(XF zS+yzx4K<%BW*^fGd3cRVEioCj$X5#Y&S{{p?Po$=tVa$)g-%UfMa?Lm7X2m7t zBbebMyFDa22(h6qjP1XXNKG%<#f7>jx(I9TZd4)Nrv810~2JN z-se26Tg0ksUI&69R_wJSqC+!q?tR?Tox7h;$>*WjAHu7pz|%+1igKi+sm+Gs&d~zb zBo%XoY3a9GqI)%!KQRk6#NXqwlh^Oe6X5&ETW>ePBgT}sCHaMAr;-zk^hK6H9Eun1 zRicy*aoJ5)z7?I=v)3aOJ1O`TtrILT-Mb~<4X&eS(;7p?CLT+xwVBCbfT}mp-GUtc z+(Xf-QrL5keo(3*!KFaYi~Fna4YD)S2CJnBJE`;bryxJny}l`@uYsz#5e) z$RF-5Z8RiVQ*N+SL??W#y!2(e+Qhq1!%0~P`HuS&4NWicO_3o>y=czCo2u_)$7@iN z4xd_ImVinZfr-`WR?I>MUjkl-OmwF_J2gg4Aw81uv{~I4V3*Xo`ju>@oAyZE`0rKk zz~caWjj8*6A8@LQCj?s-NCXkIHFQTJ70s6&Os!G=C1i@031AuFMd2RNk5P5iPl$fP zw#4uuWTEPkr4#>++%Q_Y%2_m0iXqni`jsgo*uY;SHt;$>r7Y&t#usToTs+w!OF4$I zC#L8L^q%l!Go^MB>-77VzW$^ly)%p@$^w?EK@OQItTC@IgGA;XH?@nu=RGkH_JjoY ziiwB3iF*l8C$~*0bGI*-ek+$M&z9SuWVMgyrR=3N({kqXFBb`45>}{yzxFX)k-);&>(}0l$#}STqP&Xr)zR|nN5rKq!VLimP5+~7=cVcl# zn<@NE;eoE1XjilAYL{Mr6l#7_*qemgO#|afnisDLJy8qdY21QeQwq7s!9jb&+%t`=OB6K`~dVNEXE|cKl+*cVP6^czzR(^NHh?e3~@-B-Qqapt13K zra`AhuN9U4rZUsG;kqh3Vda7@OD%AtFMAlFTo$gID6R*##HvYuTbkvf=R|uQR_}A5Y|^J2FsGqcdroIO}5Uau4@b zap@(h*oWrOK`*#w2fPx&BP>Le$R-~CoI%CD{w`X-ENUaB9*s)a3Db5gUF%(JrY3$l zIjy*78II)Fm>~b*DM6Oe)2<%kx4$>FUwi( zMOx+M)$!`8ig{^`?;~e;#hQ&xQ}mY(z&TuQiUT|9yjm&Gjg+RheF&`!QyVKuSY!?h zi=;i>v&fHgBeAk4)l!BP2nzkk<)Uyt><`Vb;7ZUcD=dzasdEad+&GLjIbrn6>JMrQp$s3m%*3{tc5k`j^{kbO)%> zGPl1C{V8e4PZl|(5>w3p2XymY)12IeW-&(hgT)n zpWf)*dBl5b(7Pn7qY|%4RCQB7C;L^R+M( zG+Or69rWd+b2xiE&unyF6qb0_NW8AbsSLRYH(D!X&7WlZdur1$Z~e)2xlM@vTkilV z1}K6ivrx=VD^L6k33*7?vD8p(@2Ojp-vLt9Kf{L-HA49?S|)f(=^;XnI>Gp4Pt(`< zSKg_-3H=UCYV8R@eQ*Hzz0kvUfW^`uiURe!I>HlnS+Wm@EB`G1t=jo^Wi2C~ay`&y zc%b^b-=10^!9xaFBNu=e;F+_l6L4$*k8A{D-3h|NamXpT&|e((pWhhtk&3jgmt5Bb zJB|+XSrN*H)CTj*Tn5brmmsUAa@VGNSXvLkD6Ar6nEWg6?@~@8k5hz6N=-=JqccKO zMeFhWrWUlN1k`;wwkj zWwh&+kaz z;>{BtzXS@&cI9IEGN97EEb7%)o0l@4mKJbg-)XN$ zvmrKxDk{2`f5m%zEuU8!i`~{=)S2p&;PI#v>zUwJs`zlWg=g_%(&C|{(JOe^Pi3w1 zn)Y?H7*S1vfVXPTi4qg-!kP6&nu4HTA%4^A{_eU5Ab%F_aEcCK5LBOW1HFJxFEnav^D?bzP|l#S;p= z$Q6mOG$C@mco^W)NtsCPcGGvGtUsJ%5-0!)L4m)Ckdrh;qr=W@LY)a^t9LS=uqm0YZe;^5I zv58Jfc8;4SB~LQ&$eDs$g|P#ELL>Js_^y>P+-yW^^~H$uwvfvkGvl$!6msuxKDFbA zlJ8j-lI`*@+PG}3Y;u-yb)acT6~OWSNLADfM+cd2}YD7i0u9k*JN zuiIiJe8i!8-1|{wTv;$1CLqjy^#>H_8tZRqT2qbi*juf);iHG_Vu9Bs4GbgaOwoZt z#qQ$KgzTay*xOO#oR3eWS7!=+vWL?9TP#`$DS0_lwC;M< zu>}^GgxaSu2_N>srF8fB64~{Vnv0;gG&krIzd?!BF(1qKnfSp)%jff7*PQ`Cu$_Ag zHo``R1rkY<@R7hAcb(NR89l^dIwNuA+HRqiXGq~*(a*Wz%lKNK5p9D}#^C~x5G~9R zv+JAz3RS~eyz-|%wag#B=zQaK0$>CNSt2BwtVL_|L@4e=^dqTcm7s1sYS|AQVOmXG z6@;hjP6%!v>W3G}k0lA}Nj9Ko7XA}t(8X`1A1ps7Hw;6{Wzv+h#J)xIbso&s zGNlXHX}BHK4n4$pAg&@L_t|7R2s@Q<0-L8lRG+K=IeYN8`Nt^0S>sD3N>|8JzXPxf zr-6u5MsU>Ur6ZPPJ%A2~1$Prjsh;yNt)x<)YV;j)@*3g0`i$}PV2#C~pLYA0MPm(; z+ya_KBi+ID(vcq)K;1W!tRIO$wqP)U&ehZT)%G}E*(5s$E6Saz?BTFHNv3+4fc{5A z8|Vk@W%i%xn$_Vl5OBCz2zp!wZVkV?w0gFVE(s&W2$6V*Z!O6Ix1>j^7V2225!4jV z;loh+WTR-Mhh5sRuhGNRY5$e6FS6#LiZF?sdYLL*CHT;>x@wvdjSmRG*h&J}CmwQ& zPIO3#BL})2v#bmzCNfwrlVJB?a>{B!Q{foe6SwX>N&#D#F2Qu2*F6SuvJo?mDL@4F z0Q`x4a+F%mJ}#r?t1;B}`lo-4g-@Um`RqR|lSK6;4Iat5>XL>ML)*Ps{+R)zfq04v;e?}1FTc4iv zOC<5JIZz`@zB5^=dAiP~X!8y3T~$x@#4+_J@4F)l<#z1shl2>| zcr@@-;=}`vcs&t?ZF1l{ZemiuJH4#9#$%L$&x0_|xo`33=UEIcy~>(BmwqiX07LEp zkSu4a5qqlx6GsVWK?f@bPh zxQsAjWB77A3)ho-H*^WO@X;kbhgE_=!F){a)y^bC#b+-7`Jj^KTzM~0<7}`c?&M2o%Np%J$HRFmTn<1X^cpE??bY{ufF`zg7^^ZQ#AqU2mJ%r3= zWJ`EsAP{&+r2@74lYzwX=c|A3o9}UU-y>d`5KOT5Fa__d9${YW&5YuTz(lq{Ca`Cr z@nu{s5|iuhRB;a|x7rLMsTOftoS*Eie)^;vvUv+X!X(75@!Hp`;KstEp`sO#71jXy zMw{+~_Jckc&tWLg*M#PG_3H11zW|bEvkQ!DhKmTl(V}yJHg38deZoqVj73cA0`*2q zpF}`agrA;@%N6Sd1`Yqp!@U9#h`QY*70cA?Ax%Go!$}LQd>?6JG2S1Fh3_aFCNJFK zBKhs4K=whC+j{R>ZqPk{AAyTw$G4i$?)J}vQn&*hI3srL-pBX=B_*-=`f$M};;|D2 zD3}&{BNdlg9c@oUDnSBTZ?l`Yv zjMM0nFXn?eGy$E4o_o98RYd*y%n#<#;n^rG%W{IRznJ%l@pX#!MDoZ_p*AghRI3a; z*LLz=y$%M{{Sliq6FuQfel+`fdCYIJNQLRnz`rxWx5AD16@F#Zlb`in%fY-;N zbDS{r&}-mRgck-z4q(QwD;X#B&{j9Q3xt_gv!z`81Qa2U_Hl=NEwqso=-;5;68ypU zTbZwxS4&7W4G$(%rUFmP^lK%L6)FW8b_mwN=K5%0$3n*y!OP>u zn4F4tfBC5~jj*VwJM8*o29-}S>Vp#A@e-kYltDsGPegB0dHXIVr^a`AJ>}2N=dOU; zHc0E15MRDi6W_bIJzQrtEQ&6VYJqz)RKwc)FI_m|xxlkOM0ob?iL+jo4Be+5f-_?2D z;8UKHFZ$oXWd7&sJvlI}fRqr@l)3pC1b* zWamTKh<4fhVpeafj?gb`K8NYm~=^95{Thk#FUUiZzI{Cjc{d+s1Oot8x-#6$mUcEeuY)0pE`%}VSUjO@@JtPD%U6C}e4f?jfVu9a^iUg?-A5mRN zNAO-Daw(jFCE5RH`9d_(jQj1y?)13oo}p~sBqsIG&sr6 pending_ops_; + + string name; + Place place; + size_t version; +}; + +struct OpHandleBase { + vector inputs_; + vector outputs_; +}; + +struct SSAGraph { + // vars on each devices. + // * the vars in each map in vector is on different device. + // * the map is mapping a variable name to variable handles + // with different versions + vector>> vars_; + + // All ops + vector ops_; +}; +``` +The variable handles are the wrapper of `Variables`. The operator handles are the wrapper of `OperatorBase`. Some `OpHandle` is not an `OperatorBase`, such as `NCCLAllReduceOpHandle`, because `AllReduceOpHandle` will use new device contexts. + +When the `ProgramDesc` converted into an `SSA` Graph, the [data hazard](https://en.wikipedia.org/wiki/Hazard_(computer_architecture)) problem is also need to be taken care. The dummy variables, which represent the dependency between operators, will be manually inserted into SSA graph to resolve the [data hazard](https://en.wikipedia.org/wiki/Hazard_(computer_architecture)) problem. + +## Execute SSA Graph + +The SSA graph can be out-of-order executed by an approximate [topological sorting](https://en.wikipedia.org/wiki/Topological_sorting) algorithm. The algorithm is + +1. Maintaining a map of an operator and its needed input number. +2. If a variable is not generated by an operator, i.e., `var.generated_op == nullptr`, decrease the needed input number of its pending operators. +3. If there is an operator which needed input number is decreased to zero, just run this operator. +4. After run this operator, just mark the variables are generated and repeat step 2 until all variables are generated. + +Running an operator can be asynchronized. There is a thread pool to execute an `SSA` graph. + +## Synchronize GPU Kernels + +The GPU is a non-blocking device. The different streams need be synchronized when switing streams. In current implementation, the synchronization based on the following algorithm: + +1. `OpHandle` will record `DeviceContext` that it is used. +2. In `OpHandle::Run`, if the `DeviceContext` of current operator is different from `DeviceContext` of any input variable, just wait the generate operator of this input variable. + +The `wait` are implemented by two strategies: + +1. Invoke `DeviceContext->Wait()`, It will wait all operators on this device contexts complete. +2. Uses `cudaStreamWaitEvent` to sending a event to the stream. It is a non-blocking call. The wait operators will be executed in GPU. + +Generally, the `cudaStreamWaitEvent` will have a better perforamnce. However, `DeviceContext->Wait()` strategy is easier to debug. The strategy can be changed in runtime. + +## What's next? + +* Merging gradient of dense parameters has been done. However, the merging of sparse parameters has not been done. +* The CPU version of Parallel Executor has not been implemented. The out-of-order logic will make CPU compuatation faster, too. +* A better strategy to merge gradients can be introduced. We can shrink the gradients from `float32` to `int8` or `int4` while merging. It will significantly speed up multi-GPUs training without much loss of precision. +* Combine multi-Nodes implementation. By the benifit of out-of-order, sending and recving operator can be an blocking operator, and the transpiler does not need to concern about the best position of operator. From 084cdd1f4f78eac9fcae4759575e172d87e81598 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Mar 2018 15:23:39 +0800 Subject: [PATCH 160/180] Rename code --- paddle/fluid/framework/details/computation_op_handle.cc | 4 ++-- paddle/fluid/framework/details/fetch_op_handle.cc | 4 ++-- .../framework/details/multi_devices_graph_builder.cc | 2 +- .../fluid/framework/details/nccl_all_reduce_op_handle.cc | 4 ++-- paddle/fluid/framework/details/op_handle_base.cc | 8 ++++---- paddle/fluid/framework/details/op_handle_base.h | 2 +- .../fluid/framework/details/scale_loss_grad_op_handle.cc | 4 ++-- .../framework/details/threaded_ssa_graph_executor.cc | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc index 53ab8eb775..7a1b40c0b6 100644 --- a/paddle/fluid/framework/details/computation_op_handle.cc +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -24,10 +24,10 @@ ComputationOpHandle::ComputationOpHandle(const OpDesc &op_desc, Scope *scope, place_(place) {} void ComputationOpHandle::RunImpl() { - auto *cur_ctx = dev_ctx_[place_]; + auto *cur_ctx = dev_ctxes_[place_]; for (auto *in : inputs_) { bool need_wait = - in->generated_op_ && in->generated_op_->dev_ctx_[place_] != cur_ctx; + in->generated_op_ && in->generated_op_->dev_ctxes_[place_] != cur_ctx; if (need_wait) { in->generated_op_->Wait(cur_ctx); } diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 4fc05b3248..9180903b86 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -60,8 +60,8 @@ void FetchOpHandle::RunImpl() { auto &t = scope->FindVar(var_name)->Get(); if (platform::is_gpu_place(var->place_)) { #ifdef PADDLE_WITH_CUDA - TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]); - dev_ctx_[t.place()]->Wait(); + TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]); + dev_ctxes_[t.place()]->Wait(); #endif } else { tensors_[i].ShareDataWith(t); diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 6798776076..a1b913a863 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -74,7 +74,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( result.ops_.emplace_back(new ComputationOpHandle(*op, s, p)); auto *op_handle = result.ops_.back().get(); - op_handle->dev_ctx_[p] = const_cast( + op_handle->dev_ctxes_[p] = const_cast( platform::DeviceContextPool::Instance().Get(p)); auto var_names = op->InputArgumentNames(); diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc index f77a4b55a1..5ddf331cfc 100644 --- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc @@ -23,7 +23,7 @@ NCCLAllReduceOpHandle::NCCLAllReduceOpHandle( const platform::NCCLContextMap &ctxs) : local_scopes_(local_scopes), places_(places), nccl_ctxs_(ctxs) { for (auto &p : places_) { - this->dev_ctx_[p] = nccl_ctxs_.DevCtx(p); + this->dev_ctxes_[p] = nccl_ctxs_.DevCtx(p); } } @@ -34,7 +34,7 @@ void NCCLAllReduceOpHandle::RunImpl() { // Wait input done for (auto *in : inputs_) { auto &p = static_cast(in)->place_; - in->generated_op_->Wait(dev_ctx_[p]); + in->generated_op_->Wait(dev_ctxes_[p]); } auto &var_name = static_cast(this->inputs_[0])->name_; diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 63affb7054..e4194a7442 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -42,7 +42,7 @@ OpHandleBase::~OpHandleBase() { void OpHandleBase::Run(bool use_event) { #ifdef PADDLE_WITH_CUDA if (events_.empty() && use_event) { - for (auto &p : dev_ctx_) { + for (auto &p : dev_ctxes_) { int dev_id = boost::get(p.first).device; PADDLE_ENFORCE(cudaSetDevice(dev_id)); PADDLE_ENFORCE( @@ -57,7 +57,7 @@ void OpHandleBase::Run(bool use_event) { #ifdef PADDLE_WITH_CUDA if (use_event) { - for (auto &p : dev_ctx_) { + for (auto &p : dev_ctxes_) { int dev_id = boost::get(p.first).device; auto stream = static_cast(p.second)->stream(); @@ -70,7 +70,7 @@ void OpHandleBase::Run(bool use_event) { void OpHandleBase::Wait(platform::DeviceContext *waited_dev) { #ifdef PADDLE_WITH_CUDA if (platform::is_cpu_place(waited_dev->GetPlace()) || events_.empty()) { - for (auto &dev_ctx : dev_ctx_) { + for (auto &dev_ctx : dev_ctxes_) { dev_ctx.second->Wait(); } } else { @@ -81,7 +81,7 @@ void OpHandleBase::Wait(platform::DeviceContext *waited_dev) { } } #else - for (auto &dev_ctx : dev_ctx_) { + for (auto &dev_ctx : dev_ctxes_) { dev_ctx.second->Wait(); } #endif diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index 78f566c035..71672fd24c 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -31,7 +31,7 @@ class OpHandleBase { std::vector outputs_; std::unordered_map - dev_ctx_; + dev_ctxes_; #ifdef PADDLE_WITH_CUDA std::unordered_map events_; diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index a6a67c9b14..0a6f6129b8 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -21,7 +21,7 @@ ScaleLossGradOpHandle::ScaleLossGradOpHandle(size_t num_dev, Scope *scope, platform::Place place, platform::DeviceContext *dev_ctx) : coeff_(static_cast(1.0 / num_dev)), scope_(scope), place_(place) { - dev_ctx_[place_] = dev_ctx; + dev_ctxes_[place_] = dev_ctx; } ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {} @@ -38,7 +38,7 @@ void ScaleLossGradOpHandle::RunImpl() { } else { #ifdef PADDLE_WITH_CUDA auto stream = - static_cast(this->dev_ctx_[place_]) + static_cast(this->dev_ctxes_[place_]) ->stream(); memory::Copy(boost::get(place_), tmp, platform::CPUPlace(), &coeff_, sizeof(float), stream); diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index fc84031556..105e21cab6 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -96,7 +96,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( // FIXME: Use new device context for (auto &p : places_) { - op->dev_ctx_[p] = fetch_ctxs_.Get(p); + op->dev_ctxes_[p] = fetch_ctxs_.Get(p); } for (auto *var : vars) { From f2d29be784b0d529281fc40bd54ee66cf1eee50f Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Mar 2018 15:31:38 +0800 Subject: [PATCH 161/180] Disable transformer --- python/paddle/fluid/tests/unittests/test_parallel_executor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py index cb16ce26c6..bbfd03c638 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py @@ -424,5 +424,6 @@ class TestTransformer(TestParallelExecutorBase): writer.append_tensor(t) writer.complete_append_tensor() + @unittest.skip("transformer is buggy in multi gpu") def test_main(self): self.check_network_convergence(transformer) From 9a9d67dac28c362b6b2e86ffeec7c68fa1704d01 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Wed, 28 Mar 2018 16:47:06 +0800 Subject: [PATCH 162/180] fix dist train selected rows height missing --- paddle/fluid/operators/detail/send_recv.proto | 8 ++++---- paddle/fluid/operators/detail/sendrecvop_utils.cc | 1 + paddle/fluid/operators/detail/test_serde.cc | 2 ++ paddle/fluid/operators/detail/variable_response.cc | 11 +++++++++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/detail/send_recv.proto b/paddle/fluid/operators/detail/send_recv.proto index 598aaa4c51..2d33f026e4 100644 --- a/paddle/fluid/operators/detail/send_recv.proto +++ b/paddle/fluid/operators/detail/send_recv.proto @@ -59,12 +59,12 @@ message VariableMessage { // lod details: int64 lod_level = 5; repeated LodData lod = 6; + // selected_rows height, aka. original dim0 + int64 slr_height = 7; // tensor data - bytes serialized = 7; + bytes serialized = 8; // selected_rows data - bytes rows = 8; + bytes rows = 9; } message VoidMessage {} - -message TestMessage { int64 test_1 = 1; } diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index d7bbf79c50..f318f8ac28 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -108,6 +108,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, e.WriteUint64(VarMsg::kDimsFieldNumber, dim); } e.WriteUint64(VarMsg::kLodLevelFieldNumber, 0); + e.WriteUint64(VarMsg::kSlrHeightFieldNumber, slr->height()); auto* tensor = slr->mutable_value(); if (platform::is_gpu_place(ctx.GetPlace())) { #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/detail/test_serde.cc b/paddle/fluid/operators/detail/test_serde.cc index e646c894d1..e9e2dc84ad 100644 --- a/paddle/fluid/operators/detail/test_serde.cc +++ b/paddle/fluid/operators/detail/test_serde.cc @@ -40,6 +40,7 @@ void RunSerdeTestSelectedRows(platform::Place place) { // serialize var to ByteBuffer framework::Variable var; auto* slr = var.GetMutable(); + slr->set_height(1000); auto* tensor = slr->mutable_value(); auto* rows = slr->mutable_rows(); tensor->Resize(framework::make_ddim({2, 10})); @@ -106,6 +107,7 @@ void RunSerdeTestSelectedRows(platform::Place place) { } EXPECT_EQ(rows_data2[0], 3); EXPECT_EQ(rows_data2[1], 10); + EXPECT_EQ(slr2->height(), 1000); } void RunTestLodTensor(platform::Place place, int from_type = 0) { diff --git a/paddle/fluid/operators/detail/variable_response.cc b/paddle/fluid/operators/detail/variable_response.cc index bdda570343..862fd26b54 100644 --- a/paddle/fluid/operators/detail/variable_response.cc +++ b/paddle/fluid/operators/detail/variable_response.cc @@ -68,6 +68,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, if (total_written + size_to_write > length) { size_to_write = length - total_written; } + VLOG(3) << "copy raw " << size_to_write + << " bytes, written: " << total_written << ", length: " << length; memory::Copy(boost::get(place), reinterpret_cast(p), cpu, data, size_to_write, gpu_dev_ctx.stream()); @@ -147,6 +149,7 @@ bool VariableResponse::CopySelectRowsTensorData( const platform::DeviceContext& ctx, framework::DDim& dims, int length) { auto var = scope_->FindVar(meta_.varname()); auto* slr = var->GetMutable(); + slr->set_height(meta_.slr_height()); auto* tensor = slr->mutable_value(); tensor->Resize(dims); void* tensor_data = tensor->mutable_data( @@ -348,6 +351,14 @@ int VariableResponse::Parse(Source* source) { } break; } + case sendrecv::VariableMessage::kSlrHeightFieldNumber: { + uint64_t v = 0; + if ((wt != WIRETYPE_VARINT) || !input.ReadVarint64(&v)) { + return tag; + } + meta_.set_slr_height(static_cast(v)); + break; + } case sendrecv::VariableMessage::kSerializedFieldNumber: { PADDLE_ENFORCE((meta_.type() == sendrecv::SELECTED_ROWS || meta_.type() == sendrecv::LOD_TENSOR) && From f707a83c80311f792aac594f3f401743d90cd687 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Wed, 28 Mar 2018 17:09:42 +0800 Subject: [PATCH 163/180] Add link --- doc/design/parallel_executor.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/design/parallel_executor.md b/doc/design/parallel_executor.md index 076c55d281..9aed3b059a 100644 --- a/doc/design/parallel_executor.md +++ b/doc/design/parallel_executor.md @@ -8,7 +8,7 @@ The executor is a very naive interpreter. It runs operators one by one. We can u We want a `ProgramDesc` can be run on different nodes. It is better not to contain device information in `ProgramDesc`. However, we can write a high-performance interpreter, which can hold an alternative intermediate representation of `ProgramDesc`, to take full usage of Multi-GPUs. -ParallelExecutor is an interpreter of `ProgramDesc` which will [out-of-order execute](Out-of-order execution) `Program` in data parallelism mode and maximise the utility of Multi-GPUs. +ParallelExecutor is an interpreter of `ProgramDesc` which will [out-of-order execute](https://en.wikipedia.org/wiki/Out-of-order_execution) `Program` in data parallelism mode and maximise the utility of Multi-GPUs. ## Overview of MultiGPUs logic From 7da1ea07a2cb8927522acd46d6492632f79701e9 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Mar 2018 19:25:45 +0800 Subject: [PATCH 164/180] Use PopAll --- .../details/threaded_ssa_graph_executor.cc | 26 +++++++++++++------ .../details/threaded_ssa_graph_executor.h | 17 ++++++++++-- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 105e21cab6..a6998f45df 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -124,16 +124,26 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( run_all_ready_ops(); // 2. Find ready variable - VarHandleBase *ready_var = ready_vars.Pop(); - + bool timeout; + auto cur_ready_vars = ready_vars.PopAll(100, &timeout); + + if (timeout) { + if (exception_) { + throw * exception_; + } else { + continue; + } + } // 3. Remove the dependency of ready_var. // Find the ready_ops after the ready_var. - pending_vars.erase(ready_var); - for (auto *op : ready_var->pending_ops_) { - auto &deps = pending_ops[op]; - --deps; - if (deps == 0) { - ready_ops.insert(op); + for (auto ready_var : cur_ready_vars) { + pending_vars.erase(ready_var); + for (auto *op : ready_var->pending_ops_) { + auto &deps = pending_ops[op]; + --deps; + if (deps == 0) { + ready_ops.insert(op); + } } } // Keep loop until all vars are ready. diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 8392170311..da559d8553 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include "ThreadPool.h" // ThreadPool in thrird party #include "paddle/fluid/framework/details/ssa_graph_executor.h" @@ -27,10 +28,10 @@ namespace details { template class BlockingQueue { public: - void Push(const T &v) { + void Push(const T &item) { { std::lock_guard g(mutex_); - q_.emplace_back(v); + q_.emplace_back(item); } cv_.notify_one(); } @@ -56,6 +57,18 @@ class BlockingQueue { return v; } + std::deque PopAll(size_t ms, bool *timeout) { + auto time = + std::chrono::system_clock::now() + std::chrono::milliseconds(ms); + std::unique_lock lock(mutex_); + *timeout = !cv_.wait_until(lock, time, [this] { return !q_.empty(); }); + std::deque ret; + if (!*timeout) { + std::swap(ret, q_); + } + return ret; + } + private: std::mutex mutex_; std::condition_variable cv_; From 38b53b37b491f1dccf9133e710198e3d0af34535 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Wed, 28 Mar 2018 19:37:10 +0800 Subject: [PATCH 165/180] Remove Pop method --- .../framework/details/threaded_ssa_graph_executor.cc | 2 +- .../framework/details/threaded_ssa_graph_executor.h | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index a6998f45df..2603aed62a 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -125,7 +125,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( // 2. Find ready variable bool timeout; - auto cur_ready_vars = ready_vars.PopAll(100, &timeout); + auto cur_ready_vars = ready_vars.PopAll(1000, &timeout); if (timeout) { if (exception_) { diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index da559d8553..2ea57ac8f9 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -47,16 +47,6 @@ class BlockingQueue { cv_.notify_all(); } - T Pop() { - std::unique_lock lock(mutex_); - while (q_.empty()) { - cv_.wait(lock); - } - T v = q_.front(); - q_.pop_front(); - return v; - } - std::deque PopAll(size_t ms, bool *timeout) { auto time = std::chrono::system_clock::now() + std::chrono::milliseconds(ms); From 2e577379ca8fc67dbb4fc436297cf7ae826b3fa7 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 28 Mar 2018 16:52:20 +0800 Subject: [PATCH 166/180] add cos --- paddle/fluid/operators/activation_op.cc | 18 +++++++ paddle/fluid/operators/activation_op.h | 49 +++++++++++++++++++ paddle/function/EigenGemm.cpp | 1 + python/paddle/fluid/layers/ops.py | 1 + .../tests/unittests/test_activation_op.py | 15 ++++++ 5 files changed, 84 insertions(+) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 979115eee0..7f4b23c526 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -260,6 +260,21 @@ $out = floor(x)$ } }; +class CosOpMaker : public framework::OpProtoAndCheckerMaker { + public: + CosOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Floor operator"); + AddOutput("Out", "Output of Floor operator"); + AddComment(R"DOC( +Floor Activation Operator. + +$out = cos(x)$ + +)DOC"); + } +}; + class RoundOpMaker : public framework::OpProtoAndCheckerMaker { public: RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker) @@ -561,6 +576,9 @@ REGISTER_OP(ceil, ops::ActivationOp, ops::CeilOpMaker, ceil_grad, REGISTER_OP(floor, ops::ActivationOp, ops::FloorOpMaker, floor_grad, ops::ActivationOpGrad); +REGISTER_OP(cos, ops::ActivationOp, ops::CosOpMaker, cos_grad, + ops::ActivationOpGrad); + REGISTER_OP(round, ops::ActivationOp, ops::RoundOpMaker, round_grad, ops::ActivationOpGrad); diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 4c575b4a7b..3bd3f0bb94 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -331,6 +331,54 @@ struct FloorFunctor : public BaseActivationFunctor { } }; +template +struct Sine { + HOSTDEVICE T operator()(const T& val) const { return sin(val); } +}; + +template +struct Cosine { + HOSTDEVICE T operator()(const T& val) const { return cos(val); } +}; + +// cosine'(x) = -sin(x) +template +struct CosGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = -dout * x.unaryExpr(Sine()); + } +}; + +// cosine(x) = cos(x) +template +struct CosFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.unaryExpr(Cosine()); + } +}; + +// sine'(x) = cos(x) +template +struct SinGradFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out, dOut dout, dX dx) const { + dx.device(d) = dout * x.unaryExpr(Cosine()); + } +}; + +// sine(x) = sin(x) +template +struct SinFunctor : public BaseActivationFunctor { + template + void operator()(Device d, X x, Out out) const { + out.device(d) = x.unaryExpr(Sine()); + } +}; + // round(x) = [x] template struct RoundFunctor : public BaseActivationFunctor { @@ -782,6 +830,7 @@ struct SwishGradFunctor : public BaseActivationFunctor { __macro(abs, AbsFunctor, AbsGradFunctor); \ __macro(ceil, CeilFunctor, ZeroGradFunctor); \ __macro(floor, FloorFunctor, ZeroGradFunctor); \ + __macro(cos, CosFunctor, CosGradFunctor); \ __macro(round, RoundFunctor, ZeroGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(log, LogFunctor, LogGradFunctor); \ diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index bac4659e62..4c81ebdd31 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -63,6 +63,7 @@ struct EigenBlasGemm { const EigenMatrix a(const_cast(A), sizeA); const EigenMatrix b(const_cast(B), sizeB); EigenMatrix c(C, sizeC); + Eigen::Tensor ss; typedef typename Eigen::Tensor::DimensionPair DimPair; Eigen::array dims; diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index f5c6b47d24..ee8de219ee 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -25,6 +25,7 @@ __activations__ = [ 'abs', 'ceil', 'floor', + 'cos', 'round', 'reciprocal', 'log', diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 4a2b35322d..b78fb8a319 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -14,6 +14,7 @@ import unittest import numpy as np +import math import paddle.fluid.core as core from op_test import OpTest from scipy.special import expit @@ -196,6 +197,20 @@ class TestFloor(OpTest): self.check_grad(['X'], 'Out', max_relative_error=0.007) +class TestCos(OpTest): + def setUp(self): + self.op_type = "cos" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + self.outputs = {'Out': math.cos(self.inputs['X'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out', max_relative_error=0.007) + + class TestRound(OpTest): def setUp(self): self.op_type = "round" From e868950e5f938fe737b26f5040ffc7c09d29f6e6 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 29 Mar 2018 11:33:21 +0800 Subject: [PATCH 167/180] Add comments --- paddle/fluid/framework/details/ssa_graph.h | 1 + paddle/fluid/framework/details/threaded_ssa_graph_executor.cc | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/details/ssa_graph.h b/paddle/fluid/framework/details/ssa_graph.h index c1e041b8c0..ac3e2d8699 100644 --- a/paddle/fluid/framework/details/ssa_graph.h +++ b/paddle/fluid/framework/details/ssa_graph.h @@ -25,6 +25,7 @@ namespace details { struct SSAGraph { std::vector>> vars_; + // aux variables to represent dependency. Useful to resolve data hazard. std::unordered_set> dep_vars_; std::vector> ops_; }; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 2603aed62a..3f8655147b 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -15,7 +15,6 @@ #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fetch_op_handle.h" -#include "paddle/fluid/framework/scope.h" namespace paddle { namespace framework { From ce16400daedfa8f793d20d44081db7f417af693a Mon Sep 17 00:00:00 2001 From: "Yang Yang(Tony)" Date: Wed, 28 Mar 2018 21:15:12 -0700 Subject: [PATCH 168/180] make append activation in place by default (#9417) --- python/paddle/fluid/layer_helper.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index d771837fc5..4341e06596 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -398,7 +398,6 @@ class LayerHelper(object): return input_var if isinstance(act, basestring): act = {'type': act} - tmp = self.create_tmp_variable(dtype=input_var.dtype) if 'use_mkldnn' in self.kwargs: act['use_mkldnn'] = self.kwargs.get('use_mkldnn') @@ -408,9 +407,9 @@ class LayerHelper(object): self.append_op( type=act_type, inputs={"X": [input_var]}, - outputs={"Out": [tmp]}, + outputs={"Out": [input_var]}, attrs=act) - return tmp + return input_var def _get_default_initializer(self, dtype): if dtype is None or dtype_is_floating(dtype) is True: From bdda08d9f2846cd4a5cb407e993be0bc03a674a5 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 28 Mar 2018 23:13:38 +0800 Subject: [PATCH 169/180] add sin --- paddle/fluid/framework/CMakeLists.txt | 2 +- paddle/fluid/operators/activation_op.cc | 24 ++++++++++++++++--- paddle/fluid/operators/activation_op.h | 1 + paddle/function/EigenGemm.cpp | 1 - python/paddle/fluid/layers/ops.py | 1 + .../tests/unittests/test_activation_op.py | 17 +++++++++++-- 6 files changed, 39 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index a4ea74a6d2..8c8def6bf4 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -100,7 +100,7 @@ cc_test(init_test SRCS init_test.cc DEPS init) cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) -cc_test(channel_test SRCS channel_test.cc) +# cc_test(channel_test SRCS channel_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 7f4b23c526..a6d9ce0f04 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -264,10 +264,10 @@ class CosOpMaker : public framework::OpProtoAndCheckerMaker { public: CosOpMaker(OpProto *proto, OpAttrChecker *op_checker) : framework::OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "Input of Floor operator"); - AddOutput("Out", "Output of Floor operator"); + AddInput("X", "Input of Cosine operator"); + AddOutput("Out", "Output of Cosine operator"); AddComment(R"DOC( -Floor Activation Operator. +Cosine Activation Operator. $out = cos(x)$ @@ -275,6 +275,21 @@ $out = cos(x)$ } }; +class SinOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SinOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : framework::OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "Input of Sine operator"); + AddOutput("Out", "Output of Sine operator"); + AddComment(R"DOC( +Sine Activation Operator. + +$out = sin(x)$ + +)DOC"); + } +}; + class RoundOpMaker : public framework::OpProtoAndCheckerMaker { public: RoundOpMaker(OpProto *proto, OpAttrChecker *op_checker) @@ -579,6 +594,9 @@ REGISTER_OP(floor, ops::ActivationOp, ops::FloorOpMaker, floor_grad, REGISTER_OP(cos, ops::ActivationOp, ops::CosOpMaker, cos_grad, ops::ActivationOpGrad); +REGISTER_OP(sin, ops::ActivationOp, ops::SinOpMaker, sin_grad, + ops::ActivationOpGrad); + REGISTER_OP(round, ops::ActivationOp, ops::RoundOpMaker, round_grad, ops::ActivationOpGrad); diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 3bd3f0bb94..7fbe4efc04 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -831,6 +831,7 @@ struct SwishGradFunctor : public BaseActivationFunctor { __macro(ceil, CeilFunctor, ZeroGradFunctor); \ __macro(floor, FloorFunctor, ZeroGradFunctor); \ __macro(cos, CosFunctor, CosGradFunctor); \ + __macro(sin, SinFunctor, SinGradFunctor); \ __macro(round, RoundFunctor, ZeroGradFunctor); \ __macro(reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ __macro(log, LogFunctor, LogGradFunctor); \ diff --git a/paddle/function/EigenGemm.cpp b/paddle/function/EigenGemm.cpp index 4c81ebdd31..bac4659e62 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/function/EigenGemm.cpp @@ -63,7 +63,6 @@ struct EigenBlasGemm { const EigenMatrix a(const_cast(A), sizeA); const EigenMatrix b(const_cast(B), sizeB); EigenMatrix c(C, sizeC); - Eigen::Tensor ss; typedef typename Eigen::Tensor::DimensionPair DimPair; Eigen::array dims; diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index ee8de219ee..0e5987ee59 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -26,6 +26,7 @@ __activations__ = [ 'ceil', 'floor', 'cos', + 'sin', 'round', 'reciprocal', 'log', diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index b78fb8a319..fb162f8b73 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -14,7 +14,6 @@ import unittest import numpy as np -import math import paddle.fluid.core as core from op_test import OpTest from scipy.special import expit @@ -202,7 +201,21 @@ class TestCos(OpTest): self.op_type = "cos" x = np.random.uniform(-1, 1, [4, 4]).astype("float32") self.inputs = {'X': x} - self.outputs = {'Out': math.cos(self.inputs['X'])} + self.outputs = {'Out': np.cos(self.inputs['X'])} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Out', max_relative_error=0.007) + + +class TestSin(OpTest): + def setUp(self): + self.op_type = "sin" + x = np.random.uniform(-1, 1, [4, 4]).astype("float32") + self.inputs = {'X': x} + self.outputs = {'Out': np.sin(self.inputs['X'])} def test_check_output(self): self.check_output() From 450be963feb74b591bb232cd2b05aac9b01b23b4 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 29 Mar 2018 14:34:45 +0800 Subject: [PATCH 170/180] fix sparse errors --- paddle/fluid/operators/detail/grpc_client.cc | 1 - .../operators/detail/sendrecvop_utils.cc | 2 +- .../fluid/operators/detail/sendrecvop_utils.h | 7 +++++++ paddle/fluid/operators/detail/test_serde.cc | 19 +++++++++++-------- .../operators/detail/variable_response.cc | 9 ++++++--- paddle/fluid/operators/listen_and_serv_op.cc | 2 ++ paddle/fluid/operators/send_op.cc | 4 ++-- 7 files changed, 29 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/detail/grpc_client.cc index e73bbe7537..03b789f326 100644 --- a/paddle/fluid/operators/detail/grpc_client.cc +++ b/paddle/fluid/operators/detail/grpc_client.cc @@ -204,7 +204,6 @@ std::shared_ptr RPCClient::GetChannel(const std::string& ep) { } grpc::ChannelArguments args; - args.SetInt("grpc.testing.fixed_reconnect_backoff_ms", 5000); args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE); args.SetMaxSendMessageSize(std::numeric_limits::max()); args.SetMaxReceiveMessageSize(std::numeric_limits::max()); diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/detail/sendrecvop_utils.cc index f318f8ac28..7e3f015dab 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/detail/sendrecvop_utils.cc @@ -155,7 +155,7 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var, ProtoEncodeHelper e2((char*)buf, 128); // NOTE: rows is of type int64_t size_t rows_memory_size = - slr->rows().capacity() * framework::SizeOfType(typeid(int64_t)); + slr->rows().size() * framework::SizeOfType(typeid(int64_t)); e2.WriteVarlengthBeginning(VarMsg::kRowsFieldNumber, rows_memory_size); slices[2] = ::grpc::Slice(e2.size()); memcpy(const_cast(slices[2].begin()), e2.data(), e2.size()); diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.h b/paddle/fluid/operators/detail/sendrecvop_utils.h index 3b87562703..b3b2b8469c 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.h +++ b/paddle/fluid/operators/detail/sendrecvop_utils.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include #include @@ -35,6 +36,12 @@ namespace detail { #define BATCH_BARRIER_MESSAGE "BATCH_BARRIER@RECV" #define FETCH_BARRIER_MESSAGE "FETCH_BARRIER@RECV" +static int64_t GetTimestamp() { + struct timeval tp; + gettimeofday(&tp, NULL); + return tp.tv_sec * 1000 + tp.tv_usec / 1000; +} + typedef void (*DestroyCallback)(void*); void SerializeToByteBuffer(const std::string& name, framework::Variable* var, diff --git a/paddle/fluid/operators/detail/test_serde.cc b/paddle/fluid/operators/detail/test_serde.cc index e9e2dc84ad..ea1670e56f 100644 --- a/paddle/fluid/operators/detail/test_serde.cc +++ b/paddle/fluid/operators/detail/test_serde.cc @@ -43,12 +43,11 @@ void RunSerdeTestSelectedRows(platform::Place place) { slr->set_height(1000); auto* tensor = slr->mutable_value(); auto* rows = slr->mutable_rows(); - tensor->Resize(framework::make_ddim({2, 10})); + tensor->Resize(framework::make_ddim({564, 128})); tensor->mutable_data(place); - int tensor_numel = 2 * 10; + int tensor_numel = 564 * 128; math::set_constant(ctx, tensor, 32.7); - rows->push_back(3); - rows->push_back(10); + for (int i = 0; i < 564; ++i) rows->push_back(i); ::grpc::ByteBuffer msg; operators::detail::SerializeToByteBuffer("myvar", &var, ctx, &msg); @@ -65,6 +64,7 @@ void RunSerdeTestSelectedRows(platform::Place place) { sendrecv::VariableMessage varmsg; EXPECT_TRUE(varmsg.ParseFromString(tmp)); + // deserialize bytebuffer EXPECT_EQ(varmsg.varname(), "myvar"); EXPECT_EQ(varmsg.type(), 1); @@ -75,8 +75,10 @@ void RunSerdeTestSelectedRows(platform::Place place) { for (int i = 0; i < tensor_numel; ++i) { EXPECT_FLOAT_EQ(tensor_data[i], 32.7); } - EXPECT_EQ(rows_data[0], 3); - EXPECT_EQ(rows_data[1], 10); + for (int i = 0; i < 564; ++i) { + EXPECT_EQ(rows_data[i], i); + } + // deserialize zero-copy // framework::Variable var2; // operators::detail::DeserializeFromByteBuffer(msg, ctx, &var2); @@ -105,8 +107,9 @@ void RunSerdeTestSelectedRows(platform::Place place) { for (int i = 0; i < tensor_numel; ++i) { EXPECT_FLOAT_EQ(tensor_data2[i], 32.7); } - EXPECT_EQ(rows_data2[0], 3); - EXPECT_EQ(rows_data2[1], 10); + for (int i = 0; i < rows2->size(); ++i) { + EXPECT_EQ(rows_data2[i], i); + } EXPECT_EQ(slr2->height(), 1000); } diff --git a/paddle/fluid/operators/detail/variable_response.cc b/paddle/fluid/operators/detail/variable_response.cc index 862fd26b54..f59c9b50bb 100644 --- a/paddle/fluid/operators/detail/variable_response.cc +++ b/paddle/fluid/operators/detail/variable_response.cc @@ -68,8 +68,6 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, if (total_written + size_to_write > length) { size_to_write = length - total_written; } - VLOG(3) << "copy raw " << size_to_write - << " bytes, written: " << total_written << ", length: " << length; memory::Copy(boost::get(place), reinterpret_cast(p), cpu, data, size_to_write, gpu_dev_ctx.stream()); @@ -152,6 +150,10 @@ bool VariableResponse::CopySelectRowsTensorData( slr->set_height(meta_.slr_height()); auto* tensor = slr->mutable_value(); tensor->Resize(dims); + PADDLE_ENFORCE_EQ( + tensor->numel(), + length / framework::SizeOfType( + paddle::operators::detail::ToTypeIndex(meta_.data_type()))); void* tensor_data = tensor->mutable_data( ctx.GetPlace(), paddle::operators::detail::ToTypeIndex(meta_.data_type())); @@ -168,7 +170,8 @@ bool VariableResponse::CopySelectRowsData( const platform::DeviceContext& ctx, int length) { auto var = scope_->FindVar(meta_.varname()); auto* slr = var->GetMutable(); - slr->mutable_rows()->resize(length / 8); // int64 + slr->mutable_rows()->resize(length / + framework::SizeOfType(typeid(int64_t))); // int64 int64_t* rows_data = slr->mutable_rows()->data(); // copy rows CPU data, GPU data will be copied lazily. diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 08b83375dd..9796fabdb6 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -141,6 +141,7 @@ class ListenAndServOp : public framework::OperatorBase { // and this will still work. std::vector> fs; + double ts = detail::GetTimestamp(); // block0 contains only listen_and_serv op, start run from block1. for (int blkid = 1; blkid < num_blocks - 1; ++blkid) { fs.push_back( @@ -162,6 +163,7 @@ class ListenAndServOp : public framework::OperatorBase { LOG(ERROR) << "run sub program error " << e.what(); } } + VLOG(2) << "run all blocks spent (ms) " << detail::GetTimestamp() - ts; // Reset the received sparse variables, the sum operator would not // sum the input sparse variables which rows is empty at the next diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc index fdf3c06ef0..0752bd1bbd 100644 --- a/paddle/fluid/operators/send_op.cc +++ b/paddle/fluid/operators/send_op.cc @@ -72,7 +72,7 @@ class SendOp : public framework::OperatorBase { for (size_t i = 0; i < ins.size(); i++) { if (NeedSend(scope, ins[i])) { - VLOG(2) << "sending " << ins[i] << " to " << epmap[i]; + VLOG(3) << "sending " << ins[i] << " to " << epmap[i]; rpc_client->AsyncSendVariable(epmap[i], ctx, scope, ins[i]); } else { VLOG(3) << "don't send no-initialied variable: " << ins[i]; @@ -81,7 +81,7 @@ class SendOp : public framework::OperatorBase { PADDLE_ENFORCE(rpc_client->Wait()); for (auto& ep : endpoints) { - VLOG(2) << "batch barrier, ep: " << ep; + VLOG(3) << "batch barrier, ep: " << ep; rpc_client->AsyncSendBatchBarrier(ep); } PADDLE_ENFORCE(rpc_client->Wait()); From f5da16e51b05ac88a9402f256cee0a101c58116d Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Wed, 28 Mar 2018 23:57:17 -0700 Subject: [PATCH 171/180] Disabling channel test to debug issue (#9491) --- paddle/fluid/framework/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index a4ea74a6d2..8c8def6bf4 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -100,7 +100,7 @@ cc_test(init_test SRCS init_test.cc DEPS init) cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) -cc_test(channel_test SRCS channel_test.cc) +# cc_test(channel_test SRCS channel_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op From 241f3c988f87978d05a8b2f516509490b01b5ef5 Mon Sep 17 00:00:00 2001 From: Thuan Nguyen Date: Thu, 29 Mar 2018 00:04:10 -0700 Subject: [PATCH 172/180] Add channel design document (#9463) * Add channel design document * Update channel send/recv state diagram --- doc/fluid/design/concurrent/channel.md | 139 ++++++++++++++++++ .../design/concurrent/images/channel_recv.png | Bin 0 -> 136646 bytes .../design/concurrent/images/channel_send.png | Bin 0 -> 85643 bytes 3 files changed, 139 insertions(+) create mode 100644 doc/fluid/design/concurrent/channel.md create mode 100644 doc/fluid/design/concurrent/images/channel_recv.png create mode 100644 doc/fluid/design/concurrent/images/channel_send.png diff --git a/doc/fluid/design/concurrent/channel.md b/doc/fluid/design/concurrent/channel.md new file mode 100644 index 0000000000..a00a3325e7 --- /dev/null +++ b/doc/fluid/design/concurrent/channel.md @@ -0,0 +1,139 @@ +# Channel Design + +## Introduction + +A Channel is a data structure that allows for synchronous interprocess +communication via message passing. It is a fundemental component of CSP +(communicating sequential processes), and allows for users to pass data +between threads without having to worry about synchronization. + +## How to use it + +Paddle offers python APIs to open and close channels, along with sending +and receiving data to/from a channel. + +### Create a channel + +Creates a new channel that takes in variables of a specific dtype. + +- **fluid.make_channel(dtype, capacity=0)** + - **dtype**: The data type of variables being sent/received through channel + - **capacity**: The capacity of the channel. A capacity of 0 represents + an unbuffered channel. Capacity > 0 represents a buffered channel + +``` +ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR, 10) +``` + +### Close a channel + +Closes a channel. Any pending senders and receivers will be awoken during +this time. Receivers can still receive from a closed channel, but senders +are not allowed to send any additional data to the channel (Paddle will +raise an exception if users try to send to a closed channel.) + +- **fluid.channel_close(channel)** + +``` +fluid.channel_close(ch) +``` + +### Send data to a channel + +Sends a variable to a channel. Currently, variables of dtype `LoDTensor`, +`LoDRankTable`, `LoDTensorArray`, `SelectedRows`, `ReaderHolder`, and +`ChannelHolder` are supported. + +By default, the data of the Variable is moved from the sender to the receiver, +however the user can optionally copy the data before performing the send. + +- **channel_send(channel, variable, is_copy=False)** + - **channel**: The channel to send the variable to + - **variable**: The variable to send to the channel + - **is_copy**: If set to True, channel_send will perform a variable assign + to copy the source variable to a new variable to be sent. + +``` +ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR) +var = fill_constant(shape=[1],dtype=core.VarDesc.VarType.INT32, value=100) +fluid.channel_send(ch, var, True) +``` + +### Receive data from a channel + +Receives a variable from a channel. The data of the variable is moved to the +receiving variable. + +- **channel_recv(channel, return_variable)** + - **channel**: The channel to receive the variable from + - **return_variable**: The destination variable used to store the data of the + variable received from the channel + +``` +ch = fluid.make_channel(dtype=core.VarDesc.VarType.LOD_TENSOR) +var = fill_constant(shape=[1],dtype=core.VarDesc.VarType.INT32, value=-1) +fluid.channel_recv(ch, var) +``` + +## How it Works + +Channels provides a simple interface for different threads to share data. +To support the synchronization requirements, channels utilizes a series of +internal queues, locks, and conditional variables. + +### QueueMessage + +QueueMessage encapsulates the state of the channel send/receive operation to be +put in the **sendq/recvq**. It contains a condition variable used to lock the +thread (when there are no available sends/receives). In addition, it contains +a callback function to notify a thread when the QueueMessage is being +processed by the channel. + +### Queues + +- **buff_**: This queue holds the data buffer in a buffered channel. The +capacity is set to the capacity of the channel. This data buffer is not +used in an unbuffered channel. + +- **sendq**: This queue holds the QueueMessage of any pending senders of a +channel. When a thread performs a channel_send operation on the channel, the +channel_send operation will put a new QueueMessage on the sendq and block the +current thread under two conditions: + 1. The channel is buffered and is full + 2. The channel is unbuffered and does not have a receiver + +- **recvq**: This queue holds the QueueMessage of any pending receivers of a +channel. When a thread performs a channel_recv operation on the channel, the +channel_recv operation will put a new QueueMessage on the recvq and block the +current thread under two conditions: + 1. The channel is buffered and there is no data on the buff_ + 2. The channel is unbuffered and does not have a sender + +### State diagram + +#### Channel Send + +

+
+

+ +#### Channel Receive + +

+
+

+ +## Limitations and Considerations + +### Variable Copy + +In golang, variables in channels are copied from the sender to the receiver. +In Paddle, the data from our variables are **moved** from sender to receiver. +As a result, these variables should not be used after they are sent. We +provide a flag in channel_send method to allow users to copy the variable to +be sent before it is sent. + +Please note that this is acheived by adding an **assign** operator and creating +a temporary variable that is sent in place of the original variable. Please +note that **assign** operator has limited support for only certain variables +datatypes. diff --git a/doc/fluid/design/concurrent/images/channel_recv.png b/doc/fluid/design/concurrent/images/channel_recv.png new file mode 100644 index 0000000000000000000000000000000000000000..c06cd15ae7b8a8c94d5742f6675e389081fcf789 GIT binary patch literal 136646 zcmZsCWn7fq_O>8MOM}uOAl=>F1Jd1{(p^e7(yi1G0)ljRh=4G3Gjw;?d*gG?`9B}t z5Bvo8-1pvl#kJPD)|v=qkTe=H0rHC%FVJLVBvf9!fCU3TRfw;E|8cN^|McPo*$Y_- zQ8iD4{cLzowf^b8Q8~M{P+i^kItx+vv>KmO7vx?BLn{h78sy2XoEQ%s;3HMZfS)Wl ziXQU=pCxi`LzxK6pR@bvTdT5Ntsa}d9S@iMj?&pJ&CH~IYhPI<)00}Kh?G8?A*Hs(B~y$o6Gki`E_|1=$vb9)-|W9>um=Su3G%T<*R`S?f#4 zcqqe(su zw#+@d6P6bR4@1qfrSnBSvG(S$3V}#0e@_)ii!o3Kbv+F-pmO8WHm66)Y+q0=5cpaT zTcp53Wsto}FK$DpQJh!V_GbULk%&8$k*9fTXpR!!pR&5-afiT=h3Y&|U7^M7rtopuZ>8K35yw3H@*XePJ$gJnAJ+k&t3Pm$c5grv>v-Ezp}l=pXm4@;+?*~ieN;oL2Cg$BNG>E~aBRwI@?7AUM9|NJ zhD~6zbxy_lryv`b=hEn~#u8`a$dAtd3FQSg3oJ~2IHW|n<&;mWAtrWQ!mTD=mj~CK z-dz1pEl&x5S}CoX3SdG8xvcAb@Ry-G@mp8PETXuzyxy0spxk;@55^g z`RR@8SQmu!zt;0tY4K99+7uu#DZzHhv@Jf4J3ZCVmfi$Ys(27bM!E|qfbjN{0laXu zC*DUx`qUwWIQwcV`;i^utKk1={P|69DUFn!npS3t@v8VPoVlBQM>deBYpd&&&414k zxw@&@!QBW66JozPq5h-tZJWKIbN?l8vCjhT|LiyrsURv)v}Xs(zQj4Xn|6^Rx7W3S zJozD`d%~}28++_7yng)-nnf)|d@=E>uR^z7XN|eAo~bSNUxz3Hk*W5UT=_F|(eH_|63+LV)c>nT5vIjUe22g8# z?a!L(p|cu^wqLP^ng*&8CaTP63n>y<++uJ`0p`X!@w>`MqQ>zb6{^K%#RY9hU zJrK{6?Pg|m@WyR@C)B*@{l2|kwVSl!SVR1P2%nSX&yn%*y34e_Go~~X(iw|?i!MZ@ zsdpzYam{}WJEqjIiQPvYA|^n*Mbh3%D@U%WKzco#G`Q1{#cce4R}CD*kV)mq4lp@~ z(QNjOp~Ee4^!AC)`2EM%Zo8JIpHnp9F)WNLZLCY8xx?b$*OY{{#sjmXqkHMG1x!*H zuG7&gRk;h1t0D<_@0B$_ZO*NIh}jBAFTr&&|Cjv_P{XM(f-H%yoYndVUOsB(m5=u_ zDRHqb-~UEL^ikY8JW|oMGS!!^*CV#@himBn-=iWR>UcUQWMi9e_3e-!3ibEMjchb$ zED>$iH8yeOsgU9UNJwxJukVr+8OdaYv*9p@Z)3&>7W@?=pNq3X< zoeTpLTaM<)?lsZg>RJl!ffM0>9{>P#02!7x62dDjtw*of01EL1<+?;$YedTs%z5J8 z%XsW<+_Z;sXX$gIrJj`h`>{H>FSua&$76#do777AbXZR9Gz$oM%f)HSq5gK6`pfq8 z^<+gwv!DN9_CH={11lQ|fd}`w)qNm7C@~2%1wH(Ja*0egx z+5Qjh$y^Y9RkA?*=bxLS_YG=_XZq<}GFEj^6^)KA%&XY4xkhI!uWIUNtEXIs+NACco55<4xj{!ZJDgfX=n@B=-TrqU5e=;CxMxe;9v_D< zy?PJTnB${ITdiXQtt@-g=N+}RRePJQ-8`@x5KjL$3_T;}cbek_o(rMHZXHZx;;Ew3 z7qRTaw^aj4cpmz?1(S12^_LY_VbrR0>vI1VLkOG@7WgbSMn|oqR;zvd#`bvko`ER% zx83gL&{-3H5c+1Um4Jtu#ng}+>%Rm+kLddj1SZ+VpRdtlMnMTN*Qgn`e-kM3ojT+7 zO+~~*$&QJ^Z;KagdQ|^22_r4o6R7LbNrLIZ0Sc&0YdsSO`;2hvU4>vi+@rU*3MtVc zx5dZLJ3rZU@D2YZzo75$FqkDQiAZp?D$RWG`E9}9?Q-YYUIhIpHsW(}1mXmBdlTVT z?}{(~!lGA{WIa3U4+iDV{CG>}7paWuuWTdeM^s<~mX@(kxYo7%b9L|C$>a$)`~EEr z7+A5a5q>VH>m;+lleu2oBz{8& zI<1DUbRk)E8>~GQs24rkhE79Ec#}d5H4~&1fkLxy>cqeZ#W~<_ zHKz=IFOu5*KPn$1u*uiihr-$zZm2jN#(28dPBtIf&#$6f>O(oR)VV~>F$&vz z9?Hq{NF%IFLkZWAQKbTVOFI8J_x^)1@n}c`7DOcH6$XQs4(SQL0fz;RHQs{+89W{o zv^dTL!t0j1`?Kj%3U5H!0(@f+0VK*V<8CJ!dh2WKme|O5UmNmC)VbHyc#dG{uvsE0 z8geM!qrngoVvGAc&WKkUwiRvG>LU6UAN@4R9h<099n4q9&Zu?8Bc2b44)sFA@0V62 zk~IEiT;qDEVvHbA>*2UqScf+Mq1ytj#QZL$#%q$g>EkBF0sqg`w)c;c89lVY+!F8Y z_StLVFq(+l4`dQBKb%8qoLnD%xy>~=V-Fi}>CRl;FK}aWaS!AvX5*k|J-gKqva0ch#INMDR>OL>$ zHjC57e}pRwX9mEj?_9(FRS3Jzav6SQ@rUF-4(p|NY3d+X<=%)& z8{Z4rWxpx^Neoj8UJ-CzCBuDeVHwV3%a!w2|CCW~ zb)t3hO6_2;l;h)h2S)^i*0cz*O2D$0NTo!r#^J_$uG;(OkM6lXor$OUu4Svysn(jS z-y}LbrkKPFZt?EfbF#6Z*mu3luZh^hU3D0TFLR!Ct;poSRAW2Dq$A*ibim)N8NU4c zeVE_05XqS52rh+~06aMP7izJ#p0uYYPcox!N{)b+nB_>;H$J!B9^H>uhx(-&^G}FJiQh|gB*V^%mCvPhL&0ob^=c}z@6NTEEbv{EJRq zKJ$Gwr~A{!A?TGjKKhsJ)@+{F=rnL=(Y@@&@(?Dm6>qd1#a*ph9wEzdy;8MStn%5$ z4C>gdwB0mF5BUp>GTo_1P`5vclyiFY_wVIa52qJhiA_ff1{nh0$hfpR!LMQoFh=3H z2G-e-eeq?M)rysdxELQ+wCyhhCrb=7TVDATayXRmJ$Aa>-pyB$2+4@Eu#LDxUQ1qP z6-63d^3c-J$)UsCUM&YewsHeNZEbC4mRGAn0oTXmlcnlWhl|ar1WvzL!Lz0^!;{DY zKG!51(_>ry>_HqD@x#1a_ue607j$MEn>b7I3TxsFUE*m!WKi<>9{;c|ouw;h;W6uH z`CpHy_C(h0Ol17hV32nX`Q)E{xhD-(!>X2zfCSsiLtRIzX8|GO10X1q?Y}L<%32ky z{Xx@1)7inNBCO(u^;Z&_BYsvSvKL&Goq&#hA-R9*4>$DuqI)S#vWp;k5q3 z!ooUjzP#}8@E=BV1bf?Y={3l*LR&Rnk^nO zL^Wn^Zz`@DkS&&gnJ`f=FI=_jyM0YQaWW5*ja0o_I5hDlWGy6N%=~kZSzn^GMcP~Q zNI*V?0~8Z!4q-F84V#)(BGyyPKdlD)detgSc=`Od;k@FGsdF!c)GEiG*xrFE4I4!l zbzJj5EK!!I_iMNwT?(Gs%_j=nJ&1JEpyHqW@N2%rKB;gcoccyg=rNv`9tAbE@9yp@ zvHx6m4xN>+YACa|jZ;I@vjNGO=RiUwTm?8UPI&vs$s!=%#KXn`t5-^<*QAz~mIe?4 zN6_y!sqOLBmbE-!XPoNv`1oso9BI@}K+;-&oGKu{;50`4CXOi8HB|EY+n^Uws%_Xn zDkp;rGduCwkDzI0Q&1T!F^|feA{tEtx3h*2C%GmA<}mLxV{Hxhlq(4Pa+)Pa55bhy zX_-i^fle=5twx7H`NGE^DeoeBML|rU^`HKjoF#{%NT#4wZOJTJJoE>*9p(%kn=e{{ z?|QRLf+jOlD|_WU83F+Stg;fPTLZ`8L+;-n`}?a`G1TtOaYVi<1CK)y}C}-QK*C;FBg=g2XN)%cFyEG{g(r zE`AB~qyneP)E?`0|A*p$r$^V-E_iYokHZC7^-?uV6I`h4(NZgi&!rt^BAwSVxmpw+ zqnu`?L2n}iD26|2bBQlX)usJq!;=D8GMt&}>q`bZ+WPkS8ZqWfHG{-`Yn}3xF7a;3 z86W5jBxx5N={0$+lp<&Z1RcUf2_s{mN=O~Qpe05}Mx?0KQM`=3G%56hep(x-inN1` zK*6)yppR!ZxTh9+jcz632}dr`xzzb6JiXSSk&jD%%U&i-d*SgEgF8l`Xg78;oZ2e4 zqbHQ(hx85){KNInSJ4`uD+gBn<$P&$auBBF?Wpk6pBzCSsJH@&ptnSCB(AAbhF{op z7K5pi4Jd!u&m8tG8JPfW;?>nyP3eLXdFq2^lKEGM#OCv^bd(ww2vfDo^$#pQ<4&m; z-Tnl0r+FD-W4jzRM}2G;*F{5Y`zK@&zN|XQ zk!vOc3smW`zWBG_7e|LRN!ZNz_3BrVWJ(G`(Mdsz%^q=%7$pNgM0U@2AYCqz@9z6! ziSzqno&xsfcPGoDwA2u_Kvd?}RZzSyR2|SHM+_iwSCJ%JN@QC*{u<~_HQ#K(@iw4T zXCo0+j*NpY94H}f99$C(XK@{r$F)@(N$5Dx;G5%(=SZs{H)p0>bLb(x)&yjyC81;; z&h!BVF#+iZ59jHNk7o)UYCu&f0Vdf8s!CrapsK8CE-Z2Ve2|3LW0SY@<7jTi zNzu;}!)3s}6Uq^W`p)BXFYWS6Anz)O?2v}KKAw!BoHhE6wJSVkkY28?n>fQcve7XH~mEvBuk)aftxx$;<@}?nxE6Av(uxPS9@!<#9PRQ`B6i& z_H36TyF%Ll#WB5}sce4p@^6_qh+zaA83A}n3B_^l_4r%qSEEmK0mJ^13-T`hztzZp z3(*LA`Ou$^L`4ntap?01+-dcu{H2-TVDP}cl)$g?bg;R4_Z$a*0s~(a@;A@v@D`L} z2@PJub>%NZN&@@?xTm_qG`9%@cxGS$9X-ki0=LBd=aw`8zm|~0l){(E%}D<^umn*v z=6IC#dDO|%7!q4(5#$OXSSG0ZOK=>MK8K1Tpk4w(#TNy_i@gXZV#eet+r1>K?y@%3 zka16%?8bU_YA&v(hj=N-)!R{IhxF&FGoIo3sGt8i>fk1azNhoT7roaZK7Q)Qwoz)6 zeKh90LAs-(*7w~848(c?b6*)8vk3oE!v#5jXeWL`+0z5Hv1M3SAG6jdnUWZl%XIv= z^0Ex*G8gHXKap%Zw6}aTmdNL={h4lU_xhP3AY*;rQ|X^QX(Lr9*ba=Iz5*;9U)lG4 zRdw22Gf&vnF>=@4ks}z>OeSY;U)u%PzYli9X=PcwA=m7H`~6v@v}fmcYtE${Vd@>4 zoX239+A(T-l_Od$S7_|=(mfjm*Y^gGGu(u}y0Cf6XQry)6L4NN5vVGlT{5|39{Bf& zYPDT#Kip5a@Ek60(y|i}TTuLD1B7M>*X-zB_6!|m8xnr6Sp21LtpPvcDDS!Ur8X*h zX4j~MTw>9D^cXyPDI{G|x86)Jv{{qfF8>kqb$RFAelOaV zgOr981jMk9+8>?w4JB~Z=Ijd%MxX_#I6jjO!z6% z>HCVt-}|PE0f|?U5x^gbjL^iBJ0S}!c{?^vt_ITdo@xA26>!8|Qlv{fO!uj*loIlF zSqxBsNak9qBv@25kl?<3h2VArgP|2S32QnJLQj3zQN)}ull7QUl z>M!2qhi;%u){g9*r^|{Ayq0S7dJ1*`C~rmx|e; zYb2nKhaj1i@+2D#3;d}p=u)YjofA`U@%{ER%QL_EYljH3*!~qUpy_5mu2{={LNJCp z53)@ye*|u3Ym5KuDt^z3Y%}dnVGXjA>>z`x?XI4Szidkw!ll=@KWj>6z8Cf(lyA11 z^fD#Dev2)|o%@7|hjaJ@lFrC6&Ah%hR`6-Ce?C)S&@&9O7**sTt>eJH!Uba$lpx%> zrz9+ZzwV_*-jDPW0aVs6+WYYZ^rv{sbGmD%?X#~41mo7XPZM^tN7fa=%z%^wkAO|sEf>lO~i0-R0lT|qN^3_w{?;DKg#mgf#@+#aQNMA#KcK9@H!FWu{ zHTDX#HWfFn38vVy{R(%<2VTVjvj*Uu4bkBO9X1OutTr8piVyPoVa1{F;~|*R&ykmQ z5v4h`hGAXjlU+HH!fPXyS7Fj&f1Ld`}HVq3(4KlqM$m^QGU37=T8P3a&G)D4v{rBH^1_>d+j7C2+t?2WiRP zP-$K5V)syK_|e9<^Ie|6FLj08+d6){NbTCh_1V5C9N+%h;crqCu(%t2eb zENXbmBvW%0Xit5=^ibKLDD0w`M-7%LD_R+-KNo^B!iTm;AfpE4tNvm%w05nU&a1^l zq2bVX;+=mp6#D-$KivpOoIvT_9S&rtHW@(Hr^F##!!m%x)6p$9(X%_F(?2-V;V*N> zmMGJ!VBxVU2Z?uLI_~(b>msB25EJ99f+IAfb+q4qC*794v;59=i(anbVQ(^|9*63H z%KewxWl;n`T+wA=#)dk61FpK9=K%-l=)r($olVr^71xlgYf)f4Tb|aQ>dGDx}B5FzPn3J$6BIKk>=RP|2E~4E@fKMZT zvIRZ%+@nfOM{pwX(IlF0Hz~&~;JN3=JR}|U53r{(JzEvOyB#mk_JXCR0twSq!C@4a*_@GFB@-%sq$)K}r0**HhJc~rZqHL(8m))7ho^0s1V zufmbu1P2aAL0pwr5zP3$o=`p8Xb>vailyT#n9~h~4m}`?;!I80^#9A*M+1?w#OcOm zPocx?c_ICqiTGb2qgDhKWYI|pN8zetfdKH`beQokx&0f+3{+i6{0{lCWnKaowhrtC z|3m+aGy(3Z$jt}1eHuQnvdOu$p!?TvtDawt=jD)!l=cQ#pyo2NWBQAC63-4j7G(M} zT|@&Lz?;dQM!~;Ncwqrfm9sX|Y5{9a4P0$_;QBexKNl|uz#Db~`%+n8T;2g$6_IrI z-S6jx0dK$#e&+Dj55OsW0nV66#Ko2IZ1hIXfYE0x#8-7HbnrX_Y#4w2ujeONxPd2z z?Ry&A709xIg(7Ss!@hqgXln{!Q^kgBGisx@=ckWmTt@%OMOzjCc#ZMRy0kzf;KVjv zpZkmcww@HifjzMa7`T#=-XsFQnvQSP{e5Cn9FXKqH0MfBO{V!1lI0F7yf=;FgEZ|IPxeKa1;zeEs!bPg4K_vuffSYOXIx zZ*qZO*WzGw|M3Wf=X)6^HO48BVFRKch6~q&f0j28%w>S%M}}X+0m2k{PMQ$@Vi^d|NB z2B%w7=|I;HSoo)&08Xip@q9}XA+8)GGh$#D?fwkizp*-v9r$Zg;C8`-I{p=KWXOvs zrGJ{wL9WlXd@<AS=i| zKbPHG>;Vn9-?CZM=Q-8W9J-64cBp`uEuJ;@7M0B-0mS<`4g; zH&F2G(GyP4pJ{=!fEPH?c`3C)=!nw z74XEowc{0vQ2vzG9=Yyn2@8(-jH~`IkUEMuXU^PTxZ;k3#U7x8ZFJdMDjTzkM0oWo zWuTO+k@(qWQUhnhRcUCz5h2L91lxOidwSo*LgKj{ml!UuuAT;6!(tgnk>FzM9Q37V zAetIFx>GA_lo`S$&`{Ze0vF%LHhdoqaFZV@w>(AjOg7h zrBrnI%n#%4@_NVRsR0eju09}e_v_X(m4slZYfmQ+nsIWDjrD#fBBCHZ#aHIRW3aN# z}&W&>l^^(}$o6bm!$IvaT9e$Y&axuUr_R#>%*~WMr zwx^mR1oK-c!@hp~`m>$AJs7CUd)q{UVDrjTnGIC{YL#5MKP)tqiK7@sj~6gAYWa@i zCb~$dXx+nNO)@v_E+D zo}rv>*KKa$^5&GD7zMAUYV;*}M(R4UWZh03sI2e@cV&nbn%4~*+xet?xn9XRq_0nJ zIyjWkrp9*p#?F+1@Jl(;`mf|Ytwd_Y(sKP~t|jvLKPvzm5=t<>h7f;^Jce-R&*k?CdNR61RXr7M)r#1_2R~04EE}$CM9JL4?B};9`BJ zTWVsW=6}o#4Ajx#Ynm^eZm2g7`AQ;3J++XU-}&24+MKq#MJ+9~V7|pAD$tlKt}A=F zJZjtEbG2yaC_re13cK;WynM7$w|=D5@9y%~I_gZFedgxfQF~lWY;1+BL|Y$A$~*vg zWIk8}3DAkj$qSh>jq=iRy~c6c+L_jtmafZ#d5_uU<#uD6%GI;9K#3KZOfIj3vkd}b zLd>xb4#qo4^>*6yh${49N_?>jSbDB9h&RiPivDffrfvj^KSJbcscw5S-=bX}oN*7n zt{O|1>zJMO`u_9h**3jq1y55`Q&H*R*RN!yDur?~A3q-dURf~$9i`IQ01!QJ1LvH< z!&s_PQc-TuI#(nf_BzFySg2*t$J<*w0)t`7qgfGp|{$Oh?+&C-K|9#r&#?XxZs-g$Ajip&{$w&o}WK zD*2S~5G8BIhuaI3`6jo$;jdr6@>#d0i~>kVwuL#s3RNpqxu~nJ&viN5R16dFIxX-i zxWB(IHtCIA9UB{?1Q_tt!e)v#7HMpr$-;cF23S8HY{#3f8-e9M5)!q({YX9CxB^z% zS!`{_^sjfY#+AESxh3>Bw5?HEjF+u3L@a{?moRW zntQZRrpdhbyQtk}vUJ9{I}{%9%@i7n_TLq`d+fxpnxSZAZ?WGtEs2@yHdu8_$+@I8 zDIIMG-`tGlWA}6H4h(?nUcX+X8_wuA_;KF#yJ)(xsmb8%{G5T+c8Wn)JdMX@qBwlA zOmi6p74^Ga_$|O)b--0~!zAK@>+0&VljJsj*-lrKMB*{pu>6W65ftn2PXdAWa0v*S zW{uFCdS}0M>}(D%Cc&Q`jP2D-Mse8{f4sSmva_~y=b$@2q4ajWZ_p@F%R%?QtXBW| z8R7HK{A@ebT_i1CUGDJMSRsFC50XA$8yh9&l?Y z=ZaYkDXoCV&CN|;zfdkYB@CU^T^9LX_~~98u#j-l;iM|-xf<&MKyg>MH#dH{w7AYx zvpF3*huQd6HFIsf1YFg>RqOQX>baWhP6xP;Py0*S4&tC`@sR!tI@4yy56C8Y>RbztU7Glv*%}mjIvoh}5*d`mGxlC| zs{Q^{z9BAdGzxHRGnI&6-?AEY0sQO1WO?gkV{SgFRylptZJ+=Ik=0ojvrbHNx{M zrJ~|q(bCvbxA`$62ERH;)=rEKPK{0-rB<>|X*+JZj#U+=dGK`8rIv($rLZWBg)x z6udvdNZ38r>=wVjl8cANCnu}hC|-|xx$RA%H3Ag>tAK!~pdFx~|E&Qxf_{C1-wF~g zb*^06XXHE{(0j0#Oj01vH5js-HG9bo^urPZGc)HS+01{{2>ac-rhwMKW)7|17h(>} zZLN%CWF7Q{E;rB3vLacs`6yLOOUq@^&#(M84-T@6)<=L3<{_Yy`~jM8*7w&ZHQpCC zrza=h($e1SPL&g)5%I+1G3lj!`;f_Dt+EbS$|D*g;`_vJk=G{q*gW&#E13RI@C3M5 zRyM%&8bP+fEoy^@Q9Jeia9MY4YK2`w8^JZ*OU#c>&dyC-?)x(fK;Pz93bV~mi{Z#D zA-|@lyQ`zB4B-J0q6Qtn1*0X%_@f{?60V1Uyyxa>ZRy8sZ&UhA{+q8R8mm-_gx-miJva{F65_ic%Mwzsq6`?Yae3m{NEohFkH zGwk`rMPX(8BcBu)auFUGc`JzU8o?t1DUC76juw=il|_w5r)KrFveL0BaTn;E0{uD; z>+yHw3N9PJBp>dsz$uKnDi*??^guv9-5k#BeSCZrPCr;QGa(jUs>sfd<$%F$x>Dv_ zA9XaD+9GlhH)#2qnaNdbpU+uQdwGB|#4E^Np3~6SX#ZWMFtY!-N%pY7DHSIuAn>Wu zpw;_58QIq|*10V%!1xAyVP`>EKnTnM5-gu|ud_{Fvw~)%UfP!YfX5pe(c+P8eoeLc z8f(>)dX@BV#8*OWPCen6n1oz*4HrOQCU{$wUCAi`gQ$ukJ(*o<;CD8}enP_1vcdZk zzFbF9Glkht*7yU}p8fJjI!m>C8VU~&k6t#HeTK`^;{${=JhzrBU}6s18)XXgIKt3W zOEJdeDKokhfs8sH39dSkhtbj4*vT3Ykuy4a9BTiAEYx*xs+_~?)NFUUk`(v7vIyDd z_XiL#`g1QME1P{<_T%;+a(&=4oK;m-li}MI)X|ul#xGzAGkk&@xNY{0tnW@5qjc;90U$>FsM`w|J7WL{=KLTs6 z+uCKu=(!@Q8eK9tCT2J<3ZJ<&5|=(@&_i-c5#qSB)i_%s4 z07S44C(z!VX~O_q`twvKeL6gu#Wuf8He03b;Ch(qmukhz-#idr9<3~ZXy`{wh_=8Lj7!Kz{y^NXm zT#PMiM$8W%)T+JY=Y$-Y)amagrBlTl9&0{gPL}B;x0PSGRjVT`G?)4K-cA_01?Z}B zmUFHDIg`g2)2HaNQz&wHnF%COg0(>8Yc}nF4bJL$My;dKY<`c;gZcV$tP2#!Jf-kr zb1Vi`zMMoBC&M(6I%*?Vr`_FM?*brlOlnl}2%(BK?t zxg7pj)w&+avebh_D1CJWC7eR-0{%kO27VDir zX)pOtkj7Y9)xPQb%<-7=sneT{DTraCd7Pi*LDx;RYdQHf*NCC3kWOJ@)FqFSHJ@wh~x zf*qpe{wRN|#Ym~qOr^xzEAS|aKO#A=&jB;v-LxS`g$J|u!y)mF-ICa{TAAEu%ijgV z?3Uxp=!c0hwVc}TlQ}$Z6(S=ez7-0`f$L&~9)v#MdGy9+bJ$JUeRz65N^13SnrVG^ z{T&mR(6=4z?rqtJDbPzugjcO5u=Q|vg;_;s+rGncqbJ0ub{ zG2yq^A4G@2Kz4kS334rlLao#vv${i!HC zC{D@qZdew7AITmHVYBc6-)T@ymYbYkTJj$m&g3ROT^}?(1`#yF05c>4+uiy3`O>ct z5SZEP1UF|Mzqu>LO-50Hz#N9r-7tQ4wo=@hWh0lzk$Ya8tu z6w^z&zGo+3^ZhLatKMFEi@PH3io`+CImv&% z?Q?8}mXwlVd(k+fF3NpOdj)MUdCyx3CW+PcpofsOMb~2t{Tn~__B0iw&|B3U)o_Sp zX8oTT`1VL4hb-Rg(iB?p+U6^=rw)fTLOk{W%omOu*astioTB1P3j!%L9?L+ulSbQ*|?UA6GZEM{#Xw#Bvk<>dZW;D_#RHJI&67EivsBQy!q0;T$UA=4o zqp!I)BGKN0P;tg|+=zx#lp7g{KaN1r5SL!+#<`PL%ysQh(3=oPq8Ayzh;Y7M<)hXc z-0#crQ%bpV2ZY>sTP@yqDQ(wNpD%A13Zyt~$B$=Swr?`IYwQn+yzim#$<~vbluk`& z*GM{Te*DDRbxAobzO=DNIxM*YyAya9oiCf#W}L{S$gjn^yGwMon8e8HDh!|UE96v< zhN$luf{A{mh==WLNea1I?U>Nb7umNI_p$EWyFrRn3RS2i=TW5MB6D@Z-elJ~Q|T|) zsh<-RO*pR@ zH8ne;`{C8goU9iPP1G0$G*y`rCg0z?jMZ;n9qsIv;s&7&`vfJu1NmT&{c0eDW%BbB ztGYpO{z^=h_eBCnUSeP}YG%UCsSDX;KM%-K>w%PCdvhdPH(f2(GF-`%`04S^k(>l@ z+>rN->ZM7Dai88u7USTh&&|z^uXcw?TH*9;ZaJB{-(NccHEhE1_X@*y13j8ad`wJH z^P!YNTGb*%QIroEEQbNN=R0CDj~CO11BSPQ3lrU8=t^9U7mp8jJAhtI{9ush1~a*t z1?uJiTj~R#rzHaWz%*Qjv>$!RqTt0snn;#VlrG04?V;q0$skTPn2uJTcY>qjZJ_}3 zJ%KqO=akVuGIDr(e{+`YbFrJu>9Sj58eeExu2W0r4zPMMUZ38uwKLc+C=l{5<`0ci zxb0mG=vMgN`_V+GGMLe&ao3Zb2{B})JU$xZy{#(7O!&|>Kl4Kh?X*}m%EV+62MbRM zqFIrr-5nNb9S}ZbSxFAEgtS*06ut+wpX^H_wbnK~hUDg^9ZLOTSBRV59zRGtN%tTT z%zJwHljv};L>v5F?c;F%7ISfqrP8hCG6|+LS&zO#^s(}#9VCNbqL|4}emg<&%^b${ zOybomWSp8znu2tlO5GR33Ya9RotC1{T z6LPv!1$iniE{%mPAdSK^&7GQc`vzz?(e2!#oJbJ`0x(*}}>Bz##GYSXe|N&8f2~ z{KOL+ow>x_f0qC>ovMRHQo>brb>~5Hh0Ys5QmE$nG2O@Od>a#5ndi!e($ITB8v&sV$f03A zig=1hgcsh?5?jqw#*u?^RN1R6*aQC--a;rq{mgn29=Z4OD!InOEhC6tGFG{+hFR%C z>b5sbGfhjR0amD9m1dA8DyPW%>~V_hU?@)2ep-%{&AT3zrEqNJJv}`=K39O)@ZB28 zK2S|udke+M8DDB$EqG97L5nN#wR@SZm1Q!v1C*GQDrd6%2dlH8CB6y}bYi}wHo$2; zSu^7+aVS{4&$q8_$)i7@cfpcrBn~NNPq2ELliRS?qN6hjG%fpGWn+NNeQI0H!bI@>dqnF(pv~f_N#R6*g)iom9Da$NfVOl6mDrQ?%>TLb&}G3IL?qgw4CQdt zJARba)SO-}RVx`kJzni*nhnyjvU;!I?ys*vsNKWZCKIE-)5_G?l*wgJ;0!=|tG~bh zGT7*)Miz7k!CJYYAgJSQWlofHD#L80%;J+@4X3pVN?yT^t>+u{si~)vV3SDizS7Bw z*?c72!CXTIJMWz>W{o|0$n6&otSEMj>4>#2p2Oihe&_c$0sFikH&J7<_~YtEv&ED7 zU2m;)h@xVD)mqE{ltzuqol=s`lB=V+%#yRG$#PfH3UG$p%CN(C3Kz>;vX-M?=5LPV zw|g8!2e3AH-_;$TQhzrj*bGq09dB)P@j2u!n~sy~{&s)_{;7nl6VHlAP=fv77e zoHVo%0?-NH+PyDI*=l)n3K#wyf1$%cu2QbhZ+3_BM(lVoGgrn5fcf3`F<@~}3e@Nd zOsQ5`Lpp%5SD;3fLl|%OL{xKr9?86$lf%cA~SBykmZz{|capg#{X`MLUx|41S@KJoeYY zItE`gB{c~IJl<~)CepMcy?!kf@2?_ynWn*qT~9_)NO$823)|0M7gf&c86GmeyB6d4 z1q=M8-s#s@n27=n#@oyOwqV*VL6iV0d>ZMhF5i=s<(;L+Rjl#ru%c ztr0~)O9eW(^pU>bXqXN1mX@%4ORp~=MaZKzCQft_y20=w81J~tnH3@89Qm!s1Mh7( zt$t|raQF=vkslVA#$H}>xIb39LdU6^+WZJx(NP}aekcySfW=}h47WTuwm0Q-JsiE( z(jLsu)O&EJ%QRD!C13w4mp;xhQdS>c7LtmC@>9yo8+m>X^{rf?(34oObf9=wUTkDu zln{z3q5Z7Gmu7tP1867g&QzH`!M%D#0fA9bBu9aD{$=v&BQs{F4;}7zGdM@4=T=?3h*SurMmWXD zPCz)XbQ=%A4_g2)5d`LdO~byr!}U@!A&KsLW=gO4LiJodjmZ2U8J~Rd15^@E|ly^|&1gezWYvhj+Ej4>oR!BzSQTW2suT|xd zPyry19XUE(X&6=FO#@E4k`jG@fr05-%N7>Ks|nD^yBo_B8#^==@BRSL##*quX)Z-|NgC_doFSD=t% zALnG4F0cp8rHQ0q4}|mdx+1tQb=dU5J2jrq0c6Pqu~m(&gr#3KQVkw_;{1W`ozhN9#I3>1B`tmREL^uM86 z=1jN8bE|JcT0!)Rm;02vWs_KO$$6NvI|M`C>Fc>y|bnhy7(z9N~SYs#k(2FMi zl0+DzEiX`UzBmfW=Jdw)-GN00DOw%#qJd9 zLH`=8u?+<9{wZ8$)nf5*Zn}7cKJmhudzc2_1>EjJzA6%c4V4 zDvmeUUJ}LJvHlOaa4ds_>?HuPQYRg9(EMlcpj*d3jQB~5Pgnhe%_U35^SkH;i;fLx z5|fm|`6L7 zy90ya|EA17<(~|p=ruIxfM$#Vh>aaG0>ZJZ2D~>Wz;kyXG5QYL`YcHX#j1sI_h94B zUfIuUTPz^2CyoGwkZd++<%Y8Lf4~eG;}(0C^UTZ7&kxfN37s1DAwG4w*9Da3K*KL( z;zGv(wo6S~p>jBr(J?PJZaR_1lr~qz3ElvhP#9~e4zF1KY~vSY&1Rq(uE#aiYJ}_S zsY}qOk?O_WMPq3@ggegq8+&`jQgh7v&`5X_L}Yz9jC=qkc~ru3mv+C)n8-fR=5c0H zy|%w$(GwY#Nl<@|>MpE4FV))>o_MaSqO`Krb1@f#&q~O()N&0gLlE6}9C{UrAPkw}^~lI|V0ty`K^fu5*_YD=+iDuTH>)YJUsQd2_Ge18enRq< zG7kaubJs6{^Fdw*sF)2|;KrF04-O7O8XFsaCh1!j{e((6(rz_|70QfagglPIMj{SgZdD(PGrh$sbjlH<^48^A^ z0Fz`L+00d82At2^wcHt3>29vu0X3sAoO7BeAgy-jk_Kj2^qXdpcD}sB2#lUO3f+@x&l(J( z|N1!?emDfQYPcEx+GFk@O2x}P7%EKWV3t<{!&NmAM^XQx9mS-H@BurDB2BLFf5>W7 zHt=N_LP#V?B?w6sRR-xDH$@57R)d>U;|C;PD!u8jIO}TkHoO%oTu&zKMSCZ$6f7}9 z2akk<;gEivqQX#^bDMC?pQDk&V@DcEzy|R(k5NY|EIxj%$bwF$QgnYhz-k1Os5(m1 z#fzl!jr3Cs5LH+cK*gW0 zM~j39>xU*ODE<1#+kMEK{NsuS!ZdhSpi4Y?^v$bqoD{xDT`pQn%MsO8Lkx*wgLY`x zWTX(vX+DS2(CBAZgOSf#rkhv$w7_?M(LqgiTC;h)wzXALDVxYZij2?V+=DEnZ;nYC zlw^-12t8qvH1Zf6T)8nD3^=Bthst$G7FNi3-t-39lp9~KPkvayF4|I4qIA<=d z`QQ39hO%%fjEWIOszp}C8s#-Rqu)EFoEm>^R>+}ASUQfGz)|55ttyc4?N=Z$FjH6i zbVSkp2}w*$3?)*~ROx3%m^Y*hvhBF{QXxSQd9&2kb+L9`V>8d0k&zJ#NFw(J5m~wA zaZeUJiFSeTVC}wNNazgKful*|P84N4g-t(>vDOA~>UB?ktnBR22l3q|!Z?H<=wtdt zP-SRVz_V&MNeiuCbe3kJ5+*q?hg+K!vJTYR(Bex;c^>blNIORBeT`9g|Cg09rgr6} zN*%35cd;7_U|O$fX}&sYOqg$< ztm6=piazIe!=hj=aGEXQ19TNF!^v;6gyhwd_nqdKsa%uQ9vs>=HUhxLZm^%s<+c&( zjCj^2g}yTuW?Z{lrWwdY!B>xCZ*O16Z!^ca4XTa~g%YQ*ak7tt+Y|2;qi`lhZZd@! zcLJBybZF8-d*Dkyg6w1hHp;+}Z=6$6DMZA*P6(OCL0I3HfKW2@?D&j9n0Y_V`h37_ z`vm)!;sJPTckeo_8_FttUlA-(tU1|t%|GQEOzj=&=P%O`46|XHoGHC!PUUH5db=a= zd%?k!g_n~P=e;_s)2TwAk1=cyBqoa&P=(E)Xl^ewc}&1KNa+4}3FA$nR-(x!$j|Tj z0jSv6#kHtQr=@sbwA>$42eV>tICtYy8!5yOFh=aWMMpMgdc&o~x4U z{zN@d?j}J6-_S#aQRt-PdT`z#7EQLGDznd>p!&7bbO=ujOWO;ZIza2o5#2N5UN$=WbUA6tpj27X7X!l9YHmc0ZPl zAcj7lQI;v3ILBivFQS+#ADtnTA1LQt;LFN-+w|t~S}&_Y&$94Ksd?szSdmJe{Ev%$ zfrppXWRLBEN5Av9h6uS6_SY`?lopgM@M#f_%ENK9WuD(cFC0w({Jx)LppqY(9sG`L zY~71+z7ZcaQz&E!9=_L(Biu%*(qyma)kL96p-MvX`-wT10`GZrzUcN+_gDz$hb<<{ zZfTRu0?$xg(`L&5K-q61Y^C}poSo3O%Q;u5X!;S7Gxfn+%?H1K>}wjc2Wn( zK#I>mDaS0qy88y!%i0OaA3^l9)9dx;4&k?$I?=yzy@zAjBt6?D-LcpUr7vjBkWY=p zIoDGBRlN}0IwDxs7FIE&cL|#MLu36e_g|H2R*HR)f+5>6xjk9$g_NKe;Zvw%%=HyV zRxrGlD~w)=PN^c~vXuC*GC@Mo^6&BT1H>PvTXd<(L)@U%*CsQ}#!)RrnjD;kG0P2F zGe~&4zlL%770C8e!N32+1mD*h9-x4RVFv(+;16g0VhL;hf0Z2$gdJqZ!G<5(TdG zx~WzL%W~=AHrCJZ8S`R8WSQN(Ecdjutth9}1>s!9)3e3dmc+M1X-a^x$D9w$iefv% zA2pb-jcfeLQ99q4M8m2>F{k&_ObHB=yXQJF!#FxozL@xqv8t`q4oG(UO8U*guXXDz zpg%6i`1~?%6*6S6E{`Y^Bdako;}*wVi*e_wqw({@UI}|XruSD_iUYbQ3$Xq8dF@{O zHk7S5ef`3rojFmf zx}m>;hz}VvJRrEC0(}HB>n}`!0;{hUsvJbqPCA>ZLWbQl2INCL3nL?TQQtFK-?re^ z-{c^s(`4BZ#H=FZ&?)|QvgEyfRmfK?lwYAXCyGp}`hnr2#@o{ZnnVpeQao$Hj{!R( zUD)dp&z2HUc@Baf_Z*=pRAogko4cVD~34l$$Bg< z{t_$9IpeWKIQm0or5-=>h56iMwS)8fQS^y;7_Umb{M*j$LEJT$-4%bTq+h@0`s46y z=a>WwTN+(QH%tcBC704ZKD%AhL7!{GFW#QqWUTVObLIbG zU5Eksg$qD`F|6%8)u$asjo|Cxp;-nlYKPS1?;v{Mz9nTPO!BwO+J=-%-lo&Dk)4a` z;$`mH>$_eDPK!dSw9K?wCx%Ta#X4zD2Yjr&-Oeg+pd}zxaeuoSELp;kcfb0*3z>55 z&xzl!FsSz@3RMt9LVw5;r0^darSdfvEZ!Q9!_#q2M_{;x$6%S^-Hz>I10p7ZLuwqe z`|umi>=n03|L!XH!bG@>{HWEZQQG~c5KZ0v{*WZgo%*SIC$m`Ycd}}sXB3C;IY4jw zNZjr74Q-J@I@3bzf8boiuMYLqGw0yh#KMvj zr6Tiehtib&7wTn-Ct~B9h3FHJEY)jST*&UQU2LW~^#^GNVA5U6V&uS^1F1&Z34q{Y4 zp!8!GObA8n<@7OfKpJ5bbBNJLU&99ZB*s7rjOdT}Jyg{dVM2yU@#Ix63PKQV5bGN6 z>es|WtFG*Iw;&|)jh9*IX zXk1{RU56+z=C%>lGb!{V`%N+ss)`1QNK=33RVa)GO7ykliS)aAIhzrH5PVVPACZga zm2T1&`D){P$}VVr{H)y>MbMER7lE`3>-|L$1#+JE@w+Y}uQkEULK}R$4e6TzPBjdx zSw&pCM*#3(88BJUYc*?&tfDE8pBssxM^!z8fNfo}HluP@5P=2`lf;{Jq93%A1cqm_A$;oA*YByh@iK1HX8g3CZOSQk>K1gW+v92onK?M>ounup{|E_|!wAOk z$B35hrp-MN=jZ>7DVVB4Tu6n0-IHL{;0;s*MybRB_-m@#H_;WOU6!&pnG}DHwR$1+ zGU<@2m(VTu_mXwdRRZLA1fq0V-j6riYd@p!OOh=?R!{s7_-!LuGX3{B-cr&XSQIa1 zNW`rLtQG6rnZBF7^7%+7z0_{9aUw`!{u< zd!BNo-l90L4Swoxh zucYo?JBQg$8x~n*1>^gh$tqvj3%M+h?RyW^Mit4qx~FFsml@J8@$N>=;7_6R-#fbl z3XzF;-gT=`qaN~c%?ReRr&!zg!@`%4r^cwGK_{+JDA0)E160EI-rmy&?E{r~FkRO+ z>Ih+uUB@K9e$|O%^avGuieWfuL@Ho4A*Ng_pj`66F{=N}W|0r0RK#UX45jYwpt%=m z`d)xOqRuoeTL&c#dL6aO5#3O+=+v?JSb?nXShMmw0^if!7zjV*H2eNepV(GH7g`q- zt(WB7V}4Y3A+SQD9R@RA;4~i@l}xzJ#H7;qIJ}ik`P#P$0G`fWgH<_RWnA)Kt)1{J5+MKQ^S~^-|*TE58 zYsJ;j{I2Cz=$GD9>fghz#yOc#yq8!>FbyKJH*MGV4 zemImj=J)U455`tjI)lqkPqP5pB`Ow)dKxsv<1pPK)cJ0ro$zDw12SSP9#szfZ-zvO z1?xCN?4_ja9Gx6Pcuvsg7@6SlQ;k;~pRG&>ji#T@_=xptwGNw|mbDIrwmr69ZM7VE zjND`_3mskEPba)NU0A&y4kO|~d@_B$krTO$o}=%LbCQ90olI ze8$;S)UpS|iMi6XV1FPqLV@PN{;=&!UvOH-&zdJZgz-Y1~QQXg_9t$zZ7t6ldVf zKkZQ+-?qw3$zbGrb+RaYy+NjQSiW;LS*&q1HK8bAGb(V@gx!6cbrXaWw}l8>$W>?= zIX*u5m^HvFFrv4ul80`l)&5n+&pZ_XKHQ(-w_<M%rxd7*?=UChK$gZ3KCE6=7 zLpex3zbrBI8e2*5`)x;hxqk6?ny>F& zbD|o-4?^@B<@t;YRR&R22{p0H)^O=f^zC7+*jqODvJ~n{>8%FkWDD@+*(1=MPI3GW>cmb zojz#%M4z+zd}O&=%b*edn`AIm_6{AR6x|3S3QAL6Ma3}*X8mU|2D|~Wu*JqAzKajm z>TwflogpQ>Kc7AxDn}A}uyZ;oaY>LrWVV{juSqhpxci_9_dYIl0Cy43qT>Lw0>F&+ zg|_`f-r%A#$UYC~bM6$u^;4e{M;CHFzS+)vvI1MC1nYs5=hA~Xi&AYd&?`2ly{PaQ zC}~$xlKl-HBr8rRikOdmuYNlt!d{W^;15D{9?p-Kq7&`%7(!Jg!*G~35D7do21kqC zPtOI=9_G-)rTqps;oy&7Da1ob3JV$YR$QO8FI2n_3}^3VE#fRKz2tKtPNKXC_WmNN z`1Qz5-W>IXxgUvBP|NwaTrfc6DfL9XFlDj>GDBlz_&ZAzFU}&8^c-$BYp$=|MnF!p zadyBxaZ;+_Lyp_>@X}K+*>e{C+6kwxAUwtlr z3r$Q+7?VKJ_~M}Q^ZZ<4Xr=)bYwR9elO^3u;)^Tfu1lqdNRIF=1593QpmySnD$`DK0_$4@nY=h;3qWCD0=K z*2DSObS%m6^_F4G+pRP^Qdv2<`Q-TcpKm0#A`m?C$n*2p^Y-TAQj0lZ8Os|z4feiw zS|8G^^B|~;!2P&9h|h6bn)6ysck`*SF3c|geni)ip&>oMMjGG%a-wu!79~TttM^C; zh!K>5w&1y#xVYEG)|T(;`UKZRF)yec>cpMwUdIXPS&?((ke@4IQW^FEq4yM!QrPFu zm6#Y9v_lIY!)Pv9*(`7AxmnIW4S9Id0#)S zV?S4p0nj%{`ASPmKMfAb+c(Qd(849`65C!+?*yC#tbc)c-o1WV%_0ZvE(y)-@d&haDv_ypCwHz0u#3>8CQX{KF@N zc)7x#Gb-Y&t~J$p{Zi=*PX1H8VC4W~xiZ6e6&6OtZ)+l8XY{=VN>raY?T?!gOw3(e zb#bm0g6~s!9gL=dEWS2BH}|W~QuUToU>hbJ3SYG*j-Zc4eE&u>(*z$L6DZ;qukK)A&WxS5%cp22H5ZRMmYtvDQ&*N&^%jJKL0mg5|T+3 z=wnYMRL9Ak6KaglB%Nf11t6y`l+iSzD&_)XSy?+&YPgAEqpoqzbCs;Ru+mnNJHp#J zkNnVQaO&Gb#>P28q*{p1b4(dBlxC!JS_hw2*U#FcC89ih8R-!xAm;`AmtGU02<+D4Ua_s!bD92R4yAVCB zoO-<^Fn-F}GOjk^`{Zac)?(l~ynnxI)q_4-O>K^kgHz=>mL*xu(tJLi&8Mu_Sbb|C z8O|8Dhy6w_mriMpHq%>W6ri$e0RaKSkRMC&!h=G-ksm4Y@mT#N`{>0fhmA`e5h7+uTs2}SjgerXFSe2o^IYt)Of7kF^U!OcJWQvbV_By`9_@o^U#?z5 zA8u`AeDH{T(utBne01xr_N`0fJKz9USChLnpJGU1Gb^LAU1SgvaE(EnK^KrE1zSl| z^)G@~hhC@0av{&JNMCssFL++8WP3>X-Mi$=`|eOrJyo>$Oq!YX^vbC#MpZT5YHJC& zMade8m>!e!T@7+PaqLzv+r_@j@KT(%kx|JMaXS)noTfivzYL=_x$KbKz4e&Lh!e(l zRipB{O|RYPzS=Gfx=t`@HaS*{`wCG)I{cxdCNJdoLUqSPD9Qc8FFixk8v}EJy zoqopWOnwF1X48sv-?cE6r6xhW@p4kXs)Fs85dl_VI39B$B1`y0M2()nj9RMiebS9J zFj`4w6gked_3Px<_phBIqM4dm$04+K9O|@8Z&l-ro?L9tB`s-AdDa7h=W&tXeVFgo z6X>C;8D?m=f)UG{al6o!Ae_K1`fp<&bOob7!M@oJpc%!BAi5e!<57x*_NM<9#24{R zz|J9_ylY)|@BRELo)3`p%s_P%lALOaaSR6|t|%lr6zRnvtGMW%z*z##AKrS!VcPYX znD2-4i$99dT*q_3b|_>Ae7%>SKcfs@ds(jFST4k9=<~Su$l$vYO%#|gMxPq11k(#~ zKrIw0f=8CTYTwbEC*F|teVsv%kJY$LT6R5%TjE({x{4&t^%mP^HDGZ{@@CS(XO4lU zatY*>Y(F0nVs#SMhHxCF79fzS<@D%sN#`jwL2Im+%Mj>tuI2_<)N&j)Zza?ecDmN1 zpoo_uew!vK_u~VO7u3z0=^Vqnrb6UtC-?>s>`$nPEZZ53P9l$*TYfFNzM6G!g!P)W z4?RrtAZd0YH+9bDzg1Z3e1nQqFFGEs=r!P_In^>r>rP(OTlH{Qq9xe*?AKJDUYE?6 z9Qii>i7SrTbb6^rZ6|V4M<}IrcB*RbYPXc|aHnC=vhZ-=@sziFT*V>M#7aZ6m64pB z+}Lt=*w`@q(#!~!PB#Eq1-O-QzDvk_blJ=Fd*s_)gL@_Ta6W-c--R2#Pct0iR3iI9 z)xS?=KI0nZt3c{#S01&-55Q>DL~G z7W_68!3OrIfo*Y>!Da~?kWXR|FclYVYeg(-=bwfsH$xoX=jpx&t6u5rAQQKDDI2Ph zU8X;MMnR&e?b+5)M5Vj!RM{P%In7G6t1V4H!G3QhIl+MHk2{01ko4NIin;1=bypSF zCb@}i&+f@REGQ(Hfq7eF*@sqXnNQS3;r9qO%dU&O)`)uk*F4Ve3vM1W$T%=LrAt*Z z)7FprH2Na)p3(JMiu{l69yjeTC(X>y8l#DGL$}0@L*p46l?l&p@5mC4mi#o}4IG{p zIWm-~wUlh8<>$~^-eRuq-CZOuh==5C{~_AEppU0+v4Bpo!JTa{`)a`9WXbJ}4UrAT zb0`XH^(gtsKGq7{m=~%Z+6ftcgXEiKcsmxJHF$L$JIbLXaTXId>T$QQS=mL`Zl@qT zoZ_|YkLY~*^rh=-F@473Xj$OIl)W8QHJ z!46{T<;|DcTxcYL&-jGwS3dTSi;2|Pi0V`H4D+n@cL$`-V3S@fi4+z(g4}@J zbw<`*3JvEY7QGR#@>qPA50mB_M=-L8FXCtV@#Gyl&_jEX0uQ4mi>yq zl~>!f=5l76#6pExZS*r(&UuuCQZoHk44an*zN8ShW(54PHvp39Bm+yhUo5( z3#0z(bLe@f2?-VZ$W47!PDK@~FID2jw$q0jypU4fU|A$D++7|N7yS`ctq)=FxW0|w zgq@*Rfyt7QTn}q~0*aWRNl))#-q*{E>YV>8j%(R6R0 z@iqhOVN95GYM9u#xTbLIjsu8OSaAN?t01U}DMk;{3kwQ*uY-+bC(t)-;C2|Ysgt^l zg!x<_um0B3(t-;=I4{6`ZuNfJCXp_W$ z=uOwgdVTpBMXSOnxan?3Wz$Xe&S=`o@i4`VSX{9h`B~=ri_r!TryJjS?%$VpWU>Og zNPNNj!=CQr%2xq#TNmvU6OZeZ`bz=6B3Gs29xu_+_D|=mq5MAtD;BN=85k_BB_z5i zl>|>DsHhW2CT{rR#K7^l?9XW*edfBHh~J~PS|NHXRDLf;rz+<UpM`nb>}^*=@X%a4cXK$(oZKM$v!k_LVt~N#dKBnwTtR#>SqM5WP+T zyTDBf!#)cS1hwO{BC34LKip{tKU;k6YQ6%Qm)m|RE?n2t{O?I@Z0spHe4FPf2{79@+sQeW!F7?T7}s!Q>|2M z`N#G0Q^sMGlPAc0F1TI^t;q5{N*n%)Q#1I*94$-}cGSw)Y)5>SY3?a)+Udz%Utu%v z81gxyZQ8&$G3}*6Zdq^v)@%^B^JL<~6mPG-Q#Jb@tL91YXD`Ph>s!N&u5RMX9}bgI zmX1Z+_Vd_E{mWh|8=bF|7~`6NTq)P&i4hmZ4wKAaw~4rVU{mo~+)qczc7GAKX6Hv_ z)<~C4Pp8Ovgmf%Cyc!BxT3YS2&cu@0gZ|ot)Bd@VBL=?i<#;c8dojvxQd8Gc8SDiK zVRLU8F1}o6j**__gwyfdSm-!h+|r10;DAp;&$Q&bzRzpPoA9MfJI|4kkoEy!;%(vU zLPX&dDEiI%pEHkGQF=OiYhPcVz}ni{p^~g@xGS~g@_aHCEp0I!Gz&`d6EWeExJVFF zhVa!Z`VWZTcG8RPiJa8Hk(6WK^)B1O!UEbXq(5QMjdAVSHR?j^wzpKXv@;~6$hs-5 zNZHxBnKj1K_e&wIDE-^rzU!-#HgSe=pLULk&A8#O(Jv$->(-EV>~?3VPVI!=_xHqiAuSz2ZA6!8;vGD*iA?8BKbS-<0B4V_JGn;+59W zI>Bd?aCtV)R@(=~K8#^(aDl?8AuRY9kI+WPD}?2H!W zGNs>qCLSDCZ&$l+McbW%3hTCry;VppDU;&x-mDW)RSs~m8%g}B$d^0l5u6wc-|3*} z<@Z?sr&f(|M(_AGU2$`0%3GfTP7FkStNe{JIXS3Ze(_dHY5+v9d;t2ia85B*w!KDK z#S=d2QZjnjG%l;WP;-!al<=EGfmZEQy4LOCVrBpUTjh?>?E3Cy;m6(y}h&}Mno~9#-c#7C(t1YMaRl#YiPj$LD=5unI++X-7 zA~#z*W`fem8kd;3$rpq|z%+YZmBpwJ=nXoPKTD&zIqSF725Vb!d7!*^e< zU0qZFR2}dh9vOC_S?h%m z$?-utoilp-?lzID8=d=IE8bCKP2usmznFrn`0<@Ya)KjdWO`JALUUNEf^#?h7-(Y zl3AKa;dihIT3w@sKNiXbPO3GoeK8cwc}kgO>6c{gD_{)5EmLvx5Arp&Ib=x<$f(k( z<7lcg#>DxUgqn<+k8djs9BiYN?c z#??w7I>WYiA0r_=ETVKzIgd?-h+*5u5_I8fLn}*5ORtETA!h?oxM^Kqj}PwyXGylB z2o~vHcL}hJY}^d?^<(KQHehKt))eSAdU#-A;$3Ersf15vq)@;I2al6;tLq)jUd|mb zkTdYJ|LD3KNE5!=-XK*;{r!S_o-Ik-@cDDts9S#(L zQrSIGBzu)2+&|iVNEg=U7X_CX(gJV?CC&;`=!d5{D4Y6`ed5)lYPZoz9U z=;EesrsT}K%`rX zG~lG}^^J*i#UBiyxFx@~_mO7s)GWgz z@75ZO&Hk`VRj?o7tRCKClE0sW!y_PY_jvz)wHL2EX=e!*u7yzeyq3mhKEqzO!-Mqh zoAzs_)<365s9}t)Oa3qV`g7?&@)IrD z*NmcyBwwFgim>jbf6ct4@1ZB#_UUy6B-aKQ3H;yc>aN|P2Zg47%<9XWyV-r8B0YIp z_Z>5fCH3_*Oy}dGkvnnpzDfz6l$as>$?+7p&x4=1t zhKGkYj1!w*H1i{M%srrOPRFjHwcy*zQjjvw{fx|HJlc>@F64Nzm~|WO(`3f@>Ti7c zSHCP7Xn~=^s~VV@o@8S=WA!wWF- zdEsE;l-)6%0!*eCO*!)qA?iLuw=ok6b)lpuB#^{DYIu0~2m|-k;5Lv6H9d|ip-Y`_ z{`OaW2;(!g$a zj|a}q&hSe>FIE8cg<8>|W~;A{1Mp2FgZPO%5HFUthBBpTYq>TGd>pEuindly#I+tA{3=xR(4igK6^*AC*`9?L-z4Ag=(AB)t2bkz(2 z2h6Cmj7$NyGNh(ymX8U&%_luXCZNarOGo2P`wH(0)XW^ikCs1R*3Z1x=E+uw3=2zv zYHGS)Oel$v-0*@8uz=M^HKjG6Hy^Ngt3|~QRYgeD4Y!bR0qyP>V+2; z(L$HWNG*`pyTHS$1e}9sQeHqZ>iI!%?qWr)bkT*(*GbCd?Mj5z3n49CO=lhNkqnAR z?V6(D<-}c-Y%05(mg||#%D<$EdGIIlU;4B@LIjfhGIW5_Yveif%E0Y0j+(}?K}DnO4jma4^&lfHt#K`a zwS^As2GeqXGlR4$tx2#0`v?jgX?=aYfx91fo>X94E5e_Tbb2tSd*!WPl!R|2HsyLd z!prILG;nk=RM`<$)eFM*%rbpwz@Mu7|5ixjR`Zx~KwMb@Gb6RFx+sx%(z%Urx)5si z*T`q|Hi|fm2JrF(#JEm5pWL;IKh`9+F$A%I!>OOE>tzZJVXO8pn_P6$?r)+)fs~y~ zs9~YJ2MH0L!IIor>{%An6fu=vJUN`SA_87s?+@GC@mn6+(E=0fC=7r8=0+nEzCQ;` zPn=fwp3PcUm=>tfOBr9jyya}McmXqjIl6%}?s2o3RNQaO*++Lmo0CuV9D~MBc56IT zp?z1DhFqFn@*{aRFvs-kVW2XaxELE5S@O^PAokmSc@1X>4taq<1nH8-W2YbXD?6;J zQzMAsU0DoN7mk-A_|qp_z=7FNK!Oun*th(ru>2EeQ0pqxOINgFdr1tACiR;kR#I=( z1{~D8zd3S@Z_bQ}3=Gabj4Wk!G+_!YJsuYq5rlY{Zjxi*<6mil?NpETY{hpL-`gc~ zK0dx2M0q6{84m{#XBWkyxAbS}{5=L8E=;egil&ys4H2*XwFa8$F5l|_j{0={a3KjXLT1ARZMYpcfT$G$0-Y)8tX8KLrXqt zW<0me9P!}LQ1gUsZG7Kv&cHT5jO+a(#xk@5+;i^-juDg+Fxyo3!ybv&e?DC;n=D`2 zyrLI*P<^MG;O8{r5kZWdMYMBb12+eXa;bimxpFCBL#o910z_^$;sZdc4ldvYoQ1LA z_U8LieycIU5&tTT8V&it-&6gqD$9tvU8N2oYd=eyit4fs;G;WvTO*mQ9A?9+zWH4> zF{}9w>pjAq)eKf+-l!GVV3b9IPDTUJf6P4m#6M6J@WAbtlrTZW+|;Kb$=j2IC*usS8S11l z{<#T)(ACpZM#^dat{C6In{7Pax99%`o(r7e(ti~?HOMj*T_{=6T&K(PK!w zxdc&+Wks6fzy^~y_++jI1PDh~x&x;dCo*l;-1BB2HrPf0ls0n>OkyEV=dCzuIV`A( z3fkVL0tZ5!f7>_=ey^lNPb57P11T;64#CXv{uc^x=hWZFSR%d=DHW&6Iqkd3-0Von z$jrrQsH!F~NS->r?`cnJWYtbk=b~g+Dpf=P^z;G%BAjNx22O*An|nI%U6NT&3^j@`Y}wAEN16SX6d z3hTDbe9750nhmnS7Iw}CTj|a(r5zMSqG&1Pmi-&ZXe4n7WVN!VUzqtq*vFj$DLe2t z4i!>7#{!gmOX-1|phOi;``?)70Oy%;AtArBxxTF}OZ{COgG<8J`xh{@aQz&6PB%a( zz6t7_I*k{hMc(M=(>vfKQ<3!Y1wzYgArXLIGgPaB_fd^FG8e=Bnt`$U_h}T(^@Rg) z%Si%UkFjrHz={!~Qv&<@e6JOOiD_Jf%yI_4sYjSB zH}Jaw{Q1J=5YI9bn~^;T4SF^3g;`EsL7}-|VR#V!g}>~ZHcaEGtN+mg#HN3_sB3Bx z6a{i4LIzNslI2dsyC*X7Vo1)q4&%0=NY!LysppCHjPKPJx|btOy7Qf08rPyey@^@`q$a2?!S)p7e%@ zsZ$bhefik-=1RcrEGjDiR#J@PiVl*Rb~3UI4^X*jG$hoA+`0mL1z+1?jcUtZJ^Ex! zY%MdkMpi5$pWlF&>&%9RhDvc;&tim`hFtlMS&k8Pg;+mSm{glm}eOoowxVM(L7g08ya zo4wN5_w>oEwGKZ|=Id+Pz*^C7KQPQkzqLyVGE!6LDwzmP2=LTUPBe?QksD&XM(BG9 zaipFikG|)QA%L@F{|hxYd4dm=ik+o@zHrS4J0t&x_w3QO@Fdixjp znHd2eFx-noa44`ZU__+eSSfI!y!G1MhFyI=!Lu>BCdmotgO3VV|J+MkG^YJzswKha z%gJc?4yiobzQS%~3-IwQ?GP3Gec(qj%5T+q^$-Pe3B4CRC6f;>LF`Oa;j&T84@weohs!`TK40|MT1a6Mc=WkXq7| z)^+5syYE=TL+S5V@;j6}^KJg#g89F1LG%rUq5hw@7~Y^A2k^>s0EzwFx`7A$>fc8c zSBylt{s)h5O8<8wf&b;>Vqby&BMEaPbhKn^%ku)w|4$olO!j?L0yuMF_rU-0w4$I1 zDW3}m|3j4L|Nr(wO0g4bYesN+9i14HpQrO@$9U~kj)^WJ{c|}-x%<&g1aBuesPsanN1X>yzr0|imVF9IwX=X}Fap0I|BH?$A z0sq+FbEN`!aQL5I1FdFybWym;8Bb3JnsK1FKuqHrhfm1;*bsOoGnreT9vi&@G&v4* zcG)Y|^EC={H8zY-cgs&yG&DUi6G&l^^}qiakoOTEe|tr8UcdThtgZ-4DmpnjhEseK zxjzm}cOI9COGuz{*_~9zc)Z;!cLdXV0OSCJL9baL|Iaw|qcFVGf6}(&(D{ekI@WST zvIU3Mx&@!BML>!)F0z3Aan5bn=g)xI&g)a)S?yIUoJ{#=m}ZK##ipk=&*XoZ zme2d}51!5L9hIKpG|b2DuqN?qU?3hykp`4R9;nRH9eUVOCn_plo2)JWG>dLJ?PvVq z_vHJE%_!*B<6=sK`t|G4eEUkxPye~KOv-0`Twdjw$o_r`%YWzBJm`ZZ^4>@yS`h4? z2!H|&u+3)#Ar86sK#MnCX)I+j4IqH#^TS0Q-@5}IlgIqOZabESKM+aN5y3P0%#LoO zSM2{xP~RxfZV$T8?(J^>PVy~b$&T|Ee5Qj0^*38-CSX4t9>W6QSu89p9IfDVUx1mN zPC}LFGbcepIXs&Lm$0xtAd&^jF6ecczEJ!#C|0%+lAC8n;M<<^?7;B0I)qXUdIZ8X zRvP#AgRHlNdS9^mg{QBX-2$j(02mXg$;sb{HaTr8@@*zS|Eix>{`25RfBR%v2g;Gn zj`2AkPyaD9_!g}_wORLmW{$V z{&_y(kT)_Lll#rWwS%T&PmsU!ZG7cy1LuA;=5lXF3B)m%bLIm`O;*lxfVsA>`D%$# z;?ThLw@G+cmI*2n(&_*4TnQ42LrZ(xVypB26tas8daS&Olm)OQXaCR_lRb)yk6-+( z5JaQU57En9gZFK)01>{Rr$VFaK>{lS+;mRd56#{GJRF$&m-?C7iX17Y;NhygKQlN7 z1H{a2PXNFQ$a(odSI&0`_QYrV&oJY#*G!^{2;(=vhOY(3Vb|r9vhu!8R>)$o+ z@36qH&qV%+uDIM+)g568?(cuq_)JPe{S9B1XTrQbe}Ay(|LYIlKIc22fRKgvX=0{O zLbMA$h4;y0zM_K+F!%iapLGHk<_vN*lX$i0L6lPCZu8)u4*R|=6emdSk3b#LSdI-_9b0{RZLHR17KqX>Bia&;MG3Uj4oH z_SDr-{oiZrJp`x2Qffo1a-%jTPoD6^ZT`Dv_=f#$!_6Oe=Q)4du<21kk)%5Rt(9e^ z;eX1>Umf6J#6oC5Ifdbexi#DiTF2c2q2P4F(!%ZzlmA`?O|;+zO5n$G^1q{+amQ{$ z)k=vrEcAq{TC?sygY3VHN$5X|Nl|O;gdwJfg2dYh>5T5#)c-E8qASbiJ};2}eVy}r zpb%)OG8@TAllTz%pRaCGjLJ#?{E7fGY>O7;zlwsquyr8kT)*~u|Ni|V5P20eH8q9O zD5UP+062m0Q&7+ca7Z)(aZ!2r`1tu?RmcTPfVmxb??rehF$U&c?6O~kw2DXI>r5gz zCW)1M@LCR5X$6G?yGvbUWo|tU?!Tq{bKP>x1jx4sv>kUd7d<_F9+05!{QC8a6`VyD z%-CA~j{LVPOqhH;?ChuaK#Lva+W0UTMIvyBh=hbx``a1@0943P5fR7$jf(yA$9$~y z>yyt)8_a62Z7tcE&XWx|bk_r(o>5v(j&Tr3zqM*@^&Wsi#^ChqY{t~o)RBCi3ZgUg zg8;7NZQFjmI;Hh(`9IJ&f%pHz-CMt9*$3aEf*>i4C?SnB(%sz+f`rl~ozkU*ba#V< zsDywZDoB^4f*=jj(hX-G_xbMqp8Y2rugjlAp69+lF*9q{S_3&OU)Hm&cxq`wl{_&~ zrA5J~ZDCaqtQ=KURi8O=c9!~HOV43P*tom9uYURR1qbXq1nME-H2|#{E;!tpe*;1^ z%LNJw%A;77r%wwUrW-Xct6(h9IdBWue%9OT%ecEA#KPOHUt3$7jp&xcpR_uC(tq>f z7E<^eYI3}OHcNc=d6o1j_xyoYz_%&gaGA4}Po{H+e6aAiV z=9~{X2W;@Y-E?$xvLJl$hDCUS&F1!91#j z#_jK%B#dJ5croJ`$iv3=tl}ImQ~2U;@EsudErR2V@3L)K&K(}JZxMets+-lo1I~_| z4tm2}UfV_)y!LtsC#n5se813u{`d zy!OQ+bOt7#f%<+a`ap?H#M`+OYz6g7i;IosQ*cxt@L~p^oScXO85>imOSgT>Hv7J4QFp6?`f9&3((F#++C;M z!T!mz?oY?(c3tY$a56uHylpjxQv7#rLV{$C`=Zn8v!KeSfl%je$ET+9HNZ|)Mf~^@ zZmF+&+B}SENdqv682V52!ZdY<5KCY8B<=*RX#O|SPbT>Bdu?33B z984}U0DC;_xVX3m>{9sEN>Wl%YMPqE|6o}*-}tr03j7HB$(rRt7+`b+cG0+hCTp#A zG z{<}5&r}uXj-RjPj*_-J6f$m1R1;5bdUlFH;(k2^*jIHS6;)1)hygVb|w$dM8AYFOA zmw2>>Kx{#sQm&+>JM(bg37i`svE|??KLz{R0GNj2@1=9t*uVejWzeMCr+!2 z4t$S)0tsemP}!BIBT~-~PP6Pl zwMhf&vL60eg=2Bf$FoEVB>QAziI9=Fh^6g)kSsV zJJ#n2&p`nUy^`H7N`m_VC;cZtmf`wP2wTW=3Se-d${(W#JN}4c>93+1Cl>lIv|6Sl zh5}S8gAA|93 zL0tifv3tTt%B?@}#p$u`uu@U{%R^nqb@iEUuR@0U%rkDf258a2R2o3Ydb0=b-ohbg|L; zon%frD5n6m$+efCjbs{JIAmbOh<%xd=%sC5|7&Y+DM`H|4*^2o!oEv|=;)+Bi~|$z zJ{Vk+Vs!$|Bt3!OfKiI@L+5#E7lOHJxrx>jIqf@eIC%E}?Iwi!kL^ypow^#_lZnum z_gw&Rl7_9bZEKq{;ADPtHP?4L4p7y4=W{DJ=@hE(&=yi31e4-iuirk_cqq88nTd%s zX(rFhW#r_TwGfOJZ3;!XS9c50r|wy27bq~(-KHTUBBEQ(c8jHX3T=%KB1b>Xm1-4J z7!}A2A4QO%+>yLvG&U1Azsy_18R2-xog_#>rD-s!4ios&mWW%?V=dGQ7>3K&g z21`;wAp%&HlHEwy)Fx2E(0qIKa_Cfizf>|F7p9j*Folthh(S~g0f5&M0c0-g*8Ev1 zaD<}4s9}za+Yh(HJifjbAh>LcF?u(>;dJ@79tk( z;oJOtH}3>QjX4bd8F%dA<<(aS+U&=qdJ>}IbY1(=R2Q{iROzgp)iZ`BghDV z9yX&ZCbjRMf|to;Hw-Agi#S3Jp3qcSNVANq6hu7L2cw@Dkt5fHSy(FjufQAK1N+Xw zI2Z+&ybs%4dvCwj)p6nv9~s)49XmnTb_S=4g(m8;1oY>}8;4UzzF#f0KZmJCEL)6s zZ5oiEeYipr!y$8r%f!ftm5@Ot-*t?0$sHhCz}X*CNw7><^KMUY#s{ z-unIN{9z^E8zGt}N7L(#%vLJn_=vq^l47T*2Cei5{LiKiCLmj^z?JoMK{9M`%*oBp zzK{YUu|NB7GxkamltSpz9lbHAWza}^VC^d8x%uc6-T<-SGB-;FJIcN``}tvf_o5duKkTj1-WIm&pui{h(IP1*1viy*c@@Yn+3o2TpF{g zeFN2~LNrV>pbY36&*%G3N=h%AU^{*@_z$6)zr zrjq9|gV%C|_wM;T0j-}$obYy|Im3>M+VDj}S(1%^M8=W)c&4VxMx5B$Ya`#S`Vly= z*YVo@KVimNG>wZ5CL~)a802h>dA57qZo}EBq zxOl)h4hq1gYZ!r`FIJACCuy;Z(eWVV#%nM(?8}!7P2RDE8$pN8-}X{CNqdVMGx*zE zB9fcdu8&h!l5LCVEgePj=GZ!;W*Gwt@l5^f!5X}*Gb1;*n&+Dk+!si>Xa>t@lsX}u zI`P_uVKO)(F|ZvSpjh2!W{62{WlXdYgb7HAEJB5LU7DAUmeNO zRr_y#US*ReXB5g#`k%Jg^RfJ==ifBI!d##M=6mR?XK*cxe@n)&({A94(RPUpqNdph zJefyRR8>t8^FMoS6EK5tR$T5Y=S`65+z=@e`j~TWBQxwq@m21`Fc$#7bweW*Z@z%4 zl0(xGgH<)tOJe4I>u?qV(Z91~KQu zTv|vKkW8;@sqB5EAu0{flJ=Pr4@-TY%pY6RsZfB_*>ggb7JEBTqwWe%#m~Wmc8CSz zntJE!*O#J>R5@>3_M-d%Y%EDs*O#e=!srw>tu&wG^a+Q(=dzqz%mYNVLpatidMo&U z;bvNz^h-R@%h$26SgK=t{6U7-;pg`SCO!XSn;OVz+#d4tAFDObT)B?Nb(hI&hzkjw zaWv0t*P`Y2nur>+6Q?0xSPwG**TQsJghD*PFZ?ATF>y*>Mn;d^yU%So88}KZP~lprD=E&stgVKKsZWMBo31WJ z-otGk|MoU9;3k?-@V5-r@+}qEur9->oYKE``N`!cA9UUGnTH3{#b@2sK z&bD`eaJ&MbrG`S<{+rH)TfC&Cq-=P2IJwdOB_Yzzz|>|_mznL6(N0h5n&Q9qLdX+A z!TL#uT0#PXGlYiVLrW8$tx6d8@R#xW$t};xI$OdNM+ZX6)%R;%r5d#pRpt{t%%E&}u9{J5o%d;ne6D!VvoE#=;rM+uS7H%vmLy~~pnT8c@Un};%B zVYgw)61P0hCuv;2&zuh&zQjkz!s-M11xAR5ZrX~f!LW}(#xKDB86mmLQ`)=!+m72T zYRrWmKNS^n6TbfW&?Sm!0$NITsZKM^Cp|;M%x|5kzO5pqZ@+z$NqOu6XF;C(PBQPK zZa;8M*sH0jdG+|kugVHXMwcP_4t;O}AR6BMUJ%+R3j0-+*!r*gTeoC`8qvSkNH=ex zU(Idq>ILd6C%M(Geq=>nX&d>6eXqR>%bxhZ>LAzCdo9@tZlf?yHM(re=NQH5-BV!( z^5d3FA0Bt*lnMh_{Ajx1p^4+zcl@?(>D~p;Fi!JgUEDIOY}!Qzg0_ zO-D*|O!1Y?FuLmq@jLQ!Dr+XvNeHUaC9O0$dhj6AI01%@&IQx%qpHmO6>4R=zaLR| zZv1%r`5g2r&ud!j2Uf3XG$wMR;~q{aMalosc~bqP%vM!zf z?HIZx$hFTo1!)e`6&{*Q)Wfsr>OTKTE)PlmnPk$J=;Q6?9X%_pir>>7Ka4BZ2Ow1} zwJRFEi>n);P^y;b<|?|^Hoio}b|=uf-SQQk`~wf7KsWxxCWQ4f#GdplxKJu9y|Xq@ zLOzIda&oRfS;I~eTaipGZ$K*M*EmAs4&1gPPOV?K*i15D8kGI(X z#L-(qS_m#plDP&tMPl$xG-i?beJDy4>HmsD;h|&Ny$W^9m9$zD?i*JBzu7F+jZ*#% zCuEoV{s=Grkq8s_VOi$LK*fTM;qU*$1(+4>l&8Ky%3w@z)~t0ILr@OTgqpIdDlcHj zO8fIzy}#T9yG0JiwC!ev=EDdr1~Pf=D29s}=;+X2eoQnfrz-2lstGlE*Y#dr3p?Z{ z0dWV+Ve)-tbrDWt5)T+aWk7v@q9?YX8Eyw#_$7$}-R#>`<&|LKpkDJvJgWU};*_!1 zd~$-uosXtWwyU%4Z3c2T3ga{Svj1((jK9o+fUPQ$!W)c4?D*~v>yPp-uFiM#IcA{p zGtnJw0$gAWD}^9&FlL~p&K}x&zV+qrCAIXkZ@YFx5m#iY!rMxYyiceNGc!{tiDZHY z$kCpgblNBmlLN47{A{Hq*<(_!S42u~GGelB(x|-7ZPGGL(bkfz@9_{^OakXniJqGS z?!ngEJmFW3_XlXrI}M1+$<& zi#o2{Fql6NQAWCybXPjtmJF+I*;DdhyL1+C*Y%x(&9DjSmnl6>XOzZZ+H{Jj{%X19 zryiD=JK4Nn^U^P0h2A{R*`FUAW*st=R-lebo-JX%>r0L@HXQZse$>Vd`yIyG#<&(B zSt?;j9P*q!RX2V2B=}4Wc$np%hbet809h&RMIUuO&J=Vj)q0ftA#L&bLTt8ho86Jo zKEv(}D1$_A4CFj+kyX=7@T&z!lQuxrnsigTT;3(Cd?Wg1rFcUQ-kBu~(_1I#RjvkW z^-qr|`ltycP=i_53{7fme15tMo{s)R@tLgQ93|m+Z1P2?rC<-|ftyK7y83SYLVyy_ z9zQ7=)rtn61)qw-jl3danxLR0F^(I(im7@Z-nXC{{@>-O@$bW{i&KmT+3)PY2%Dsl zKt?~MMoI&}=37Ao2@@TDNWNvc$hGA&nsEj_bfzhPlRth=-$Q6#cHejHwkEGTag>mY z9|fYVUSuz<{A-?fP=JVaRmC+0f`L}Ax<;DR_)@Yjbr(6Q^7;1e49)8(v1wi`?rN5| zEv_9v5MCyxh5Y%^!- zCDExHU0JLv++j9X!!N20>wzB%TiyHMTw)eQd6#0c-aZZURYyHaUt39R zm!Vz=>USI$@m8OYf`ebRT`SJ*0O5P?>bi#oK>SNxH8_^A~`nv}@fO6*JHAZFSR94O6FT~fD@`>mP9t>qI-*>r>BU`wv3g8x!II>#@*HiOJk=2D*HCqJw#tyd-o@WU2e{u zp94>dabI*wm*QAWa$K|bML?t&eArz3-o3x+!t)k-Ch<2S9!Mt2A9bP*Ww$#llq8Ld&r*5%{@Q&PX;uh?{ zhRP|Bq6jtoU7@T*d#_W)ij7&{CkxjgZ5$TYGG>9&ro(Iya-&3(`H zejAEw94$O^qc`8Z3F=TKt(;!gC0;4nZ=N=Pcn;^kNW|HblTm$8IVq)L+RfiMz~3eS z(#)3h+{95mpuTR9aF~#P2gS!*TYjGR?J|Ovy+pF4hfJUANCZn|d8+7*qf-gFn`uu` zh7g*h4Drc}x)Zz>j7Ud7#6d4!G%@A04!CI5Zf0m{>5FWLUHLHkxPhblzy zKLM$(r*E7!%Mh6I2RxQ@Er{yCCW3-E--e#*@kv@r+Rb690oyn*rVspJ))93>|6#e3 zf#l;bR>9Km1t^S??s|#Zb!O@I_18=SYhzNx^ktUwJ=y8C3`{nkr)^10yq^vOs_;Cb zJ8gSiSZ?b9p(OnZsRjBv3iy9cX93IP|%?sM84R1={J;9lujVilE z%A(*Z+hgnFy^x2#Hs9PiJ-xgA#nKAykR$+JU$(2QwvP8E+pm`Exes~Jm6ey5_ab)$ zbZ6pu%S@p9Nn%*JrV1X3TO^0x+IZ<;=K6ptN;K#)_=_0Ma--S%+18^e``%0)z!_Sa z4x(B29BOR7^VeYQE{o17sNExZ*;I4n>b2Y=l28#P+R(z8F;w}_hNXqb5pkxE3?M%J z8qDNnnTm43k5s`U$W-W85%?|4=eMMgvp8Ls6rlJcQSRm3?Lt+L(0&PmpXk&^@w(44 zyL4ppgC=<9S-P#N$;w}gU0=8zy1vXF<;XxK&mvyZITi;Py&vF9I_O{h4XLeLURok@ zQ=qzur1;BYjs%yjhw@F`SDm2j(r9#y$p;Cl{Y}?EXxbSCCxT-J-k8ca6&B%Pi`^Vw zDPnKO;GV4a<60!mJzSd3-j&<$2*x&BKlwB)R#rupY1m!fN@6A@eYAV&hJE1XcYhWY z=b0Zz#6l-uK^fI)1LbC;Az{6MI1vt-7`*e z8QO3sB2vAGO8$YfLGE1fz<{qXzOS~nre+X7A&2;DW0fyO57Uo=@lbTZR`fbWQta;N z9$XphEvhLFl`CSZvSbfgHd6QER8GUXB03-$1#Cbg;c>G26DE5TT_zThENOgK1u^42 zAv^4^*eo|V9Gv(q@^)^Zj8Y{2=@sa0BQ%w2{cPhnV~ckRjdcGFXd~m6_56LKS87C= zQBmT{Uh}0O*u$md55}w$&oXq_u7`t0NyPGUyHiq?k399Ul#=Vy9@DR;)UTeUrk@`; zAlPj_ee`AX^YcYCw6tZ`C5&cn_TTcXp1gNsXPb!5arvsF^f zY(8{2qM^z^@vqZ(H%*xoK#Jt|zQ>MniR+I=j_f8yzZw)*x9NWTcT0KOl zve@)i6rCRX<>Ud9J{9NoAO29ky;(%4MuQ%ni2>% zKS9r<1bWC9t|4jX)pgrn{vet}>~wlR?!B#3^x2hskn3G|320@NbCeJVPS1LEvJt=C z*dNT=Rm0o$tCy6MZX4deLz_m?{&gm0S<|vf2Y|;i_w+c$CF)0}HpRnILhGcW_=-pI zx+)`dKT47D#a;;wko+T^q0Z3J(IvB3TYj0Vdb*(1f37YsFMnMRMhR!Ux^C<9Bj+8@ zY}AIpJ#p+N8V=9jVT?59AFz1RF)MXvZ=PFjHjO(?Sjm+9M0?!4;kNFE zJReHfwtA5I*zxoVp4eeAbd*KAIM~WaJU;|QBFApyiW@BifD#)sjn>M) zVW}TUzA!T~gx2tyk!d_H&tt*>3q(l2hjMH#dmj~L{>2WtQ23%|SC_aqwl4Ca(Xr9c zzb|W_!7va8tVSm$CgS`UI6xc(VYT|2W22B?3S{zApVn8-Ri02xWYrD|ZT$J!T_2r= zFR5jLM|B?6uw2h|Y&1*YmX{j?v@DLdzlsYA1&>*C<+12h3$yEXii}#ujSO@JvLZKkg8oq393q9|-@b2q-`%(q|ER7C z04&G8N&NiRZ(3V;zQe~?^#GRl{QI~_+5OO+Q2x}kvO6&XiUSyqm6(YOi#}?7$@bPR zmHndnZ-tq1s9ukkCt%&p^PG;jONPc+VVDpVLxWUYO2fhO8BjH5k%Nk#ZY&};qmq{Y z&@csGB~p=UJ+oauk#&<*Bt(^pXa!pyA??!WQ!%BW6VZed5f}Y!cW1|&8-AkHSYA<) z@<&HYC-xpt{Du=at&xjg6Ancq?bZ05x)MLDCgZ?0p~3r!S)9m>+qMm{_Vl3gZEHo{ zfz|Tu*QI{dwQPVGP|DZO#F~3Ohva*tn4nAsYYX-;29h%G|oT-Rnw4 zW>dJ5xBK(#D{BVbI9bPHUMS+#iVNa$zF)JV7JN1Vfa?}evq~O(@i4zQ(^3i7X)aj6 zE#%ZZ6-m%;yZ{Ulg!BfjLk~w`yUQZ$w8tc%t^uvsKx` z!kMAW53MnE2luvL_pqVKnj!9-egkxF!9VNwr?QCd+1#V~=wl!Iq2>62`%1%T7Ezlt z8<##MUf7<%+D~zBXPc|xfjy*(VUZ~CO zr@c$?f^hH#ZmAbCTMv36%jVi);dA24rxjhKp#(t}6@yF3JLEpT88>4@sy2r%a(qRe z={BuC7qW6QE%|pd$LmLAbA9*^t*xh$$@IIKS;aictJK)r5o5w95BcmTRCM1_qrZeX zt0k4|b7(zs=mlRk+j(USYUW3%aveH<7<=V#iPjp_G}U12x0Ou@%B0)7)A+wpg)IZh zV;lC zlIoF<%D_D>!n^qmXHrUAI^|&n@BGvkNwg685Pu5{sUBt1W+@3APT9S^l7eTe8rqpv zqYn&e(CN{r_VQgdq=el;iI;YEZ#G&niGfK=S@-_ZDe#1~IlOZ*zJ8y@=)|SAjD2;R zQVjh)JEh{+8?9uI(5PO^%F3=Awz~NZO*1L4N0CcE0$%gu_`E#I&ECT+PgU7tqou6S zv_a)psYHU3rSaBmB%9F}WKGTM6_sJVPl$BqOqfsciF%$AalN(-7IQzHfgWfdU~prS za%nuvhB*4_`LKCn;!p zmG8I)t(_<&dX@BqkW1MK&MykYnbL6t@M-WFoS7{a+17lK>x|9_VlbI<2=KdAaI&0c z8rF8xAwBzwnl|>7aaimx@@jZor-c0-k?f=4UlUaZ+S4Ffv8$V#zJ8s!`p#R7 zr8;=8EO*$Aeo`Q>zA3dIEh?xmPPp%4o<$Z)?16D`j(1zyi z@_Rs)JtkYWrd8aJw3W(Gc2eegV-)qSyJ`FpP;lUDsJ5VfT7S7S0uELBn^W~0@7CJt z67l|e4~^4jK<~X6N0!GT>?Z$Szoc@KqL{RmAl`d%nvKn`{1%jxU+g+?XQN@7 z5o5;JleTLzUb|u(u6Wh{y?9lNyQs)qQ4Fb<6eDNQL15xiIR`*dXO4a^5U2^{<`Z5h zvPp(rqQ5ELm$Um;3!C%(&|^Yei`btVy30weR8&+(fMlr-#2T&e#3TCw;$i|CB@Y_X z2R=N)4mvg;+;3ZpEq;q#i|lr(ON-_G$}8yMmtZ%msq!XXyPL%}j!G~+=_j3!%kkDP z{b!jrSU)8w$TFECLVnwPdas8oe&KustSvP)F2VoAxx7A=)1e30ARN4M`=us8l~!y1>2l1_6>pJ^g7U{{@Z zf5#bjMe}SpWe9)dC-(qC8Z15L7K(56fC~; z>~1T_0r>S8CY@SO<y7RDmXYWY{zQPiqQ7P!8}-8H{y|x;^GEl}LPPWp z(%Vq7|8k?M%AV9dU2{!$;Je5p6z0wMX*yaPl}}K}Azqj zJh(*E7Q__&BKpP@ld92ScH^xYiC9&IT7JX-va9r*4puq*fx*P_3<}rxzm+=QT85G^ zhH3MwBwkQXI>T@(g5~dF>JNVWDK%X`d9#4E$|`N7pIcQl93mj$?$adAYJ+EOghQ22 z@Bf1D&TW!!#Nzs`>NY{tLsOYIPTeom{zzwV3oZ+zGIf5s#m2N4TrA;0MIU?BL7XC3 zR|iL7CM_|MK!3RPVmZL;*obF+&?k~tZz{Y;Qx9sLsGDz?y2^;KkVzlSlc3$%i!_wfPhd54nC2AuW7U?Y%;e>m znXA$Y`#g33n-#NrgkycDy#9s61QTk!^=g_~*jf z4>z9S8*3{11^%yjsEZ%6kXO%K$In3*49IUW+MR9^!mQ zBPwd#gP!t^%3_d1v3j{9##8J3Nuln8yBORaunMsW`Z?6dTZD%C@&;Boh7%a+ZRAnW zH!SeDFuxXCu(m#bgr#)$%6c(%j&a{G2@8ojKK@i7)6=7lJN1SUVciOmYws~0KM9U2 z&-Us)^8S{D5AtuxWP~_5cLxDt^U36&7%@F_wjLU{7&T~)oK{bND4B7nOk1`YeUer~ zNl+}~Mwj=Ssw{S>GbfrTuyG`RoFg)Ofm9;a5+|s7MS$10`m-N5YZl0nMy{YL&LA3I|-q= zM~geh;V)mVLaK~rt8FIx_xqGe1npNz6stLtkZZDnA6nDXqVGx`+J1fsYmGi=yI!-beB)0Jc#;wxA@Px-UXc9cN8eL3v>p@&p}&phIw z+im)xQGJAQ!?>qm8qyzu5FLnIc#`W(F`EIlD{?Wx_Fpk*qCNY_ZV&u`yfPj-@3vZ@ zKDS;g`&NtHU2FM9{scYW4LbtZK(6Uuf22qFbyJ;l?L`DK`SLEMgua6Kc#R;KDDC4W zj9<$o?Yxf2r&Kv_w3sJ}W=*6HZNGQ!44(dlNaQNuT}NDE$@VtmfAN#p5mgozmF^M& z3zR--{~^CRW}JF_WUQ0=SU?{VUsEuTwr(S5Y2KkY%h_gPA5b^MMD{SCn8)OUz?M1Y zih)C8OC4wEf~nHtWIO?n+#rLw>XW;n4+7K<0(ir?hkZ z#W4TNsv3(N`vl*vcKnyD3fx=(g7~Y!$Y(*WKzm=_luF=u!a#+I+9`xf-BCO z{)55tVg-l6`r_h3&RYt%KYtqEhKy$cQ0qS#K(&fH*2}y48OP$wE*K@aB;3oqCpjgG zqaG_(wprd7`Gl!WJy|*`_1=Z_q;P_-MNa$JZT{)}3xh++gwT60g^(-1pKS!%=V=?M z{go{t=NjsIhUHpCb~PaMmnb@8@i^B0F6EFreM%mXsdmYp_GE)70A(Uj_|4zw>6Ber z85wLGOw7!B*yzss%nt>RA}Ps{G>dC@$NXNK)@66^rnO%vJ$dPdNnTd~_lnq=_f}R% zy|oGBPLCzcqQM7tGLn}RzBv!{wmV`;b>~gohaXX zw8tZP4UeBZdE%m}iN98a`Dv!%^Wkc5ZeK8lfwj}jg0LU6c?7rE0PwRS?g_dUdu;}>hb%hQlc~+6 z3k~Z%<&sj~cysh0ZiggOh3N0~9|yEjPrAXbyX)0m&O)hE&YUEFGKV+vbI{%CZGzI( zRLFVm3&n)^fK#r9rLUuh)K4h$ZF&8ySP@Wra*CqbxDHqVpg|nk0Og~|TZSzTtOtHo z?zgS<*TWWRg4Uv@11`Lk?S;Jm&b@-_^Not4qG0x;5uUfSctD9cYR47-ruF*kXz7ex z_u-$rZ+2`0S6XZE^QWc+SF>yRMMXcge(gO+*XhXJxHRydD4KK=Z6BD{{ofJ+gfpiz z2owH~GiNYG>+f#LN#JUSg8hfr)Yeg&zkjQ)i1=gBfw;m{i14~HT@kd;&;3C59N_86 zS1`L4z`LuIT?K?}e20C;KnaBWQ@=^6do4k*@g z$u0H858&RP5AMKBP-pTCEbS9Cw{VwXyu5&aLf z`AuCro2Az!n0F`1Dx`~6M8!rP5xq`boMsGH`a*2$XXK(;j^oDs3>(21{@R%h}XVV07EF*sW-z{*#pa0m@p2DKMQV031AiKu<5*oLDw!roucesyPnUw1e+{ zZN-(Alu$h0uWS31g<~W;7e=V|2=d*!4^XM&&fOmgOGtd-c{FKViHu2CTLx1AtScPM zboocN;+?Zukz$wW@1<4Gef#`|k<2i|Ee|l#UD4y|T7+(XM<-*5cSz_Ju;F9q?xHXc z*PU(s4;KKjy@^BKUSw6G3W{>||F@~LL%PAy_}@MZaRS>iM{rY{5=voV;Sq=gcwg`+ zETNXZk#<;8PSdJG2fP!3`zefk+_B-5ek0T-PQbl#%`uF*Z{X#y3n~|p3hT#XPj+xo|hs8 z2H*4R$pLyHq09t1oeEGi(L;6Nq{1sFxr%k*O%OZ zmtN<(tfcqzX-^{kt7J2BWo2bPRK_@}*oTYeDzD#BCH##;ok!)oWh={M9B&^lb&xk| zG}n9MTK6D0G5IxiI9C#g>MSMo4Jg{1sA?I}`umR5az!!*aF+=8zhutcLeeF+Ao2<0 zr0f^3<@E$*$RY3yF-x>;Rb`k^KV~hMJwxn)DJi{!N~eE&K-e(3hw1Uhq9XI}z+h$u zHq4A(-=n3b^9*il2H^8;f^tbFM#PkrolylvB7&W8xEH|127Az0qXhZ}fAb*Wr1qOPy5b|veWWMD55 z)o&IQOG$noIy>=ho8My4R1qIv6sVc_}SSsudhuc0b86 z(23kZT{(fao*4-Ec$M8$1U~J^4t2NaO8>^Y%D0>#_Vxg_-ZyB%rtdx-Oe07B*v1i2=rjjH}KT_OoDre=@KBRI^#=}u-x-xVzAXm~>O5*Hc z91>V<0##4$p(IfjkF6BFQt}C|VEB5ISqElmcF0Lv9N{`^Hxp_9;mN&f7IG4OJu#4vq>cs^EYvdwQG-gZZTd@rzj|!zWLuw1sMgUR*uhlW!M!e*`RM z&e2Pto80BxnQ7*C3!GEU$*y?o0!aarQpuhX$U4<%wD5Q|rm-ZUJ8^u)hpzG3rQp~SAq*y>uH1JAs)zvf}kQr%YsOTx9xG#~(85^UIUilOY&==m5 zsY{87Uk<=!K#}SRjhnj`wE0dY z44Z!4DBP~i!s=$#QkRT5JUX%pRsn`+5lBL6TVXR)-K5HTB%XLF&2|ZCX2!=?t+ABi zgGMO%w-!Yjxv8*j@SlS&^G6SC7Fmz#T_;m?k;p$N_?J>Lvuk!YBV6{wgS=N(u=rdO za%ri$%Tt}4U1-a&alj$q8l1k}1>OH>6l9zN8R@PYG!8$3tny}^pSsNPl=O{&K5_hn z=dy(Y=H`p~%qbO^7qS3i$tNTvj1@?um8um=%F5so>N6&=V}_+rb_rS!W-F@yjeA0k z$IBD1dlFMvL5TYG zlXj_a#~I(hw&$8FA>GHG%P#jz4W+k4A%D)rhqmZ z0`-#-%+P3819R`w2?SoOq>@nR-85nPXWzBzyaf2|YbMdoYP!LG-w?pkGDu)nMI9 z3H{Id&-X0zx@~SLWaHfu~C%(1tL3Wxp(jaUS&@^STI$5 z{`}bzT0P^sIYwXa;^D;X(wA(?Km5Gm!&kI_IcY2ri`t84vepADX?hk?mjADBl@$U(ntDTmTzbwZ1s`Tdq&93YEdMVO(0-yT`_ex# z;n@-$-G>YPQk;m0!HNQh;<7U9Diww-ihK72P5Zo~w-2#`D|9P#?or;q|GQEu9Mh_` zwKY`%Kb%OT;|;a%jS2;Dh85$4#(9%PPh#NjW-nYHManx3^%1A*w;=kA-`+OU$)Q4A zG*5p#c0Ie(AXHjfN^-n8X+8~iMsl^^TM7|x4Mh1v)MWHKBEr$`p8fEr0NU?k;EfoT zlVcnT`fpq?w{(~WKTBmDvyf9tdaYCR$s_w!KZFE11LKPbb-vbJ7zgNo|ydz)dJC1Z+aCw6#Z9xH770va%k#f)gxE%dj;=YK!7n!XEdO zrn$nF;i2c@8Jq6uSt~GN7pG0?L5}>xj4wKJWF&L_A{1Uc>EI{6?g@bvmiwHdtFWj@ks;|l$6+4#)&2YTt3g5is@3nbCaI`su^oc^ zKFFxcBL9%$&k#1qQsqBG`NpmP3qAVEBO-{Z%<$IilkT<0m=fXb(o-F=q$tUUOFez7!@QN4l=KY#zJY6y?C9du zP2bIJ-y8n&6GYNYjE(Wg(Ij+htSOaf$x;O|QBzBj0$iUNVL~vRlg&UKi-l4bf!LGy zM#J@k|045uGEza*Y%^_JAMMC(RAgkN;2=_1K$Z`ot<+RUb*VeA5#RWcI|m*fIywqA zInu7A;~bd;ZW;&tqf^EI@BcA%HZ|gku255^PV-Ir&{Xlb+oAp0un^Q+Us*htX7P@7L#n@g&|m8J6g-h8B+D=@AkV@WbDg5wWK4mtA~x zh#h9V4y2#gj`8_6=ED#WZt4z~{rf3AbZ7W6VqZy9JPm#IaJ!0*X6EklJ70;7vHpF( z;2RjO%zVLO)L+K6yU!kkcw@L$l*~|nB!=C9f{Y6H+GvP5<8wwK^mKw@LmE;!nY}`8 zw#&>&llW0^VHkV;n$d2mjwvI9y0X3bXmgSqyiH0$nxzp*(0vu(z>qxJGEYBv=ihI; zhuD!0k%h4m6g14LGO;pCK^!1Q)>>IvVJC?Eh8hUz&k+!zJzbloKY~H{)1f?ZFJ5l$ zXns!4$P@)SP9QWUFsCqcK@AGlnUYE}!!mTf=a;DXsKg$4Q6m=zA=%&sER2ww)Ol_h zl7hNcs1@kjh3Y&dz>k4hxEkSNwCnoq3>G#xUi|*!;9};RJjoMYt1~ zAZb$ymrC#km@P3jHWrr{v!ki8v9VUhJto4D!Re7xTBpI9HI=4kOW(a>JrA&4xt1*;lM zFR$YhaPm z#y6%f^fk;=;k+xs(aq$iRuKEKsc9H0wv3Q7$herbORuVC}(`Qg>#Sy|t5-}0V&+jHplheeZcFb@n2v_r*I z{W3h9?EwvA)vZZL9Tv7h$Qz1ahn@i@W(#9X>s%J)KDw>q?DKAoSCVW^Hzncgq$tVK z%Dqx}Pz^+@QkaMF9Xt`bq09jnXx|X!8`bqSH6vKBuwA{PWf7;wP*MV=zz$V!*!Bwl zH7p`09~dTCr!ecP+`_%@04R)jO3eM zFRwk)^X|E1%Sk9fajo0xAfl+g#n5d712?G)v=OCk&vj8;{vIJnDYL;9PZ_*0UM0gm z%}UVJ*49o`Dp7d~18eSFs2(8G=4`TazJ~LepN2?a(i-X;+178Ws}Jo!Q9siK(c%#Dwya&vK+^aNK69~q)j4*PyP3iwiEl*tkOZ*|HE zv^AiKrUXI6SQEa7h(8d9UIpa}y1lr(1meNL$^e<1&M^2wD?*deQmjHiJ^k?T(o~{K z9qy-Y7GxS}B@=wYReMY%P-C0InC(KyZZo#_x%2ydwILm_asj(TOc9e`qHbyu%3B$AYkXWsMXcWim75n!cSimq|asD zY5!dra}69^#S#7*jOgCp-tLicai%kcvRGX}QckDUFsS1}fk~^jpjGYxm!UY=Lge?! z$uH7cpA?aNV|jpZ1sY7&cwGj2y|l4Wpfo+C`xYwd@WI+>hG`eDmdlNrT;0zOY}ZFh zu)tw4s6wZ_kmT-K-wWamM38om3+j$9JonAt;kE%_XcU~IRpFD}kN5YvIZ;Oa19sK` zS+N1rDkiv-hT!C#=LxthNCliTGyhma-R)ISOYkTMLLv5kPeZM2BjN;_hRj5k7w_9 zjQ1Pgul2))!sNcM>pYJfX6ghs=j_F1CzXf4r|cw-U<_pYmyrZL|F*uM)K|;`TNmEy|7rF}j)8${# zFY<#mJ>&8!sa_j>Zvjc`*>*D(eF&c*OQD7EOLhj|JFm}W<0;G>V`gV(5!ct(lYA;B ze++BD86;{b!&e6dK$%6c-ZHiRGwcxuWe2OugcE%ENe;D?g7Pd^xT99*DMIa|P>0sn zP_8i`Mo$aj!7lixTD`mz0~NI{lqJesfYa^v_ErM`u$knKNihSH5|4!!n3}2N4=WG% z19FtX*)G`VTLAKm?MtIsMq1&qR~7drb+bH|^mfbc^1tL0gt)+5ugSOMYE`a`y=#(ffR7Y`N); zp%k>uuOOfJRf+YZ4tW|ZTsOd}toM>Kv2!4SIwc|k<>?hl&p3@cQ&A&W zK!K}@6^!W`RiAW?!eK4yb9Oa84-&`BLTBgZ(h09gd2MioQBJ7@oN4HOfc?jUWe;H0 zlW5#>zX1)T8(vVQP=|;g{}iD`@&Hm5DdD_&3Ee|JR_`hZ~eVdZU{>n5d5FRApWAS`s=JxkC%VqNAmU z+$ll*qXttVX|uDljhP6dCuxFJSqh+qJ6NNOHZ%#qsQjv&N^&n>uCVtQkL^JZ#K<5Cfh@t|8de2U^ijT}=x}zS%O!rIMp>-t^Q21(C=Qk->7B#` zA2&BGxI533B$$>ErLg)YM@M6svs5JuPEIKH{ayoORHzUtQzVDnW1w0&NO69BAA*7t zz#%SNgYrVvy}G*EO)HpkgA$z?9kVB$$U?H%VX_DfqXFUNOr=i5UKNPL$vfisEd^bM zo(V&=0u1nGF01$Ah%pkibND1-9#1+*bpP1}2vm^3i~Y-c1>JTwJNpJQ(b{Zr@r+ip zZ`aY>9Iq1)I%udQkw1a4O?l8v$O`?J$0XV4&qsa&37f&0DRod9n|mHyFhq%=rxPej ztKgEi2*t_tGNp=0j<%Pa_7Vo@uLk?f0~I;>NF>wfA5~K@oj&6Hu^Ug{Fx_MK;VG~+ z+*z_6odN(#>LUZLRpJh~CpjB3ld@eE5his`x>=c;oyB*6Trv>Q4+gkq%Qz|v@AS|- zt1zyARBDz>@x1D#&70*kF%1?3B^8+x-Sz!IxW?5d$j%N+N={Y}i_FD^6fKKF3FM&L zQ9Er?i>!Lsbc5wJhi^$t{`D#FAh{~SP8-=Eq3@}uqo?_@vumCe5<0zll66w%x;d-` zNOsBH#6(;>Fh*#+1j#!}0Oe8S;Ez9T8Rw3Ch7f96DEprNDzG;jn-5=$qUDf5)tJ4`-2%QXITF+tnSBFNWoGc#$-l{Py#&@&KXu=|yVPEa)9ufRu#AMY}+*Y?r4 z(TcfwHd8B$c5ph&ZB+Fk*C5_^oMOn<6F$6qd|-fa%7qJi(B`Ep`fsxZo5`Y~5pa*^ zd1a}i9}uZTmBB-dX3pdBHZiY|&yuTE^2GSy;gQS` zBoS;ws2NAUgQdi(#Elo}o=vyzZIVurZg8M2xKZ>H38J@Xc2F36nPQm~<5vU)!;(=? zqAl9R&xX)wmr%SuGH0W>iGg3r%d2$POL#-XlUuN6l|(ZzFf{m;Sq#Km`JdLSkwC3G^r3rIG=M3d|?%X31gjiuBMHyy*AopUB*`!(M zLN&fptCA?r;8wFlVnInMiz;r zhX98UFO;~miWH3&As6&p=+zO9?xErZ%TsL-F!(bS$aRFSGF|obZo+jIf^F?pXWkWQ z5zYdFc9m|=UvtT9$bvXv6N$dz1kO_j{!M~tpFb(YFwx!6MApmZvUnn#hXM_1KjGCP z9kPYNhdV!qb3q9o%!v-*Lcx7=1%m0|25?QEtQg#z!y$WvbEN`oGt8+9#e$GHO$_6y z-5TIGD?N(o`PYxW{0QYQul%|sLfs_jjOps#ZqjSC&H@8fSK`QKzu|%gV=3nkIx;HA zZhKD^Hhznb1U~IrL0Rx>FVgrA8k85$|E&c8VF~46g5T|;JE7bz&pNx76l3SGz~)BH z(>jZag`=sY$+9_eLe}eR_j2{KguEq-T3L%hPae2Mcp7ewrj9jp8atAbsf6cq~N~R@zE|hX|IjD-*=(FiEY=^<-$iW(%wDPLj8b5#jyj=!aD+b1ObF(es60jg6yc6G!^AAixnncjh zW?>%M;lGZy7%mgF|9~W%YyczStGfNBV=&p%v(}=-@MXKo27u*PYqhmmq}JVC90pq&vT#(%C- zOf&$UW9>TP>%D*fz8~P+Z#FYA?eF>mK7zNOrC1&97*P>WpngUV z7Z0znQ9+IDhe#NjA$!$AiN++LxWad z5&9)Dt)R~7Gs)wY2?lNEcXS`3)uaFEl?$ z1!yE{8k=kBRLa!MfF%mn3Y>8wMx#C72W^4yeohOJRVMfodPZ>U{R99tJD@nw{vj~a zL=cN~%1&Eoz^5DF{ zyVzI@rnJIACO)aO_jtMQ3sFC4ez2iaNGm2ImmwcXW3)c;~ z8xUxz`|}p$&Ckk$2su3uhD5_>Q?h&&AS0=6B?&1I#D!Gl#bRA+aESr#)A>&Zm&K%8 zIBk+x1y_Mn>eD{1RH#g+T5OEvEx>$e2#C7uXuCopM4PFFD@@%S-s?<*8I?Wok2WY9E@8?J&N-Tzq20rm5RaS!i6uo1W|QB zr`J(aCzQVhm2*SW)2Zb}U%vk%R%#VEi1IQv1QU|670)BrZm- z0BJGEdNAQ1danCRBuK&$*XwiFn~wx^TK-YxVf>_|d|D;Xbh!B%73-SZQgj#6K7Oim zq4%@$L)|JlY|l-3Ur|`ZZO*?^3{gvY~5_j~^3J&N9CO};aJ_X@6I*gC2RNb;cCcLH$PAu?^x z3Lz3+#Xitx>#oRYQ>hbhE%UK^@~6TmBji^!_8K~)VDNcOTQNLp2c>7~*e zr%T;1VY2*+oJ>yOubm_e$R}jM%VGh&{E#4K4z?`uu^Iq@niZ?p*L2|3Q{U^f1n}o3iZ*0-fcOfGswZvLnUoW4b z&fJyDl3=3!QM$WDDl3*m&IE$1b$xa|W?j!71uXNb5fi6VgDi9kn!{H!Ach+8Awf4&>4h+e!VX~P#4klfB0+eA zb6MtSy(I!Gm)o&*+_W?|0qM6LiXPnW2yPx+f(|1U%JVbk-Z1NvrRK|}GC#A{7S=rG zieU--bY~nSOiW2ai%`gtXZG{l6~Kf&Uw}Z&u3gg>ws8hP>`@$zoT6jY2Vk;c$jJ`I z4Nfc!V(UG`x5R%=xAx6zALM{5g0;8+WDPe+Qwn<)fpak=$^5FRkUAF4UKgkcNRp7F z`n)m_sbzDr9p~$Uv@E5%XclZ}s*b`otz^t*Bii6n!Q(U(T$JA(+ynxgI9)6Ltb#98 zpa((jiy@K51Y7TB6$RY8l-v9?{mOr=NZ; zCPTF{FgQQm(`kw2cAGpiG;PRZs}YUxJGw|W8toyXu()}nNPRf=Th%AhA-aH?dF!ku zrcssAf_WO6iW9;t1FhAlPYyr?x(4=w;(9i$PuYXajDCAtPDo@uuM^%rb(|ds3aSKV zO$}Tn<8VStykS;N7!qB1iVB-A*eW;n_J$+rMsM_h)a;_5q9VoaDx20BTo;M@*{__O z#&OdWibo#^_I`t#89gi;sFf5z#1`{WVG3D$wV-FQ)5i3F^82TMpAi3F>^@W_+msV^ zNpx~xZu+a;>k96^FFhYiGL544`iWl9p9kE@I~5U--zO+AbX0S4zZ219P9f61wfyn2 z!{oDWJ_9azS$KoYxpqE)$=!{N*M^g{#6`Io{FMA_26}q+Z)uV6lruPk;;BT3|ME#{ zwlIs<4l`e>RI3-GV+e5b^S|1_g;xim=TD190sTz@rj4P*q%PgFmnS+HV_2gfU|8f z+YzID7s&Y=8p!PI?b}wt(C_JGKjahwH38T=Kj5M8t++F{l0R;~k@_W+L+kvY8)%}= zLM)tGVP?5**w??RIsE&AE>L*syX6e+`l53>l*0JhD^cjmk5(WC zMYjAlySEDG*Pw=q)hwgQ0PtMuy-o2Ec3y?^*!dn*93#cTR8;f}3yCt6;Pj<4LI0{) zDd{#sI?`vX-TnQ$dgyxu6ex!b z6}ws4@_|18+*xlPQ{>NqLY$H>EE9aMzaKXVx#G!#_j?*=8h*H5XP4i{SY(FDSZ}%G zYX}!JvNH6D^M>rbOVF>$lf?vcvq^4Q)Xg1V{FSbqx&0_ow9|P-`}nA@_@?h?g86pI z7*q#f;l429yRz*4jrB1F1=rS&{pc_h0fQSoPaOjzH79Vc_sbVO2?9+6`~7x*ZO|pi ztwo?_|>j9}Fy=6JX2&5va|^rR8W)D%a?hn_xU z8MZf|{d%bUKSfke=Q2N$jUxYthN{`lo?@cb5#n`}u7ZXyye(R!{(@bvKJ9CNeXt{= zX6(yA{PR#_anW*K?swTKUx!-X-M+4@B2hL`;yXlLwd&x7^r(mBYoPou1B=w+M*&`N ze+7l;97FbIcCZi3pKgyEeH$J9rL3f6PLC(A1ggV}6pEvjU@&YIIXQ`ZBVP2a_PhA9 z)%-R z!CbVY^rJ0}`< z`n43P!Z6~k)b`ROfWe9ksk*Thg65GFq(ZrCcjliR$I%m#h8NDAUE`UVD=hS+i`J$%hRk^72EP zwKC#l!oM)cxbrJvhfeB#0-|llS<$0^+ABU9`x=*~Ktf(cNML{+5dzw6JOCww?UX5Ry3gi}LM2c35C;n<}On8KITxNIJh^ z0y1FSvrPfl3DcDZnKx&ci>v`{!r0Nt$qn_m8)(QJ#+yD(m6P)h zorc*w?!$Z=7}7!G*yuX=T7}nfl*RYhP;d;(=r2Hnld#=;~;hjA8^1IytK4ZZSm1JM9J6(%9!*uUS!%h~86ecb(vTAW=L-~- zfxgr@`t~z+aIY{-Oe7=uisAS72y{8s@vY%Rhp<{C~%>i^+o z+&oggW+Cj(fIj}_^3a3{f5Z+KN!X2g0b~xTfzf|bW%F9@I08&AkduTxY3U@mMCFZ; zZ_fpR@>v@cJ$;&U4l6TPhGpSTWP<*(V&KjwDszkluWAXfZIJ4LDsbSWLMEE=j`oI# z3cT2tGF;tTimjMAl*e)UU8q34SM&BQ+u3`Tlk4Jvw8bl(vFF;q+Jos#&CJ&JcXkA{3sg0I z6y@R)2Mt%qmui81FMIFskfXV&$&V>bftV#tL2W_Fm%lT+vU30K`*+nC@Jwm|;}A0^ zXChdCmdPQ}&njrvWDp47IWB$fNpg*k`>S`QS-o z+h$Vmr@amUqvCuE=a=;*An>ivE)YN=%mLdwqo*J6vG4)``&LcwNG>g!SmY^~b>?~S zu?X`8o*H%f2^PfKc)In}m#XA7sF#BF8cVb@u)U902yeP$?upN0VkW+(mcK1v1M%q* zzx9zIV_EzO{&5t7$>-h|zxA|l2wGH<5Ium??)5jh`2@EkQHv0COk{E_`+Gn(lDq_j z><5rg7Y)%i@4Z@GnDtM=Um=Wu-XY`IpZ*a2tum=R{7_A*wP&fJUXPGx)+lY9oUKv#%v03|^c#ep zm5?>n#J97U1fnL8jiwZI;zzTqlJ;owNL*lnJ#1o3j-P}B=+4WU%}785?= zstvEUmlDx>^EWK@;=Vo+8R{}6GOdzvm|n9Cb1U)xLM1{|pg`;Kq6nk;EMaTXYHQ2Qd3sex(pNhU4}) zK~M7mu4JXVK9`K^wJ8jGt(>FNwxW1ZLlTMTW9}f2lNWZ=Ou^%S0Q*=U9tw_lI-kq8 z;g>$gG^L+DeOfm%Gz>sRM|T~p`cxGzIpOnWo&;Dj>$x*uds z)qcHcyRIq!O00yGIEUQz=Eu7(o9}ZxJA((Cn)QCRd%t*c?^_;1YwkOu6w=%a8)k$i z3tKE~RaeFcE*Ww=scw1bs$&AF#t{mm8(fEvBV+vXeeQlA+EYCbbkN-)m+pGvUT)NO z;TR@0VF^TA{YPE+&)=ZFU5X{=hgn%$dtLyD(b+k6_-ER$sw!M2wL<6y0O)cgjwEgj z4OKbWs)Oo!DX@xhgX}pus0CdddlXvPTP=fsok$5LRIiPr63}Gw5S>e zGpmRoLp6UQg_4ZAiJx>en1kU##BokjeRPf_7#7GaM>#E3)9Jp|(5QslMQVHj9#g(D>SIs@b*)xP9e2H9)Z*TfR35F9 z-`ouJiUQT_YV&fm>HFIaO_oQTbk*MLxMYIK{(psKNAQLOi-4&ibt%)WFPtq!oZ=#2 zO+>%i+-#NRnp$(}O}o12!;+IY^M(YXF&+QlV2TM$Gcek8Dj5P0D+7n6SjP zw+l~U1^9?b;nywEdgQ^XK|oDCen3C1kXRXO?W=_I6=&-N$ad?*AS6gBBJW$pS#_*{ zlMl5^-qVmJjoVFldJY)oxm=@LdTI$-XqPa*OQ-`XbZTJSmVG`1{00jYn{AXcrLKV6 zpBN#RTR6Oh<_cB85lDI#=9kW}l^*f0*@$ZzYSu*%b2tj>l%1;(_d`yGhnNXWnCiNb z<}3L<+ShO}{nng@^QfK4-&RUS;n9;U0Gq?fKWZgO_+}aFbYffAJu@pS51{H?X#i}8 z4C7Q|_@R`9ghX_0t&^0WpGa?G2;Wl24m&;))x+H%;u7mw@)7OBVqeB7zvZVQB(xvQ z2bV@;fB$i~pHfsBCHKM4>%PAa%2Mpazub27A#1~gS1&J*4j_Ebu~1VFJMW_VFYR5D z0xUWo9VvNLny0OwuG$R!n~K)cGMg*Yu$%B58|5PU_W*tS1%mo%KNJ?SbT3vs{E!o7 zZ34{X*`FVCNLw~c>6}9$XK()=_n$`7+Mw?}`^x|(i?PC8@cDlGODS0+(VH(q%Zavu zRh}qeD7jiUmw1n6iTMm$fd-Wohp3n8%Xr2YP<_nI&K}QPGB13z_H_h{u~gOcT36Lr z^1``8NN~)NU6*`%0Ti{>ud>7O;R4S48^&mt7Y8W>_c{kIk%(tDKL z4Q`^30`fJCBT@Hkd@k}1huVOt=}Op8`$#HEI!b?Gs~ zUr)R@`#+3&fD}N_190(})i0?RxIVvJ(AGPs5>hC;`$pWp^w)KYv`|BP%T2P1QK#yyjw#wv|FmI`kye5QgXDPVR1(~R z<1y3ism&E(hPE<3fjt0Pi>aZ)T>qayRk5y&(?HPA$x2+ud`ML(HeN zi30I&nGEx|yhZvYvQaMNtwS;A)_f?WaWy3kz?$(m97V;qTL{kUyMi^^ckyMX%@G(4$sIf8@e+-6OkxB zO9rC;F!(g&KGvB17=r%`nC)=?*mP4k{|8viHo@VS(%0WV3rSPt^^FKtxvFt>brSnt zulYRWr^$PnrX!yiGz=%IX)w=s^&V+dH~oqiQKJY`PeJ z64E3r5{X1@di6b|i=uON9|fOgtOtc6vImS9XnRK1@XCF1oKL;Kn8<$Y%>Ta zye3b;@8{PSA~(cqBsEM_R8vW|Bfh^NCS(^V`>Y^~s+C?ag>xM5M2)LUmqM%)G^BB4iZgq*?O$0`Jdhj3FBox}Ci_=GT8%Q8Vq@iXZPq6T`SY{Fds9hZ%AHgvDa5gOw^R=kxv)wla0t0ak zB(eeW-ffGimz=5MU&RKYW0;7U z&H#4Vlaxa~N>h|(?Pznon7N+nFIEknK>TrHE(*h=GF}$=HDy@e*OC1F@MB>?X1Ud4 z1WlpE=2{kxBbRgL^vQ}&*%)a^&WU($8|(2>892?n00%3te3q_E-TKe4at|=~9am7j zTrwveJRnVQ|59*hd=De!bsP##wm(m$P!c)Kv1vad;y+XppQ4&H&l*O+F zQMiQ)Q6NTxDOiu?qFdlG_5IS@8~w7(A%I95T7xEbKEo}X&ayI}W@q_#u7Jl~l&RV$ zJfs85e|9-WtfJy>XKhIWLVg=wgCa`9s_15?T6+^n#VPmx;n${ph3WyK_W4kv{#93C zWnu9pk+x&wfv~jxn-}&ToZtoC0sXE>8*!y6LA4YFs#I&EAt9fA&_Ckd+lavP>&PB2 zg$1LU2yCc87nwKskuqgAc$JP@zGb4~Va{lX4wJAW933K(LQNJnouIu(u}SkFr-1E- zioqMnv)c*|p3PZrb;~54a2E4_nCi8G$d*&TMARmD@BT$^A-V%;)M7%*oKnayv6!QD z8533G3ETxtE3#-i5SmYj9){rUZPB+p%7-up2BZRD(UubZ#%HOQhZ+N~bh5a?0u=1g zp`l1c@zE3Asg3(mr5n@z%ggs$K=!Z%YDcr)AqGWupwW3CL|Pk92Iem!Y9eI#3$%=2 zcr9I%&UIIXtgKmtq3~%UUJ2MXK9zZrih0FD23h;O>w@tX+Hazai6KpHng31-5Sd{J<#{`;dg0+ z)iN>?IOqRFG)xv@SSJ)5({+u`k;wchRMa`db!}$9vc=RhctvSgqa+o1)UOB8-(tO zE1cl8X;Fwa#k%q@JZ{|`grFg07~S{BZltp2?4SsebE%x=xIAy6FQr*==2=CensdR! z3DkkXg3wTz$!e9phO|thant~Wb#VkFgxNu#DI9K{JYC=2T^N{{IBo`uVC2KDp?OK= zRC`CqCJ^TB5mHdJ*WzyFle^kEIxchZ@p;UF?NAAr5KfH?pK}J5rFn&HV|TW|2Uj99 zwWa0pqlhd?I(m=sBtowo%C=EPzKNXpVkfYNwf0blw{s+Jg1iw{V37Cu+SeDA?Bk7e zeOzvq68l_Xe3#q3Q7wZv`NF{j8n-uR+96S%Eq1AMVo;yt}~@dwL8VYw~@Y-g!gP> zZ94QyCq+cRig)K1Ay^70fYPKfA}oTjZwyPY&yC%}7_MV%)rc|`dBfhi*gS2TLk7__}vsiml@C*dIpj$LJa z7LxOtUv(gZhd;vaMI>~ZKT-_#g>%h$y6^`8=aV0?#2fHim*x{Y zF}*y$eR%#sMW4UGvwA9-q05W8m^x5jc_biH?!^mj+slBTu$$fVb6a2AJ!i5rt5={E zlHPc~bDi-U^9%M|7^QB`fy#E5Ee94ASBz1kU(Xv}bCQaFD^X$!-<#j`->$r$J1n*4 zmG|wZKge~oel(l2dc*egX_R&qRz9*K|J42w6WNP0VSoNsaev)<%rP9o_kl&ee*Tb36HSS>R+GFay={fVpT`Ap$7P5xjo)P znW!g`Wu77rrnIu?6iC!P+XQMybqZE#U6APKqX#tKP%qw+BQ)UVo8uaG;`Strqh^Ot zr0b^pj6`L-;){i1jzZ>6|Hjc&b<2nj(BF)~1UDU#f&N}oIalO*@q=sv)T&YBFMk?` z324DY#vlVI>IXSOnV~Ou+@gdWmZ%MgP7-2szvdAT5Yz!w>vsTj2ZKs#y4B7gANGIq zkVF;91^H3d;s^y{Yfae6X!y8Pw1f4?JOs1~Gg+;AROMn4a>w6arZTxkt@Xs9H28?> ziMX!`!SUd~!0}6#2sP7|)xy0}?6C|FNMyv+dM?2vX??^`#f;uXe^7*z$1?p>rWF-e zGto#>2W}y(iy%-#_8GIBz!tg*Tc^v+$~4apWPnpC>4MPTDc}g>eaEW*3H7hr(P_<# zc*173+x0ZU$c;ZSft+|5^?&C5v$eEZ1J6~20&ZMy#^d8{ZSDO0N>!)AKiY2SbnL$T z?q9KgPJg~L8U4iXn3N8r3ccY#QEh2(oUYq^`;dx?3@^Qcjs0o5^5L?{?NDBSlV8w* zBQ9^CLgaalcv5hl$SX^S<=Zy^tF{Jv}4ONsW53`qZaRBtoCHY%cd-XN1=8 zICg)1osHCfSPwYcQl_Pwps7~Wc{rw~C)xh#dz9*Pahuzs?lHwv*d{SBx+*;24)9=9 zP$txk$7cUR&M>e_lrZRsWzKy=lXwK##7a8znhheT*nU5zI0j8L{ER9QB_o|kWIPWs z4coSbU6UXCg?@hfipTi(nswi8?_Wu6NhY4#)oli!MEk4Q-W>QIP12vF-*Myw$YvAn zkC(Q3)b@(WC1C|yC(|04=TQL+D!Elc{x?kz0K}Qf){q$?L;$0O8VLLFgi87N+sS=F z;!lg^Q0+5|b8;(v3C!IDpGZ_~Hkt$>IsX$`Cg0G;oNXCgRNyxc>}Nw%iHTp2;o~A8 z6q9Cn>P~-!aOIftY*bYw?OJe{QHzBHwF2?Cto;v&-bV0Et4>#k?M3sud*I^+7auu8 z=`lo5kKf&a;lI9uV5u1$iHwDvjE?xZe%*C|EWwxgYinegXpf|S?MK}Sh2MO3J>i_NL_qh zE7YMa)>N!Se#rHan}PlP7!4VEir6~Y-I+5asXX4Z70H`iWnWTCo?C369ZbXMGN zipJM@1ua2{L8W!-J$~48Z`+&NHX;YE`HKVU>-y3;l>A8-_6<^!$P4ArSwH&8QpwC{$0dVq2 zMx|zo>XFFjtH0r^$PebuLD{ZXv~W1P{fX}|gS_gHj--h7^yJOxrO!l`p!xa18Z;w_ zRZncLs6+b*ILw^AhsGAsk1=nlWKg?hxG|8XpTVxgYxNqpVu1Te^E7hKa;0TQ@pfan zr^hV3t?hOYBrO=y&j83(#FYS}#~RK%q*mP;UWIf9!7v0myn`5uv%w?m`u^C*N2$eqSxB4T1c(`<=dO+z_+nt zrFuW*fK@Kz`}gmJ>g7#dix{r;(ilF&9nu{d6}+U3q(XMs9oXqs^00>`H`<)U+^buK zcN+vy$S~9150#u9Mb{SEKg;)Zp)D8`i!GsKpC~fu1;bcwuiwhEWuQ4ClrS*3Z}nmY zyZWwued6lb@M`01toV;oPgpPd)cwuM*}T9R85cG@xyAN3%n>UZ=dsGEY?!B7;D=gUYidL z_fq3hQnr_~CWDchXGfEGG9NDjZ7SW3C7W|z)Nt&st{xdOq>xU&{}^OZaBlnRRm4er zq)jYKlVr?;geNdML%_|)8%9$US7N+K6pD*3##Xkld9*3$Yj*tgl|V4^#42Lk_;FQL zkz7jOT~!M!Mpc8zIsT1|tx=-#QP}UQY7rVp&jJ$>yjVe;yPEFYO5g+5z5*zC=jK-3 zp~W?5`!C*a=MIl*;kN*=_IyKFq28zn8Ai=xYyw>VC&_7p{xfEQ{;5n$8Q6^Cu9Ylo zZ3W33^HhXDrQIQfX5E3Zh|uYfrgTrtSY(0^mFu3gE-Ig*+Z~CX=0i--jUgCd>-*90 z8NND!_QMMx$;z@>r*09r^uE;Pc%#waty?~U>efYQu(K^gPP*&He0(5^cAv^AE6u!w z=bUlhLLngwGyIDqNGaxnJyUVe^H-7lBiW$RB3<}}xdLNbGks2(C7SHsBxLL4{xC@c zNl+3Ci3{;KOpp2i&T1r;7G&GRV}G(Vr!5$7`&b$iOofCzo7bu!`P}a`yUQNy8?-noV;G}3!_#?dCFAAJipRJ-;Mp8 zU%HTbPPREa4##tqlA8T#Y#OXTNMILRUaj3$8) zj;puC&v8&cx3!69a+*oCB+<#*Euo~PX!w|e|iiO2}UtV?ifc9Gj8TX1gT(J&fo`pN*+J}g6o1K~);iIRqPuVeEWFp5z< z;`RQg?oIq%K)Jr?~+|7twQ*rA$o zn9{X&LDqFU&$MFUGtS-B9qxNBD*;~`qZZHf^86`AoDXUH_eB#Xmj%rDeo-mKvizP& zIXPH;1B~3%ie+guI;7t&elI(U%hk~5l*@n7uTnvJ((^2#@D914 zSXgF#GP`k$a#lc&JnQVi;MqJ&m%Qc&HK&q|7mlhaao>~=yEk@*6nQ;QXnmX=81?ju zGyen{pOKGv!6Aqp573`+3WUO_4O3vSaTD~yw#!^jCGKVIf0Hce2npeTkf^8e#;`}L z685DXoisY5&}?-&zQm`H&D+VeV({}f&wwF{Ou*~NG|914Zl(MI<_PfbA)rh>!ok8) zX1!q7uYr^qcib!krI6>*<>j0WL14eBpb}FXMdzS3s9R*!Rzi4ZuuP+{wtytQ0nVBu zvwjy`jo5+%Y*ca0jd_^^qe#7lbHq!jNLy6Rjq*`fEK4fb@0G5=VW6$J2WcwtHHu~S zx!H@0i}7z4Y?IKmuLnKugV6=v9EiIpEh#bEQsaN|z3e9Jl-ZHu1IBfG+OgL2a?kBP}9M(O@42D)^FL{H*6*SSa#oQ{7Y@-%NE#V2obz)FyBjUbNnp);QJ0g zwVR*+zF$|L{VnFr$)2sfRlCvsMmA?{ z$w4o3P4&nI9g#jasHS`VUijUU^FO2gQo{^a*|03$)Fts7wkZpm{m_0y90rz{RFn%xY2Z$W z>r3N2uD;)XiXe4Ox5cn#0%ylOT(Um51}IAu$~AvIXa9yG*JMbi3XmUa5IJw(+dm6xG7?iPC0m7-*e&N&wTjKkIF+ft#Xz1%h&q8 zh6INJ@v8ACd|GS1$mTEEGW9)n#H&Q3#GSvN%)W&8ftZNHWK9-imnnITa?SEbEg=W0 z7rCa+@X`9K7(?ESGr5R%6Pn(>9%2Yh{kTyvqhQ@`!5d6pz0k# zOj18X6}(&@Scp=2Y!_YiKfiWcG~S&~rV_c~H^Q>I^c<_@eHHZ3=G%7Udt+_GaUq;K zwx~Qcd6*w}Q-O0=e`?d(wRxU(NxBfC2>(N|SaBk3{r#Vw~>=j~9#L zIZ_z0Rwc!62hyJK1u6yCu7U?eYU2BPRZE`tXD(71yuo(-cfoim$-4_b^2^LVyqcm8 zq*CspVlMG=Cb}oNK0)sbTM^bR=u`j6P%?88m=O^%&;V2w31voYpCGA3SJi~y^Yf2M zD|Kc^l$jNZcD{^=apYATqjmhj)#f$50KfB1dsLB18g}QUByL<%ouii2bObJtA1nV8 zh<}2?V5_#kM*XJnG4&RtX>yb)-LClL}R{o%3`G$>!C7ghnnN1GE5k7;b0rs^+{{N8m z7C==-?fN$28qj?vw^e>2B#RrBmWtyze_US4f>!vlk|K_QTl%aIhPH-2S*waJYehStSNb26MYdov(SbA%AkFcIFlQrv>k!JpqE`PF} zQXrV>MA2=2Lc6*>Z@m;K>;5T{`Brq?k0V&!{N`Wlc6uO-9ck2yu~-Sv;yUKs_GIym zPvL&~UoAjhRy%lg`e_ptABW$r{X-tj&2qfxA6qX_xH!F%w;*tiW|?3LcX}DDMi5Bzhd%UfHhl?9uR?wMPUl&LDiC*oAdko#o24FQa6nY&~~v= zP{IR&e89n`hQ9X#SxydU0}S=rJqlL3F9JhCI#5+xoDOb5uk7)B?oyX#*nenrkoHwN zKt(u~2nSe-(tgmTDj9(0I;?NGFH- zfV_%rK6vxz&$fmkQ~lr+kT z*A8vK%Ek^)$iXN7#{V9!`;AHbm%v+o&v|H^zF9GT4UZGUp7+NXINV7ZhSJ9b@fv>p z4>dQby_Ge;Yz)yX7+4gI!g>2)T#nmO7%~b@y3e!z84rY^^mH>FGo57gV5Fp|{~hUQ zzexO7GDf7gyC6P+cTJog4T0hAq6>%9-e$=*;4R({_M@e__LH$(kMe^Xp#SMDK)f(+ zCu`G^Jh31~hMJGC(ckCwV!c&-dTN*1tamz|&T;6;;&L(i)k_2ewOR3efQbjemrn7 zBgcQ$X4{c&k(z|*5I3Jm_G7SmeO@&rw(T1l%2h9uLBBxUB!L3^Uqr~$)SoS8spOo)KsaFt%YvcCU|Kj#%9Ns47}2*w~VioAKT^aEy-SKl&?RP7v6n*}oZ(+?pb z9-QsDQIJ4wVnX=1(drZ4Yk81GW*pXn)duV_pW>;E-75 z*<|H!31fIR19|cORqkpQNAbM7DknB?vts%YNp@B4w{NfLDmWjCW~%I&?>z;<;~@`5 zq#G$EutVPR36nsCB8n6uh;SAF>2hU#3h&p1pXf`({QZX{I_ zK~T_G%TcApU$eV%kwI}!lAXfcZ^rMzZ+jD7xwyQ@qv8W+Z`PIx)|{*$u@Xs9wV9!p zF?pV#1%f2mCp6EX!)1X4!8nY@H{mp!Sq9(w-sj1EM%cato>pV2+~ox%Gl=|Hh7zhh z__dzSD)zrLs@eV5=YV^sx~|NzF3C7)R<$Mv!M1JFtImDZ+I@|%FRUIj2K zo&yT#8SmLrRhLvXp6RQHuv%Y@2jB#miT?KO+f3kR{fU%x!%1;W+`pzlOl~oGX<%+% z|9pfpAb`+H;jmQ9eCqp;@jkJT+M4TE&a+Ft9!CgKZRhjEeA#%Cxg|I- z%}6Y10^X5`{G6O*kBS1o170saNRFia-id z4PlZ_n45P`vtkVh-cyiK(A8$^V8Ez*g@P>AxUow5Bz+r)!C}`GnJn&rjMD4tA-rg_ zAJ5ifs>Pk~1kF{{*FYNXvl=%l^Mpi)vzWGU*0vr*29IOun)As0-8l4mZ>PmC-DsPu zXa@{71tK%bvCf>t_Cg|!BH2eCo_B>a^YHFZcMTA7l30m*cMp#! zX&ISv4!d=M3m{aQ%Ky~#0A}s{g!qR5lBEb3)UJd^MCg!xA8sVvugmpzNJSFqpcgTR z$=8QgsHo2$ylUa${FlbDO_l8#_Vo!!*1t5*IfQ+mGPb!_Bb6e8^nHK3Y#`ww@cJz< z%%#wu{6-NR8HuXR8i#mFnJC`qQ~6)pLKQ z%h@76F<^`OS*whrgo4>I_?oVi%UYMyU*S{shox}s&OxlL&o01zSkd}-y}9EhuoNhF zGP2oL8Q>9Xqm>{aC@Y?=AOyTCljbW&vZ9cT5gSd_cHg&z%uCf~$Veo`5y5a&ZanDW zDe*hl^Cl;RzE~EK{`rPps#|{giG8bYa!WnhvqeKKAfH~{v0N5d;HYJABeGu;p)0mM zASdZRM141YiwcGVc}gub50(ea!880R$Xk~P^0=QDod70sf+WZH>C*C zpSZld>;kH|zko?z|J9VX!#A>^%kisS2AI>L*>-Su^YFND0aHcA7a|0=1kPe|_)L|R zm3zRlO3(3Ns^ASgG%~&eiAzY9&;-w_PYzo!AuI}1z!io#de|$llu&TJGww6C+S^8CtxnXArkKynQ4;n?rrzo*XOxAreS z6Hlj$rWMQ3qTTmDcjiawdPXar{t;*Y*ZLeQla%aCWU+E{xD@onbT7BYjP}r>7bvR|>F?W&t;ON`R0-lSDgU5pk`lIa#4^`z_jy0CrWQpljBa5S5 zSLI6ecQLtuTiD42d%ejf480E(3k0a1)0Lwa_>eyLi|Tmjm+8;T&{F(sE@Y5Um_t$*rt)QJ$?KX6_^{Xz~Mni0t z@(D3XSwq`^u@_|nU}S$@GtE+Ynbw@-)V}}x9>5vKuUWio`tmmdn!Ef1SYjXGoS~XF z6&0YEOK*ubV5_&gccLGmD}4fPipNmh^sQ-X{Z77t(bC$E>M!KOe63#us%~oFoA~A*FO!uiU`c{-uKvBklQKFHTw5CR#V>&oG z?(kiyho@(Bcu2^;YK{;*im$KlkugWY)(x;pGEnw0 zGqW5t>1YA!m<2F!RS2H;5HR@OxHdt+(Po}UfKWRg+m1v}%kjyI-OUo0Rb7zljN34$ z`e0p_*mkbz8uV6=W$|q%7xrsPDm7~Su1JB%R~c9n*VYyaNh%8l;HOaB=mh2Xw`8!p?lX{4XU3a1H%=#+f;`>~a1(-4~#1u%DO39(U;~TbE$gAWXsvi5?^i4o5 z&_WffQ%^8F>3L|V378;l^E=_v8?=~R(}>C4ykf{-`dBvc)z-8cSy6r5#wbAdw`@vf zcCg?u&g{|&yHvOvkZ8WS0yV5Hpt`6VphcuwUOyZl8KT{KyB!@6v=(ceFZYGTlLo5x zB=!~#4^q3MO2}8YKq%-E7=-7Z9**Y*;t^)0<6lCvd=gq+F;GHC4qvzF-oj3truCleIuU@6n?%bg$DPO>m4?BKJO#BweQaeU;qY258nZ}*LqrVvPP+5o>(E^ zJJj^wurELR;mH&03w%ahzPfD&O^l(JR{fvjZ_U>Qs4=Lq+R zA$(_HaJ>D-fJ7mA8pJfy#H?9ML0(0YM)I8`ZQv9;4oyE)^cR68@A*%6)8dLSfWy_kXYZ?El1vKd}5H z{;VPC^QW^gY4niAl9s?YSIhUDQQ8ScQcR~rK`haG&V?vvkWFp!%C}K5JZfzswIE(2 zW$sUF^i*>v=ZD;IW^#+CYEV^;Km(jLyTBZZQsG@oGiwy z*I_)Tq3i)L>9jXhX*JHma9B7~yTv(^zY!eqcI@7sm)z}>>-_3gIm7;e@KG(p z3BsFaVZG?&G-`)ZroziCNA;4Mue8VaSYMb}9Pan7MPIxVmfB3YKHA=AaZL{(h40@D zE8u>HI=??%`h3oCmKF7>yv+=ipFo1swsVCIgY|S6c?JJMZ*?^>;q6VX`qi6OG|P+3 zLuox=5sCpY1~%O&4ILxl<9X|d>#c>L5RU;gzGB*o|v%bfuMIS2Il9pzk(A>JXOvSaj-8HYpe0oo=4kmTrC5e21w`-OErS$W zN&|QUB&VisGOm2&iQFAMD;RBhUTa51pu{rg*!JJ^iD;{x~5v_DmdjW4MFyX$M&k zi~&=ZBdYS`5Pv^y;sALfNZ=*nru4=p9?T>BFb!f6$3V&9f}JR-1Tp?ML{tSJba*A? z!i9-Y+PJiUNENY?u>z&^+^5vI>X9Z&*Ssp5HTK2mz48D`oO^sTc@c> zdEK7DB-MSbJOXfe;6o*BBpIHfL+?ybfrJZ+A_^>8WSFhvWlgNY7S%E&gy0jgnV75b z&{b;x@ZbhJuVv;yiQVHUN~cL6)K^4b{x3vF7yF^%BMu&B?AqXr@>iqnBJ=YP@ zaZn+nBCBIS+}#hXBc!A7*{FUweI|9&8`uvzMwFT@p%U2FY~cwaGoBV`y4}6H*(mT)1{S*m!w+ zV9E9H&%wdjJ|BMg$5faz_rbyFBg0d(P=&lE%;b#ylhfIh?BUstND}&Np0uSH9D)~H zhW^gGnNAdGuI`UF?|JeLJKD|m=iAM$>z^kRk8bw2YiK4q$^ zNrW7(v@=TUw0J(#I`6Z@;f~^qZi!!eQfvEO=2O<2{1W71rBH17FQ`_nC4aoad`NxoLsg5{;>`}EMPsKzl3Z`w9) z7%C7OrySG->?x=~7QzHv`v)gSqmTar4mr)yn=aAlaybep;GVRRk*usCLW&17&#QAL z;fBd7ib0HAiyTgGD7@hz9?I2Qhs;uhNG{;Qk6A`yOYy+ebVb_ir=fy4Xyj#_<@XNd z4^R@B;1?X$a@)Vz4Q5dlMso1D1ZfVo>z}2DN8pue3;)`Em=h*i-n*{5YsE9!vw2sB zyEdDV8zSnPV-P!2aPq0e>uSt*yl0KDwv;V{&ohwk@S*cLbBhJmYMqu3 z4}ZgdcVC>*t8R?@@&Moy>H?!6F{{-f2lb~p_4 znxXHjNnI}6*MxZ6SMzIcSQ`0zFM}gerMZ2dt17$3MlXuxqZ{4Di>YQy=FP_*WNmE<=_(fAmv)it{Xts&nudly zmZqjZHo)!V5F*+Pl9PF45nFbur5C`d1ZoOI_>tz>*AerCC!fcBSwEG?#&D@i?sVqQ$J5{CsRG zoU3aHZeu#i7Ro#*^RcNvsIQHUI2@689Yn(xrKGFcpmLRk)w7#Ak5+s$9^OF~<6_CQ z5N0*h(SfNu45NkLMrd8GZ1%N@t+fcY*rlRkOIZ4=opleKz2{{3`8$c>GL&`4Q4dK6 zz6P_$lhjuov0O~^J0PxGD^a^AW<7;%5>z^BVHaPpprS)Rk`$%(hI8aH8(wP>W6jC+R*~VOBq^(W z@e4?-%ul=A=5Q5VzT0Fjc0lVn{engWS;x?Kvrf{r zU%FEoQ6c>-_O)1(V8D5wH!Hqt=sDMIF*J}e`_5evi8se6*MM8>8 zbmTxgT9p#NFr<6`3BG%njlg|gA#8;TPp4gvUKO1qM7y=&y zNFsWmq{7@`zZC@AdlK26bFT`<<*@)gnF^?uvzeG|xrEloCO;lbmuDZQEAETVqdNEl6qY%1 zV7Jb^(b*FO=2g^JpSWom1^V9$3p$JS=JC$p)hTeoipGSp#}vgc-h{oQ=qcObY1~RU2ZQZp^%|Z(-_FI)e@p<@Pq)Aleo`!E zVED-S4uY0=8gq*jh^ax5gX#^COoeL^R5XRz6i%Qly@oGDQY676+Q(NcC(xG5523Oe%P*k zuJ<6m$Jo`;$axJ|k2Fw@FC7Un%|!AJeQ(LfZ~umMzC>}9{c zK;3u^#E_l=r(#54@nwyw-y-(0kNS&Pd5tSA|3bSLN_&kV$pO-!>_9p5u`&6^E}8k^ zVWj6wsrzGjn2M%@oOD9FhIY+eY2(w{=4?y7?V8Uokj*$GEKcLTD^)$ymkpR% z1pu4c`W!)l93>b|{`_kd!l;r%2)Q|us922tM?mRS3&TboR0Hso5w0-b7m%=&5DMz! zS2*=DwMd7u77!MXz=~jt;3iFhqSFVI5BH)U-`Ds!&@omp%FJF(|FHi3Vrp|ql`Gux zF8F2?;{y!bWmYgj~U3p$*LD!Y5TCv}U(B#r!u9b~d7-ZkhX1(X}=#t9*%+s8LF_UXX)3cO43*}+| zuZoO(K%~*1vIF0nRJ@gb1hkXX1ggsH_oKedp4K4^nT26 zTZN<+QMJ4ImY*R$##d;Bt{Y~(U!A&uTIFH5;xH35GmFd@pwcD(Q4gyAKvz2I05WjcJs`FnbH*8 z;0C2(YOpqiS(q%osaI$v_T`s49+hd4I+E1_t1F|VsTXtV&0?)j@T72dqr$KR#%ztH zo~7F=ISL=>ZBtlHG$gvgtWL`NA1$?W?%-2PM+gr1`}t7;q}IF{ecAyg|Eg<^@Xzc4 z+E(uWTi_P$p+Y{5h#==0mlGW-bsi>j%tG*LRco^0>Q;HDA!Y>Us;;S@O8HewHG+tO34QO|vdnkJ zTg`eoy^9O0_47VzECONKWtk+jy<)F=i71*Ia13OUsv zHvNi~2(a>u5pOyLiO#|^>0&@GCAh;ze;qkF6R@7EOk9t)5Lpjn>jSLrK)PBpDVTm;NPg``<+#eWBf6gC8c!_XuIquLjSQ-}FOqv3~_%E>hRk9{^S zu2e7rS``u<&Af~%rjD}t=F@&Tl@eADbDhFH%up);;xQY>ZuG>=?)b!f zdBn^%xH^_pII1N~TT)W~mv_0MI*!j1qnl1F1MRhGz+19MQ3Hrr^i`z4}2P9a4HT)N{-rT|zxK9kq8f0Iu!2p=^cUG+NVTUL8X zSpdQyHHA2-qn;TJ4U|%7(1McjFY=B=-eDrH4~;xJ&xWcuF&gl>^aFZGgId6d^oyjb z5Y`a_t_FphH}FVn@%vb|2ABZd)k~MWIEm;eX)7&bYadL`}3%Q>vSaFQ6PmgG!dk_R(GLk>h!Nl zQ)v45q@!SPT0KtQ`lK8)pIQ!K63@T$%QjZ1ymjmAymgQ-BHhYA_`qAQ*jG;snmY~P zn9P((qw$-ymZbhYsTYjY2mHFEVg(wfmS3dyNGaDzNw)9|3P>_^iI_TjF+r@H^E`2M zz?z9tr0d?o@?YiqliyNC_9=<|tBovXWrvwR?r8_G$YUi~6Qn4iWSAxRAGD$bScHCN z57M^E10S9);OSfb!KgpN^@!oHOE>+arP>q-ce6yr#5z>7c4bd6-0sJ#ACrDiawXko z=V=^Flu5|ZjP)ooo| zo*7n$ww#Zw*ZyUWbA(;`-3^^?i%0O^WnU?77vj;{h^Lkd^gk=UVHpVRlJ$g~#1p?( z#IWfgsP1+&jFgi1>IJx#Zf8gz(K3Khk32Yr-f;noy-|Ea1D;;5h<6wo!2J9in9R?E z{sV6Ckm-ZuI-(g0bPsir08E-|gT?GUn6_oQ$=z7;V?=udrdVWu0bmbU&Nzv@m)k4a z$AFQ4#=p`P3o^9mYk8V$$xITEyLtSPoXO)k`k&f>(qqf);>6)V$-0BQq@;<>9cUY% z{%gT2$RAoW74vM02~J~ke!e8XFTQwAZe8tME1IeHkrfjv8q*ma*mHH9n<|@E>Ru=; zw2kcBgSAz-VWz?*fOvaY8`YMi>>MMH`lMD_mZLWN0;*?m8mDaw2~`4Tj{uZ|&eh>; zcu06SJyYW7>#hD%O#&F0enGi!^#uhcMWFJ<5A4-Pz%+JV-7P~+YZx6J@D5f2#hFX6 ztvpK~d->?_W4JkWciN}&oawk{eJfP^(IQPi%wYpf9#Xn^pBgjmG5T+EqKnsj0eyfP zFsNLJsN~Vq>6BY~J*QOur&u^#UWE{yna84>WU6Us&uAzI>d#7h+w7ErQsXvOl%WOQ z!Y6Zt%m;hQ+3Ix;z+VtCGCX{1PPx5>5hC9|H>VbxkPt9RJ|<8&QZ$G$fd>~6oNh{H z0i1Q+jDT+^<*x|*Jjb)cV^@0svZMt|^-W}Gs30$I=0py2v^(EkY%EL9W!D+Cysk9o zsT2Kg4eHtanCi=$(D0Y++_ZhB*CEUV)V(wMNKvXIuOlguogEJ$@8NimigHa>S1 zPO{ABodQc%W1+_9-L0*Q6i{tU0@2jrwkAVT_MzD%qgH)Pa6rJN$HT3CO{rBO`6u_r zJ~Ze&@}gthmX;Qq^Q)^r4%-8>w6ckT@ot6M!8d~78AjoVP7 z(%+g9s2sYa)i9JtrZOD0Ry}eL+nS`XgVx%f=h$+xj!(d1fU(kEpVr--r!i7uLB%^9 zk$HnMs@+C=L?z&10ZJemZM08ME5{ATZzc9dO4dFc&`2n+wA~Uudb7rGUA;?mX3Q@> z<%~eUfwum!P`&>8U^*Td3u{p6BpSBRtnOrUN*xs2=0F7bbObC#jwq-I49_OGL=TjV z+rV7MSHR^K!o>|N^TP%(hT0p&F_koHFhMU9RSMMIOUKt49aJ*OF)ruZgAKsVk~u1I z)G3Jmq)CHRKYbg!abj-nM-}i12?Fhd1(nJNwF^Z&RKjN99aXDT@{S>X&}HB7_3D>c zGAPWCJzZl~8?%qF{1mL_+PO-E^q$|Pw9>FKQ&vr-_OhTvMEa`(XsE?KYa6^dRXDh~ zqOi@)w+}ZZ!0gpAkMWhZX8XsVgT_eMSur9}Z(NMO6nO^=d(9Zd@$trj!1LTytF2yd zmI-0A;nioB<6`jc@oupxj>5K1 z!_cp1anmEe55TaPt2KDQ$6icxr>+?R&?wLAc%g;`NLJrq1e&+{u4Vx`xL=GZnNY0V zaPY5xs%)_ue?G7^UNI?_L^d?6_+E8g8UFBK58b|1h?&~1uiyT6Kr888=CW`t5MB6X zLfPKVE~LZxD5(xOxz5c&Z(gssuCdg3qc?)uPe$lU8U4r720_PtL^W+oOH0tzi+M#` z4oX1ANH-`g^-5-HiI`V_~t+T4TSLa;T;150~r9q znM4s>z?)Y`{VpO}bh!Z< z(sbm+0s+k71lk^nLqN$D2Ba6GO6N;7vNqc_J~4vHj;DLeR9;Vxa7R%j6%8AJkR3y0 z9D|2`p_Bl!aJr@&X+_{$C6zQG)iO|*Vf7=z#~+-l1+3;5i%s(K5Y9dVj4)LYEvZ3E z6S7nkS6>06-J-(6!rUM9@&Swk{sUDzh;5?Tw=vTr+@BZw`*(*5Wz)+4T^{7we_aD3 z3DXackMWpTSVtI>Mo-xIA7M(t#mYI+kXKdry)mouG2Ol1Vx5{UAFJRpiI_`0D0@OR zJ^_@bY~bQyGTRXBj3sgoygNq7kS%F^TwPtMXWGP(lw-uuwb8{Xv`K4_&&I&{Ybb)X zt^9TmK(`|%kSkp^crh-*+!WE*fa&RnZB3l|-O&_P9F`IqQDl1HGuw@!ZxYyd4GP1@ zT*So00KFulHmKZNzxiqG7Z38(i>dI7+MBoo^rzK1=n-Q;^!BqA5u4j#C<(OAJnAjy zD^WSE7Gn0k7Lp1F^%4WBrvzQ%j~#i%8UZK~Y|8KJx&9$8pU#g%5V_(ZQg=LD%flHA zM8T%U`M%a575}S}n-u6Tj#^voz)mo+O?^Z%CD-dwnFx3MHv#0Bx9e8jvSy&;#Nrh;z{?+!l+k0GY~NAFhK$Hojpp(Ken>+*b4S$YU7~Y8DMO7y2}_I8k$0ggVPYhFWU$6jYFh9I7DH^ zGA%58Lz+xWQbeJRpDC#Kk-WWav|RUB(Qv$nmIg%YQ&O?@*81lR3ezBJ<&?}MDfrhtr=h!AVJpS!0a54KBvPoJ%tE>prP!vG6o?=NvfsrI*yor)Dy*xm>DNV>> zC4pE36AM>ktU4i;p#MlZhkc<)nFVkg0Kbw61}FIcfqk3dRR(7il(;|{BDNk7{uvQD zI$rz)zI7=s#|vleL-auZs`m1JWSPEmiY(e!2#s^IQNW4t(-?*~yXLDKg$AzJDTj)B zn|Nr^TDHbvS_V%s{9YRxiC<+upttty;n;-*F1gjq{I*g@D^AB;oSk{WMB{TRa6)xP z7XzD;m!W{b&kEpQ-yg_{#TS=JMshDWX}szheN}9j8#BZCvChhzES6nXIp0g))Zn{? zwcgN3e@;El0G&ixXo*O?&4qKNQc8b_nUvRC%DzrX$tW~g>3C|5RLS!9bdv6JDVa`U z=7tUhrvG?(`nmKF@diaP0YRD?gpAkeJnjyeBbOw)cE9if(#ed7dQqAFy?9h_Q?OI; ze?#SS69~pnt3_O$n@Yjml( zl%%&KTbUX@exYoF`Jie!k(6jkawF{bK#CY9wd>uF|D9*3Su9#0k_f(nOVyIkP&Wb|S4rzfyC zyY7bN!o(C~iacVNp7!DCZEJQq11=dp=od@X)z)f(qooIEK_Bx*zvw?8hWPB^5;~;1 zSEYfG#=iiA5-_m_+3I}u3Un6{xb%RzlBto@~p9L$;okq zCdN&s=m2FKWE8}@Lh3*D2OLY*ql`pCI3=}lRG5;vNRpT`GL)>WETL?qVWe2`#jhzv zmBc)nSjl-A$QfyMQj*RJ`-?nJ%-CstYI8`}b-=iH1r-&QwR)wFIsA7|IcqTaC_W0t&rY6MJCHEsr<&D^iHLxhxwp~6fvDe4j$Q~^z(=qH2&z&NNfM{I zp6`#C2_Oy>nB>uXzE5rZVI}5RSgUyAU&&+0VRF}+`y$g}XcZN1VO5XO*T+R_T6g*z z^ucJ51?WDnZh&Rm4uIR1z4oTI=$pYl2cS7r%X%J(0&iIP0Ce7L^PT>}YS{DK!@Ur4 zu?UH%h={glV1U<^>yEsiMr_C2j%Vo85VjHP0B#VFS-kZh6d*U zr|Ac;>Qf}$)V5&}Ot|uvaBdcGK`qug9zBPJG3QS=daz(%VSD1^;$DFwS2~B->)X^% zFt5J;_XS?r;oV_7gI7Zk-D6vawS%h<`5GA5w+HkAWx#uw{$RH3J8ynwn5BP#8z|$Q zal0HV4D6g6I5ek&qqhFAG|K<)RD%;}IyJ3iu-S7#JAsbSlOA zpet&-0%ACZW}5cYk^y6qSR8b8sEpZd5WE(8xVb%mB6z!~$mK-LT-V+Z294>XlEqAMtU1tSl`FDh|Mdm9i*`*HB_+%* z-RLX;JSx;_b7NT<8~Yn6L)H3@fS@HT1m?=L){E>Nbd-S2e_vpTm?R47JKokflsFWk zmw?C>;2)-eFHUfb-(>?(VhIowvb?Zka1iNzK)~x1h3h0;hcQ_XMA_0n02J)d{ICWB z>N*i8c=$XgK72kX@LXqG!+nCN`WMsCEOnr?@wbbA<;U{y4Znp8NBm|%QVuHIjIy$_ zX(Hm{>gF@WiuHiN6c1jtxvq|F1EKrwzltPa?f{pe`cV9o`S=X3W7zig_5wI+nm=)K z^N$P+G@;|;yT71uCfmunzoHFefQl5}%RRP%%rdRFarV4CI)=%^n{~U0`*7d1L{yf* zP7JIh3IhTI1B1cpAjD{X(bUjmBmcU*bP5LPPAmBS4cb%%0KxM~TSJ4Uyu5r>f&w$8 zx!FBkge;Uam!?1oKNPMoKcAvRhDsQ9hEv2zo0@Ge!vXFS%>Dl?f_uBP+jn6EFRM&Z zJ|=qnrD1csH-czGrUxS~fPxtE7d}1R0@_6Qn2ekp7ESzH8W;~};StUyPJn^!<6Z|S z)!V%YepqmypwQ7*P$Ir*#QX?JYWdPHF);5bJ&kKB<`=z>5O_lUaworGtY_812oHe& z_ZA)|*Atq$i0o$Z1Iv-6ImG7?0Vfj{)RAF=#%3*HP0)bVqmxohKmR z#QXu-0U7TKY+jo9t-x?i0aOVVSN;K>z=7}cKjI8t&!mTkhjVA~$N;Y$DLH#PaDK77 zpBp!9V`~MV;45ph;kEy~)b`QF2A(~*@1=;Rcy)7q4XjE%kB@L9mzCyZq5gW&85RlO0%kiRyV@;gD-c=&S)= zY2Vv~q^$ z@VK3lSg*7s+N^bq8p?WJ9g4nW?r^FlDpS=4J+&c6FPS%h8`gVVS)8Ar=YnSjLkPyM zA~G^kQ9t3)9L&vaI{}MBb^v}=OPZLRj0F#eXvnnnQa!EX5yGoitX7#k?%m7JW`GJ> z1lqDJtlD{Es761Cb?4Aj65x&u)ntdNof-*MY-!oASt0rqQ9;vmcSX>g#pxfucokERBJ|A3eR()l)n zELSbiDevQZDhMP+Kn{?!heBGuVtlx`x-v1w*>(as%}ae6H{Qb3w7(x(5lBT@0X8Nj z!xHWfJBGJ0+|j!9I^sSXOs<~9J&!xze}aC?m#6iP^0QW24hNUg?6HWc4`8z=!#jh% zl^u7yz9JET9D=_I!>mxYK@FAk<2P>_v=AdzIY+*4Jg!N&JqDbuQ^$wo657A4o@CEMD zLw@9f?l$yg-3wy8Yz=-I#c|4{!o(EH1IsTE=oD?@zm^ozq)*tGlGSj2y-7^EsTh^u z*G_|JB+ca#z>HmKvWGS1=zJUMr#I269dk4r3k-ZEnhqB4F{;94odwc$6lRL!tN%(x?c0!q3L3hpq8)62*xT4B6ddCCSboK%%$x!sU zblWBJ;PJM~tZrneUwXImm znWNLL@Td3&*xfH*Lo50u=xr9LA7MKl8P(GKTFenf*sT@w|4opj`0(#=mL3FLyx|qt zOJLkQxn2c2KTf2JSqfd1Wq6ne=u8l8e0xvHUJIF}r|-h^`n(a()*y!}qb)B0E$6G} zK7w~0ZlD1r^76TfAftb%gvYVeROspH9h{xL_f&dwx@W5tXp)cC1_wiln4#`oE<3S= z?4Y@vBO?@_XtNg|2YB8T+z^Yk`d zI(V%jJ=b|~hX1mC28IX#j0gb?9dcOE#%_ez94AY_}1 zFwk`b-0in8Fu4x{Gz>)qt4N9p?8njZ%8bzctuGfg0_z(lrPT4Sp?oECye%D4y~OwA z3Q-|+=opxi??MJX-_EbD(!FN1`(G`+= zsa9}&Oo82p62=_-_jVlcRjfiMs1c53j3l&6^CbV7=i0SEa0_tH{Qcw{7_DmY#W zs;a7Z=^%%&NVC{Xs#D0DFI1abaNs8c{$C)lpx{G8Lb^)~d4r)CQRq!LaF_c~+!QmN zTdYKe-0_V^#4GhB<`O5VLi92NG!7qNp#9=&!ZG|O7kr2!Pze{vjEkEJ2=Tk%g5s~z zAX!%iquk6>)6=cP#+ctChq}=`>236t4c;z_8Fia zXI+4D<>(Bh#uqFoodMbp%*X$^1e^XpgEzrb{0W9j6c{qNhkcazR{O2oTs436$uK;MsolF6%LMLj3E=e`5?L^ za3ym=p=wZANN5UxWgJOaSnP*EO~@5PF%^1?Y$!Sm3&QxHw?KI6-&ZsS9tu0ST68;m zd&qClc;EskD1%V>)E|H$+JCata3lui2pnarcJ>GvV3Yh(2U5PqCHtrlYO9Ofe5yj zut(gj&VN@70USX}N=n8fnAel#a{ZfLMyU6?9UMu8K#s^r&d7)g_fY|~UYGSTid9Qh zN}7=abKOA#RROAFhKoNK&y(Dz;l6>b`CD<}-{ody8*ne_C@74zOqCL;RFE(xPoGLuTByj#3`^Cx zb~5(iVG5)Y?`MXFEILHUNP6) z(DS@aF*%evsZg3*kEheqgO{D}9drX?Q%lR|vzwc$7P}20ZCwF&`_0~7*))!o=&-QT zs_-ZGtia%}Sy^jn<5~Q~f56uVgKjviD~I)ITOd;i4w1sbC{4;P4UGr;2a>e3 zv}>ryW>jYp$cPFH3LpbJ4!?n+P!K4!4o!D=lTD6{$WX;fY_hVl<|as)r~p8%h>}tq zI~x^JEk%f|$7o$%_@6q}>_BZW#sw3(q0O07W? z21z%n@8293Ow#J=DhjwpLD&0}Julc_6|kLDpmlSrME?Xc*XAJIwfjdemBi3|2%OyG zLnLy<-N3ap6qd_KPd5b+#Ylj{bh$n|3q=8RJ;p>SjJ%vOD&&sJSG$h{*f0$t#U1}2 zRc{?u)w;!v0wOIX4bmObv5*F77L9a+q!J3!-5`rjDG4b>1OcT5rMpE$aw{lENW&d` z_xYZC?)mdP=WMs@o%5Yz{IabrNEoo%a&W~kh`?DvIQr1%>FMdI;o)I8?>iY%|!!*`Ih(%qK8}(u0*6>Q&p?*_|aZ$Sz^Z z-o1h4w#qfu8`=U1i$HE$WQHCi<-b1~XGgTfz#ayK|=r*5zcz z64q~t<+^X+2{uOQL&56)#o5uD7D#*ImH!Xiv>aHNn2XofWDQBFpR5Vld8=>ls0b-$ zRYWXYfudQetXlBzAPYUA>Ta;rmqIn}d*J;z&a-oWO)>TsX=-c7>AIin`JH<%Qq$?_WYGIWUrpEebj#@1_l}OwL2H}gKvnR-_LSk|?2KRoK*l2Y zbZkWwhJv|%=l_M}N&c4aG<=F7;`s=aEqm4=?Jv1yBeFogC8wo~OXe}Uy!TMg`wez} zZQ^nGbB!Y+Z6oUI>c;YN2G{p^yZvDcEbZ*9|1gi zq#Mca4+pO*PHVA5{=e$a`T6zp`a%>+4shmrKx10qp~Y{M;LzP|l%UvA?;w6<0!|}A zc%-FarX29aGSmd<3V9Kb^$?ega6@UT821@U^#JBYx|B;`xnP2bxgg;HJ+#f`M37^2vX@ zOV)GuibhSU47g}ygQWtP3SsYU3%i8Dx8;CadfJm~KpT^+9y{hFzzH5-LDdVVfj zyyngS2?mJqtv&*FbYiVP?dP?28yNTrK(TYeR)G8=c*&(-PP}rTzggus*tQ7!*serhC;rhDxwUbCw6HP!(8nWCO##m`k#n&aOyPL2awygMzFia25 zH4U%<*>~^WdDBRE6!k)Awwl90`uMllsKx!kEPiJ|R~!8H9sNMMo(NIKkHsG?8p|o! zL`jj$~<~KcS}HPEwp#jHtN@P!;P)% ze=h-x*{0i;#|iS^Tf;ScZJzU-4$=KOLLBdNIdHt=;K_Fap=RQ7@W1(oJ6iCM2}(3L zJv&P)EMy2eoPMxPXP$r<%@&Ms9LZO^UKCmKfCpu1UWGd6t_`fcIMLrrZF$#4qbtf* ztUt~p#Y{J*7+kU|ofu>5YJpR22iL8k{cg9w0Pg8ueW zZc9Pox(@*+F)LYVBb;zzpAqC=ydc8o{M@_1mg?K`QLnNTj6s67*Z9vVOf1w)z=BAz ze68z-mpjwuu%Dx_?y^8O1POy|5ioWFq?cXT40WOrp|;Y;w5in|bgMt^qg_zy3EY}$HGCz>SRoI9;lsaWxR$1 zTNZ)3mX^i8=?7LhK08a_DDw0@-7AnU!UK3bVO04EG{sB!k1jG<{DxxZ@UVLxD5$=` z7eujp$lCN;qkPw@Jy1eOXip;|r@;m}H3geW;B~S2y6AhdHL@POOm$QzOk;xhoR?99 zW~zjwH5GLfD`kp|Y2i=F*2pQ8o|Oa9BrK>$p6NR z6;VBg+s!0Lw|T~BDUNj;dRJjx2k;^QhG*r1w>~Hj5%U{DL>fGO zl6_nRnBDlSD-Pq7yl+MdAR6(xPE&yYd0Pg!jPWy;!dlM|xj^ z_JT%IM1;)KsFL!odF=JxumJg~YM>K8-?Aqp?)L|vx70Y9Qj_p3f`k42HAq5$d0OZnj8 zYpJMscm+6}CGWrq)$v^HiINeY%kazCaHBlHXFD1j4+nt%S-`ogR!gW8`{Ku7tLN%x zHNekV06P+^q^4>AwxRSSFSgrTf^b~zzYCW;TMCg1TZ$%TW*l-ZBg4a5FWW_#D(dRM z&KgZG^$aZoM~sj+i!lZR_kJLk`ES*ta3ju|yt?}EeCfbr)CCx@;Yh7#e`LEXI>1$^ zzE=O%gWy(3L|zEU?-EP9d*8U2L?zrYQj@Y{tg4!gz+rATz!8@VnV+r2%cVl}Qq?8m zxqfa`C5!kD5W<|M1W(O>d!!;f!BPghw6^DZH&bq&{w$|NBSoqy7AZFio(Wbwq zSy~JX3QEgRU$@TDe6JYcsnj)F#2@T3&a`|9H3FBFI-E5879Xa_{;)3XMWeN=LZel6 zLtUMO@BW6C()oT*Myq^dY8cFcTr@1p~%Rz|x9kEQ%X?n{7e6$DIF?l$;Z|ac4)7HuuVbAP| zYjQA8+sCq^oAR5GId+YMm56DB%w!jbp>wBhDjc(Wov2BskN+5J5ejE?@a3o!K-7X; zrpP21MG(wMu$&Kja(k8_zcTVBr3{Yb299~r6AvgwQ#KC6->8Ag;TqH;6eL@iJbBXa z8t3{NRrUJy>(__=+1aK+z@zbM50v1YTY;DSy(}-Un;gAtLCNywTZUj$5~k%pf7;FO zW?el7H30=m;0Ip_j@wcz^&^R>va{@QW$Wrn+w3yU zeHL~DyS?J7rc5xtA@4fgDVm6ZaEmA$kHGPs-FBMr4Cv5pFpGp8m z7zggDY54IpD=S6Ve>l|tAZTfEIu;RipBPhf8Z9(ihgg+Zv6v|>7z7K6WK|nkZeQOg zG0t-|z@JvXZ(woSEr>EF)^3%1$)tw=wm`K`fdOS7{^T~zXDq*7LF4Bn58~&HS*7LU zT_6NQb3pw+{C#Ybo5ekz_NS0`I6|>TLoX32%$UO+AaUaq9hY(=^pYf+kyWMgbAC4l zf$M8V>9W__S`3cHC1#B-FxG%+>R1L#}hUneD#y}OPm_AND97(|DD^P}!|C{jg z?Y7odd-zs1QrairxpIwB-KUUU+{`s~g&?GDIUih;L)%+4HB>4Vf6z*6P`Jqt>5Ff0 z>V?l4gl#KO;LYOoo5i+#@lM_!uOmFSGUSqTld1Zm+pEo;QIC+jF}>Kj`<+F8PWIwp zvKgKs*RLOKrPih;Q_$TZ+CdCDn_E<=c8D2-UvQLBl>d z&yQ7iU`GqHztB|bYj`2ABpv6(PcqEG9KuZV=;6a7z~B_LRF~yk1p{si71-2XSt0s+9@{YqXG%dQK$a6L!=dI{; zXCWCL;gv*B#e0c4LN1J?I{0J^_}Q3Z4544Q*09-MWi+Pn`(ivbATi(&(A4$F?< z7iJny34-9oOat-l2ISh3Tb;MACx*%)i%H%w?KkiX@fC~4;e>^T+$J$-`Qfj~E^4b^ zjKH_ODdaN_IMH{|Nb{+ubK3a)+cw#78M^D4W|5{B8>A>EvRKa;<1wy)@nvQvvjrlO z$|9>tXK`t=6ee)ZUobWxe(Et#Qvdajy@%|D7~PiJZxshE3}$s6Q2U&4^73k$QPAo_jO}%a2;v!76UZHhveA#y zEgSzQmWH*RwO(9z_s~VKH&6&zfqQeXTPKH{tY=+Kp(-QMKo!ONy5U!y~s1Ke3oRl(9wugNmZ7EHnW ztqi;$s`e?Wj$Yl17L|c$1B$Mn~bVEd-RjCI~Q0p`|I`Fee#!IB8#* zOEXLU%kMVH*2qyI0$9Js?Om6IWqLg7VR`|5OU# z8x0M`_F4H)xO8Pm^`uo<$r>mPcQh@ZaE+@V_eXwG)zmXKdi+)-B8`*c{E6db3fN`l zi3JRmJ=fD&!t>|RHDogTojb{HYa(}rBUgk-m;&*a($h63@SH{L(p9Q007%9sVvvb+ zL4Qmac0~Nj_^s^(pdfXWZME`wSM2cXq~-%~H+-ac-C0#Pk1!x#=;9 zY6Hx7I5axJgny3M33Vxc(g+FEnl|bIqZP|J5dVnXA5l+*xKGC&E3KYh zcALa@V5G#zUfD<2ak;O~Kb|_7dwiC+_EVF;{CKQ(@O)_vr z;a8K_6;0i@Sdz)})nho*0bUbjk$+ff8eX9_$hWwjE-vm7G$lqEl}-NV9>b%9J6eKUsSAQO=ahet1pY-6O4(x>*wai zo6a)uP{tSI`uG+vh{yk^D(P~fVG2@Jm0=*x7iQ z$(b$v-n9K)g=74Jd-g}P{vbuL@Q3Q%4sZ&c8-+H;?y&?hS3HFqi(${8|#6~FRn|A~U z-3O=0@Q?VI`!s?DoV~9WbdG4_G-3MXB2CswHXnPCFB>AGl`oOf8&~Chmhe-r+aGU_ zTf|8>`0vfVmRPvAA`cAUP5ezYgT>4%5-G%}hG%4GoFH#mY#3L{CBjMcR{f-U zCa7@U=yJ%dbv9P9&qM@2-N$BdUr{Pq4hZyB0`4H;>wB~l9ue`PNZawhTX`R|vK~|c zdP4*Y+)h@7z`;$L<8vOPnr9AxLCIKrcp}(Lr62fL!Iv~c+soDCe({KKRp~1hX4@tm z;&^3ECNT%&BD|NszkPgw0IaV1nl{s+TBZI(1+Vq*$%%>QV8OLI0eG_hA5i+}w6%;<-W z+__M<+^*NnGQ3v?oh5Jl={8v~ZyHt>IpfU>_t<_?CraR2DS-Tw{*?lo!!P&}wk2;X zR(f$3x^Z3zHi*HloCxc`)-zbDzsmUx=Ut{&&W+wV&5l3Hkm(b4odaq^AUyN`M z^7B0S)e5{(3eSp`sYCAzGctxo#%t`7$wgtZ#`fqWLiz}F4le#2N7 z8-)dJQO_#B$Rx?=xR8-Lsoqsubb$|u@dVR>V)2)RId{|pMVDvjPkeo|!059f&W?Q3 z@9aQJo5uqm+b?x%%H*Ch5@6VzXcv2n3mN%XL8g4(6q`U3eLe zo-ijOX87jgVc;Dc^Kh1GbZ=q*^>(ULsH~2Ua6!z?p^6im97Nn6&4t$K+UO7dPO`kS zLjZ%(Wax<7OpM%7tYOre`g4{ zcvyq1rS;sqzsgqa7O4}9c^KbhB*IKYjfEd`8Bbe#602{7O<6ymJ1FMI#m8viCaJ!e zn!qg!f02i@VwXf3y`!>MH8Ya4AD+5KX|ivHtPhaO+fB^Te+hs6<$vj^{j2R4*iQ_m z7ovUrcN&Q0bjV0_bQJh0o$u) zjSl6CZaq_0#uED2X~NqYmQNZe1*{4O)H6T$Nly8qH{)!s8uO35@_4A88ZJVOMr44^ zY_M3v;oTSd0L{tpfxQZ+dE5g|F5U7l{8j7sefB>sAmWklCkjoiQIadfu^k(PTcFaJu07Mq1z4_}JdjabvzGpvq7T<0%M0lOH-duHXmC zr)79D8lsTM;ru{nl&mVXBNZl7g(YToS(U_^`}Raxf=^Nno0ce@ z?tI*l`5mkgUjz_D>f)k#Ah$4gz`JLA&%kuil%Af>YuWVZg%t=P(yczy($4@eIS8Df z5e0-en*|47HjNOx1)9c7-FmMrZQa~KmY5u4VAr7sduDutd)OIV3q&BX%*tY6XsuLJ z=*v%Gp$kt*AvWY1O<6xyUwtp=y`edrMblxY%A%C2c&|`{lAZ7XWw4enlXr8C+{&En zX2`{1md`8Y+uR!QmwSW!xkgWb)>znQwa9sbK4uqx56z3n%0q}*?io{M678zdpP-}? zD&Hl)O$H#&pRfydY^F>CdVL?j-0f>__Kn)@^(uE^x4F7}Q1rf5)6#02HVmo?-d1Fk zl9I9mjicbkVNc7~Rw2R#wbbCj`XpimvL`=(JaC~Z{vG7dYkA1qw_snfM>5${^1ZCR z-6pfc>iE=>%E_E54M!J~JyJVWzX2O(vRJVnVs|mM>u_gvALNb_FQRlX1<=`vO{fn- zvB++PYThYTA@v5_zgEOHq3(V_=$J~K!u#>=5*xH?U^z`b8oFS~oNWsyMcR$n4ioFl zeEN>_dd0iM$j@)AuVGoNVx!7vJx0Pz^ zZfXC6zDpROIy5yjXxIPS{T{flg;0w5lx0cFr|;s~*_b{tSm0@X9|`+)yuO|u#!xNC zzoFu)H(`q`g?&n0KXM`@u~g)eBb$+&R|B3JurtFIyP* zf6|_e2BFkz?D}NSbJX~wwxb(XHrn8+i$-Jd3$-qwrhDBc2qHJ;Q-3eany~^ zpV++9e#RRdXCJJ>RRg(4rOt9El*N47GkO6owcLc=_)IwH>=%RxN_bGqQ|5sPp4nA; zGg{1Xlexk(ryY|Ly(Eb#0!Ih)&ZVA{4^s|GCXzFp{7R6^qiodK2of-ZlGW%p14Ynr z49qEpG)(Fp+fes~LmjgNL(D&d4me5bC3SS<%5R=`uy%0rBW0Zu=5U9iR zCI0>`(WXOqLuwqpx)I)L3gtdpiZ9odgjTxa!D=+sayzO0409$4;!5 z>sO%Z-Rp^k^?wRG|TPWUXB~+P|x>ugA1=p|V4Vjl(u9WlJ@w~&6;2+WQNb7Wxdg7a2 zq$Zz@=`hC&R?PMM<5U^Vs3K+ zqg^c@UgWQk3~59}_EMOE+$0}T*wA5sj&1b0WlE)2D3P{2BQLkz-YT@G_HXCAYydo9 zS0D!Np&sRq_R%)Act(N&>Zu^txjGv=sHt|F79Swcg`OF#K=xJ;d* z{~3j;goS0jA)gmEmgyJ-NyqS7z7GU**ktmZdt$}q?tg*m41!9axt(G0?}=S^ZcwY? z-LXs+`A7M)cPx6VQQ9ynjey=JFlY>?Ly^^9hl$33P-tleR^F@@_pk5Qot>Sd4|^Tt z$i7SC5FqTC!Y42y-c;81Jtm}UJ?Fb$8WSD8u1^I!QH1!UPe!2@)fjo=z(l=d3`Ycu1z zwM{xP)TeWvxVuKYLRQ)0`>h>|iX#?}hxzd{VQ9>K_vo7>UYNwhqB`=#tA9h>qW@L0amGwyygFWf?W3!vjmdvTJR% z1ACfU#-k_pu3!D!RaI5hTwJPT(G_QMPt;8DI5cwgnWsotXR21}LL3e_;)Y1{Kov0= ztvu+wrSw=vwvXle?UHXy;cH)3Ba=qC%FQupuT=E$ah0sSs$vLUzU|$9uAJe9j$`Dt zqf3pQ#@&UMzt9p*S?(vJ!Q#bzQ*zhsb!EqWzvC@ZquA~E3+d$i~zyhqDy$Hy$q&i*t!E*k%=Wt^9wM9YWHR@04tB?OJ*-$&}K8cU5p1z~&c&gM%GQ^$r?xK$!EN zlJ--w@IUV33ct<3-5y)8SH#Uz$jWc2>IWl;1{eh&4-OAsjIe!Feu^O<@Bx-=)~xq& z(I;vPq=@^2EIg4^hP^fdYV7tOND=5}9RB-i%;ro5PPWe3wRJ+o*7q+waWE_MoKSjw zJcY$6@@dhJd12l)4)Z7~O9`^Uh^|rKJTc$cOY;_(MU#`nDNhYtryXbRK7z(@e zR2-WX$)dp&>2&gT_{F6WWE!Be_z{qj3XtTSsXzU`0OKJGj0{V#F6wh)Gx-stxeldf z(i|7{|2XN}#+z{C68$3-p3NvdG=vgvSV9@}u?CRqVTVnpsRX7neLn7e%i->jznn7> z0-&GBclDfKD!EPLk4OYK&zeLj1525bnv0jayP0rm1GyT`mm~LQ{DNmcOiDO3Za1Ki` ze?z{LX)2&l=nL&;zM~dY3#fbukQ~~$m@-x6#2#4aODZ5czUXggXuu*|+VSyHSG$Sx z3wx8@Cx&>1g-Pj^UoRHIvr>A+S5pdy_9t1QT547;{CBS#eLlQ5NI{pk$T(pgCr~7i zVA?NHK9&<2ia;7ujz<$4SaO292eA+8u5AGH1I=-|G_ud>^L?ypo@c1hEe;V{(SsxK zEl3!sD$%`~Z_tXv>An@se97T|J~h@i@QdTLef(b$V)@|t78-}Yh-f`2(RZRh8SNoX zpH*F1w9jfHz8|)SJzp-Jwd|YXe!lp4r|~%TTJn1V$p+IhULBTg{w+XbZ+d49M1(K( zexeg6RCHovia;UuLy3N3a|CTL=J&DEICqS7d)F<0w0-?|<_zmLwL8x$=32HN^Xywv zRejFVj2pzc6spBbA)^cCbA!ikCRD&f*$^~rqyFKw@mZp)yFkggthm+#}#C&*xTNLlx7Q=4;IFxK1;);FCuRg(PPh}U4t)Uyfl zEWaZd1pFV;zr^FU{PxQf^L6t|{7cwCy2JmddPDMgtR+)q(KBy`#rQ|fKcc&V9mL$? z5&Mrv+ZxGK6p_X~>*25RQSleH2>%)jOG&nV{Xud4(+|oz*$?cOr_2pcSOqJX{~OmU z4(dJVN%&++j`K4n)(9i1p0}7ilCcTX@ZF_0j(iziozZYGCKhNzb|!(pUaC&j_}bB$ z>+1l*t9^(vy!o;Ubk5$xl?Sr#4S#Fw$R=hk(je|D#ygVn@xIoevmDbYk>=2e=$L1& zK=GXK)U2ae_c7MUT#45xZ*gbht}#^(j~y#wFjXjf%To+YJw7j#*==MWup)J7zqwKP zSKKv+Ys}R4GkZdGyBCd6E4#IP#P8cZoj>RFsMRhsZhFO|1#iCa z9RTj}_bS8{w>308X(eXPInMBAr+)CK*5&I1fF)<|TDBfD2u-vt+-<<(BTx_qIBFUs zK&y=A-@f`4PR7vtbK>mZrSS^qDBXB&AVU1W{?$-if_8nQ0iwBVIS>0pDIkuTmWSNb zC)O7Jf1)R@NaP_`2V?ttFIC$g3L{Kk-{SokB<7b1VoVw1jQ_A}1&qXV zHM{J83!i2==M4$#Gukn2^dMA^NKdEI^k0gO6i9z%D%3x1QbIcX!*EUcKGbij^Qlc%@$^6?aRZ9z&0YBY*YV1GpCRgP51BlJ~T5>;O0TOs) z-)=D2$yoW>rlWIg=zcoouTNBe?=!*i>XX#d2;C?g-_0~7fql{xXaC*)w7ALPw*nj$ zLl)(mAmr8h2f;Ih4s{>4(?R4n|)$}-Wr6naL z>Kf|mE<=hLJ4Ost`fe?O5jD6B_*FbaQcho!=)5T0HpJ zgX~#v3311bib2a>P^R3c$U3ylXSu`cdF7ZS7wm07n+vgFY&bpQh%M=D!l( z+V?d}`Tm}H0thJh5tX*mv|^kt5)}6Cfr_yN| z7#78{X=5Y^9X34qS|9&ybbRCQ+E`xIO;z0o!lmy^0HqsaZ!TMU(U6g)8-Sv40Ww1V zeJPyC4I%JbIv6JL;@)R7z4z0OTZp5=Fh^T2(@^m%&tS2Q(7OlTE|xD!V3K76SV9^W z+VD3o`nzuF{1|LY&$m=CX}{)|45}KsLthbAk%fU)l>+AWc>Dh6>_M+5{2thQ?B$;x z9}l^Y70DC#po+vbCy#%S-b6;|3@o#lmoV1bJBE^xs(z8rIne6UAUyAL(YyB}ATh3h zGlX!wk8fh#`fYd}>A8wkz3~k$Z#Rehu|Xe^mx=cqWP?b|81+%|$hR(Yx%l>(&6o{u z0gIch@oFVfHB|mSBg3*4T02D6=hTp)^vV0q9|BCR0c*Ai98*&}X!fn_=(v)8zJ_Sr zZ2200{+=S`(L*^hifF@`4Uc(;tXpk+13VqCR15S2=RChzhsgiw(!#P(@UQ%A&o$Fc zZ*KlHNh*Jw0h_+kaMPF2arOsS;Eexm9h)7zCJph*)yhI`I(dztDHOGWJs z3=FYrK+H)X5aCXm<2)?;St_KG%vw*;NyFQPE2rtQl<2SM|NHCjD5^8!b>Fmk%$MDj zx?x}?nKU}@1l}RuyIfq({E>6(%j~bl)|Ye`7P>J4j($zMuEQw@og}6SV^Dp6s3+j3 zu8f0tj67^5_M0C#&v;L7>ae!#5SV%pk@Jp**n4ptxvc#>zmtD4$LHd3HV;Z*HE{N| zM;3Q_k9r`j_hJFwNG#AXN?GY(D9E*|Uyd~&9fSLFrRLJlR;bk*MKeQh6$b9@XaK;m_uMz3q#yc z>bAu8x1MSg{=EHWDZx@$l;%`^SehZS6OA5--P1S|4d1- zq<)Nk1Coh;kTwb==Rffvm`$6Ejg9>Ro5}@n1HLM&qbxjpeX5!oka#iCM(czr&VfbzxkK9TYWHmtL}6mY$4>==(hV{V!vh@P;n0V8k-{G)V-f7$4x7E0OU&bGBmEU6LCa+2Ftjm_TC`|f@@MR%_3!^iil1Hs&82yEQ%cuS%iuXnmJ zpy-RQr8qrDWWyXXFEI@i7|Yl*6a`r`(lzRbN)QG@_>n^gMOXO|Q9}&QTxk(6sW}qq z(`s9m^Wvxr8nYkP)E(wD4v__J_KC(#VBW{hl@8Ql+RXmuV3!2$tYHLvUuLPPsjoy# zBThCg<&g9D?t@h-EZy0sL5-)lOsJ+l_fD?5$mucdK4DtRFg5;(<_tO^wn(_^GnJL+ z;njHU9qG3+gebBI(+x!kKbR+6313c_+tp5FroW}T^eaErD;Ao|l{tLvJG0UKwE4GZ zto-x8&sE-gQVE?V4sq5{6R4g=2jJ#9tmw1*w|2>oZshV>!L}@bryL8T}`W zdaKNg=PdS{3Ag(utLno?fh z+zi>8zO&CgvD9QVBG_|7D7;aK$iuN%N zFq(OHSeyaqd*V62hEq20z9|#sY^tvL8gsiU`qycR^jLvxmo z@K%T>yyG&RJo?Huf*$Sldc!B@xh2P~C+?L*D%wY{;k6>tH8ssOvX*n+Gseb4|& zDfLf42N zkU5sP2BmoD{Kt{RxGp2Ix;W_kLSL#P z1XB!y{zueqLwZ`qfY8v3G;w}iUP7m!?2g7s35;(Oi*)^6_=xo~MJsDGF(devBykMq4%>Ng=DxlKAY`F;q8jx zjS2w?y4xqnJ9se~_v_~V>piZ$7eCHQi7>lmAGoZuBlDj*&eWKE0Z$6*%}qlY8*wBf z#Ibw_qB7_-MTOq;c%8M@|FoO#w9-~u32T*h_-J;KJ2tQj+tgGgC8*Ce_uwX*6`xmR zqSQ|)DJSb)^~_CH_*7ma`9;_V)dxBZpK>pN!eSJvS455nMn{t3db_%-xb8`L-+ z-5>rfxQ;@tj|})Qoj+pg-7MdRbFCd-a#>HCo0}^Iec0?ux&{qR->q_usxHR0(mdSq zTd~{ASB88P*Mp6E8gasb{adUYlWvd=I)Kz(cy8rR-rnSd72Zi34=ZTt=p`V znPNBU`8`#H!w+wSkKm)-y4fP;?u@ba!GwnfFWg8#Oa1_4Mj?_Q6h_{B|Fp41R7}iA z83tArFch;@E$~*QQiO*`W;L|q+y@Bw9SCrwI9OOBq$Wj6fc2$#Xm5`xQ6)A0fbAXG zK`GJm2r5&S3gY|rxI*O2@m?{}+X@@(2akbMF)c(Y*p=8g-hCqG5jiXAml~vay-) zYyRnrpK83T!U!NSKiIgW-j(8e`lT5n7Y!H|ruQ3-PcZj|lvIr}<#qbgrLyzfUvLZZ zd-As_1bMO*%?kH;!-pG?rphFCwE2df7$N@o zbUA`Zq?nCGn+?(Ult#t%O<|re;@LQ7f6HL{JvzNIqDoPBtZu#JduP~M!kAfT$}i1( zQ2LgRIR2`Q?NEVm&<7ekzxTTzU5;NKa`W<Sc#vm=1*J~Uykin^60rb=0lo73M#kFZ4| z%K}IjRP7n@C|}D@#<3G!*`9W}L=nsk(%4gb(fZ*ON3fb!H$;`2m#cj8z3tAlVE(y+ zkystwp7(-w_+~9f42?({N^-{~oA&=|0YvgdH2-qSqEa$tRxd&+sQjW!!VVQ^`mLEn zUlKdEQJ88Y<4oRWSQ#5(rjN>LBbid2o($X3a4R6MPDVpAYk_dME>V0a(Q?LvL}qME zsr~mY(9FRVX@Vr5iFqGcK0Uwzl7TOvhcd+;=F-Ydw!gf8o@`%^V=n9Vf;;Zd+SKdO z_eI(4RoU3AUPqx1x-uEcV=H)_^|*585p)i*IQdHU^~ zeE-9r)s>a^$#jUl0odvgZY781z4e>(l!5%iqRJyRcHNP2H2oyA|&*Yp{f0DjM zBhm+0%v08$;Uz4t^-ivLN^+e2XkFf2C7+J6!ioE2_6ek$TYlKZy(d+0Qj1nNm%!0a z>@vsNx-cm-nlUZoTRgP4!R3|d3s@v@LK*C>4E#;nz>q1mQ0xjV32);Zd~#;$v6MDJwl=>uJf_)|io?9!U=%KzhOzq81qK1d}{9E4=)XAJ{G3|;hu^6v$ ze)#dqq;NABK0J>35}wQ>OKrA4KE$U@dcRm$_cjfSt1`PIS5zN_T%THmS$^J{5Qd4n zEo-TFc3St5wA9j%^Pm&lf2-CO?3<>KOkzpA zFi4{}QaV`+xMtYbXfdCo(G#9ze`oS^!eFYIU?r4ayPDy4|FQG>jX(Prh5h)P7iWiS zb2H|~w0?S?!ri^`1k}fs3MJ!@6rS(}VVP{o%#4~c{^g|37WWSQPe{d<_&BU*qQjhJ z;oC2&@OXF9o`+W zm@hC{0OlDa4Z+{?LZ;v0G+gU_s?r^Uyl)kg`5Ys23x6G^Ik;JtA5->E@>MuZ#97aL zp`6U!Fqkozb#*_XW#@#l%mT;XBG2blWyMvzcT2uSN+~LNa-njqFO&1o`AIGo;RIR! z&-r1%<4`d8;&>PmaUVAx-P6DEW@~w8r{fQ>+kabrmgkLZD%IYV=o=zBcDMWUv>cob zHBVscUN0*vllbB8Q=a_GXVf9f;@=m5{e5quC;t<($7oMI)vH^?*HxltWO^ziF;m?@lKr{tui-zK4+)Z^c!X``$b+!nY4<{zrB9cl!KiCizeei*5 zILc3+(cu$Ep{Q-;yMNTL)oN~i*QF^<>kFDm{Fi!I9LZQ7=7!<)?1zj!v-MK;?>qf+ z_5~!k=YGo8?6g-|T_*oz9^0~mA?D)|h-jX$D_F@U4vJK4-kaV;u*r43xD`T9&q*{Q zCHqQxHFpyg>14qAy)3b4)gDvc+LG8nEJK9pt1$n=*O1yxz%ijd>g}S0~~?D69@q^(@tkCDJ|6KxX{5U4W*`br$i zevWG_=EI_`I7X>%F=i`P>D>L16*q&#I`O!*vo|+qoY1z9K-XP-e7Q{(am46MF&xDE zbI7QS4?ENAQqRm=K48$?$)0RHZ`wUbSYvhP!QiHCS zs4dKIcYb7#7L(6mpS6>A{+uZpA+>k0Ru^|4qnaW{)R}Oa;g}H%aD{moh~EJ0U}@mG z#1&dSuLU-elVc!zTLa)}n?|nw=}%Vw;L&gJVB38He?X2$JNpc5z59$7Bs4>3j` z9~b}raqwJOzfdvtwzwaKL%RlYDWA8-Fhj0vkG|^q-v`I0FwwbyzDSo7^c1 zv5K^t!+Dev5S^R!gz@`s*eC3yWyxHrC-O2(X->Zm4ZX(-Qd?=?y=sx=VK-~A4~*X6 z5EV7N7wRd^<>t2T)N}tPALoWf;E_FwIaizT)6ngD|D$iduKR0kp+4!95_lMT_5E~N zs0a2UG2CrWF=a2{E^TVAt3!Qg`8d5)_goG%g-85=*T;k6;T~Dr8-ZFXjBOJ=lm2M@ zv^6pEt(Fzv>!88%dw-hkI?`~D__R9P*3wBp&cXp^O{Yb;-0iptyP~93nJ~v$i6d0) zB40>p^+~x3X=fdLQ5<7qWi>}BlJ0ZR@aF80@{!7}NktTun10#DFTRg|3Qqq5@#(?9 z?Cd{G4HLa`x$I^7P1UpCVFBKT)XF9}c7B5|E&hzO7xNm(7dffP9}R{Tb`#aXXn4VE zc-PbM^{;{|mCy(W0|NtRHMO{}HKK+c>PN#xkm6JQsP*sXf8e`GQaXD+i0y^^P@?+% zUJZ_Mr2XlgpIa3~Yr?+rDP2B81rf4$IWTW17$gY^7X>nUL^D#Q6Z(JmS@hi1p1Q5o z$5WrZEOAaD81yUS#;F_Ckatfk2?Degb6#+!?;80Hg_Vk+(*6rX#x0DQ6k2{lpSo#8 zA~%jciqbtZyhT0}3zaSm(h!Syn{uT0V(nnj{v!@ku*tQT5R}ospZ^m!E|(W>LjEKyaoZ|n*gpKzec|4wgl;abAe8`Y7kqLt461dsVFNy zC&I@M_zN<^=bbcc`c>mE3dq#G;(hdjV_<6g!(n#A&rO%L&pgXGLty&rz~@cA)2d_-F{QvaFZ#TOOu&}btk@|)dL z^u0r5f~LfRXbm&QHa|^0)}lM;A6K?D%gO#`XyyxZ+R5X#!x<~keUK+bdrzmgHWs3b zFM51>2q)t)*Ye$r()`;dfTla=;2BTh%-N|vGyT@uJ_xM!dkQi#r7aDa760(ZTz@nt zY98|=dQW@Wj#nlKEIlVRRPuKTI|sSGJpYMU~pG>ZZj#-0kqo8WKi{k?fbfr=IIL zeb9WO_-}2L@c@wJiMf-TYn^923G#|5GVk7PZb0Xt%><$lu^|G5z=-a$m^7l0`#_tv zFumbT7{1=3dK31~5|)>2U3lW@^GM6$tvp(Fa6C<}!M@-CPEvPc$x!NVCE#m}fW`&s z#`beeo_l}$bOnqHVW4LQIrH^HDK#B`tsz@KPft%Z0RDdkbo-*rBDF&#X`NhyCX+RuhLibh^mZ-s#z-aRtAb`em)|sF;AOC-*R*TchzUTX{~7j@zMUs>$YX zVQw{dJ>7<%B;lO6Ce{IqleAKIjBVLJdSna?)x}2)SWL-Uew?Y{`9rqHYd^9jG@--R zKf^N=p>?_#?+YGrXJ9qr-2y{(AE3htq?sBJ2Bn9EVkQbJ@-3;wApHpteei|d%*n={#Kdin)PMfYU-RZM+2Nh=%|xVoR{7h`=^LC?nb;(j4H6Zd*p)fUr( zKPJVEEnlC?E}6*8gMqy2c60L{U0p)TZT_p{Id2P1rZxMp3G696{scSyx9@#IV&6Vr zF7G^!m)v?A;nP@3%SGlE9sDM8 zwwkM$qM7eRcp#v1hA!+EcOp{g5yGNDRP;-}3!%8K-?4TNr{tzK%Oreh5s|*X>!P!u zM?YHr^ZGe^v)%GQN+3t`uDIRXw+w!V?-sbn^@BuBO-*I(?5af?_kNG}kcm~hd7S(v z{j2e3&0d#LBRW4`i+KBY#QkWbBA6P`!xHP7KN{B$yY(2U9269^nh!_hV?dPv&#vCt zk1u3o1=X54DknGP-r+&?lxPpZ3uYPkRIYGnEhZ;`a1c{KyX^jJ8k9OdXdFzTnL8Q5 zG#alNMq^so_c%LcDrbVyV?-z-o~*Uz-HkbZaF1k{_uCTEX$V$ue3!NapJ~<`P#7HB zrWc5OP>3>x-!EomI$O7>`wjZnnw5rZlpeccY_38VK96mAUwC+a%K(6ulp51onhLIg zg$1=d2ALa?Np&Bg{;f8b6O6nM60^{(smM2qbCp~ADSaYa&YIlA?@wqI!L;JC60I6R ztkGTEhk|N6m$B13;y<&VV#l-FTjT&!Bo!douq2w&yn;#k0gC- zb&aopO;{QvQQnDK8aM{;fvYJ{5Z6wPjkT+vFE?kGnGbtd;c@^FDys8^aE_oFE6oXnNJ} z*~10v_}iqWomKjK72mKeGjP!S(9HbaiOIQsBDjU)N}I`7Vc#5l&%X?Nx%ljX40RFi z7JtS4XpLpjyO_8Ohk;I%NEX}SW)Fker4dC$JL$;nHH+ zDgIrwv}at(mOh#bMK+=aI3d>|NZ;$85BgQ>8_hgzF{Mc0F#t9u9rI}D{J1{@m}C6 zTYw8*#76|}r;-(&nQu|x;x-9dCv%}uQOEhe8SXhmyY>*F;|nJ*=F#&-ij$43fKf32 z*H5Ih;pD*%Q*mb0Z)^!(DNsk|(fRYm?0pfW!}aiaFQBgBM==tufHM5YTQ*Fl=kWvG zx0}NvSScHmy0`|l@9QUg9Ep#aLSBN1Qtdn9Xv-9|4~Owg6V$;C9m#QOWuFn|@+Xcv zvzN4)*7nY5o<}sEZ)q3#NwaL1gd|_H6P3OYLbxm=KgpYsln{bJ;XaT7&{WmbOteP4 z{$bm>SylZ5ilg&DIzQJkfVQ2P_}%DzHJCdFGK4;#RCQDK5A^nm*%YYvW$}0*{0<>8 zeR9@%zHRU7^?YsT8%8{niDK1*+n+RDWu%GvWoQQl8gi8hNUERl3+fdbdcL7Td7_K@ zjH&Q7HzL`Fx)&u(l>PD20qqLvSJDSU))QqZ(jobCln1_>3$z_r0ZCzXUOx<}wg1`I z8(Z0T)w@G3kCowhH02Do50uVQ-X(aS5g`$gk+W9=Ttj0Y9=-HaW7lztQ1uOm2TK%z zifRmazxCoF?=ce-sA|1UH>k5HJYo_W zR#g^n6AE#fLQ1tFBcl>b**0}0Un4oC0u|#C=NOAI6xsK?+FXNm} zl9>`o63ulySzX44%VPe~FSuKKq$j?doQdCcYsAY=pm!PEtBLO$Oiy30!CNyR?cIt9 zB(>xTzGagywoh|zYDiPRhybR$=M;20jwtzTe0Eeq?e)GCe-;c&H-8AWf_h`X0>}VE z#Y2|S7`5HB8Sejsvq5}WEW9a04Wf#dpXR*69i_r>4peA-(#40-vxGisxS}cex60au zJz_XV8Sbf9Y(YpXZk%!Em{R{}y`hmzo1iORq?-<)sS*0Jo01w|J(J6%P0xF)f$3J; zhEo3``_a10_bCIfdi3j!iqG2)PEPd}_TxX>EX@xSZf?6QNGl`&h(Nw1D+>A-bmtJg zuQ}P+EaMpmOM?bcQAcOxp*5NY3~U(^4#Um<+X=`!<)3Cty1c2;%qS*e1d=ysER8(& z0vWU@j7FtJ)`)qa)W;*BK^Ldo8a1xtlmz*a=E zH3y@Keix?SMTJq2o3d-A6aMQ^ju zf%@171K7)-fEF{X^lRC0T_i$?OGr?`{3^K>00z(QU237ut4~M2i?yh1IeHG&L>yG? z$!Y4P9w-Z-w;u-*Y6cZT<5&^_mF0Z`i30*>gxt$)u+3|8)4h5*v#+IZ~|$B7^j*k4NE2{~k>bK-?Cp%5uB>!NID8?EgamqJ3?CO~Ndc961OUcxbpQ`@UX z|IFS)!m!FlPfs5XoXM3*kaqrbYDqpMiEvh)g??|fkB`r<{ey$Hx|$5bRltIBeq20DMO?i?}UIE z)aSQ@WQBCGY`2R^nr;{i&(9}VcA#Ci#$>%U7Or>et%oPS3_oMaXjG5ZdOl%q=r#UZ z&;32S*hwt50Fke8t&bvJ-ibxJ|HZVF$~kTnN&yl^diohJ@V}B0FS+_fPC!n6z5)Gp za>Z0`=4O@xE|5^=0yNyyjF=c-3bvv!{-Vbm;PlzFq~BE0Djp&Nx&p$FQ=S{ZOzN~6 zD(ur-)J}zvc3QbnJH8-?KZHXsNfjR6rN-^*x&Q3mB5t*e4IY^|w^y`682@T$MLI?` ziIL|>`eWS3wvj09R0h2#shi${p zwB||6vKAErRl0%CMo6r#EN&#f=k0Ie6M@rU*UI_3G;|xlUePBCVQcrqD%*2M_3k48 ztp75MBO{?Hms7Ix-QL$MdOq;il^}UMk=TYC6kOxZo{c zs21AnJ8*afAAvuZx0Zqo1HQ~h8`a>SI;G98d&?>r-Oh;U5*1q-=#$U+X_EPUa|1=Q z*60Ku6ZhT<1hq@7o?>jK_RT1HBo2Q7%o!4{F27DbhSvSrVw_T9g-*@#3~0$^M4YG& z(!p&W@nN{uY%~UF0LAKD-v48&D)Gi*H{?T>f_UsGH%;fil>({VDId_N6sU$^AgXy^ z$W48;{&k@5su;x^7J=turpRw3YMdtxoXt>hu073Kr3=6bekc#yBldW4dD%P0;N1Eeq;1HX5=y2#s@WuZh$z{~o<-Dzs}#%O@VHL@;zh~{fGT%kcE zR2pOfiB;6Q+&W#c*RLZP2k&kqOuy|Z9uPKeRrE8W>FRvc0~K{`Yf({%NuV`zj4B1Y za9Ix9(kZk$BwU+8squOi3@EvEDXp#3ivThn2Up9fs+-%{)Fbh6!8^Xd?X-q@joUe3 zJO(F;9@yTNXE2F;E{k%Iv)a*$RO ze@Zwmi0wN+KW_*G&r%@h$f$vknZf>LRedLq#Y8eFpDmG7Q2hS<@#C%xp1-)liZ?of zyE|MU_~K&aY7+!CTSB3nQi7HP&h4A&<;C#3SgLrR`zZvNM$REb`>JnoZ0~5RF5;iO zW^Suc@s{RySWDyYf5)_J(e4@cPK^8PD^j4I{{Uq`P~u!U&zVGI-1RsG@@|Z)|KOEn}B)- z8Mp!nO0uxAvlkQ`RP>Q)Mf@d20y*A$K!6Md&zP9kyt)mkojrUfg!h$K-X3VMES~4YFotc&rl<+&P-OWa}amP4J%D{@PB+S2cb09u^mV z>YX)YdVkDk zKDR^DAV5Gaf;6?OxvOL)ZQ=B282sHw9w)nr0LHa3lBGJ{#Cm?$6{Jk&?4d6gX+|)}T^)`iESh<(rowL>p#+Fx_u~e=yMo z7!2>mSwTO~$=rNh0OY^DCWt*tgkg7Y5E2j=9)UF4mG||r_e)6Xj1unrbj4<2PwoLx zB<(98H7-K4Vh3D0I@QEO>eyE&J{Y(_`Wyi&^s=CkOZLhA*s4;Ncl>`eMlX@GtY!uqMv<jx>cbk zpUBFrUu&uo3NPXq+HOOH$;;Xt8cuc2v7`1j_zFk;O^(A4ztsLOEWk!zlhf9E;<|8! z3oGc|bASSRsgjM2&GFZ-UmYt})&8k8qH~*%t@aQq3EK8H*#7;!E}_*}x*s`g=zZxp zdbl=<2Tmx}jm`twgChh;;A*r1Fstda56-ipHg~e29$o<~nvE97Qe9UU3yj7zb*CXh zutBHZ#KGav05rryp%=qO`RIixHM=}#TM;kYw;6jOr_JO$v=n}cum3Sd8{&s3yYf-+;%+m%~tg7WjMCu8d}8Q#n${0w(|AT)8eu(<9yqa z^Qmw>MjR{;Vo`7!hYb2t@@D}9Yj)x5S8=<|O{bMS^*O^}(NfryvwG_4+>eBWE@RWI znu;$H^{g4keSoetjaVUj4w0rwy2iqjB8eHuY&bcbpvluqCPE z*~-$(kO!_p9b%&%j(gN!qj<~Z z2kYbzFYx|Z|H%5%0z%aaptD0}=s2$N2HqA9qXeee)(H)9+$Zj5OjDXY!GlAUZC7q@ zfd%3Pg}~E{q$KA>P%StJK+Iv$^xW6GulJ9pa3w?W|7ff&YL<>Yflcz2#L$I?pNs3j zoa1n>NWHAt<562tQIQCo-ZeKRk;_{7Qdw7icH&fFX2k-f%#7HS6j!srr>eZ}s5f`` zU$E#l1R$OTN+6{vqhdNWnFucj$IcvtA#1lT@v#R&IKC(}l2}GYbBVxi-v(Z8#rNJ` zGYU#dE5BMp_pLe_n6e)ppp29-WFJl=;Tw&Y2qD3BQwc3`;r(yq)+j}_ zjNe79M$!dAZz><=T$OL$z7_uKp|q~ZAvwSQ@#DuqNHj-Y-GL!G*5fVFJxUJ!@YHG( zx)%wxPH;9qmX)!xaB_~2;FMluuZ9fosK22d|8Vj6Oo)-OdvRwc^Rl0#4do$$xnqaA zl3kx2EKCsePrPg4#Q%W(*tGRfBH@Muixk)E;H{|m-NcIgnd4?LdMCHlZ-SWxHVS{da5KCG@1Z3aX4wjV#A1l{$hKtUu23*eee*#IY=s5~? zxk5-K_%=jh;o?SOVPn4sdn9^|H|hZ7jPErLV}1BAoC$mu{uVYg@C9Uo9ohvj4ub9g zIBpjxGnw5{nVCivAdVXe&rpw`{bkJg<}L|IA<5ml1txFaItc^GprbjrB*ywb+3~tqG^!L*Mg~Tn2y|@hcNx*qyE;(rr@~}Jk4F8O z>L4eQdLbFFmA;s+?zAtQ3hak0Ec=PS-PdE9NN#tK#JUG-wLC~;{l}(_d+_^J*kCgP} z2w= z;e%%mM6qqlpDZ3ycqvFoNPHSf;bJs~L9}n=E4kSgkn>z#Sz3BoBec1Cxqg=VpOu6V zOn%)3!Qkc;tf`Q;ThFaBQ(x@EP4M{yXoLzdX0kPNaXFcVp64VaCnRId_p>xNZy^P7 zJU}~BO=3Y|LB9E5|7x0w@rZQuNBsbD z{D4l{8vUEUVO@`A9tFvK=;`g$`=3Y`blG~D+a)C>1#eQA8_9Knr-FjNSu`OfqhWRd zDYRvI$jnAJZ)q6hARdX&_km}CkQZDGzi!;+`5>2zcHeFc7BX|l?DkYVi$6VB2?wqO z359@DSrOEQMq6`j*BUgPVo_;jsnLaWu=phabv7 znd29gZW`$N0qw-%A@vZ?XF&MB=?2Y!Peo+}lYPc(NDl8vXrE$WJNz`5epwVA7B&!S zYG!9=rwqfpFR&AhAkt4_(A1Y2ui;9RT78W4`GY0vTZq_w^xvu=KklrPj#m2!?Y#`J zcFeWC$ysm6VosnT@e8JNLyc(8>lN7{g5?Ia#9d z5FBo#cM0#`FTb={a~AIJ>mz_J&UDn(^>Q1`Xq)mtipc^7l~wQ^(14N53zj99TIhSF zh*xbhHJT(WC=@cVMvE><>$rhFdicuj6GFRW3%aj+*7!X>1Dtke22^2!0}vZC{*CJz zG|DegbTMcMpg&{*RrdrAHg?(HjSW34I#B>gJ_kD!b@;b2ga#M*(FXSVO9=8d%F4=y z%E~|PiHNkNLsRFSh=@o@d|uk~0&b*aXz-{(TU_x&sqVgeph@vlWputESgPKt?Xhvp z+n;V;n>+QB&bopawd3|Ocq{5`NI^!2B_A@#3}~`wvW|+U(Px8vrfarcUj;R=*sW8} zYgA(T_DQzyh8w2Dsk~-JDWksmEwcZg-y-7IaL-Dm&N41_r_7_N2up85bAcC-#fksu zB1dc4cuX7fwv^&HWf229)6{wfnpsFGZQ=H9!+#|=fdad2HG%&gPolVJj9ML92?ml} zebb5<*+NNAvAd5}3)}{Tw>vo8-Y7~(OOACps*u9XNiQ86kOiNg7+l!6K*-(Ml^_AYkzviXPrLDn&dsu~4n&`GIGT)yo$kDvGciTFq zeD>%9+g4+7>=$KmYNh!6tDBXjL`wZ52q}9XO%~k#X$>$b?1EHK%EhJO$t8%cg|Zto zg&?Eh{*+Kmcy|yTTv3pX*oFV|O(IEUIZL5cNGZX*#;MELPt4ns)Ua00gO7-}yKFMt z?D|ZBkU)Hsi%Hp~rlc?%x-Joe<{hng^X^xyh3sI;k>^)^Wk!EY@?m2#p{Vy7IB(tV zaQ1~a?`Qb06j3Dj-<#(pPNTc`ozTv0#&I$fgODL)Vd2$d@FuGQ5Y|n$ZIH!}gMl&p z`~m{p0_gLQ0D)-%vgA=O;gfmfKPxMR#%aNV4m>!4J|uK6m};6`EH(HYZ(ea_Uw7^| zP4tF4uzQ*;f{JZ8f)3&r!E~F7_;}X6()3q-c}Q3o0nF**plmK${oPpec4DsCwPtm) zGV=rq8lD3YONeASAmvGxVmX`>7;8!FrC)*^C>kNtuKae~B_JKS9ic_v{sCEYCLgi; z;J*du)ah`QvwI4K_~W+&q*b889uW;hU+o7B4Ccnfc3>c-y0>0hX)auRb#*npPhA3& z5@Hb8{;Ny>OK>r})QkzGne?_q%2Jm_X;nebly{a<{ zJgN!)&7(5?_ox^)|F1{oOc;ZqF*`_ndwEU6k=OMZgOrPBkXiNwNayrtM_cY!sc_$$ zwiY3@6Xg0NG8$5Y#M?jL{MFf(W|p*;n~O?wszwF*A&NfSnr=$80sFX&1~wUygCfm~n`=SwPl=T(x(m_G*TqXI|08&x zzHnk9zo$4@(Dr6aI{&vN-CZ@FxZj-o@@kXZJDTWx5u(;Komwc>STxIP7dhL+;>|Px zq_hFrOAb5RpNEyHneEnvcQuAEZwLBxpp*8o2D_{C`uN;Da7jcCVpkCbGZ2w49)d(8 z$wniYilEg-$7c#a-~T~>5Ay-uhfk?+m(nP6;VX0Y3k}U6w4@YgkeeX}&0C=`@rkTa z+)H>({NenUx-;Ik@pTYVZ?D(5<;`b_x8Fx+N?F2H2UvhmQHdcj5~bfQUA5YLH)w>; zNEEnwk;(mVpC(i}P=S?rN0Vio=zbJTQz2*nze0zoXr<6UhjL~YQp~)*TM7}`h?}#& zDJj~^D&T9k%lY5+hfr~B6mRc9L{F>r6f;^V5cB{07?E3$3%^TB`{G#+w!1QkHe|i< z+~eO=&|P?uRNc>-5nt6j73jah<9?TF0GU`iVha#vgu zlZJM&4NQ|9AxnA%HUqCu(x5hal=A!!40p2OhX$Hja!&z)a0%hx4}LtLX60&aZEX|} zp@{2^*8zvLl(aN=IrQvHg=6bP!~zFR=Nu2dP2F<%f2))B_5WU-7bHj`*kofPi)ynI zB77ggdc>$OHs@-S`jpm#HZ1ei*aN zDC=rBf7gn)-1&U+8JS&Ic!m`70ZC+j)4m`IH^Dk2ot*6K6)nBS=Jc|w5Xm$q0O;`p zOghd!nGTEZ5|WYWzIoWw-%kj$3_@Vx6W??ifVgdR06MuVjIyPbl{*=r13$MH83u2hSr`k20edKkAkWs+;it5nkkyJ;4qw zLc@DO6iRr?&!`lCdUjqIPfW7KG}!A|Iy5e|v$PH?eit_GVCs|naOYl%j2F5d8MbkPa0K)?)LJ{X$k5(g6O`fqD=)Obk$$mc$@>XCwEUm8t=HB3kDq+k zClloo8};w#FOWhE_A4|sRcTd^H}QKvJ1vtKzl}cuePR~bq155?H0z^ZRk5?;A(a7$ zzk!GYhtyH`K!i0qO%OVb=G1UhQ>Qz^^v3k;GYFhn4fgiRHk>Y}b@|*zMmqh(!jrun z1fH2Q&U2w)M0%G4nrw_LEb9%h&XZswZA1+e{OvuBYb%NNXvs#)#r2u0|ANF0PL`&Y zEP7e9kdk(oFuqrh0tEuIjYI@+2Yg?VW%i zFGCl>(uQYS;8W9gsr9nB>w!okn`M&fThe#Yn1JBs}kOc|k?>M;y1wC%!xxf?C zAMG~|UA~a^k-*s~s%n2XT`TJWPK>VrUEc+jLOzhAekLa+@g74_i#6m)WzPD8$KcNrgtlM%*N|{FL>W5X-;X&pZ?|o=O0W#sf`}@D_ z)IT#LHFbq{chtc7Ju6p2iy=Ifhq;048zt@0C9^E`xU=DSV5R07bBxA=`w@>G zsIZe4zjZXe$s>3yB#OqLhPF?1Q*NloXCBCkn*O9$a6jIw#3*oQ%qD1r+J9RVDnUY94-X_$BO=8|2HdE8NIPm(Gu==>I2pk7xZ3eJ)W&ohNv(leoj+z9o<-vGp^ z#^WS1;R2e_l0PSWpFN;nSRT_c9rql+>)Y~ajxw2O(Ye;P@Vg3*fJ0dmC(k(cpTB9m zOH{ua9@DiH)$T=0h4+U;?JE_p?7C!!(T5?=jpQY*Df5|6P=lYeoe|u0Bo+2x6f|9BGHL{%E`f_2@ zRtFxY$Aw3sQ_8#khw}t>?pRxHCoU73fGWp#qw@LD4;gY0!pNOypRLMsT~(@o%+yP*U>*NgJ!m z+9E4YT-d4Qxmt7IaiGerJrnk*;}pQxw%;-EezG=FXM4FZeKzeH-ZUVEp8mUa(Ba+R zAQAV0%R@{O$Ks|cKT#+?q*oRfn~Q7n$6tp(V>5@ot@cv*dcBGHvz*^g?EY>4{O?9M zzN1`P zE*lPXlg;xzBrKFN0^ z1Gg=C*RxTKyos~)#|3iB@+9ARUe6HHe#&x{?DUi^u~5gr3_ z*S@SzRGSXdj&|UQG=sEdqXv(`D-E{(Xi1In&Snq&o0t7>pRuf9hXU!qdOOMBv(j8o z`$sJQ_>InQVf|!Jh=hgLOaGu(1A*|>sC8RtqC#3&a?djvDY zbwhUl=C4{)B#GkHdOf?(BfCl!DH$gdf! z7{%S>y=*)xH2Um)Vk;oCl2y@$@P#q(ZvdDH35qqAjU|`w0op7aP+CubdoO{nJ}Kx3 z=P&i8xjx5##p>T9$w9;2WtnbIg7J^T1_JU5tx?SvVLuyQw2JEK8XB_8_4joiZ%l;# z84_e%R{B6|wPT?#?=cXrCMlhq9iQZWIsRr;n+~mX+ez%v)T%&G zSm92a65SkPRqO2#zVn<%oXbwX)T^scb7rrbj}{51J$a+JtJXye{CkvMt@1s_nJU)=;YOJ>XU;i%Nj3i)3&8N`*R{n67FlES{s_ZDsFo4o z%WRrgzeqB}jl5=P?g(9kNLLhy>?NDdt(y;V0n)dM9;kkB&sK08y=IT80=o!Rs9Pe>Kxb7Pd8Ybnw%rz5o$ z`f4n%m+WQ6=@?4(b(vwg?t)PqgN6Of*{KQb+L)Q`prwCZRb#8p-){>sS_Pprp64Nx znO<^&JO_>uA7>{%CnW>Xq3S%!|5eXlCDX~MX2pEsS%muVpbo^U?F%=`myzgg0vDzm;TYnDHUO z!esd$oi0~$q|$AQ*kP|R|Hoo0evZ_I&WbK_wM`dxDs76Emiz+8RER=O&d#MIeOf1W zT_#R@d#R3$mM%s$BLa!DqwxxU*Y)^@%^^1=THl zZGnXfXL9GA$Fx_e6JPAAKmo63>``mQ(Ms8Gu7w$n@47L2QiJ^*KWZad1e}O2n;h|_ zy69;Ic(tBin${;Do#duYQQ4h36D%HT>`nH@1uY()#T;X`)F+-t73P^PTX6>^cMHrq zMoC={v;^aYdtV#~SFNOVk;NoLxZ_d@u)Do)R@yvw3SG7n{kvnzwDZ|9dUd?Y{|Rr4 zwC2uqbVh>Xm*4;LQe7{-RzD72(ezJvC=O1dFlX%bdQ@$N4+UH|avRD-W+Jq3NL_AM znc&C@INzn5**!?#Sn+sTFl{}C$@)>u|4ybtrtoDwWYl*wv&bDScqCsyLe@0|iA&DS z?%XzS5On6~g-X&LX}s3^zZ9%M$&hV>70KdyFKT|_X;tG)rq_|5aY1bI{gZ7yd@{R? zny@YR<1qH)-w!7S(;mLcrRDiLQ7?t1FpANOiHS}0Cp#O27=ueIg_RUZ29v!Exk`99 zxW2oC7^tB=Cja<(qnus;rSzi1(izUHv4c4F{kS(D_PK=i#^c0g6$e)sF0CSZidC*7 zqes2lP7{B6FLKaK4qOaB{kxx)nfbdzM9H(5b=TpoP1N<(C-p6haCCi7uqCj*aTN`8 z9VXJo^tDIVhtPBib^}^Dh*z0NT{I{V{3Tk7!|9Q3qH4(#`8w(O6_w)_jZdxz!x5ja z|N5aUP9Wvvos!|aHnrK6N>zS7#xayZx4r3{YnmRNh5F>fGuiQ+fB8B_=8PEJUn}E= z=v~Xv`Gq_-e@Y$b932cnX7Dyds#1Z(G8p%BJFDW4VV+O8cPoYWPF!n1>O8@ocQU=N z>fc*u&+Gbuv%D0)-Ke$=#lbSXBSOP88n1J=y;J`$^>f~nwasT&OW!Umwv-AMTiV*3 zF4L75)Hep#7Z=!D4oCc{yiWcsfwpZ+oKVAdJy^}yf@1C;vkFuIjwClo!L={-V!o^I zjqqI6>)@M#0XiQ^{0cS;G;l`CZfj}pk~r6jT+~gL>Cizln(@_l9SMpp3Bxw1NB4e6 z;B5(jlQNdcc~SQ9z1SkMFnOt@{{lF1p!UqpFnU?*-=l>{JQ0Fc7W~mwgj_+l&x((u)PX^CMp5vLlZRR>feKmg) zTt{6{93@y5BXFvtE*aqRcA0#kdoZmlQpzZ-?00&2NI4}k`UBzolN#KX>`Lvc{n^SFV8qrTq7y{7*#=l-Dx9NuAic^ctW7apP~>Xl(=3r?s*$TOpWF^`9RI~66`D==MQ2eDTOUrn zyNx0i6Yu?{dWVCvMQ_6>_RLkyur~S4{CL!gxv$<(lytOhuq+d9Eau!ZrmUS;HSy2UK?k3>aqzu7&L_>(!@&cHTq2LK3GkVfn zhL!Q?#+B*60U;jS+ycrQhFEvn5%JjU^D92!vWK6dIhG zq|)eaFhQ?Yg3vc2KNSTv>dHyu*?q8vfkxKH%oKN!n7#`oEm0>PU zTgb`zj<)w~K1^P&RzLeBcjO0lVtis)(e7?|g(K?EH*en1o)6q(TWfPIRu+~Y?D_Gn z(x|i^9lfD_e8T1=XJO%Bc>u#3&a=4mK_o7KVSw9FX7O zzpkk%CRQhewtA`!)aZ(y0Mui*(Xk~Wdp?aPa`BOr*NS31P}0k*)nz%wgqzbDfF>3* zHzqfAogAb8d4>nn!KF9gyj2kEm7w*EJAo`BlQ2i(Z;XXhPL2sZDmK<@cx>IrY# z&FGoHp`#`vF;N=GoOD`RTFWXo-eyx%Q}~f(Gcz;A(4?(>H^o^3E{m;;2j17`R3Nr} z^TwpXy|nPgC0qb}dzgXYJ=Czhu^|s;>C~%8mw`8TAQ`7IdW+|IVz~g=XP4;IFf7p* z8X6ip07U|PtLOpq8=>s&0IGH1f(eY!SHhR^e3nTWgc8{W!yCB%&`h!3pv(<2qSIc%Bhmru{0DSprV zEW4=sx+0z%s; z-;0h{8f8MWLJ@#_>>x}m_ugZu=j<~(%O2cXQMZc(G1Xg z1OEIk+HV1nBNuML;do}XpB@PN_W&q<#ED_exO)p%`!&Rz#f1fN=k4Dw7u$e-4oG8B z=^wz_PHqL{;@f@AGVL#4X?MG~0h^fe8=k)JeflfB=L!m=;O6EM^8AaJ>@#{5RKkv( zK7Su|eSZF>q4%n^HeRb)_I00+O<%rvnH_&roTCyN6La`ulCfy%XlCTRkA+`UT3Y%5 zq&-B;0KCX~`WK5*CYq@;%0IXn!Xwb1Er`67Qo3Kg_l zM0a&_TSoA@Xe6=feC=5R5=`$i=4a`mMYfiJ`hM3suR>CfYd8Y_<Cv-6cz1qoWLu_|u z&w=2Oj7ujI`GB09Jjeh@zQmcLhkM;Vti8;OKs;e^1q>G!NC6}S#WbBpHS-sekYll!mNa@%C|dHb5r02snkA^FGmcqAlkih9|3uO@f;$?ZASpq$CQ* z0LNtqU4o=6$(NlTwA;lEc6NQ4ql5Y3Na5 z4tzg@m|R+#2DTpgfqY6j*uLmA%lMCDs8gt8cb9!%&>Gr-*$3+^(auY12Ey5Xc>Ps> z9bNcP?ErgEjf8Z(Z<$ZURjptzzTgjbA|NU>8PSNGE>BA+>+`QkA zYLmc2FhRT|r=(=BSs^!+p;1)bJpl?<^~p1AgmR}mEjiWkA5%)13(`i}2R#cQnVO7t zmr?{F^oQww=N{0AH~9>x3q#}sW$sRjPT1i^^SM~Y2nJYu-Qy3*wkeBpC1iO1ViY7r zwDp-6p5T+GCZ#W=zCf$MsH&sR<7*{}{P$-tppY<6_XkGaeOU$Ah$i_t(ovKj{(g{J z6-!{zScR*JRRvq-c$XVYbR%3tUvi-fg*a}z{izR(GjIGZ1TS^RwNXZyZyVobUUo5`M_WYBFY;>3!FR zx#U~o9ef*7u1Ik@y0z^;{&pI8oE%Shw?(Lja%IfcP_$U|20F=g=C2MrZRJAHGk{MinG6c~-t$VxHU6cE2``9*x}f#svIZmmqH{1TJo)putBNy(P%t+nSMsiKY!t-F zF2NA$T)sOpF1HWA*tRLlVRqt%O28fT+W77R92A7iUP!m(vs;nh#O{c#@8bp#%?p>I ze%)MM?Ku?v&DdP9D@j{>t5@q8(^p&m1@V|NjjC5&9=*)X@l5upGl;MkuU>s!UQrBb zviXBFN?w(Xj^C~#ERHshf%+J2Eg*t#9gR8glDa@#JRUxoQqFi+LTNtW^w}(uVv%0h z@qBh5MF1xC@N*fNVAr5M0i-|pA3r_lF#{og6^xfZ5*}(fc`w})ubLpP_V`AmoTyHa z@AIVBzfaY6X;GcBK6Hax4W%N2+;gDT=|kj%2v1_Mz7D2@GktuPxTmTsrPr4)+*W(4 zZVqEc>^*B64KlCMY@Xr|(&re{aweMShv(`6b-|h2a@yfhY^X6m z3SL&U5C?@6of(?WKWc|vCO_^Ee71gU5sTE(;~-lL9v8cZ*_-joD*&7(KxYyR?9nH= z6M6k7VZc2vY9nL_B@~Ijep8AvBj$qd_TtT( zf3nPg%cM*EiINuJtBmq9IFGF}>8l<#?fctbvF;<(!j3I1E$y32cF$nkk$|sUhEvHI zTN+RW)||@|Q#g|Dlg;Qr; zU#>bYwn%kEemE&1>>*=eSZkO5^$?=r(W~7{bIZ+*RP|C#CWj|ff|5(vIBRCwqz~<) zDinK)X6$|jCtCGi(WVNSKftlEq29XIr4aTyjs3=^(-$_$TcQ@c9Cc5joXjL{!Fg}5 z)_CK|v`c#$aX;18rsUBz{YMJn`<+ke_-o(Ea?MP}$gg}ql+V4ca@|cGm|GNcEY&iZ z$!ujn+nDt@@nP*t8`=CvzFKg6HDsQNNuBk5m1=vV*fH+g0Qq!*Ea8OWq5OX3Y)k)B z-Iorgv+o6ySQ#?2sO6rYJ$rq2^wJoWb0)Deuj=wld5}eei6f|g=C^6r_YY%j@+SLy zE+K-*k=2npr3^|e1dpRUuij^5@_SxSvI*FG>-oojKOOr1oc0Wdu^@2T1GT9UaLh?2 zgQ??0brSLxSqjVqp7XAqw_bJ(xpHkEjN8@#x$K9_4`#}VYtt7`{N@Wp0e_Sli*M#L<8pXvqQ#weA6Fsw; z(#H|Cp_=V>47eA6E-rzov&qkb!>U`mx(~C=)RkDqXw^UE3iCbH+Dg7}AeF&nUPtbIKWbv9w+9fl3#5MIXkMC`EX^786_vUzR*39 zHD~td=Y?B!@%>*78M=7nVskorOAevppU0*n4VYiF2wZA^+SXb^Nf%IRVSMZE z66CNW-9N(+Z76)-#>QZE0cF9U(#a3%x>b|d!fC`uH zkdU|_E#2KAbuV2?NQpEGk}BQZ-5?+-4Ud3ycXtcYm*(Ak$8U_6e;EVDIs2S_)|z|H z&sqhAuQoWMWTAYz@pf{-BHuijmHv5w7XcBj{X9qj1SKVr))1fG48u~s*rDU~vKCyW zl4@~k0b!+>eq46s20RlFo;n<_^TC7>&aZpmkFmM^{FL- z14YiSz)lpA>s@Hsjgp*r)E*owxTNG)G)O6$l4E9O6^oE;^cqQZdFYoiZZ%_cr^3n; z4s5pW+Y@JEJ;AcYfsl>g8%qDec8@Kt8YoD+T}G9;&N@AFOOsrE^xE`fpE6=8NkkrNkr@Fc>vB2fE7-|67pUNBk=!k7pkJ zt05peL={A*r31?iX3^n1X|%f4)zpD8Mn~{9{9Ei6DMmz46g;I$byiqPO(HFsYo)uY zFz9doX%~QH+5G?u#D14CSJ8{!>ivz6T1bXbluz-%0^2DiRYA(c`eb;qzPE?NMXnTX zy}`o~fGpVJ_?G%Vf>6hefmt+8JYV##!$yZC1+nTmJ9pZaL38wPPBemPRG#|?6Fm}KBhOBzvH|ItTK zI6-g<`p?MV*@lATE3iR{YILUDw1w%zG=mDwFf^f-(0 zmzndm7+7bTn6T@KYUn|e517PQlE40$nm)cet?%-*vx|uuIY=;_-+sv-6tlHG7K^(g zkH2GHo?;1;O-o}Q&~}u?2x;FOrHf3Ou2?SnIeUE-4f~As?}my-+Do*~)8zxJo#YH3|l%(W;LePD!IR5Z}x6@l3T}NL&1NGENf(&mWqyAIs}a04^-dMAtfRNpX`9PZNNU zDy8=w@Sx`^-jk^F21$Vc5ND0-eU9Rnck;nY%S&RT{iCYt=39b#NR{l$Nzv*l<_G2> z_7Caf&7EMUh8+&(T4r zn-c~5^5t1b6z2m}5RDVq&+w)_{{EJQEYxQ}-8QMNDEF0PIilWYitE5mAMPZKj( zLs^v|?za>=llfKdR8a|b+ztdCeV2@wcm;GWSe}?=q ziqyLQu07mDP!`fuzuz`=cHEKLFHob4j=;WKZH<7M9O&1)?yhchI3=EQ+IdYQMnHq~ z`!a>%vnt{lRsZpl)M18it{`WQdoNSO$|nxvB!UN~H^W6qq}RO-9Qk?qqp|HsF$z_r zG!Hkc(hTHemkPIi2ZDp)?{v-SXWiW1@}YAU`jJF+)~5NhZ$L-Jr~;dezl;$Ww;g8f zQRaY4MAAn|5*|+G}xod-Dnn z7#Ml7UI`JwRaI`k*a(`xDn(O|N{Ri-ZW0@*ANx~HUO08XdM*|nJuf%kh&46jYezh9 zDU1nPslPAn$9$L1j2}D(hCbK&iyh1Ur)j*EmGBCJ7|uw)3tSk>SMHshjGrXwk~kRx zE~cbk1dF8(SY7v=ZhvPafsu*^^cvbVVo<2RtD6*p5tCv)w?`bZlk4>ga+M2+oxk|$ z3L7fc@2^g`tIOdt0g-q=LThEW#ciiv9ci_F{Vx2@m4nA z?g#+H6`n0M*y4Z{SQptAX&1LHldu%|`- z-fU$8QwJcdsAXhi2#W3%sw7JR660H=WcAn7Y^t0i77%ly#d6@SBlVM|G>cNl)&0R% zW8ZbRb6Decpg%RFHTrz6T1Z$}JS-%n@BO8x1L_9u6Y!Ed!0MH{2-+MP+D5sX<0Pd24U)+8T^u%BHPRkPRIM`Yq>y zWX6a#VIEPL#%K=Vk~$`?!UVq^m}tPJ`_s1;!=HqZO>A&*aFiosWZwJgWb;R1OL#aI zT`BM?TAl*cYygd^hpCYh@c4NGch;`KjQg1sATrbL1RTPw-pqjz?NjNC{=IU;1}=9% zq{A2+A5UZs*I2;Km-dZ?<Lyj~QqJ`aEUDlg+e#KTZ1L?wvc(Bjns|_wLF6RE{(aqm~T!Xr1>lih;GfBjq zi`RZB4WJX{A6sLY6X2PgCF0hECCd}a-@YscLS-&!?YGUhzBT{8aR*wb$JGf`_&yVW z-SzbKt44eQ!>w}t29SR$-HMJ>a<^N!zN;ZMegBRcby+hp0r$?Uu%ad;8czhu(7b~d z7vhKVa?-!&*j!8>IK8!gD`?)X`ru@h_&mnA zG;h#+pvU^~3K}l0 z*iS9;`R64#Y=LEn`?NOZO|P8@B6msp0^v2Qd%KSpnZ)}A-(b(&`%1fRRWA3TSeWMS zzo@{8Ja4bYn@e=P)YRzwV(VZths`0Ql5-=X5H%J09rcQUweA)o;+k&1Cwl03_64_% z?)!behqShCz}rUf_3Kwmo2ehgQSWL05E{ci3ze0Po`sT*QV2THhrW>I!Y}tThjail z5#{PSdBk~}%%k}C2Hd2+VDT=`&Jx;WrI&?KH)p#rE*>89H2~GsyJc#w22pG!Syga7 zzB9A+M@J3>{+>XBVD|S_X|2(MaDMzDMT0w0sB=BMZdl86YCcNR71YgPf*?`pdKV@{~QjjLmx@A;w2{yVg?Wgp-Bi=|~6WMn?;C6{N>0a>~kDHUXdX zsn5%e9-cTisj23&GB#xasQ|Ga8vHzo0x(CtXK*lFeVoWu(a}@fv=a@2hL!~aS+B>g zug`5y&d)6}gBQB50j7jjSXh{WAU`(>_o_iX?#iW`+Zf@1BXjln*GnN6SgW{1FQ zO+%Mj_M))Dk#RfXp!f<$9G7cD0|&m7La&UbE@@%u6~%bi7mEzrCO|EOs}@cHZ$E5Z83LL-UF#h!+&KJD3hN9T4}u%;OdsDs#*@? zTD=>AEgsf^rNOa2>3q2}8PLz1sb>djIwGH{X-{%*1V+DE4(sXAhl$HYWRZ0QR$dhR zU)#T*!86wze32<2smxffskiudtn}5A{W}|_h_b4-I1PE%-YL79V_*JR{@UEpAyxBb zo_Xh4pE(yfh0GR+8Jn7sQ5AQjbL2=6ntqK13+Qpo{SQWKA?amqS64m-CO{T+rw~!F z6=~6R_RC&u8t?Q&Da!E|3^!;DbE7!6$Z63j&w?;)>T>&W>|)zb{0A$vKIzj^nUauy z{@6fraYrwLxv|ht`inr%+KC|glC!_W6WVrm2OfNUd_JN+_suGj+dcDMLH}g#-b871 zhJ?suZtcI+CPdc&dOmyx_4Po9r;(8nvrhSfWdX)?B|}VBoFlwg6AICYdHU&rNP zxaDaZawrrOR%;>gVdiTs)*S)rPz+@!gVwd!@;4|2!&wYi3D((DZwY$cG3;?iGBl?rJ*3z2V(l))fAkm0xk?qWdI+(g zADg;T5s%V!$(?I`OSPNGr0mT1A#dW0r?JwW!%lcjd3^duK4h~mdb$dF#?GF>z-n+W zyEwWu;rREnzOWthY*K1<6jIOsI?+?yupE)3=wJ5Ghc99?x#5Y3&YS_ActLRRHM4to zxU>x712C-*TW-XdPt??gC&1(OMOI!OhhqlQn?yeA3{(y^X9!>0BP?9By=mdC?ss%5 zG69*NJ~JPlyv((d^>CO}B6b$?-sQTeo*sh^z(R;~%as$oq>dy6Ud+vL5sq%WPt1JE^8D6gFTB(BX*w4Ygcj}XX5c; zb+t%x#BR63>B>i={5@>F`|Tm_#o8JkCLz9rgpc#8&T^5obY6F+lBxLAYCiuKF3QUi z`G-e1kYYf3CK41EHbCyHnW15QsvEJKgMO~btVYbCt!Bt*w*Gb0KbK>S4fsf+#HcyrNdftdQw(&SYxSFn3X!Lo!S&K`HV-@66 z7Z8VL8E{-VGn8)?hjxmqxd z!zN;NXfKDWsrBDB?f4ISzgtTEmKdl7;rBFJ#bV=gn*ryLD>RfO9;=tBk?)LKMZi^! zW|c?KGfU(@!5wN3C1HAbDRLm3VU+8gJdaAg+-8C;=`Tei_djh@Azp`sQHJiysOY?dIBbz8wF`~lY6DpxzP>5DDs%7xY+OYLKTXN?XO~b35J>EGXd+f;~!z=D#Ca&ujf`a2I z#ju`AEf)P|SC;;rWuAQ;IizSLe|)VY7wrGvt(v;RNSc(kEngN+3c2KL5^yg2iFw{C ztS7Kqa2D#KpsRoNw&;(`E;$h9GQxHCV4?p`@h8k|Ni}M{WQvbQ+;vZ8LLwaGo3@f1Dt<=YP* zZtp=I8c?*|8))D2>B8Yk(yhcsk48`)k@G}r+42VyaS>~s{rZA{8wUKIHmseQQg#+ms+tG{)gPfeo1K@X9Pc@$NAP^cn)+_B5d-p=1`ZVY)hoI2En0)#j*Z-?&C~A>` za9vl;?7q>7-7iQ<+}Gv8!otMMzq8z4F7P$!20SzzoI)YjJ$jIR?i1UGHDs?(_0k}x zrS+{AIK(Sa`SoLCUz=7tsgYw*zRS)wx}_*S3&Wi9liOygQtyXL8>LT54BFE(Kt}^N z%*V#rqyw%!{GZXRO5;QG>8stXN>hX>URHQJqNB$}Co3`p1h5h!O+)b&vB^}G)hrv= z$p1{c$g7>~)C(}+%-+lPjt#3Q=cXF)2Qs#V{yK0vv#P`M>_PSGi)RrxuKS=D4%jHC z%Lpz>`2F|4AV@6&STfz&+1brKt+#eK(?w`#M<&{`Na1Q4*r*cfv97hIFU}4sEUe6O zDdd>4i2il_-dXbVvYx{~KVie6RMM0n*f6r-Wnn2T1z!2G?Slg&?WOq5Ya(nvz--Ak z1c3oJb8~vBcU2Jn)a60EC^b|8!T4dXRedjM)FB+Q%ijf#>wVhd_w=!(6ZS=ozd4A! z*Bdv^O?*V{8v3gfe!rmGeg?pxVPRlAz|Q)5Zba*u=PN#B5G#L06mMZ_$|WKs0LE5;L= zOF>fX9G{he+v1-aINIj*PaCq|Mtg+`{^tKgy0)jt+ku#ug6AE9{o>d!%8jSurYjP7 zw-iS;&JY)N@IND2#~kyBg{WFj_6l{h9bE$hlb_YqrTu0a82$#u3k!e*v;k62x2GVg zWm5mE243gOag7DO-)Aqr4kl`1qE-b>3|~0kPj78!4zkSh##{34I2wt6= zOy|sdxclgEu{adVO16)TI1~j0{0RZ=?Oszu!|~6A9ADk9&LA-p-{7$NJ5pxj`u^_5 z2wG1_0#^0KrR^kF7rSc-w)<6jjRZwtpY;1b5(5UW*-MQO= z>p2^%k|M#C2_GjHejX2$PVlNyfVWtUr9xV5!^F1yoiXZ%4S5NcH33Vih(ZKh%O1~sK zTf_F^@2mwdQ}wm+oBJ{20n7ofEihR1f;1V;>gp;icqc7P#+j4iiNBohLyX4G!FpdJ zF*0L%6GBU8+|VRjl`#bq1jOlWk@bWu3S-CtZ^vz6Kl z>9h8dmuDBhvoUV-tJ7NqdNh|Nd9&u)I@9$kN=WM`ek0oGW+=LQ5J9ZS+~xHj=J4o+ zsNb|%1=-o74NCFM(q?961|Y^BNi&c z0?G74+|VvRB{O>d?{w;49~IrND^8;&!R^7u*mV|fR2VJjp$I}UGMiL>E8S-@S}^%z zrGobb`5I^6ZqBw27vpL=zr)L_EIc)rWjXA(2`*K5KP4KlJ@xYda?6XPgoG(D%d<`s zMA522c(g|7)1eCZr%D*cQc}nOB^w8l`dr8;D3icq)geP0y$mLvY9yZdjgb#wt)s=^ zf>lbu{b|T5kA#-CGkIW{UgcrNQ(H?O%+1Am&Q`{QY4^be?7nE-59Ip#PAG8#bl)5d zA65rnD52q~DTW0_S=rdin->(`rytYwE-rOHG%mdAC(=MgjbfYWotyJ`J6mN@3V3ZZ z6Xiw*3*bzu1vrgiAlo+6-!9=81Cf*hb6a+RGWqE2EX#|E!VL#nPuuD0keh;c zNb!!VtV52zJ5g>vF6O42ZM|tIFLdTFR)b#y{`k$+>O$9UMp1o@nYhxIe5*5Cfuw;)F-d zo$;#}MG$0d8MwTWQA>Gou{A-kjHWo4e^3RaPt?3~kV+kSjBo^qL`^NwpMgzt4NODN z^Zm{Mg%$&B84&+VA5KBe&=x;m|>4oIo+#$6|>N;TE2-6;*o94gW^XA zD9Ef$!21;5)$fZ14m6R1&XXNK= zNV0{hD1@t2(VMgO;=d>k?z+YgmQjmgVrSvv2~vHdzyq6}xGX9!&xp&Otxf7^!O;1x<4|3z)f5Mcz#CB%-TOlufQ3yGSC?ao59W`zLxJd9MI^I zlaiv3j0aVqkGTYxf|-eli5bMdWx7g^j38qtwO2mmg2?%y?GI0$Id~rUzWx0T3H&I^ Ms>+m0z5V+C0GBe^(EtDd literal 0 HcmV?d00001 diff --git a/doc/fluid/design/concurrent/images/channel_send.png b/doc/fluid/design/concurrent/images/channel_send.png new file mode 100644 index 0000000000000000000000000000000000000000..006ebb4a5a4bcd32c97847e9fb7729a740255f7c GIT binary patch literal 85643 zcmY&<1yojB7cC%2DJdY`ozh)Wf`mx7Al=>FASK;MNJ)2hmy~pOch}qazx(cej?XcU zADkU4=bCF7EGHxW8UY6Z0s`Xo2MG~*2neV_@K+S>CHSV#;s^Kv5yS@(Aq6L`{Zv>d zg|4Y?SqF~fMZ_PZ0gJA%sF08th{sqkLUJ($=b0p;@c5|Ebga}+2q8gK5*Wg;JNWNn z{Osh}&7RV1_0Rkz`Y6;boR7}@AY6|I6PE`0&JOFghMgiiBR;e=eI76=k2@ubKq*)A z4gS@JQ)T$BN)rD|nLKoqcu-)h&!CcR#-#4a*#(ls?}ednr*iq_lLWi33Sk#cO63az z`;*M76QVyFteKNU*wLzv_jZsjbePn?d%Pj1BnyY%^o4-)`sefJyO%g|iS2+sy2p;X zrv9hNsk|n&0)>K~FQc1zikjNK;Gs%moMb0cd0j3@I>YGqG4!ME^Ds+~?; zN5**;HuW0)iEc9^%AyYZcAxxYKK67s=M;b5ozmc98$PO2Z(6N3#6_)I6Kc$+g;k_v z`>RcNUXT226n=!B{{Ry%?+vsd#6O?b9B2{R1j?sVC5+IntmgP!S=!TMBNNsg+H0rV zopz{@!j5~XYyWF8`5^6gr^i+v=)aT@bMz?@1gK|f%0HK*edG$D6OgUC%-|eU|A1R^onZ=Dh3IP z2;!g5G6IwZ)?l=UM?+G&>@ic*yId(R<)EJ$B5X^{Ut%S)F6~6`@YMqQ%(81^&g|%S zzA;FpTTGa^HBVuW8F-%0b8O+Mp(4D1M1lC{V}%V_1Gg#bxu?wgo5js8oO(-9HrCK; z=D0vBr-LS{guPY&wWEV5w>w2wjz)%QpW$nR!goXJYj>Ike-82q?H)I)EB3^0{yicD zR5il1l;fRKVN)Qu;&%CpVwHY7O`(rFDc%%LR9sB4)M{_VhT52FmReDr5z}Jo z#fmLreCUKm0?a>G1$T}N^pb+M9W3#^Qz_aU=?_r8ph87EAjDRYEK7|{uuir3soDQX zdE=v+Bj3bP-O;EY;_n^suN`_p!*MDyc~;v`AifMe8L)anBd+SNK0UoWkA*;+N=VS_ z{fTbvdQLraLakPHn|hU=coJ%AB1Zq;yIi7Jh3Gu$UitkL%X(BV>TWPAVy0)4lxQf0 zby>6>uR!1FybzFf(HmxCBz#JHk>u%=QL4Yf{jYo>AbZfQ@^sQl^R=ByEw5#jLhG!D znCT}!3F+fomVK1^DA6KDS#tEQP$9{^KBW??hrfnN`lqyL#lOZYC=ZQCk&yd%`Dln) zQRjJ3PG6;xwXO zsZQh}BjnRXWcGf>Cp+TDphMmE(TUSU_>Fq|yT$zs-A}Lnbzv{BROk@0gxn|gO8>>Y zxtZ$Qy~_~|Not-Ds|Fo+~<3gGC!FY zGzH|9uib4lqt&Ep#bznx>%A0s==(~k?7u4X|6CLT`V^HaMknp8XIs1dUaGQCxj5|n zvLtRBh1149nOf1&b>O$*&t{@gidM6Kw~8c#Y=Akdq(9QmmoNWPM@ed0KsKWbkL%8L z;$pLgDPLm_VF&)vdcUlgq8(m0O#-HLxGh_Yd3=Q_iDaQ@;u zI&KDsyWsrcXUc3q_5X`%US4nE-$*c3s9aXQOfgn(GW<{_N!sl|)lU&UBcU!^(il}V zl+XO*!iP?jjUWu>9}k0o#6qWv)JZFMmQ!oM;q!5?d}Lrpp5*%`Sk3QSovxb7T)lR0 zB?tBI@1s}|c=sS|CbmRy$`_gqaFhECkQ&Z7Kgi{MxBRJDol;+Rw0&I7f`eHAp#l9L ze>qlxig+`K=pcQ~KG@Q^Hz~#Dui`-`d6iSgdBSNsgetHNYSkT6><7J;H|?)8~Q^hY>l zDIQ}Ri;w|7s_uYNY1?*-=sAalEWW3j(u`qvX%A)#$x&i2hodqkjP z(&TIA$0HU7J0UMJ;V98KUCbNU=xM(Ux{h>ybkt>|!udxI5YTUgh?MbU7nsR%>|-r{ zJTA6{7}ciWCMHIUHHU*&2u z+))oD9)C9cq5l|lAR5>wTly$LyI4SOW5v7c9iHS=n13QY#qG^C$y#74=E`dsj>{JX zFaI&@NPfr&S@tXXLPc1OZG}7fUj;-DkBucTEqG6AmaIGJ_s2J)9QuD)Sz-UZzlK<= z-s#B0tUqkrW0n3!{KRnH#cEw>OxC~;wF?ujtZR~3=gm%R1O)g@d3{NYd^~Y&yemh% zoSdH+i8ES&oN<<5#{Rda6Dly0cxG-db-3ki6q@Fo(@Cr2P1H z-k`rzeEQ}U>ch%6hm4Q^ajqAT5zcc0w^biSa*#%&s#3?&s0lme?uVRi-F7rik5Q)3 zM%Dtua{Ji|ew$i4I7-eTs&D_%t{!!iQW{?+#Eh0yyly;|g74W8+wIbZ<>RXxe_@6# zC4wF#AudAfUxhLCpDWG7I}OfqpDG-b7Zr%Z#L~*EJ3f^q^SgNR=8)z7u3+SqdLfRK zaf;E8j<|^~E-M#LplP%?;cRzxsF}p-nrVeZm{NSQFveDCw!*wJTg=xWpYH9R_pxp) zCMDscw#Oxg{o$pL)v&CoCt5M?-OgKt2)Nwn9+|P`)VsV-q1~)^(KB}UzpZhYbRn9~ zJOsv!ScPv=(3mY}-DXebkX$yZ;`0q#(iDE!z^t&iLZ5}z;#TVX5uUAeAicS{8(p7? z&U-*SuuVLetxo>w@%DW_weh)H`qXxq2Ol*n?|GT|GE}ifqhz}D%{Qb&J_%p3*Cet1 zzL!T_f@g8=T4Zf_>5m+#jgMRTU(9o-2FL5@Vg#D6EYVS$q(o>#CvIxr{%ciVKep=a z#u0xgDzrmn><=KHt1-(Zwrm!6hrRq!AGQbI9u<-9@E}qlkodk3vm~?hmVGB2C7YA^qUsh_)rV)>P;`0;;Qfxore z&z(5wLSl7av74gdt(fKJRG3o=NA$@jm1%-V4l=ctX+~H60)tdPSw&wY^ zHN%xICp14E(;Y3dDQWpsb=u5Wadtcpp2TwoVt+D}IQ|%?9R7X3b7nluVVk#LYi``( zh`XKjV{K>Q%h6agMtXKWp$xgU0O4$@-V5@1NvE&4#o7%r_(^uzd1#+l&wa0q`>>?8 z+h;=Cv3#*Ze`0-LGF*_(#W$Ry2s_{`Awv!8C5bfQ4}^bR}1lJ)h!k_!QnyZjS1 zH9NJhSY1k9mau=~CQD4hYu?5xw5-`=r-%|v_i@UjAzVf7sr0}V0UnX~L6gtkVlpwn zomB%RnJ%wzBJU$Uwly=rtjtX#;)kZ{m!Pas%czN&T%hO!CWarRo>!v1G*$)IA?x~o~56;dzGle zTepOh0bSa3KcttQ=-j-Q8DDXyhP&`i`G&;Pd|HQNnJeGuET<)%9pM!bDM3MvOQ78s z?6D_WCV=7z-Zs8c2vhqXjxmGg{b_-1*I*y3Op1v6Ww?JZlVQZ}2RsX<+!|(>ZJ$kn ze?&<{(;arZ*VOfHD+uO`rK&+J!srKjz@gkmpm6*67OH!``4RaNI&S(G%fDf3)x+{KAAJ)d~4rgp&ZI|3ELYT zyO1(k?G1+M|6+ja+H(Ih@^m!MnbhRTjYo(xo=%hbYCx#s&(L;$by05y2VB&lsBQC` z%*A_8x&*?BfG}4N5)bNf{bbCE2G1i62h;eH9KN|Hx^kUIj`l_hYqOr4##g<)uBZmI zsKo8B2qB=xh+3}-YqcIDUvj9xKWAhl^5u87d&heA{nf>vr|}Hb&+QK^P4iq1cAwrI zegYSdU^w5Tb%!RtwYiGVGgwH&2?42ImT5$T)mO{&&|XsJ z;*T`8&sp&Zpfu0&fzzr*m7H{qth}D{R0;PpDvnal0=g#tx z!|7V^Jf*gvI5`{lO^w8xOqPfe{~&JPLN@8BFEY(vDm?m)d(=pr9XTDoq3t0Rc!^I_ zuCs}^*;0JK&A&RMYJX-oSj*dv%{7SBlq&ZYjxz-Ux}qa0Q}8*$m!^C57e5V+^ zRxx_7B{Y~%cwfGHrkO@s(Rcrw{r-MSEQ!yV9mM#}5Iq^%o8yNFDn9zAUY|tezH7>j z`G}*c#M9Fk0RhsSdah4VX0>?rS0qd2LrehP1>I(b4woF>PT?o2R*}^|(X|f{g!`e=z&IXo3p#}Zy;PwqenIG(1d~_B9eqc8O+?rd<)9S3|1(D zbPJDM^E~YnZ8fDs`E9IrPHnEuj<;$pF21c1Fh^Llo1eqD+5F;fS1srJ-H9V%Y_Gx> z@~0Y_cKnM+5OM9?_%F`p_nf%u84c(%mVfIcJst4w{;pkfziFYP;YDn8(YM{*{A+*nxfPYQFt5 zzAaBCGD9-ERvAx!@}NDz)&ppwgoQ;IroYux5HJ7Ce9bQ<46EYLsWomP2W>GNC9VN*c? zBT8YJ7OGDLenh=D`uv*ghD<1;Vxo_^VZrdhz-4XbzC=v2<8t-PRtHq_O$ltzOZT+Y zn=)f(3b=WU4rRvq?}5NLFAU)`LqGkA`Wbi6%L+Q(xt#uV{K)u+Fhj{X$aK#{*o2*Y{vI|&IhXB9D3R2Va0ZDE zbV)$HMD$QTI%&=J*M!npJXP=~VKH$RrI$aT<;&5wf5+0#iB+agA|(JMP9_E6qgt*B z+X0TWF0#A?cgAShf`TpQV1Fjbc-xn6Eom{3j^!~yu75v%k+s!wqu7V{+W{q#6YLf2 zNHhowi4qCxJr8q|Yk~cJzFxAXwB1671R3wtNK{OQ=76h(-`Fm&&gm;r1cNAT%=2b- z?h$dI1_}xiEY?YDFyBx3?X~)OH_jEOK{7bc7eTLowB@Pvr~U%Lh1Og`qJJI16cA8Q z0EjV0^-uxenJTf>ef4^L=*6p4ldFq!1s(%?1ae<{<+;~C8!9(CnLBD_&PIa|ccCW` z0qX)>nlp|H{yAAANH6oZ@fyx;k4X9PbCtUE{VWiU4?ax1I?Kq>0+X``&IZn_) z_2nytgci(@0EQRphm7@H>UrtG-BW5i?^+%W<$o&Q@v=dkEcGbSd)x3UrZ70y&an7M zoAu}TR)?k%H4UhCttfzC4aY=hp7|CzK`3OmHp~7J-|y-9mud;=$~ustZN`Tbk@g&^ z4k?<^tkr97zrL*;4hudIEpr0z4%Z9*{#;*W{Dvl=N}#;|!=Ye$5`3g_r74n#81|(S26dvx2H(oMKW5xckuy9z*M&L|*U^p#Ab=KxMcF`R3{Bx>rWq^v%u)R4uSi=-~8RuRo zwquay5E~M$DYmRIpZ;?$aSE$>%Ag-5UNIyD98yCRcz|N|C*)@j;Pq+M%_&9o{E|=NI1ux9Y*L#PjQ) zwN6w|y82zaz`V(4*4XESQguA~9nNCg@JUv%LUEBVpFnOwOOKRU!Cn%-lr_F^AUk3ISGkgQhGlfhtIH?;4YA&Xn>qD zkWaUKT7Dlo$y;oGLBJnEw$(0X$=5{E^Lc~K`S*CVu7(PI!Lw;Yd8ee{(T1)@Q_9NU zoNnxN2BWVZ9WhSW?TnF|EH-kb3V0Iu%)sw|BKxb@IKdw+AdOKW9w~ zh?Yv*XTA0ago!lcMdn7_#2=eI!UC!iZQ(M&4YFm4N^~~=barHVlf<+n=DW~3T3*x zzJ}YuhKGkg--y?iH!-2PIG9tuzS%7-ktkBan(+*Ufc^NFWB!0*mC%uTyO&{%S-8=p zrmto^AgW-iQujS$TaffghTEJEwPr3)$$6MyeC4bRi4I2;xM`4=moyVHK6i}E#jfbZ z-c)Wz2GMMp9;{}ALs$}B)k&k&q0Y zFHpejh2c0Are&_n97|g)uWQzY%uVBsH#B*#;r(^!Tv*n|OUAUVT@PJUb{xkSCepHa z82jj4@~J>r6EuT1^KowVf@HN?77Pgs!w@;Wz1Yha@_p&!Wzg1Wy(X6QPA?#G?bqo2 zod;X!*7mj$x7#&S`kveqJ-bZlUsTm80?f~GmhnYFUR85p6Q4WnB*R&ZY>;+#!Ymz) zx_pJ@xc1Y2|GNa+;pC4q3*wF{PC^PV+us6P3qOsG31ejo5VLg0}qTkZUCK^&O(5G!?K7if4k+_9rTmE{zZ(qX z63xqe2*QIFOP@8Ou^pD3l3LBgeASw0y05!D)gAYm%g=Dh6ABq^u&=K#MOP>|I9Q=T zf%*$f*uh*4dak)ft(7-G1Taj2Ncd={Nm1X4pR9c_eSnfey*|TK6y>K|96lw%=5NL$ z73MKu%M+KX$YEQ5viiYRb9DcnW4!olXO0$YR2S>9?-pi}^_Po01mNF8&;(=&Gi2dp zE$3rWZ?LhcMG`*C%ljFfW(fHj&DUCY(i-qS-JkiCZW>jJWmyM5Ij}G&AV0^EOrmA^ zz`MRpM^_hPx4`7V?tXpJ0;f3mqF?gD^vu-pyw>Adx z-0^fL1@HYIO-9+J3so-TaRHad0jc7emu$}(it=jmx%p!5rGu$=)J_2`s}9YoL1!4z zVeF@4(zTI0Y`)^`twHDykvwf{d(fcz5*wzoBr%r9CpwNyQ4uv}7aWt8DnYG!0{Uzx zm|>Ex<6qz5)tL@Iy1C_+Acb_RynQwvoM)`PgYf(e0R=-rMAPP`z3x2DgF&Zc>t57- zM;$Ac5SY0y>F46X5TbPqgWjnNFlYc61B4Oqke5=B}!~PN?6IX+ahfPtLuUdDkyOBDp3eYkO@0kIp22k=;*J@zX=*hZ3q zXRb?{SyU$IW_Up+ z+KQ!76R>dUFCsCNH1ABy~QB{S$4vV=3SL{v@8ISD^Lt?N4e ze`W!ISp_laz52V71U%ITrmZmM3LP;!)ofoh1E95aG(pLiFJJssrkB?kbOn7suXj*n z1Dy{f+_OLlJ^(qtfc~GH1&w`XqJx?#GCf4TqCk2k{Kat*p#J}M0{QH)B%nZU`?#{Q z(i0sWy%*%g_cw2Q_W;Ft;1RnqutQY^=aHfMqkLj|y1u=|JUcxNkt>`;y?-*O;OhO# z41@FPWy@HO6s1Os2cKl&q=6}G(yJ;p_a2j~pDJ{5Z<@5yv&VNV&_8jb<}BQ~`v%Db z6^)jPKSIY)B!KW6%ggI$KwEThN;qUh4A%=JU2%V_AVXguBy#Zr3M$Ff#f4o;N~&=B zmudxcnAjRFCgx8BWaO)FK|#kz@bEvDmvzb?hM%s^&+S&zD=RDSJmwx5si}7?Y;4M@ z$;rC_b84%rr$_o;SeOAPtlK>?G0`=6n+i4cJGx4xs2flM4osOD4^dJV$J;NcK#QKw zT9ga&7gWUTX=`hvpER)_KR-VY(eIDxvlz<`u2-!v@Bo;E^6^u?f=tbn8T)t?skmSI z^t8&tqj zRp%`Fj#anlD=18~f$cGCJHPzw~1kcbx zdE($Z_v-4Z5rB%7c02OEGPzQrK|$AtB`uFhVj}Pi?;v6&(!5e&Y`r^~MJm>Nm~bGV zY$zbw=(ZN^ZRZ-Bn(qAk{9du)gkc;8e~nP9GO3%P*;0O*()JwfkEJb=ilcjXeSNLb z2(Cg`G1EAI!_h6d7CY z%eD9Z6fNil!I>-oNNGM@O6&?CxMWi z&?H^DR0h3Hkq}B2N1TMuQ!9r9=Oq{g;v*I|F@n)dx&C-eObkX;RFwYaU}DxM;aSly zt`FBIYpizLa-VH%POI|r@;It3mzrCirwW`=BcpX3f`9?VO+kp83JR_Q10rAawqO^? zR4P&v0QOB$VK|~xWx2FCv?-B$3y%4_^h*bwT9Lz1Uo@pQNv6V%3bItHkRXH4%nxwA z8SMA3^#M|N;{yS0OPsNSmKhWjRQJ+%WdW2zCaxE|Fl7E@r9g)=lT0qo&O=f%G6Y(^ zx>kv=;Ch9actnA?2XySR;E9Obfmw_ly;rVjr6nUxm#O*k2DX1dbc7R2atPa=!g{_CQobu)t>~=UK}lq?lm;a-e3~ z!WP8ISsKNH`>{snvrBFFi`buXFv}zj@aG3}Q+CJ8ZS;o2Db8(&q1N7H@DiVx8lHt9 zW#iUigc8XOY%c#QBl;jBB&43%ckh(JxfD^Y)@21v zd}fNCH-F@SR)d2g0YHMT90@gws|_cN!ue>iX;&-^C%BhRKe60$iQim_miY&a`_kg-nG2o(2TqJS0gGf2~HHP3}m#fcDtHfK)aGO%GkFjJRTf zLLD|1ma=~kbL_Z{ew6}^&kRG^pzqc3N{2#≧~Mf{j{)ixb>+cKdH*JUJ$i#>as) zcwQITAtJ%SNeDO|YRL6{C(oIKOH_S)u8)V3*gmr`Gq-;Q4cjdONa`TIhEC?1y}?8l zg(esKRdI6ZWOjQK!}qK_7>o$8kU8jHI!d>2y;F~;8v}%ie69|u0Ctlas(#he1Epmg zFc^X+U}*23nGZZc!F`_@i?YFwg9a=Mjn2^_Sd21hX=zBU=i4MiNvYv)F+d)Eh8a~` zguf3BhLVbm7V0ldLAf2Eg1J-smdD|j{#3E16cQfiakYfBpQ=9@kwKdVRoNik_vmQj z?r+!=EuNk<%EfpG3k~t$pQIuwxU5Ekp_ufU_4a$1XfwWyzD5pIGH(D{IA#QD{RZtV zcraU~4K%CPtUrLs78#T*n9Oee4I3NV5|7K~YgjG4zx^c&N}&!zrOIY^x`1aI`msOn zW?mi@jb=S-7y)0>($^gQfw)mn!EOT~oUcdA&UWmUZ3(jqV4}(ZoKgCm{oEVZT^znz zSshOpwy8vYSxQ@==}!ztVq2k4@6(P z)lPVq)nLkA-Ll_7sHEEmHA|QxRawYE2IAUdUY#EU>yU|v zDBAW>%2KqMrb@Ka#{groP;XzU+l4?Jkr=_}dMO9QpTbTB6dZsFUO-g#}5!~M`LM~^8g`nK4X~v>FRJH zbrBgzMf@9rpx`i&%Eo*{gmH32M8wJE!CaD;7X-}bNQZGQ3FUe_h5o z-hqM85DePQQ>{QF2QC{0I4B!H{L%$#(TU);x3|k96Y#!W-QJKz{DQzhP2!D+#UM8j zNBnl)_)XzdZlWi%$&S}&+pBZQe z#)Y)ci)DcHTn`MhSyCi<-T<100S(d-&OkIoU!75i70bABRTo7L2Y6P_1t;o}T!Sr$ z0&-$#Lrp1hvUGwpp*83eFz7)RJ-|tp;h9PF{k^uxL6VCd z;S(zcN(o~=84;7J*_7z$tkd&z1)aT(nXfYNuMrTKvBSjd3Pc*FV|`{4xkYWRQH>q6 z66YxJ%K!&lI&&E{F-ffW` z_B@j`AZ}3e&OctUJX1A_RNvryRGqUj4p>Hb&*rMl=M;u5ZoOZ?`o%9hD^lm+8;+#q zG&ml8rybWy^SCu00AX?tZ4@czB<{VdCUF8jNgOu2CA-bzy)z)d-U^hXVLT}oDvica zf8Jj2i*}fKIl6yqaevZ7Sj5E{RiB?ZIa&JKXXtK30T zKb<&j*b1H5%Tfb2qu)A`zAs;Qdz^G($(VD&raKu=%JSd%@n0{$$o)ee&<+el0$4;= zB8%bok`hMy?cd&yk9U^3EBiEjqadCFrYzO!biFTFR|g7rpLE%*ZJ()ZaAC!JR%5<6l%X@Qk1K;onij{Z<79PW4JhcXggA!O|g8g+1g1jXU zsNpw*{+I*F9F{-%-EUEj#->2fOX9T7EQN$eH1O~{znrri4vPt80W*$oBkiB-`b~@Hs;UE-69dzRpsd(_MDOEmhT|bd) zbh6%^t0_S-ZU)&YmPRo|MdB=hDS4>UYDJj%w!J6&L-aN{)*E@wUrMw^suclF!fV#u zda>yY$DBSo3}z+E^?@Mt#V!5b78Swc-wgJ@i_Z$HYv^f!s3r_)e}L; zIKgATtI*bSvIyg5CAlt6=q`fGVezJaV+8oeqa3cYc`E1D%u zH-Gf_A~Q3yADkl~GX1#N`1mvvT%FT4@R7ELQ>82$PXbV243Y^q|x?&oUef|BJH)mUj92AI!N6mLK zR9SgG0Fc{X`I=kVq@Tq9Txf7aRL^D9Zq|59yWi=|(F=-B#Nip;GPnlO*-lm-$+W@i zP~}8seLrhsT*Kz(=HdEW4Olp~;9!6v5?ng0tI1HK4#?r%308P%Sy|8jfEd%RdOc%X zioEJ*#MzYrV3QA#e%Sz2k|96`fp&-8!>#?w z({#EGSG9BU0F8R}FC9u)*6U!xkpte`v-^AC+yY~a6)grWL$(5@5Hb;!@Rmq^_B&&u zIvtNm2fE1&T8$E*e$ECyv#}e#mF6k~_s#llS_`R`&1_n(wi;xYi6&P^p^uc5l!!+S z(-r17Ia2YQ2aMG8^z>tC&0Te?M9as0e7wB(PV(~d2kopQMy&D`219<3{akG+v#oX6 zbapoetMtM*bIePPbSBe63WZA2ZyVi6&uRo8yL;+}-JYZ9`R!f_qWaJJ3Hh z#X$@XS-^B}CEW&RwRGAheC>~=g>svQ>`=QO!zJIQM|AYHRWF#5Kn7m-$Kb<|o_D-^ zz587?O7LcxLPFz~k7`&2;B~&rvU}jD!S!dHZ)SpCLnhU2D(5=rz`tJXpDbU53TUV&eXqgUSsum*VKRG;kD^%35$Wh!QIcP=Qm^X<|3_!B4o zt=vX4O7URenK#t$Y94x{Nb4AvY?s3hmQ5WFX32R|!5q)5wzrZbzc8iShL;D_=mPVj!`4n<|pDJ=*33p(V z^Z^NL@QRLJ!7q_;sC4<+rTp4VcsM6Xjv%(ESVN7IeV1{AMVSP+SCUtM|9Id2x;;v- z+4>B@K%F&dVAHb54c~LPuIJwI@#WjMZ>bc_8-A$Zoh+%M$R@K(=grN{sR!BEb|#h> z3MneIeXD$x8+LYwFwDcrCESV>K{~{%bzPQDf6D82ZAJ4b(~l?w^dw|go^GP>uDE20 z+^nDjD6t)C-3a8FHO2!G3SdwToE;n<;aFScAM*GbByChNmrDFy86&rvu)2Up6w(1*$e0wfV)u@u;u)!hdhN z8xLcyuNu*bo2t5^~XEpQ~-^bUSES^Bt zff0CMYH?fZe%;pGIAy~#6VPbec;Ig_cRxI8^tk$5(-SLpIZX2#whbK^HkdO8SI8_w z;KirMU#tbtXiov!pcBwux)D6eQ0cV899&XNw%T*_NjGx@jFxAmLPof)pkzJ-HtbXYWZM!&$ditm5M(tiYns94cYjS1HX1Z{ivR2V+Ma)IR3Qt#i9M zZ6BY8g*PZpPcJ>a=QjOA*M_Iw%qU>Jw#|xYTpYMX>tX2oKp^gQ*6e4A9{n1JHDfE? zQ>&MJ69PT=E35IC$n{FD+rIWqmuZ>+mqfQMwE+3%1neAMdJQtcR{TvLQGEQBTD5=H zSOnVO{dq#xmb0(1hNVZF8RFoHp+CmpJ*(~S456M9%?72Z;I#oo(Pv@0+SEv~ZbiJc zEtwNx-O{j@gnqgc@<}8eOQ~-|6`bQ9x(|yXY&%}w?Ag3&L7gQ{P>noW!wT1#gJxOE zxHpgU$Bb80X?s<+UvPJ~iLU)7=dn>X%G@X6tX_5A_V#7ywX3aP6ib_~b&GhTP0Iv2Ue*26u&#mDQkBh~52Rw>P_*7Izkv@Y0K;ncDk+wB zn3MNGfh|I3L%mC0s_Qm(zqQ&1IpT}>J+iw%@J{9^iEDM9pc_*av|jxDMf=uQ#o?0g zVd<2RGQph72~U4>`$Y@iaFjlQKave&IzP5Bbx8F14jg-g= zJXA)Sva9Z4&*TU5^R>bvUSNxrbv+E>z5cgUjwb`kf$)SLkZ%~} z@lfRpCyVQI=U*UK)h9iN2IUu+U))D3O(spwiZvVdXL05*1y>#4ab6`L%RnyUEjD_D zY<(U!-DHJ9Vg*D69E;~k4If)Y>d2I>30$aUnv`>R6+Ydzs+qv{@kHSHk!^Rh2odbpyzu%=B>~ zvkR*bID!04!FXv|E7mkFtI8RN<0Vxc7G;L(Ti9+q+jUFpHVj!HIW+j}xuk7>D<)_zr2zcPT;ZXtdk3 zelbzeR6tQU_GWjTVT`EncTC5Z9qFR>COfVRaO!l~G9rE*{xmFLSuXIkrT6hrXljcw zVyb2FT&c_HxKMJL)F!G9V>@;xi&XI9ZE&SJn+o3=kFYWJC8tulq7J}hNc5=Dxt(;FOo^y< zbKj(o>PaTH?#vFyi5@LzVbPJK5D`FyB*%36F1p9XFlf5Wfq9~|GQ&RRD^=R?BrfeF z<-XD!vyhI0ry8HB;KM&fbJexAoWoz@JAQC3ts}MlQW+d<{Fp8__czJor+1c4A7GHnVFeNvkg@e>`skGnw(^q4;6i(H9gT^HeE|UsK(9yxZ z+r~?4P`qn2!@Of;?4WfxZa;E6J``=-plZ=*#Z^64uWTHj^F<2M51xNyLWk}Y2kQNP zUEj)wsaUMx{1LEfx=x>w@x~}x3mx%zovhYpDhv%J8_2IzOTY~++RQ-o%+XdUwV1Dc z=V*(j{F+}YDR9bSoelpcOm7h_CcZ0Qy{5Qy;&sW%V9Qwx*K^(>5auHyl6t<-*O^d% zvzU^Sl6U<4T6*fcm62V`%p7RTu??VEV>mcC7&C{P0xVh=We^5)2zmB)T=5FBWK;u8 zUx5M7MZTb^>~*q7K=O~~f6nzAal)6-`nupCNY_x{D2EK!8~1DkGnZe9ag;!_Wdv01 z+}qKz{HOyOnn6lbci-RXb*pe#%wNxdVk)rjn;!^J2^Q`1U5H>I%ce~L9}_a!^GPGp4cJGQ{6l&1^g9O=vAB#qnk=9*$M zIm>5tNQ)V|fc0;;oNGf&_dM;R?A#MNHK1zjgFmwB^hv~WzB)NUE#B}wSY7ZwwGcum zqhp!!+Vt{rg?ly?WP{N^yE&sqa7{)EFftL|&gfh3Dz zc^?y*I~!oE{gP^K)|cpN>lhX#T|t@LUY)w+ewA8U#D4UrrvbIBcXI-95g{3wKOTmK z@qQSwU=AsqvqG-s;PwSLZq4&?@BSW_(8T%=9uCg!%=ykZENFNrGwfZ3pjV>*QmeK4 zG7Q*J%%8cr$d1A%hYa%8%^%EsR8fZRVJ%bpZra?AFJl@_mp~_xbboIc)gTtJvPq z(3dIGSt*jw_oEjnRLK8Mn{XB`zvvJx%01WJLL9f?j-2Iz5{dDgWXpH?VA&{`n3y&I z_w@s?>-KWNRNXVsD#;Ut%8WYz_e2dB4@hqS&=vfN>53>5BW1IdBoR4$`xF5?#o&un zpH3N%UiT|*>s66MXI{aqtStYBz47t!e83{KU!++4EuKboF-P#owMuooBm3KY5^A1O zSPa8atT&BVT3Ty7pC0G25bLyLrk0xyZ`9_jK@A1^M+(J}0V)pT=AkA+g3ZuxIH>z- zz<)z3F9luRYf#q`^=puQD{hz`85zkN-*LjO<0&qj%weN`K4&F{PWC^u08FyvnFS)} zyoi&H;*0T*oT6)>h!ehr4%Omk&~EWD}Iv^$pN zp1m4RQdZ1=GXL||WMpKd{#=da)%Q;oUuR+f^NkHUSd$l^gCQ_b?)}6r2@K&IR0$%g zyO0RRbuxD7CKz0UbMB#YGpEJ|O_;17?*qsD5wUuwu9k)(0sTX^&Sopw4LZj8!08)R zYLBrQ@wBQ;6p3vHchIpIwHhr)oVAt`OZFC<8suKa4>6vcv<=KyhjfFQ0D)7&p-YV>qL zVB6D98(Vm{+yRdp%I$ba2d6RYxhZi9CKCLxZo>%$7(^nS!!~xta}l9Z&k~)R3}r@^ zRQKU(3tKuZSNXywiIJybOB1ZwSln;~vlE(jL z){GnXaYHp^nrb~BxcTn(D}-kQ4s^T+2L`QoyTCsAfL5+czc^e-#$?c-T4%9cPf#A1 zF4qq|=0Y^rGEI!HU)wk^>uE~a5Fsh}C}QkTg+lo})ZM!Fobp_F(Na*~zD;le)ESBv zBtjr3oJNyNEEbJKv+Y)RN_C6J8;!0Tig#iO=J=h?n2T3`7}m3I=IxWtE{RH_Q&Ljs z=+3&;6}I@U^@X(XeYJ^RAs~>1O9CAG*3iJRYl;`lDc$J%#gmn!XW@!SzQBUq02Dl5LVm@0z{USG+$g?J8UXsF6wIX)so^bp$(88>EGSOH;fl5i5 zX?AyQ(kek#Rx$YrQpf}}3$j&B2q;touiw4_lWlTL&t8EL0An}<2dMVVUC>|aM*0<#hgoIif#Kg&-i0ponQzG(i$Dy zg6!}RrNkbX$DLfsl&9711hvN%Y#=(kLXp~qg`i;T`ujkn6G<&ezBgOR*>nk-6iLK! zvAf8>El{j@o@1s3CBv5&VD50{xihyEj84hxGy;VePTq=}4jewv@!H&Otvl>}IK8G# z4v0q|G?{ob0f>=0qzGsuBBFej!}^4?g-Iwk71ee>7(dPg4OCxENc;)-&p$u*C34vs z2&<{7Im2R5U+97}tP>@qKA$1SL!bl1ZQkcId_Q^@#Y#j)Wt7zS)x{HZBGUk$7dxPF zWK*y0`A~JybhXIh?CLrqDJ6xwXY-;{{RdD=iUmnvaT;V9-D1O=IBzeX=aeNcw^W~* zPrT6|tA4n$GU~9{=)9Q9ZEqu*TArr|=4cxCO45Qn0BXQFQi&rD4d5z91z5xeZ4fCG z+==HfT=)a#SYA(x8Nz{kK)GzQD@H|jBj!=iJ70pHgIyquI= zTsQY%KD2)C;ucZoLJ;bCIrKqM%psHktJh#HlIOKPi0tbdhNP!!<7#U!l_ay7V!rcy zbbGpQSnDrtb70*DmW1*j;k@`?{}K>s-Hc&BvP(zq8%HAhE&z40L)f zfx0$2)32^?_u6jd0>G4}4sC~zSivi6+9*{mKnTn6q}}B!`s3D!64gQx)Bk;(Uu18e z+?S^dqId5;f9>t+Nq4UNw1#3SPZJ0PcsC~pC6u`$%6$fGXOp@<>69O zkpTLBsnMYv>40Fv7XvInv{<8`#9cxTvxjyV$pDn1vy#YNIy^m1*!W*8sLHI7jfOu1 zqspR13lvyOz)4Lz)Enr8PfvzUWy7P=%bVO zqo3-`Y(J=b&=;hojh{qw7mS>4j(x8Hn;Io4DT|gZv;8CB5BOwU6GAOykw-y8Q;`@K zcfA5C8=_GgC2RETa-mdza?@C{nD2s_*zJSU!3LPg|AHQ++t?U|vDNNa5<)^km!$lB z^Ae9=Js-+-b1%n1nOgEy=hV~_v7Wy#@2@^0_&3uQbh`kkj@+A@8#f0#JH~{>#6o=) z92MJ28A8K6w+EnINFw_3zu$>9LLdgwqn+X#q}Aw475y$yLqkIw)Uwx|oSdj)X@siW zW-1$-BVCJ1PsJKgf?bYc*x5hU6iJ@iZC(uAj>z#jl| zP(?VOon2`*Shy)1=!Kiof?2Al;EM@1F2FY2>m9WL2c|M`rg_`A3cH1iJc0DoRogr3e+l!QY7 zTXyGzf`>VS>k!**Bl~)v#RJg*ikav{ z^zBRjSv(AK?jo=^Ftme`xvjmS|KXbEUCUOPd`XXx>zkYKgV+CGr;5p{4-fFag`;O$ z8S#~-1q zpZiA>Pb`z4e##)CxL7E3B*W!NKt~7NCvbW~jGr9;-H%5vFCBjtklnRF(YFMA=tBbo z(w*s=P_SRBfsvwkAOa`}1;8ql0nQQkJ%hi?Tz&lP7iVtVZ2*_`MMR73DH;>~*L(?m6a#q#>vxmVQQXp$ zZm*Xw#qP-YXAP$lQE1M4!pQ;VY>0mN){=nh)d)d=^US5o8W1*}FM5+$kAEe~!W?iYnc{rqS@Hl!*RW|v|>ngnZz7P!BLm+x;qXnijz(<#!otm)mpQC zZKl|PLb3jfsZerisfr3Q|6K?>@rx}u$^Y6b+Mj)S8lDO9*_{XW4&PrX00wygAXcwo zr}>7Oq`AQo`hkAUgVyjLEG(w~DG76_j9n=Mv_=D9MNeXV^L8VpTw0p3XcW-={5?JVr=$$FW7FkRQ-ogDp zo50Y$5)g~{#}>Z^fA?R4UW-FK1lV_XhX1=1r6b~2@6QlRNKye7-3)WNx@&?Ni4C5e zC3@8Z^7RrOzyCia2MwgNkf8r9AjE>ehmw*Cp~IWi9egxr4r#Bf6h`1_nB2rqh9J`L zP?nmFXI9daBp$%-M0+z1Vneghqx(QB>_U(%JBlYVsw0{wJL+4shPH}@ z2=n-V{(zVN-G=Lvc280RO7v~K#a$Ox5j!YQsE>ENX}OIx94A$`1(#9w-`1UWAE!P9 z5rV#QURC%}*7}BsdioLm`nOx7{RZ_h@IICQrm)5p6b-}!Q!&WuG()7^(&$F*=h4sT z0SkIF$Ir)L0B=TC=FNXc2=otzfR`K3BxZ3iesTl!=|)b4KtUk^Jql@**6UE`SP^{| zwl%?l18TKg1qBQAfb@O58lfjm68P_;@f`HoE_0$T$&wZ8J86l~!$5$3-kzJ1z#d@- z&X!h9r;zN9&dx+o@tsP(P*6KEQ!kPrOXnydWy4=*2GO96g@py#^Dp;9ZkJpnaI$sE z6++vW0T#9ywl!6)4h~M2c(v4GqmO&WpC&0|yHiT0wo<`q_<~J~yX3Q9n;YpU-jq0w zcyttM3`nX=oLpSq+$DF|Y6;2_s8U=M_g!0m1^B?X!v71`@8QK<)v_G5zEVAUPHK3A zKp@ruN%;fNcXuT~HB7N|^c>U>-h)e)WoKji1~_>hNy#jiwxe5FS=sz^ZUKRv)}{Je zZ7r>)+`K#&Elo{fhWMVm>FMcSpbW$UbC#up!@33AeJzy=vr((@6()@0;^I0Wq+9{5 z5svm#`T(Wk8;7{~t1Q1t#ZN5z1{nL%{dX8i2Ss-M6#3_fqa5hYKHKIPrFVO{ySruZH#;vX>FQ07w zs#0FJqj15JR0#2B>rt@)fRV@@lCx<0D2MD*&(Yio5fC9;fj{Og7R=qN4d8&2cK%go9*;;$;x{8WFeE0KrpW%rUm1|`}{7YV54b5aI!=-bP4E;exfb{lO){m;| z{|jJ0Wgwn60lr>y3?Pm z29%)@a}zh3Z&I8f;TOFxEPU`dDCl+>Bni%b#~X^E)OGhY9-F)<$h(0Cp{kuJD8BKs z=`(=G_kikA?WB^%;yIS?VBrWj)?|S8QZtA@PjgdbkPo&{^caJ}2ggTR1dIa5^ z(0np${MlsJM;@A(INS}+yJ3;s9|PUb-q-RU9vt{Se)~WG{AJ-2>z);m;=Tj$_nX-6 zpVHji&*E!0ioXEQdm(P2ahL~YyBX$do2GF=G>2YZ<@HSfOy7W$H#d1e%6xbNIF&1a zI?W!WYm{|?s9^-8?X{Ugmadb}zwCh-B$ytxSt}wcBjaxm=GcAo-G5`WPEJU;BV7*8 z|MMwe=w(v9i%Y{70r6kG^%3{CDUOuI^eP^W{?nm^o@NIxi%R*U6B2gw^7C8Xfy9W_ z>(9{1U8BmZ(wGc?se^-q<3+!{O+Y|B4Dk0~M3=O=`tE7b6Y)^d(!PG(pT^4yvdCl4 zv_4D!gYZBQ^E&}1nJ6l1YPSc9amxxRJl|J@7CN30!P;H!iFlYBaG-CsYKXs592f*# za#B)i*+fKY2lJ%7#KL{&&8M5*dM_OO`tGzbQd|x$-`UX6(9H!%&u!zgEL4600Rh1M zdplmfvOVPw+ z1EfDzw~VFvW5AP~OjIka=>ux!V_-D>`s;yxVH2pP07)`S_2a~@tncBP_Uq)qnqRCU zB8L^!pn7__63P7fo<;?edDBbH+oNH5AAsbxZj8qmG*E(lkGeK?S- z8JJ<}B&9E)Mh71-bYE|6fXSIih~3!?Mt9=$?}YX-(K6?<_uir|+U)6#`SPqikzGIk zzA9jo|3uY|4h%S300)M<&m3pyCExkQ#honqpg;wnuFINF6rl50Kr5zY=jShEMF}>3 zuo!1&!XP6f^D^J0(B-0dvDBImZe@eI(0dsLjRD{pSeO*I`49I2BpS`;(+eC^QUN>G za>7Ybf5}JX*hHusMVzR>ne^!&dF87Z^aAYCcsJNQcX4&aoyaV|D8$b{2(*W6w4yop zZ@xHQ5~K{^`NcU56ADvtY0Zwu8^ka zBQYx!1gHlU|BH~SQwpTD%L#LFaS4EfrTajVtw~3vgHc7LSS4*8*?(1khW#}Hz0oV>w+-!Pl{((7z*^H)C4^M7b zp{Ap2`~oDictAb%knwzF0zvo?P(}yZK|#XXRU9*2I$z=cZ%DD9^sJJ0k#hyUp|0jq zxxc-8BzLSiuWkX^j~^KK2fFBgS8}?B&H!AwG+K+@zV&DVOvr$G9Vp!3OsoepG-3& zNSO7iTQ5J5;<+wWO#YCwde2*f5~(gCpiTQIixIt_OME{}`BIgL+t45&zaMk%F-;ENbP z88LVPz^reTk*K8hL&n#yKHFf`d;unbv8Zlf&YsK$!nb_`$a5d+S(C?_BY)Lkz9!WD zM)PCMKF}sA>JFZpQr8~+AWKQ7%J@f6rMRs#<}U@R@}Exqy1?rLbyiht;9&_Fwqce; z&+aXJ~iZTuj@!> z99CoiaNLet%vpOM@Mq^7qwEdJQ)Jz0{&@MPs}Xl{G+aRz07WoSha-K!xP5t^&?-ARS{pC z*TxrSi95^D;V)M4Lb;LP`4~o%b7Hm+;@ryL-k`lFT^DHl6;vPWOf`eK`KfESw|#9^ zG!y6gDHsSGU57y(mv@bLmTlThP`$=v&Fd0J9}{d}ZvmyQSmss58vC+7QvOenSM!C{ zC4{crxHvfMTtJ|(0TvBTc*G&aQ;$3E-@li+ zSklzi&dtck=;3QX zva-JCy={C6Zom;lKV61PQx<-@`_OGZNvzm#o~vxo?O~N>o?iSpN7NwDUvlJwT9sPE> z(IoAK{(}6?aq2R`g?1n))nf(?%N=H2k zoK)mp!txz1AlA3Kr~cx6nMdl6%2DZ)gjINGB%VV;l2=-Ex41r+O#4As4X7JlVT!i# zcT#XOyKu`-u4#r`3>L~S0sX`4H(2ktcLNw3F_n~+&#r+5=wpjjdkCb)mJV*Eunt!6 zt(LCt3tn$iYi3`HQA4)-3Mw~Iy*ernqc>i$eNa~E1M%OzI5k_%(|~P;b3E#bHwP^P zI0Ul*xg|K0BhU?e-Xj-L__>DG?)G~y!maf}jWq8Xbgj(FqE%!$%zQ;GGEID{4??1H z;LS2fWV+qW>_t+@TZn2mV$IBzGYR}F)Z`vRHyD+yxcK zFGhU+{&z!4{9PZJz3XSIdQ|q5><+u4z^Y4X% z#wxRka=;_CJRaI4LYlKjQrh`0AYk^O@0|_l0 zHLDR7pK$*#&owYbZ;rKd_oyn?QQUho)at3u6H9C?@St#IFOhFr3usbfn9~oPn$0~1> zUbY;s0VD2j;4F6X8Uhn99IOl5qc)3Z9g)&J(EHfSI zWfc{sKe1__6?{=Yg1+q7pS?{;3|UaS+rD-uXaJuUl{(#XeaC^u^6`@=Ukl~S@_Y=Y zZ~vDCSoE)MaY~9;Fq*!nqYOckpKV#*UP4fVUCc|GHyX(9w9sf6UjW$m9J*tuCYRpR zd<+I6XssQ63XhOPaa`>O%{vFfaq1vG_~7Q~xM3f6Q7w$Eb#2*`^}C1mG;p3a-<)sb z0k0MDY}m>G%Ux9MBYj1$bNX4`Armm{*V`Zv*qE%}(T^MF) zLJ*HIG9WmA@w3A1QFNVp?j#UXT8AeJTLM9g*7^mII^?hUEn@fpqDuwh(D^F0=pKsD z8J4QVgjCu4c9bBFw~w8*11nm45ftGmvT?e*;0Lk}vVo+7-aN^_^I+d-VwaZ>x(2IP z8lU~oMmtYqv|9ORnu_r;srq>l5 z&DG>A0t(cHMg|5}!Xq4qp$9h`rHQ9xWL1cn%)KvXSk|e1i2-*wjP{LRC`MVB?|N4^ z_hX3;f20h)7kx(lR%2u1R!_nmzb;@gyonXscz6EEZ!G@aVuK$j;973aH;$P*!i>F) zH-_VcyIr+qY}#PcE(ve5vt_U{yk>4Au!``&GVuDrG+q23qqMcWKfgCw31lhOP4xlB{#W&T zwR21nEx<3<{LVDip)pYyJD~0)Qc?#$s}yNiO3u&c=$c>PxltTh)r@v5(F)siK~3M9 zD{?a&<6gmX`+u^;!BE-1)x90UFcb1mZ+yw5jjrX~jLsz}POe@O6^+ z^DMHmH^*R+emel0w*(P`#Nf?o!1wOB0*0kGTc66u^yOuF!CT_cZ*k(Iqrk|>R)39r zTfH^O^Ic80 zy$%wLO6y0Oy`QW;%uvR;DX0aLV5#)C;t8`t(sctW&ufa4`lK)I%UmE1*9wJLO!Sr9dTrM>wDEQG;Nma42^uJB(91ThcGX(LRYq z1eZVAF)Q3D`OeAiU*Ie#D+l4f&n^g&gJ>2Pm!Ir*soDEn9uyN6XQZZjxmjDUOp7~D zZohrI3P0IfYV$l=|KT;z-~aFD16kUkc!8$D#DzyDRbzO+UTsRH2*d}BcKm}8iOSU! z6)mSUH8pWqE9@Y$_p)W_b;>wdqfvjR@9iiOquw3vuootEaV>*9(tl}BC2v`-nNeA= z=~COy#j%L9L@>0r z)L&fE?!NF9C{}QFgofM-_srYI6<>#ivi;s&!q8;%fy7XWv-pz;o$|&h;0u(HlBi#F+Xo% zg|oj&nE{6ujF8)quIEcvT6*fxJapYFPPi^kScRAc zlb&Hxt7B?DI=N0>E;(!K7-yELrB)sf-e1>yuM6kR8zy;`!Z6=gG+Jbqs%<@IDf{}@ zg8zp9i~fAkHsdBto0G$yE@W@LwM~?roG(>R)CL;Z zZJd04VvQd-nlwYZDUGRfc^!#O_BQj<)4#m^_RZgo(pOi}R6#5f!AnbH?6aC%dmXbY zG@X{6ZPMmECHU?`t<@2J`w>P~Urgv7t6ZsVsDgjAaJjQKp(!y%mTlaMN{(r~unIHF zq=0Y?9-cTW=-IOdpoKP62ATYvxTYNxY%~6NUg6$m9Y>8l2z#txKNFB(DY@8*WnS;| zZ?|=ge)*2Dr{L3WU0hwOeWREK&v;8WVvX<1EW+V7W!Fg+0OLx0jF-_28q3kAq@)N| zef$UuW_FItc8>Gyr1feRj)?B`=*fzq#(qlcuzlJf9HHzg=LQ{;ub&@i%fYBaTy~+h zC!g#0S&(Kq8A%UI{mFAYj9x*8VywFxPYlc0qECy{y7ZR$MSwWgg2dH+;Lv!(Q*S$O zgub@xZZuKQb{a{y@T$h61nJ#|d~l?O<;K1wM-9?fhOM%>3V4)Ehjs3bPCX@NcqiJ! za-q02X!sS;A|4jrKQOT6c|59a`FiWqS8p?5QGEpu<)a{BiO5gS`c$ikYeGsWhnQf3 zWpLVcyoMr@&y#Rlx;Nu`t)oo0c(M6e<{svmv0)eC$%5#LUzR&sG1rqqjzG4#*?BzL zcxgjIN++btZ{%F`eG^cu`IdriZ@}1daWUTe0w0 zc)@ul81*{!W@w&7pQz_7J-t^6-Rl_Wz*-Mjbr2z|?D#zR(y3>u$;r!j5lDk|cl7i9 zUiL)e$H_1rPz;$g1JU*l*nh(89k7D&uvh8|+_J5TJR+SZ6m)dEM+9;CZkzhSE7#)$?Zng6)fk567}{a_B$vPVZsSpv-32f~+%l+Tw*7 z#lKcyX;h2!?3eb*6TH(u9=0&_@ku-Ha-UX(dP!FPWf=~z%GQN?QnyqS;Se9jZwP#F zIvu_a4-Y4aW0JkhnR9Ix*iB%1_C$h^sggNYbNXCek|T&B*Ff@9B2x1-^(zPSA(sn9 zZl7ch^?K6p*E0s|dso*jL7-8<4}IB_@dc1*Cd(}J(^#oZ#T`V}Gl~!P{L}}o@DD&V zoZsrA&>wVw6>5Y)iXfI#V?)UJEvG1Qi`e*d4Zi93ZqKqz>R-i264ea~26E7C51moM zs6Po8f12;^Q>`BCv)k<_%8gH$SrEU(AtQwR^SuA|CW&#{)n-`Be-E?<$*eZ1TmMp9 zE24y_^pU-VG{OW@7f^!1An=oj*mGnp5-a2+`k0eLUY*e+?5%#{)Y^d1<%fH@6jBg1 zz2uWuZSqs4UEG%s?JCahT6sTzPrCS(xsJbm7;lE0gt;Na;g2jQJ9}v$fK7Q3Ew8o6 zg>vqJtgMj; zWFoT*28hfImakW^wl-cN*vN6b zJJ@TiMDM)#^QNQ_+3FY@J#e?l-X@5mzF!=>6502e+Lzha1=w`7mMM*&78bbVC7#DHe{z7dPDHp`5TvI z;OLVU)w`e2dy<@f~!k5P=oAK&yHLFlk&`vEHQv`GOkXrkX9|6+?$ z>`KB3#6!Bkm!8Xdskx-zHFO8=jUbjXi9xVZ znmYbjkYZUbU$E(jyLot87s#g1&58f*ffA)8c_GP2Pw9;ADEx3raUtXoF6J!x9)?Hp z0c0To3qaAxLXZa7g$kF)3a@u(um^JvC<>e=s8~kL_+W2QbU%S$U!8^@CocUr_oPyl zdxNc%UEaLWr#x4&>AO7H4P-pi;1y2A4> z`6_pd1g2TAOtDkdn@Oz-x?BpEPaSof)QwfL6laq-UqdR&fSG@2Hm^HEu#EU{9yA&F z^zajeDi86S$iPz)*S_fXfbhTvwBL9p3xk9<+8y0VRi?EjR$Zaw4cVK!;`wdsl=m&_ zo5GA4RCKuAA{A$ErA+9OH;6rGYA+O~Ki|Fn)*qhDF|F4TN0vI_w0geN9*VQa4@%V$ zQFH|{`5?GH`uV=1VD`3~#?sC<=he5W&zwKum-uoawAH)BuFG`NEDoMZrcFIP-cKNh z->2Fg8ksJJZWe7&Z(>uoKFu#yk@;l$XLlJsi6WrY@>}wz`Q}A%j9KP9+e}7KVMlqJFTkN=#Ot5kJ;6H1RM5)m$kiQYMW0oW_WWj;4#$bO_h_N2jLkTY*_o zg(WE^71P>>NgyDiz3*B1Sz4fG9QJ}yFLwhQV$|Qs(p47UuE{~U)?L)Y0Vz+dCCgU4 zpY&XNHo%UuT#2gUIPy@Kd#?O!d1GvdMaXa2hazvgf6VKgG|;z*PXgc z&@7~_!Tu;y{>Lo+8Pg;%h)jS&l$_&Wmaw0`2OfMh@H54W>~~K8t}jj~xOAq!fSQw- z(t&@iiL-=pzBfssO?y7h zBJ%=q8c~wH(piqtOP?7ulz3`wewM0}3}h}pNtQUM{+hMgey)1W2=M_W#*8-C=g(`o zaMSV+$6_`lb5`-}rFjGtBxv?LJV+{ry{}_JuO|6r(g?pKz zXT5enunt8$C~&8+RD83yO7tmo(4PlFj_L-knaYG=!kn%n2(FyJ0+p|4{B7z8Gsg1~ zw~`AAZ}Lc#PTq)4MZR}Vw+hQcu=r$WOV?)p=QWIO+>ov+j|r>QR@WE5PvcqPZ;S~!iKivvl;ir!tCse3J^o;TY?=( zu{i2Tx2jlJ(&YQj!8JBGsAB!o4^z;j^lV{%~w<_x@wGpoV zbxESZ^=_XpyA-y@$Wvu^B{ux()SaJ9KHHF#wsPp<3#CV-wKI`!Cyf4LNAdPP5FhwB zNNI8!mi~*KttJkWCFI4r<;%DJgekp|oSG(s~=Vh%^eCbBa;qWt7VHz%8FVtE**(5crSz}AL1bG{&uek!y!*H$NDb|h5Q_IO>8^o4hE_1@-(ESfLX4xSlA6#dM&gj7Y=Mn-)CjDiWDyT(u#v3D-(Z!1PbSTG~Z z3zC-{iJe+N04aaBt`!-Xw4X4l9FQ5jZib}BI=h!PvkPOG!WQ4}H znOkv>neHY5Ty!Pn8xVI|wU)k|8Cs9Zo`?rQM1LPs=4Ku?w9YBg%;K|G`-FbIBKPam!R|9tb7T7_P!2--TbWp@>uE*}j6{xHBB~~= z>wEVgT|_PkPSIf8!~SPe_h86|DQ47sPEjK$e$Zv(QD7VUNE;H`;?ahgoKStdr-QhN z0}|-X#J$DWp|3v$a_S>y7K!<_$pr~CVGjry+X4_pZo(QU7WO4nyQja{$ zB7_~>UWenBfi|(4pxN*}-8@O*)crRc(`?g%(OP7|Hallp89ke+=bS#d6(feVyonn; zi^qq_RgpJ0_V)HJQC0jxLTw*`#G=O0Uk?lc4*e@bUk`pj*O9zGF@hdu`4>~NTNQ4+ z(0Q5PFktMEDG2W&ugli;VC%!BgtObT2u07=I*oBTTjKExJ?#@20n_@R;>-9PZ z7Z9eK6l%a^l$AUUYB>I}8+kLL7}yXQihs!ge5X`uL07T!%tUg+Bel()uioh>XIYLv zo1jmBMVL{OaR)BSuId$2;0d(6$)99{%*;oi(0nFZwRK2A0~H^{_hq_6J+R0xKJzQK zgg4y8jyLKZ)}TW6Mand3w@YTjR|K`OQ}UaAIRz|K=pS6(-$GXFcRm$6{R7HhuJr&f zJ1cU;L%Soo*xvpbvbtRorHFy*dT-_W5TIv!8*^XxI5IA-b*m6`?u5N|EMcCN>6e79E7dk9x z+z3Sz@;kAz(pb-sqB0O4Y(E=lX=aA9Fl4tK!=VH28#6Z$mi!}9N9V%mohi$6MhOz& zUpkxwB$M5TwijFLfz>y?=KrLmqy_*3HvR|1k)I_}4~h0$M9-5g0!}7wI-0|a;-L1y z$B}q!%an6*$$QmrCz_r~OM7J{NG%J$P0V8W*hMWBoWD8F{RHDyx9R#LrUQixY|{>Zm{# z7etXAJA-GP+=cav#+Rm*E`zQTWk=Db6TsTE|8)@}T82k1H|5brYliZuTsLgt?PO4tRi4Z zKG0q$3x_u`ds`Cn-UDg`DRUbKgp%GQy7_Jpjz;_QtK1Hv9B|^~{N9J6{!W^h2U*{@0~O?LPsrKbMU zA2U7^u?_&0a|?hy{iwbUJV)VtJ7v$D)7R6~H2Jq2N>YE($^0(~qnD$J7{(h7P2J0^ zud6#_<*`*T=~dHXs;>*UCZ0yACu5ulL81j2`;XVju5fE{7;{iZ0zIr zlJVf)Sh&f?x|hG0Z#Rr)|9mzEPq|OdMQ+q0a7uVIzxum6{35f4uqc|uu2XbaxoTP4 zu4Cn|S4%vz|FexCd;hmC8Vvg<=l9)&yF*W6D|18&Vwbv}h_0+zTG|#kGrlZf#couY zbe~n9U!eKlCycRfU|hpV($-#E;Umgmzl=SOn{`r=Co^0w?=VueQZsT7`Wo`7mDcD= z`nMGLAyYh3qA=?WnNGYMaKxO=a6_twLOG?3O#-dPtsiAmu}|T;CpDzh(Z&fos$Zb2 z7|x)7K>JYrXFqaYeQs7}y>_yl@-y)c1qo0FQJ^OYy#zL_>dNc7Nk&~hBtI;ev>sc! zeR{P_Ie(-1*jmAj1vO#n`>lIDohVr(d;hD2j!0F@>8w6z#rf@?gspp@vwTV==q~tD zJuvX!Irxzm?=q4)LIPXC+?+MJY*Lg=4Mw{PoiDj|yMJsYqi~J2J56?2#rdMn&xXY4 z#;|5Pk&frYtQQcRMVq01Y;CR@^6W^@*(WmR3%i!zy6MlTn-}NAGfJ(`IMk#Drlv!Q zY&fDY{=#64H?gWC92imvAn=&CO0w|M?^iuU)SP0UsQLZ)Tf z*0$}zSrQK%`f-TEg}fI(xA`z~n)_13u%Q}gsmbBW*cr-?3Xl%Jnhb?UGQ$bPVE1ny zVEr!(kY(LEqY+FIEKJHZAfAG;UEam(0Jkezso<st8ljR_(YeDEgY`UDvmFSyRzG;EV*SXPqWNwW=m(kw9);k~PeJ!RX5bQzCA zC|}1)XsT-R_y`QGWxA`Lt^V4i-Np#>x}$g9VeBX=&~pFFH7q zT^j@|ux${aQ^1YE!-q!?^O_YzJbbae)jMFtqxSm_^}`|*_L)cpM`=2ik(kb`RLKpj z1<6> zE3%d3NmhJ)@^3n{hF=cq|7eMf>pBH zViby>7Z`P*s5QBldyLG91&=#olOtv4ORDeVqv9|-bxcP8uG*H#_11*>o>Eq4a{3bU zvo-dR5kE*#%=k;8yhdrmn+reh(lD}wdf|SmE}9{+jnGa|Bm*i_5R4yHfV_^u2311D zE`5bDHa+$h1X^ca&8^^Tne`ror)%;gvd1HM#gp(W>o+apI@Az%&>+;P1 z98|;Cth!!INC{(u!r;^CXA&0eW-znSfDXtJ(dAu&gPxY|s7bRz$2WTN(yCQQiDX+Z60CFt7$$U4t7if4JbIHNSB!h;ff#KRQqIrUKVe+Nd~x9f5UzK3VdkoB$Z& z33)A##H`0E-S)fGTKDRT>61zh-=;|*yL=oxB}(zeW-5yN9NUXM9eKaAwN`lg$sGcG ze0o4STpJ~(STp-?uh55cW(MGb;bPN2pj3YiehMu3TxqkM^(;D0^_YFOu&ky>dD){Q zNi{0KhE{9iNd|t_jYwu)ZYh&kT{JeNugcxB*;=*vdo5?NkFlCPa?NrImr-w!!WFrV z2S4(`zQ3BLC1Wj7EezMrvzg|RIDu4Jmx4ki1|5ea+u1Aqddc-H+j3%hv5j?wRk(DG zo6OQLzKD8+oPu$dSf<4fhSjW6i>u`35W8p3+LnM<9SU*>O~x;=m54r;LDA?vBZ=u^ zZmz$5&ebgr(8o#?9aXi-6eeL`&lmmW;S)M}QXG*ya|zEaVg*Rz7LNJ6^WV?Y?5TsgV~x!g%S z(+Z==IJ8$qS?F6HsCB+b1r2J(L1W$PN{xbJKoI(BdwCtLfvkLI%Mw}A6{*^(HBPYV zE^4Ae&2iA2Emmu&Z)|UQ(4tBbt9xNI!s@jV%;>ud=Gz$yRZ=R)D52$>&9@mG7zmd{ zU40i9H+$=ofrr~~jOb*{J87Vd$stMmP>3yYEz25x?!W0Y9vn5{Q3vt>u+N&FvNulF z0DAtFrl#iD#}6OM!<(xc2LLhj3$~L&Sa3jHJ!Q(c0!%nokUAkR$@nkCtvd8~ zrb^qnBSy&avy>fmrYf&7e(+!T#w6kybIwz6C$*zc#4x+j<#zJUA>_Okt+-8*=RO{5 zIxT7%vx@~&x#P$z)%+Na?|3#s=V9*s`A801|78>*%eA>4OobhfU75*c-XrHJ0slj$ zHbS>JRtX^qdt1U33QcQ_!AMFAhB*Q?4faB3g zaRtCAM`IyJ_aHp2Rio6%H|XBqCwd@Rp4u?w<-1AW3oCWlJWI>pz(z&`t!eHal}P8D z$X_5S6@dDFXPod-BzutjA^35|xq3d(Q+f9@pf}4&Q`4s_btn(;IwUDXsh8ub_bChK zO#R)A3iokjw&<^NYq2T3Ce6)T(z{K!-?&Xde)akR{qiPpX;Zta94m|L6-HaXW)eIX zVW2sF015}s8+Mzv{yujG`Bsn1erdkan~x9M{s;gMhGd8JVfcff?{dk)+P3j$UwVRi zUOgzYiI?8|X1GR?PdPOui=P!>!qdBz;Ge|^jpHJCh6s(IOU^1nHhN+TTZQ~q=ul3E z@FNUS^wqS5Luc#@^r{-U4EWQ?OZ_XTo2#J7fIsCCmrc@vDTN)E+^dW5g9lO13y1;s+5N}G`4r1{O=x;=Ha&q$ipBzKg75Yb#EBvOG@1DT>^b%24Gpw2iYgD^T^BTKg$>a3H|L$J#EbvC$4AK)hI2z+ zFH3I|@Avn3bb})FT7e3Vxp(840ia-3#{ZjRh8F3V_ zeKGOOMFv^VYQ~2S-XRB4Ap8(o1rY?@h(LvZjSC3@5s!5z@lPU%xO5+|qKS)W!X+P}{J0i^W>V+lgWhea~zTPsR%C6fQ z-gI|J3JQV>C>_!w0!oN-&bB#IX7=g?&hfTMA=Jx5_?LqtE1g!$;JC@OQf=SwuDIvnBeD!9lGg2^z zH`Tbi_?W!?MKi61dd<;t@}&>PcxKgC+Sdx9noxkEno9#T9YTjKcF&o>T2P*nnM#|8 ze5?(chu7E@6A;>&6yA)(=&Z`i65b!0=_I*R-S`{n4hCgELWySE42aZ?&(I_m$oNkn zFb8AdVpyW#BUwZb--{tl6GYHD3!4#43P{Jeu09@o!#JWl^$f-9UEVf?WZYam81gVp z7Ag0c9~G`w#V;-`{RUqjGv#bz?q~@#6GGg$do{DJ$FHh-5a@_+mj1ul8x$}xUi2z9 zuRmsNQ`6)`~Pr=h!XZ9yapmi8D!BrSuUYbR#aeH6Uh+u-WO703cP{S zOVB{b9H?P5FaPGe?S93=1qk9P&8Rm4>rx-apanLS%v?bp}XYtfO zi;_d4ujf1uIi})=$Zt)R_;#U!e-;uV^;VEPqt^~OhGVWmBJ;s4wsfGOL`XCFh}9_JcQVgdslWfr0YW3aDWzh= z=Y|fl104@A={Htza`{J9A}DTteCoc`~Jh}-5Q{LtB(v0D(2?rr_ysRz0PtJ{u4q@ z3>z)Yii{%qwLrJFR{vnljy`Eg<+u}5&JL7-M)Gm(vPYDQARYff4r5GHyU_a%43v$@`d4xOVX%CSq z2)G7v`vN2S1A=$Wf)hueFT<}4OoVO~eEIT=9*gXeakK49%E5RQ(^o32%%>^MbTu>M zhm+$)&C%0cDIxy`aT7*%GSFjme*-?ILO5W$Ct z$Hlt{^-z?uF9+2PdeqvWkWk%Y&(anY*ceZu|+V&1-Q1lNx z=oKZo)BKsc)27kSI~p^+>3zHNsA;rK;WonF3?qcKzO-lz&;Iq3#A?hHHZ-X=dMjdN zEE++kHhCvcE?>ySKs;;F9N{B^oYhrbsO!IF>cd_1zSlV`t~iTWDjY#thNN}8r{`26 zC+-!f!j2j72F-4DvG^Bd?rkQSL?V~~zrcE!2T&@E;S|RyLxP7o$ z+R8$*YAu6wmLtM)pPFeP%@I9Swn1_I@dq4I^Nv)vOURHyepYRHL9_CK+&k{yUYW6L z-kDAP3f(&uYin!1uO}`&5;RZCXG|pGzUm;kDGN2e)ew)IArXK=gN4mvFwvNFK{p#AGT?HiSH%j>DGOQ*tjh0&^lK zH8l$QRXXo65xN2kQ&atv*jWAxKN1N`>yhe;xVQTj*IsF0k{92SB8a(Ox}MS>^dzN? zG)IH`0zv0`rXc9t&mKH@wa+VIlI%y6M3ZS~6?N(DkbU#{M$^mojnOl?sXXN-yv?Rh z$a&VHR|;2YZzPJf@IuC=z(klRq(j_8!XRAu_48*dMJ1)KlD<(F2VFKu<0Pa{=2`fM z-9nsWD@2(tzt&y|Bgy52TaE2zXwe2mm6sNN0?Yb~dU8B@L60$vb1kNZm?eUMsg>X8z&$|lj)NY4!LDp<4si*d`vQ2x+ z8%HWx?u=blRP}IwnzvW+Ix)K9&H}}-BxebU$3(k20(sEM;uN(P`;yT^yUnmcM0wI5YW!v{K?_2_KmEf z<}-IQhNF9~Lz=bgDi*i{RabDjELC)-zsgB&sk`WWVK==rSBtCkQ{{2tdSuz;Ombpk z`404Wwuvl8$)I@sUlIs2P+Q*V~QPOSOCAhhO=akf8z=X z$IY9Z|UzoZNB z(#~w1AHABIny;FH!P#R%0i8-}4R2$PvfK*og_o6go_K%gqQ+0aSfw%YV9S&$d9~EL z|B6Cccy1oh%Okt0y}0{axOrAGsT1I%_6y9ZTexrK-4Ye%g-$>I? zOZNT4kz(E^HWjqa{WFqtg!wj>{Q-C+8W;8%f6$V9!vp>Q9H-ab5QNZgj;Gg~I~m_J zGon}nTR2-;4C95YGZEM1c)RkYI40n$Rej3M&5h*AWLsps&qvsb4#*;}-o48Y5NW{> z-sLh}-F>VSk>*$(bE^Wt(>;~3c;`I4Y2KL-ZS0HjaF_wdKwaJREU_!`=R4QbPfcNg zjc>%$g9;r!n3ctcntUDOze{MR@}ti(J1B;C(!PO+2p)B~0R(q0Vq#8A{z1t-2?z*U zB#sti7|}}iO`6B-12m7Ixps!V4VRjGB26x^?*83~7jmzSBv&bu$*Zt>iT%R7FxqfM z508|CtCXH%mXylwUu^q2*!atr-Grr<47P;m!+#K20lQ1^pJmj0R_+2XmsUcL>@ z_8S+;Gr;-M>vNyqlD8BHajRdx)}K>Q{Y<+#;>DXT7W6Oc$@gW`9?<^Ad2{ba=gUgp zz7MipF>Ff5vo&xArc0$pHdz3}QDp`QPxStqCarn!yoZr$bs~kvH=FQr z5KX*r?4H>7wfu_Q>-7FjjXz0oQo{WikTR0>L>KHxI=F?g)G z{uwZxE+8Y&1Cw6`H@=F0rdLuS*D@`t&cg{$f>{!C9R$2Hyy|9>8e}6gzdM;~CTQjDcwC_+z?2G8M^xB+aW^)e*WT%tgeN5fXAeeR#Vn7H=UXiSAhZ%e}9LN7}~r6M47@slYmC_CJP8I_|{w-iy(4 z8@F!WMBimnbo+T|{povN^b&=T&1i4H;h0xP<8 z37=A)`(MB7&m6m=YB~7_XZOZ@5bVGyfC@%m3nTSRZES@}+!~r# zYJOcc#f_I3Zo>8Nu4%1fAhUAsxgqm#kWLvwvrPLjO6H@TYK*k%0A1yekB{qizn<~L ze`uT)1E6ypxJ+hrL3>L{+OR}Y*EVP#3579;6qmn$KSyoSS=LO$CJFtK<$c7Qdgh?) zy9%x4(GL!#iF2hRyIB*51Y0q@$eEMuUwKs=VxHpo08zQ7w3ds57^%K{fKb!Cd1y4` zS3^-lPx_mVoXozRPq7iLxsf=is;Pi^53QCZ=A)wjEzH3mY$G&_hxgkNG7&Yhsg3Pn zhg@u)sm4(tS#v}^y0^>uRjNTn9I>T{pm~^}KTCqQ+S*PSl;Xa#nQyjS1%dy`)#H0u zojkL*uj0dYk;@Irpf{0dZ*TwQHSzr0l46Q*UqAl@MK0P8C5Eerm`@HNxQ3FgnA{;w#wr|let2lA=xkuf9G*=;Pu*I>t$uVe_Fi6#ifBD-WrOA*eLGc{1*ck&x-@Tlt z0yX>N)_dXM;lIOqh{)FPR1fE&JiBzs;6>jbRQ*U=RlD-+H9r^!8}tA>qy^Fozi=K4 z8k5UB%?x&7V^qJ*Mz+hPyAl(ZAhzhB%;)yFWQY+rm-Lod<)rv#_!ZbT?&22yYHXyd zsXfF#@v6sTr!n@7G5i8vkdMC={rDp{*?~__;pj!kdCNz3KD^{HPE0t7$xv6SzDX3t zSU^;`%MJQXoUM|!mJT2HJ^o;y24XF;62sR?xniZ{cUdT*< zcv;&N2dQ!zkTt;kdyWz<_fRy$!RG$k#T&yyBL*f>JE9W!+#RZaay#Eo{oJ&%H8bl{ zBXpcz_^qQ!jK$rczMxe^wt%Q6?UGztzHE2Si0Ija>(BQ&5C%OzKrtqL26qF8xq{J` zhs#_a*{$1QVf@Plu-53!H5AGIir>^QTx%F6eceMhO~TvuOJ3et8sxrN4cVkz80P3~ zmy{TDLpTi0{TLtmt%z4Om3MV?IMFX5444dQxFc@SP}A2c-hEbg5$&EX*SuPPSg441 z()pOFQ=s;3Ha>SPp>}oq-AMy}&qWMR;vDHWF_VD&`YzJ3wku50@*6!eMAEPtMn&a+ z$LQR@Be6efLIf+FNnKr?Q?Qwz*5tBK^E#gF$7Z9;EuXTo3iJyNrN_Yrk4AtzTsD+i zK;M1otutgfB26?USBQV7W^0gNovQ{fPRdyd&_@|~!M4@fg6OnFMr+shlgHCniT9~P zZT7&?5{Eto98@qTZ)*}3YR-< zW{=Wua-;-di`IE)SnKWMQ!Sv$+(K%egy%1GL2M!kNo6KWq}cW{^o32OW;3cRaJM|^ z_=b>@IW2G00cqPpWjka&2c`%xm#a2 zs`AXF?&QRi5qu#Z{phwZ7 zn_@QjBFvIQ9(LGMNl1!Lzz1BCuiuQG&s=c`bO(khFI~Fe?KkT?2%1E<88sbYlwRop zBC4LR*L?ZU)Cufz%?)w!GNZ z??We~i1XIs%%5_b@3sS(U2H!aSwmTQQ>0VWn_bCXld5Fsqil z2<-914Om{|V&LwvuT$i1gV6BvqOHZI;;Y-l+D;n_t!8o0WQ!eu`7r`ITpv@4-Ml+k z1ENgqXid(0Xq-&c_g-J;Qv6@&dq{4kKxjMp3!o0UK-ZS%0}YL#8NHpU<9yAs+UtWv z%?UGfCUJBo_tPIr!dE84G{pP+8oi3RxN?kkRzDgqE;v5{l=m}%r>a?YVg%RhFspQQoy*2UqSq1@2+pBl$5;(zZH24 zwFQXz<`x$SfBW#M0bSVWMXRt~A7*{$2idrL*9EmncRMHju(gdh=EKJL{aEV_Ql8kE zU17K!hM#QC%EMf!faMK+(Ypp4ezI5U9${&yBWO#V?||t0CyA|EjFV;$By%zPDVYr){VHxj z^Ls($<9%~*+G%RXoe4_uBXW=*nq>69OGzkrGN;9inc%fx!FmO6A?XakUcP(_TO>yp z`QigZG-4}?l6Q5N+HZ2AWaX_+NCvTE{mmGxQ^CDueno}+b8!)oa?~~9KwVSwE;YCG zmBki!`~)iQmZXxX?LbP@*?DXm^*qd|JlT>+R8MNMz{$xe%hcTb@<}ZQNk)SWA_cEz z`G#`dH{oyGq}7)gmZ7E~DYLH$FMI^hy4AFH+QS)rk7a|NJ5M&u4! z;wAM4jiBbL0oR}TDxj8QzfcM>K_e!+s{Mypk1}~33MRKKZEYnMzE0Ra49YWm94SaM z97og5`r!|=c#NN2)SI~z)^nkeKNMnXw|I)=);N90S81yI-qkqvGgOk)G@mrOj-~Cu z3mYj>p6(I%XFdag8{LQLQJ@@jPM)mtB1yJ#ChJWYjon-n3JWNjA^;g%57)Kf->a`) zy?R0-wk35xH#PH8w`SBrg-)h74Eg}oY_bG^mO&VgKb;;~w=4Uz>Hh`RD`eL^8h@qG z3)0?SfVj%0sioCJWHz0oWupgOzB~ISRF3nFAp)O1eR}EzhQS}Lt*vWboWHm`^%*Hb zoFsZ3b@mc_NIj%N)!l&hU)&(0V8X$F(_9#WY zOZxV8(w6QN0!1$!pwFlRjKx=vK_kK%>cgX)nD#jX4f47LT2N*Z9|H%nIq;*w@BfhA z5BKp7$JdRlogSro^;)^z;vTy+Goz!26CgIL46vrcLhvfA(ZJ2{ycE9oZ+dS~aJ|pZ&;MRj z^d}lBB~Fc6RvedPpZ=Ue7Xdr2H8!QBSl@+kp|4KQa>4Ojvj>XMa)8n*G$-(8^{je7 zLnEPnl$LGK@RZ=X;r@s*kA006YAs;va#zZVeSkjG5uhFFZ>g0wPlT z;l1Cp{I!5#Rc3+=x6!y)D{aNcZ!%t*$o>BL;?D3E+*lSXtvd$x}m_J1GG|G6N!VuHt}a z#VCs`#S$vJ-u+ScXQJ;7ZjS%pt2AhS`K<${OC}(d|5Z~%!}~jM_{N$OhWZ|Bas}~0 ztM16gBq(GxfoKOz)nW`uaC8mGFD^qfr4VQZoM-;{aeU39{FlMAm+7!3c&_g>?KR*h zEPQuAz<@be`2I;ot*jxOhuzG=f<#$KNxZDG@^lMY0|!$b+i%$Pj|9&2j4jJC0- z>BoV5_S^XQ`1V!RQ&TfDor@nQ@2nwG1lFFPj$UjYdjcoNTS-Z&2~?+?ic3mrp4r+y z(b3gC&gWTXG3m_Q(ol1XD8+)}#ReBACj(rPk|Mwvq+k+O^(2g|=H3teW1li)C=80t zHXcgoo6pmtql>CCGXpW`_&;OaHF{|h#xn&j>Y`93xh2WPmFEOj)u}ve--?QODnPj( zM1M`ppFe*d1XuQbn9mENjyNtXptnp-0XEA7-Ca44)zmoP-wu(YLWtwBN*+T)L!waG z7p8Z~;aM(uM5(?LEJBb1DpuTUsM=Nu&+_jCjw)(lRj%21mCn}IQ9z4h@+Ekde}tkh zCs^}d5Tw|Luh)d^;*}wFrMS|m9}$$k#(JS>OT`y-dJS*H7RQDYuUz5d?FN7A#T9FG zuIT^%uWT$%f|Ue@WX3CV<4R~wHZJO1Wq&`KuS3Vfshoe`NAB?9BT_`!BVSYIpYP7b z8bs%@M1y<#s3m@aNlD)+SD8=Bxvt`-76IzBXJ}zATxyb8h2t zQqAAL9z;*&ME$n%kGt?G`m*q9xh~P)ciNbaMNaJ2WJq7m;rM@je8X^9$=9L(c^Tc` zpAA-mh%@XGibrt0fq|kcBS8J{o6^^XrEepM8V>sM=z6y9v;X&le*gTS%ttHja$zf3 zrlH(4O3sMpnJhsGH5u_k=-E%)3w*DadmR`q%Mp8-J-TgIbcOMou_B#?kk0(AwwGaIO(FGu>Ge$)ZY6zGH2?6%I6b;PGA~GkKCQPICmWw z9ksmdb7;f|%I7&u(=HRwmqD_sn3Z9j)(Z~LuCkFQ)Sd#K@-IqW9H}?~@h1s1%6clH*~A=#WtV{-P*U0fXZ=eZ zFz>}mN={1Zgz9<;iDC^1keWkl_HrE#6F;1b+`cChGlY)Bsn0=aOMmpbvc#jm%f2!G zpLL?Jb4|aZ35zE7IxS)jC7j0zq5D@PViAP5XL3O-l8`~T_l-4*I5k`bQt1OJ4$ZW1 zq_v{ALn~7e!tvR&FJna}K~48mk_CyToc`}$DGhqda<$ngh|!#<3uT3@0QN&IVgPsU zWrA@!_4Ox3#sPZ=aA+k$@uv^HCj&IylRyu;r(et|?E&xn;=u5~dtQoPmw67U#V>dl zVfFwfNVJwSng>x$&a(h)jc=^9T2I_Rm0ckf=Og2TV9RMZm8U1fu{#Ns(@t>o<79*r z`CS*B&xExR1oT*2JS!}JHy-Xc)T;@YNY9J)MJ*zvv}gmN;l@B#BuJ)Keo|vYOE|jF zdg^U<$7S8}jA#tYW0~Ied!5*yMvCQNmI!Xh_RI30kC(Q+v8|N=IZftUHiHkCqN?xz zjfCggF!p%K=8$<$h*tCHgvaQea6_RsA?!dUpUIG3>U8qXdVmndGAKg}fPhLY04G|V zAa!Uw?R{XtXWWRp12wiJ;M+C@gnu~vIPl@~f5YhfKQAVQ-}ygd*Q)I_y5^#Q{cZ=A zQ=|x_7+mF8AyH1;t)~^B#O^e#W08m=`8<`ZhTi;KZYN0QAQA{4N&SsQq)-1@o9tcj z_P?nw8TJw?79)~fJlA3Dk=;U-zL1njw8WN|=TnFNm<#E{GK$@b1dMSJ*_s$I9_yGpxu7X*@$McUv=wJT( zWb6Mt*}C8mw08cpC)X{0KKy=ve?Y>c;_uh!cA(#3`xf^}<|NhMS_rH2{cowHo3g zaSf~cY4|#5oZj;+?~P_ZxEs$q$9AWkoBf6FNc-R4OCRDYY@r!24ExN}yhv;EAe?8l zIf36qaYFNUcr`aEoIb#8iDoTF7+4 z3;Gmm6p>t=L#6un{OHt1xX@~j5hwhvX;G9VFHOf!ph%yi$d9H-S1Nn`Z{X9TE-dM6 zzty5Sp*rs0@VFgtIQAj(zoTTn%*o8jhtwHc8hxgU)mb$31nsTh9GwU=$EwWVX~QkR z@;9pehx}Jr3uIq%-lc3;XP}jhvQL0c?C}d6^Bc>Q63U1Su{m zIsvIH?a|2I1uBzG1C-?K{B6zK*w}UOdwV_#1|TM2;Lrt{+@c^!@2m0j=|vq-#ioUt zy-dT7_}RE0HGivP;2-Ne_Cy0X?d7lTd5{RIAECbMA-9fV>GpSze%<%zP?)-b| zi`{@)fbP)bG4Fq~xL|Vv5D<>+>h7-2udY5Xq>h?g{=N_qx_mV#!v;>qB6r}KB*Ouw z7Y|&-Az(3XSlQU(EVG*45KPBKy@Lx&`=aT~N9HIS$T}ab~8br$(S>c@Yu%I-mfW z7#b`lvRidvj$WJh-u-uutaS{*c{>Y)Wu1)RnLHUf$2IMeh6_|M*_ zGie_bJkdd2FkORDDe^#OxO1)1)l^5CpI!8fLm<$Z1iS2~)l;7%vlcjnHK+k|w*%Y& zqfhk?mu!lTl!|U>2{w_2^Bw13{5kpg8j{gXfo?uIz`9Di2gItk~?+Z!z`1siHY|qWahWjG# z+%Vj4&@OV!(JRbN7tu_*&DK8FLZb{>0&15>K2D32ILu}_MnpkQ~k+2q{ywLKAM`^;#bSUR(qPc zfkIWq1IF3;W1m0GZ%T+;{mv2`iWx@VnE9)o?g$Pp7~S`n2)ogt`16wX&I?Nj=H971 znK!>r8vn9_OZ1Tb1_3=OYJ@mkGj}xXGhl?2{~6&VjL_XyPTZ=iKq3{TchpmTI3H>Y zv$3}(T_v(hot>XA2nHR~y6WnK6Er0pyLQ3t48XoJD-ph}{s`7XiIWWb{JWPcZ{Lk8 zICm*?Q6(S1BX>_Isp#%J&+#L^lWh!cAB+{wYs)9*RUb7tw=T|SF8Eq5d)nxL`lW47 zCHMAiAfO%z;(o`xf`*!tufF<{G+|H1J{zfztPeisaZXDur8abeuiYSW{hu<-{?Fnt z*7fE*YyiJhLh+ea)(UDiF6F!4Q>}*EXEx92jR*25?MYunri+8Xy9Mim558D~Cf`bp zii$3Q5UU60Co6A?SH;8y9)JXdfmAYD z^SK6U_uco9#eF@N1e>Et1GPD1H(&|o?LAMs&YtJI+P@SH`pMz6;EmoKX?f3av zXUs=EkU%q*s zfALfDf(IOe_~EzFl9%8j3)aFnmbV)GB!_;ye_w|>Lo$J(z$Pl{S;MQ8Dpr03?Os1B zva?s3kp%SbNMKcTyA(a3QZ*kNy7hcq`e0{CyPl?u-&xpAhP>SLtUUz&aX@M?LB--TnbeS5hOR#}j3&Ds8~g_g)3 z%K+CO-c2Axtpvv`m&KaOV}laOVf?eIB3Ml6XI!KNwqwOjm4BM)|GCCyr8qud);Zi$ ztbA9hsjclc0I%3wY`c}kXRL0u3(k1SE>JW!<@uu6@k$#Uy}PAN)o<=yLq)j&@h10ZY&w(aG$FeuI{?vN{qbNncFC}9)6VxKxlB>S z>(D+Y6SnR2?E6Z-IM}zDHAf~5039a)w}2+D3XM!l-yuLU635F<>|JSGa={kNM;jv0n}zYlLR{S9ow!gYt}aRp|T_uWe=6-x?NDg z@4BJSV&*3gtPKI*FO#D)bXx7IU+3aBvWl?Su@zFbluOPn9T+ia#i;uOX4cV+piAibhOXhn-_;Il8RPV zpRvf%T#=f!R|xrfP(Fd^Q-}_b8%{vMC$*apv-#op6NIv2^&TX9VkWoXB*Ykh?S+;B zeZmz_Q?(u8|oYV@ZxiUv3@ut@+l1iP{#j;>}K~QHJ_c&>DWx(bJ;>_E#-x z>oy2FzENTyLnA|t{OaRh-t@|qJY0mF+;;%LE7@~N5m3B0@pWVASs`N~IkybqGhmIq z`Q+Z9v-1sZF1^OWG+sUzCo_5xM^Q4wJcuPZChtLy7J}QAhD7Kt`@vlt5LnZO2M1H0 zg?)YArtLpK1>qacA10Ng$-o3w^|x^3RvsHOG_5dtC;Ugmh-G(xH?`JNymPFC-ixxMB{M zN>1!(ALaSv#bo5QQ@Hx&iov@V3!TGo+j;&D#T}HNa~-`~%XsuXQ(Mw157_3LoKGqb zFOBvH%w-{5MoFcheLG%PQ062w{}Q6jyc!m6B8ppiuq{OX6=xUC<`2uR0a{F9Pbn4h z08r}^ByTM*i%EfTxb^6}pMQk*F_RT1jSx4ti8X0Z<~TWCg~eVJrzg7uDtbq7bsW1% zAwg95Tm?Ph((NZUWkPsyi9ftxKH|W|g>PMCZ+7#s@^K5?|7=lrW;81rkKOUuhr2z4 zbI{mzO}Th!r{Kr0n>*_zJ-WGGP^>9?)8e$NLheg1=2obI&Grt${WlxeF|&A`kwmX8 zZx4)4eem1-k3wP?&&bcu-yuQDxMG6O8QP^jz;LbVXRUE3fAge094&*W>-$hQ_H5&r zVLhU2wgcI)O)puYLyzpLqkPM^cI;nUQlnj_0v7ak&&H1m>Z@V z4#+Da5wCO25fU{1;(J2d2;5PeLkN`3x|g4PwMpDdk<5K+y(?tIq34yRVNs27T!ZvQ z@K8vxxwEp7A*te4i>T7^@X%&tA(6{HuXbM33{7M$;#5ALRC%>$a<|U4*|mLV|KMRv z3s9y2bO{HNio(VRDMV$|aP6&z1a!s3DDVRcC%BG1sHv{5-FbbMZa5Z?l7eUh$MSU; zMsnYX>Ft1Bt7ASXK5_9W^uEISjnW2X#13#WO`v6!Gsisv7ga*1W_B4Otr)nzkhaP) z$k?2?>5qrrh@I{V($*){FXVBLX=bLZvfh~=IUovIgYl$<%mPK&RPDA+jB!MXS%_I8 zNkEgnWTWn4{+nIRnG15$$3-fl9usl$V}N@{jN<3N;)$2XSzerTQi?!FX#ka9CktC$ z@gO9BQF%8HcE9mbgMuOv@6%5p!w0zCmI-^=P9ENXxHb$apA4&d3Lw;p< z1#K%HRsYy@k}ZkaJl0d1nuURN1q9#4hcdQ5Q`}d|Fi<5c?S4u&Q%Bj)=R0{ zPd~G4;s}Q_HGgZ*hyw-4u(hM3BTd?sHATwb6gZwoVrb^SqSMlz>gnt2TZSD;5Q}|A zU&a+OR`i2=1lfl8tpd4%Nr)|?fL+}geV6=>2$4|hIOk=SsEW7b2g!FHu>GB$^qrz2LTl7@IdW zqyLd}G=33{G@p#tP%hDCJCBEl8SfDqg2xEBse&B`jgN)h>n^`R+~+=={vg%}$bv-* zY=DAl$O&y_v}T3wxpH)_0Pov|XOmTm=P|KOw>R(6d{R{bx?y5iA2s(!q)N0jLJE#X zjt(Y;i4RWAw;j6P_GQ`C+p?t1CCM{wy}#NvTMqpu-8We{TuX1yyjlx<6m2Rm{{3m` z_@R~8dwE_%ZzKOdZy&RTGNn8hMN@ zh(%3FsXfDE zwM52p-*m21yaplSaPlCR>3AcVP@79i90~x zaW`|Db1F61*P0??C(+EGu9G?UxpiGx4V8e4F=oQ2p0T=*1?SCDU70sWIpZdbOw2w; zLyTiRPM#XCM(|OfvnseytFloe7|=Rh5c70HwN~1t+M-r@kO{f&LgOcS44R69X4}0t z^^F(`2t#7uz>H9;n}VZ753mRDrS8|)-orw6JxKC@i(zv$(daHoEw(WFgFjZK6pqBE zLOSLr>##NiQ}{1$hSF^Z^FVrvX!jp_%Z-x6xx&XzRP5j<1S7TKpN^pF-QqUb4+ zCKD}c@voWt*oKpc<0!oUnH$d>-LEv@7)n@`zDwpryWijrk}L&uQo=VjjdXxq2g05i9y~e>cD<)?66s@!RnEsroino}7?Mb0ap_gLysnOZh zp_#_OYD5Y0Y9irQ3X)oo;591?B~oNqSr;J4nSOBiBC*&h%wkpid@qmGC6x+qcOguC z7Y)g97Si|{3y~E5+2%n2S<`TRFM%p^I?a75CU(44?Df1H2EVn|2yMLgRq{v;R$`Yx zTEPQI@NYwt z#9S=e)Akw9yGa zJdkK38?`a_dw&h>>d}-ye?Y}S@1+pTMsvk|Rkw%0Th~kwI!!-&vPv+25JjL$?OHn_ z;5$xycBh!pm0t8h(o+k~Tz0RzhJK=I@dxwNwa5KeuF03YP^t3xoJE&d1a4fI-w%je!Q$o_w-5GM=#kbS#TD{{_e}WJYuu-PEeMBOP)1mfWp{{Z{4&#YD>u$tI_s03 z9-(l={9CAXi=#|!?Zk$<+%dM^1Rls<*M8N|z-UU<6q_X*CEEX+@$&1yCoXPBNJ4@q z6r6fj1xZo~-5h7Tkh#?Oe!RJEvACEd@a zHf_!#*$>vTd`b7S*Y=jUo(?NIoE~acBK5Ed;h&zU6YBkY&MjpV}HxX zke2XI*4<2|(x6Z7jrQ*&uSaF0l}mS0MO2NiBloh0Q@AHa+Wp;@pvF;sqxJa94X0^@ zn?YeNE(ufI*>30!6@vVLAPg7W%|MYJJbure|jxO0(*oRy24XK0>nhy??rM4y(XaQIW93-IAaFjVhm3(6yGP zSSxV7Hoh(}&v02|70qsk-C;WCR)>n-)2tRZCS!4$n*NM!t%d&Q%mk#z_;0ux z`|3r{pABGy7A#%U!>uGMNXZ*#b>WG$78IY$|yG`RJ9frPY<`*8QS%e1GQIfPBPm0~qFNA=k9s(Kwck>0Mt)5o~JGGCOl@tN<{ z#|F{*>l}DYCEUEyQ&p5JfkVt-XsQ>LCS9>*)6fv+W@$uA)^>Hv)uvPWI__l`;ajm- z1k~KmH5#|7$d7(zjNLckqbjg5dHbd+tVMu6)k7_DnAMu|D${pu-aJ@uwoYJ8Kaudb zjr2&J=C_2P4vRVwxqYg3;hk!)qirr!{VZ`uZ!&-o^f&8^}ZIVLnqUKGqbblG)QwJBQnmr?$S)F(_FhElUoJ_v4J@O^6F zJWq6U!qg54pu-UqpdT+?$1_>J(>b3iYvC?Z@*)naK5PryUsU^p5kauD`5YFuNMJK( zs}Lp5mgr+5nK7yO4E#G~air#II^Kn!AI(&7WlZ}^6fmdxg>wX3-1p)W!|1i3Y~?wA zZAuw6**A4XKEHkLhd?fvmp5qek zH}}J>D=`P;rAc{tYQjv(5C0e=tA40CywERcUGFo^6tL)`mpJ~@>4c-#mQP%LqjQ#D zx*s>Y&h*>LbVXFm;O(nzrNf2b*y+n4=GJTYY<=B$v3tbCmu#PMwcYrKlW*Q{J?M_( zBVm%5jI}r}p%yt1o}u+WDs%mmuJc;SbBjjrqY*(9f$3$G)qTv3yKXpwXVS@(i?l^e zQ8stSl^W#;tOAq*Or@6brk|SVc$3=YsU!rHmPGBn8pqycBKz?25lO3*bQEJtls9hu zI_X_GZJh3xo&7mcgyv`wSG#PLA4SoDmW8CL&dW;UY1a~m*8|EwdoZive3rJM8TK|G z?f;t2>QADeXKYfE<7b!KbLe*OF@CGC`9Xu!#q^L$d6-0SV}lkmU?5r_KYom!?f8WC zMeNb1dY>yRW-_Yyn=+Lcb6fSIv>t!lE-gHL&#ZY#`KoZl*0jvZHHp`c!frgSyZ`4_ zvnXw;4D4+XNl9Y>{rAXS;i66qIKnC>pDJcTFDZM zRBFk`KMU1rA-)WTD%&q7Imwg+&Hg{8zA~z+s9Rh5Z~*D5^`wOQbuc zJ0%VcN00_-kd$r-xtsU9@Axib@Pi-hz4lsj&3xuF-4}+LQH~fC^6g{!^-PP2+1>n0 zB%PPZ(2bU8`68*tB4SBytesuu2YI_PMYEd4mAGm$5vJeLOGF@jb_`c`T&~otcUX&` zUYPr8OzLmpggj*~vLcz*sKqk9h_6(}8zEJw`0ky?Qm0}%DT!y@{4)sV-H;#J84mQl zrY2UCS~UhmS3!~|MpcD^ZoD^e^IG&{ym4sUA&*NLRJeRY#b^R`;WDbdPnRf^0~Fw+ zA$bhGl`s**V#kQ%h}yvQ7~05n@XA*BGPoX^YoDw&W`JgRFtaTeF(NB|g>eLF*xdZk zWY8Wdby!{_KU1ruqR9aKyk!|gAU8JOl_=LG)hAqP@Y7$(R|DCtiaTwzG! znk_|}IRI|10ZF9s8JTDe)c?k=Uk=A8Y)Po1I}DV)1RfI{uB<)W&Dq%=%26yf zI3!(!fv$0lZ9;&v#9@wKu9iP3Hca6c9k^u$%Z?^CT5mv!6V7M$9&Vk?zan!6vCSjU zHSha><-Aha;T~f^BzRL21}R2BF9aiobEWS&5}LGh7yLJ(?XCOUBFY2TKMrj>aA;c+ zQUqTa>&P4l7}pw*30vPHh1Nr0quxYw1LeTo*9HL7yXZrcA z+90BlVtssOFAN9Ne6l-%QaUP2oC*Y!1J)l84tAt*^W@gdtEcJ%1T3xr0ud(@mdu6A z{i$9X!2X~$Jz|>u5OqE<3gH+S-qMEnfyqfJ#ygjlIc@6g7^LTcE;3O=gnbMJF22P9 zYE*R2gssdd*dZoco&nd$b}LE|tXnWTFO5O+#G5m}YnVbJ6nr+zzxZ+U77EKuT>?ux z55Q>4&Ag3QbMkeJYS0ug-?JnJGU+FC=_A?}YBSZ-w#^joNo@o8f_(Hvn`T!)xu!`S z&;NNv^H?Hd`dEt!D->_mGx`dZYP^}oWLbSfG9!5!6QP8FhR{rcfSIiOyn1J=Cff}^ z53?KUeY!d}iV9+iMw1GV2(EW~gN|@6qkyKvTM(|<>0u|AbD@c3Hzbcqb`RRe=;j2C zO|yoU@k1icwLL{uw-FeGevM0R`s^Bl2&tWrjKA3dNpl~LBsx$*`ke66rA zpwRc**i$g7+0#8Si2E4t4)s*nl3NDKBNE*yduL(V zI|V{}^BMcp*x!~Y#tcuRtVIWaTpK^{=LbEkT*kF7@P z1}or5&*Pqwb3d&f7^r462@?V}5s^m#+Tq=3n5bQMcXt<*la=-G(&6F+4~tIUjY>^L zwWNf>&Sd!%8l|aF59^6;LH5olF$vEMj@rb9qToDoS*nMHL=F+N1{oQdksCP8alpzo zoj1OSt586FVA`A>H9O=LOwrDeAUqNkp2*yM(c1{YQJjh$Zo34PeX%5zgcQU+Ae4qb zkQwU9A!~^bF;M}))N%r|g`436^f!ZWTLeJH-fD{jf`UBeB+_@F4rVAxubu>0An4+3m(8MnX&nf)RqFj%_+!sA# z>CA<2j~^L-%2XPhqn4iGSbhABLgI*;(2^XNNEJvyNMNzFO*Cw=$T+*UfdN!m3JVia zsQ9Gh_oZ`#X4`3gU{;;T60V5Itz#5_t72!@wp2tS&y%T!5;Pem z1&oFtM*iF)S`u<3AbYnOncrnimcyTHEp5SNIt=C_#fJ%)DH0Dzq5lZt-gS^a{n zjEZT!wS&y;4T-{C=e{S+GlhnoUcJviT0`kXpjva~UW*zlcIj($f!X zdee(S?w8JFXtlp=mFUp$NgD_OmNGIjjBYEYRE{>omsPR*MWK9e?9|YkIAB7pd8FQM zvJPo-nf#=1C=*FJZ{3anC;kD(ivNHR!K;<`_V$YM#UAg+El8*|?z*M1@L4*KX?VZ_T;kT?j=8nP5VSj=xBX*;a4#&gn+d-5#`g zC4l<4|IJvRlpg9Olb>qM+}BL6FV_Tnf6ihyM%LiR5V}8a)GCEx)eVxYy^uv;7QvAA z83f>ojY=7w777SOBJarA3@tJwGcL?!ScGFX3HjtQB#k&sd@@uptueq!c}(xWVa#b^ z1jRXJNT&Q33$UvYZuvorzMFE-WT_0qot}(2fg}})#1ajkUU!j#A@&Boj%E*iU1F5L z>ufzY0wK7Ma0U^HFg7l(8A5bIK5%S(YdiB#*~$RUF7gQN6s@~J1H%EukHd=iUSj@5 z9UH_AK|jZ|R_r2W76l1&{GJ~GV{t_rNM~%VA!saq>5ys1>dqVnHF1*qR}Tl50C&Q_ z9$@;FN4!%$_(Zy8HmFEo{xqE)$hl~v1ZrC1s`>l)Xd@>t&m3Ws`JTUlRFzLAgLBGf z7;?&4r$;Msk02FzPMPQ%%DOGqKMqTR3}8Ji&ai0CxylqG64jsKEg?)qkwsmXW;Kmp zNC8S+aJVooiyp_-P%M5P9rh@4(+?f9dx&GQwfB5AZckQWRUEr{VQ>j$J$Xa5&!%Bk z&FH$N{#TA2?=9I1W(ON4LrN2uX!)=-3~;yMQcIkkY?6)eAcu76o0NCzV?JRFf3i|o znFuN9IysZ-saY-{*^znz#%kmu79fp~K<3df+Hn1^>YHKk*&Unu{f(AhAZ`<(bIqeq z;TJO@iz@eTBv@D=%=PXh==lWBYcCYDP^R(7u8ijk*beQ3wiCC`Kt&4|Yh@6Ze1 zwU0>VU2wvMBC!_)I(FAOZ5mmG6Hc^5f9EY-OGsn{bd=8%#|v=EzgHyW;58GjE`+1w z3h2V_wM5!bBP=s!`sGE$kJmHSwk-^Uj_Vzo69rt7*a!EDgY=O5-QRmlXMSh{)Y4BZ z_q%-HByibH{Jj7eSQ1e5w_JQ@c6jpg%l9Eh6C&x)vMp`Q;6p|!g8UYKka>VJSWvbf zOel81(OZU7`4f-t4P+rA{%DCm7~Zl1-%maK$5ktX;WB(MDH{>Db(|Ma32v%2`Tp>k z^qYG*6A&}^IW8$@t?4)Em{Px|DA41JV&evzfX_uTnGz@_9>Vp{w?P-N0v|uiV+|s% zQxB?bTO}Zor>DKnwLcc?Zcy@R!)>*0c-w?A5Z42-K=r>Ue1nTE;({p$a~X%k9nF^h zxJir2r<+LyJz!U0b&JY!`u5JbeMoJxArLPowNo<39 z^-suBh@t50T-(MGOCE94Il%Df<|7g#|L*)O8Yve}3MB?QhPk7*j1gz|*u5V7VblYB-g}CiP^Y6`u^4Uk z3P|KKxUNsb$9M^F%_!yxxLE#i)KBntIrWEX>SGuR!oNc3HQPYs2Ez|=oLSURs%f(Y z($+-HO65r?R6C<9C^vwFbGRL;9#U4P7tzyKcHSLo5c{tO-vPHWF8+tfjR2M~5Hif$ z@|eh!cv%XwRb&G>fRK;ac3w6=4A$^*OvIKBf=d~ERL+z7W*9|+BSmOohZCqKB0)`o zjv0c9G}=TQbdvPMCL9$~eX-hraX!Yf&W1u$+O4-r!9|}{q!@e(EsAq6(kc8`Hwi`= z(o-)s@kOA0;D&){UIf)XDZS1}7Si+tN$ZG9fWnBA%K}PVEUaft>V`oaA|hpuLPi|$ z{E@+0ViH;DK?)Yd>uGhETX6f;e1+J9-{3$!3YNt#}-oIf;XYX*_1BhboV<2gk8z`r-fN z8k@=hR6vDVz8HmZcQhyjpcN95=Z%P>vOFC<$`w-2}(>-ou zD9O8YR!ZoeNJ`S@P<+xnZQC~k1IR25;f6&r$X_w6<0i7&t$hSGJwr*7d!pdhF-Znc zR&wN8DGj5xK_Y|aTq|zhD<6?Koe#3r`McNk*4qY^994-|)w^yNj9Mc&UPDf3b0oT@ z&pV9*ziMa0`dSJZwZc0n_~D0RNXUdb#K zD+JiFAvt`Dtd>w!Y7wlZ2sC{vmcJo$fES*@=)oa{s#aKr?N z`@zh~$eqt=p_tuUrn`|ge0l9;+L)`xxA{WQ*M(}TIh|TSKtPC+GQ=-4*O9-K4<7>~ zDKjMnmQe6G^L!A3+=Ok-8#{sQ>KWMxYqo>zHJ3G0J#9AbTkfl3WSO#nD+oNYllM!H_}j#TIGiSrMMmkkgelT6 z+8Aygr4n?@!5zDkbuyh%KEE~UW91&cjFWz9o_LyK_ha^W^2-jUUs+2GF0&UH1;wz{ zc|(j`cYAb&FHzz(a|D#=a6Ews@MLca?}RXiL)h+!ARjw?49yAwf{5v!)pEc*hk=nm zsj#r=9Lp0_r=`9WXfR^+hetJx3)yS0w7=bPmBzR^hid#L^&iK4X2u>iK6co$kh#9hb!` z<1cn-ek}Eiko4oYg%6+iX-h6Y1hwKEuk?vw9V7LVVLp*B%M^OP{VenRBdN97tj9F^8{elK?_SswJJAAtJuzJ zki`Aeqc9h&Y_jwloNH69p256u5GX8cvTU?55^wm7OL)LkXyCz)Us#bAUJf zD6oslWSO_=G(*QY*$(aDM9(;EvbB(grjt5i=M6LN(ijS9Hx<)Kija; z&`+yfBM?^>GM=gqg>ly)`1$#boNn}PpnOyAZbu9O5=@exnMHvgdd%;0*=ce>_ABD5 zI->lCx(y2y6qHbGFXL$}w*Uyx=L0;n7N50bi`1fG|0yAL@ zkID|V@qtpBkn0KAj9+gd(vs4A{h|=Tc?00m$Xp~qxJU(|!8TNAmV%YkLo6AAYf}mo z@|vz1vKoCZ%)P`?%05&QXRi>UO2we2MEJ#1H0Cu|6gen(t!!VSx9aDL{_4;5`>I$dFV&*hD~Fa1n$YpZS_Kv3bU+$7C1>Qf37SZ9>;#hxDq zqR^EDJM6q|697?|3Z=gLCg^0F!y=4kpNe)k%$q&)DCYJXWVTZwDC%5tH00~kPwk3O zf==0j#1(LNY8?6tC4EHFJPGaXZ=w=BGwGkONDXh0VFdd&B>C#&&?3I>PJHE_3B|=b z_#9>w?2mHsC?$Z|_#{cbl5@(RsN0&4Cd4(Hm6VBPhBi8g)EHaOMfqM>p_y|fT;)Rq zniuTlXXWH`9-=1M0QN=<&5%EoKR&B00f5lV zfCld#5#I7qQQzDgOiOzLEg&0gCf!R5?^HSsxpO$W`s1Rrc-$M05%I1+;p-W3^L^d6 zVjt9@vPAmqAn{33gk}0FavwkY3O)rc(FJHF(0gzxnZh%7 zAs&5y_I7uN6yP61F(`c!BYtyHjDv(BWfG&R<>8R%GJw%%0s8Yq2Yw;>D(eF2&$Dy(!(&N-;gBgt95 zk5)%4(!~JU>eF-sVhqA9S)EY;<;|fP$AJl-$UxH;rW>H8Xh?L$f84V9X zwSqBzk5BoYN5ms~NWRk>gVBe=Gq(2h4xs1NdWG>u0r=)y@A;0CIiVU0uc>ClI_B#w}e=KGTS(0cq53ClCsMXxW}FV z(uQ3NuZL;xx_j%rbhT>)7`L$&xH>O|ktyOqfw$g+VWHQdn|;`EKfw$g0FFF08T4FK z^$3)cB*(WZr9;Z6l1w^)Jpg>%ojZWWh?$PAEY)x-JX+Z13=kGNX(BED4y?_PD0~w` z3~PT|fIJ}q(;=Y7*BBD#j@j!7#Y4EPK@lHi?q2B}*(8iqA*P1bJBOneI%$RLgYJS) ziq)cRhQTOYwV?_-7P#SrC!1%tY*=H>)~I~43JRk&oEF2sNx7|2r@*%KJ6Qs*=0`w^ zl6+)n2)9-1i+Nonwl#mYq(+jpYWf<~e*O4uIs;}N2Rjdsb{@c2bpbd7U>M{G`E~$r zY<<)1dDDZ9g|+0y#itN*K*fgbI@;m(J#Ax7r9BJ-MS=Rr{t zQw1S$J(MM&Ejb2GPEM9QLe43Qy96pI)_Ak^;;+L&s@mN7s)b*km#z}@+t17&M~8}ONEWSjyPFis6-?YOV96wf;3v53KeT<1t~ zd$v24YU5Na7oP>q=4YQv%HvN%0MahB&f#W#?sT?+fB3?FV}h#sIB^DTw_iYA5|V34 zp#yZ%2NA@~>OvCP5Kle4C|$LsxrjgQJ&lG-dB)r|{k8RkJbl4ekc?Uc-V>z|ccCD$$IP+bcWb?V?~~Hh7uzykrL8d`b_$Bk zfS{n4SN#j+1&R<7)r}XISTNO}Pzkwc4uRU^Yg_sQ9WCwmw4g0ni2_VaCz&})OxBUd ztNE^Gddz198PaImHu?eD)^@r!iXS;FehaLHRg2Z)Tr8A-6zv2Zen;cWmht=C6d1kv zf{IgHcU(4EdI!je@1=v{X+S z2vlLS;^N}^2?+@uN@F4rc2uRwSg90gsVw>8XsLXHyFZWa@Mpxtb6AKOD`pAmw{E6m z?K3LZ{RLFtFU2oRR%`eaP>QN((S`S8F4q_l_nL#;JNrP_(eA@|0^-{6gKrrxaY)mE z6&4WZI>FFy$(PojFn*2h6p?X7t1d2?=JrP4<9y5mzI`*6%qDY z6RS_veL%&G_mfSEn5LQ}4$A(0IDk)W=0*4_1hdXap@Z}BKr&F~bta*wAQ=BG>BYE_ z>2d`22gzBH*(?N#<-a(dcdE_V--I3<93a$nm*jCO?8m{bTdlQAVOYPTnZRT=^AdV}v7mR6a5L2)@1H-s zT}93!Cd;@LD+_c}&DO*pTaAW*T@TF~T1;cnC>pE)s=RWzOxmMBC+~WoLRW3J+1z%P zkw2CBN+It*px3W_y~~?j6c7$#z}w-9kFM=%cQF5GvN*v)c*aGi3=LnldsPjb8$dto znzm7s6R$xKF5pX&aqF7)bJqxZA^Mc@j%+&=+!A48Vuk@fRW$VUy*EIQ#u%XewxR}k zL2DRXr9$6z5s3!n9KsDY_W@QPJD?#;3keZXOj%i31ecUl^lP+g$1ze5d^-ggVSC{E6q~=?Xnj*NbN~p(Y;)c;EB@8w@AWQaipaqbf(%=wg93EQvxP0wj%?NvT$!#lY8*C(w3lvbtkIz7b$&ZP);jdfYrH zFi;GU*&j_V&NE`Gy8x0t=0F3%y1Te|?@vJCexf`P3{2vYzc+q)5x^rSS4-j>Ogue% zcnNyAwzIakw|@eXa03GnNUcN5uVO&aAemaZecvwNytLnt>5GlV!^V_ZKH>|}&}N=##qXAaRSB;gf*JxOL=nQ^OM95p(4Ow`~ne!lzfeHh*B-lfMz zLz)eoqIHUq7rxEIh*9+vmw-oX7#6iP!W_xb`d|&SCFjJRzIkyCT!ZpvfN(5V47d%Oq~v zubh+fSFKel*^NY*t&KB_T?t0D70H=-IxGTQ8x|c#r*+p?T0VatnZG|jKd1HdT>nZ; zWcNLMQu&fDcRx1RduUgp7;tk|)oW8yQes_JQlVX0X&*0_n{|I~M#gTs@rUTkLj@i6 z6^GqHKAmA}^xn?MEXcj}9XI_%ZO@mro`9}l-YXxgwtVLAaS8^i&`_%NBHh)I5K+n*d!B15 zp)3=TgtG9*yA*74QO#0%8hD0)=UNAzZ`9wK;|AO`^FKBowpG17iH`r`M`hNU3B*N? z#klXzx%>uS*MUqlSw(rJ?C>IfH`?3^v?AjJO`(xuY*Wp2-&aDht%axlPTtno`ys)< z)$n=ju9~m7g8zIy%G3l?s(_46cZ$uf}JLjBj6$DtE4^$G zqJEBJx){nC&Tb=b`0n*+a#$I@Z2#(B2cRi|Tde47Ge?+}`lXYZkRr#S{>PwoLFkCOvf@a#MWp znd1r{)j`c_z6}k%cZ#2Hlw2D78C=RSHi@G}{HeFTffVeJQKv%U)N^*BM&)+*)AM4< zT4LU+%i}G9&E3K*a+Ch+=QOEIQm-^IyuQZo_o}eDI&iJ1)ptERnE{h;D9GI<)coh< zWS?KNSOfouVNa-cUgPfX5Z_BZ{n&d|wHRO2sV2IIrw-)^ONs zBgy>wbusu?Y|!5|jet}vvq>tz%nOg__Ffz0YK1^o9)$>(b2yEbWK*T;>tM8Hb?*k2G|&Vg)6V)kT>+uPYh&0+gv%< zhM{2#(T$%hF|TGgKASmu4z0#WlOg}%UY@9ZsirT@>1o&k`M(P!F1cT&3JDz8n7x80 z=0e?AC>Wcq)-VcBIA5vzPUd#CC=TDcJkDnRgA?t9YF5s}#q!!8=eGGHyd#$LLL?73 zC0Sc-VwNt{o^+}=zo|H$qcxg9ZW2}h?dbxKO5JA)zsfZj{H9Hd6PmJro4i7fX&e)C z8Th-nU8`R1N7B>T`@6f|L<3pn`*>iV zWcyfUP(d!jxC1P{AIWK*gZ%tFt7;VA397d^2So%;=Hv;_vJvTD&eD6>kD15oevml1 z3)1(TTlnY#ab$LT5=sd6g{OI;;y}j~A3(#{^g2RJrPyLq`1LOV_3u4#jht|&+eYAO z)$wNb=G?YyC07K{J>P*cyfTAB4{a?NuG<3XMFy9c!lz3PPPxIsx94{D(#YP7!&B4k6D?~EDJ(s7fogbaAEcKMYzaI?{fzRO7ev(NDo(Cyh z%6{Nge1g?|J)wDSij!Mft02FkA}OAdn3|>HuUrCLn!mWuAk*}#y=pK2I=Q$hRK651 z+@7$r9D?bg=>>yRTc9Q2YfRuJP%ODZZFXZ!vpk{?Gmn|GV`%#L%_s z8z~f%?8nOcct`ZfKuRRnr13z1z;rSD3;kC3^7p-jyWu8`Bxr13Q<;ie@s<};2cvft zj1uJE+nW>LPTPTDTz4Gyev9I+N*Dq57Bn&e)(z`)>|Fc(FWqaq5Euo8p7% zGZ**HonuwCRPgMS%RaHKcZWKv7Bu$awJ3FS1Z2n9bjlYKS03-|uueB#Ivx_;ABq1^ z(L+B9`F0zUg#?fEf<(;I?RnoaN_`I=TZw&lVqk zbCWZwRAh`iEM8X<9wLwVia+fn3Ou{IKv?!NcLTh=;YDs`H&6QjikArB300fh^ecog zi7mZr#zG=>3okny`5$`5rY8Az>W^l&cMj*KV)J;p>XDPMt)u!UnfE|RBR!z|)lWFE ztE!^*LbCh>iOs!m-#aJS_1XgKqQPhPF(2h<*0uo?xE!gj2{3TkBI@}1S-MJ9nmJmaQUtJDkwFVx&+F%7TD zCmu*bu~QB?>sGh6v}u9@Qx3`N>Bq_RzABa$S?o_~Ef>i$LMP4GUr3kgKKFRI%7v9O z%eh02Lxu;UetUs@R%824F!o3rk%0G{gizHD=)At?)+1)TLd-w>>&*Vm_do})r+4H5 zH!YGm_T5?`fsb}<2={+8hg|=*I^htLtL0xGP}--EKJ%oDWpjoNqmd1Y5md8|RaR<$ zlV$$J;g0y8+4Qt!Lb9wkM>Oh4w?fgyS*@G~6ToSl`smzB?_(OfCO8ZK{ja zJ#J8wu5Hiz?0U~}bHA-*AZSM=%WLQ86^W#lp^#`{VYh?MEu)gL<;s3xrX(?*q>e$a)54 z2uJ=(_eBc^Y`mQGypVV4DXVyinQ>R+A1{)Bv2rT;stv9eJoOexM88^4cEKpcR)%fU zBOCEH`~JKI;sJAIya8fYNDm*+KQQHplRrLg&A$BH3i?;r@cb}S{HoZFc;^ZX-V=%W zf@|`)A|_3HWxGT7V0pyjDvE6_x7&A?v)Wx=`N%Sw=i6ZHOA%gZ>n zU-}RdPkg+(7K=ZVYBO~>R~n0x%#Hu-e5&SQzeWT`19KVlazf(%z0DShNdLGFd1(Il zz+*pOAT~rxHxp+yVfWCui-Ut9v4Q%O@gUzq@665~wxjBM8#h_2NA~z{e7-}#>^2AK zE%|3Zs!a`c*t_tIyYaVx!D@kv?gnRLo)bK-%-4GUsV|ZZV%=y+?WN>_hxohxu?D|I zg$g0Sx4FK+G}|pG+nbcXrTlH)*M1Rqp3Y(vEtbm{2K)4UF!zi1dQse_;>S5=+_}oj zHTE~1T9NOwVlfHT2Fbft#%=EStn2m3UaSTZ-ejUhTs6PyTeIeX_FpCtbu@qU(S2bU zVgJ9IL{bm86MH7p?t5Zma$yI)A4UzK$@UROs?Q?x^1+HK$ItUTO=ofBVjgmFyie~;|RxS=NA`No)9VZYp{-6hW+18m@4pkVqo z7B<1~cZ0SfU(*jz{M|-1U#P}?}+*4Wk*$2Ye!-eNZ z?N-l~MbK55sGr?dBwDk3zklcRCX8^8?*jT@X3HD^*wgP|`0KSpW6<_*1h++V$*FQ3QTobqL_U6lob{tlj1Y94A za&Ii3pk7rta5#Qjqs_3Pr1sRaLtpW?px?u5=M&TW_WFK7CI9PaHmCLWwtq9Y@ggxC z4qH2I(#aP!-@GJ}btMNqGdggiVPO&}S&AgGy*0^U5*vDrHJr3thpN<7IC0{)97lRGY&tear7sD#4H(`Cm zv1SY8*wGJAT;{{MU-Ko}J=%|50WUCLKF&Y1B%(cko~_Wn2F&d@xXMlOUX($14G7c$RPI25Jp+3 zn|hV~Kf@|0;lzvC)KK9UF{$TX(z{WbXC_OdefRqcK|MB^4*(odmXq=} zQu#AX;AFS^tK9zo$(lEaD<~Xu@-fOszI_s7*sf_9$`1VCu^v}x2{Ktm9*F#j)MBqO zn~2ot5>nF?RC4|kqwM&6Hmk+srp+miLrt$e`j$jgE_Nh9I@$M99r((Mg|0I$(Vc*i z1g&1@5l-G1TWb8-DxeR@>p$q&wKGvNS{5d9pJp<^n@k;K2 z^qO|Niy4qXCPC==_4(7zFd8sW9MAasr!TJp5@{^1+RzO$O2jz}VSW`5#cSUcKvTg} zNwp8P$?)h0{YpS1zLu|@vC7)5GWbDb7WoyMOr^pIBui0R3dS$8+E6RVu$f{!O4SHg zM=Lyj^hKVpzJL43PP0RFN8HEvE!U(ivh`G4!dRSqAVCrr7R!x2MJw)824-HcN|>$E z*9-{I@@y5$|Bx8(UlSEyz})8CWTC#IcYVU2Mlb(%Z|k4SImW|T|3LL^+mB2YT%|A2 zUvDai2T;$hqsO!e5d+1<<{W=2eR=D6Elrhb8uYRgUyp2YgF;R;L9fbDamsn})ymO* zyDbc91|aCNJ!{yhu9K?NsU>l`H$xnNYWuoe!VejBHxF1Kb0dn|&Cn2W@0DU6V(+!L z3uJLwfgh2<;Gzp4nbj8pr{9iv$8km9o{NDo5!3%L!%&D@&{v|@Uhb~P`hDKUhw&c53{H%tZ0C801w_yl zFsro^K%!^^*Z`pLSO`q9m<8JEDgYuh7MdNPd@@(9&)wFmDtD=s--4XMex>aD96eF6k7oE9^f32rmn#y%1fX;GN!Sj@eP0t=3u@CnTU{ME z_PvEczY_0d3E0*cRL|$I_!WspEN9s2XjX^^pjger30;BdCZ=UIhHciR(m=U{4wqRc z4%>RF$x}BW8XHr$XlkYPRbtYqhNe=vU^_XXuo`(ZZ`|tDrnrZ){MD!PIH8(|E4jl7 z1bf6?UU@e@5E5eISPYa;m%ot#h)N=(?eR)mVuX`XmskALvuT;XxbG!Rtiv89;i+X= z(HROoRZ+gwJ5<0gqAANn=gJ02(84Xt6{IA~rbP(7knwzQU%%XaZR#Dhek^J?D%G40 zNiDB%Tjp|he2X*m2Gs+Li4aJ@IA8cc;u;20SIbn0$8&}GuQz6BdZ0}f37piT%**;-2L|`{pWdvZCWj@$M>7Yqp;Xdu; zeq2*2$C|VjJ`MD>E{pl*btI9Ff@kYF%r&<;J-^>-(S+OB?J^i<#6>#rn6|Y>}wR>AakAiBM zVAbMJ?<@TJS``Amv`DYRoYDY)z%L=<4-$L!I>)%NUyC7SzEV{C1caQw_x~uFDAJ2TFs$P)>AGSy^>d8xWb z6R}BH{y@;gG7WA-uLUBe`K^z;qv+hTGpVyN@3J_EvvE{2)_xetu8^zB$OwC;90{~( zy;I>QuP(+|BAl0dZ*xr$u_}{gCEw!o&BU_Hu*XUG`TVb~6jkE7=}Y2k1jQ{MB==(S zUGdEGP|j)HS>iQ{x>~3$4j56ZSN0#JxxetvaBXdcdRIldgnivr4?8Y>-O+H~YrGCVYA*;)m_<<(lL7(A+H<8E6xIZjZb0mr0|m zWFd?YxbJ?)jI(Ut%v@x`;S3vU?I};ZCLV4M{v;yR$0i#*o^jjQUCV#hvp<6Ty_|ma zz7s;`_>NZ+k92C4H&BTb@|APt{^PC-I#1@X6n|)8Gq&44H`6Y4)PMD_4TSy)AI)?V zb_HIo2gK2YaZnSRJtsa`COeaC1Z~6f5({K}JigCOzH6*mnc349S8oz(sUgltomBO| zP6FKQ=Ks3chSB6ff_fkvan==(i{S)ccsDFP1)$E5DX!>5$V0?e0B*8k+v)jTVNYb2 z@@ZAT#W;s!zX*P-F;`?_3`A%GW??2Zr=o}Y{vrYE<^0s`45rYBWc&Bk3SD@t1+Na* zBak=lJeE~MtDz>8^YX`ZcZgkxNssGpNV6AkNqv`qldo)MAP4i zWq*$kL4*==M3u5{k4vQ|0t-nBDd!5jO*`8?xe<^MJFzYG0|eVXy&u1N`4{`}VYZbN zNU-hVT&VhjU~{5Z0Wkbh|ZuIN6lUYH`Db z+X?&CV!I23IDuz9I9JDn)HCPz;|g*e?%`Us=NvKis~K`C*{a^;va^wyvl|r94)@;$ zKxU*{E+c<Qf-M6NLv)%VLW{#(wC3y#(9%b{I+@LQ z#Y#IeJkw#SE8Dw%z2~G1pb@#J_ndrrV57uNh3R-q z|9z&-;`K%eM8JA_E*hE>|0YUZ!?1!*QbM_*R)X!ItJRbUfZi`gNQEtbOdN@Osjue< zR!Fwkv(O9>nAMuLFuk{36l2aK#5L;ij9rgE%8<>FtYCLsR;H9>7agV8kLCYZlq}k* zbv;}DQ6;19UypJ2ImaiDwTM_CU3fg*#!8+;OaYi3VrAH~P<_DAW<^Bj_hHtPgox%A zbA3Z6-YUJmXn3MR%0

BUPs{neiA1S`@MNXok2{8?`5KBV$0SDOYK-*1Qt(T%!AqlxSi59A~Gjq0CQH>^Fg@U8n0@)(S%?==ku5 z>l3PRM?f~_UK=%4Mq7t*aU{>ltVUW|j7KLV3R(d>@3lhfb z)JJyBM%vX`!qHAzG!X}$hl6e z6}G9j+OEFrMv50Y<~~#N*1S(0>xnn5k1k^|&hk7S;mmKsKufE$o+|X4o^C#VMAoMy zZ1KA;0`g55psOE#cLaw4mrsJip7AX_nO)@uiJQPm@FE~2lWAPzqbp}$IVI*U^6PY|LE^*dmZUtjmhtr!?VfC`idu2 zY8@pIR=K$9HfE{4Qz>r8|6HP3#i-^Z@b}2K7t=3JB`BKn?Rk@`BGKa3CvkFeFgmiY z#VEwp)Y*9)ljw*oDdTvg|J{Z|mg5pEqaYeRv?zmX{arFOaJr-P>Qrm{UQEfK_Xa%3 zU>`fZ{}sA~-66?p`^e8^zT`x;KO21V*vn5_LAqnNZMR=zSTg_;ih%@ISpu*Rz=|PG zQvX}?xDbnZ8zO2fJg9THA|B-&Eztd~2yO|6R5d=fc(#|t_m}OwP+1(1$mN-rB>7;_ zt;qfQ?<)T-nf&M9b2U^TxF&_uE#l8{ewngdenBePDVc=%X-&)dL3C~qFXfhlczD8yJ#}3*+&@};?JOux4YLboKVrv z2l7->Vxwz&!P_H(uzaug6|LYxDJKDN6U(J1iPNZHgW4PkOD3@cYuxhxVS=4|(0|VJ z6)38eYsKP=DSRT*EgEv}EzS6ohy&v0P@ty5#Vp0rP^F5Yr81=o9lDh8ch`X=uEYSo zMl01wojVe@@b4*Q81f}7vDfeGWQ}p|X-)Ee^=s*c zHXs>nMh1``m|$AMe7)8U%C>5~pOr>Yxzu|n9@(**JQaSqh9Y4tBX+VKHWk82y4OY9-GZA$TA>!>I2+5_ppPcU10!ov3v$pe{kmVXp`*1>iFFb6G zAgb|EHlL?Y3BiQG4sFp|=#wWQ^?^I2Q6**q5s73T?a0$FQ&0uLKa zzlgiMt(dyIt$TXie&F9$=v}9}y4qxe-D-48bgGb3?RE=kW+TVbOni5_tk`BW*;2+J z3RTF{_B{OtHFOe(*d#TIYH>+pzsI;bM3OBp;b&&tQ|v5w4gRO9YYdG0%i2krG-{kQ z4I10F+Ss;j+i27@w%Ihc8r!ywiEX~W{&(Nq?tGt_d(S<1j%O9isIC~zjW3v{`pkE$ z-Q0s8zYMCRjV-^k(h1o?1}^KMNH+uyTz0qiLdKH{0$QU1F<}=*{R@euQmHuFILtJAuATVMSO5h3AZ`nR>JQmJ1>G>Z2#F_b&3EGY-Q z&jG~vXoJrQIzDhgN$m=!yPyKO$Jnn2g7>QBZV$~`AX^J%>>}lZpYAvGuNCs`&p1!K z%&f{8DWwX=zl!R90-GGe$MWPJwdO7AMC($oyEW>Lv!?LNo6Rc3H*VZ${s4~S8IPF` z0C3DWU3R*`L3nbx(ClLDG%RrwWD8aGNxuXT5%CzCx!{X#<%tpu6#+?f)md}#44l#> zyV13X-?aK|ub%x1R4KozyM`O+28~!}imwfH);cMDI#J->HlD%T{M}*snNqexvS%$Q zXb%|k+d9U-?bKpU7KdN=faOo!HL2ur=1%>@R~ecukYrhOnq5QdIYMIv4j1b2NHXni zQxKbC!6(N^R9WGv^e$z@Qe4icExi|5RCWmq_i>v6cM4S71KCt%3Hs3^ij-wBgXN7~ zw&XTJh=3Baz4!}*x4pK;+d7}fEcEN{hP zxVh_2h>|EldvuB-G0jRv8|Pma*ZRo< zpNbF#n0Yspmrw&btqGwl{-mPoi^lYrSeCrOCffETOzrKu#y|$$wMDirdV+OOarll5 zQ=_Vx*l9H%VdHRgl5BKM-^sZP(=;mO=7Dio6lc~KZ0GcqW#0%IVi$+aU`qqsH4fVj zZ9oWmv`qr;y8k3c8@=ryg+e2PLQ@_=8VLa_t0rV5W>=*(g+-Z?Ib(M z6CQDj0 zCe5F3ozZLVO(9mZ4CARyl}7fU;5`E?LV*j5Z3j*K(} ztc58B?u?&d#Heu_zn5n^n(`&B^3 z=K&0(z?1uV8?dguM!5#=`G;7oD7!bD5P-WMDUdKfh+_8+(xu_4GC4Iqq4nA8XmIy3 zHO1V#xAon->2JhWl!BMLS5W?lVQxQm;QD`1nJH0;gzNH{>~PRtY`#+#QHYJ2!VEJ! zY-gTUKpHY~YB~r$VP6GnVsF|&?(zcdgIH&QVvemouI+NzLvDAL%Y7hh@t|1L|jqn|i?AtZzM2SX*XkMzLC?XU#ruS`s1WL(8 zfXcC8s&vei0AurnZX;ickOw&_h35gQLwb!a| zaK4^5cQY(Nx_NNI$fS>)&jY>u`l>x=)*`YGU(`bo4v**h zD_mg#EASiAWvwvbPp%i0!G}v!U<=0Ui_kjiBnMF{1qMyEaB{`TiLz!~j7RoFb1Mel zD`$GJ`a0oukY7MADzC@Hft)QW|(CMO{;n4=|@ykEpv;#=AY|-oVjNm`;!u_K>?lEL-s@nV55Ag zpxd5p-@=OsYHdhic)OdAwyo7T+5dXxwS!(!ed;EVOr1#o<=d01n{{%xsH|zv1F&e- z?BQ^o@MS26TP>0h`T(i+C|#0zLQbl;@{?9A?c`Kmf!(s8Lr@DJ_@;dC#Mc zTHZ*A?(wqO9di6;Y_z7xASm`@m&N+DO_q8pRU9=rMme)*i?`)o9p@)-86n&c3>9m1 zf5-(N-hvwGM}q9<`jDn7#F-J=78jPZNY>?bG|{pC^6ip!Otbd8WY4oea)~mXYrDvwH7_k}_9YuRkHhl(L=2U7@4C4$6;H4+%oj|j_s3ezmYPhs26r9yIO!D2 z-uW|tMo=VzhU$LuvycAz6=6|%v`4=v=6AFcBlGaf5dg z<~K7$)0%NkR_q0n=JQrt9XS<=Hfa>CT3g8!{XT7y3;g>wi+=G})%l^vRZw+@7|%}U&Q>zxia z;EJDZ%#$K{NiV=xTPxWVGh<@&j7m=EwU)5HvyK6UMr2xC-MfM6T#lonLns4?5Cr`M z+Ji#zX`z|OD2b~m)&y<1D=I|#DEjZ!=P16?WB4@4{?sb3t%iNdvY{XmY{vlR=wxp1 z!nYa!R0Sf~x1U4Ihi#$FXPZ7BUt*z&PGz^9q=aC-!7)yq3MEgPs}Wa$KJRJsuzz zmm;+*nClN@8&KSG&iF%ftSwC$42T1jJ0!y=ek{VzP4?KnN4ZY>lGz`bCfYBnoz2hw zBBp$F=U~|tZg297bmGL<*IlLhqBz|JDoND@-T&uL5^JDu?*Q0*=d?dTSgcqeGuNnE zu9X9%fG2@aIk0mDNH*Ar8hiy@B*xK?XVjt{yvWfjj@>l+ZB^~-2Q5K3N*_MZ?^P!o zdoNc8GSENWus@n@a4gjs(_zqk5-;P5${kj7;L9g}k25(90hGpwsWW zCzDPU0+xR;iY0GvZJ7T&V^@C2&eZgBkE7Tj zL6Y39FaRj#Gxk*PFt?ujE0cUVl1BX`2avqu^~@CDTU$d%y_`ZT)I>#%rJ&>tdO0Wy z)nX&(uB!5MY5tG~R<|^A8f5YEd(0-`GHgu%EU-}_UxLNs+H7cGAp7;@X*yprsSs#c zt#D6%d45)N>jT<|&4HCC*Ut}*htmZ}PwKU1DDkQz<;QmSoDecX0uJD1NZzmJyz3z&ti;K6;Pla^`*To&vq@tU138pF3{q~x69_qzF zTL`BaCHO#hV9ySV+qoW>$2A4cisE->wb0Ro}x&#LQGHf!u3_I56m;^eQM3PY&fSLm1r6y-;$<%KSS9{V_Dy5&u zN4AnJif$ADsuoEz;M0qCfCK;9c!90|dL}@>K2uwZnQ!4kwe;H<`cqJ0af^phYST~K z@QTvcFA0TQbOB@7L?b{~3QSctxooB!Fe*1Wpbr=v2cwSzOqU||_4Q?|21XNC1AQ^E zgaO|ZbudW*BCerdp#VgT{6oYFl)W@)zD%_AlAi1hp4pxDmj}!YPS*rNTx?V^e6}*_ ze95-(ekW&fDXCm$P;V0|pK zzU4}E@WIx58mQo`ev}QEPr?EY;}P>ZE2^%}gD?Z@gC*k2C{i^X6cm0It*ZX7D~|Dm zYzNDPEFMBprN$ zW|Fd53;_@j&*6w-WfFNfO^>bp-5WXhvk=9 zhWi^mi*-e$*l#@Adsf%)(r?&J=0sV3^3tVD=p(OJz7Qb$M>4hn^5!)rf6qz$Dzs1x)!&i#V->SBw<&t6GN`?9f!g za^d*K#qnL^wCME4tC+1#3q8l}dPWF~&82}x+oS8A@|*kVJo@Z=^J!^(NpbPKtdA81 z0P>oQD;O_&6SX$KKF3q~X~*Yct))dIol=+n>$x&L7;%5#@cC(>oyEZp^YZ2bsoqT+Xcv=<%bpdYN5y0i0|HNh7!c zhQeBTtTfK?17&ZRo2wS_1NHM!dR^4>7aMw9-Qmz8pT$aJ`r$~|n`@CJ&vT=>GkyKe zhuVde23Q4`1tst zP~On%V<^k@^W^Oo>iO5a+f{y3eWt9aV{zT6Q%O(6^lUb5K4U_rP&^UvqDV%d6!K3D))dX4B;2BT*ppa0K$Z^CRx~hrS#Tr+U*hvuL?M;cOVAdY_D; z>)nFZB87}o5!kDQqc{0c5pSSOkPZ8k*De6N4tvBPae9h5ImCV4_6n=UU%|vlpgjG!!^lxk(wk!FG_dWe%vh7}_BUioU zNVy<>^X*HjzQgehl*O8l+eiOii`N3(v0UY&SEK%w6XmKBd*Na?xze^3MXEH;5!V9(kCa9II$1t!`t3Da`P;Hs2V%qg+v3jE@ZdAfX8 zi~Ug>O>E2A1Z7?z)(!^V&VuR1^{#0vB{6Ya#CA=Ek5LYuugzmj@!XklJVUqVY7XdA zy*?5l9#C=c;(Scg+3jt$P9IOq6Ns|+gM5xrmYru3X87K{m+jI1RfDU^loG&C_*V>R zN5h%cjt4Ivzb%k;zIQH>Q#uNssnvJ>RqkII?J@fP6HoBP#(ih|YfjvoR<0y~76G56 zhG&4=1wV(Bx}Hga-t6KK^{|3$q`>_LI|DsuFkQ*6U;rKUg{D?Qe zVFu{lChPsCxOX(FEWUaB+#jf>@;Gu5=QcV78zUfHG{TVjiQdfLa35R_-qc#IU$Dbm z`E8;Vd8Opi=rn44)1krDw`-@^LrEMoiYIsF*d4*O{_>sE`%+Df|NA^WzRRNluTFGmP);d_sS#h<+g0CAnK_$?{b#A z=5h1}YE8dRLwea2_ZMyZ=by?)E|vD<$sDnKSW^ix27>*_wp#5El1!`3o|5%nQ0-?7 z`(NUBzwp`JXU+5RIl*4BqhVGvU<9hc1ac9{%ES&EmyUcXOHh#53;d&VLsr+Yd^Q^WDK5p)5L}xr_kW&i&NS3BbBTbvD#wgJJ&g0UFmw z8L_Rz_yUF`d3Z}Rj~5M`NB}Yv4COu)3}inc#_W8u^rf8*tJf19@168gL;&d(t^Pw7 zrPR(E9D?B_FjaAGGzs}8avM>S zU2x%%@PuogJK_4mMYEJ=r+5t>W@Hnb3?1Ow1yCs)q${VRxZ{0OjZ<; z`Wv}?%?->n%JyA&a{U>jyp1XH_g*>ecFg7OU#yMT;uw657(^dxPMm%A9Ao%)s zfX<)ug3~(7ChYdPMvficN9lv zLoasvq^~5Uzgg$`dhPTC*7MpQdn-Lr?d;?qAIj=?aIA(DdS*D7r4{ph;waPs(FS3Rxj_0EgGIxFy*=0t-D5tI zFZ03fWgQ1ixBwzb03qZDmTqVOGBEdQ{t5(Er1?NC> zjtYn;Z3D^%FAp82lT$6cHhzWS6h2&U6!**~Hc-`{onIthDAofycfY1ooh-WM1yP@m zB;Cfw#*WwJEA&HD491PavQRhE=01e3zW7^+Zw!D&CTulXqC3)#RwB$;! ztp}C3{l2q|dz0Zf%Kg%)D^}~s(Q(&^w0rvLX&pQDK`*Vb@gcC9CXWFH%n73^39jiq z>>k|X*1-ZmH{jRh+F+TZoTo-nU zdHh}n6&B_9s7NfQMZVf-XQQr^Vx#yy$?V4O|7I~{e2j?%N`D!7Jz&g+5 z+;u19bdKL&)&zDu$NiOQwAhY~NjEDM*ZQ|D=i7}}V^_}R=brkwMq0r(R_6C|kN!pT ze=b{C2+6V8ndeyqZvi=H&eSAD7Pz|?mi=IBsFwv|>L4TQ#Zog}T0LUW`fyFd!YJ$V z(nX2nuHKqk9>SViC8;ZZd}nFN@99QBPQ2@`dw6}vs^FCIA5s!PWRZ8k^7Gar>Z%{v zf2*IKm4HKcf$j;$X2!JAix4GK{{V?et~pVr6GZEO6P+>d(K+`Se4;@WdQR_W#iH%; z65cvmcKNR%3(VyaondTeH*4(=#K8sx=xS)FNEO7DQ_k$QRyL5+1>u2Mh(`o^XBK41 z>+Q0}qk?ueTJv6ZvK-EP_-YMZr*T{p_*t0C-s7%I5Hj$ko%$Ch_4Oq+pVg#4bwOVC z2Ry9w;xITT!dprOfYd-%&}4l#@W<0Onbc2<_G3cH%!V{2=BvL?U>S~Pi(y8L-d^k&Z2 z^f*_sT%OYPGOkqUYl5PEzjof~A+J1T@v~S?A@9*HLX@X@CxeOVx0wZTFK?A0<1;X_ z`Hgv(fm;%I{-*UiYQXQR{~WS&Qb?^j_qb&*$B-74hLoC$UFm*;>vnm}igvru=5c#n z1qFFEnBA^Fkle^%yT>tixa^*RneCV8j30l3rWthQrj=||6Hqtk;#_YU@DO^&Wo9u2 z)+ejp$MqWY+M4D3j;4qZ1@v_$Vze>h5tk3U^SoFzjjPImU`D_+9-F(xc$oLtA?>(Rw0vXo;=%VwuE|Stj70LA#;#t@D&sP zrt{5;@OY1qfo;k(0K1oKPB8NK3rIB%!oFO*o|@p=R(I|rPCqug?ZWR>{M<9Vpbhx> z6yxoY$+C%}C=92Qg;?PPd%c6-c<+8y8MO&Jk~8Yol`6CAh`wC~7%y*}@;~KBwo`7L zvSrml+;Ka-FNgGGywHjzX1-xXhN#P3>Kq1x7Glp=uK{VY#Hi7ua_ZFJ_D-P+ayk&Ws?l1j|v?T0+Sk??ray*_UwsD%3+7Q(-7 zNrenfmTWJSiSt%&GhL+|KkFp1bI({szcEqeSh=`OaK)I{3t^4%Nm|7B;#qxr_q${J zz6umw(!02R1cO9)^o)L_(l;T7JZNkv*H<&f$DER*JA=gVjty!%kWS#UI?upG2XeB( z+2v>e)(L)f-Ni!*`{}T7lN7_*S{vktRK&z(A)^kXfJKiky<{G5;~dLeGm%c*tZ2w@ zK4%ckEm55}Kh|hGbQFlWnPD^edCu$g>CX;HS;2&AvyNWhO96iXo1)!q90-_nSWRq6ZLQ~(X7`B z1WZ(@;g*u1U<9lq4z;754hx3Oy7!0H5UC{c7h@xnFG)Ugl}qMLEr%knXhY8vYBBHP zuI$#@x{o`Y^La<=YAB+w*&ScspWh+{#zC)UyT=7rF5f|pXxDxh{*ji#q9<=udb@dY z&~r?d49RI=ChI7{RN;gws=%OGg$GvjE<7Ae zzv5yz$xmLlTL|~=W$~I!a&lwUpWnIZIvqQb{hr+Of7i?Ck!w6lDFdzR|GLKvZ`H0M zt^JWzMqE0?L;8J}iYl~9^OEo(#QY@hw_@8r?l9qG(bS`b8Q`zu*YXk&PE!AvR}mzTwF_8JZUMEMlt4EXx8Xx z1++<;OqEhv)YBrDP2*XbpN=>|VL~)qFZmZ_ub(%3;JAsI#Kgr-rRCaGrpm>@+Uy*A zFjT+GuZu|FjxmXgkBVRY1FZgKOtXp5?t3b5oo$}`ES#H`l%^~d=nn4;l++R_lH=1M zT0>+)mI>S7&?>{89a^K>(;jXQ_%nk|_#Dw2>B@xvlIBaP>eBZM0j(nsmBdMN8RPx)v#XFxGzy6+f2$cw+RAhh-P^tdc$Lf&+@; z`t!Kf+Ng%BV}DwUN_7p?Rcp6JVe3KI51G0AKZ8HChh<-5f6#f-# zl;0bT-0M76ZLC>KK7A~i(Xw}G?Z1O;k$mj1U^RTQw9*NXmXl+73q?V>L0@vhK^n)) zc+}7RJf87L_ciwX`f@gS)z$eC1=mdaFWasJ389NI0z*i2A7$h{EuN5|RObVOW@R0* zyDWpd0>)@hpPh3sA-kkN?O>>8h-FC$vYjj&xUnz8-Gp{?7=Ox$S$wq!(@C2n{^Lyj z>(m0mMCaB44?P0XZNGGkc&B08?cBpg->sZDLeLa&*SJ=XOk#^5u)oVo>{d5z3DIZt zD=VpDUcW`SQrA38sz6mki_X+u_B&arvvXV5Fq>QnU2QhXhfe44#v+Tt=}9e2{xolT zl{;$qR4!QQGwd^m-0QI=mu$+9_~F{J>d36q&S4Q;(-pEKoRT;PaIYAw=Th;>l6!Nl zLYtfK@(GwbEBpKuQ!ozTB~3~Hdu+d4Sg$xe6Zt)~cdVpT*y7GoB2-0|yRhQIAJjp7 z($WYEwI*y%R_nMQ0A($iC!DAMqT6ZFCqee}#kC?};djzRhZ9V59ULCQq# zG(Llsoa-n4U;JooYBqHL!ZRVpC9YjS6s*$}D5){%>GN-#1OEIR*AZ5kzYEM`@an~lfUi95hk%oOtSvN z87oEA0ujj|_tX}e>)UmByvWb;b}jbtvtU93pVIR z1i##eEtF<_qvr9B3|Pe2%4~b==}vD?TW{V$nwT6x1t1+1EiJf_kx>;SuN1b{4#W4l zDwD~B-n2ge>$lSq+D`?_z-Kf#cEN4}8Y0AJq>LERXm7;KU}AE*>L()%##7$Vz%@Y= z3MGd2NFG{L6S8!s{VhHvlo4};zt|typ9f-{H2F%X=Eb0h#Wlz0hqy&Pf+7H}@YRs2 zWObN2y{LPjHR>2lBr(a7ja^`K7}C_Kw$Nfb=7w+s7db~vax}F8I9T@KPc-WErGTlV zm*j{4M12A*S|5Cpt!aSUg#?n@1rxZPH61&B;cNTRo!}Gy{n6<{R7q54VR6E z_EM?2&;J}N0J2S!VVh+VL|%uvYot|V$>m>CEbq~aZ3QYd&SmvY2+xJtOU$6{=yTv$ z(1?o8^6?cw-dC=GsIxfqQTTOgG+|tsS`{W-^ln_sUp><1GL%)u4{s0fTN0b}KK>%_ zi1U2%E681(tL#5byWagv48Tz2c>E1uO;*a4n!<71>DbC8M%g@+EgR?)%Keh#F}+@~ z-8{LfC~W14Mt=84EWha0Z;Qb2w7@RavH9nb)BD%0QgL8yyr)gY`a+xMQ)&IelgQyl z&4T-gSYwzN9-aNU19Y|E0eF94{flVemJE61O|@~aYrO55Dc6T00rIxG-o3uGX8(x^ z{-am0{0-nu7KKnuY3BljtST%g6dPl0rcu){LH%Ps+E3E@7_>6)t^6KGgV77Sy?7WP zO*npky&n}s&Pwj#8F`>q3l$;))WSbut#zKnoSG^o(^unXuc{j8=HRbxKt#4Q95=DZ z)9wTeQujAvJ5oau{}1DXb7n2164&^hdt_hTRAg7er0&f|tlL{>n!!eMy8n5wTr>)K z{imI&4GAJ~WqdFhdhKtS#i?BHh1VxdGdqlU(Hp3!)^(vDPnz#&4k}{n$;`~qYjw!U z3k1r=AeEhJ{l`HyPnD^`#TGgfJglsrr>2;{Xi9=X_%0WK!yorD?!y*kw=Y(V|RnEv7K1- z@qAGhr2VqnB)nwg-o*Lxf+psRnUH!(r$rJIY8CPH9+yTIlvneY)yr-{kqgyJ`~uH! zXw#`Wl}?QDBfQobhuU1(a-W4B-Te z6+#r_am>f(!MWi{RI))7hbH{5jSzq$I2VL;^eIR2O~JXGEgr!D86m2w%&N@ISr?Zp zkA8A{R-io4Dy~qX848Z?VGt8{izXeC!Ei`mjojHGDQ9NmM%6BcpWgZFgai=zA{!y_ zd(Y&a&q?l{nbmq+Nd+qsL?376(4M;#r;7)RtqS3;{e~5(SdoP@)QHBQ^gVctHnV0_ z)l5B^t~WJPtz0!nThmY=&6oIGhhY*_VRX!F7c zZ5IybG68DQB9-<2P=i%jULY@e&(1wxMwtkO(Vj-?N61L6YFBA3JCVFh6I@mSHYxZZ z#&-^8p{~spwgX3wUo-52@PECT9}Go&hb(n6d>%3u&xjbwW}U)TiA$VB<7blG()Y!w z?IezF5=&lfsZ3E7aQ7j@uPWe~vDi=CXP}nx-#p6Ep+SDPuvKC(sFUq)W1|LHyD#Z? zq9j8@F%k|6)l*y4R}Y7isK~l#AQPRPtDiZ+tx-Z0*V2;)|3ggvK!Ft0ksuTgyQP-z zOaju8u@gCD&QMRF%tO+`2^*VsNa8~_dpb>mFAQdCqxrV&F-GNVc~A% zze`H%17Cwi47tJ+NF{-iK^1dA%U7^(5lYV}B-v{zUgzJKL~`e@rT9y3LOYPi&wYc2 WWK+fYxv;^2kGQamQ2AHAfd2#1QGn(E literal 0 HcmV?d00001 From c4886584047122b5b358021a6c21977c259142d0 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 29 Mar 2018 15:34:36 +0800 Subject: [PATCH 173/180] follow comments --- doc/fluid/design/concepts/cpp_data_feeding.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/fluid/design/concepts/cpp_data_feeding.md b/doc/fluid/design/concepts/cpp_data_feeding.md index 9c44dec4b9..aabc1ba75a 100644 --- a/doc/fluid/design/concepts/cpp_data_feeding.md +++ b/doc/fluid/design/concepts/cpp_data_feeding.md @@ -113,7 +113,7 @@ To solve this problem, we introduce `ReaderHolder` as a wrapper. It acts as an e To create and invoke readers, some new ops are introduced: -### Operators That Creates Readers +### Operators That Create Readers Each reader has its creation op. File readers' creation ops have no input and yield the created file reader as its output. Decorated readers' creation ops take the underlying readers as inputs and then yield new decorated readers. @@ -168,17 +168,19 @@ while_op(not_completed) { } ``` -Two important considerations for these programs are as follows: +A few important considerations for these programs are as follows: -1. The multiple\_reader is the batch\_reader's underlying reader, and the batch\_reader is the double\_buffer\_reader's underlying reader. `read_op`, `has_next_op` and other reader related ops will only invoke the top-most reader. In this case, it's the double\_buffer\_reader. +1. `not_completed`, `pass_count` and other variables shown above are all Fluid Variables. -2. All readers exist in both `startup_program` and `main_program`. And they are persistable. +2. The multiple\_reader is the batch\_reader's underlying reader, and the batch\_reader is the double\_buffer\_reader's underlying reader. `read_op`, `has_next_op` and other reader related ops will only invoke the top-most reader. In this case, it's the double\_buffer\_reader. + +3. All readers exist in both `startup_program` and `main_program`. And they are persistable. ### Simplify Configuration by MultiPassReader -The Program configuration mentioned above is somehow complicated. Users need to be very similar to concepts of Program and Block to prevent making mistakes in their code. To make the usage of C++ readers more friendly to beginning users, we introduce `MultiPassReader`. +The Program configuration mentioned above is complicated. Users need to be very familiar to concepts of Program and Block to prevent making mistakes in their code. To make the usage of C++ readers more friendly to new users, we introduce `MultiPassReader`. -`MultiPassReader` is a decorated reader. A multi-pass reader is used to continuously yield data for several pass training. It takes the number of passes to run as one of its attributes('pass_num') and maintains a counter to record how many passes it has completed. Each time its underlying reader reaches the EOF, the multi-pass reader checks whether it has completed the training of given number of pass. If not, the underlying reader will be re-initialized and starts a new pass automatically. Before completing the whole training, the return of MultiPassReader's `HasNext()` will always be `true`. +`MultiPassReader` is a decorated reader. A multi-pass reader is used to continuously yield data for several training passes. It takes the number of passes to run as one of its attributes('pass_num') and maintains a counter to record how many passes it has completed. Each time its underlying reader reaches the EOF, the multi-pass reader checks whether it has completed the training of given number of pass. If not, the underlying reader will be re-initialized and starts a new pass automatically. Before completing the whole training, the return of MultiPassReader's `HasNext()` will always be `true`. With `MultiPassReader`, the startup program would be like this: From 8425c2c859b22f263e213d4fed454890b598948c Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Thu, 29 Mar 2018 16:34:33 +0800 Subject: [PATCH 174/180] Speed/sequence op1 (#9217) * "add functors" * "remove old code" * "fix" * "fix ci" * "add details" * "fix ci" * "fix ci" * "fix ci" * "fix ci" * "remove unused code" --- .../fluid/operators/math/sequence_pooling.cc | 112 ++++- .../fluid/operators/math/sequence_pooling.cu | 381 ++++++++++++++---- .../fluid/operators/math/sequence_pooling.h | 20 +- paddle/fluid/operators/sequence_pool_op.h | 102 +---- .../fluid/tests/unittests/test_seq_pool.py | 110 ++--- 5 files changed, 484 insertions(+), 241 deletions(-) diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index f7a6f2bdf4..5ae42ab973 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -19,8 +19,17 @@ namespace paddle { namespace operators { namespace math { +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +template +using EigenVector = framework::EigenVector; +template +using EigenMatrix = framework::EigenMatrix; + template -class MaxSeqPoolFunctor { +class MaxSeqPoolFunctor { public: void operator()(const platform::CPUDeviceContext& context, const framework::LoDTensor& input, framework::Tensor* output, @@ -60,7 +69,7 @@ class MaxSeqPoolFunctor { }; template -class MaxSeqPoolGradFunctor { +class MaxSeqPoolGradFunctor { public: void operator()(const platform::CPUDeviceContext& context, const framework::Tensor& out_grad, @@ -93,10 +102,101 @@ class MaxSeqPoolGradFunctor { } }; -template class MaxSeqPoolFunctor; -template class MaxSeqPoolFunctor; -template class MaxSeqPoolGradFunctor; -template class MaxSeqPoolGradFunctor; +template +class SequencePoolFunctor { + public: + /* max pool has index output */ + void operator()(const platform::CPUDeviceContext& context, + const std::string pooltype, const framework::LoDTensor& input, + framework::Tensor* output, + framework::Tensor* index = nullptr) { + if (pooltype == "MAX") { + math::MaxSeqPoolFunctor max_pool; + max_pool(context, input, output, index); + return; + } + auto lod = input.lod()[0]; + auto& place = *context.eigen_device(); + for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { + Tensor in_t = + input.Slice(static_cast(lod[i]), static_cast(lod[i + 1])); + Tensor out_t = output->Slice(i, i + 1); + int64_t h = static_cast(lod[i + 1] - lod[i]); + int64_t w = input.numel() / input.dims()[0]; + auto in_e = EigenMatrix::From(in_t, framework::make_ddim({h, w})); + auto out_e = EigenVector::Flatten(out_t); + if (pooltype == "AVERAGE") { + out_e.device(place) = in_e.mean(Eigen::array({{0}})); + } else if (pooltype == "SUM") { + out_e.device(place) = in_e.sum(Eigen::array({{0}})); + } else if (pooltype == "SQRT") { + out_e.device(place) = in_e.sum(Eigen::array({{0}})) / + std::sqrt(static_cast(h)); + } else if (pooltype == "LAST") { + out_e.device(place) = in_e.chip(h - 1, 0); + } else if (pooltype == "FIRST") { + out_e.device(place) = in_e.chip(0, 0); + } else { + PADDLE_THROW("unsupported pooling pooltype"); + } + } + } +}; + +template +class SequencePoolGradFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const std::string pooltype, const framework::Tensor& out_grad, + framework::LoDTensor* in_grad, + /* max pool has index */ + const framework::Tensor* index = nullptr) { + if (pooltype == "MAX") { + math::MaxSeqPoolGradFunctor max_pool_grad; + max_pool_grad(context, out_grad, *index, in_grad); + return; + } + + if (pooltype == "LAST" || pooltype == "FIRST") { + // set X@Grad be zero at first when pooltype is LAST/FIRST + math::SetConstant functor; + functor(context, in_grad, 0); + } + auto lod = in_grad->lod()[0]; + auto& place = *context.eigen_device(); + for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { + auto in_g_t = in_grad->Slice(static_cast(lod[i]), + static_cast(lod[i + 1])); + auto out_g_t = out_grad.Slice(i, i + 1); + int64_t h = static_cast(lod[i + 1] - lod[i]); + int64_t w = in_grad->numel() / in_grad->dims()[0]; + auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); + auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); + auto out_g_e_v = EigenVector::Flatten(out_g_t); + Eigen::DSizes bcast(h, 1); + + if (pooltype == "AVERAGE") { + in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); + } else if (pooltype == "SUM") { + in_g_e.device(place) = (out_g_e).broadcast(bcast); + } else if (pooltype == "SQRT") { + in_g_e.device(place) = + (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); + } else if (pooltype == "LAST") { + in_g_e.chip(h - 1, 0).device(place) = out_g_e_v; + } else if (pooltype == "FIRST") { + in_g_e.chip(0, 0).device(place) = out_g_e_v; + } else { + PADDLE_THROW("unsupported pooling pooltype"); + } + } + } +}; + +template class SequencePoolFunctor; +template class SequencePoolFunctor; +template class SequencePoolGradFunctor; +template class SequencePoolGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index d61407c020..1935364da3 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/sequence_pooling.h" +#include "paddle/fluid/platform/cuda_helper.h" namespace paddle { namespace operators { @@ -22,113 +23,331 @@ namespace math { #define FLT_MAX __FLT_MAX__ template -__global__ void KeMaxSequencePool(const T* input, const size_t* starts, - T* output, int* index, int64_t num_seq, - int64_t dim) { - int dim_idx = threadIdx.x; - int seq_id = blockIdx.x; - if (seq_id >= num_seq) return; - size_t start = starts[seq_id]; - size_t end = starts[seq_id + 1]; - - for (int64_t i = dim_idx; i < dim; i += blockDim.x) { - T max_val = static_cast(-FLT_MAX); - int max_id = -1; - for (size_t step_id = start; step_id < end; step_id++) { - if (max_val < input[step_id * dim + i]) { - max_val = input[step_id * dim + i]; - max_id = step_id; +struct MaxPoolFunctor { + HOSTDEVICE void operator()(const T* input, const size_t start, + const size_t end, const size_t item_dim, T* output, + int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + T max_val = static_cast(-FLT_MAX); + int max_index = -1; + for (int i = start; i < end; ++i) { + if (max_val < input[item_dim * i + tid]) { + max_val = input[item_dim * i + tid]; + max_index = i; + } } + output[tid] = max_val; + index[tid] = max_index; } - output[seq_id * dim + i] = max_val; - index[seq_id * dim + i] = max_id; } -} +}; template -class MaxSeqPoolFunctor { - public: - void operator()(const platform::CUDADeviceContext& context, - const framework::LoDTensor& input, framework::Tensor* output, - framework::Tensor* index) { - auto in_dims = input.dims(); - auto out_dims = output->dims(); - auto idx_dims = index->dims(); - PADDLE_ENFORCE_GT(in_dims.size(), static_cast(1)); - PADDLE_ENFORCE_GT(out_dims.size(), 1); - for (int64_t i = 1; i < in_dims.size(); ++i) { - PADDLE_ENFORCE_EQ(in_dims[i], out_dims[i]); +struct AvgPoolFunctor { + HOSTDEVICE void operator()(const T* input, const size_t start, + const size_t end, const size_t item_dim, T* output, + int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + T val = static_cast(0); + for (int i = start; i < end; ++i) { + val += input[item_dim * i + tid]; + } + // end, start is lod, so end - start != 0 + output[tid] = val / static_cast(end - start); } - PADDLE_ENFORCE_EQ(idx_dims, out_dims); + } +}; - auto starts = input.lod()[0]; - const T* in_data = input.data(); - T* out_data = output->data(); - int* max_index = index->data(); +template +struct SumPoolFunctor { + HOSTDEVICE void operator()(const T* input, const size_t start, + const size_t end, const size_t item_dim, T* output, + int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + T val = static_cast(0); + for (int i = start; i < end; ++i) { + val += input[item_dim * i + tid]; + } + output[tid] = val; + } + } +}; - int64_t num_seq = out_dims[0]; - int64_t dim = output->numel() / num_seq; +template +struct SqrtPoolFunctor { + HOSTDEVICE void operator()(const T* input, const size_t start, + const size_t end, const size_t item_dim, T* output, + int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + T val = static_cast(0); + for (int i = start; i < end; ++i) { + val += input[item_dim * i + tid]; + } + // end, start is lod, so end - start != 0 + output[tid] = val / sqrt(end - start); + } + } +}; - dim3 threads(256, 1); - dim3 grid(num_seq, 1); - auto stream = context.stream(); - KeMaxSequencePool<<>>( - in_data, starts.CUDAData(context.GetPlace()), out_data, max_index, - num_seq, dim); +template +struct LastPoolFunctor { + HOSTDEVICE void operator()(const T* input, const size_t start, + const size_t end, const size_t item_dim, T* output, + int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + output[tid] = input[item_dim * (end - 1) + tid]; + } } }; template -__global__ void KeMaxSequencePoolGrad(const T* out_grad, const int* max_index, - T* in_grad, int64_t num_seq, - int64_t dim) { - int idx = threadIdx.x + blockIdx.x * blockDim.x; - int col_idx = idx % dim; - if (idx < num_seq * dim) { - int step_id = max_index[idx]; - in_grad[step_id * dim + col_idx] = out_grad[idx]; +struct FirstPoolFunctor { + HOSTDEVICE void operator()(const T* input, const size_t start, + const size_t end, const size_t item_dim, T* output, + int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + output[tid] = input[item_dim * start + tid]; + } } +}; + +template +__global__ void sequence_pool_kernel(Range_OP op, const T* input, + const size_t* lod, const size_t lod_size, + const size_t item_dim, T* output, + int* index) { + int bid = blockIdx.x; + if (bid >= lod_size - 1) return; + size_t start = lod[bid]; + size_t end = lod[bid + 1]; + int* index_offset = nullptr; + if (index != nullptr) { + index_offset = &index[bid * item_dim]; + } + op(input, start, end, item_dim, &output[bid * item_dim], index_offset); } template -class MaxSeqPoolGradFunctor { +class SequencePoolFunctor { public: void operator()(const platform::CUDADeviceContext& context, - const framework::Tensor& out_grad, - const framework::Tensor& index, - framework::LoDTensor* in_grad) { - auto og_dims = out_grad.dims(); - auto idx_dims = index.dims(); - auto ig_dims = in_grad->dims(); - PADDLE_ENFORCE_GT(og_dims.size(), static_cast(1)); - PADDLE_ENFORCE_GT(ig_dims.size(), static_cast(1)); - for (int64_t i = 1; i < og_dims.size(); ++i) { - PADDLE_ENFORCE_EQ(og_dims[i], ig_dims[i]); + const std::string pooltype, const framework::LoDTensor& input, + framework::Tensor* output, + framework::Tensor* index = nullptr) { + auto lod = input.lod()[0]; + const size_t item_dim = output->numel() / output->dims()[0]; + dim3 threads(1024, 1); + dim3 grid(lod.size(), 1); + if (pooltype == "MAX") { + sequence_pool_kernel< + T, MaxPoolFunctor><<>>( + MaxPoolFunctor(), input.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), index->data()); + } else if (pooltype == "AVERAGE") { + sequence_pool_kernel< + T, AvgPoolFunctor><<>>( + AvgPoolFunctor(), input.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "SUM") { + sequence_pool_kernel< + T, SumPoolFunctor><<>>( + SumPoolFunctor(), input.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "SQRT") { + sequence_pool_kernel< + T, SqrtPoolFunctor><<>>( + SqrtPoolFunctor(), input.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "LAST") { + sequence_pool_kernel< + T, LastPoolFunctor><<>>( + LastPoolFunctor(), input.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "FIRST") { + sequence_pool_kernel< + T, FirstPoolFunctor><<>>( + FirstPoolFunctor(), input.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); + } else { + PADDLE_THROW("unsupported pooling pooltype"); } - PADDLE_ENFORCE_EQ(idx_dims, og_dims); + } +}; - const T* og_data = out_grad.data(); - const int* max_index = index.data(); - T* ig_data = in_grad->data(); +template +struct MaxPoolGradFunctor { + HOSTDEVICE void operator()(const T* out_grad, const size_t start, + const size_t end, const size_t item_dim, + T* in_grad, const int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + for (int i = start; i < end; ++i) { + if (i == index[tid]) { + in_grad[item_dim * i + tid] = out_grad[tid]; + } else { + in_grad[item_dim * i + tid] = static_cast(0); + } + } + } + } +}; - SetConstant set_zero; - set_zero(context, in_grad, static_cast(0.0)); - int64_t num_seq = og_dims[0]; - int64_t dim = out_grad.numel() / num_seq; +template +struct AvgPoolGradFunctor { + HOSTDEVICE void operator()(const T* out_grad, const size_t start, + const size_t end, const size_t item_dim, + T* in_grad, const int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + for (int i = start; i < end; ++i) { + in_grad[item_dim * i + tid] = out_grad[tid] / (end - start); + } + } + } +}; - unsigned int blocks = (num_seq * dim + 128 - 1) / 128; - dim3 threads(128, 1); - dim3 grid(blocks, 1); - auto stream = context.stream(); - KeMaxSequencePoolGrad<<>>( - og_data, max_index, ig_data, num_seq, dim); +template +struct SumPoolGradFunctor { + HOSTDEVICE void operator()(const T* out_grad, const size_t start, + const size_t end, const size_t item_dim, + T* in_grad, const int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + for (int i = start; i < end; ++i) { + in_grad[item_dim * i + tid] = out_grad[tid]; + } + } + } +}; + +template +struct SqrtPoolGradFunctor { + HOSTDEVICE void operator()(const T* out_grad, const size_t start, + const size_t end, const size_t item_dim, + T* in_grad, const int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + for (int i = start; i < end; ++i) { + in_grad[item_dim * i + tid] = + out_grad[tid] / (sqrt(static_cast(end - start))); + } + } + } +}; + +template +struct LastPoolGradFunctor { + HOSTDEVICE void operator()(const T* out_grad, const size_t start, + const size_t end, const size_t item_dim, + T* in_grad, const int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + for (int i = start; i < end; ++i) { + if (i == end - 1) { + in_grad[item_dim * i + tid] = out_grad[tid]; + } else { + in_grad[item_dim * i + tid] = static_cast(0); + } + } + } + } +}; + +template +struct FirstPoolGradFunctor { + HOSTDEVICE void operator()(const T* out_grad, const size_t start, + const size_t end, const size_t item_dim, + T* in_grad, const int* index) { + for (int tid = threadIdx.x; tid < item_dim; tid += blockDim.x) { + for (int i = start; i < end; ++i) { + if (i == start) { + in_grad[item_dim * i + tid] = out_grad[tid]; + } else { + in_grad[item_dim * i + tid] = static_cast(0); + } + } + } + } +}; + +template +__global__ void sequence_pool_grad_kernel(Range_OP op, const T* out_grad, + const size_t* lod, + const size_t lod_size, + const size_t item_dim, T* in_grad, + const int* index) { + int bid = blockIdx.x; + if (bid >= lod_size - 1) return; + size_t start = lod[bid]; + size_t end = lod[bid + 1]; + const int* index_offset = nullptr; + if (index != nullptr) { + index_offset = &index[bid * item_dim]; + } + op(&out_grad[bid * item_dim], start, end, item_dim, in_grad, index_offset); +} + +template +class SequencePoolGradFunctor { + public: + void operator()(const platform::CUDADeviceContext& context, + const std::string pooltype, const framework::Tensor& out_grad, + framework::LoDTensor* in_grad, + /* max pool has index */ + const framework::Tensor* index = nullptr) { + auto lod = in_grad->lod()[0]; + const size_t item_dim = in_grad->numel() / in_grad->dims()[0]; + dim3 threads(1024, 1); + dim3 grid(lod.size(), 1); + if (pooltype == "MAX") { + sequence_pool_grad_kernel< + T, MaxPoolGradFunctor><<>>( + MaxPoolGradFunctor(), out_grad.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), index->data()); + } else if (pooltype == "AVERAGE") { + sequence_pool_grad_kernel< + T, AvgPoolGradFunctor><<>>( + AvgPoolGradFunctor(), out_grad.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "SUM") { + sequence_pool_grad_kernel< + T, SumPoolGradFunctor><<>>( + SumPoolGradFunctor(), out_grad.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "SQRT") { + sequence_pool_grad_kernel< + T, SqrtPoolGradFunctor><<>>( + SqrtPoolGradFunctor(), out_grad.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "LAST") { + sequence_pool_grad_kernel< + T, LastPoolGradFunctor><<>>( + LastPoolGradFunctor(), out_grad.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); + } else if (pooltype == "FIRST") { + sequence_pool_grad_kernel< + T, FirstPoolGradFunctor><<>>( + FirstPoolGradFunctor(), out_grad.data(), + lod.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); + + } else { + PADDLE_THROW("unsupported pooling pooltype"); + } } }; -template class MaxSeqPoolFunctor; -template class MaxSeqPoolFunctor; -template class MaxSeqPoolGradFunctor; -template class MaxSeqPoolGradFunctor; +// sequence pooling +template class SequencePoolFunctor; +template class SequencePoolFunctor; +template class SequencePoolGradFunctor; +template class SequencePoolGradFunctor; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/sequence_pooling.h b/paddle/fluid/operators/math/sequence_pooling.h index ecb76884f6..38e7802229 100644 --- a/paddle/fluid/operators/math/sequence_pooling.h +++ b/paddle/fluid/operators/math/sequence_pooling.h @@ -21,23 +21,23 @@ namespace paddle { namespace operators { namespace math { -#define FLT_MAX __FLT_MAX__ - template -class MaxSeqPoolFunctor { +class SequencePoolFunctor { public: - void operator()(const DeviceContext& context, + /* max pool has index output */ + void operator()(const DeviceContext& context, const std::string pooltype, const framework::LoDTensor& input, framework::Tensor* output, - framework::Tensor* index); + framework::Tensor* index = nullptr); }; -template -class MaxSeqPoolGradFunctor { +template +class SequencePoolGradFunctor { public: - void operator()(const DeviceContext& context, + void operator()(const DeviceContext& context, const std::string pooltype, const framework::Tensor& out_grad, - const framework::Tensor& index, - framework::LoDTensor* in_grad); + framework::LoDTensor* in_grad, + /* max pool has index */ + const framework::Tensor* index = nullptr); }; } // namespace math diff --git a/paddle/fluid/operators/sequence_pool_op.h b/paddle/fluid/operators/sequence_pool_op.h index 8706ff14aa..c58d677c92 100644 --- a/paddle/fluid/operators/sequence_pool_op.h +++ b/paddle/fluid/operators/sequence_pool_op.h @@ -23,12 +23,6 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -template -using EigenVector = framework::EigenVector; -template -using EigenMatrix = framework::EigenMatrix; template class SequencePoolKernel : public framework::OpKernel { @@ -37,11 +31,13 @@ class SequencePoolKernel : public framework::OpKernel { auto* in = context.Input("X"); auto* out = context.Output("Out"); std::string pooltype = context.Attr("pooltype"); + Tensor* index = nullptr; + if (pooltype == "MAX") { + index = context.Output("MaxIndex"); + } auto dims = in->dims(); auto lod = in->lod(); - int64_t w = in->numel() / dims[0]; - // InferShape by lod PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now."); PADDLE_ENFORCE_GE( @@ -50,45 +46,14 @@ class SequencePoolKernel : public framework::OpKernel { "The first dimension of Input(X) must be large than batch size."); dims[0] = lod[0].size() - 1; out->Resize({dims}); - - auto lod_level_0 = lod[0]; - out->mutable_data(context.GetPlace()); - auto& dev_ctx = context.template device_context(); if (pooltype == "MAX") { - math::MaxSeqPoolFunctor max_pool; - auto* index = context.Output("MaxIndex"); index->Resize({dims}); index->mutable_data(context.GetPlace()); - max_pool(dev_ctx, *in, out, index); - return; - } - - auto& place = - *context.template device_context().eigen_device(); - for (int i = 0; i < static_cast(lod_level_0.size()) - 1; ++i) { - Tensor in_t = in->Slice(static_cast(lod_level_0[i]), - static_cast(lod_level_0[i + 1])); - Tensor out_t = out->Slice(i, i + 1); - int64_t h = static_cast(lod_level_0[i + 1] - lod_level_0[i]); - auto in_e = EigenMatrix::From(in_t, framework::make_ddim({h, w})); - auto out_e = EigenVector::Flatten(out_t); - - if (pooltype == "AVERAGE") { - out_e.device(place) = in_e.mean(Eigen::array({{0}})); - } else if (pooltype == "SUM") { - out_e.device(place) = in_e.sum(Eigen::array({{0}})); - } else if (pooltype == "SQRT") { - out_e.device(place) = in_e.sum(Eigen::array({{0}})) / - std::sqrt(static_cast(h)); - } else if (pooltype == "LAST") { - out_e.device(place) = in_e.chip(h - 1, 0); - } else if (pooltype == "FIRST") { - out_e.device(place) = in_e.chip(0, 0); - } else { - PADDLE_THROW("unsupported pooling pooltype"); - } } + math::SequencePoolFunctor pool; + pool(context.template device_context(), pooltype, *in, out, + index); } }; @@ -96,58 +61,17 @@ template class SequencePoolGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); auto* out_g = context.Input(framework::GradVarName("Out")); auto* in_g = context.Output(framework::GradVarName("X")); std::string pooltype = context.Attr("pooltype"); - - auto dims = in->dims(); - auto lod = in->lod()[0]; - int64_t w = in->numel() / dims[0]; - - in_g->mutable_data(context.GetPlace()); - auto& dev_ctx = context.template device_context(); - + const Tensor* index = nullptr; if (pooltype == "MAX") { - math::MaxSeqPoolGradFunctor max_pool_grad; - auto* index = context.Input("MaxIndex"); - max_pool_grad(dev_ctx, *out_g, *index, in_g); - return; - } - - if (pooltype == "LAST" || pooltype == "FIRST") { - // set X@Grad be zero at first when pooltype is LAST/FIRST - math::SetConstant functor; - functor(dev_ctx, in_g, 0); - } - auto& place = - *context.template device_context().eigen_device(); - - for (int i = 0; i < static_cast(lod.size()) - 1; ++i) { - auto in_g_t = - in_g->Slice(static_cast(lod[i]), static_cast(lod[i + 1])); - auto out_g_t = out_g->Slice(i, i + 1); - int64_t h = static_cast(lod[i + 1] - lod[i]); - auto in_g_e = EigenMatrix::From(in_g_t, {h, w}); - auto out_g_e = EigenMatrix::From(out_g_t, {1, w}); - auto out_g_e_v = EigenVector::Flatten(out_g_t); - Eigen::DSizes bcast(h, 1); - - if (pooltype == "AVERAGE") { - in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); - } else if (pooltype == "SUM") { - in_g_e.device(place) = (out_g_e).broadcast(bcast); - } else if (pooltype == "SQRT") { - in_g_e.device(place) = - (out_g_e / std::sqrt(static_cast(h))).broadcast(bcast); - } else if (pooltype == "LAST") { - in_g_e.chip(h - 1, 0).device(place) = out_g_e_v; - } else if (pooltype == "FIRST") { - in_g_e.chip(0, 0).device(place) = out_g_e_v; - } else { - PADDLE_THROW("unsupported pooling pooltype"); - } + index = context.Input("MaxIndex"); } + in_g->mutable_data(context.GetPlace()); + math::SequencePoolGradFunctor pool; + pool(context.template device_context(), pooltype, *out_g, + in_g, index); } }; diff --git a/python/paddle/fluid/tests/unittests/test_seq_pool.py b/python/paddle/fluid/tests/unittests/test_seq_pool.py index 0488475721..2e48ef0e88 100644 --- a/python/paddle/fluid/tests/unittests/test_seq_pool.py +++ b/python/paddle/fluid/tests/unittests/test_seq_pool.py @@ -49,6 +49,61 @@ class TestSeqAvgPool(OpTest): self.check_grad(["X"], "Out") +class TestSeqSumPool(TestSeqAvgPool): + def compute(self, x, lod, out): + self.attrs = {'pooltype': "SUM"} + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + out[i] = sub_x.sum(axis=0) + + +class TestSeqMaxPool(TestSeqAvgPool): + def set_data(self): + self.op_type = 'sequence_pool' + x = np.random.uniform(0.1, 1, [13, 23]).astype('float32') + lod = [[0, 4, 5, 8, 13]] + for i in range(4): + l = lod[0][i + 1] - lod[0][i] + x[lod[0][i] + np.random.randint(l), :] += 2.0 + + self.inputs = {'X': (x, lod)} + + out = np.zeros((4, 23)).astype('float32') + self.outputs = {'Out': out} + return x, lod, out + + def compute(self, x, lod, out): + self.attrs = {'pooltype': "MAX"} + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + out[i] = np.amax(sub_x, axis=0) + + +class TestSeqSqrtPool(TestSeqAvgPool): + def compute(self, x, lod, out): + self.attrs = {'pooltype': "SQRT"} + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + len = lod[0][i + 1] - lod[0][i] + out[i] = sub_x.sum(axis=0) / np.sqrt(len) + + +class TestSeqLastPool(TestSeqAvgPool): + def compute(self, x, lod, out): + self.attrs = {'pooltype': "LAST"} + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + out[i] = sub_x[-1, :] + + +class TestSeqFirstPool(TestSeqAvgPool): + def compute(self, x, lod, out): + self.attrs = {'pooltype': "FIRST"} + for i in range(4): + sub_x = x[lod[0][i]:lod[0][i + 1], :] + out[i] = sub_x[0, :] + + class TestSeqAvgPool2D(TestSeqAvgPool): def set_data(self): self.op_type = 'sequence_pool' @@ -68,14 +123,6 @@ class TestSeqAvgPool2D(TestSeqAvgPool): out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) -class TestSeqSumPool(TestSeqAvgPool): - def compute(self, x, lod, out): - self.attrs = {'pooltype': "SUM"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - out[i] = sub_x.sum(axis=0) - - class TestSeqSumPool2D(TestSeqAvgPool2D): def compute(self, x, lod, out): self.attrs = {'pooltype': "SUM"} @@ -84,15 +131,6 @@ class TestSeqSumPool2D(TestSeqAvgPool2D): out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) -class TestSeqSqrtPool(TestSeqAvgPool): - def compute(self, x, lod, out): - self.attrs = {'pooltype': "SQRT"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - len = lod[0][i + 1] - lod[0][i] - out[i] = sub_x.sum(axis=0) / np.sqrt(len) - - class TestSeqSqrtPool2D(TestSeqAvgPool2D): def compute(self, x, lod, out): self.attrs = {'pooltype': "SQRT"} @@ -108,28 +146,6 @@ class TestSeqSqrtPool2D(TestSeqAvgPool2D): self.check_grad(["X"], "Out", max_relative_error=0.06) -class TestSeqMaxPool(TestSeqAvgPool): - def set_data(self): - self.op_type = 'sequence_pool' - x = np.random.uniform(0.1, 1, [13, 23]).astype('float32') - lod = [[0, 4, 5, 8, 13]] - for i in range(4): - l = lod[0][i + 1] - lod[0][i] - x[lod[0][i] + np.random.randint(l), :] += 2.0 - - self.inputs = {'X': (x, lod)} - - out = np.zeros((4, 23)).astype('float32') - self.outputs = {'Out': out} - return x, lod, out - - def compute(self, x, lod, out): - self.attrs = {'pooltype': "MAX"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - out[i] = np.amax(sub_x, axis=0) - - class TestSeqMaxPool2D(TestSeqAvgPool2D): def set_data(self): self.op_type = 'sequence_pool' @@ -151,14 +167,6 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D): out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11)) -class TestSeqLastPool(TestSeqAvgPool): - def compute(self, x, lod, out): - self.attrs = {'pooltype': "LAST"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - out[i] = sub_x[-1, :] - - class TestSeqLastPool2D(TestSeqAvgPool2D): def compute(self, x, lod, out): self.attrs = {'pooltype': "LAST"} @@ -167,14 +175,6 @@ class TestSeqLastPool2D(TestSeqAvgPool2D): out[i] = np.reshape(sub_x[-1, :], (3, 17)) -class TestSeqFirstPool(TestSeqAvgPool): - def compute(self, x, lod, out): - self.attrs = {'pooltype': "FIRST"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - out[i] = sub_x[0, :] - - class TestSeqFirstPool2D(TestSeqAvgPool2D): def compute(self, x, lod, out): self.attrs = {'pooltype': "FIRST"} From 34a440fa646ea9627efc2be27c6efbb51642dfe2 Mon Sep 17 00:00:00 2001 From: Yang Yu Date: Thu, 29 Mar 2018 17:26:49 +0800 Subject: [PATCH 175/180] Revert "make append activation in place by default (#9417)" This reverts commit ce16400daedfa8f793d20d44081db7f417af693a. --- python/paddle/fluid/layer_helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 4341e06596..d771837fc5 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -398,6 +398,7 @@ class LayerHelper(object): return input_var if isinstance(act, basestring): act = {'type': act} + tmp = self.create_tmp_variable(dtype=input_var.dtype) if 'use_mkldnn' in self.kwargs: act['use_mkldnn'] = self.kwargs.get('use_mkldnn') @@ -407,9 +408,9 @@ class LayerHelper(object): self.append_op( type=act_type, inputs={"X": [input_var]}, - outputs={"Out": [input_var]}, + outputs={"Out": [tmp]}, attrs=act) - return input_var + return tmp def _get_default_initializer(self, dtype): if dtype is None or dtype_is_floating(dtype) is True: From 9bbd753425609b8f03a1a4593dca272a00c8f1e6 Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Fri, 30 Mar 2018 00:44:39 +0800 Subject: [PATCH 176/180] change WITH_FLUID to WITH_FLUID_ONLY (#9427) --- CMakeLists.txt | 5 ++--- paddle/CMakeLists.txt | 2 +- python/CMakeLists.txt | 6 +++--- python/setup.py.in | 8 ++++---- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1e11f86d0e..5506fcb010 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,8 +53,7 @@ option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF) option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF) -# TODO: Only compile PaddlePaddle fluid version by WITH_FLUID option. -option(WITH_FLUID "Compile PaddlePaddle fluid only(TODO)" OFF) +option(WITH_FLUID_ONLY "Compile PaddlePaddle fluid only" OFF) option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF) option(GLIDE_INSTALL "Download and install go dependencies " ON) option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF) @@ -109,7 +108,7 @@ if (WITH_C_API AND WITH_PYTHON) endif() if (WITH_C_API) - set(WITH_FLUID OFF CACHE STRING "Disable install fluid when compile the C_API" FORCE) + set(WITH_FLUID_ONLY OFF CACHE STRING "Disable install fluid when compile the C_API" FORCE) endif() if(MOBILE_INFERENCE) diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index d2a4b13354..c44f8a8a8e 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT WITH_FLUID) +if(NOT WITH_FLUID_ONLY) add_subdirectory(cuda) add_subdirectory(function) add_subdirectory(utils) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 90c2dfbba7..b0242b20b8 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,7 +4,7 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES}) -if(NOT WITH_FLUID) +if(NOT WITH_FLUID_ONLY) file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB_RECURSE V2_PY_FILES ./paddle/v2/ *.py) @@ -62,7 +62,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) set(paddle_python_deps ${PADDLE_PYTHON_BUILD_DIR}/.timestamp ${MKL_DEPENDS}) -if(NOT WITH_FLUID) +if(NOT WITH_FLUID_ONLY) set(paddle_python_deps ${paddle_python_deps} paddle_pserver_main paddle_trainer paddle_merge_model) if(WITH_SWIG_PY) list(APPEND paddle_python_deps python_api_wheel) @@ -73,7 +73,7 @@ add_custom_target(paddle_python ALL DEPENDS ${paddle_python_deps}) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) if (WITH_TESTING) - if(NOT WITH_FLUID) + if(NOT WITH_FLUID_ONLY) add_subdirectory(paddle/trainer_config_helpers/tests) if (WITH_SWIG_PY) # enable v2 API unittest only when paddle swig api is compiled diff --git a/python/setup.py.in b/python/setup.py.in index 4cb5409524..831d173d42 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -68,7 +68,7 @@ packages=['paddle', 'paddle.fluid.proto.profiler', 'paddle.fluid.layers'] -if '${WITH_FLUID}'== 'OFF': +if '${WITH_FLUID_ONLY}'== 'OFF': packages+=['paddle.proto', 'paddle.trainer', 'paddle.trainer_config_helpers', @@ -87,7 +87,7 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: # the prefix is sys.prefix which should always be usr paddle_bins = '' -if '${WITH_FLUID}'== 'OFF': +if '${WITH_FLUID_ONLY}'== 'OFF': paddle_bin_dir = 'opt/paddle/bin' paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer', '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model', @@ -95,7 +95,7 @@ if '${WITH_FLUID}'== 'OFF': '${PADDLE_BINARY_DIR}/paddle/scripts/paddle'] package_data={'paddle.fluid': ['core.so']} -if '${WITH_FLUID}'== 'OFF': +if '${WITH_FLUID_ONLY}'== 'OFF': package_data['paddle.v2.master']=['libpaddle_master.so'] package_data['py_paddle']=['*.py','_swig_paddle.so'] @@ -106,7 +106,7 @@ package_dir={ 'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform', 'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework', } -if '${WITH_FLUID}'== 'OFF': +if '${WITH_FLUID_ONLY}'== 'OFF': package_dir['py_paddle']='${PADDLE_SOURCE_DIR}/paddle/py_paddle' From 5f9da86ba562c543a623ff0d99f06bd2e935edb3 Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Thu, 29 Mar 2018 09:44:58 -0700 Subject: [PATCH 177/180] Fix the order of reads and write from buffered channel (#9423) * Fix Issue 9388 * Fix typos --- paddle/fluid/framework/channel_impl.h | 100 +++++++++++++------------ paddle/fluid/framework/channel_test.cc | 34 +++++++-- 2 files changed, 77 insertions(+), 57 deletions(-) diff --git a/paddle/fluid/framework/channel_impl.h b/paddle/fluid/framework/channel_impl.h index 378a0bab1c..c47d629289 100644 --- a/paddle/fluid/framework/channel_impl.h +++ b/paddle/fluid/framework/channel_impl.h @@ -87,6 +87,21 @@ class ChannelImpl : public paddle::framework::Channel { return value; } + std::shared_ptr get_first_message( + std::deque> &queue, ChannelAction action) { + while (!queue.empty()) { + // Check whether this message was added by Select + // If this was added by Select then execute the callback + // to check if you can execute this message. The callback + // can return false if some other case was executed in Select. + // In that case just discard this QueueMessage and process next. + std::shared_ptr m = queue.front(); + queue.pop_front(); + if (m->callback == nullptr || m->callback(action)) return m; + } + return nullptr; + } + size_t cap_; std::recursive_mutex mu_; bool closed_; @@ -131,36 +146,21 @@ void ChannelImpl::Send(T *item) { // If there is a receiver, directly pass the value we want // to send to the receiver, bypassing the channel buffer if any if (!recvq.empty()) { - std::shared_ptr m = recvq.front(); - recvq.pop_front(); - // Do the data transfer - // We will do this data transfer if either of the following - // cases are true - // 1. callback == nullptr // This means it was a regular channel send - // 2. callback returns true - bool do_send = true; - if (m->callback != nullptr) do_send = m->callback(ChannelAction::SEND); - if (do_send) + std::shared_ptr m = + get_first_message(recvq, ChannelAction::SEND); + + if (m != nullptr) { *(m->data) = std::move(*item); - else { - // We cannot do the data transfer because - // this QueueMessage was added by Select - // and some other case was executed. - // So call the Send function again. - // We do not care about notifying other - // because they would have been notified - // by the executed select case. + m->Notify(); + lock.unlock(); + send_return(); + return; + } else { lock.unlock(); Send(item); send_return(); return; } - - // Wake up the blocked process and unlock - m->Notify(); - lock.unlock(); - send_return(); - return; } // Unbuffered channel will always bypass this @@ -201,32 +201,34 @@ bool ChannelImpl::Receive(T *item) { } // If there is a sender, directly receive the value we want - // from the sender, bypassing the channel buffer if any + // from the sender. In case of a buffered channel, read from + // buffer and move front of send queue to the buffer if (!sendq.empty()) { - std::shared_ptr m = sendq.front(); - sendq.pop_front(); - // Do the data transfer - // We will do this data transfer if either of the following - // cases are true - // 1. callback == nullptr // This means it was a regular channel send - // 2. callback returns true - bool do_receive = true; - if (m->callback != nullptr) - do_receive = m->callback(ChannelAction::RECEIVE); - if (do_receive) - *item = std::move(*(m->data)); - else - // We cannot do the data transfer because - // this QueueMessage was added by Select - // and some other case was executed. - // So call the Receive function again. - // We do not care about notifying other - // because they would have been notified - // by the executed select case. - return recv_return(Receive(item)); - - // Wake up the blocked process and unlock - m->Notify(); + std::shared_ptr m = + get_first_message(sendq, ChannelAction::RECEIVE); + if (buf_.size() > 0) { + // Case 1 : Channel is Buffered + // Do Data transfer from front of buffer + // and add a QueueMessage to the buffer + *item = std::move(buf_.front()); + buf_.pop_front(); + // If first message from sendq is not null + // add it to the buffer and notify it + if (m != nullptr) { + // Copy to buffer + buf_.push_back(std::move(*(m->data))); + m->Notify(); + } // Ignore if there is no first message + } else { + // Case 2: Channel is Unbuffered + // Do data transfer from front of SendQ + // If front is nullptr, then recursively call itself + if (m != nullptr) { + *item = std::move(*(m->data)); + m->Notify(); + } else + return recv_return(Receive(item)); + } lock.unlock(); return recv_return(true); } diff --git a/paddle/fluid/framework/channel_test.cc b/paddle/fluid/framework/channel_test.cc index e2380bb54b..1184bfdae1 100644 --- a/paddle/fluid/framework/channel_test.cc +++ b/paddle/fluid/framework/channel_test.cc @@ -36,23 +36,25 @@ TEST(Channel, ChannelCapacityTest) { delete ch; } -void RecevingOrderEqualToSendingOrder(Channel *ch) { +void RecevingOrderEqualToSendingOrder(Channel *ch, int num_items) { unsigned sum_send = 0; std::thread t([&]() { - for (int i = 0; i < 5; i++) { + for (int i = 0; i < num_items; i++) { ch->Send(&i); sum_send += i; } }); - for (int i = 0; i < 5; i++) { - int recv = 999; + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + for (int i = 0; i < num_items; i++) { + int recv = -1; EXPECT_EQ(ch->Receive(&recv), true); EXPECT_EQ(recv, i); } std::this_thread::sleep_for(std::chrono::milliseconds(200)); CloseChannel(ch); t.join(); - EXPECT_EQ(sum_send, 10U); + unsigned expected_sum = (num_items * (num_items - 1)) / 2; + EXPECT_EQ(sum_send, expected_sum); delete ch; } @@ -185,12 +187,28 @@ TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) { TEST(Channel, RecevingOrderEqualToSendingOrderWithUnBufferedChannel) { auto ch = MakeChannel(0); - RecevingOrderEqualToSendingOrder(ch); + RecevingOrderEqualToSendingOrder(ch, 20); +} + +TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel1) { + // Test that Receive Order is same as Send Order when number of items + // sent is less than size of buffer + auto ch = MakeChannel(10); + RecevingOrderEqualToSendingOrder(ch, 5); +} + +TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel2) { + // Test that Receive Order is same as Send Order when number of items + // sent is equal to size of buffer + auto ch = MakeChannel(10); + RecevingOrderEqualToSendingOrder(ch, 10); } -TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel) { +TEST(Channel, RecevingOrderEqualToSendingOrderWithBufferedChannel3) { + // Test that Receive Order is same as Send Order when number of items + // sent is greater than the size of buffer auto ch = MakeChannel(10); - RecevingOrderEqualToSendingOrder(ch); + RecevingOrderEqualToSendingOrder(ch, 20); } void ChannelCloseUnblocksReceiversTest(Channel *ch) { From c414fbbeb16475cea96651d5a7d46e5c37093d03 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 29 Mar 2018 16:04:19 -0700 Subject: [PATCH 178/180] hookup WITH_FLUID_ONLY in TeamCity build.sh (#9509) --- paddle/scripts/docker/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 322f72e4a5..12c3a50d49 100755 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -53,6 +53,7 @@ function cmake_gen() { -DWITH_FAST_BUNDLE_TEST=ON -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} ======================================== EOF # Disable UNITTEST_USE_VIRTUALENV in docker because @@ -78,6 +79,7 @@ EOF -DWITH_TESTING=${WITH_TESTING:-ON} \ -DWITH_FAST_BUNDLE_TEST=ON \ -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \ + -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON } From a75de489c5921c173f4255ef2537160a5bbf354f Mon Sep 17 00:00:00 2001 From: weixing Date: Fri, 30 Mar 2018 10:36:55 +0800 Subject: [PATCH 179/180] Fix some errors (#9403) --- .../build_from_source_cn.rst | 1 + .../build_from_source_en.rst | 1 + .../build_and_install/docker_install_cn.rst | 1 + .../build_and_install/docker_install_en.rst | 1 + doc/fluid/build_and_install/index_cn.rst | 3 +- doc/fluid/build_and_install/index_en.rst | 3 +- .../build_and_install/pip_install_cn.rst | 1 + .../build_and_install/pip_install_en.rst | 1 + doc/fluid/design/algorithm/index_cn.rst | 7 +++ doc/fluid/design/algorithm/index_en.rst | 7 +++ doc/fluid/design/concepts/README.md | 12 ++--- doc/fluid/design/concepts/index_cn.rst | 18 +++++++ doc/fluid/design/concepts/index_en.rst | 18 +++++++ doc/fluid/design/concepts/scope.md | 4 +- doc/fluid/design/concepts/var_desc.md | 2 + doc/fluid/design/concurrent/index_cn.rst | 8 +++ doc/fluid/design/concurrent/index_en.rst | 8 +++ doc/fluid/design/data_type/index_cn.rst | 7 +++ doc/fluid/design/data_type/index_en.rst | 7 +++ .../distributed_lookup_table_design.md | 2 +- doc/fluid/design/dist_train/index_cn.rst | 9 ++++ doc/fluid/design/dist_train/index_en.rst | 9 ++++ doc/fluid/design/dynamic_rnn/index_cn.rst | 8 +++ doc/fluid/design/dynamic_rnn/index_en.rst | 8 +++ doc/fluid/design/dynamic_rnn/rnn_design.md | 15 +++--- doc/fluid/design/execution/index_cn.rst | 8 +++ doc/fluid/design/execution/index_en.rst | 8 +++ doc/fluid/design/execution/switch.md | 6 +-- doc/fluid/design/index_cn.rst | 17 ++++++ doc/fluid/design/index_en.rst | 17 ++++++ doc/fluid/design/interface/index_cn.rst | 4 ++ doc/fluid/design/interface/index_en.rst | 4 ++ doc/fluid/design/memory/index_cn.rst | 7 +++ doc/fluid/design/memory/index_en.rst | 7 +++ doc/fluid/design/modules/evaluator.md | 20 +++---- doc/fluid/design/modules/index_cn.rst | 14 +++++ doc/fluid/design/modules/index_en.rst | 14 +++++ doc/fluid/design/modules/net_op_design.md | 22 ++++---- doc/fluid/design/modules/optimizer.md | 8 +-- doc/fluid/design/motivation/index_cn.rst | 10 ++++ doc/fluid/design/motivation/index_en.rst | 10 ++++ .../design/motivation/refactorization.md | 36 ++++++------- doc/fluid/design/muti_devices/index_cn.rst | 9 ++++ doc/fluid/design/muti_devices/index_en.rst | 9 ++++ .../design/muti_devices/kernel_hint_design.md | 2 +- .../design/muti_devices/kernel_selection.md | 2 +- doc/fluid/design/network/index_cn.rst | 7 +++ doc/fluid/design/network/index_en.rst | 7 +++ doc/fluid/dev/api_doc_std_cn.md | 52 +++++++++---------- doc/fluid/dev/index_cn.rst | 11 ++++ doc/fluid/dev/index_en.rst | 11 +++- doc/fluid/dev/name_convention.md | 6 +-- doc/fluid/dev/new_op_kernel_en.md | 18 +++---- doc/fluid/dev/op_markdown_format.md | 10 ++-- doc/fluid/dev/use_eigen_cn.md | 18 +++---- doc/fluid/dev/use_eigen_en.md | 10 ++-- doc/fluid/getstarted/concepts/index_cn.rst | 4 ++ doc/fluid/getstarted/concepts/index_en.rst | 4 ++ doc/fluid/getstarted/index_cn.rst | 19 ++++++- doc/fluid/getstarted/index_en.rst | 18 ++++++- doc/fluid/getstarted/quickstart_cn.rst | 1 + doc/fluid/getstarted/quickstart_en.rst | 1 + doc/fluid/howto/index_cn.rst | 5 ++ doc/fluid/howto/index_en.rst | 5 +- .../howto/optimization/benchmark/README.md | 1 + .../howto/optimization/benchmark/index_cn.rst | 8 +++ .../howto/optimization/benchmark/index_en.rst | 8 +++ .../optimization/benchmark/vgg16/README.md | 1 + .../howto/optimization/cpu_profiling_cn.md | 2 +- .../howto/optimization/cpu_profiling_en.md | 4 +- doc/fluid/howto/optimization/index_cn.rst | 9 ++++ doc/fluid/howto/optimization/index_en.rst | 9 ++++ doc/fluid/howto/optimization/timeline.md | 2 +- doc/fluid/index_cn.rst | 2 +- doc/fluid/index_en.rst | 2 +- .../design/interface/00.why_plain_c.md | 0 .../interface/01.inference_implementation.md | 0 doc/v2/design/interface/index_cn.rst | 7 +++ doc/v2/design/interface/index_en.rst | 7 +++ doc/v2/design/mkl/mkldnn.md | 6 +-- 80 files changed, 531 insertions(+), 139 deletions(-) create mode 120000 doc/fluid/build_and_install/build_from_source_cn.rst create mode 120000 doc/fluid/build_and_install/build_from_source_en.rst create mode 120000 doc/fluid/build_and_install/docker_install_cn.rst create mode 120000 doc/fluid/build_and_install/docker_install_en.rst mode change 100644 => 120000 doc/fluid/build_and_install/index_cn.rst mode change 100644 => 120000 doc/fluid/build_and_install/index_en.rst create mode 120000 doc/fluid/build_and_install/pip_install_cn.rst create mode 120000 doc/fluid/build_and_install/pip_install_en.rst create mode 100644 doc/fluid/design/algorithm/index_cn.rst create mode 100644 doc/fluid/design/algorithm/index_en.rst create mode 100644 doc/fluid/design/concepts/index_cn.rst create mode 100644 doc/fluid/design/concepts/index_en.rst create mode 100644 doc/fluid/design/concurrent/index_cn.rst create mode 100644 doc/fluid/design/concurrent/index_en.rst create mode 100644 doc/fluid/design/data_type/index_cn.rst create mode 100644 doc/fluid/design/data_type/index_en.rst create mode 100644 doc/fluid/design/dist_train/index_cn.rst create mode 100644 doc/fluid/design/dist_train/index_en.rst create mode 100644 doc/fluid/design/dynamic_rnn/index_cn.rst create mode 100644 doc/fluid/design/dynamic_rnn/index_en.rst create mode 100644 doc/fluid/design/execution/index_cn.rst create mode 100644 doc/fluid/design/execution/index_en.rst create mode 100644 doc/fluid/design/interface/index_cn.rst create mode 100644 doc/fluid/design/interface/index_en.rst create mode 100644 doc/fluid/design/memory/index_cn.rst create mode 100644 doc/fluid/design/memory/index_en.rst create mode 100644 doc/fluid/design/modules/index_cn.rst create mode 100644 doc/fluid/design/modules/index_en.rst create mode 100644 doc/fluid/design/motivation/index_cn.rst create mode 100644 doc/fluid/design/motivation/index_en.rst create mode 100644 doc/fluid/design/muti_devices/index_cn.rst create mode 100644 doc/fluid/design/muti_devices/index_en.rst create mode 100644 doc/fluid/design/network/index_cn.rst create mode 100644 doc/fluid/design/network/index_en.rst create mode 100644 doc/fluid/getstarted/concepts/index_cn.rst create mode 100644 doc/fluid/getstarted/concepts/index_en.rst create mode 120000 doc/fluid/getstarted/quickstart_cn.rst create mode 120000 doc/fluid/getstarted/quickstart_en.rst create mode 120000 doc/fluid/howto/optimization/benchmark/README.md create mode 100644 doc/fluid/howto/optimization/benchmark/index_cn.rst create mode 100644 doc/fluid/howto/optimization/benchmark/index_en.rst create mode 120000 doc/fluid/howto/optimization/benchmark/vgg16/README.md create mode 100644 doc/fluid/howto/optimization/index_cn.rst create mode 100644 doc/fluid/howto/optimization/index_en.rst rename doc/{fluid => v2}/design/interface/00.why_plain_c.md (100%) rename doc/{fluid => v2}/design/interface/01.inference_implementation.md (100%) create mode 100644 doc/v2/design/interface/index_cn.rst create mode 100644 doc/v2/design/interface/index_en.rst diff --git a/doc/fluid/build_and_install/build_from_source_cn.rst b/doc/fluid/build_and_install/build_from_source_cn.rst new file mode 120000 index 0000000000..ae4e8c7c48 --- /dev/null +++ b/doc/fluid/build_and_install/build_from_source_cn.rst @@ -0,0 +1 @@ +../../v2/build_and_install/build_from_source_cn.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/build_from_source_en.rst b/doc/fluid/build_and_install/build_from_source_en.rst new file mode 120000 index 0000000000..1ac828c973 --- /dev/null +++ b/doc/fluid/build_and_install/build_from_source_en.rst @@ -0,0 +1 @@ +../../v2/build_and_install/build_from_source_en.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/docker_install_cn.rst b/doc/fluid/build_and_install/docker_install_cn.rst new file mode 120000 index 0000000000..965b2e2055 --- /dev/null +++ b/doc/fluid/build_and_install/docker_install_cn.rst @@ -0,0 +1 @@ +../../v2/build_and_install/docker_install_cn.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/docker_install_en.rst b/doc/fluid/build_and_install/docker_install_en.rst new file mode 120000 index 0000000000..79d7341a7b --- /dev/null +++ b/doc/fluid/build_and_install/docker_install_en.rst @@ -0,0 +1 @@ +../../v2/build_and_install/docker_install_en.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/index_cn.rst b/doc/fluid/build_and_install/index_cn.rst deleted file mode 100644 index 9276236f9f..0000000000 --- a/doc/fluid/build_and_install/index_cn.rst +++ /dev/null @@ -1,2 +0,0 @@ -安装与使用 ------------- diff --git a/doc/fluid/build_and_install/index_cn.rst b/doc/fluid/build_and_install/index_cn.rst new file mode 120000 index 0000000000..f697fcd8fa --- /dev/null +++ b/doc/fluid/build_and_install/index_cn.rst @@ -0,0 +1 @@ +../../v2/build_and_install/index_cn.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/index_en.rst b/doc/fluid/build_and_install/index_en.rst deleted file mode 100644 index cc1e61a58a..0000000000 --- a/doc/fluid/build_and_install/index_en.rst +++ /dev/null @@ -1,2 +0,0 @@ -Build and Install ------------- diff --git a/doc/fluid/build_and_install/index_en.rst b/doc/fluid/build_and_install/index_en.rst new file mode 120000 index 0000000000..502f66a413 --- /dev/null +++ b/doc/fluid/build_and_install/index_en.rst @@ -0,0 +1 @@ +../../v2/build_and_install/index_en.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/pip_install_cn.rst b/doc/fluid/build_and_install/pip_install_cn.rst new file mode 120000 index 0000000000..07deca84b8 --- /dev/null +++ b/doc/fluid/build_and_install/pip_install_cn.rst @@ -0,0 +1 @@ +../../v2/build_and_install/pip_install_cn.rst \ No newline at end of file diff --git a/doc/fluid/build_and_install/pip_install_en.rst b/doc/fluid/build_and_install/pip_install_en.rst new file mode 120000 index 0000000000..7f39c99819 --- /dev/null +++ b/doc/fluid/build_and_install/pip_install_en.rst @@ -0,0 +1 @@ +../../v2/build_and_install/pip_install_en.rst \ No newline at end of file diff --git a/doc/fluid/design/algorithm/index_cn.rst b/doc/fluid/design/algorithm/index_cn.rst new file mode 100644 index 0000000000..0883a9dc9c --- /dev/null +++ b/doc/fluid/design/algorithm/index_cn.rst @@ -0,0 +1,7 @@ +梯度更新算法 +------------ + +.. toctree:: + :maxdepth: 1 + + parameter_average.md diff --git a/doc/fluid/design/algorithm/index_en.rst b/doc/fluid/design/algorithm/index_en.rst new file mode 100644 index 0000000000..59fe68dcf7 --- /dev/null +++ b/doc/fluid/design/algorithm/index_en.rst @@ -0,0 +1,7 @@ +Gradient Update Algorithm +-------------------------------------- + +.. toctree:: + :maxdepth: 1 + + parameter_average.md diff --git a/doc/fluid/design/concepts/README.md b/doc/fluid/design/concepts/README.md index bf0e4dddc1..ed3f5aab28 100644 --- a/doc/fluid/design/concepts/README.md +++ b/doc/fluid/design/concepts/README.md @@ -2,7 +2,7 @@ A few months ago when we were trying to replace CMake with Bazel, @emailweixu su Here are some initial thoughts. Your comments are welcome! -### Required CMake Function +# Required CMake Function I think we need only the following few CMake functions to make a project description mean and clean: @@ -25,7 +25,7 @@ Also, - to describe external dependencies, we need `external_library`. - to build shared libraries, we need `shared_library`. -### An Example Project +## An Example Project Suppose that we have aforementioned functions defined in our `/cmake` directory. The following example `CMakeLists.txt` describes a project including the following source files: @@ -102,11 +102,11 @@ shared_library(api ``` -### Implementation +## Implementation As above example CMakeLists.txt executes, each function invocation adds "nodes" to a dependency graph. It also use this graph to generate CMake commands including `add_executable`, `add_dependencies`, `target_link_libraries`, and `add_test`. -### Using Package Manager For Go +## Using Package Manager For Go Building Go binaries and libraries need to satisfy their dependencies, generally we can do `go get ./...` to download and compile all external dependencies. The @@ -122,7 +122,7 @@ problems are: at many cloud file hosting, so users what to compile paddle by themselves can download this "vendor" package from a mirror site. -#### Choose A Suitable Tool +### Choose A Suitable Tool As mentioned by @wangkuiyi, [Here](https://github.com/golang/go/wiki/PackageManagementTools) list dozens of Go package managers. We choose the tool using following principles: @@ -140,7 +140,7 @@ management tool has been started at: https://github.com/golang/dep to resolve such problems, but it's currently at Alpha stage. So the best choice now is glide obviously. -#### Manage Go Packages +### Manage Go Packages - Dependencies: `go/glide.yaml` will store the dependencies and their versions which is directly imported by paddle. `go/glide.lock` will store all dependencies recursively diff --git a/doc/fluid/design/concepts/index_cn.rst b/doc/fluid/design/concepts/index_cn.rst new file mode 100644 index 0000000000..eec8a2f14c --- /dev/null +++ b/doc/fluid/design/concepts/index_cn.rst @@ -0,0 +1,18 @@ +核心概念 +------------- + +.. toctree:: + :maxdepth: 1 + + README.md + cpp_data_feeding.md + functions_operators_layers.md + program.md + variable.md + var_desc.md + tensor.md + tensor_array.md + lod_tensor.md + block.md + scope.md + executor.md diff --git a/doc/fluid/design/concepts/index_en.rst b/doc/fluid/design/concepts/index_en.rst new file mode 100644 index 0000000000..036e1da255 --- /dev/null +++ b/doc/fluid/design/concepts/index_en.rst @@ -0,0 +1,18 @@ +Core Concepts +-------------------------------------- + +.. toctree:: + :maxdepth: 1 + + README.md + cpp_data_feeding.md + functions_operators_layers.md + program.md + variable.md + var_desc.md + tensor.md + tensor_array.md + lod_tensor.md + block.md + scope.md + executor.md diff --git a/doc/fluid/design/concepts/scope.md b/doc/fluid/design/concepts/scope.md index 4da76eebb7..dcf7664935 100644 --- a/doc/fluid/design/concepts/scope.md +++ b/doc/fluid/design/concepts/scope.md @@ -30,7 +30,7 @@ Scope is an association of a name to variable. All variables belong to `Scope`. Variable can not belong to many scopes. If you want to use variables from parent scope, you can use `parent scope`. -1. Scope should destruct all Variables inside it when itself is destructed. User can never store `Variable` pointer somewhere else. +1. Scope should destruct all Variables inside it when itself is destructed. User can never store `Variable` pointer somewhere else. Because Variable can only be got from Scope. When destroying Scope, we also need to destroy all the Variables in it. If user store `Variable` pointer to private data member or some global variable, the pointer will be an invalid pointer when associated `Scope` is destroyed. @@ -78,7 +78,7 @@ In `Scope` class, there is a private data member called `parent_`. `parent_` is A local scope is very useful when we implement Recurrent Neural Network. Each timestep of an RNN should be a `Net`. Each `Net` of timestep (`StepNet` for short) should use an independent local scope. Just like variables in a while loop is inside a local scope in programming languages. By using a single `StepNet` and changing local scope, we can implement an RNN easily. -# Interface Design +## Interface Design ```cpp class Variable { diff --git a/doc/fluid/design/concepts/var_desc.md b/doc/fluid/design/concepts/var_desc.md index 6a45af1995..fcba08c07f 100644 --- a/doc/fluid/design/concepts/var_desc.md +++ b/doc/fluid/design/concepts/var_desc.md @@ -1,3 +1,5 @@ +# Design Doc: Var_desc + ## Background PaddlePaddle divides the description of neural network computation into two stages: compile time and runtime. At compile time, the neural network computation is described as a `ProgramDesc` whereas at runtime an `Executor` interprets the `ProgramDesc` to compute the operations. diff --git a/doc/fluid/design/concurrent/index_cn.rst b/doc/fluid/design/concurrent/index_cn.rst new file mode 100644 index 0000000000..e47135e9fc --- /dev/null +++ b/doc/fluid/design/concurrent/index_cn.rst @@ -0,0 +1,8 @@ +并发编程 +------------ + +.. toctree:: + :maxdepth: 1 + + concurrent_programming.md + parallel_do.md diff --git a/doc/fluid/design/concurrent/index_en.rst b/doc/fluid/design/concurrent/index_en.rst new file mode 100644 index 0000000000..0727e75798 --- /dev/null +++ b/doc/fluid/design/concurrent/index_en.rst @@ -0,0 +1,8 @@ +Concurrent Programming +------------------------- + +.. toctree:: + :maxdepth: 1 + + concurrent_programming.md + parallel_do.md diff --git a/doc/fluid/design/data_type/index_cn.rst b/doc/fluid/design/data_type/index_cn.rst new file mode 100644 index 0000000000..b60167b6b1 --- /dev/null +++ b/doc/fluid/design/data_type/index_cn.rst @@ -0,0 +1,7 @@ +数据类型 +------------ + +.. toctree:: + :maxdepth: 1 + + float16.md diff --git a/doc/fluid/design/data_type/index_en.rst b/doc/fluid/design/data_type/index_en.rst new file mode 100644 index 0000000000..6a88d17943 --- /dev/null +++ b/doc/fluid/design/data_type/index_en.rst @@ -0,0 +1,7 @@ +Data Type +------------ + +.. toctree:: + :maxdepth: 1 + + float16.md diff --git a/doc/fluid/design/dist_train/distributed_lookup_table_design.md b/doc/fluid/design/dist_train/distributed_lookup_table_design.md index e543adf0f9..9887291389 100644 --- a/doc/fluid/design/dist_train/distributed_lookup_table_design.md +++ b/doc/fluid/design/dist_train/distributed_lookup_table_design.md @@ -1,4 +1,4 @@ -## Design Doc: Distributed Lookup Table Operator +# Design Doc: Distributed Lookup Table Operator A lookup table operator in PaddlePaddle where the table could be out of the memory of a computer. diff --git a/doc/fluid/design/dist_train/index_cn.rst b/doc/fluid/design/dist_train/index_cn.rst new file mode 100644 index 0000000000..ed6f3dda27 --- /dev/null +++ b/doc/fluid/design/dist_train/index_cn.rst @@ -0,0 +1,9 @@ +分布式训练 +------------ + +.. toctree:: + :maxdepth: 1 + + distributed_architecture.md + distributed_lookup_table_design.md + parameter_server.md diff --git a/doc/fluid/design/dist_train/index_en.rst b/doc/fluid/design/dist_train/index_en.rst new file mode 100644 index 0000000000..f84688f168 --- /dev/null +++ b/doc/fluid/design/dist_train/index_en.rst @@ -0,0 +1,9 @@ +Distributed Training +--------------------- + +.. toctree:: + :maxdepth: 1 + + distributed_architecture.md + distributed_lookup_table_design.md + parameter_server.md diff --git a/doc/fluid/design/dynamic_rnn/index_cn.rst b/doc/fluid/design/dynamic_rnn/index_cn.rst new file mode 100644 index 0000000000..1d224d22cf --- /dev/null +++ b/doc/fluid/design/dynamic_rnn/index_cn.rst @@ -0,0 +1,8 @@ +动态RNN +------------ + +.. toctree:: + :maxdepth: 1 + + rnn.md + rnn_design.md diff --git a/doc/fluid/design/dynamic_rnn/index_en.rst b/doc/fluid/design/dynamic_rnn/index_en.rst new file mode 100644 index 0000000000..568f496e4f --- /dev/null +++ b/doc/fluid/design/dynamic_rnn/index_en.rst @@ -0,0 +1,8 @@ +Dynamic RNN +------------ + +.. toctree:: + :maxdepth: 1 + + rnn.md + rnn_design.md diff --git a/doc/fluid/design/dynamic_rnn/rnn_design.md b/doc/fluid/design/dynamic_rnn/rnn_design.md index 3d38b9a0ad..cecfcd3307 100644 --- a/doc/fluid/design/dynamic_rnn/rnn_design.md +++ b/doc/fluid/design/dynamic_rnn/rnn_design.md @@ -99,7 +99,7 @@ private: - 由于传递过程是以复制`shared_ptr`的方式实现,因此框架只需要传递一次 `lod_start_pos` 2. 对于不感知 `lod_start_pos` 的Op足够透明 -3. 需要修改 `lod_start_pos` 的producer Op可以在 `Run` 时更新自己的 `lod_start_pos` 数据 +3. 需要修改 `lod_start_pos` 的producer Op可以在 `Run` 时更新自己的 `lod_start_pos` 数据 具体的设计分为以下3小节 @@ -189,7 +189,7 @@ struct SortedSeqItem { std::vector sorted_seqs; ``` -来追踪序列排序后的位置,并添加一个新的接口 +来追踪序列排序后的位置,并添加一个新的接口 ```c++ std::vector SortBySeqLen(const LODTensor& tensor); @@ -233,7 +233,10 @@ x x - 将每个序列concat 为规则的mini-batch表示 ## 参考文献 -1. [Tensorflow Bucketing](https://www.tensorflow.org/versions/r0.12/api_docs/python/contrib.training/bucketing) -2. [mxnet Bucketing](http://mxnet.io/how_to/bucketing.html) -3. [variable length input in RNN scenario](https://discuss.pytorch.org/t/about-the-variable-length-input-in-rnn-scenario/345/5) -4. [Level of details](https://en.wikipedia.org/wiki/Level_of_detail) +[Tensorflow Bucketing](https://www.tensorflow.org/versions/r0.12/api_docs/python/contrib.training/bucketing) + +[mxnet Bucketing](http://mxnet.io/how_to/bucketing.html) + +[variable length input in RNN scenario](https://discuss.pytorch.org/t/about-the-variable-length-input-in-rnn-scenario/345/5) + +[Level of details](https://en.wikipedia.org/wiki/Level_of_detail) diff --git a/doc/fluid/design/execution/index_cn.rst b/doc/fluid/design/execution/index_cn.rst new file mode 100644 index 0000000000..ed31b01742 --- /dev/null +++ b/doc/fluid/design/execution/index_cn.rst @@ -0,0 +1,8 @@ +执行流程 +------------- + +.. toctree:: + :maxdepth: 1 + + switch.md + if_else_op.md diff --git a/doc/fluid/design/execution/index_en.rst b/doc/fluid/design/execution/index_en.rst new file mode 100644 index 0000000000..fcf846da34 --- /dev/null +++ b/doc/fluid/design/execution/index_en.rst @@ -0,0 +1,8 @@ +Execution Process +-------------------------------------- + +.. toctree:: + :maxdepth: 1 + + switch.md + if_else_op.md diff --git a/doc/fluid/design/execution/switch.md b/doc/fluid/design/execution/switch.md index 827d0601c6..1c337bd715 100644 --- a/doc/fluid/design/execution/switch.md +++ b/doc/fluid/design/execution/switch.md @@ -1,6 +1,6 @@ -### Design Doc: Switch +# Design Doc: Switch -### Background +## Background Many programming languages provide `switch` as a generalization of `if-elif-else`. We want to add it to Fluid. @@ -19,7 +19,7 @@ with switch() as switch: fluid.print("Case 3") ``` -### The Semantics +## The Semantics 1. A `switch` control-flow checks cases one-by-one. 1. The condition of each case is a boolean value, which is a scalar, and differs from the `fluid.if_else` control-flow, which condition could be a vector of boolean values. diff --git a/doc/fluid/design/index_cn.rst b/doc/fluid/design/index_cn.rst index f1887be690..e9f55214f4 100644 --- a/doc/fluid/design/index_cn.rst +++ b/doc/fluid/design/index_cn.rst @@ -1,2 +1,19 @@ 设计思想 ------------ + +.. toctree:: + :maxdepth: 1 + + motivation/index_cn.rst + execution/index_cn.rst + concepts/index_cn.rst + data_type/index_cn.rst + memory/index_cn.rst + muti_devices/index_cn.rst + dynamic_rnn/index_cn.rst + concurrent/index_cn.rst + algorithm/index_cn.rst + network/index_cn.rst + modules/index_cn.rst + interface/index_cn.rst + dist_train/index_cn.rst diff --git a/doc/fluid/design/index_en.rst b/doc/fluid/design/index_en.rst index 18a4b4122f..2802dc3a31 100644 --- a/doc/fluid/design/index_en.rst +++ b/doc/fluid/design/index_en.rst @@ -1,2 +1,19 @@ Design ------------ + +.. toctree:: + :maxdepth: 1 + + motivation/index_en.rst + execution/index_en.rst + concepts/index_en.rst + data_type/index_en.rst + memory/index_en.rst + muti_devices/index_en.rst + dynamic_rnn/index_en.rst + concurrent/index_en.rst + algorithm/index_en.rst + network/index_en.rst + modules/index_en.rst + interface/index_en.rst + dist_train/index_en.rst diff --git a/doc/fluid/design/interface/index_cn.rst b/doc/fluid/design/interface/index_cn.rst new file mode 100644 index 0000000000..69a8d9bad4 --- /dev/null +++ b/doc/fluid/design/interface/index_cn.rst @@ -0,0 +1,4 @@ +多语言接口 +------------ + +TBD diff --git a/doc/fluid/design/interface/index_en.rst b/doc/fluid/design/interface/index_en.rst new file mode 100644 index 0000000000..22abc71f98 --- /dev/null +++ b/doc/fluid/design/interface/index_en.rst @@ -0,0 +1,4 @@ +Multi-Language Interface +----------------------- + +TBD diff --git a/doc/fluid/design/memory/index_cn.rst b/doc/fluid/design/memory/index_cn.rst new file mode 100644 index 0000000000..c507c638bd --- /dev/null +++ b/doc/fluid/design/memory/index_cn.rst @@ -0,0 +1,7 @@ +内存管理 +------------ + +.. toctree:: + :maxdepth: 1 + + memory_optimization.md diff --git a/doc/fluid/design/memory/index_en.rst b/doc/fluid/design/memory/index_en.rst new file mode 100644 index 0000000000..f7526437a7 --- /dev/null +++ b/doc/fluid/design/memory/index_en.rst @@ -0,0 +1,7 @@ +Memory Management +------------------- + +.. toctree:: + :maxdepth: 1 + + memory_optimization.md diff --git a/doc/fluid/design/modules/evaluator.md b/doc/fluid/design/modules/evaluator.md index 11cc129d56..de9605b0e6 100644 --- a/doc/fluid/design/modules/evaluator.md +++ b/doc/fluid/design/modules/evaluator.md @@ -1,10 +1,10 @@ -## Evaluator Design +# Evaluator Design -### Problem Statement +## Problem Statement During training or inference, we provide an evaluation function to measure the model performance, for example, accuracy, precision, etc. In the operator based framework design, the data passes through the network pipeline batch by batch. As a result, inside the operator, we only calculate the metrics for one minibatch. Thus, we need to provide a mechanism to calculate the metrics for each N pass/batch the user wants. -### Evaluator Design +## Evaluator Design Currently, every operation is expressed in the graph. We divide the evaluator process into three steps. 1. Initialize the metric state and add it into the block. @@ -14,11 +14,11 @@ Currently, every operation is expressed in the graph. We divide the evaluator pr 3. Merge the mini-batch statistics to form the evaluation result for multiple mini-batches. When it comes to distributed training/Multi-GPU training, aggregate the value from different devices. -### Implementation -This design is shown in the Python API. -Each metric operator needs to caculate the metric statistic and return the batch-aware states. Python side is responsible for accumulating the states for each pass. +## Implementation +This design is shown in the Python API. +Each metric operator needs to caculate the metric statistic and return the batch-aware states. Python side is responsible for accumulating the states for each pass. + - ```python class Evaluator(object): """ @@ -32,7 +32,7 @@ class Evaluator(object): The initialization of Evaluator should be responsible for: create metric states and append to the main_program - """ + """ pass def _update_ops(self, input, label, **kwargs) @@ -40,14 +40,14 @@ class Evaluator(object): Add mini-batch evaluator caculate operators to the main_program. Add increment operator to accumulate the metric states. """ - + def reset(self, executor, reset_program=None): """ Reset metric states at the begin of each pass/user specified batch number. Execute the reset_program to reset the states. """ - + def eval(self, executor, eval_program=None): """ diff --git a/doc/fluid/design/modules/index_cn.rst b/doc/fluid/design/modules/index_cn.rst new file mode 100644 index 0000000000..b25783f0f5 --- /dev/null +++ b/doc/fluid/design/modules/index_cn.rst @@ -0,0 +1,14 @@ +代码结构和重要模块 +----------------- + +.. toctree:: + :maxdepth: 1 + + backward.md + python_api.md + regularization.md + infer_var_type.md + optimizer.md + prune.md + register_grad_op.md + net_op_design.md diff --git a/doc/fluid/design/modules/index_en.rst b/doc/fluid/design/modules/index_en.rst new file mode 100644 index 0000000000..2108156e08 --- /dev/null +++ b/doc/fluid/design/modules/index_en.rst @@ -0,0 +1,14 @@ +Code Structure and Important Modules +------------------------------------- + +.. toctree:: + :maxdepth: 1 + + backward.md + python_api.md + regularization.md + infer_var_type.md + optimizer.md + prune.md + register_grad_op.md + net_op_design.md diff --git a/doc/fluid/design/modules/net_op_design.md b/doc/fluid/design/modules/net_op_design.md index a5f0483081..e64ac2fb1c 100644 --- a/doc/fluid/design/modules/net_op_design.md +++ b/doc/fluid/design/modules/net_op_design.md @@ -1,16 +1,16 @@ # Network Design `Network` is the container and controller of a set of operators, -user can build a real network from a `NetDesc` which is a protobuf message +user can build a real network from a `NetDesc` which is a protobuf message and use `Network.Run()` to run all the operators in the network. -A network object knows all Operators belonging to this network. Variables, -which are inputs and outputs of these operators, +A network object knows all Operators belonging to this network. Variables, +which are inputs and outputs of these operators, are created and managed by a hierarchy of Scope objects. -# API +## API -## Net +### Net To make the `Network` extendable, a base class is defined like this ```c++ @@ -43,8 +43,8 @@ class Net { }; ``` -All network implementations should build networks from a protobuf message which -describes the structure of a real network; `Run` method should be implemented by +All network implementations should build networks from a protobuf message which +describes the structure of a real network; `Run` method should be implemented by all implementations to offer a universal method to forward or backward compute a network. `Net::Create` is a method of factory pattern and can be implemented like @@ -64,7 +64,7 @@ std::unique Net::Create(const NetDesc& def) { ``` Network is designed as the container of operators. to make it more extendable, -we decouple it from the related variable resources. +we decouple it from the related variable resources. `Run(Scope* scope)` takes the scope as a argument so that it can run in different scopes. @@ -80,7 +80,7 @@ if (net) { } ``` -## `PlainNet` as a simple implementation of `BaseNet` +### `PlainNet` as a simple implementation of `BaseNet` A very basic implementation is as follows. All it does is simply to run every operators in sequence. @@ -211,9 +211,9 @@ class NetBuilder final { } ``` -## Compatibility with RNN +### Compatibility with RNN -Benefitting from the decoupling of `PlainNet.Run` and `Scope`, `PlainNet` is compatible with future RNN design, +Benefitting from the decoupling of `PlainNet.Run` and `Scope`, `PlainNet` is compatible with future RNN design, for example we can implement a simple recurrent neural network as follows ```c++ diff --git a/doc/fluid/design/modules/optimizer.md b/doc/fluid/design/modules/optimizer.md index 691081c268..1c25fde9ca 100644 --- a/doc/fluid/design/modules/optimizer.md +++ b/doc/fluid/design/modules/optimizer.md @@ -1,6 +1,6 @@ -## Optimizer Design +# Optimizer Design -### The Problem +## The Problem A PaddlePaddle program, or a block, is a sequence of operators operating variables. A training program needs to do three kinds of works: @@ -19,7 +19,7 @@ It's true that users should be able to create all these operators manually by ca In this design, we propose a high-level API that automatically derives the optimisation pass and operators from the forward pass. -### High-level Python API to describe the training process +## High-level Python API to describe the training process 1. User write code to describe the network: @@ -54,7 +54,7 @@ In this design, we propose a high-level API that automatically derives the optim sess.run(target= opt_op_list, ...) ``` -#### Optimizer Python interface: +### Optimizer Python interface: ```python class Optimizer(object): diff --git a/doc/fluid/design/motivation/index_cn.rst b/doc/fluid/design/motivation/index_cn.rst new file mode 100644 index 0000000000..7706e73eca --- /dev/null +++ b/doc/fluid/design/motivation/index_cn.rst @@ -0,0 +1,10 @@ +设计动机和目标 +------------- + +.. toctree:: + :maxdepth: 1 + + api.md + refactorization.md + fluid.md + fluid_compiler.md diff --git a/doc/fluid/design/motivation/index_en.rst b/doc/fluid/design/motivation/index_en.rst new file mode 100644 index 0000000000..10b64b257c --- /dev/null +++ b/doc/fluid/design/motivation/index_en.rst @@ -0,0 +1,10 @@ +Design Motivations and Goals +-------------------------------------- + +.. toctree:: + :maxdepth: 1 + + api.md + refactorization.md + fluid.md + fluid_compiler.md diff --git a/doc/fluid/design/motivation/refactorization.md b/doc/fluid/design/motivation/refactorization.md index f93d6155e1..7c39fabcc6 100644 --- a/doc/fluid/design/motivation/refactorization.md +++ b/doc/fluid/design/motivation/refactorization.md @@ -97,13 +97,13 @@ Compile Time -> IR -> Runtime --- -# Operator/OpWithKernel/OpKernel +## Operator/OpWithKernel/OpKernel ![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/49caf1fb70820fb4a6c217634317c9306f361f36/op_op_with_kern_class_diagram.dot) --- -# Operator +## Operator ![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/dd598e8f1976f5759f58af5e5ef94738a6b2e661/op.dot) * `Operator` is the fundamental building block of the user interface. @@ -113,7 +113,7 @@ Compile Time -> IR -> Runtime --- -# OpWithKernel/Kernel +## OpWithKernel/Kernel ![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/9d7f4eba185cf41c8e2fbfb40ae21890dbddcd39/op_with_kernel.dot) @@ -124,7 +124,7 @@ Compile Time -> IR -> Runtime --- -# Why separate Kernel and Operator +## Why separate Kernel and Operator * Separate GPU and CPU code. * Make Paddle capable of running without GPU. @@ -132,7 +132,7 @@ Compile Time -> IR -> Runtime * For example, same multiplication op can have different implementations kernels such as FP16 kernel, FP32 kernel, MKL, eigen kernel. --- -# Libraries for Kernel development +## Libraries for Kernel development * `Eigen::Tensor` contains basic math and element-wise functions. * Note that `Eigen::Tensor` has broadcast implementation. @@ -143,16 +143,16 @@ Compile Time -> IR -> Runtime * Hand-writing `GPUKernel` and `CPU` code * Do not write in header (`.h`) files. CPU Kernel should be in cpp source (`.cc`) and GPU kernels should be in cuda (`.cu`) files. (GCC cannot compile GPU code.) --- -# Operator Registration +## Operator Registration -## Why is registration necessary? +### Why is registration necessary? We need a method to build mappings between Op type names and Op classes. -## How is registration implemented? +### How is registration implemented? Maintaining a map, whose key is the type name and the value is the corresponding Op constructor. --- -# The Registry Map +## The Registry Map ### `OpInfoMap` @@ -166,7 +166,7 @@ Maintaining a map, whose key is the type name and the value is the corresponding - **`checker`**: Used to check attributes. --- -# Related Concepts +## Related Concepts ### Op_Maker It's constructor takes `proto` and `checker`. They are completed during Op_Maker's construction. ([ScaleOpMaker](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/scale_op.cc#L37)) @@ -178,7 +178,7 @@ REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) ``` --- -# Registration Process +## Registration Process 1. Write an Op class and its gradient Op class, if required. 2. Write an Op maker class. In the constructor of this class, describe the inputs, outputs and attributes of the operator. 3. Invoke the macro `REGISTER_OP`. This macro will @@ -186,13 +186,13 @@ REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) 2. Using the completed `proto` and `checker`, it will add a new key-value pair to the `OpInfoMap` --- -# Backward Module (1/2) +## Backward Module (1/2) ### Create Backward Operator - Mapping from forward Op to backward Op ![backward](https://gist.githubusercontent.com/dzhwinter/a6fbd4623ee76c459f7f94591fd1abf0/raw/61026ab6e518e66bde66a889bc42557a1fccff33/backward.png) --- -# Backward Module (2/2) +## Backward Module (2/2) ### Build Backward Network - **Input**: a graph of forward operators - **Output**: a graph of backward operators @@ -205,7 +205,7 @@ REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) --- -# Scope, Variable, Tensor +## Scope, Variable, Tensor * `Tensor` is an n-dimension array with type. * Only dims and data pointers are stored in `Tensor`. @@ -218,8 +218,8 @@ REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) * `Scope` has a hierarchical structure. The local scope can get variables from its parent scope. --- -# Block (in design) -## the difference between original RNNOp and Block +## Block (in design) +### the difference between original RNNOp and Block - As an operator is more intuitive than `RNNOp`, - Offers a new interface `Eval(targets)` to deduce the minimal block to `Run`, - Fits the compile-time/ runtime separation design paradigm. @@ -227,7 +227,7 @@ REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) - When graph executes, a Block with `BlockDesc` is passed. It then creates `Op` and `Var` instances and then invokes `Run`. --- -# Milestone +## Milestone - Take Paddle/books as the main line, the requirement of the models motivates framework refactoring, - Model migration - Framework development gives **priority support** to model migration, for example, @@ -240,7 +240,7 @@ REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) - Accept imperfection, concentrate on solving the specific problem at the right price. --- -# Control the migration quality +## Control the migration quality - Compare the performance of migrated models with old ones. - Follow the google C++ style guide. - Build the automatic workflow of generating Python/C++ documentations. diff --git a/doc/fluid/design/muti_devices/index_cn.rst b/doc/fluid/design/muti_devices/index_cn.rst new file mode 100644 index 0000000000..1f8439e862 --- /dev/null +++ b/doc/fluid/design/muti_devices/index_cn.rst @@ -0,0 +1,9 @@ +多设备支持 +------------ + +.. toctree:: + :maxdepth: 1 + + operator_kernel_type.md + kernel_selection.md + kernel_hint_design.md diff --git a/doc/fluid/design/muti_devices/index_en.rst b/doc/fluid/design/muti_devices/index_en.rst new file mode 100644 index 0000000000..819e9c5d77 --- /dev/null +++ b/doc/fluid/design/muti_devices/index_en.rst @@ -0,0 +1,9 @@ +Multi-Device Support +---------------------- + +.. toctree:: + :maxdepth: 1 + + operator_kernel_type.md + kernel_selection.md + kernel_hint_design.md diff --git a/doc/fluid/design/muti_devices/kernel_hint_design.md b/doc/fluid/design/muti_devices/kernel_hint_design.md index a54b7da045..728c8f0b96 100644 --- a/doc/fluid/design/muti_devices/kernel_hint_design.md +++ b/doc/fluid/design/muti_devices/kernel_hint_design.md @@ -1,4 +1,4 @@ -## Problem +# Problem In PaddlePaddle's [Design](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md), one Operator may have multiple kernels. Users may have some personal preference to choose a certain type of kernel for an operator, such as `force_cpu` to choose a CPU kernel, `use_cudnn` to choose a CUDNN kernel, we need to provide a way for users to do this. In the current design, we use KernelType to describe one kernel. diff --git a/doc/fluid/design/muti_devices/kernel_selection.md b/doc/fluid/design/muti_devices/kernel_selection.md index 9719e031c7..39ea2b0009 100644 --- a/doc/fluid/design/muti_devices/kernel_selection.md +++ b/doc/fluid/design/muti_devices/kernel_selection.md @@ -1,4 +1,4 @@ -## Background +# Background Every operator has many kernels because there are multiple data types, places, data layout, library type that Fluid supports. We use the `OpKernelType ` to describe kernel types that operators can hold. The `OpKernelType ` is as follows: diff --git a/doc/fluid/design/network/index_cn.rst b/doc/fluid/design/network/index_cn.rst new file mode 100644 index 0000000000..3557d55fe4 --- /dev/null +++ b/doc/fluid/design/network/index_cn.rst @@ -0,0 +1,7 @@ +复杂网络设计 +------------ + +.. toctree:: + :maxdepth: 1 + + sequence_decoder.md diff --git a/doc/fluid/design/network/index_en.rst b/doc/fluid/design/network/index_en.rst new file mode 100644 index 0000000000..73a7137236 --- /dev/null +++ b/doc/fluid/design/network/index_en.rst @@ -0,0 +1,7 @@ +Complex Network Design +------------------------ + +.. toctree:: + :maxdepth: 1 + + sequence_decoder.md diff --git a/doc/fluid/dev/api_doc_std_cn.md b/doc/fluid/dev/api_doc_std_cn.md index 5596b2653a..b50f18f21d 100644 --- a/doc/fluid/dev/api_doc_std_cn.md +++ b/doc/fluid/dev/api_doc_std_cn.md @@ -45,11 +45,11 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 - Python API Definition - 格式: - + [Python API Definition] - + - 示例 - + ``` fc(input, size, @@ -63,19 +63,19 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 ``` - Function Description - + - 格式 本模块应包含以下内容(排列顺序为文档撰写顺序): [Function Description] - + [Formula] - + [Symbols' Descriptions if necessary] - + [References if necessary] - + - 示例 [Function Description] @@ -119,18 +119,18 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 [References if necessary] 因fc没有必要列出的参考文献,故该内容省略。其他情况下需明确给出对应的参考文献和对应连接,以 layer_norm 为例: - + ``` Refer to `Layer Normalization `_ for more details. ``` - + - Args Description - + - 格式 - + \[Arg's Name\][(Data Type, Default Value)][Description] - + - 示例 fc的部分参数注释如下: @@ -145,35 +145,35 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 ``` - Returns - + - 格式 - + [Name][Shape] - + - 示例 - + ``` Returns: A tensor variable storing the transformation result. ``` - + 当返回值为包含多个参数的tuple时,应按顺序逐个介绍各参数,以dynamic_lstm为例: - + ``` Returns: A tuple containing: The hidden state of LSTM whose shape is (T X D). The cell state of LSTM whose shape is (T X D). ``` - + - Raises - 格式 - + [Exception Type][Condition] - 示例 - + ``` Raises: ValueError: If the rank of the input is less than 2. @@ -182,7 +182,7 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 - Note - 格式 - + [Note] - 示例 @@ -198,15 +198,15 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 2. When num_heads == 1, scaled_dot_product_attention has no learnable parameters. ``` - + - Examples - 格式 \[Python Code Snipper] - + - 示例 - + ``` Examples: .. code-block:: python diff --git a/doc/fluid/dev/index_cn.rst b/doc/fluid/dev/index_cn.rst index e1edf079fa..e70bf5dff3 100644 --- a/doc/fluid/dev/index_cn.rst +++ b/doc/fluid/dev/index_cn.rst @@ -1,2 +1,13 @@ 开发标准 ------------ + +.. toctree:: + :maxdepth: 1 + + new_op_en.md + new_op_kernel_en.md + use_eigen_en.md + name_convention.md + support_new_device.md + releasing_process.md + op_markdown_format.md diff --git a/doc/fluid/dev/index_en.rst b/doc/fluid/dev/index_en.rst index faf9dfcd31..f0e9afcfcc 100644 --- a/doc/fluid/dev/index_en.rst +++ b/doc/fluid/dev/index_en.rst @@ -1,4 +1,13 @@ Development ------------ -This is Development page +.. toctree:: + :maxdepth: 1 + + new_op_en.md + new_op_kernel_en.md + use_eigen_en.md + name_convention.md + support_new_device.md + releasing_process.md + op_markdown_format.md diff --git a/doc/fluid/dev/name_convention.md b/doc/fluid/dev/name_convention.md index a02b356f05..75830ef28c 100644 --- a/doc/fluid/dev/name_convention.md +++ b/doc/fluid/dev/name_convention.md @@ -1,8 +1,8 @@ -## Operator's Parameter Name Convention +# Operator's Parameter Name Convention To make the operator document itself more clear, we recommend operator names obey the listing conventions. -### OpProtoMaker names +## OpProtoMaker names When defining an operator in Paddle, a corresponding [OpProtoMaker](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L170) (TODO: OpProtoMaker Doc)need to be defined. All the Input/Output and Attributes will write into the [OpProto](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L61) , and will be used in client language to create operator. @@ -20,7 +20,7 @@ When defining an operator in Paddle, a corresponding [OpProtoMaker](https://gith - Order. - Follow the order of Input/Output, then Attribute, then Comments. See the example in best practice. -### Best Practice +## Best Practice Here we give some examples to show how these rules will be used. diff --git a/doc/fluid/dev/new_op_kernel_en.md b/doc/fluid/dev/new_op_kernel_en.md index 123df0a7ee..55dea8d0a3 100644 --- a/doc/fluid/dev/new_op_kernel_en.md +++ b/doc/fluid/dev/new_op_kernel_en.md @@ -1,14 +1,14 @@ -## Add Kernels for a New Device +# Add Kernels for a New Device -### Background +## Background PaddlePaddle Fluid have hundreds of operators. Each operator could have one or more kernels. A kernel is an implementation of the operator for a certain device, which could be a hardware device, e.g., the CUDA GPU, or a library that utilizes a device, e.g., Intel MKL that makes full use of the Xeon CPU. [This document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md) explains how to add an operator, and its kernels. The kernels of an operator are indexed by a C++ type [`OpKernelType`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md). An operator chooses the right kernel at runtime. This choosing mechanism is described [here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md). -### Write Kernels for A New Device +## Write Kernels for A New Device -#### Add A New Device +### Add A New Device For some historical reaons, we misuse the word *library* for *device*. For example, we call the deivce type by *library type*. An example is the header file [`library_type.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/library_type.h#L24). We will correct this ASAP. @@ -23,7 +23,7 @@ enum class LibraryType { ``` -#### Add A New [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53) +### Add A New [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53) If you have a new kind of Device, firstly you need to add a new kind of [`Place`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53). For example `CUDAPlace`: @@ -45,7 +45,7 @@ struct CUDAPlace { typedef boost::variant Place; ``` -#### Add [device context]((https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37)) +### Add [device context]((https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37)) After a new kind of Device is added, you should add a corresponding [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37) for it. ```cpp @@ -58,7 +58,7 @@ class DeviceContext { }; ``` -#### Implement new [OpKernel](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L351) for your Device. +### Implement new [OpKernel](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L351) for your Device. A detailed documentation can be found in [`new_op_and_kernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md) @@ -85,7 +85,7 @@ class OpKernel : public OpKernelBase { ``` -#### Register the OpKernel to framework +### Register the OpKernel to framework After writing the components described above, we should register the kernel to the framework. @@ -107,7 +107,7 @@ take [`conv2d`]((https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/oper REGISTER_OP_KERNEL(conv2d, CPU, paddle::platform::CPUPlace, paddle::operators::GemmConvKernel, paddle::operators::GemmConvKernel); - + REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace, paddle::operators::CUDNNConvOpKernel, paddle::operators::CUDNNConvOpKernel); diff --git a/doc/fluid/dev/op_markdown_format.md b/doc/fluid/dev/op_markdown_format.md index 0ee804d592..4e539d7992 100644 --- a/doc/fluid/dev/op_markdown_format.md +++ b/doc/fluid/dev/op_markdown_format.md @@ -15,26 +15,26 @@ The signature of the operator. Each section mentioned above has been covered in further detail in the rest of the document. -# PaddlePaddle Operator Name +## PaddlePaddle Operator Name This should be in all small letters, in case of multiple words, we separate them with an underscore. For example: `array to lod tensor` should be written as `array_to_lod_tensor`. This naming convention should be standard across all PaddlePaddle operators. -# Standard Operator Name +## Standard Operator Name This is the standard name of the operator as used in the community. The general standard is usually: - Standard abbreviations like `SGD` are written in all capital letters. - Operator names that have multiple words inside a single word use `camelCase` (capitalize word boundaries inside of a word). - Keep numbers inside a word as is, with no boundary delimiters. - Follow the name of the operator with the keyword: `Activation Operator.` -# Operator description +## Operator description This section should contain the description of what the operator does, including the operation performed, the literature from where it comes and was introduced first, and other important details. The relevant paper/article including the hyperlink should be cited in this section. -# LaTeX equation +## LaTeX equation This section should contain an overall equation of the update or operation that the operator performs. The variables used in the equation should follow the naming convention of operators as described [here](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/name_convention.md). Two words in the same word should be separated by an underscore (`_`). -# The signature +## The signature This section describes the signature of the operator. A list of Inputs and Outputs, each of which have a small description of what the variable represents and the type of variable. The variable names follow the `CamelCase` naming convention. The proposed format for this is: `Section : VariableName : (VariableType) VariableDescription diff --git a/doc/fluid/dev/use_eigen_cn.md b/doc/fluid/dev/use_eigen_cn.md index f36843b440..75922e7d85 100644 --- a/doc/fluid/dev/use_eigen_cn.md +++ b/doc/fluid/dev/use_eigen_cn.md @@ -1,16 +1,16 @@ -## 在Paddle中如何使用Eigen +# 在Paddle中如何使用Eigen 神经网络本质上是一个计算图,计算需要的数据存放在`Tensor`中,而计算过程是由`Operartor`来描述的。在执行时,`Operator`调用对应`OpKernel`中的`Compute`接口,实现对`Tensor`的操作。 -### Eigen Tensor模块 +## Eigen Tensor模块 Eigen Tensor模块对element-wise计算提供了强大的支持,并且书写一份代码,可以同时在CPU、GPU执行。但Eigen Tensor是一个正在开发中的模块,因此可能测试不够完备,文档较少。 关于Eigen Tensor模块的详细介绍请参考[文档1](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) 和[文档2](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md) -### paddle::framework::Tensor +## paddle::framework::Tensor Paddle Tensor定义在framework目录下,其主要接口如下: @@ -20,14 +20,14 @@ class Tensor { /*! Return a pointer to mutable memory block. */ template inline T* data(); - + /** * @brief Return a pointer to mutable memory block. * @note If not exist, then allocation. */ template inline T* mutable_data(platform::Place place); - + /** * @brief Return a pointer to mutable memory block. * @@ -38,17 +38,17 @@ class Tensor { */ template inline T* mutable_data(DDim dims, platform::Place place); - + /*! Resize the dimensions of the memory block. */ inline Tensor& Resize(const DDim& dims); - + /*! Return the dimensions of the memory block. */ inline const DDim& dims() const; private: /*! holds the memory block if allocated. */ std::shared_ptr holder_; - + /*! points to dimensions of memory block. */ DDim dim_; }; @@ -129,7 +129,7 @@ From是EigenTensor模板提供的一个接口,可以实现从paddle::framework -### 实现计算 +## 实现计算 当需要完成计算时,我们需要等式左边的EigenTensor调用device接口。在这里需要注意的是,这里的EigenTensor之间的运算只是改变了原有Tensor中的数据,而不会改变原有Tensor的shape信息。 diff --git a/doc/fluid/dev/use_eigen_en.md b/doc/fluid/dev/use_eigen_en.md index 3a466f73d1..3313d097cb 100644 --- a/doc/fluid/dev/use_eigen_en.md +++ b/doc/fluid/dev/use_eigen_en.md @@ -1,9 +1,9 @@ -## How to use Eigen in Paddle +# How to use Eigen in Paddle Essentially, a neural network is a compute graph. T data needed for the computation is stored in `Tensor`s and its computation procedure is described by `Operator`s. An `Operator` calls the `Compute` interface in its corresponding `OpKernel` and operates on the `Tensor`. -### Eigen Tensor Module +## Eigen Tensor Module The Eigen Tensor module supports powerful element-wise computation. In addition, a piece of code written using it can be run on both the CPU and the GPU. @@ -12,7 +12,7 @@ Note that Eigen Tensor is still being actively developed, so its tests are not c For details on Eigen Tensor module, please see [doc 1](https://github.com/RLovelett/eigen/blob/master/unsupported/Eigen/CXX11/src/Tensor/README.md) and [doc 2](https://bitbucket.org/eigen/eigen/src/default/unsupported/Eigen/CXX11/src/Tensor/README.md). -### paddle::framework::Tensor +## paddle::framework::Tensor Paddle Tensor's is defined in the framework directory with the following interface: @@ -105,7 +105,7 @@ void Compute(const framework::ExecutionContext& context) const override { ``` -### paddle::framework::Tensor到EigenTensor的转换 +## paddle::framework::Tensor到EigenTensor的转换 As shown above, in actual computation, we need to transform the input and output `Tensor`s into formats Eigen supports. We show some functions in [eigen.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/eigen.h) to implement the transformation from `paddle::framework::Tensor`to `EigenTensor/EigenMatrix/EigenVector/EigenScalar`. @@ -129,7 +129,7 @@ For more transformations, see the [unit tests](https://github.com/PaddlePaddle/P -### Implementing Computation +## Implementing Computation While computing, the device interface is needed from the EigenTensors on the left hand side of the assignments. Note that the computation between EigenTensors only changes the data originally inthe Tensor and does not change all the shape information associated with the Tensor. diff --git a/doc/fluid/getstarted/concepts/index_cn.rst b/doc/fluid/getstarted/concepts/index_cn.rst new file mode 100644 index 0000000000..2e7f70fc4c --- /dev/null +++ b/doc/fluid/getstarted/concepts/index_cn.rst @@ -0,0 +1,4 @@ +基本使用概念 +============ + +TBD diff --git a/doc/fluid/getstarted/concepts/index_en.rst b/doc/fluid/getstarted/concepts/index_en.rst new file mode 100644 index 0000000000..78cca1e2a3 --- /dev/null +++ b/doc/fluid/getstarted/concepts/index_en.rst @@ -0,0 +1,4 @@ +Concepts +============ + +TBD diff --git a/doc/fluid/getstarted/index_cn.rst b/doc/fluid/getstarted/index_cn.rst index c4d8525f23..75af7354be 100644 --- a/doc/fluid/getstarted/index_cn.rst +++ b/doc/fluid/getstarted/index_cn.rst @@ -1,4 +1,19 @@ 新手入门 ------------- +============ -新手入门 + +如果需要快速了解PaddlePaddle的使用,可以参考以下指南。 + +.. toctree:: + :maxdepth: 1 + + quickstart_cn.rst + + +在使用PaddlePaddle构建应用时,需要了解一些基本概念。 +这里以一个线性回归为例子,详细介绍了PaddlePaddle的使用流程,包括数据格式,模型配置与训练等。 + +.. toctree:: + :maxdepth: 1 + + concepts/use_concepts_cn.rst diff --git a/doc/fluid/getstarted/index_en.rst b/doc/fluid/getstarted/index_en.rst index a4efd05e2f..75a43f4af8 100644 --- a/doc/fluid/getstarted/index_en.rst +++ b/doc/fluid/getstarted/index_en.rst @@ -1,4 +1,18 @@ GET STARTED ------------- +============ -This is get started page +If you want to quickly know how to use PaddlePaddle, please refer to the following guide: + +.. toctree:: + :maxdepth: 1 + + quickstart_en.rst + +While using PaddlePaddle to build applications, please understand some basic concepts. + +Here is an example of linear regression. It introduces workflow of PaddlePaddle, including data format, model configuration and training, etc. + +.. toctree:: + :maxdepth: 1 + + concepts/index_en.rst diff --git a/doc/fluid/getstarted/quickstart_cn.rst b/doc/fluid/getstarted/quickstart_cn.rst new file mode 120000 index 0000000000..93a9e4e37a --- /dev/null +++ b/doc/fluid/getstarted/quickstart_cn.rst @@ -0,0 +1 @@ +../../v2/getstarted/quickstart_cn.rst \ No newline at end of file diff --git a/doc/fluid/getstarted/quickstart_en.rst b/doc/fluid/getstarted/quickstart_en.rst new file mode 120000 index 0000000000..6e1894faa1 --- /dev/null +++ b/doc/fluid/getstarted/quickstart_en.rst @@ -0,0 +1 @@ +../../v2/getstarted/quickstart_en.rst \ No newline at end of file diff --git a/doc/fluid/howto/index_cn.rst b/doc/fluid/howto/index_cn.rst index a92abad0c5..97aeaf167d 100644 --- a/doc/fluid/howto/index_cn.rst +++ b/doc/fluid/howto/index_cn.rst @@ -1,2 +1,7 @@ 进阶使用 ------------ + +.. toctree:: + :maxdepth: 1 + + optimization/index_cn.rst diff --git a/doc/fluid/howto/index_en.rst b/doc/fluid/howto/index_en.rst index 06036bdce5..fd21e167ce 100644 --- a/doc/fluid/howto/index_en.rst +++ b/doc/fluid/howto/index_en.rst @@ -1,4 +1,7 @@ HOW TO ------------ -This is how to page +.. toctree:: + :maxdepth: 1 + + optimization/index_en.rst diff --git a/doc/fluid/howto/optimization/benchmark/README.md b/doc/fluid/howto/optimization/benchmark/README.md new file mode 120000 index 0000000000..db30af7f53 --- /dev/null +++ b/doc/fluid/howto/optimization/benchmark/README.md @@ -0,0 +1 @@ +../../../../../benchmark/cluster/README.md \ No newline at end of file diff --git a/doc/fluid/howto/optimization/benchmark/index_cn.rst b/doc/fluid/howto/optimization/benchmark/index_cn.rst new file mode 100644 index 0000000000..9404800eb8 --- /dev/null +++ b/doc/fluid/howto/optimization/benchmark/index_cn.rst @@ -0,0 +1,8 @@ +基准 +------------ + +.. toctree:: + :maxdepth: 1 + + vgg16/README.md + README.md diff --git a/doc/fluid/howto/optimization/benchmark/index_en.rst b/doc/fluid/howto/optimization/benchmark/index_en.rst new file mode 100644 index 0000000000..1e200b660c --- /dev/null +++ b/doc/fluid/howto/optimization/benchmark/index_en.rst @@ -0,0 +1,8 @@ +Benchmark +------------ + +.. toctree:: + :maxdepth: 1 + + vgg16/README.md + README.md diff --git a/doc/fluid/howto/optimization/benchmark/vgg16/README.md b/doc/fluid/howto/optimization/benchmark/vgg16/README.md new file mode 120000 index 0000000000..ca963ef5f0 --- /dev/null +++ b/doc/fluid/howto/optimization/benchmark/vgg16/README.md @@ -0,0 +1 @@ +../../../../../../benchmark/cluster/vgg16/README.md \ No newline at end of file diff --git a/doc/fluid/howto/optimization/cpu_profiling_cn.md b/doc/fluid/howto/optimization/cpu_profiling_cn.md index d59be670c2..17f895573a 100644 --- a/doc/fluid/howto/optimization/cpu_profiling_cn.md +++ b/doc/fluid/howto/optimization/cpu_profiling_cn.md @@ -8,7 +8,7 @@ PaddlePaddle 用户一般通过调用 Python API 编写深度学习程序。大 * Python 与 C++ 混合代码的性能分析 -## Python代码的性能分析 +# Python代码的性能分析 ### 生成性能分析文件 diff --git a/doc/fluid/howto/optimization/cpu_profiling_en.md b/doc/fluid/howto/optimization/cpu_profiling_en.md index 01e5fddf61..abe4493c17 100644 --- a/doc/fluid/howto/optimization/cpu_profiling_en.md +++ b/doc/fluid/howto/optimization/cpu_profiling_en.md @@ -14,7 +14,7 @@ the profiling and tuning of 1. the Python code and 1. the mixture of Python and C++ code. -## Profiling the Python Code +# Profiling the Python Code ### Generate the Performance Profiling File @@ -81,7 +81,7 @@ focus on. We can sort above profiling file by tottime: We can see that the most time-consuming function is the `built-in method run`, which is a C++ function in `libpaddle.so`. We will -explain how to profile C++ code in the next section. At this +explain how to profile C++ code in the next section. At this moment, let's look into the third function `sync_with_cpp`, which is a Python function. We can click it to understand more about it: diff --git a/doc/fluid/howto/optimization/index_cn.rst b/doc/fluid/howto/optimization/index_cn.rst new file mode 100644 index 0000000000..27cc967023 --- /dev/null +++ b/doc/fluid/howto/optimization/index_cn.rst @@ -0,0 +1,9 @@ +性能优化 +------------ + +.. toctree:: + :maxdepth: 1 + + timeline.md + cpu_profiling_cn.md + benchmark/index_cn.rst diff --git a/doc/fluid/howto/optimization/index_en.rst b/doc/fluid/howto/optimization/index_en.rst new file mode 100644 index 0000000000..4ce624fe8f --- /dev/null +++ b/doc/fluid/howto/optimization/index_en.rst @@ -0,0 +1,9 @@ +Performance Optimization +--------------------------- + +.. toctree:: + :maxdepth: 1 + + timeline.md + cpu_profiling_en.md + benchmark/index_en.rst diff --git a/doc/fluid/howto/optimization/timeline.md b/doc/fluid/howto/optimization/timeline.md index 9d9565a3e6..96481ae2a6 100644 --- a/doc/fluid/howto/optimization/timeline.md +++ b/doc/fluid/howto/optimization/timeline.md @@ -1,4 +1,4 @@ -## how to use timeline tool to do profile +# how to use timeline tool to do profile 1. Add `with profiler.profiler(...)` to the main training loop. After run, the code will generate a profile record file `/tmp/profile`. **Warning**: Please do not run too many batches when use profiler to record timeline information, for the profile record will grow with the batch number. diff --git a/doc/fluid/index_cn.rst b/doc/fluid/index_cn.rst index be3bed4393..d878d192ca 100644 --- a/doc/fluid/index_cn.rst +++ b/doc/fluid/index_cn.rst @@ -5,8 +5,8 @@ :maxdepth: 1 getstarted/index_cn.rst - design/index_cn.rst build_and_install/index_cn.rst + design/index_cn.rst howto/index_cn.rst dev/index_cn.rst faq/index_cn.rst diff --git a/doc/fluid/index_en.rst b/doc/fluid/index_en.rst index 87c831420a..2bc76b5898 100644 --- a/doc/fluid/index_en.rst +++ b/doc/fluid/index_en.rst @@ -5,8 +5,8 @@ :maxdepth: 1 getstarted/index_en.rst - design/index_en.rst build_and_install/index_en.rst + design/index_en.rst howto/index_en.rst dev/index_en.rst faq/index_en.rst diff --git a/doc/fluid/design/interface/00.why_plain_c.md b/doc/v2/design/interface/00.why_plain_c.md similarity index 100% rename from doc/fluid/design/interface/00.why_plain_c.md rename to doc/v2/design/interface/00.why_plain_c.md diff --git a/doc/fluid/design/interface/01.inference_implementation.md b/doc/v2/design/interface/01.inference_implementation.md similarity index 100% rename from doc/fluid/design/interface/01.inference_implementation.md rename to doc/v2/design/interface/01.inference_implementation.md diff --git a/doc/v2/design/interface/index_cn.rst b/doc/v2/design/interface/index_cn.rst new file mode 100644 index 0000000000..2509a5c5f4 --- /dev/null +++ b/doc/v2/design/interface/index_cn.rst @@ -0,0 +1,7 @@ +多语言接口 +------------ + +.. toctree:: + :maxdepth: 1 + + 00.why_plain_c.md diff --git a/doc/v2/design/interface/index_en.rst b/doc/v2/design/interface/index_en.rst new file mode 100644 index 0000000000..356e58c39c --- /dev/null +++ b/doc/v2/design/interface/index_en.rst @@ -0,0 +1,7 @@ +Multilingual Interface +----------------------- + +.. toctree:: + :maxdepth: 1 + + 00.why_plain_c.md diff --git a/doc/v2/design/mkl/mkldnn.md b/doc/v2/design/mkl/mkldnn.md index e2fe1e6b26..1bd2e7bc34 100644 --- a/doc/v2/design/mkl/mkldnn.md +++ b/doc/v2/design/mkl/mkldnn.md @@ -44,7 +44,7 @@ MKL,MKLML以及MKL-DNN三者关系如下表: | Name | Open Source | License | Descriptions | | :---------- | :--------------- | :---------- | :------------ | -| MKL | No | Proprietary | Accelerate math processing routines | +| MKL | No | Proprietary | Accelerate math processing routines | | MKLML | No | Proprietary | Small package of MKL, especially for Machine Learning | | MKL-DNN | Yes | Apache 2.0 | Accelerate primitives processing routines especially for Deep Neural Networks | @@ -89,7 +89,7 @@ PaddlePaddle/Paddle ### CMake 在`CMakeLists.txt`中提供一个与MKL有关的总开关:`WITH_MKL`,它负责决定编译时是否使用MKLML和MKL-DNN -- `WITH_MKLML` 控制是否使用MKLML库。 +- `WITH_MKLML` 控制是否使用MKLML库。 当打开`WITH_MKL`时,会自动使用MKLML库作为PaddlePaddle的CBLAS和LAPACK库,同时会开启Intel OpenMP用于提高MKLML的性能。 编译时会把对应的头文件和库放在`build/third_party/install/mklml/*`目录下对应的地方。 MKLML的库目前都是动态库,主要包括`libiomp5.so`和`libmklml_intel.so`。 @@ -172,7 +172,7 @@ if use_mkldnn self.layer_type = mkldnn_* ``` -所有MKL-DNN的`layer_type`会以*mkldnn_*开头,这些会在`MKLDNN*Layer`注册layer的时候保证,以示区分。 +所有MKL-DNN的`layer_type`会以*mkldnn_*开头,这些会在`MKLDNN*Layer`注册layer的时候保证,以示区分。 同时,会在`paddle/utils.Flags`中添加一个`use_mkldnn`的flag,用于选择是否使用MKL-DNN的相关功能。 From f0af1398b8216428255b7981a4fe0b490d2c03e6 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Fri, 30 Mar 2018 11:30:05 +0800 Subject: [PATCH 180/180] add prefetch_op (#9495) * add prefetch_op * fix ci * optimize code * optimize code * fix include --- paddle/fluid/operators/CMakeLists.txt | 6 +- paddle/fluid/operators/detail/grpc_client.cc | 50 +++++++- paddle/fluid/operators/detail/grpc_client.h | 7 ++ paddle/fluid/operators/prefetch_op.cc | 115 +++++++++++++++++++ paddle/fluid/operators/send_op.cc | 20 +--- paddle/fluid/operators/send_recv_util.h | 36 ++++++ paddle/fluid/operators/send_vars_op.cc | 23 +--- 7 files changed, 213 insertions(+), 44 deletions(-) create mode 100644 paddle/fluid/operators/prefetch_op.cc create mode 100644 paddle/fluid/operators/send_recv_util.h diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 8341170d68..9ed79453b9 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -183,6 +183,8 @@ if(WITH_DISTRIBUTE) set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") op_library(send_op DEPS ${DISTRIBUTE_DEPS}) set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + op_library(prefetch_op DEPS ${DISTRIBUTE_DEPS}) + set_source_files_properties(prefetch_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) op_library(recv_op DEPS ${DISTRIBUTE_DEPS}) set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) op_library(listen_and_serv_op DEPS ${DISTRIBUTE_DEPS}) @@ -191,9 +193,9 @@ if(WITH_DISTRIBUTE) set_source_files_properties(send_vars_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS}) set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op listen_and_serv_op sum_op executor) + cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op listen_and_serv_op sum_op executor) else() - set(DEPS_OPS ${DEPS_OPS} send_op recv_op listen_and_serv_op send_vars_op send_barrier_op) + set(DEPS_OPS ${DEPS_OPS} send_op prefetch_op recv_op listen_and_serv_op send_vars_op send_barrier_op) endif() op_library(cond_op DEPS framework_proto tensor net_op) diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/detail/grpc_client.cc index 03b789f326..9652bb888b 100644 --- a/paddle/fluid/operators/detail/grpc_client.cc +++ b/paddle/fluid/operators/detail/grpc_client.cc @@ -88,10 +88,13 @@ bool RPCClient::AsyncGetVariable(const std::string& ep, const auto ch = GetChannel(ep_val); framework::Async([var_name_val, ep_val, p_scope, p_ctx, time_out, ch, this] { + // prepare input sendrecv::VariableMessage req; req.set_varname(var_name_val); + ::grpc::ByteBuffer buf; + RequestToByteBuffer(req, &buf); - // varhandle + // var handle VarHandle var_h; var_h.ep = ep_val; var_h.scope = p_scope; @@ -103,9 +106,6 @@ bool RPCClient::AsyncGetVariable(const std::string& ep, s->Prepare(var_h, time_out); s->response_call_back_ = ProcGetResponse; - ::grpc::ByteBuffer buf; - RequestToByteBuffer(req, &buf); - auto call = s->stub_g_.PrepareUnaryCall( s->context_.get(), "/sendrecv.SendRecvService/GetVariable", buf, &cq_); call->StartCall(); @@ -117,6 +117,48 @@ bool RPCClient::AsyncGetVariable(const std::string& ep, return true; } +bool RPCClient::AsyncPrefetchVariable(const std::string& ep, + const platform::DeviceContext& ctx, + const framework::Scope& scope, + const std::string& in_var_name, + const std::string& out_var_name, + int64_t time_out) { + const platform::DeviceContext* p_ctx = &ctx; + const std::string ep_val = ep; + const std::string in_var_name_val = in_var_name; + const std::string out_var_name_val = out_var_name; + const framework::Scope* p_scope = &scope; + const auto ch = GetChannel(ep_val); + + framework::Async([in_var_name_val, out_var_name_val, ep_val, p_scope, p_ctx, + time_out, ch, this] { + auto* var = p_scope->FindVar(in_var_name_val); + + ::grpc::ByteBuffer req; + SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req); + + // var handle + VarHandle var_h; + var_h.ep = ep_val; + var_h.scope = p_scope; + var_h.name = out_var_name_val; + var_h.ctx = p_ctx; + + // stub context + GetProcessor* s = new GetProcessor(ch); + s->Prepare(var_h, time_out); + s->response_call_back_ = ProcGetResponse; + + auto call = s->stub_g_.PrepareUnaryCall( + s->context_.get(), "/sendrecv.SendRecvService/GetVariable", req, &cq_); + call->StartCall(); + call->Finish(&s->reply_, &s->status_, (void*)s); + }); + + req_count_++; + return true; +} + void RPCClient::AsyncSendBatchBarrier(const std::string& ep, int64_t time_out) { const auto ch = GetChannel(ep); diff --git a/paddle/fluid/operators/detail/grpc_client.h b/paddle/fluid/operators/detail/grpc_client.h index 8216ac52fb..fe237e54ef 100644 --- a/paddle/fluid/operators/detail/grpc_client.h +++ b/paddle/fluid/operators/detail/grpc_client.h @@ -172,6 +172,13 @@ class RPCClient { const std::string& var_name, int64_t time_out = 600 * 1000); + bool AsyncPrefetchVariable(const std::string& ep, + const platform::DeviceContext& ctx, + const framework::Scope& scope, + const std::string& in_var_name, + const std::string& out_var_name, + int64_t time_out = 600 * 1000); + void AsyncSendBatchBarrier(const std::string& ep, int64_t time_out = 600 * 1000); diff --git a/paddle/fluid/operators/prefetch_op.cc b/paddle/fluid/operators/prefetch_op.cc new file mode 100644 index 0000000000..09ab7da663 --- /dev/null +++ b/paddle/fluid/operators/prefetch_op.cc @@ -0,0 +1,115 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detail/grpc_client.h" +#include "paddle/fluid/operators/send_recv_util.h" + +namespace paddle { +namespace operators { + +class PrefetchOp : public framework::OperatorBase { + public: + PrefetchOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void RunImpl(const framework::Scope& scope, + const platform::Place& place) const override { + auto ins = Inputs("X"); + auto outs = Outputs("Out"); + + std::vector epmap = Attr>("epmap"); + + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + auto& ctx = *pool.Get(place); + + auto client_var_name = Output("RPCClient"); + PADDLE_ENFORCE_NOT_NULL(scope.FindVar(client_var_name), + "Can not find variable '%s' in the scope.", + client_var_name); + auto* client_var = scope.FindVar(client_var_name); + detail::RPCClient* rpc_client = client_var->GetMutable(); + + for (size_t i = 0; i < ins.size(); i++) { + if (NeedSend(scope, ins[i])) { + VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << "to get " + << outs[i] << "back"; + rpc_client->AsyncPrefetchVariable(epmap[i], ctx, scope, ins[i], + outs[i]); + } else { + VLOG(3) << "don't send no-initialied variable: " << ins[i]; + } + } + PADDLE_ENFORCE(rpc_client->Wait()); + } +}; + +class PrefetchOpMaker : public framework::OpProtoAndCheckerMaker { + public: + PrefetchOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(LoDTensor) Input Id variables to be sent").AsDuplicable(); + AddOutput("RPCClient", + "(RPCClient) The RPC client object which will be" + "initialized at most once."); + AddOutput("Out", + "(SelectedRows) result " + "to be fetched from parameter server") + .AsDuplicable(); + AddAttr>( + "epmap", + "(string vector, default 127.0.0.1:6164)" + "Server endpoints in the order of input variables for mapping") + .SetDefault({"127.0.0.1:6164"}); + AddComment(R"DOC( +Prefetch operator + +This operator will send Ids variables to listen_and_serve op at +the parameter server and fetch result back. +)DOC"); + } +}; + +class PrefetchOpVarTypeInference : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc& op_desc, + framework::BlockDesc* block) const override { + auto out_var_name = op_desc.Output("RPCClient").front(); + auto& out_var = block->FindRecursiveOrCreateVar(out_var_name); + auto var_type = framework::proto::VarType::RAW; + out_var.SetType(var_type); + } +}; + +class PrefetchOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext* ctx) const override {} +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(prefetch, ops::PrefetchOp, + paddle::framework::EmptyGradOpMaker, ops::PrefetchOpMaker, + ops::PrefetchOpVarTypeInference, + ops::PrefetchOpShapeInference); diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc index 0752bd1bbd..d47f66de21 100644 --- a/paddle/fluid/operators/send_op.cc +++ b/paddle/fluid/operators/send_op.cc @@ -12,35 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" - -#include #include "paddle/fluid/operators/detail/grpc_client.h" +#include "paddle/fluid/operators/send_recv_util.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { -static bool NeedSend(const framework::Scope& scope, - const std::string& varname) { - auto* var = scope.FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", - varname); - if (var->IsType()) { - return var->Get().IsInitialized(); - } else if (var->IsType()) { - return var->Get().rows().size() > 0UL; - } else { - PADDLE_THROW( - "Variable type in send side should be in " - "[LodTensor, SelectedRows]"); - } - return false; -} class SendOp : public framework::OperatorBase { public: diff --git a/paddle/fluid/operators/send_recv_util.h b/paddle/fluid/operators/send_recv_util.h new file mode 100644 index 0000000000..196f56f634 --- /dev/null +++ b/paddle/fluid/operators/send_recv_util.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +namespace paddle { +namespace operators { + +inline bool NeedSend(const framework::Scope& scope, + const std::string& varname) { + auto* var = scope.FindVar(varname); + PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", + varname); + if (var->IsType()) { + return var->Get().IsInitialized(); + } else if (var->IsType()) { + return var->Get().rows().size() > 0UL; + } else { + PADDLE_THROW( + "Variable type in send side should be in " + "[LodTensor, SelectedRows]"); + } + return false; +} + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/send_vars_op.cc b/paddle/fluid/operators/send_vars_op.cc index 523e9e2780..2cbd9e2394 100644 --- a/paddle/fluid/operators/send_vars_op.cc +++ b/paddle/fluid/operators/send_vars_op.cc @@ -12,34 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" - -#include #include "paddle/fluid/operators/detail/grpc_client.h" +#include "paddle/fluid/operators/send_recv_util.h" namespace paddle { namespace operators { -static bool NeedSend(const framework::Scope& scope, - const std::string& varname) { - auto* var = scope.FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", - varname); - if (var->IsType()) { - return var->Get().IsInitialized(); - } else if (var->IsType()) { - return var->Get().rows().size() > 0UL; - } else { - PADDLE_THROW( - "Variable type in send side should be in " - "[LodTensor, SelectedRows]"); - } - return false; -} class SendVarsOp : public framework::OperatorBase { public: @@ -95,7 +78,7 @@ Send operator This operator will send variables to listen_and_serve op at the parameter server. )DOC"); - AddAttr("ync_send", + AddAttr("sync_send", "(int, default 0)" "sync send or async send.") .SetDefault(0);