Feature/mem opt pass refactor (#18735)

* first version memory optimize pass, test=develop

* remove move_tensor_sharing_pass, test=develop

* refine code comments, add unittests, test=develop

* turn off memory_optimize by default, test=develop

* follow huihuang's comments, test=develop

* follow chengduoZH's comments, test=develop

* fix grammar error, add const qualifier, fix pass_test exception message, test=develop

* follow chengduoZH's comments 2nd, test=develop
Zeng Jinle authored 6 years ago, committed by GitHub
parent c5f47c2107
commit a802da650b

@ -3,7 +3,10 @@ cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context
cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory)
cc_library(share_tensor_buffer_functor SRCS share_tensor_buffer_functor.cc DEPS framework_proto scope place operator op_registry)
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(share_tensor_buffer_op_handle SRCS share_tensor_buffer_op_handle.cc DEPS op_handle_base scope computation_op_handle share_tensor_buffer_functor)
cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper)
@ -59,12 +62,7 @@ cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope d
cc_library(eager_deletion_op_handle SRCS eager_deletion_op_handle.cc DEPS lod_tensor selected_rows reference_count_pass_helper)
cc_library(share_tensor_buffer_op_handle SRCS share_tensor_buffer_op_handle.cc DEPS op_handle_base scope)
set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass buffer_shared_inplace_op_pass)
if (WITH_GPU)
list(APPEND SSA_GRAPH_EXECUTOR_DEPS reference_count_pass)
endif()
set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass buffer_shared_inplace_op_pass buffer_shared_cross_op_memory_reuse_pass)
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS})
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope

@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph_to_program_pass.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h"
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_print_pass.h"

@ -108,11 +108,6 @@ struct BuildStrategy {
// FLAGS_use_mkldnn=false
std::unordered_set<std::string> mkldnn_enabled_op_types_;
// FIXME(liuwei1031) disable memory_optimzie and enable_inplace in 1.4
// to open them by default, we need to solve the fetch variable issue
// TODO(liuwei1031): memory_optimize depends on kStaleProgramOpDescs,
// it is not appropriate, because kStaleProgramOpDescs will be removed in the
// near future.
bool memory_optimize_{false};
// Turn on inplace by default.

@ -108,6 +108,8 @@ class OpHandleBase {
ir::Node *Node() { return node_; }
const ir::Node *Node() const { return node_; }
void SetLocalExecScopes(
const std::unordered_map<Scope *, Scope *> &scope_map);

@ -0,0 +1,126 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/share_tensor_buffer_functor.h"
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
namespace details {
// TODO(zjl): support SelectedRows
static inline const Tensor &GetTensorFromVar(const Variable *var) {
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>();
} else {
PADDLE_THROW("Variable must be type of LoDTensor");
}
}
static inline Tensor *GetMutableTensorFromVar(Variable *var) {
if (var->IsType<LoDTensor>()) {
return var->GetMutable<LoDTensor>();
} else {
PADDLE_THROW("Variable must be type of LoDTensor");
}
}
ShareTensorBufferFunctor::ShareTensorBufferFunctor(
Scope *scope, size_t scope_idx, const std::string &op_type,
const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
const std::vector<std::string> &out_var_names)
: scope_(scope),
scope_idx_(scope_idx),
op_type_(op_type),
in_var_infos_(in_var_infos),
out_var_names_(out_var_names) {
PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size());
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
AddReuseVarPair(in_var_infos_[i], out_var_names_[i]);
}
}
std::unordered_map<std::string, std::string>
ShareTensorBufferFunctor::ReusedVars() const {
std::unordered_map<std::string, std::string> result;
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
result.insert({in_var_infos_[i]->Name(), out_var_names_[i]});
}
return result;
}
void ShareTensorBufferFunctor::AddReuseVarPair(
const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name) {
PADDLE_ENFORCE_NOT_NULL(in_var_info, "in_var_info cannot be nullptr");
PADDLE_ENFORCE_NE(in_var_info->Name(), out_var_name,
"in/out cannot have same name: %s", out_var_name);
in_var_infos_.emplace_back(in_var_info);
out_var_names_.emplace_back(out_var_name);
}
void ShareTensorBufferFunctor::CallOnce() {
PADDLE_ENFORCE(in_out_vars_.empty(), "in_out_vars_ must be initialized here");
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
auto *in_var = exec_scope_->FindVar(in_var_infos_[i]->Name());
auto *out_var = exec_scope_->FindVar(out_var_names_[i]);
PADDLE_ENFORCE_NOT_NULL(in_var);
PADDLE_ENFORCE_NOT_NULL(out_var);
PADDLE_ENFORCE_NE(in_var, out_var);
in_out_vars_.emplace_back(in_var, out_var);
}
}
void ShareTensorBufferFunctor::operator()(Scope *exec_scope) {
if (!exec_scope_) {
PADDLE_ENFORCE_NOT_NULL(exec_scope);
exec_scope_ = exec_scope;
CallOnce();
} else {
PADDLE_ENFORCE(exec_scope_ == exec_scope, "Scope must be the same");
}
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
const auto &in_tensor = GetTensorFromVar(in_out_vars_[i].first);
auto *out_tensor = GetMutableTensorFromVar(in_out_vars_[i].second);
auto *in_var_info = in_var_infos_[i];
if (UNLIKELY(in_var_info->IsSkipped())) {
// If in_var was inplaced in the previous batch and we want to fetch
// in_var in the current batch, we have to reset the memory of out_var
// to avoid a wrong calculation result.
if (in_tensor.Holder() == out_tensor->Holder()) {
VLOG(1) << "Clear " << out_var_names_[i]
<< " because you may want to fetch an inplaced variable "
<< in_var_info->Name()
<< " in previous batch: " << in_var_info->Name() << " -> "
<< out_var_names_[i];
out_tensor->clear();
}
} else {
out_tensor->ShareBufferWith(in_tensor);
VLOG(2) << "Share tensor buffer when running " << op_type_ << " : "
<< in_var_info->Name() << " -> " << out_var_names_[i];
}
}
}
} // namespace details
} // namespace framework
} // namespace paddle
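
For readers skimming the diff, the per-variable logic in operator() above boils down to a share-or-clear rule. The following is a minimal standalone sketch of that rule, not Paddle code: MiniTensor, its holder field, and ShareOrClear are invented stand-ins for Tensor::Holder(), Tensor::ShareBufferWith(), and Tensor::clear().

#include <cassert>
#include <memory>

struct MiniTensor {
  std::shared_ptr<int> holder;  // stands in for the underlying allocation
  void ShareBufferWith(const MiniTensor &other) { holder = other.holder; }
  void Clear() { holder.reset(); }
};

// If reuse of `in` is skipped (e.g. the user wants to fetch it) and `out`
// still aliases `in` from a previous batch, drop the alias; otherwise alias.
void ShareOrClear(const MiniTensor &in, MiniTensor *out, bool in_is_skipped) {
  if (in_is_skipped) {
    if (in.holder == out->holder) out->Clear();
  } else {
    out->ShareBufferWith(in);
  }
}

int main() {
  MiniTensor in{std::make_shared<int>(1)};
  MiniTensor out;
  ShareOrClear(in, &out, /*in_is_skipped=*/false);
  assert(out.holder == in.holder);  // buffers are now shared
  ShareOrClear(in, &out, /*in_is_skipped=*/true);
  assert(out.holder == nullptr);    // alias dropped so a fetch sees fresh memory
  return 0;
}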

@ -0,0 +1,73 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
namespace paddle {
namespace framework {
namespace details {
// NOTE(paddle-dev): ShareTensorBufferFunctor is responsible for
// performing memory reuse at run time. ShareTensorBufferOpHandle
// is only a wrapper around ShareTensorBufferFunctor.
// If the run-time memory reuse strategy turns out to be time-consuming
// in scheduling, we may need a pass that moves ShareTensorBufferFunctor
// into each ComputationOpHandle. ShareTensorBufferFunctor is kept
// separate to make this possible move easier.
class ShareTensorBufferFunctor {
public:
ShareTensorBufferFunctor(
Scope *scope, size_t scope_idx, const std::string &op_type,
const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
const std::vector<std::string> &out_var_names);
void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info,
const std::string &out_var_name);
void operator()(Scope *exec_scope);
std::unordered_map<std::string, std::string> ReusedVars() const;
size_t GetScopeIdx() const { return scope_idx_; }
Scope *GetScope() { return scope_; }
private:
void CallOnce();
private:
Scope *scope_;
Scope *exec_scope_{nullptr};
size_t scope_idx_;
std::string op_type_;
std::vector<const ir::MemOptVarInfo *> in_var_infos_;
std::vector<std::string> out_var_names_;
std::vector<std::pair<const Variable *, Variable *>> in_out_vars_;
};
} // namespace details
} // namespace framework
} // namespace paddle
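
A rough way to read the NOTE above: the reuse logic is a callable that only needs the execution scope, so it can be stored either in a dedicated op handle (the current design) or inside the computation op itself (the possible future move). The sketch below models that wrapping pattern with invented types (FakeScope, ReuseOpHandle, ComputeOpHandle); it is illustration only, not Paddle code.

#include <functional>
#include <iostream>

struct FakeScope { int id; };

// Stand-in for ShareTensorBufferFunctor: a callable over the execution scope.
using ReuseFunctor = std::function<void(FakeScope *)>;

// Stand-in for ShareTensorBufferOpHandle: a thin wrapper that just stores and
// invokes the functor, mirroring RunImpl() { functor_(local_exec_scopes_[0]); }.
struct ReuseOpHandle {
  ReuseFunctor functor;
  void Run(FakeScope *scope) { functor(scope); }
};

// Stand-in for a ComputationOpHandle that runs the functor right before its
// own computation, which is the possible move the NOTE describes.
struct ComputeOpHandle {
  ReuseFunctor pre_run_reuse;  // optional memory-reuse hook
  void Run(FakeScope *scope) {
    if (pre_run_reuse) pre_run_reuse(scope);
    std::cout << "compute in scope " << scope->id << "\n";
  }
};

int main() {
  ReuseFunctor share = [](FakeScope *s) {
    std::cout << "share tensor buffers in scope " << s->id << "\n";
  };
  FakeScope scope{0};

  ReuseOpHandle as_op{share};    // current design: a separate op handle
  as_op.Run(&scope);

  ComputeOpHandle fused{share};  // possible future design: fused into compute op
  fused.Run(&scope);
  return 0;
}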

@ -25,55 +25,42 @@ namespace paddle {
namespace framework {
namespace details {
// TODO(zjl): support SelectedRows
static inline const Tensor &GetTensorFromVar(const Variable *var) {
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>();
} else {
PADDLE_THROW("Variable must be type of LoDTensor");
}
}
ComputationOpHandle *GetUniquePendingComputationOpHandle(
ShareTensorBufferOpHandle *share_tensor_op) {
ComputationOpHandle *result_op = nullptr;
for (ir::Node *out_var : share_tensor_op->Node()->outputs) {
for (ir::Node *pending_op : out_var->outputs) {
auto &op = pending_op->Wrapper<OpHandleBase>();
auto *compute_op = dynamic_cast<ComputationOpHandle *>(&op);
PADDLE_ENFORCE_NOT_NULL(compute_op);
static inline Tensor *GetMutableTensorFromVar(Variable *var) {
if (var->IsType<LoDTensor>()) {
return var->GetMutable<LoDTensor>();
} else {
PADDLE_THROW("Variable must be type of LoDTensor");
if (result_op == nullptr) {
result_op = compute_op;
} else {
PADDLE_ENFORCE_EQ(result_op, compute_op);
}
}
}
PADDLE_ENFORCE_NOT_NULL(result_op);
return result_op;
}
ShareTensorBufferOpHandle::ShareTensorBufferOpHandle(
ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type,
const std::vector<ir::MemOptVarInfo *> &in_var_infos,
const std::vector<const ir::MemOptVarInfo *> &in_var_infos,
const std::vector<std::string> &out_var_names)
: OpHandleBase(node),
scope_(scope),
scope_idx_(scope_idx),
op_type_(op_type),
in_var_infos_(in_var_infos),
out_var_names_(out_var_names) {
PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size());
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
Add(in_var_infos_[i], out_var_names_[i]);
}
}
functor_(scope, scope_idx, op_type, in_var_infos, out_var_names) {}
std::unordered_set<std::string> ShareTensorBufferOpHandle::ReusedVarSet()
const {
std::unordered_set<std::string> result;
for (auto &in_var_info : in_var_infos_) {
result.insert(in_var_info->Name());
}
return result;
std::unordered_map<std::string, std::string>
ShareTensorBufferOpHandle::ReusedVars() const {
return functor_.ReusedVars();
}
void ShareTensorBufferOpHandle::Add(ir::MemOptVarInfo *in_var_info,
const std::string &out_var_name) {
PADDLE_ENFORCE_NOT_NULL(in_var_info, "in_var_info cannot be nullptr");
PADDLE_ENFORCE_NE(in_var_info->Name(), out_var_name,
"in/out cannot have same name: %s", out_var_name);
in_var_infos_.emplace_back(in_var_info);
out_var_names_.emplace_back(out_var_name);
void ShareTensorBufferOpHandle::AddReuseVarPair(
const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name) {
functor_.AddReuseVarPair(in_var_info, out_var_name);
}
void ShareTensorBufferOpHandle::InitCUDA() {
@ -84,49 +71,7 @@ void ShareTensorBufferOpHandle::InitCUDA() {
#endif
}
void ShareTensorBufferOpHandle::CallOnce() {
PADDLE_ENFORCE(in_out_vars_.empty(), "in_out_vars_ must be initialized here");
Scope *exec_scope = local_exec_scopes_[0];
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
auto *in_var = exec_scope->FindVar(in_var_infos_[i]->Name());
auto *out_var = exec_scope->FindVar(out_var_names_[i]);
PADDLE_ENFORCE_NOT_NULL(in_var);
PADDLE_ENFORCE_NOT_NULL(out_var);
PADDLE_ENFORCE_NE(in_var, out_var);
in_out_vars_.emplace_back(in_var, out_var);
}
}
void ShareTensorBufferOpHandle::RunImpl() {
if (in_var_infos_.size() != in_out_vars_.size()) {
CallOnce();
}
for (size_t i = 0; i < in_var_infos_.size(); ++i) {
const auto &in_tensor = GetTensorFromVar(in_out_vars_[i].first);
auto *out_tensor = GetMutableTensorFromVar(in_out_vars_[i].second);
auto *in_var_info = in_var_infos_[i];
if (UNLIKELY(in_var_info->IsSkipped())) {
// If in_var is inplaced in the previous batch and we want to fetch
// in_var in the current batch, we have to reset memory of out_var
// to avoid wrong calculation result.
if (in_tensor.Holder() == out_tensor->Holder()) {
VLOG(1) << "Clear " << out_var_names_[i]
<< " because you may want to fetch an inplaced variable "
<< in_var_info->Name()
<< " in previous batch: " << in_var_info->Name() << " -> "
<< out_var_names_[i];
out_tensor->clear();
}
} else {
out_tensor->ShareBufferWith(in_tensor);
VLOG(2) << "Share tensor buffer when running " << op_type_ << " : "
<< in_var_info->Name() << " -> " << out_var_names_[i];
}
}
}
void ShareTensorBufferOpHandle::RunImpl() { functor_(local_exec_scopes_[0]); }
} // namespace details
} // namespace framework

@ -14,22 +14,15 @@
#pragma once
#include <string>
#include <unordered_set>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/share_tensor_buffer_functor.h"
namespace paddle {
namespace framework {
class Variable;
class Scope;
class Tensor;
namespace ir {
class MemOptVarInfo;
} // namespace ir
namespace details {
class ShareTensorBufferOpHandle : public OpHandleBase {
@ -37,16 +30,19 @@ class ShareTensorBufferOpHandle : public OpHandleBase {
ShareTensorBufferOpHandle(
ir::Node *node, Scope *scope, size_t scope_idx,
const std::string &op_type,
const std::vector<ir::MemOptVarInfo *> &in_vars_infos,
const std::vector<const ir::MemOptVarInfo *> &in_vars_infos,
const std::vector<std::string> &out_var_names);
std::unordered_set<std::string> ReusedVarSet() const;
std::unordered_map<std::string, std::string> ReusedVars() const;
Priority GetPriority() const override { return Priority::kHighest; }
size_t GetScopeIdx() const { return scope_idx_; }
size_t GetScopeIdx() const { return functor_.GetScopeIdx(); }
void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info,
const std::string &out_var_name);
void Add(ir::MemOptVarInfo *in_var_info, const std::string &ou_var_name);
const ShareTensorBufferFunctor &Functor() const { return functor_; }
protected:
std::string Name() const override { return "buffer_share"; }
@ -55,20 +51,17 @@ class ShareTensorBufferOpHandle : public OpHandleBase {
void InitCUDA() override;
std::vector<Scope *> GetLocalScopes() override { return {scope_}; }
std::vector<Scope *> GetLocalScopes() override {
return {functor_.GetScope()};
}
private:
void CallOnce();
Scope *scope_;
size_t scope_idx_;
std::string op_type_;
std::vector<ir::MemOptVarInfo *> in_var_infos_;
std::vector<std::string> out_var_names_;
std::vector<std::pair<const Variable *, Variable *>> in_out_vars_;
ShareTensorBufferFunctor functor_;
};
ComputationOpHandle *GetUniquePendingComputationOpHandle(
ShareTensorBufferOpHandle *share_tensor_op);
} // namespace details
} // namespace framework
} // namespace paddle

@ -21,6 +21,7 @@
#include <utility>
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
@ -74,12 +75,16 @@ struct VarHandleBase {
OpHandleBase* GeneratedOp() { return generated_op_; }
const OpHandleBase* GeneratedOp() const { return generated_op_; }
const std::unordered_set<OpHandleBase*>& PendingOps() const {
return pending_ops_;
}
ir::Node* Node() { return node_; }
const ir::Node* Node() const { return node_; }
protected:
// The operator who generate this variable. nullptr if the variable
// is a root node.
@ -96,6 +101,9 @@ struct VarHandleBase {
//
// NOTE: runtime variables have place.
struct VarHandle : public VarHandleBase {
DISABLE_COPY_AND_ASSIGN(VarHandle);
public:
virtual ~VarHandle();
std::string DebugString() const override;

@ -19,6 +19,7 @@
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h"
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"

@ -22,3 +22,4 @@ cc_library(record_skip_memory_opt_vars_pass SRCS record_skip_memory_opt_vars_pas
cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handle reference_count_pass_helper share_tensor_buffer_op_handle multi_devices_helper graph pass)
cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass)
cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_memory_reuse_pass.cc DEPS memory_reuse_pass)

@ -50,11 +50,11 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const {
for (auto &pair : each_scope_ops) {
// If variable has more than 1 last lived ops, this variable cannot
// be inplaced.
if (pair.second.size() != 1) {
if (pair.second.ops().size() != 1) {
continue;
}
auto *op = *(pair.second.begin());
auto *op = *(pair.second.ops().begin());
const std::string &op_type = op->GetOp()->Type();
const framework::OpDesc *op_desc = op->Node()->Op();
PADDLE_ENFORCE_NOT_NULL(op_desc);
@ -141,7 +141,7 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const {
<< out_var_handle_ptr->Name()
<< ". Debug String is: " << op->GetOp()->DebugString();
} else {
VLOG(4) << "Inplace failed in op " << op_type << ": "
VLOG(3) << "Inplace failed in op " << op_type << ": "
<< in_var_handle_ptr->Name() << " -> "
<< out_var_handle_ptr->Name();
}

@ -205,7 +205,7 @@ void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const {
for (auto &var_ops_map : last_live_ops) {
for (auto &var_ops_pair : var_ops_map) {
const std::string &var_name = var_ops_pair.first;
for (auto *op : var_ops_pair.second) {
for (auto *op : var_ops_pair.second.ops()) {
op_vars_map[op].insert(var_name);
}
}

@ -19,6 +19,7 @@
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.h"
#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/op_info.h"

@ -36,8 +36,6 @@ namespace ir {
constexpr char kMemOptSkipVars[] = "@MEM_OPT_SKIP_VARS@";
typedef std::unordered_set<std::string> MemOptSkipVars;
constexpr char kUseCuda[] = "use_cuda";
std::vector<ir::Node*> SortOpLikeDescOrder(const ir::Graph& graph);
// NOTE(dzh): A ordered set for node reuse in memory optimize.

@ -81,18 +81,26 @@ class MemoryReusePass : public Pass {
bool TryReuseVar(details::VarHandle *in_var,
details::VarHandle *out_var) const;
std::unordered_set<ir::Node *> FindNodesByName(
const std::string &name, const std::vector<ir::Node *> &nodes) const;
bool IsInVarReusable(const details::VarHandle &in_var) const;
bool IsOutVarReusable(const details::VarHandle &out_var) const;
std::unordered_set<Node *> FindNodesByName(
const std::string &name, const std::vector<Node *> &nodes) const;
size_t ScopeNum() const { return all_vars_->size(); }
int64_t GetMemorySize(const details::VarHandle &var) const;
private:
VarDesc *GetVarDesc(details::VarHandle *var) const;
VarDesc *GetVarDesc(const details::VarHandle &var) const;
bool IsVarsReusable(details::VarHandle *in_var,
details::VarHandle *out_var) const;
bool IsVarPairReusable(const details::VarHandle &in_var,
const details::VarHandle &out_var) const;
bool IsVarAlreadyReused(details::VarHandle *var) const;
bool IsInVarAlreadyReused(const details::VarHandle &in_var) const;
bool IsOutVarAlreadyReused(const details::VarHandle &out_var) const;
details::ShareTensorBufferOpHandle *InsertShareTensorBufferOpHandleToGraph(
details::ComputationOpHandle *op) const;
@ -110,15 +118,19 @@ class MemoryReusePass : public Pass {
private:
mutable Graph *graph_;
mutable bool use_cuda_;
mutable details::GraphVars *all_vars_;
mutable MemOptVarInfoMapList *var_infos_;
mutable std::vector<LastLiveOpsOfVars> *last_live_ops_of_vars_;
mutable std::unordered_map<details::ComputationOpHandle *,
details::ShareTensorBufferOpHandle *>
ops_;
mutable std::vector<std::unordered_set<std::string>> reused_var_names_;
mutable std::vector<std::unordered_set<std::string>> reused_in_var_names_;
mutable std::vector<std::unordered_set<std::string>> reused_out_var_names_;
mutable std::vector<std::unordered_map<std::string, VarDesc *>> var_descs_;
};

@ -66,6 +66,24 @@ const std::unordered_set<details::OpHandleBase *> &OpGraphView::PendingOps(
return pending_ops_.at(op);
}
const std::unordered_set<details::OpHandleBase *> &OpGraphView::PrecedingOps(
details::OpHandleBase *op) const {
EnforceHasOp(op);
return preceding_ops_.at(op);
}
std::unordered_map<details::OpHandleBase *, size_t>
OpGraphView::GetPrecedingDepNum() const {
std::unordered_map<details::OpHandleBase *, size_t> result;
result.reserve(preceding_ops_.size());
for (auto &pair : preceding_ops_) {
result.emplace(pair.first, pair.second.size());
}
return result;
}
size_t OpGraphView::OpNumber() const { return preceding_ops_.size(); }
} // namespace ir
} // namespace framework
} // namespace paddle

@ -33,13 +33,24 @@ class OpGraphView {
const std::unordered_set<details::OpHandleBase *> &PendingOps(
details::OpHandleBase *op) const;
const std::unordered_set<details::OpHandleBase *> &PrecedingOps(
details::OpHandleBase *op) const;
std::unordered_map<details::OpHandleBase *, size_t> GetPrecedingDepNum()
const;
bool HasOp(details::OpHandleBase *op) const;
size_t OpNumber() const;
// Use a visitor to visit all pending ops of op
// Stop when callback returns false
template <typename Callback>
bool VisitAllPendingOps(details::OpHandleBase *op, Callback &&callback) const;
template <typename Callback>
void BreadthFirstVisit(Callback &&callback) const;
private:
void Build(const std::vector<details::OpHandleBase *> &ops);
void EnforceHasOp(details::OpHandleBase *op) const;
@ -75,6 +86,52 @@ bool OpGraphView::VisitAllPendingOps(details::OpHandleBase *op,
return true;
}
template <typename Callback>
void OpGraphView::BreadthFirstVisit(Callback &&callback) const {
auto op_deps = GetPrecedingDepNum();
size_t op_num = op_deps.size();
std::unordered_set<details::OpHandleBase *> visited_ops;
std::queue<details::OpHandleBase *> ready_ops;
size_t num_calls = 0;
for (auto iter = op_deps.begin(); iter != op_deps.end();) {
if (iter->second != 0) {
++iter;
continue;
}
visited_ops.insert(iter->first);
ready_ops.push(iter->first);
callback(iter->first);
++num_calls;
op_deps.erase(iter++);
}
while (!ready_ops.empty()) {
auto *cur_op = ready_ops.front();
ready_ops.pop();
auto &pending_ops = PendingOps(cur_op);
for (auto *pending_op : pending_ops) {
if (visited_ops.count(pending_op) > 0) {
continue;
}
if (--op_deps.at(pending_op) == 0) {
visited_ops.insert(pending_op);
op_deps.erase(pending_op);
ready_ops.push(pending_op);
callback(pending_op);
++num_calls;
}
}
}
PADDLE_ENFORCE_EQ(num_calls, op_num, "There are unvisited ops");
PADDLE_ENFORCE_EQ(visited_ops.size(), op_num, "There are unvisited ops");
PADDLE_ENFORCE(op_deps.empty(), "There are unvisited ops");
}
} // namespace ir
} // namespace framework
} // namespace paddle
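
OpGraphView::BreadthFirstVisit above is essentially Kahn's topological traversal driven by per-op in-degree counts. The self-contained sketch below reproduces the same idea on a toy graph of integer op ids (the node ids and adjacency map are invented for illustration); the final assert mirrors the PADDLE_ENFORCE_EQ cycle check at the end of BreadthFirstVisit.

#include <cassert>
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>

int main() {
  // Toy op graph: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3
  std::unordered_map<int, std::vector<int>> pending = {
      {0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};

  // Count preceding dependencies per node, like GetPrecedingDepNum().
  std::unordered_map<int, size_t> dep_num;
  for (auto &kv : pending) dep_num.emplace(kv.first, 0);
  for (auto &kv : pending)
    for (int succ : kv.second) ++dep_num[succ];

  std::queue<int> ready;
  for (auto &kv : dep_num)
    if (kv.second == 0) ready.push(kv.first);

  size_t num_calls = 0;
  while (!ready.empty()) {
    int cur = ready.front();
    ready.pop();
    std::cout << "visit op " << cur << "\n";  // this is where the callback runs
    ++num_calls;
    for (int succ : pending[cur])
      if (--dep_num[succ] == 0) ready.push(succ);
  }

  // A cycle would leave some op with a non-zero in-degree and never visited;
  // BreadthFirstVisit enforces the same invariant with PADDLE_ENFORCE_EQ.
  assert(num_calls == pending.size());
  return 0;
}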

@ -346,6 +346,8 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const {
// Seldomly, some vars may have no pending or preceding computation ops
// Just break;
if (status == LastLiveOpSearchStatus::kFailure) {
VLOG(1) << "Cannot find last live ops of variable " << var_name
<< " in scope " << (*iter)->scope_idx();
break;
}
@ -362,7 +364,9 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const {
VLOG(10) << "Extract " << result.size() << " ops of var " << var_name;
var_infos[i][var_name].reset(
new MemOptVarInfo(var_name, result.size()));
last_live_ops_of_vars[i].emplace(var_name, std::move(result));
auto &last_live_ops_of_var = last_live_ops_of_vars[i][var_name];
last_live_ops_of_var.set_var(*iter);
*(last_live_ops_of_var.mutable_ops()) = std::move(result);
break;
}

@ -39,10 +39,28 @@ using GarbageCollectorMap =
const char kMemOptVarInfoMapList[] = "mem_opt_var_info_map_list";
const char kGarbageCollector[] = "garbage_collector";
const char kAllPlaces[] = "all_places";
const char kUseCuda[] = "use_cuda";
using LastLiveOpsOfVars =
std::unordered_map<std::string,
std::unordered_set<details::ComputationOpHandle *>>;
class LastLiveOpOfVarInfo {
public:
details::VarHandle *var() { return var_; }
void set_var(details::VarHandle *var) { var_ = var; }
const std::unordered_set<details::ComputationOpHandle *> &ops() const {
return ops_;
}
std::unordered_set<details::ComputationOpHandle *> *mutable_ops() {
return &ops_;
}
private:
details::VarHandle *var_{nullptr};
std::unordered_set<details::ComputationOpHandle *> ops_;
};
using LastLiveOpsOfVars = std::unordered_map<std::string, LastLiveOpOfVarInfo>;
const char kLastLiveOpsOfVars[] = "last_live_ops_of_var";
VarDesc *TryGetLatestVarDesc(const std::vector<details::VarHandle *> &vars);
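
The access-pattern change this new record implies is visible in the EagerDeletionPass hunk above (pair.second becomes pair.second.ops()). The sketch below is a self-contained model of that pattern with invented stand-ins (FakeOp, FakeVarHandle, LastLiveInfo) for the Paddle types; it is illustration only.

#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

struct FakeOp { std::string name; };         // stands in for ComputationOpHandle
struct FakeVarHandle { std::string name; };  // stands in for details::VarHandle

struct LastLiveInfo {  // models the LastLiveOpOfVarInfo record above
  FakeVarHandle *var;
  std::unordered_set<FakeOp *> ops;
};

int main() {
  FakeOp relu{"relu"};
  FakeVarHandle x{"x"};

  std::unordered_map<std::string, LastLiveInfo> last_live_ops_of_vars;
  last_live_ops_of_vars["x"] = LastLiveInfo{&x, {&relu}};

  for (auto &pair : last_live_ops_of_vars) {
    // Before this commit callers iterated the raw op set (pair.second);
    // now they go through the accessor (pair.second.ops()) and can also
    // reach the owning VarHandle, which the memory reuse passes need.
    for (auto *op : pair.second.ops) {
      std::cout << pair.first << " is last used by " << op->name
                << " (var handle: " << pair.second.var->name << ")\n";
    }
  }
  return 0;
}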

@ -99,7 +99,7 @@ class Node {
// Test if the Node is wrapped by type T.
template <typename T>
bool IsWrappedBy() {
bool IsWrappedBy() const {
return std::type_index(typeid(T)) == wrapper_type_;
}

@ -36,7 +36,8 @@ Graph* Pass::Apply(Graph* graph) const {
ApplyImpl(graph);
// TODO(panyx0718): Add more verifications.
PADDLE_ENFORCE(!HasCircle(*graph),
"Illegal Pass. Generated graph shouldn't has cycle.");
"Illegal Pass %s. Generated graph shouldn't have cycle.",
Type());
PADDLE_ENFORCE(VarDescIsConsistency(*graph),
"The VarDescs of persistable variable are not consistency.");
applied_ = true;

@ -99,7 +99,7 @@ TEST(PassTest, TestPassAttrCheck) {
} catch (paddle::platform::EnforceNotMet e) {
exception = std::string(e.what());
}
ASSERT_TRUE(exception.find("shouldn't has cycle") != exception.npos);
ASSERT_TRUE(exception.find("shouldn't have cycle") != exception.npos);
}
} // namespace ir

Some files were not shown because too many files have changed in this diff.