|
|
@ -19,6 +19,7 @@
|
|
|
|
#include "paddle/fluid/framework/details/computation_op_handle.h"
|
|
|
|
#include "paddle/fluid/framework/details/computation_op_handle.h"
|
|
|
|
#include "paddle/fluid/framework/details/multi_devices_helper.h"
|
|
|
|
#include "paddle/fluid/framework/details/multi_devices_helper.h"
|
|
|
|
#include "paddle/fluid/framework/details/reference_count_pass.h"
|
|
|
|
#include "paddle/fluid/framework/details/reference_count_pass.h"
|
|
|
|
|
|
|
|
#include "paddle/fluid/framework/ir/graph_helper.h"
|
|
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
namespace paddle {
|
|
|
|
namespace framework {
|
|
|
|
namespace framework {
|
|
|
@ -71,14 +72,13 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
|
|
|
|
// Step 2: Find all variables in non-computation ops which refers to variables
|
|
|
|
// Step 2: Find all variables in non-computation ops which refers to variables
|
|
|
|
// in computation ops
|
|
|
|
// in computation ops
|
|
|
|
std::unordered_set<std::string> names;
|
|
|
|
std::unordered_set<std::string> names;
|
|
|
|
std::unordered_map<OpHandleBase *, std::unique_ptr<ReferenceCountOpHandle>>
|
|
|
|
std::unordered_map<OpHandleBase *, ReferenceCountOpHandle *>
|
|
|
|
compute_ref_cnt_map;
|
|
|
|
compute_ref_cnt_map;
|
|
|
|
|
|
|
|
|
|
|
|
auto get_ref_cnts_from_compute_op = [&](
|
|
|
|
auto get_ref_cnts_from_compute_op = [&](
|
|
|
|
const std::unique_ptr<OpHandleBase> &op,
|
|
|
|
OpHandleBase *op, const std::vector<VarHandleBase *> &vars) {
|
|
|
|
const std::vector<VarHandleBase *> &vars) {
|
|
|
|
|
|
|
|
std::vector<std::string> var_names_in_op;
|
|
|
|
std::vector<std::string> var_names_in_op;
|
|
|
|
auto *compute_op = dynamic_cast<ComputationOpHandle *>(op.get());
|
|
|
|
auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
|
|
|
|
if (compute_op == nullptr ||
|
|
|
|
if (compute_op == nullptr ||
|
|
|
|
!platform::is_gpu_place(compute_op->GetPlace()))
|
|
|
|
!platform::is_gpu_place(compute_op->GetPlace()))
|
|
|
|
return var_names_in_op;
|
|
|
|
return var_names_in_op;
|
|
|
@ -121,9 +121,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
auto update_ref_cnts_from_non_compute_op = [&](
|
|
|
|
auto update_ref_cnts_from_non_compute_op = [&](
|
|
|
|
const std::unique_ptr<OpHandleBase> &op,
|
|
|
|
OpHandleBase *op, const std::vector<VarHandleBase *> &vars) {
|
|
|
|
const std::vector<VarHandleBase *> &vars) {
|
|
|
|
if (dynamic_cast<ComputationOpHandle *>(op) != nullptr) return;
|
|
|
|
if (dynamic_cast<ComputationOpHandle *>(op.get()) != nullptr) return;
|
|
|
|
|
|
|
|
for (VarHandleBase *var_handle_base : vars) {
|
|
|
|
for (VarHandleBase *var_handle_base : vars) {
|
|
|
|
auto *var_handle = dynamic_cast<VarHandle *>(var_handle_base);
|
|
|
|
auto *var_handle = dynamic_cast<VarHandle *>(var_handle_base);
|
|
|
|
if (var_handle == nullptr || !var_handle->Node()->IsVar()) continue;
|
|
|
|
if (var_handle == nullptr || !var_handle->Node()->IsVar()) continue;
|
|
|
@ -151,21 +150,21 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
|
|
|
|
ref_cnt_node, next_compute_op->GetScope(), place, {var_name},
|
|
|
|
ref_cnt_node, next_compute_op->GetScope(), place, {var_name},
|
|
|
|
gcs[place.device].get(), cur_ref_cnts[place.device].get());
|
|
|
|
gcs[place.device].get(), cur_ref_cnts[place.device].get());
|
|
|
|
AddDependencyBetween(next_compute_op, ref_cnt_handle, graph.get());
|
|
|
|
AddDependencyBetween(next_compute_op, ref_cnt_handle, graph.get());
|
|
|
|
compute_ref_cnt_map[next_compute_op].reset(ref_cnt_handle);
|
|
|
|
compute_ref_cnt_map[next_compute_op] = ref_cnt_handle;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
auto &all_ops = graph->Get<GraphOps>(kGraphOps);
|
|
|
|
auto all_ops = ir::FilterByNodeWrapper<OpHandleBase>(*graph);
|
|
|
|
for (auto &op : all_ops) {
|
|
|
|
for (auto &op : all_ops) {
|
|
|
|
auto in_var_names = get_ref_cnts_from_compute_op(op, op->Inputs());
|
|
|
|
auto in_var_names = get_ref_cnts_from_compute_op(op, op->Inputs());
|
|
|
|
auto out_var_names = get_ref_cnts_from_compute_op(op, op->Outputs());
|
|
|
|
auto out_var_names = get_ref_cnts_from_compute_op(op, op->Outputs());
|
|
|
|
if (in_var_names.empty() && out_var_names.empty()) continue;
|
|
|
|
if (in_var_names.empty() && out_var_names.empty()) continue;
|
|
|
|
in_var_names.insert(in_var_names.end(), out_var_names.begin(),
|
|
|
|
in_var_names.insert(in_var_names.end(), out_var_names.begin(),
|
|
|
|
out_var_names.end());
|
|
|
|
out_var_names.end());
|
|
|
|
auto *compute_op = dynamic_cast<ComputationOpHandle *>(op.get());
|
|
|
|
auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
|
|
|
|
auto place = boost::get<platform::CUDAPlace>(compute_op->GetPlace());
|
|
|
|
auto place = boost::get<platform::CUDAPlace>(compute_op->GetPlace());
|
|
|
|
ir::Node *ref_cnt_node =
|
|
|
|
ir::Node *ref_cnt_node =
|
|
|
|
graph->CreateEmptyNode("reference_count", ir::Node::Type::kOperation);
|
|
|
|
graph->CreateEmptyNode("reference_count", ir::Node::Type::kOperation);
|
|
|
@ -173,7 +172,7 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
|
|
|
|
ref_cnt_node, compute_op->GetScope(), place, in_var_names,
|
|
|
|
ref_cnt_node, compute_op->GetScope(), place, in_var_names,
|
|
|
|
gcs[place.device].get(), cur_ref_cnts[place.device].get());
|
|
|
|
gcs[place.device].get(), cur_ref_cnts[place.device].get());
|
|
|
|
AddDependencyBetween(compute_op, ref_cnt_handle, graph.get());
|
|
|
|
AddDependencyBetween(compute_op, ref_cnt_handle, graph.get());
|
|
|
|
compute_ref_cnt_map[compute_op].reset(ref_cnt_handle);
|
|
|
|
compute_ref_cnt_map[compute_op] = ref_cnt_handle;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (auto &op : all_ops) {
|
|
|
|
for (auto &op : all_ops) {
|
|
|
@ -181,11 +180,11 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
|
|
|
|
update_ref_cnts_from_non_compute_op(op, op->Outputs());
|
|
|
|
update_ref_cnts_from_non_compute_op(op, op->Outputs());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::unique_ptr<OpHandleBase>> new_all_ops;
|
|
|
|
std::vector<OpHandleBase *> new_all_ops;
|
|
|
|
new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size());
|
|
|
|
new_all_ops.reserve(compute_ref_cnt_map.size() + all_ops.size());
|
|
|
|
for (auto &op : all_ops) {
|
|
|
|
for (auto &op : all_ops) {
|
|
|
|
new_all_ops.emplace_back(std::move(op));
|
|
|
|
new_all_ops.emplace_back(std::move(op));
|
|
|
|
auto it = compute_ref_cnt_map.find(new_all_ops.back().get());
|
|
|
|
auto it = compute_ref_cnt_map.find(new_all_ops.back());
|
|
|
|
if (it != compute_ref_cnt_map.end()) {
|
|
|
|
if (it != compute_ref_cnt_map.end()) {
|
|
|
|
// Add LeafNode to ReferenceCountOpHandle
|
|
|
|
// Add LeafNode to ReferenceCountOpHandle
|
|
|
|
auto *dummy_leaf = new DummyVarHandle(graph->CreateControlDepVar());
|
|
|
|
auto *dummy_leaf = new DummyVarHandle(graph->CreateControlDepVar());
|
|
|
|