diff --git a/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc b/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc index 8e8258ffb1..58ec427859 100644 --- a/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc +++ b/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.cc @@ -12,17 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h" #include #include +#include #include #include - #include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_registry.h" -DEFINE_uint32(fuse_parameter_memory_size, 0, // 0 KB +DEFINE_uint64(fuse_parameter_memory_size, 0, // 0 KB "fuse_parameter_memory_size is up limited memory size " "of one group parameters' gradient which is the input " "of communication calling(e.g NCCLAllReduce). " @@ -40,355 +41,365 @@ DEFINE_int32( namespace paddle { namespace framework { namespace details { +// SetFuseParameterGroupsSize and SetFuseParameterMemorySize are used in unit +// test, because it is invalid that seting 'FLAGS_fuse_parameter_memory_size' +// and 'FLAGS_fuse_parameter_groups_size' in unit test. +void SetFuseParameterGroupsSize(int group_size) { + FLAGS_fuse_parameter_groups_size = group_size; +} -static const char kUnKnow[] = "@UNKNOW@"; -static framework::proto::VarType::Type kDefaultDtype = - framework::proto::VarType::Type::VarType_Type_BOOL; +int GetFuseParameterGroupsSize() { return FLAGS_fuse_parameter_groups_size; } -class AllocContinuousSpaceForGradPass : public ir::Pass { - protected: - void ApplyImpl(ir::Graph *graph) const override { - ir::Graph &result = *graph; +void SetFuseParameterMemorySize(uint64_t memory_size) { + FLAGS_fuse_parameter_memory_size = memory_size; +} - auto &places = Get>(kPlaces); - auto &local_scopes = Get>(kLocalScopes); +uint64_t GetFuseParameterMemorySize() { + return FLAGS_fuse_parameter_memory_size; +} - ResetAttribute(kParamsAndGrads, &result); - ResetAttribute(kGroupGradsAndParams, &result); +static const char kUnKnow[] = "@UNKNOW@"; +static framework::proto::VarType::Type kDefaultDtype = + framework::proto::VarType::Type::VarType_Type_BOOL; - // NOTE: The operator nodes should be in topology order. - std::vector topo_nodes = ir::TopologySortOperations(result); - auto ¶ms_grads = result.Get(kParamsAndGrads); - for (auto &node : topo_nodes) { - RecordParamsAndGrads(node, ¶ms_grads); - } +void AllocContinuousSpaceForGradPass::ApplyImpl(ir::Graph *graph) const { + ir::Graph &result = *graph; - if (params_grads.size() == 0) { - VLOG(10) << "Doesn't find gradients"; - return; - } + auto &places = Get>(kPlaces); + auto &local_scopes = Get>(kLocalScopes); - std::unordered_map vars; - for (ir::Node *node : result.Nodes()) { - if (node->IsVar() && node->Var()) { - // Note: The graph may have the same name node. For example, parameter - // is the input of operator and it also is the output of optimizer; - vars.emplace(node->Var()->Name(), node); - } - } + ResetAttribute(kParamsAndGrads, &result); + ResetAttribute(kGroupGradsAndParams, &result); - auto &group_grads_params = - result.Get(kGroupGradsAndParams); + // NOTE: The operator nodes should be in topology order. + std::vector topo_nodes = ir::TopologySortOperations(result); + auto ¶ms_grads = result.Get(kParamsAndGrads); + for (auto &node : topo_nodes) { + RecordParamsAndGrads(node, ¶ms_grads); + } - // Note: the order of params_grads may be changed by SetGroupGradsAndParams. - SetGroupGradsAndParams(vars, params_grads, &group_grads_params); + if (params_grads.size() == 0) { + VLOG(10) << "Doesn't find gradients"; + return; + } - params_grads.clear(); - for (auto &group_p_g : group_grads_params) { - params_grads.insert(params_grads.begin(), group_p_g.begin(), - group_p_g.end()); - } - for (auto &p_g : params_grads) { - std::swap(p_g.first, p_g.second); + std::unordered_map vars; + for (ir::Node *node : result.Nodes()) { + if (node->IsVar() && node->Var()) { + // Note: The graph may have the same name node. For example, parameter + // is the input of operator and it also is the output of optimizer; + vars.emplace(node->Var()->Name(), node); } + } - // Set Gradients as Persistable to prevent this var becoming reusable. - auto dtype = kDefaultDtype; - for (auto &p_g : params_grads) { - // Get gradient var - auto iter = vars.find(p_g.second); - PADDLE_ENFORCE(iter != vars.end(), "%s is not found.", p_g.second); - iter->second->Var()->SetPersistable(true); - - PADDLE_ENFORCE(IsSupportedVarType(iter->second->Var()->GetType())); + auto &group_grads_params = + result.Get(kGroupGradsAndParams); - // Get Dtype - auto ele_dtype = iter->second->Var()->GetDataType(); - if (dtype == kDefaultDtype) { - dtype = ele_dtype; - PADDLE_ENFORCE_NE(ele_dtype, kDefaultDtype, - "The data type should not be bool."); - } - PADDLE_ENFORCE_EQ(ele_dtype, dtype, - "The data type of input is not consistent."); - } + // Note: the order of params_grads may be changed by SetGroupGradsAndParams. + SetGroupGradsAndParams(vars, params_grads, &group_grads_params); - // Create a FusedVarsSet to avoid duplicating names for fused_var in other - // pass. - if (!result.Has(kFusedVars)) { - result.Set(kFusedVars, new FusedVars); - } - // the kFusedGrads is used be fuse_optimizer_op_pass. - result.Set(kFusedGrads, new FusedGrads); - - // the fused_var_name should be unique, so it appends - // params_grads.begin()->second. - auto fused_var_name = std::string(kFusedVarNamePrefix) + "@GRAD@" + - params_grads.begin()->second; - result.Get(kFusedGrads) = fused_var_name; - auto &fused_var_set = result.Get(kFusedVars); - PADDLE_ENFORCE_EQ(fused_var_set.count(fused_var_name), 0, - "%s is duplicate in FusedVars.", fused_var_name); - fused_var_set.insert(fused_var_name); - - InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars, - fused_var_name, params_grads); + params_grads.clear(); + for (auto &group_p_g : group_grads_params) { + params_grads.insert(params_grads.begin(), group_p_g.begin(), + group_p_g.end()); + } + for (auto &p_g : params_grads) { + std::swap(p_g.first, p_g.second); } - template - void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const { - if (graph->Has(attr_name)) { - VLOG(10) << attr_name << " is reset."; - graph->Erase(attr_name); + // Set Gradients as Persistable to prevent this var becoming reusable. + auto dtype = kDefaultDtype; + for (auto &p_g : params_grads) { + // Get gradient var + auto iter = vars.find(p_g.second); + PADDLE_ENFORCE(iter != vars.end(), "%s is not found.", p_g.second); + iter->second->Var()->SetPersistable(true); + + PADDLE_ENFORCE(IsSupportedVarType(iter->second->Var()->GetType())); + + // Get Dtype + auto ele_dtype = iter->second->Var()->GetDataType(); + if (dtype == kDefaultDtype) { + dtype = ele_dtype; + PADDLE_ENFORCE_NE(ele_dtype, kDefaultDtype, + "The data type should not be bool."); } - graph->Set(attr_name, new AttrType); + PADDLE_ENFORCE_EQ(ele_dtype, dtype, + "The data type of input is not consistent."); } - void SetGroupGradsAndParams( - const std::unordered_map &var_nodes, - const ParamsAndGrads ¶ms_grads, - GroupGradsAndParams *group_grads_params) const { - SetGroupAccordingToLayers(var_nodes, params_grads, group_grads_params); - SetGroupAccordingToMemorySize(var_nodes, group_grads_params); - SetGroupAccordingToGroupSize(var_nodes, group_grads_params); + // Create a FusedVarsSet to avoid duplicating names for fused_var in other + // pass. + if (!result.Has(kFusedVars)) { + result.Set(kFusedVars, new FusedVars); } - - void SetGroupAccordingToLayers( - const std::unordered_map &var_nodes, - const ParamsAndGrads ¶ms_grads, - GroupGradsAndParams *group_grads_params) const { - std::unordered_map> layer_params; - - for (size_t i = 0; i < params_grads.size(); ++i) { - auto pos = params_grads[i].first.find_first_of("."); - if (pos == std::string::npos) { - layer_params[std::string(kUnKnow)].emplace_back(i); - } else { - layer_params[params_grads[i].first.substr(0, pos)].emplace_back(i); - } + // the kFusedGrads is used be fuse_optimizer_op_pass. + result.Set(kFusedGrads, new FusedGrads); + + // the fused_var_name should be unique, so it appends + // params_grads.begin()->second. + auto fused_var_name = std::string(kFusedVarNamePrefix) + "@GRAD@" + + params_grads.begin()->second; + result.Get(kFusedGrads) = fused_var_name; + auto &fused_var_set = result.Get(kFusedVars); + PADDLE_ENFORCE_EQ(fused_var_set.count(fused_var_name), 0, + "%s is duplicate in FusedVars.", fused_var_name); + fused_var_set.insert(fused_var_name); + + InitFusedVarsAndAllocSpaceForVars(places, local_scopes, vars, fused_var_name, + params_grads); +} + +template +void AllocContinuousSpaceForGradPass::ResetAttribute( + const std::string &attr_name, ir::Graph *graph) const { + if (graph->Has(attr_name)) { + VLOG(10) << attr_name << " is reset."; + graph->Erase(attr_name); + } + graph->Set(attr_name, new AttrType); +} + +void AllocContinuousSpaceForGradPass::SetGroupGradsAndParams( + const std::unordered_map &var_nodes, + const ParamsAndGrads ¶ms_grads, + GroupGradsAndParams *group_grads_params) const { + SetGroupAccordingToLayers(var_nodes, params_grads, group_grads_params); + SetGroupAccordingToMemorySize(var_nodes, group_grads_params); + SetGroupAccordingToGroupSize(var_nodes, group_grads_params); +} + +void AllocContinuousSpaceForGradPass::SetGroupAccordingToLayers( + const std::unordered_map &var_nodes, + const ParamsAndGrads ¶ms_grads, + GroupGradsAndParams *group_grads_params) const { + std::unordered_map> layer_params; + + for (size_t i = 0; i < params_grads.size(); ++i) { + auto pos = params_grads[i].first.find_first_of("."); + if (pos == std::string::npos) { + layer_params[std::string(kUnKnow)].emplace_back(i); + } else { + layer_params[params_grads[i].first.substr(0, pos)].emplace_back(i); } + } - group_grads_params->reserve(layer_params.size()); - for (size_t i = 0; i < params_grads.size(); ++i) { - auto pos = params_grads[i].first.find_first_of("."); - std::string key = kUnKnow; - if (pos != std::string::npos) { - key = params_grads[i].first.substr(0, pos); - } - auto iter = layer_params.find(key); - if (iter == layer_params.end()) continue; - - group_grads_params->emplace_back(); - auto &local_group_grads_params = group_grads_params->back(); - for (auto &idx : iter->second) { - local_group_grads_params.emplace_back( - std::make_pair(params_grads[idx].second, params_grads[idx].first)); - } - layer_params.erase(iter); + group_grads_params->reserve(layer_params.size()); + for (size_t i = 0; i < params_grads.size(); ++i) { + auto pos = params_grads[i].first.find_first_of("."); + std::string key = kUnKnow; + if (pos != std::string::npos) { + key = params_grads[i].first.substr(0, pos); } - - VLOG(10) << "SetGroupAccordingToLayers: "; - for (size_t i = 0; i < group_grads_params->size(); ++i) { - VLOG(10) << "group " << i; - std::stringstream out; - for (auto &p_g : group_grads_params->at(i)) { - out << "(" << p_g.second << ", " << p_g.first << "), "; - } - VLOG(10) << out.str(); + auto iter = layer_params.find(key); + if (iter == layer_params.end()) continue; + + group_grads_params->emplace_back(); + auto &local_group_grads_params = group_grads_params->back(); + for (auto &idx : iter->second) { + local_group_grads_params.emplace_back( + std::make_pair(params_grads[idx].second, params_grads[idx].first)); } + layer_params.erase(iter); } - void SetGroupAccordingToMemorySize( - const std::unordered_map &var_nodes, - GroupGradsAndParams *group_grads_params) const { - if (FLAGS_fuse_parameter_memory_size == 0) { - return; + VLOG(10) << "SetGroupAccordingToLayers: "; + for (size_t i = 0; i < group_grads_params->size(); ++i) { + VLOG(10) << "group " << i; + std::stringstream out; + for (auto &p_g : group_grads_params->at(i)) { + out << "(" << p_g.second << ", " << p_g.first << "), "; } - size_t group_memory_size = - static_cast(FLAGS_fuse_parameter_memory_size); - GroupGradsAndParams local_group_grads_params; - - size_t j = 0; + VLOG(10) << out.str(); + } +} + +void AllocContinuousSpaceForGradPass::SetGroupAccordingToMemorySize( + const std::unordered_map &var_nodes, + GroupGradsAndParams *group_grads_params) const { + const uint64_t group_memory_size = GetFuseParameterMemorySize(); + if (group_memory_size == 0) { + return; + } + GroupGradsAndParams local_group_grads_params; + size_t j = 0; + while (j < group_grads_params->size()) { + local_group_grads_params.emplace_back(); + auto &group_p_g = local_group_grads_params.back(); + size_t local_group_memory_size = 0; while (j < group_grads_params->size()) { - local_group_grads_params.emplace_back(); - auto &group_p_g = local_group_grads_params.back(); - size_t local_group_memory_size = 0; - while (j < group_grads_params->size()) { - std::for_each( - group_grads_params->at(j).begin(), group_grads_params->at(j).end(), - [&local_group_memory_size, - &var_nodes](const std::pair &g_p) { - auto iter = var_nodes.find(g_p.second); - PADDLE_ENFORCE(iter != var_nodes.end(), "%s is not found.", - g_p.second); - auto shape = iter->second->Var()->GetShape(); - size_t size = - framework::SizeOfType(iter->second->Var()->GetDataType()); - std::for_each(shape.begin(), shape.end(), - [&size](const int64_t &n) { size *= n; }); - local_group_memory_size += size; - }); - group_p_g.insert(group_p_g.end(), group_grads_params->at(j).begin(), - group_grads_params->at(j).end()); - ++j; - if (local_group_memory_size >= group_memory_size) { - break; - } - } - } - - std::swap(*group_grads_params, local_group_grads_params); - - VLOG(10) << string::Sprintf( - "SetGroupAccordingToMemorySize(memory_size: %d):", - FLAGS_fuse_parameter_memory_size); - for (size_t i = 0; i < group_grads_params->size(); ++i) { - VLOG(10) << "group " << i; - std::stringstream out; - for (auto &g_p : group_grads_params->at(i)) { - auto iter = var_nodes.find(g_p.second); - PADDLE_ENFORCE(iter != var_nodes.end(), "%s is not found.", g_p.second); - auto shape = iter->second->Var()->GetShape(); - size_t size = framework::SizeOfType(iter->second->Var()->GetDataType()); - std::for_each(shape.begin(), shape.end(), - [&size](const int64_t &n) { size *= n; }); - out << string::Sprintf("(%s(%d), %s)", g_p.second, size, g_p.first); + std::for_each( + group_grads_params->at(j).begin(), group_grads_params->at(j).end(), + [&local_group_memory_size, + &var_nodes](const std::pair &g_p) { + auto iter = var_nodes.find(g_p.second); + PADDLE_ENFORCE(iter != var_nodes.end(), "%s is not found.", + g_p.second); + auto shape = iter->second->Var()->GetShape(); + size_t size = + framework::SizeOfType(iter->second->Var()->GetDataType()); + std::for_each(shape.begin(), shape.end(), + [&size](const int64_t &n) { size *= n; }); + local_group_memory_size += size; + }); + group_p_g.insert(group_p_g.end(), group_grads_params->at(j).begin(), + group_grads_params->at(j).end()); + ++j; + if (local_group_memory_size >= group_memory_size) { + break; } - VLOG(10) << out.str(); } } - void SetGroupAccordingToGroupSize( - const std::unordered_map &var_nodes, - GroupGradsAndParams *group_grads_params) const { - if (FLAGS_fuse_parameter_groups_size == 1) { - return; - } - size_t group_size = static_cast(FLAGS_fuse_parameter_groups_size); - if (FLAGS_fuse_parameter_groups_size == -1) { - group_size = group_grads_params->size(); - } - PADDLE_ENFORCE_GT(group_size, 1); - size_t groups = (group_grads_params->size() + group_size - 1) / group_size; - GroupGradsAndParams local_group_grads_params; - local_group_grads_params.reserve(groups); - - size_t j = 0; - for (size_t i = 0; i < groups; ++i) { - local_group_grads_params.emplace_back(); - auto &group_p_g = local_group_grads_params.back(); - group_p_g.reserve(group_size); - while (j < group_grads_params->size()) { - group_p_g.insert(group_p_g.end(), group_grads_params->at(j).begin(), - group_grads_params->at(j).end()); - ++j; - if (j % group_size == 0) break; - } - } - std::swap(*group_grads_params, local_group_grads_params); - - VLOG(10) << "SetGroupAccordingToGroupSize(group_size: " << group_size - << "): "; - for (size_t i = 0; i < group_grads_params->size(); ++i) { - VLOG(10) << "group " << i; - std::stringstream out; - for (auto &p_g : group_grads_params->at(i)) { - out << "(" << p_g.second << ", " << p_g.first << "), "; - } - VLOG(10) << out.str(); + std::swap(*group_grads_params, local_group_grads_params); + + VLOG(10) << string::Sprintf("SetGroupAccordingToMemorySize(memory_size: %d):", + group_memory_size); + for (size_t i = 0; i < group_grads_params->size(); ++i) { + VLOG(10) << "group " << i; + std::stringstream out; + for (auto &g_p : group_grads_params->at(i)) { + auto iter = var_nodes.find(g_p.second); + PADDLE_ENFORCE(iter != var_nodes.end(), "%s is not found.", g_p.second); + auto shape = iter->second->Var()->GetShape(); + size_t size = framework::SizeOfType(iter->second->Var()->GetDataType()); + std::for_each(shape.begin(), shape.end(), + [&size](const int64_t &n) { size *= n; }); + out << string::Sprintf("(%s(%d), %s)", g_p.second, size, g_p.first); } + VLOG(10) << out.str(); } +} - private: - bool IsSupportedVarType(const proto::VarType::Type &type) const { - // Current only support LOD_TENSOR. - return type == proto::VarType::LOD_TENSOR; +void AllocContinuousSpaceForGradPass::SetGroupAccordingToGroupSize( + const std::unordered_map &var_nodes, + GroupGradsAndParams *group_grads_params) const { + if (GetFuseParameterGroupsSize() == 1) { + return; } - - void RecordParamsAndGrads(ir::Node *node, - ParamsAndGrads *params_grads) const { - try { - bool is_bk_op = - static_cast(boost::get(node->Op()->GetAttr( - OpProtoAndCheckerMaker::OpRoleAttrName())) & - static_cast(OpRole::kBackward)); - if (!is_bk_op) return; - - // Currently, we assume that once gradient is generated, it can be - // broadcast, and each gradient is only broadcast once. - auto backward_vars = - boost::get>(node->Op()->GetNullableAttr( - OpProtoAndCheckerMaker::OpRoleVarAttrName())); - PADDLE_ENFORCE_EQ(backward_vars.size() % 2, static_cast(0)); - - for (size_t i = 0; i < backward_vars.size(); i += 2) { - VLOG(10) << "Trainable parameter: " << backward_vars[i] - << ", gradient: " << backward_vars[i + 1]; - - params_grads->emplace_back(std::make_pair( - backward_vars[i] /*param*/, backward_vars[i + 1] /*grad*/)); - } - } catch (boost::bad_get e) { + const int group_size = GetFuseParameterGroupsSize() == -1 + ? static_cast(group_grads_params->size()) + : GetFuseParameterGroupsSize(); + PADDLE_ENFORCE_GT(group_size, 1); + size_t groups = (group_grads_params->size() + group_size - 1) / group_size; + GroupGradsAndParams local_group_grads_params; + local_group_grads_params.reserve(groups); + + size_t j = 0; + for (size_t i = 0; i < groups; ++i) { + local_group_grads_params.emplace_back(); + auto &group_p_g = local_group_grads_params.back(); + group_p_g.reserve(group_size); + while (j < group_grads_params->size()) { + group_p_g.insert(group_p_g.end(), group_grads_params->at(j).begin(), + group_grads_params->at(j).end()); + ++j; + if (j % group_size == 0) break; } } - - void InitFusedVarsAndAllocSpaceForVars( - const std::vector &places, - const std::vector &local_scopes, - const std::unordered_map &vars, - const std::string &fused_var_name, - const ParamsAndGrads ¶ms_grads) const { - // Init Gradients and FusedVars - VLOG(10) << "Init FusedVars and Gradients."; - for (auto it = local_scopes.rbegin(); it != local_scopes.rend(); ++it) { - auto &scope = *it; - - PADDLE_ENFORCE(scope->FindVar(fused_var_name) == nullptr, - "%s has existed in scope.", fused_var_name); - scope->Var(fused_var_name)->GetMutable(); - - for (auto &p_g : params_grads) { - auto iter = vars.find(p_g.second); - PADDLE_ENFORCE(iter != vars.end()); - PADDLE_ENFORCE_NOT_NULL(iter->second->Var()); - PADDLE_ENFORCE_EQ(iter->second->Var()->GetType(), - proto::VarType::LOD_TENSOR); - scope->Var(p_g.second)->GetMutable(); - } + std::swap(*group_grads_params, local_group_grads_params); + + VLOG(10) << string::Sprintf("SetGroupAccordingToGroupSize(group_size: %d):", + group_size); + for (size_t i = 0; i < group_grads_params->size(); ++i) { + VLOG(10) << "group " << i; + std::stringstream out; + for (auto &p_g : group_grads_params->at(i)) { + out << "(" << p_g.second << ", " << p_g.first << "), "; + } + VLOG(10) << out.str(); + } +} + +bool AllocContinuousSpaceForGradPass::IsSupportedVarType( + const proto::VarType::Type &type) const { + // Current only support LOD_TENSOR. + return type == proto::VarType::LOD_TENSOR; +} + +void AllocContinuousSpaceForGradPass::RecordParamsAndGrads( + ir::Node *node, ParamsAndGrads *params_grads) const { + try { + bool is_bk_op = + static_cast(boost::get(node->Op()->GetAttr( + OpProtoAndCheckerMaker::OpRoleAttrName())) & + static_cast(OpRole::kBackward)); + if (!is_bk_op) return; + + // Currently, we assume that once gradient is generated, it can be + // broadcast, and each gradient is only broadcast once. + auto backward_vars = + boost::get>(node->Op()->GetNullableAttr( + OpProtoAndCheckerMaker::OpRoleVarAttrName())); + PADDLE_ENFORCE_EQ(backward_vars.size() % 2, static_cast(0)); + + for (size_t i = 0; i < backward_vars.size(); i += 2) { + VLOG(10) << "Trainable parameter: " << backward_vars[i] + << ", gradient: " << backward_vars[i + 1]; + + params_grads->emplace_back(std::make_pair(backward_vars[i] /*param*/, + backward_vars[i + 1] /*grad*/)); } + } catch (boost::bad_get e) { + } +} + +void AllocContinuousSpaceForGradPass::InitFusedVarsAndAllocSpaceForVars( + const std::vector &places, + const std::vector &local_scopes, + const std::unordered_map &vars, + const std::string &fused_var_name, + const ParamsAndGrads ¶ms_grads) const { + // Init Gradients and FusedVars + VLOG(10) << "Init FusedVars and Gradients."; + for (auto it = local_scopes.rbegin(); it != local_scopes.rend(); ++it) { + auto &scope = *it; + + PADDLE_ENFORCE(scope->FindVar(fused_var_name) == nullptr, + "%s has existed in scope.", fused_var_name); + scope->Var(fused_var_name)->GetMutable(); - // Alloc continuous space for vars. - std::vector grads_name; - std::vector params_name; - grads_name.reserve(params_grads.size()); - params_name.reserve(params_grads.size()); for (auto &p_g : params_grads) { - params_name.emplace_back(p_g.first); - grads_name.emplace_back(p_g.second); - } - framework::ProgramDesc program_desc; - AppendAllocSpaceForVarsOp(params_name, grads_name, fused_var_name, - program_desc.MutableBlock(0)); - - for (size_t i = 0; i < local_scopes.size(); ++i) { - for (auto &op_desc : program_desc.Block(0).AllOps()) { - auto op = OpRegistry::CreateOp(*op_desc); - op->Run(*local_scopes[i], places[i]); - } + auto iter = vars.find(p_g.second); + PADDLE_ENFORCE(iter != vars.end()); + PADDLE_ENFORCE_NOT_NULL(iter->second->Var()); + PADDLE_ENFORCE_EQ(iter->second->Var()->GetType(), + proto::VarType::LOD_TENSOR); + scope->Var(p_g.second)->GetMutable(); } } - void AppendAllocSpaceForVarsOp(const std::vector ¶ms_name, - const std::vector &grads_name, - const std::string &fused_var_name, - BlockDesc *global_block) const { - auto op_desc = global_block->AppendOp(); - op_desc->SetType("alloc_continuous_space"); - op_desc->SetInput("Input", params_name); - op_desc->SetOutput("Output", grads_name); - op_desc->SetOutput("FusedOutput", {fused_var_name}); + // Alloc continuous space for vars. + std::vector grads_name; + std::vector params_name; + grads_name.reserve(params_grads.size()); + params_name.reserve(params_grads.size()); + for (auto &p_g : params_grads) { + params_name.emplace_back(p_g.first); + grads_name.emplace_back(p_g.second); + } + framework::ProgramDesc program_desc; + AppendAllocSpaceForVarsOp(params_name, grads_name, fused_var_name, + program_desc.MutableBlock(0)); + + for (size_t i = 0; i < local_scopes.size(); ++i) { + for (auto &op_desc : program_desc.Block(0).AllOps()) { + auto op = OpRegistry::CreateOp(*op_desc); + op->Run(*local_scopes[i], places[i]); + } } -}; +} + +void AllocContinuousSpaceForGradPass::AppendAllocSpaceForVarsOp( + const std::vector ¶ms_name, + const std::vector &grads_name, + const std::string &fused_var_name, BlockDesc *global_block) const { + auto op_desc = global_block->AppendOp(); + op_desc->SetType("alloc_continuous_space"); + op_desc->SetInput("Input", params_name); + op_desc->SetOutput("Output", grads_name); + op_desc->SetOutput("FusedOutput", {fused_var_name}); +} } // namespace details } // namespace framework diff --git a/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h b/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h new file mode 100644 index 0000000000..e6d56f17cc --- /dev/null +++ b/paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h @@ -0,0 +1,79 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once +#include +#include +#include +#include +#include +#include "paddle/fluid/framework/details/build_strategy.h" +#include "paddle/fluid/framework/details/multi_devices_helper.h" +#include "paddle/fluid/framework/ir/graph_helper.h" + +namespace paddle { +namespace framework { +namespace details { + +void SetFuseParameterGroupsSize(int group_size); +int GetFuseParameterGroupsSize(); + +void SetFuseParameterMemorySize(uint64_t memory_size); +uint64_t GetFuseParameterMemorySize(); + +class AllocContinuousSpaceForGradPass : public ir::Pass { + protected: + void ApplyImpl(ir::Graph *graph) const override; + + template + void ResetAttribute(const std::string &attr_name, ir::Graph *graph) const; + + void SetGroupGradsAndParams( + const std::unordered_map &var_nodes, + const ParamsAndGrads ¶ms_grads, + GroupGradsAndParams *group_grads_params) const; + + void SetGroupAccordingToLayers( + const std::unordered_map &var_nodes, + const ParamsAndGrads ¶ms_grads, + GroupGradsAndParams *group_grads_params) const; + + void SetGroupAccordingToMemorySize( + const std::unordered_map &var_nodes, + GroupGradsAndParams *group_grads_params) const; + + void SetGroupAccordingToGroupSize( + const std::unordered_map &var_nodes, + GroupGradsAndParams *group_grads_params) const; + + private: + bool IsSupportedVarType(const proto::VarType::Type &type) const; + + void RecordParamsAndGrads(ir::Node *node, ParamsAndGrads *params_grads) const; + + void InitFusedVarsAndAllocSpaceForVars( + const std::vector &places, + const std::vector &local_scopes, + const std::unordered_map &vars, + const std::string &fused_var_name, + const ParamsAndGrads ¶ms_grads) const; + + void AppendAllocSpaceForVarsOp(const std::vector ¶ms_name, + const std::vector &grads_name, + const std::string &fused_var_name, + BlockDesc *global_block) const; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 0d1a1ca077..f0ea6d9b0a 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/details/alloc_continuous_space_for_grad_pass.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/framework.pb.h" @@ -165,6 +166,11 @@ PYBIND11_MODULE(core, m) { // to enable eager deletion mode in unittest. m.def("_set_eager_deletion_mode", &paddle::framework::SetEagerDeletionMode); + m.def("_set_fuse_parameter_group_size", + &paddle::framework::details::SetFuseParameterGroupsSize); + m.def("_set_fuse_parameter_memory_size", + &paddle::framework::details::SetFuseParameterMemorySize); + m.add_object("_cleanup", py::capsule([]() { ScopePool::Instance().Clear(); })); diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py index 4c1acc98d5..12d854fb54 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py @@ -14,10 +14,11 @@ from __future__ import print_function import os -os.environ['FLAGS_fuse_parameter_memory_size'] = "131072" -os.environ['FLAGS_fuse_parameter_groups_size'] = "3" import paddle.fluid as fluid +fluid.core._set_fuse_parameter_group_size(3) +fluid.core._set_fuse_parameter_memory_size(131072) + import paddle.fluid.layers.ops as ops from paddle.fluid.initializer import init_on_cpu from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter