commit 33473890f3

paddle/fluid/framework/details/fuse_adam_op_pass.cc
@@ -0,0 +1,199 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fuse_adam_op_pass.h"
#include <algorithm>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace framework {
namespace details {

const std::string FuseAdamOpPass::GetOpType() const { return "adam"; }

const std::vector<std::string> FuseAdamOpPass::GetAuxiliaryVarNames() const {
  return {"Param", "Moment1", "Moment2", "Beta1Pow", "Beta2Pow"};
}

void FuseAdamOpPass::FuseOptimizerOps(
    const std::unordered_map<std::string, std::vector<std::string>>
        &aux_var_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const {
  FuseAdamOps(aux_var_set, fused_vars_name, adam_ops, graph);
  FuseScaleOps(aux_var_set.at("Beta1Pow"), fused_vars_name.at("Beta1Pow"),
               adam_ops, graph);
  FuseScaleOps(aux_var_set.at("Beta2Pow"), fused_vars_name.at("Beta2Pow"),
               adam_ops, graph);
}
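
// FuseAdamOps builds a single adam op that reads and writes the fused
// Param/Grad/Moment1/Moment2 tensors, after checking that all the original
// adam ops share the same attributes.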
void FuseAdamOpPass::FuseAdamOps(
    const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const {
  PADDLE_ENFORCE_GT(adam_ops.size(), static_cast<size_t>(0));

  // Check attributes
  // NOTE: If a new attribute is added, the following code may need to change.
  int op_role = boost::get<int>(
      adam_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
  float beta1 = boost::get<float>(adam_ops[0]->Op()->GetAttr("beta1"));
  float beta2 = boost::get<float>(adam_ops[0]->Op()->GetAttr("beta2"));
  float epsilon = boost::get<float>(adam_ops[0]->Op()->GetAttr("epsilon"));
  bool lazy_mode = boost::get<bool>(adam_ops[0]->Op()->GetAttr("lazy_mode"));
  int64_t min_row_size_to_use_multithread = boost::get<int64_t>(
      adam_ops[0]->Op()->GetAttr("min_row_size_to_use_multithread"));
  for (auto &adam_op : adam_ops) {
    PADDLE_ENFORCE_EQ(beta1,
                      boost::get<float>(adam_op->Op()->GetAttr("beta1")));
    PADDLE_ENFORCE_EQ(beta2,
                      boost::get<float>(adam_op->Op()->GetAttr("beta2")));
    PADDLE_ENFORCE_EQ(epsilon,
                      boost::get<float>(adam_op->Op()->GetAttr("epsilon")));
    PADDLE_ENFORCE_EQ(lazy_mode,
                      boost::get<bool>(adam_op->Op()->GetAttr("lazy_mode")));
    PADDLE_ENFORCE_EQ(min_row_size_to_use_multithread,
                      boost::get<int64_t>(adam_op->Op()->GetAttr(
                          "min_row_size_to_use_multithread")));
    PADDLE_ENFORCE_EQ(op_role, boost::get<int>(adam_op->Op()->GetAttr(
                                   OpProtoAndCheckerMaker::OpRoleAttrName())));
  }

  // NOTE: fused_var only exists in the scope, so the graph doesn't have a
  // fused_var node.

  VLOG(10) << "Insert adam to graph.";
  OpDesc adam_desc(adam_ops[0]->Op()->Block());
  adam_desc.SetType("adam");
  adam_desc.SetInput("Param", {fused_vars_name.at("Param")});
  adam_desc.SetInput("Grad", {fused_vars_name.at("Grad")});
  adam_desc.SetInput("Moment1", {fused_vars_name.at("Moment1")});
  adam_desc.SetInput("Moment2", {fused_vars_name.at("Moment2")});
  // TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
  adam_desc.SetInput("LearningRate", adam_ops[0]->Op()->Input("LearningRate"));
  adam_desc.SetInput("Beta1Pow", adam_ops[0]->Op()->Input("Beta1Pow"));
  adam_desc.SetInput("Beta2Pow", adam_ops[0]->Op()->Input("Beta2Pow"));

  adam_desc.SetOutput("ParamOut", {fused_vars_name.at("Param")});
  adam_desc.SetOutput("Moment1Out", {fused_vars_name.at("Moment1")});
  adam_desc.SetOutput("Moment2Out", {fused_vars_name.at("Moment2")});
  adam_desc.SetAttr("beta1", beta1);
  adam_desc.SetAttr("beta2", beta2);
  adam_desc.SetAttr("epsilon", epsilon);
  adam_desc.SetAttr("lazy_mode", lazy_mode);
  adam_desc.SetAttr("min_row_size_to_use_multithread",
                    min_row_size_to_use_multithread);
  adam_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role);

  auto adam_node = graph->CreateOpNode(&adam_desc);

  InserInputAndOutputForOptOps(adam_ops, adam_node);
}
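
// Every adam op is paired with a scale op that advances its Beta1Pow /
// Beta2Pow accumulator. FuseScaleOps locates the scale op behind each beta
// var and replaces all of them with one scale op that updates the fused
// beta-pow tensor in place.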
void FuseAdamOpPass::FuseScaleOps(const std::vector<std::string> &beta_name,
                                  const std::string &fused_var_name,
                                  const std::vector<ir::Node *> &adam_ops,
                                  ir::Graph *graph) const {
  PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size());
  const std::string scale_op_name = "scale";

  // Get the scale_ops that update adam's beta vars.
  std::vector<ir::Node *> scale_ops;
  scale_ops.reserve(beta_name.size());
  for (size_t i = 0; i < adam_ops.size(); ++i) {
    auto &beta_1_pow_name = beta_name[i];
    auto beta_pow_iter = std::find_if(
        adam_ops[i]->inputs.begin(), adam_ops[i]->inputs.end(),
        [&beta_name, &beta_1_pow_name](ir::Node *var_node) -> bool {
          return var_node->Var() && var_node->Var()->Name() == beta_1_pow_name;
        });
    PADDLE_ENFORCE(beta_pow_iter != adam_ops[i]->inputs.end());

    auto beta_pow_node = *beta_pow_iter;
    auto scale_op_iter = std::find_if(
        beta_pow_node->outputs.begin(), beta_pow_node->outputs.end(),
        [&scale_op_name](ir::Node *op_node) -> bool {
          return op_node->Op() && op_node->Op()->Type() == scale_op_name;
        });
    PADDLE_ENFORCE(scale_op_iter != beta_pow_node->outputs.end());

    scale_ops.emplace_back(*scale_op_iter);
  }
  PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size());

  // Check attributes
  // NOTE: If a new attribute is added, the following code may need to change.
  int op_role = boost::get<int>(
      scale_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
  float scale = boost::get<float>(scale_ops[0]->Op()->GetAttr("scale"));
  float bias = boost::get<float>(scale_ops[0]->Op()->GetAttr("bias"));
  bool bias_after_scale =
      boost::get<bool>(scale_ops[0]->Op()->GetAttr("bias_after_scale"));
  for (auto &scale_op : scale_ops) {
    PADDLE_ENFORCE_EQ(scale,
                      boost::get<float>(scale_op->Op()->GetAttr("scale")));
    PADDLE_ENFORCE_EQ(bias, boost::get<float>(scale_op->Op()->GetAttr("bias")));
    PADDLE_ENFORCE_EQ(
        bias_after_scale,
        boost::get<bool>(scale_op->Op()->GetAttr("bias_after_scale")));
    PADDLE_ENFORCE_EQ(op_role, boost::get<int>(scale_op->Op()->GetAttr(
                                   OpProtoAndCheckerMaker::OpRoleAttrName())));
  }

  // NOTE: fused_var only exists in the scope, so the graph doesn't have a
  // fused_var node.

  VLOG(10) << "Insert fused scale to graph.";
  OpDesc scale_desc(scale_ops[0]->Op()->Block());
  scale_desc.SetType("scale");
  scale_desc.SetInput("X", {fused_var_name});
  scale_desc.SetOutput("Out", {fused_var_name});
  scale_desc.SetAttr("scale", scale);
  scale_desc.SetAttr("bias", bias);
  scale_desc.SetAttr("bias_after_scale", bias_after_scale);
  scale_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role);
  auto scale_node = graph->CreateOpNode(&scale_desc);

  for (auto scale_op : scale_ops) {
    // set inputs
    scale_node->inputs.insert(scale_node->inputs.begin(),
                              scale_op->inputs.begin(), scale_op->inputs.end());
    for (auto &input : scale_op->inputs) {
      std::replace(input->outputs.begin(), input->outputs.end(), scale_op,
                   scale_node);
    }
    // set outputs
    scale_node->outputs.insert(scale_node->outputs.begin(),
                               scale_op->outputs.begin(),
                               scale_op->outputs.end());
    for (auto &output : scale_op->outputs) {
      std::replace(output->inputs.begin(), output->inputs.end(), scale_op,
                   scale_node);
    }
  }

  // Delete scale_ops
  for (auto &scale_op : scale_ops) {
    graph->RemoveNode(scale_op);
  }
}

}  // namespace details
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(fuse_adam_op_pass, paddle::framework::details::FuseAdamOpPass)
    .RequirePassAttr(paddle::framework::details::kPlaces)
    .RequirePassAttr(paddle::framework::details::kLocalScopes);

paddle/fluid/framework/details/fuse_adam_op_pass.h
@@ -0,0 +1,55 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"

namespace paddle {
namespace framework {
namespace details {
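
// FuseAdamOpPass fuses all adam ops of a graph into one adam op (plus two
// scale ops for the Beta1Pow/Beta2Pow updates) that works on the fused
// tensors prepared by FuseOptimizerOpPass.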
class FuseAdamOpPass : public FuseOptimizerOpPass {
 private:
  virtual const std::string GetOpType() const;

  virtual const std::vector<std::string> GetAuxiliaryVarNames() const;

  // Fuse the adam ops and the scale ops that update "Beta1Pow" and "Beta2Pow".
  virtual void FuseOptimizerOps(
      const std::unordered_map<std::string, std::vector<std::string>>
          &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const;

  void FuseAdamOps(
      const std::unordered_map<std::string, std::vector<std::string>>
          &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const;

  void FuseScaleOps(const std::vector<std::string> &beta_name,
                    const std::string &fused_var_name,
                    const std::vector<ir::Node *> &adam_ops,
                    ir::Graph *graph) const;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle

paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
@@ -0,0 +1,240 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include <algorithm>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace framework {
namespace details {

void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
  ir::Graph &result = *graph;

  auto &places = Get<const std::vector<platform::Place>>(kPlaces);
  auto &local_scopes = Get<const std::vector<Scope *>>(kLocalScopes);

  const std::string fuse_op_type = GetOpType();
  const std::vector<std::string> aux_var_names = GetAuxiliaryVarNames();

  // Step 1: Get the specified op and auxiliary variables.
  std::vector<ir::Node *> topo_nodes = ir::TopologySortOperations(result);
  std::unordered_map<std::string, std::vector<std::string>> aux_var_set;
  std::vector<ir::Node *> opt_ops;
  for (auto &node : topo_nodes) {
    GetSpecifiedOpsAndVars(fuse_op_type, aux_var_names, node, &opt_ops,
                           &aux_var_set);
  }

  VLOG(10) << "Found " << fuse_op_type << " operators: " << opt_ops.size();
  if (opt_ops.size() == 0) {
    return;
  }

  if (result.Has(kFusedOptType)) {
    VLOG(10)
        << "Currently only one type of optimizer op can be fused. Fused type: "
        << result.Get<FusedOptType>(kFusedOptType);
    return;
  } else {
    result.Set(kFusedOptType, new FusedOptType);
  }
  result.Get<FusedOptType>(kFusedOptType) = fuse_op_type;

  // Step 2: Insert fused_var_name into FusedVars; the FusedVars need to be
  // initialized in the scopes before execution.
  if (!result.Has(kFusedVars)) {
    result.Set(kFusedVars, new FusedVars);
  }
  std::unordered_map<std::string, std::string> fused_vars_name;
  fused_vars_name.reserve(aux_var_names.size() + 1);
  auto &fused_var_set = result.Get<FusedVars>(kFusedVars);
  const std::string prefix(kFusedVarNamePrefix);
  // NOTE: the fused_var_name should be unique.
  for (auto &var_name : aux_var_names) {
    auto fused_var_name = prefix + "_" + fuse_op_type + "_" + var_name + "_" +
                          aux_var_set[var_name][0];
    VLOG(10) << fused_var_name;
    fused_vars_name.emplace(var_name, fused_var_name);
    PADDLE_ENFORCE_EQ(fused_var_set.count(fused_var_name), 0);
    fused_var_set.insert(fused_var_name);
  }

  // Step 3: Get the fused Gradient's name.
  auto &params_grads = result.Get<ParamsAndGrads>(kParamsAndGrads);
  if (!result.Has(kFusedGrads)) {
    PADDLE_THROW(
        "The alloc_continuous_space_for_grad_pass should be called before this "
        "pass.");
  }
  auto &fused_grad = result.Get<FusedGrads>(kFusedGrads);
  auto &fused_vars = result.Get<FusedVars>(kFusedVars);
  auto iter = std::find(fused_vars.begin(), fused_vars.end(), fused_grad);
  PADDLE_ENFORCE(iter != fused_vars.end(), "Cannot find the fused_grad.");
  fused_vars_name.emplace("Grad", fused_grad);

  // Step 4: Sort the parameters and auxiliary variables according to the
  // parameters' names so that the variables at each index correspond to the
  // same parameter.
  PADDLE_ENFORCE(result.Has(kParamsAndGrads), "Cannot find kParamsAndGrads.");
  PADDLE_ENFORCE_EQ(params_grads.size(), aux_var_set.begin()->second.size(),
                    "The size of params_grads and aux_var_set is not equal.");
  SortParametersAndAuxVars(params_grads, &aux_var_set, &opt_ops);

  // Step 5: Alloc continuous space for the Parameters and AuxiliaryVars (e.g.
  // Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops separately.
  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, aux_var_names,
                                    aux_var_set, fused_vars_name);

  // Step 6: Fuse the optimizer ops and scale ops.
  FuseOptimizerOps(aux_var_set, fused_vars_name, opt_ops, &result);

  // Step 7: Remove the original optimizer ops.
  for (auto &opt_op : opt_ops) {
    graph->RemoveNode(opt_op);
  }
}
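
// Create the fused variables in every local scope and build a temporary
// program of alloc_continuous_space ops that coalesces each group of
// auxiliary variables into its fused tensor.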
void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
    const std::vector<platform::Place> &places,
    const std::vector<Scope *> &local_scopes,
    const std::vector<std::string> &aux_var_names,
    const std::unordered_map<std::string, std::vector<std::string>>
        &aux_var_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name) const {
  VLOG(10) << "Init FusedVars.";
  // Alloc the fused variables in each local scope.
  size_t idx = local_scopes.size();
  for (auto iter = local_scopes.rbegin(); iter != local_scopes.rend();
       ++iter, --idx) {
    auto &scope = *iter;
    for (auto &var_name : aux_var_names) {
      auto fused_var_name = fused_vars_name.at(var_name);
      VLOG(10) << "Init " << fused_var_name;
      PADDLE_ENFORCE(scope->FindVar(fused_var_name) == nullptr,
                     "%s already exists in scope[%d]", fused_var_name, idx);
      scope->Var(fused_var_name)->GetMutable<LoDTensor>();
    }
  }

  ProgramDesc program_desc;
  auto *global_block = program_desc.MutableBlock(0);
  for (auto &var_name : aux_var_names) {
    AppendAllocContinuousSpace(aux_var_set.at(var_name),
                               fused_vars_name.at(var_name), true,
                               global_block);
  }
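
  // Run the temporary program on every place so that each local scope gets
  // its fused tensors allocated and filled with the original variables' data.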
  for (size_t i = 0; i < local_scopes.size(); ++i) {
    for (auto &op_desc : global_block->AllOps()) {
      auto op = OpRegistry::CreateOp(*op_desc);
      op->Run(*local_scopes[i], places[i]);
    }
  }
}
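
// Reorder every auxiliary-variable list (and the collected optimizer ops) to
// follow the parameter order recorded in kParamsAndGrads, so that index i
// refers to the same parameter in all of them.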
void FuseOptimizerOpPass::SortParametersAndAuxVars(
    const std::vector<std::pair<std::string, std::string>> &params_grads,
    std::unordered_map<std::string, std::vector<std::string>> *aux_vars_set,
    std::vector<ir::Node *> *ops) const {
  PADDLE_ENFORCE_NE(aux_vars_set->count("Param"), static_cast<size_t>(0));
  auto &param_vec = aux_vars_set->at("Param");

  std::vector<size_t> param_sort_idx;
  param_sort_idx.reserve(param_vec.size());

  for (auto &p_g : params_grads) {
    auto iter = std::find(param_vec.begin(), param_vec.end(), p_g.first);
    PADDLE_ENFORCE(iter != param_vec.end());
    auto idx = std::distance(param_vec.begin(), iter);
    param_sort_idx.emplace_back(idx);
  }

  for (auto &aux_vars : *aux_vars_set) {
    std::vector<std::string> sorted_vars;
    sorted_vars.reserve(aux_vars.second.size());
    for (size_t i = 0; i < aux_vars.second.size(); ++i) {
      sorted_vars.emplace_back(aux_vars.second.at(param_sort_idx[i]));
    }
    std::swap(aux_vars.second, sorted_vars);

    std::stringstream out;
    for (auto &var_name : aux_vars.second) {
      out << var_name << " ";
    }
    VLOG(10) << aux_vars.first << ": " << out.str();
  }

  std::vector<ir::Node *> sorted_ops;
  sorted_ops.reserve(ops->size());
  for (size_t i = 0; i < ops->size(); ++i) {
    sorted_ops.emplace_back(ops->at(param_sort_idx[i]));
  }
  std::swap(*ops, sorted_ops);
}
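
// If `node` is an op of the requested type, record it and the argument name
// it uses for each auxiliary-variable slot (every slot must hold exactly one
// argument).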
void FuseOptimizerOpPass::GetSpecifiedOpsAndVars(
    const std::string &op_type, const std::vector<std::string> &aux_vars_name,
    ir::Node *node, std::vector<ir::Node *> *ops,
    std::unordered_map<std::string, std::vector<std::string>> *aux_args_name)
    const {
  if (node->Op()->Type() != op_type) return;

  for (auto &var_n : aux_vars_name) {
    auto arg_names = node->Op()->Input(var_n);
    PADDLE_ENFORCE_EQ(arg_names.size(), static_cast<size_t>(1));
    (*aux_args_name)[var_n].emplace_back(arg_names[0]);
    VLOG(10) << var_n << ", " << arg_names[0];
  }
  ops->emplace_back(node);
}
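
// Append an alloc_continuous_space op that fuses `args` into one contiguous
// tensor `out_arg`; with copy_data set, the original contents are copied into
// the fused tensor.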
void FuseOptimizerOpPass::AppendAllocContinuousSpace(
    const std::vector<std::string> &args, const std::string &out_arg,
    bool copy_data, BlockDesc *global_block) const {
  auto op_desc = global_block->AppendOp();
  op_desc->SetType("alloc_continuous_space");
  op_desc->SetInput("Input", args);
  op_desc->SetOutput("Output", args);
  op_desc->SetOutput("FusedOutput", {out_arg});
  op_desc->SetAttr("copy_data", copy_data);
  op_desc->SetAttr("check_name", true);
}
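
// Transfer the inputs and outputs of the original optimizer ops to the newly
// created fused op node and repoint the affected variable nodes at it.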
void FuseOptimizerOpPass::InserInputAndOutputForOptOps(
    const std::vector<ir::Node *> &opt_ops, ir::Node *opt_node) const {
  std::unordered_set<ir::Node *> inputs;
  std::unordered_set<ir::Node *> outputs;
  for (auto opt_op : opt_ops) {
    // set inputs
    inputs.insert(opt_op->inputs.begin(), opt_op->inputs.end());
    for (auto &input : opt_op->inputs) {
      std::replace(input->outputs.begin(), input->outputs.end(), opt_op,
                   opt_node);
    }
    // set outputs
    outputs.insert(opt_op->outputs.begin(), opt_op->outputs.end());
    for (auto &output : opt_op->outputs) {
      std::replace(output->inputs.begin(), output->inputs.end(), opt_op,
                   opt_node);
    }
  }
  opt_node->inputs.insert(opt_node->inputs.begin(), inputs.begin(),
                          inputs.end());
  opt_node->outputs.insert(opt_node->outputs.begin(), outputs.begin(),
                           outputs.end());
}
}  // namespace details
}  // namespace framework
}  // namespace paddle

paddle/fluid/framework/details/fuse_optimizer_op_pass.h
@@ -0,0 +1,75 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"

namespace paddle {
namespace framework {
namespace details {
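
// Base class for the fuse_xxx_op_pass family: it collects all optimizer ops
// of one type, creates fused variables for their parameters, gradients and
// auxiliary variables, and asks the subclass to emit the fused optimizer op.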
class FuseOptimizerOpPass : public ir::Pass {
 protected:
  void ApplyImpl(ir::Graph *graph) const override;

 protected:
  virtual void SortParametersAndAuxVars(
      const std::vector<std::pair<std::string, std::string>> &params_grads,
      std::unordered_map<std::string, std::vector<std::string>> *aux_var_set,
      std::vector<ir::Node *> *ops) const;

  void InserInputAndOutputForOptOps(const std::vector<ir::Node *> &opt_ops,
                                    ir::Node *opt_node) const;

 private:
  virtual const std::string GetOpType() const = 0;

  virtual const std::vector<std::string> GetAuxiliaryVarNames() const = 0;

  virtual void FuseOptimizerOps(
      const std::unordered_map<std::string, std::vector<std::string>>
          &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const = 0;

  void GetSpecifiedOpsAndVars(
      const std::string &op_type, const std::vector<std::string> &aux_vars_name,
      ir::Node *node, std::vector<ir::Node *> *ops,
      std::unordered_map<std::string, std::vector<std::string>> *aux_args_name)
      const;

  void AppendAllocContinuousSpace(const std::vector<std::string> &args,
                                  const std::string &out_arg, bool copy_data,
                                  BlockDesc *global_block) const;

  void InitFusedVarsAndAllocSpaceForVars(
      const std::vector<platform::Place> &places,
      const std::vector<Scope *> &local_scopes,
      const std::vector<std::string> &aux_var_names,
      const std::unordered_map<std::string, std::vector<std::string>>
          &aux_var_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name)
      const;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle

paddle/fluid/framework/details/fuse_sgd_op_pass.cc
@@ -0,0 +1,74 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fuse_sgd_op_pass.h"
#include <algorithm>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace framework {
namespace details {

const std::string FuseSgdOpPass::GetOpType() const { return "sgd"; }

const std::vector<std::string> FuseSgdOpPass::GetAuxiliaryVarNames() const {
  return {"Param"};
}

void FuseSgdOpPass::FuseOptimizerOps(
    const std::unordered_map<std::string, std::vector<std::string>>
        &aux_var_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &sgd_ops, ir::Graph *graph) const {
  FuseSgdOps(aux_var_set, fused_vars_name, sgd_ops, graph);
}
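
// FuseSgdOps builds a single sgd op that updates the fused Param tensor with
// the fused Grad tensor, replacing the per-parameter sgd ops.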
void FuseSgdOpPass::FuseSgdOps(
    const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &sgd_ops, ir::Graph *graph) const {
  PADDLE_ENFORCE_GT(sgd_ops.size(), static_cast<size_t>(0));

  // NOTE: fused_var only exists in the scope, so the graph doesn't have a
  // fused_var node.

  int op_role = boost::get<int>(
      sgd_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
  VLOG(10) << "Insert sgd to graph.";
  // Add the fused sgd op.
  OpDesc Sgd_desc(sgd_ops[0]->Op()->Block());
  Sgd_desc.SetType("sgd");
  Sgd_desc.SetInput("Param", {fused_vars_name.at("Param")});
  Sgd_desc.SetInput("Grad", {fused_vars_name.at("Grad")});
  Sgd_desc.SetOutput("ParamOut", {fused_vars_name.at("Param")});

  // TODO(zcd): The LearningRate of the sgd ops should be equal.
  Sgd_desc.SetInput("LearningRate", sgd_ops[0]->Op()->Input("LearningRate"));

  // NOTE: multi_devices_pass requires that every op should have a role.
  Sgd_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role);

  auto sgd_node = graph->CreateOpNode(&Sgd_desc);

  InserInputAndOutputForOptOps(sgd_ops, sgd_node);
}

}  // namespace details
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(fuse_sgd_op_pass, paddle::framework::details::FuseSgdOpPass)
    .RequirePassAttr(paddle::framework::details::kPlaces)
    .RequirePassAttr(paddle::framework::details::kLocalScopes);

paddle/fluid/framework/details/fuse_sgd_op_pass.h
@@ -0,0 +1,50 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"

namespace paddle {
namespace framework {
namespace details {
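
// FuseSgdOpPass fuses all sgd ops of a graph into one sgd op that works on
// the fused Param/Grad tensors prepared by FuseOptimizerOpPass.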
class FuseSgdOpPass : public FuseOptimizerOpPass {
 private:
  virtual const std::string GetOpType() const;

  virtual const std::vector<std::string> GetAuxiliaryVarNames() const;

  // Fuse Sgd Ops
  virtual void FuseOptimizerOps(
      const std::unordered_map<std::string, std::vector<std::string>>
          &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &sgd_ops, ir::Graph *graph) const;

  void FuseSgdOps(
      const std::unordered_map<std::string, std::vector<std::string>>
          &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &sgd_ops, ir::Graph *graph) const;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle