Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into quan_ck
test=develop

commit d41b623a72
@@ -0,0 +1,42 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

INCLUDE(ExternalProject)

SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc")
SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc")
SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE)
SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE)
INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR})

ExternalProject_Add(
    extern_dgc
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY      "https://github.com/PaddlePaddle/Fleet"
    GIT_TAG             "2d04dc3800cdd0601f1b65d547dabcc60b0cf9dc"
    SOURCE_DIR          "${DGC_SOURCES_DIR}"
    CONFIGURE_COMMAND   ""
    BUILD_COMMAND       cd collective && make -j
    INSTALL_COMMAND     mkdir -p ${DGC_INSTALL_DIR}/lib/ ${DGC_INCLUDE_DIR}/dgc
        && cp ${DGC_SOURCES_DIR}/collective/build/lib/libdgc.a ${DGC_LIBRARIES}
        && cp ${DGC_SOURCES_DIR}/collective/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/
    BUILD_IN_SOURCE     1
)

ADD_LIBRARY(dgc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES})
ADD_DEPENDENCIES(dgc extern_dgc)

LIST(APPEND external_project_dependencies dgc)
paddle/fluid/framework/details/fuse_adam_op_pass.cc
@@ -0,0 +1,199 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fuse_adam_op_pass.h"
#include <algorithm>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace framework {
namespace details {

const std::string FuseAdamOpPass::GetOpType() const { return "adam"; }

const std::vector<std::string> FuseAdamOpPass::GetAuxiliaryVarNames() const {
  return {"Param", "Moment1", "Moment2", "Beta1Pow", "Beta2Pow"};
}

void FuseAdamOpPass::FuseOptimizerOps(
    const std::unordered_map<std::string, std::vector<std::string>>
        &aux_var_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const {
  FuseAdamOps(aux_var_set, fused_vars_name, adam_ops, graph);
  FuseScaleOps(aux_var_set.at("Beta1Pow"), fused_vars_name.at("Beta1Pow"),
               adam_ops, graph);
  FuseScaleOps(aux_var_set.at("Beta2Pow"), fused_vars_name.at("Beta2Pow"),
               adam_ops, graph);
}

void FuseAdamOpPass::FuseAdamOps(
    const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const {
  PADDLE_ENFORCE_GT(adam_ops.size(), static_cast<size_t>(0));

  // Check attributes.
  // NOTE: If a new attribute is added, the following code may need to change.
  int op_role = boost::get<int>(
      adam_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
  float beta1 = boost::get<float>(adam_ops[0]->Op()->GetAttr("beta1"));
  float beta2 = boost::get<float>(adam_ops[0]->Op()->GetAttr("beta2"));
  float epsilon = boost::get<float>(adam_ops[0]->Op()->GetAttr("epsilon"));
  bool lazy_mode = boost::get<bool>(adam_ops[0]->Op()->GetAttr("lazy_mode"));
  int64_t min_row_size_to_use_multithread = boost::get<int64_t>(
      adam_ops[0]->Op()->GetAttr("min_row_size_to_use_multithread"));
  for (auto &adam_op : adam_ops) {
    PADDLE_ENFORCE_EQ(beta1,
                      boost::get<float>(adam_op->Op()->GetAttr("beta1")));
    PADDLE_ENFORCE_EQ(beta2,
                      boost::get<float>(adam_op->Op()->GetAttr("beta2")));
    PADDLE_ENFORCE_EQ(epsilon,
                      boost::get<float>(adam_op->Op()->GetAttr("epsilon")));
    PADDLE_ENFORCE_EQ(lazy_mode,
                      boost::get<bool>(adam_op->Op()->GetAttr("lazy_mode")));
    PADDLE_ENFORCE_EQ(min_row_size_to_use_multithread,
                      boost::get<int64_t>(adam_op->Op()->GetAttr(
                          "min_row_size_to_use_multithread")));
    PADDLE_ENFORCE_EQ(op_role, boost::get<int>(adam_op->Op()->GetAttr(
                                   OpProtoAndCheckerMaker::OpRoleAttrName())));
  }

  // NOTE: fused_var only exists in the scope, so the graph doesn't have a
  // fused_var node.

  VLOG(10) << "Insert adam to graph.";
  OpDesc adam_desc(adam_ops[0]->Op()->Block());
  adam_desc.SetType("adam");
  adam_desc.SetInput("Param", {fused_vars_name.at("Param")});
  adam_desc.SetInput("Grad", {fused_vars_name.at("Grad")});
  adam_desc.SetInput("Moment1", {fused_vars_name.at("Moment1")});
  adam_desc.SetInput("Moment2", {fused_vars_name.at("Moment2")});
  // TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
  adam_desc.SetInput("LearningRate", adam_ops[0]->Op()->Input("LearningRate"));
  adam_desc.SetInput("Beta1Pow", adam_ops[0]->Op()->Input("Beta1Pow"));
  adam_desc.SetInput("Beta2Pow", adam_ops[0]->Op()->Input("Beta2Pow"));

  adam_desc.SetOutput("ParamOut", {fused_vars_name.at("Param")});
  adam_desc.SetOutput("Moment1Out", {fused_vars_name.at("Moment1")});
  adam_desc.SetOutput("Moment2Out", {fused_vars_name.at("Moment2")});
  adam_desc.SetAttr("beta1", beta1);
  adam_desc.SetAttr("beta2", beta2);
  adam_desc.SetAttr("epsilon", epsilon);
  adam_desc.SetAttr("lazy_mode", lazy_mode);
  adam_desc.SetAttr("min_row_size_to_use_multithread",
                    min_row_size_to_use_multithread);
  adam_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role);

  auto adam_node = graph->CreateOpNode(&adam_desc);

  InserInputAndOutputForOptOps(adam_ops, adam_node);
}

void FuseAdamOpPass::FuseScaleOps(const std::vector<std::string> &beta_name,
                                  const std::string &fused_var_name,
                                  const std::vector<ir::Node *> &adam_ops,
                                  ir::Graph *graph) const {
  PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size());
  const std::string scale_op_name = "scale";

  // Get the scale_ops that update the adam ops' beta vars.
  std::vector<ir::Node *> scale_ops;
  scale_ops.reserve(beta_name.size());
  for (size_t i = 0; i < adam_ops.size(); ++i) {
    auto &beta_1_pow_name = beta_name[i];
    auto beta_pow_iter = std::find_if(
        adam_ops[i]->inputs.begin(), adam_ops[i]->inputs.end(),
        [&beta_name, &beta_1_pow_name](ir::Node *var_node) -> bool {
          return var_node->Var() && var_node->Var()->Name() == beta_1_pow_name;
        });
    PADDLE_ENFORCE(beta_pow_iter != adam_ops[i]->inputs.end());

    auto beta_pow_node = *beta_pow_iter;
    auto scale_op_iter = std::find_if(
        beta_pow_node->outputs.begin(), beta_pow_node->outputs.end(),
        [&scale_op_name](ir::Node *op_node) -> bool {
          return op_node->Op() && op_node->Op()->Type() == scale_op_name;
        });
    PADDLE_ENFORCE(scale_op_iter != beta_pow_node->outputs.end());

    scale_ops.emplace_back(*scale_op_iter);
  }
  PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size());

  // Check attributes.
  // NOTE: If a new attribute is added, the following code may need to change.
  int op_role = boost::get<int>(
      scale_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
  float scale = boost::get<float>(scale_ops[0]->Op()->GetAttr("scale"));
  float bias = boost::get<float>(scale_ops[0]->Op()->GetAttr("bias"));
  bool bias_after_scale =
      boost::get<bool>(scale_ops[0]->Op()->GetAttr("bias_after_scale"));
  for (auto &scale_op : scale_ops) {
    PADDLE_ENFORCE_EQ(scale,
                      boost::get<float>(scale_op->Op()->GetAttr("scale")));
    PADDLE_ENFORCE_EQ(bias, boost::get<float>(scale_op->Op()->GetAttr("bias")));
    PADDLE_ENFORCE_EQ(
        bias_after_scale,
        boost::get<bool>(scale_op->Op()->GetAttr("bias_after_scale")));
    PADDLE_ENFORCE_EQ(op_role, boost::get<int>(scale_op->Op()->GetAttr(
                                   OpProtoAndCheckerMaker::OpRoleAttrName())));
  }

  // NOTE: fused_var only exists in the scope, so the graph doesn't have a
  // fused_var node.

  VLOG(10) << "Insert fused scale to graph.";
  OpDesc scale_desc(scale_ops[0]->Op()->Block());
  scale_desc.SetType("scale");
  scale_desc.SetInput("X", {fused_var_name});
  scale_desc.SetOutput("Out", {fused_var_name});
  scale_desc.SetAttr("scale", scale);
  scale_desc.SetAttr("bias", bias);
  scale_desc.SetAttr("bias_after_scale", bias_after_scale);
  scale_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role);
  auto scale_node = graph->CreateOpNode(&scale_desc);

  for (auto scale_op : scale_ops) {
    // set inputs
    scale_node->inputs.insert(scale_node->inputs.begin(),
                              scale_op->inputs.begin(), scale_op->inputs.end());
    for (auto &input : scale_op->inputs) {
      std::replace(input->outputs.begin(), input->outputs.end(), scale_op,
                   scale_node);
    }
    // set outputs
    scale_node->outputs.insert(scale_node->outputs.begin(),
                               scale_op->outputs.begin(),
                               scale_op->outputs.end());
    for (auto &output : scale_op->outputs) {
      std::replace(output->inputs.begin(), output->inputs.end(), scale_op,
                   scale_node);
    }
  }

  // Delete the original scale_ops.
  for (auto &scale_op : scale_ops) {
    graph->RemoveNode(scale_op);
  }
}

}  // namespace details
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(fuse_adam_op_pass, paddle::framework::details::FuseAdamOpPass)
    .RequirePassAttr(paddle::framework::details::kPlaces)
    .RequirePassAttr(paddle::framework::details::kLocalScopes);
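For context on why FuseScaleOps exists: Adam keeps a running power of each of its two decay rates per parameter, and in the graph each of those powers is advanced by its own scale op. A sketch of the relevant recurrences in standard Adam notation (background only, not code from this diff):

    \beta_1^t = \beta_1 \cdot \beta_1^{t-1}, \qquad \beta_2^t = \beta_2 \cdot \beta_2^{t-1}

    \hat{m}_t = \frac{m_t}{1 - \beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1 - \beta_2^t}, \qquad \theta_t = \theta_{t-1} - \frac{\alpha \, \hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}

Because the pass has already enforced that every adam op (and every associated scale op) shares the same beta1, beta2, and scale attributes, replacing the per-parameter scale ops over Beta1Pow and Beta2Pow with a single scale op over each fused tensor leaves these recurrences unchanged.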
paddle/fluid/framework/details/fuse_adam_op_pass.h
@@ -0,0 +1,55 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"

namespace paddle {
namespace framework {
namespace details {

class FuseAdamOpPass : public FuseOptimizerOpPass {
 private:
  virtual const std::string GetOpType() const;

  virtual const std::vector<std::string> GetAuxiliaryVarNames() const;

  // Fuse Adam ops and the scale ops which are used to update "Beta1Pow" and
  // "Beta2Pow".
  virtual void FuseOptimizerOps(
      const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const;

  void FuseAdamOps(
      const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const;

  void FuseScaleOps(const std::vector<std::string> &aux_var_set,
                    const std::string &fused_var_name,
                    const std::vector<ir::Node *> &adam_ops,
                    ir::Graph *graph) const;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
@@ -0,0 +1,240 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h"
#include <algorithm>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace framework {
namespace details {

void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
  ir::Graph &result = *graph;

  auto &places = Get<const std::vector<platform::Place>>(kPlaces);
  auto &local_scopes = Get<const std::vector<Scope *>>(kLocalScopes);

  const std::string fuse_op_type = GetOpType();
  const std::vector<std::string> aux_var_names = GetAuxiliaryVarNames();

  // Step 1: Get the specified ops and their auxiliary variables.
  std::vector<ir::Node *> topo_nodes = ir::TopologySortOperations(result);
  std::unordered_map<std::string, std::vector<std::string>> aux_var_set;
  std::vector<ir::Node *> opt_ops;
  for (auto &node : topo_nodes) {
    GetSpecifiedOpsAndVars(fuse_op_type, aux_var_names, node, &opt_ops,
                           &aux_var_set);
  }

  VLOG(10) << "Find " << fuse_op_type << " operators: " << opt_ops.size();
  if (opt_ops.size() == 0) {
    return;
  }

  if (result.Has(kFusedOptType)) {
    VLOG(10)
        << "Currently only support fusing one type of optimizer op. Has fused "
        << result.Get<FusedOptType>(kFusedOptType);
    return;
  } else {
    result.Set(kFusedOptType, new FusedOptType);
  }
  result.Get<FusedOptType>(kFusedOptType) = fuse_op_type;

  // Step 2: Insert fused_var_name into FusedVars; the FusedVars need to be
  // initialized in the scopes before execution.
  if (!result.Has(kFusedVars)) {
    result.Set(kFusedVars, new FusedVars);
  }
  std::unordered_map<std::string, std::string> fused_vars_name;
  fused_vars_name.reserve(aux_var_names.size() + 1);
  auto &fused_var_set = result.Get<FusedVars>(kFusedVars);
  const std::string prefix(kFusedVarNamePrefix);
  // NOTE: the fused_var_name should be unique.
  for (auto &var_name : aux_var_names) {
    auto fused_var_name = prefix + "_" + fuse_op_type + "_" + var_name + "_" +
                          aux_var_set[var_name][0];
    VLOG(10) << fused_var_name;
    fused_vars_name.emplace(var_name, fused_var_name);
    PADDLE_ENFORCE_EQ(fused_var_set.count(fused_var_name), 0);
    fused_var_set.insert(fused_var_name);
  }

  // Step 3: Get the fused gradient's name.
  auto &params_grads = result.Get<ParamsAndGrads>(kParamsAndGrads);
  if (!result.Has(kFusedGrads)) {
    PADDLE_THROW(
        "The alloc_continuous_space_for_grad_pass should be called before this "
        "pass.");
  }
  auto &fused_grad = result.Get<FusedGrads>(kFusedGrads);
  auto &fused_vars = result.Get<FusedVars>(kFusedVars);
  auto iter = std::find(fused_vars.begin(), fused_vars.end(), fused_grad);
  PADDLE_ENFORCE(iter != fused_vars.end(), "Cannot find the fused_grad.");
  fused_vars_name.emplace("Grad", fused_grad);

  // Step 4: Sort the parameters and auxiliary variables according to the
  // parameters' names so that the variable names correspond correctly.
  PADDLE_ENFORCE(result.Has(kParamsAndGrads), "Doesn't find kParamsAndGrads.");
  PADDLE_ENFORCE_EQ(params_grads.size(), aux_var_set.begin()->second.size(),
                    "The size of params_grads and aux_var_set are not equal.");
  SortParametersAndAuxVars(params_grads, &aux_var_set, &opt_ops);

  // Step 5: Alloc continuous space for the parameters and auxiliary variables
  // (e.g. Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops
  // separately.
  InitFusedVarsAndAllocSpaceForVars(places, local_scopes, aux_var_names,
                                    aux_var_set, fused_vars_name);

  // Step 6: Fuse the optimizer ops and scale ops.
  FuseOptimizerOps(aux_var_set, fused_vars_name, opt_ops, &result);

  // Step 7: Remove the original optimizer ops.
  for (auto &opt_op : opt_ops) {
    graph->RemoveNode(opt_op);
  }
}

void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
    const std::vector<platform::Place> &places,
    const std::vector<Scope *> &local_scopes,
    const std::vector<std::string> &aux_var_names,
    const std::unordered_map<std::string, std::vector<std::string>>
        &aux_var_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name) const {
  VLOG(10) << "Init FusedVars.";
  // Alloc parameters and auxiliary vars in the respective scope.
  size_t idx = local_scopes.size();
  for (auto iter = local_scopes.rbegin(); iter != local_scopes.rend();
       ++iter, --idx) {
    auto &scope = *iter;
    for (auto &var_name : aux_var_names) {
      auto fused_var_name = fused_vars_name.at(var_name);
      VLOG(10) << "Init " << fused_var_name;
      PADDLE_ENFORCE(scope->FindVar(fused_var_name) == nullptr,
                     "%s already exists in scope[%d]", fused_var_name, idx);
      scope->Var(fused_var_name)->GetMutable<LoDTensor>();
    }
  }

  ProgramDesc program_desc;
  auto *global_block = program_desc.MutableBlock(0);
  for (auto &var_name : aux_var_names) {
    AppendAllocContinuousSpace(aux_var_set.at(var_name),
                               fused_vars_name.at(var_name), true,
                               global_block);
  }

  for (size_t i = 0; i < local_scopes.size(); ++i) {
    for (auto &op_desc : global_block->AllOps()) {
      auto op = OpRegistry::CreateOp(*op_desc);
      op->Run(*local_scopes[i], places[i]);
    }
  }
}

void FuseOptimizerOpPass::SortParametersAndAuxVars(
    const std::vector<std::pair<std::string, std::string>> &params_grads,
    std::unordered_map<std::string, std::vector<std::string>> *aux_vars_set,
    std::vector<ir::Node *> *ops) const {
  PADDLE_ENFORCE_NE(aux_vars_set->count("Param"), static_cast<size_t>(0));
  auto &param_vec = aux_vars_set->at("Param");

  std::vector<size_t> param_sort_idx;
  param_sort_idx.reserve(param_vec.size());

  for (auto &p_g : params_grads) {
    auto iter = std::find(param_vec.begin(), param_vec.end(), p_g.first);
    PADDLE_ENFORCE(iter != param_vec.end());
    auto idx = std::distance(param_vec.begin(), iter);
    param_sort_idx.emplace_back(idx);
  }

  for (auto &aux_vars : *aux_vars_set) {
    std::vector<std::string> sorted_vars;
    sorted_vars.reserve(aux_vars.second.size());
    for (size_t i = 0; i < aux_vars.second.size(); ++i) {
      sorted_vars.emplace_back(aux_vars.second.at(param_sort_idx[i]));
    }
    std::swap(aux_vars.second, sorted_vars);

    std::stringstream out;
    for (auto &var_name : aux_vars.second) {
      out << var_name << " ";
    }
    VLOG(10) << aux_vars.first << ": " << out.str();
  }

  std::vector<ir::Node *> sorted_ops;
  sorted_ops.reserve(ops->size());
  for (size_t i = 0; i < ops->size(); ++i) {
    sorted_ops.emplace_back(ops->at(param_sort_idx[i]));
  }
  std::swap(*ops, sorted_ops);
}

void FuseOptimizerOpPass::GetSpecifiedOpsAndVars(
    const std::string &op_type, const std::vector<std::string> &aux_vars_name,
    ir::Node *node, std::vector<ir::Node *> *ops,
    std::unordered_map<std::string, std::vector<std::string>> *aux_args_name)
    const {
  if (node->Op()->Type() != op_type) return;

  for (auto &var_n : aux_vars_name) {
    auto arg_names = node->Op()->Input(var_n);
    PADDLE_ENFORCE_EQ(arg_names.size(), static_cast<size_t>(1));
    (*aux_args_name)[var_n].emplace_back(arg_names[0]);
    VLOG(10) << var_n << ", " << arg_names[0];
  }
  ops->emplace_back(node);
}

void FuseOptimizerOpPass::AppendAllocContinuousSpace(
    const std::vector<std::string> &args, const std::string &out_arg,
    bool copy_data, BlockDesc *global_block) const {
  auto op_desc = global_block->AppendOp();
  op_desc->SetType("alloc_continuous_space");
  op_desc->SetInput("Input", args);
  op_desc->SetOutput("Output", args);
  op_desc->SetOutput("FusedOutput", {out_arg});
  op_desc->SetAttr("copy_data", copy_data);
  op_desc->SetAttr("check_name", true);
}

void FuseOptimizerOpPass::InserInputAndOutputForOptOps(
    const std::vector<ir::Node *> &opt_ops, ir::Node *opt_node) const {
  std::unordered_set<ir::Node *> inputs;
  std::unordered_set<ir::Node *> outputs;
  for (auto opt_op : opt_ops) {
    // set inputs
    inputs.insert(opt_op->inputs.begin(), opt_op->inputs.end());
    for (auto &input : opt_op->inputs) {
      replace(input->outputs.begin(), input->outputs.end(), opt_op, opt_node);
    }
    // set outputs
    outputs.insert(opt_op->outputs.begin(), opt_op->outputs.end());
    for (auto &output : opt_op->outputs) {
      replace(output->inputs.begin(), output->inputs.end(), opt_op, opt_node);
    }
  }
  opt_node->inputs.insert(opt_node->inputs.begin(), inputs.begin(),
                          inputs.end());
  opt_node->outputs.insert(opt_node->outputs.begin(), outputs.begin(),
                           outputs.end());
}
}  // namespace details
}  // namespace framework
}  // namespace paddle
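A minimal standalone sketch of the index permutation performed by SortParametersAndAuxVars, using plain std::vector and made-up variable names (fc_w, fc_b) instead of the pass's data structures:

#include <algorithm>
#include <cassert>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Order produced by alloc_continuous_space_for_grad_pass (hypothetical names).
  std::vector<std::pair<std::string, std::string>> params_grads = {
      {"fc_b", "fc_b@GRAD"}, {"fc_w", "fc_w@GRAD"}};
  // Order in which the optimizer ops were visited.
  std::vector<std::string> param_vec = {"fc_w", "fc_b"};
  std::vector<std::string> moment1_vec = {"fc_w_moment1", "fc_b_moment1"};

  // Build the permutation: for each (param, grad), find its index in param_vec.
  std::vector<size_t> param_sort_idx;
  for (auto &p_g : params_grads) {
    auto iter = std::find(param_vec.begin(), param_vec.end(), p_g.first);
    assert(iter != param_vec.end());
    param_sort_idx.push_back(
        static_cast<size_t>(std::distance(param_vec.begin(), iter)));
  }

  // Apply the same permutation to an auxiliary list, as the pass does.
  std::vector<std::string> sorted_moment1;
  for (size_t i = 0; i < moment1_vec.size(); ++i) {
    sorted_moment1.push_back(moment1_vec.at(param_sort_idx[i]));
  }

  // Prints "fc_b_moment1 fc_w_moment1", i.e. the params_grads order.
  for (auto &name : sorted_moment1) std::cout << name << " ";
  std::cout << "\n";
  return 0;
}

The real pass applies the same param_sort_idx permutation to every auxiliary list and to the opt_ops vector, so the i-th optimizer op still owns the i-th entry of every list after sorting.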
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
@@ -0,0 +1,75 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/build_strategy.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/ir/graph.h"

namespace paddle {
namespace framework {
namespace details {

class FuseOptimizerOpPass : public ir::Pass {
 protected:
  void ApplyImpl(ir::Graph *graph) const override;

 protected:
  virtual void SortParametersAndAuxVars(
      const std::vector<std::pair<std::string, std::string>> &params_grads,
      std::unordered_map<std::string, std::vector<std::string>> *aux_var_set,
      std::vector<ir::Node *> *ops) const;

  void InserInputAndOutputForOptOps(const std::vector<ir::Node *> &opt_ops,
                                    ir::Node *opt_node) const;

 private:
  virtual const std::string GetOpType() const = 0;

  virtual const std::vector<std::string> GetAuxiliaryVarNames() const = 0;

  virtual void FuseOptimizerOps(
      const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const = 0;

  void GetSpecifiedOpsAndVars(
      const std::string &op_type, const std::vector<std::string> &aux_vars_name,
      ir::Node *node, std::vector<ir::Node *> *ops,
      std::unordered_map<std::string, std::vector<std::string>> *aux_args_name)
      const;

  void AppendAllocContinuousSpace(const std::vector<std::string> &args,
                                  const std::string &out_arg, bool copy_data,
                                  BlockDesc *global_block) const;

  void InitFusedVarsAndAllocSpaceForVars(
      const std::vector<platform::Place> &places,
      const std::vector<Scope *> &local_scopes,
      const std::vector<std::string> &aux_var_names,
      const std::unordered_map<std::string, std::vector<std::string>>
          &aux_var_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name)
      const;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
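To show the extension point that FuseOptimizerOpPass defines, here is a hedged sketch of a hypothetical subclass for another optimizer. It is not part of this diff; it assumes the same includes and namespaces as fuse_adam_op_pass.h above, and the choice of "sgd" with only a "Param" auxiliary list is an assumption for illustration.

// Hypothetical subclass, shown only to illustrate the virtual interface.
class FuseSgdOpPass : public FuseOptimizerOpPass {
 private:
  // Which op type to collect from the graph.
  virtual const std::string GetOpType() const { return "sgd"; }

  // SGD keeps no moment tensors, so only the parameters need fusing.
  virtual const std::vector<std::string> GetAuxiliaryVarNames() const {
    return {"Param"};
  }

  // Build one fused op from the collected per-parameter ops.
  virtual void FuseOptimizerOps(
      const std::unordered_map<std::string, std::vector<std::string>> &vars_set,
      const std::unordered_map<std::string, std::string> &fused_vars_name,
      const std::vector<ir::Node *> &sgd_ops, ir::Graph *graph) const {
    // A real implementation would mirror FuseAdamOpPass::FuseAdamOps: create
    // an OpDesc whose inputs and outputs point at the fused variables, call
    // graph->CreateOpNode(&desc), and then rewire the edges with
    // InserInputAndOutputForOptOps(sgd_ops, node).
  }
};

A registration line analogous to the one in fuse_adam_op_pass.cc (REGISTER_PASS plus the kPlaces and kLocalScopes pass attributes) would also be needed before such a pass could be applied.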
Some files were not shown because too many files have changed in this diff.