Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into quan_ck
	
		
	
				
					
				
			test=developrevert-16555-model_data_cryption_link_all_lib
						commit
						d41b623a72
					
				| @ -0,0 +1,42 @@ | ||||
| # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| 
 | ||||
# Fetches and builds the DGC library from the PaddlePaddle/Fleet repository
# as an external project, then exposes it to the rest of the build as the
# imported target `dgc`.
INCLUDE(ExternalProject)

# Where the sources are checked out and where the built artifacts are copied.
SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc")
SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc")
SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE)
SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE)
INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR})

# The project has no configure step; it is built in-source with `make` from
# the `collective` sub-directory, and the install step copies libdgc.a and
# dgc.h into the install tree defined above.
ExternalProject_Add(
    extern_dgc
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY "https://github.com/PaddlePaddle/Fleet"
    GIT_TAG "2d04dc3800cdd0601f1b65d547dabcc60b0cf9dc"
    SOURCE_DIR "${DGC_SOURCES_DIR}"
    CONFIGURE_COMMAND ""
    BUILD_COMMAND cd collective && make -j
    INSTALL_COMMAND mkdir -p ${DGC_INSTALL_DIR}/lib/  ${DGC_INCLUDE_DIR}/dgc
        && cp ${DGC_SOURCES_DIR}/collective/build/lib/libdgc.a ${DGC_LIBRARIES}
        && cp ${DGC_SOURCES_DIR}/collective/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/
    BUILD_IN_SOURCE 1
)

# IMPORTED_LOCATION points at libdgc.a, i.e. a static archive, so the imported
# target is declared STATIC (declaring it SHARED mis-describes the artifact).
ADD_LIBRARY(dgc STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES})
ADD_DEPENDENCIES(dgc extern_dgc)

LIST(APPEND external_project_dependencies dgc)
| 
 | ||||
| @ -0,0 +1,199 @@ | ||||
| //   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| //
 | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License.
 | ||||
| // You may obtain a copy of the License at
 | ||||
| //
 | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| //
 | ||||
| // Unless required by applicable law or agreed to in writing, software
 | ||||
| // distributed under the License is distributed on an "AS IS" BASIS,
 | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||||
| // See the License for the specific language governing permissions and
 | ||||
| // limitations under the License.
 | ||||
| 
 | ||||
| #include "paddle/fluid/framework/details/fuse_adam_op_pass.h" | ||||
| #include <algorithm> | ||||
| #include "paddle/fluid/framework/ir/graph_helper.h" | ||||
| #include "paddle/fluid/framework/op_registry.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace framework { | ||||
| namespace details { | ||||
| 
 | ||||
| const std::string FuseAdamOpPass::GetOpType() const { return "adam"; } | ||||
| 
 | ||||
| const std::vector<std::string> FuseAdamOpPass::GetAuxiliaryVarNames() const { | ||||
|   return {"Param", "Moment1", "Moment2", "Beta1Pow", "Beta2Pow"}; | ||||
| } | ||||
| 
 | ||||
| void FuseAdamOpPass::FuseOptimizerOps( | ||||
|     const std::unordered_map<std::string, std::vector<std::string>> | ||||
|         &aux_var_set, | ||||
|     const std::unordered_map<std::string, std::string> &fused_vars_name, | ||||
|     const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const { | ||||
|   FuseAdamOps(aux_var_set, fused_vars_name, adam_ops, graph); | ||||
|   FuseScaleOps(aux_var_set.at("Beta1Pow"), fused_vars_name.at("Beta1Pow"), | ||||
|                adam_ops, graph); | ||||
|   FuseScaleOps(aux_var_set.at("Beta2Pow"), fused_vars_name.at("Beta2Pow"), | ||||
|                adam_ops, graph); | ||||
| } | ||||
| 
 | ||||
| void FuseAdamOpPass::FuseAdamOps( | ||||
|     const std::unordered_map<std::string, std::vector<std::string>> &vars_set, | ||||
|     const std::unordered_map<std::string, std::string> &fused_vars_name, | ||||
|     const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const { | ||||
|   PADDLE_ENFORCE_GT(adam_ops.size(), static_cast<size_t>(0)); | ||||
| 
 | ||||
|   // Check attributions
 | ||||
|   // NOTE: If new attribution is added, the following code maybe need change.
 | ||||
|   int op_role = boost::get<int>( | ||||
|       adam_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())); | ||||
|   float beta1 = boost::get<float>(adam_ops[0]->Op()->GetAttr("beta1")); | ||||
|   float beta2 = boost::get<float>(adam_ops[0]->Op()->GetAttr("beta2")); | ||||
|   float epsilon = boost::get<float>(adam_ops[0]->Op()->GetAttr("epsilon")); | ||||
|   bool lazy_mode = boost::get<bool>(adam_ops[0]->Op()->GetAttr("lazy_mode")); | ||||
|   int64_t min_row_size_to_use_multithread = boost::get<int64_t>( | ||||
|       adam_ops[0]->Op()->GetAttr("min_row_size_to_use_multithread")); | ||||
|   for (auto &adam_op : adam_ops) { | ||||
|     PADDLE_ENFORCE_EQ(beta1, | ||||
|                       boost::get<float>(adam_op->Op()->GetAttr("beta1"))); | ||||
|     PADDLE_ENFORCE_EQ(beta2, | ||||
|                       boost::get<float>(adam_op->Op()->GetAttr("beta2"))); | ||||
|     PADDLE_ENFORCE_EQ(epsilon, | ||||
|                       boost::get<float>(adam_op->Op()->GetAttr("epsilon"))); | ||||
|     PADDLE_ENFORCE_EQ(lazy_mode, | ||||
|                       boost::get<bool>(adam_op->Op()->GetAttr("lazy_mode"))); | ||||
|     PADDLE_ENFORCE_EQ(min_row_size_to_use_multithread, | ||||
|                       boost::get<int64_t>(adam_op->Op()->GetAttr( | ||||
|                           "min_row_size_to_use_multithread"))); | ||||
|     PADDLE_ENFORCE_EQ(op_role, boost::get<int>(adam_op->Op()->GetAttr( | ||||
|                                    OpProtoAndCheckerMaker::OpRoleAttrName()))); | ||||
|   } | ||||
| 
 | ||||
|   // NOTE: fused_var is only exist in scope, so the graph doesn't have fused_var
 | ||||
|   // node.
 | ||||
| 
 | ||||
|   VLOG(10) << "Insert adam to graph "; | ||||
|   OpDesc adam_desc(adam_ops[0]->Op()->Block()); | ||||
|   adam_desc.SetType("adam"); | ||||
|   adam_desc.SetInput("Param", {fused_vars_name.at("Param")}); | ||||
|   adam_desc.SetInput("Grad", {fused_vars_name.at("Grad")}); | ||||
|   adam_desc.SetInput("Moment1", {fused_vars_name.at("Moment1")}); | ||||
|   adam_desc.SetInput("Moment2", {fused_vars_name.at("Moment2")}); | ||||
|   // TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
 | ||||
|   adam_desc.SetInput("LearningRate", adam_ops[0]->Op()->Input("LearningRate")); | ||||
|   adam_desc.SetInput("Beta1Pow", adam_ops[0]->Op()->Input("Beta1Pow")); | ||||
|   adam_desc.SetInput("Beta2Pow", adam_ops[0]->Op()->Input("Beta2Pow")); | ||||
| 
 | ||||
|   adam_desc.SetOutput("ParamOut", {fused_vars_name.at("Param")}); | ||||
|   adam_desc.SetOutput("Moment1Out", {fused_vars_name.at("Moment1")}); | ||||
|   adam_desc.SetOutput("Moment2Out", {fused_vars_name.at("Moment2")}); | ||||
|   adam_desc.SetAttr("beta1", beta1); | ||||
|   adam_desc.SetAttr("beta2", beta2); | ||||
|   adam_desc.SetAttr("epsilon", epsilon); | ||||
|   adam_desc.SetAttr("lazy_mode", lazy_mode); | ||||
|   adam_desc.SetAttr("min_row_size_to_use_multithread", | ||||
|                     min_row_size_to_use_multithread); | ||||
|   adam_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role); | ||||
| 
 | ||||
|   auto adam_node = graph->CreateOpNode(&adam_desc); | ||||
| 
 | ||||
|   InserInputAndOutputForOptOps(adam_ops, adam_node); | ||||
| } | ||||
| 
 | ||||
| void FuseAdamOpPass::FuseScaleOps(const std::vector<std::string> &beta_name, | ||||
|                                   const std::string &fused_var_name, | ||||
|                                   const std::vector<ir::Node *> &adam_ops, | ||||
|                                   ir::Graph *graph) const { | ||||
|   PADDLE_ENFORCE_EQ(beta_name.size(), adam_ops.size()); | ||||
|   const std::string scale_op_name = "scale"; | ||||
| 
 | ||||
|   // Get the scale_ops of dealing the adam's beta var.
 | ||||
|   std::vector<ir::Node *> scale_ops; | ||||
|   scale_ops.reserve(beta_name.size()); | ||||
|   for (size_t i = 0; i < adam_ops.size(); ++i) { | ||||
|     auto &beta_1_pow_name = beta_name[i]; | ||||
|     auto beta_pow_iter = std::find_if( | ||||
|         adam_ops[i]->inputs.begin(), adam_ops[i]->inputs.end(), | ||||
|         [&beta_name, &beta_1_pow_name](ir::Node *var_node) -> bool { | ||||
|           return var_node->Var() && var_node->Var()->Name() == beta_1_pow_name; | ||||
|         }); | ||||
|     PADDLE_ENFORCE(beta_pow_iter != adam_ops[i]->inputs.end()); | ||||
| 
 | ||||
|     auto beta_pow_node = *beta_pow_iter; | ||||
|     auto scale_op_iter = std::find_if( | ||||
|         beta_pow_node->outputs.begin(), beta_pow_node->outputs.end(), | ||||
|         [&scale_op_name](ir::Node *op_node) -> bool { | ||||
|           return op_node->Op() && op_node->Op()->Type() == scale_op_name; | ||||
|         }); | ||||
|     PADDLE_ENFORCE(scale_op_iter != beta_pow_node->outputs.end()); | ||||
| 
 | ||||
|     scale_ops.emplace_back(*scale_op_iter); | ||||
|   } | ||||
|   PADDLE_ENFORCE_EQ(scale_ops.size(), beta_name.size()); | ||||
| 
 | ||||
|   // Check attributions
 | ||||
|   // NOTE: If new attribution is added, the following code maybe need change.
 | ||||
|   int op_role = boost::get<int>( | ||||
|       scale_ops[0]->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())); | ||||
|   float scale = boost::get<float>(scale_ops[0]->Op()->GetAttr("scale")); | ||||
|   float bias = boost::get<float>(scale_ops[0]->Op()->GetAttr("bias")); | ||||
|   bool bias_after_scale = | ||||
|       boost::get<bool>(scale_ops[0]->Op()->GetAttr("bias_after_scale")); | ||||
|   for (auto &scale_op : scale_ops) { | ||||
|     PADDLE_ENFORCE_EQ(scale, | ||||
|                       boost::get<float>(scale_op->Op()->GetAttr("scale"))); | ||||
|     PADDLE_ENFORCE_EQ(bias, boost::get<float>(scale_op->Op()->GetAttr("bias"))); | ||||
|     PADDLE_ENFORCE_EQ( | ||||
|         bias_after_scale, | ||||
|         boost::get<bool>(scale_op->Op()->GetAttr("bias_after_scale"))); | ||||
|     PADDLE_ENFORCE_EQ(op_role, boost::get<int>(scale_op->Op()->GetAttr( | ||||
|                                    OpProtoAndCheckerMaker::OpRoleAttrName()))); | ||||
|   } | ||||
| 
 | ||||
|   // NOTE: fused_var is only exist in scope, so the graph doesn't have fused_var
 | ||||
|   // node.
 | ||||
| 
 | ||||
|   VLOG(10) << "Insert fused scale to graph."; | ||||
|   OpDesc scale_desc(scale_ops[0]->Op()->Block()); | ||||
|   scale_desc.SetType("scale"); | ||||
|   scale_desc.SetInput("X", {fused_var_name}); | ||||
|   scale_desc.SetOutput("Out", {fused_var_name}); | ||||
|   scale_desc.SetAttr("scale", scale); | ||||
|   scale_desc.SetAttr("bias", bias); | ||||
|   scale_desc.SetAttr("bias_after_scale", bias_after_scale); | ||||
|   scale_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), op_role); | ||||
|   auto scale_node = graph->CreateOpNode(&scale_desc); | ||||
| 
 | ||||
|   for (auto scale_op : scale_ops) { | ||||
|     // set inputs
 | ||||
|     scale_node->inputs.insert(scale_node->inputs.begin(), | ||||
|                               scale_op->inputs.begin(), scale_op->inputs.end()); | ||||
|     for (auto &input : scale_op->inputs) { | ||||
|       std::replace(input->outputs.begin(), input->outputs.end(), scale_op, | ||||
|                    scale_node); | ||||
|     } | ||||
|     // set outputs
 | ||||
|     scale_node->outputs.insert(scale_node->outputs.begin(), | ||||
|                                scale_op->outputs.begin(), | ||||
|                                scale_op->outputs.end()); | ||||
|     for (auto &output : scale_op->outputs) { | ||||
|       std::replace(output->inputs.begin(), output->inputs.end(), scale_op, | ||||
|                    scale_node); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   // Delete scale_ops
 | ||||
|   for (auto &scale_op : scale_ops) { | ||||
|     graph->RemoveNode(scale_op); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| }  // namespace details
 | ||||
| }  // namespace framework
 | ||||
| }  // namespace paddle
 | ||||
| 
 | ||||
// Registers FuseAdamOpPass as "fuse_adam_op_pass". kPlaces and kLocalScopes
// are declared as required pass attributes; FuseOptimizerOpPass::ApplyImpl
// reads both of them through Get<>().
| REGISTER_PASS(fuse_adam_op_pass, paddle::framework::details::FuseAdamOpPass) | ||||
|     .RequirePassAttr(paddle::framework::details::kPlaces) | ||||
|     .RequirePassAttr(paddle::framework::details::kLocalScopes); | ||||
| @ -0,0 +1,55 @@ | ||||
| //   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| //
 | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License.
 | ||||
| // You may obtain a copy of the License at
 | ||||
| //
 | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| //
 | ||||
| // Unless required by applicable law or agreed to in writing, software
 | ||||
| // distributed under the License is distributed on an "AS IS" BASIS,
 | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||||
| // See the License for the specific language governing permissions and
 | ||||
| // limitations under the License.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <string> | ||||
| #include <unordered_map> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| #include "paddle/fluid/framework/details/build_strategy.h" | ||||
| #include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h" | ||||
| #include "paddle/fluid/framework/details/multi_devices_helper.h" | ||||
| #include "paddle/fluid/framework/ir/graph.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace framework { | ||||
| namespace details { | ||||
| 
 | ||||
| class FuseAdamOpPass : public FuseOptimizerOpPass { | ||||
|  private: | ||||
|   virtual const std::string GetOpType() const; | ||||
| 
 | ||||
|   virtual const std::vector<std::string> GetAuxiliaryVarNames() const; | ||||
| 
 | ||||
|   // Fuse Adam Ops and Scale Ops which are used to update "Beta1Pow", "Beta2Pow"
 | ||||
|   virtual void FuseOptimizerOps( | ||||
|       const std::unordered_map<std::string, std::vector<std::string>> &vars_set, | ||||
|       const std::unordered_map<std::string, std::string> &fused_vars_name, | ||||
|       const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const; | ||||
| 
 | ||||
|   void FuseAdamOps( | ||||
|       const std::unordered_map<std::string, std::vector<std::string>> &vars_set, | ||||
|       const std::unordered_map<std::string, std::string> &fused_vars_name, | ||||
|       const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const; | ||||
| 
 | ||||
|   void FuseScaleOps(const std::vector<std::string> &aux_var_set, | ||||
|                     const std::string &fused_var_name, | ||||
|                     const std::vector<ir::Node *> &adam_ops, | ||||
|                     ir::Graph *graph) const; | ||||
| }; | ||||
| 
 | ||||
| }  // namespace details
 | ||||
| }  // namespace framework
 | ||||
| }  // namespace paddle
 | ||||
| @ -0,0 +1,240 @@ | ||||
| //   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| //
 | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License.
 | ||||
| // You may obtain a copy of the License at
 | ||||
| //
 | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| //
 | ||||
| // Unless required by applicable law or agreed to in writing, software
 | ||||
| // distributed under the License is distributed on an "AS IS" BASIS,
 | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||||
| // See the License for the specific language governing permissions and
 | ||||
| // limitations under the License.
 | ||||
| 
 | ||||
| #include "paddle/fluid/framework/details/fuse_optimizer_op_pass.h" | ||||
| #include <algorithm> | ||||
| #include <unordered_set> | ||||
| #include "paddle/fluid/framework/ir/graph_helper.h" | ||||
| #include "paddle/fluid/framework/op_registry.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace framework { | ||||
| namespace details { | ||||
| 
 | ||||
| void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const { | ||||
|   ir::Graph &result = *graph; | ||||
| 
 | ||||
|   auto &places = Get<const std::vector<platform::Place>>(kPlaces); | ||||
|   auto &local_scopes = Get<const std::vector<Scope *>>(kLocalScopes); | ||||
| 
 | ||||
|   const std::string fuse_op_type = GetOpType(); | ||||
|   const std::vector<std::string> aux_var_names = GetAuxiliaryVarNames(); | ||||
| 
 | ||||
|   // Step 1: Get the specified op and auxiliary variables.
 | ||||
|   std::vector<ir::Node *> topo_nodes = ir::TopologySortOperations(result); | ||||
|   std::unordered_map<std::string, std::vector<std::string>> aux_var_set; | ||||
|   std::vector<ir::Node *> opt_ops; | ||||
|   for (auto &node : topo_nodes) { | ||||
|     GetSpecifiedOpsAndVars(fuse_op_type, aux_var_names, node, &opt_ops, | ||||
|                            &aux_var_set); | ||||
|   } | ||||
| 
 | ||||
|   VLOG(10) << "Find " << fuse_op_type << " operators: " << opt_ops.size(); | ||||
|   if (opt_ops.size() == 0) { | ||||
|     return; | ||||
|   } | ||||
| 
 | ||||
|   if (result.Has(kFusedOptType)) { | ||||
|     VLOG(10) | ||||
|         << "Currently only support fusing one type optimizer op. Has fused " | ||||
|         << result.Get<FusedOptType>(kFusedOptType); | ||||
|     return; | ||||
|   } else { | ||||
|     result.Set(kFusedOptType, new FusedOptType); | ||||
|   } | ||||
|   result.Get<FusedOptType>(kFusedOptType) = fuse_op_type; | ||||
| 
 | ||||
|   // Step 2: Insert fused_var_name to FusedVars, and the FusedVars need be
 | ||||
|   // initialized in scopes before execution.
 | ||||
|   if (!result.Has(kFusedVars)) { | ||||
|     result.Set(kFusedVars, new FusedVars); | ||||
|   } | ||||
|   std::unordered_map<std::string, std::string> fused_vars_name; | ||||
|   fused_vars_name.reserve(aux_var_names.size() + 1); | ||||
|   auto &fused_var_set = result.Get<FusedVars>(kFusedVars); | ||||
|   const std::string prefix(kFusedVarNamePrefix); | ||||
|   // NOTE: the fused_var_name should be unique.
 | ||||
|   for (auto &var_name : aux_var_names) { | ||||
|     auto fused_var_name = prefix + "_" + fuse_op_type + "_" + var_name + "_" + | ||||
|                           aux_var_set[var_name][0]; | ||||
|     VLOG(10) << fused_var_name; | ||||
|     fused_vars_name.emplace(var_name, fused_var_name); | ||||
|     PADDLE_ENFORCE_EQ(fused_var_set.count(fused_var_name), 0); | ||||
|     fused_var_set.insert(fused_var_name); | ||||
|   } | ||||
| 
 | ||||
|   // Step 3: Get the fused Gradient's name
 | ||||
|   auto ¶ms_grads = result.Get<ParamsAndGrads>(kParamsAndGrads); | ||||
|   if (!result.Has(kFusedGrads)) { | ||||
|     PADDLE_THROW( | ||||
|         "The alloc_continuous_space_for_grad_pass should be called before this " | ||||
|         "pass."); | ||||
|   } | ||||
|   auto &fused_grad = result.Get<FusedGrads>(kFusedGrads); | ||||
|   auto &fused_vars = result.Get<FusedVars>(kFusedVars); | ||||
|   auto iter = std::find(fused_vars.begin(), fused_vars.end(), fused_grad); | ||||
|   PADDLE_ENFORCE(iter != fused_vars.end(), "Not find the fused_grad."); | ||||
|   fused_vars_name.emplace("Grad", fused_grad); | ||||
| 
 | ||||
|   // Step 4: Sort the parameters and auxiliary variables according
 | ||||
|   // to parameters' name to make variables' name correspond correctly.
 | ||||
|   PADDLE_ENFORCE(result.Has(kParamsAndGrads), "Does't find kParamsAndGrads."); | ||||
|   PADDLE_ENFORCE_EQ(params_grads.size(), aux_var_set.begin()->second.size(), | ||||
|                     "The size of params_grads and aux_var_set are not equal."); | ||||
|   SortParametersAndAuxVars(params_grads, &aux_var_set, &opt_ops); | ||||
| 
 | ||||
|   // Step 5: Alloc continuous space for Parameters and AuxiliaryVar(e.g.
 | ||||
|   // Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops separately.
 | ||||
|   InitFusedVarsAndAllocSpaceForVars(places, local_scopes, aux_var_names, | ||||
|                                     aux_var_set, fused_vars_name); | ||||
| 
 | ||||
|   // Step 6: Fuse optimizer Ops and Scale Ops
 | ||||
|   FuseOptimizerOps(aux_var_set, fused_vars_name, opt_ops, &result); | ||||
| 
 | ||||
|   // Step 7: Remove optimizer Ops
 | ||||
|   for (auto &opt_op : opt_ops) { | ||||
|     graph->RemoveNode(opt_op); | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars( | ||||
|     const std::vector<platform::Place> &places, | ||||
|     const std::vector<Scope *> &local_scopes, | ||||
|     const std::vector<std::string> &aux_var_names, | ||||
|     const std::unordered_map<std::string, std::vector<std::string>> | ||||
|         &aux_var_set, | ||||
|     const std::unordered_map<std::string, std::string> &fused_vars_name) const { | ||||
|   VLOG(10) << "Init FusedVars."; | ||||
|   // Alloc parameters and auxiliary vars in the respective scope.
 | ||||
|   size_t idx = local_scopes.size(); | ||||
|   for (auto iter = local_scopes.rbegin(); iter != local_scopes.rend(); | ||||
|        ++iter, --idx) { | ||||
|     auto &scope = *iter; | ||||
|     for (auto &var_name : aux_var_names) { | ||||
|       auto fused_var_name = fused_vars_name.at(var_name); | ||||
|       VLOG(10) << "Init " << fused_var_name; | ||||
|       PADDLE_ENFORCE(scope->FindVar(fused_var_name) == nullptr, | ||||
|                      "%s has exist in scope[%d]", fused_var_name, idx); | ||||
|       scope->Var(fused_var_name)->GetMutable<LoDTensor>(); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   ProgramDesc program_desc; | ||||
|   auto *global_block = program_desc.MutableBlock(0); | ||||
|   for (auto &var_name : aux_var_names) { | ||||
|     AppendAllocContinuousSpace(aux_var_set.at(var_name), | ||||
|                                fused_vars_name.at(var_name), true, | ||||
|                                global_block); | ||||
|   } | ||||
| 
 | ||||
|   for (size_t i = 0; i < local_scopes.size(); ++i) { | ||||
|     for (auto &op_desc : global_block->AllOps()) { | ||||
|       auto op = OpRegistry::CreateOp(*op_desc); | ||||
|       op->Run(*local_scopes[i], places[i]); | ||||
|     } | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| void FuseOptimizerOpPass::SortParametersAndAuxVars( | ||||
|     const std::vector<std::pair<std::string, std::string>> ¶ms_grads, | ||||
|     std::unordered_map<std::string, std::vector<std::string>> *aux_vars_set, | ||||
|     std::vector<ir::Node *> *ops) const { | ||||
|   PADDLE_ENFORCE_NE(aux_vars_set->count("Param"), static_cast<size_t>(0)); | ||||
|   auto ¶m_vec = aux_vars_set->at("Param"); | ||||
| 
 | ||||
|   std::vector<size_t> param_sort_idx; | ||||
|   param_sort_idx.reserve(param_vec.size()); | ||||
| 
 | ||||
|   for (auto &p_g : params_grads) { | ||||
|     auto iter = std::find(param_vec.begin(), param_vec.end(), p_g.first); | ||||
|     PADDLE_ENFORCE(iter != param_vec.end()); | ||||
|     auto idx = std::distance(param_vec.begin(), iter); | ||||
|     param_sort_idx.emplace_back(idx); | ||||
|   } | ||||
| 
 | ||||
|   for (auto &aux_vars : *aux_vars_set) { | ||||
|     std::vector<std::string> sorted_vars; | ||||
|     sorted_vars.reserve(aux_vars.second.size()); | ||||
|     for (size_t i = 0; i < aux_vars.second.size(); ++i) { | ||||
|       sorted_vars.emplace_back(aux_vars.second.at(param_sort_idx[i])); | ||||
|     } | ||||
|     std::swap(aux_vars.second, sorted_vars); | ||||
| 
 | ||||
|     std::stringstream out; | ||||
|     for (auto &var_name : aux_vars.second) { | ||||
|       out << var_name << " "; | ||||
|     } | ||||
|     VLOG(10) << aux_vars.first << ": " << out.str(); | ||||
|   } | ||||
| 
 | ||||
|   std::vector<ir::Node *> sorted_ops; | ||||
|   sorted_ops.reserve(ops->size()); | ||||
|   for (size_t i = 0; i < ops->size(); ++i) { | ||||
|     sorted_ops.emplace_back(ops->at(param_sort_idx[i])); | ||||
|   } | ||||
|   std::swap(*ops, sorted_ops); | ||||
| } | ||||
| 
 | ||||
| void FuseOptimizerOpPass::GetSpecifiedOpsAndVars( | ||||
|     const std::string &op_type, const std::vector<std::string> &aux_vars_name, | ||||
|     ir::Node *node, std::vector<ir::Node *> *ops, | ||||
|     std::unordered_map<std::string, std::vector<std::string>> *aux_args_name) | ||||
|     const { | ||||
|   if (node->Op()->Type() != op_type) return; | ||||
| 
 | ||||
|   for (auto &var_n : aux_vars_name) { | ||||
|     auto arg_names = node->Op()->Input(var_n); | ||||
|     PADDLE_ENFORCE_EQ(arg_names.size(), static_cast<size_t>(1)); | ||||
|     (*aux_args_name)[var_n].emplace_back(arg_names[0]); | ||||
|     VLOG(10) << var_n << ", " << arg_names[0]; | ||||
|   } | ||||
|   ops->emplace_back(node); | ||||
| } | ||||
| 
 | ||||
| void FuseOptimizerOpPass::AppendAllocContinuousSpace( | ||||
|     const std::vector<std::string> &args, const std::string &out_arg, | ||||
|     bool copy_data, BlockDesc *global_block) const { | ||||
|   auto op_desc = global_block->AppendOp(); | ||||
|   op_desc->SetType("alloc_continuous_space"); | ||||
|   op_desc->SetInput("Input", args); | ||||
|   op_desc->SetOutput("Output", args); | ||||
|   op_desc->SetOutput("FusedOutput", {out_arg}); | ||||
|   op_desc->SetAttr("copy_data", copy_data); | ||||
|   op_desc->SetAttr("check_name", true); | ||||
| } | ||||
| 
 | ||||
| void FuseOptimizerOpPass::InserInputAndOutputForOptOps( | ||||
|     const std::vector<ir::Node *> &opt_ops, ir::Node *opt_node) const { | ||||
|   std::unordered_set<ir::Node *> inputs; | ||||
|   std::unordered_set<ir::Node *> outputs; | ||||
|   for (auto opt_op : opt_ops) { | ||||
|     // set inputs
 | ||||
|     inputs.insert(opt_op->inputs.begin(), opt_op->inputs.end()); | ||||
|     for (auto &input : opt_op->inputs) { | ||||
|       replace(input->outputs.begin(), input->outputs.end(), opt_op, opt_node); | ||||
|     } | ||||
|     // set outputs
 | ||||
|     outputs.insert(opt_op->outputs.begin(), opt_op->outputs.end()); | ||||
|     for (auto &output : opt_op->outputs) { | ||||
|       replace(output->inputs.begin(), output->inputs.end(), opt_op, opt_node); | ||||
|     } | ||||
|   } | ||||
|   opt_node->inputs.insert(opt_node->inputs.begin(), inputs.begin(), | ||||
|                           inputs.end()); | ||||
|   opt_node->outputs.insert(opt_node->outputs.begin(), outputs.begin(), | ||||
|                            outputs.end()); | ||||
| } | ||||
| }  // namespace details
 | ||||
| }  // namespace framework
 | ||||
| }  // namespace paddle
 | ||||
| @ -0,0 +1,75 @@ | ||||
| //   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 | ||||
| //
 | ||||
| // Licensed under the Apache License, Version 2.0 (the "License");
 | ||||
| // you may not use this file except in compliance with the License.
 | ||||
| // You may obtain a copy of the License at
 | ||||
| //
 | ||||
| //     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| //
 | ||||
| // Unless required by applicable law or agreed to in writing, software
 | ||||
| // distributed under the License is distributed on an "AS IS" BASIS,
 | ||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||||
| // See the License for the specific language governing permissions and
 | ||||
| // limitations under the License.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <unordered_map> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| #include "paddle/fluid/framework/details/build_strategy.h" | ||||
| #include "paddle/fluid/framework/details/multi_devices_helper.h" | ||||
| #include "paddle/fluid/framework/ir/graph.h" | ||||
| 
 | ||||
| namespace paddle { | ||||
| namespace framework { | ||||
| namespace details { | ||||
| 
 | ||||
| class FuseOptimizerOpPass : public ir::Pass { | ||||
|  protected: | ||||
|   void ApplyImpl(ir::Graph *graph) const override; | ||||
| 
 | ||||
|  protected: | ||||
|   virtual void SortParametersAndAuxVars( | ||||
|       const std::vector<std::pair<std::string, std::string>> ¶ms_grads, | ||||
|       std::unordered_map<std::string, std::vector<std::string>> *aux_var_set, | ||||
|       std::vector<ir::Node *> *ops) const; | ||||
| 
 | ||||
|   void InserInputAndOutputForOptOps(const std::vector<ir::Node *> &opt_ops, | ||||
|                                     ir::Node *opt_node) const; | ||||
| 
 | ||||
|  private: | ||||
|   virtual const std::string GetOpType() const = 0; | ||||
| 
 | ||||
|   virtual const std::vector<std::string> GetAuxiliaryVarNames() const = 0; | ||||
| 
 | ||||
|   virtual void FuseOptimizerOps( | ||||
|       const std::unordered_map<std::string, std::vector<std::string>> &vars_set, | ||||
|       const std::unordered_map<std::string, std::string> &fused_vars_name, | ||||
|       const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const = 0; | ||||
| 
 | ||||
|   void GetSpecifiedOpsAndVars( | ||||
|       const std::string &op_type, const std::vector<std::string> &aux_vars_name, | ||||
|       ir::Node *node, std::vector<ir::Node *> *ops, | ||||
|       std::unordered_map<std::string, std::vector<std::string>> *aux_args_name) | ||||
|       const; | ||||
| 
 | ||||
|   void AppendAllocContinuousSpace(const std::vector<std::string> &args, | ||||
|                                   const std::string &out_arg, bool copy_data, | ||||
|                                   BlockDesc *global_block) const; | ||||
| 
 | ||||
|   void InitFusedVarsAndAllocSpaceForVars( | ||||
|       const std::vector<platform::Place> &places, | ||||
|       const std::vector<Scope *> &local_scopes, | ||||
|       const std::vector<std::string> &aux_var_names, | ||||
|       const std::unordered_map<std::string, std::vector<std::string>> | ||||
|           &aux_var_set, | ||||
|       const std::unordered_map<std::string, std::string> &fused_vars_name) | ||||
|       const; | ||||
| }; | ||||
| 
 | ||||
| }  // namespace details
 | ||||
| }  // namespace framework
 | ||||
| }  // namespace paddle
 | ||||
Some files were not shown because too many files have changed in this diff Show More
					Loading…
					
					
				
		Reference in new issue