/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <algorithm>
#include <iterator>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/executor_cache.h"
#include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/variable.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif

DECLARE_bool(use_mkldnn);

namespace paddle {
namespace operators {

using StepScopeVar = std::vector<framework::Scope *>;
using BlockDesc = framework::BlockDesc;

using Variable = framework::Variable;
using LoDTensor = framework::LoDTensor;
using SelectedRows = framework::SelectedRows;

namespace details {

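// Helper routines shared by RunProgramOpKernel and RunProgramGradOpKernel
// below: type/initialization checks and zero-copy sharing of variables
// between the caller's scope and the op's internal scope.
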
// All input vars must hold an initialized LoDTensor.
static void CheckInputVarStatus(const Variable &var,
                                const std::string &var_name) {
  PADDLE_ENFORCE_EQ(
      var.IsType<LoDTensor>(), true,
      platform::errors::InvalidArgument(
          "The input variable %s of "
          "RunProgram(Grad)Op holds "
          "the wrong type. The expected type is LoDTensor, but the "
          "received type is %s.",
          var_name, platform::demangle(framework::ToTypeName(var.Type()))));
  PADDLE_ENFORCE_EQ(
      var.Get<LoDTensor>().IsInitialized(), true,
      platform::errors::InvalidArgument("The tensor in input variable %s of "
                                        "RunProgram(Grad)Op "
                                        "is not initialized.",
                                        var_name));
}

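// Verify that the variable fetched from the op's internal scope (src_var)
// has the same type as the destination variable (dst_var) and holds an
// initialized tensor before its data is shared out.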
static void CheckOutputVarStatus(const Variable &src_var,
                                 const Variable &dst_var,
                                 const std::string &var_name) {
  if (dst_var.IsType<LoDTensor>()) {
    PADDLE_ENFORCE_EQ(
        src_var.IsType<LoDTensor>(), true,
        platform::errors::InvalidArgument(
            "The output variable %s fetched from "
            "RunProgram(Grad)Op's internal scope holds "
            "the wrong type. The expected type is LoDTensor, but the "
            "received type is %s.",
            var_name,
            platform::demangle(framework::ToTypeName(src_var.Type()))));
    PADDLE_ENFORCE_EQ(src_var.Get<LoDTensor>().IsInitialized(), true,
                      platform::errors::InvalidArgument(
                          "The tensor in output variable %s fetched from "
                          "RunProgram(Grad)Op's internal "
                          "scope is not initialized.",
                          var_name));
  } else if (dst_var.IsType<SelectedRows>()) {
    PADDLE_ENFORCE_EQ(
        src_var.IsType<SelectedRows>(), true,
        platform::errors::InvalidArgument(
            "The output variable %s fetched from "
            "RunProgram(Grad)Op's internal scope holds "
            "the wrong type. The expected type is SelectedRows, but the "
            "received type is %s.",
            var_name,
            platform::demangle(framework::ToTypeName(src_var.Type()))));
    PADDLE_ENFORCE_EQ(src_var.Get<SelectedRows>().value().IsInitialized(),
                      true,
                      platform::errors::InvalidArgument(
                          "The tensor in output variable %s fetched from "
                          "RunProgram(Grad)Op's "
                          "internal scope is not initialized.",
                          var_name));
  } else {
    PADDLE_THROW(platform::errors::InvalidArgument(
        "RunProgram(Grad)Op only supports output "
        "variables of type LoDTensor or SelectedRows, "
        "but the received variable %s's type is %s.",
        var_name, platform::demangle(framework::ToTypeName(dst_var.Type()))));
  }
}

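// Share the underlying tensor storage of src_var into dst_var without
// copying data; the associated metadata (LoD, or rows/height for
// SelectedRows) is copied alongside.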
static void VariableShare(const Variable &src_var, Variable *dst_var) {
  // The previous check ensures that the variable type can only be LoDTensor
  // or SelectedRows.
  if (src_var.IsType<LoDTensor>()) {
    auto *lod_tensor = dst_var->GetMutable<LoDTensor>();
    lod_tensor->ShareDataWith(src_var.Get<LoDTensor>());
    lod_tensor->set_lod(src_var.Get<LoDTensor>().lod());
  } else if (src_var.IsType<SelectedRows>()) {
    auto *selected_rows = dst_var->GetMutable<SelectedRows>();
    selected_rows->mutable_value()->ShareDataWith(
        src_var.Get<SelectedRows>().value());
    selected_rows->set_rows(src_var.Get<SelectedRows>().rows());
    selected_rows->set_height(src_var.Get<SelectedRows>().height());
  }
}

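// Bind each caller-side variable into the executor's scope under the name
// the program's ops expect, validating it first.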
static void ShareVarsIntoScope(const std::vector<Variable *> &vars,
                               const std::vector<std::string> &var_names,
                               framework::Scope *scope) {
  for (size_t i = 0; i < vars.size(); ++i) {
    auto *var = scope->Var(var_names[i]);
    CheckInputVarStatus(*vars[i], var_names[i]);
    VariableShare(*vars[i], var);
  }
}

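// The inverse of ShareVarsIntoScope: after execution, share each named
// variable from the internal scope back to the caller-side variables.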
static void ShareVarsFromScope(const std::vector<Variable *> &vars,
                               const std::vector<std::string> &var_names,
                               framework::Scope *scope) {
  for (size_t i = 0; i < vars.size(); ++i) {
    if (var_names[i] == framework::kEmptyVarName) {
      VLOG(2) << "The variable name is " << framework::kEmptyVarName
              << ", skip it!";
      continue;
    }
    // NOTE: Silently skipping a variable that is not found here would be
    // dangerous: a bug at this point would produce wrong gradients and be
    // very hard to trace, so enforce that the variable exists.
    auto *var = scope->FindVar(var_names[i]);
    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound("The output variable %s is not in "
                                        "RunProgram(Grad)Op'"
                                        "s internal scope.",
                                        var_names[i]));
    CheckOutputVarStatus(*var, *vars[i], var_names[i]);
    VariableShare(*var, vars[i]);
  }
}

}  // namespace details

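// Forward kernel: runs ops [start_op_index, end_op_index) of the global
// block in a fresh child scope of OutScope. Inputs and parameters are shared
// into that scope before execution, and outputs are shared back out
// afterwards.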
template <typename DeviceContext, typename T>
class RunProgramOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    VLOG(2) << "RunProgramOpKernel Compute";
    // Step 1. prepare inputs, outputs, attrs
    auto &input_vars = ctx.MultiInputVar("X");
    auto &param_vars = ctx.MultiInputVar("Params");
    auto output_vars = ctx.MultiOutputVar("Out");

    auto input_var_names = ctx.InputNames("X");
    auto output_var_names = ctx.OutputNames("Out");

    // The current program may not hold any parameters.
    std::vector<std::string> param_names;
    if (!param_vars.empty()) {
      param_names = ctx.InputNames("Params");
    }

    auto start_op_index = ctx.Attr<int64_t>("start_op_index");
    auto end_op_index = ctx.Attr<int64_t>("end_op_index");
    auto is_test = ctx.Attr<bool>("is_test");

    // NOTE(chenweihang): a vector is used here to avoid adding a new
    // variable type; otherwise a scope could be used directly.
    auto *out_scope_vec = ctx.Output<StepScopeVar>("OutScope");
    PADDLE_ENFORCE_EQ(
        out_scope_vec->size(), 1,
        platform::errors::InvalidArgument(
            "The OutScope of RunProgramGradOp should only hold one scope."));

    // Step 2. prepare executor and init persistable variables
    framework::Executor exe(ctx.GetPlace());
    auto exe_ctx = framework::GetExecutorInfoFromCache(
        exe, ctx, {output_var_names}, /*is_grad=*/false);

    // NOTE(Aurelius84): When training some models, forward may be called
    // many times before backpropagation is applied all at once, as in
    // Reinforcement Learning. The tensor data of each step must therefore be
    // saved in its own scope; otherwise the gradients would be
    // miscalculated, because the tensor data of the last forward step would
    // always be used.
    framework::Scope *global_inner_scope = out_scope_vec->front();
    VLOG(2) << "The number of sub scopes before forward: "
            << out_scope_vec->front()->kids().size();
    framework::Scope &scope = global_inner_scope->NewScope();

    // share input_vars & parameters into scope
    details::ShareVarsIntoScope(input_vars, input_var_names, &scope);
    details::ShareVarsIntoScope(param_vars, param_names, &scope);

    // Step 3. run ops
    exe.RunPartialPreparedContext(exe_ctx.get(), &scope, start_op_index,
                                  end_op_index, /*create_local_scope=*/false,
                                  /*create_vars=*/true,
                                  /*keep_kids=*/!is_test);
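    // keep_kids=!is_test: when training, scopes created while running the
    // forward ops are kept alive so the backward pass can still read the
    // intermediate tensors they hold; when testing there is no backward
    // pass, so they can be dropped as soon as the run finishes.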

    // Step 4. get outputs
    details::ShareVarsFromScope(output_vars, output_var_names, &scope);

    // Debug info: scope info when the run ends
    VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());
    // Step 5. Drop all children scopes while testing.
    if (is_test) {
      out_scope_vec->front()->DropKids();
    }
    VLOG(2) << "The number of sub scopes after forward: "
            << out_scope_vec->front()->kids().size();
#ifdef PADDLE_WITH_MKLDNN
    if (FLAGS_use_mkldnn) DontClearMKLDNNCache(ctx.GetPlace());
#endif
  }
};

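// Backward kernel: reuses a child scope created by a forward call (which
// still holds the forward intermediates), runs the gradient ops that follow
// the forward section of the global block, shares the computed gradients
// back to the caller, and finally deletes the scope.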
template <typename DeviceContext, typename T>
class RunProgramGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    VLOG(2) << "RunProgramGradOpKernel Compute";
    // Step 1. prepare inputs and outputs
    auto &output_grad_vars = ctx.MultiInputVar(framework::GradVarName("Out"));
    auto input_grad_vars = ctx.MultiOutputVar(framework::GradVarName("X"));
    auto param_grad_vars =
        ctx.MultiOutputVar(framework::GradVarName("Params"));

    // If all output vars are set to stop_gradient, the grad op does not need
    // to be executed.
    if (input_grad_vars.empty() && param_grad_vars.empty()) return;

    auto output_grad_var_names = ctx.InputNames(framework::GradVarName("Out"));
    // NOTE: after PR22939 [Add double grad] was merged, the grad op maker's
    // SetOutput sets the output to None if the input var has
    // stop_gradient=True, which would cause a NotFound error if
    // ctx.OutputNames() were called unconditionally.
    std::vector<std::string> input_grad_var_names;
    std::vector<std::string> param_grad_names;
    if (!input_grad_vars.empty()) {
      input_grad_var_names = ctx.OutputNames(framework::GradVarName("X"));
    }
    if (!param_grad_vars.empty()) {
      param_grad_names = ctx.OutputNames(framework::GradVarName("Params"));
    }

    auto *block = ctx.Attr<BlockDesc *>("global_block");
    auto orig_end_op_index = ctx.Attr<int64_t>("end_op_index");
    // NOTE: skip the `shape` and `fill_constant` ops created by
    // fluid.backward.gradients; each forward output generates one `shape`
    // and one `fill_constant` op.
    int64_t start_op_index = orig_end_op_index + (output_grad_vars.size() * 2);
    int64_t end_op_index = block->OpSize();
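    // For example, with two forward outputs, four generated ops (two `shape`
    // plus two `fill_constant`) sit between the forward section and the real
    // gradient ops, hence the stride of 2 per output grad var above.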

    auto *out_scope_vec = ctx.Input<StepScopeVar>("OutScope");
    PADDLE_ENFORCE_EQ(
        out_scope_vec->size(), 1,
        platform::errors::InvalidArgument(
            "The OutScope of RunProgramGradOp should only hold one scope."));

    framework::Scope *global_inner_scope = out_scope_vec->front();
    auto sub_scope_num = global_inner_scope->kids().size();
    VLOG(2) << "The number of sub scopes before backward: " << sub_scope_num;
    PADDLE_ENFORCE_GT(sub_scope_num, 0,
                      platform::errors::InvalidArgument(
                          "The OutScope of RunProgramGradOp should hold at "
                          "least one sub scope."));

    auto &scope = *(global_inner_scope->kids().front());
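    // `scope` is a child scope produced by a forward call; it still holds
    // the intermediate tensors of that forward step, which the gradient ops
    // below read from.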

    // Step 2. prepare executor and scope
    framework::Executor exe(ctx.GetPlace());
    auto exe_ctx = framework::GetExecutorInfoFromCache(
        exe, ctx, {input_grad_var_names, param_grad_names},
        /*is_grad=*/true);

    details::ShareVarsIntoScope(output_grad_vars, output_grad_var_names,
                                &scope);
    // Debug info: scope info when the run ends
    VLOG(3) << framework::GenScopeTreeDebugInfo(out_scope_vec->front());

    // Step 3. run ops
    exe.RunPartialPreparedContext(exe_ctx.get(), &scope, start_op_index,
                                  end_op_index, /*create_local_scope=*/false,
                                  /*create_vars=*/true, /*keep_kids=*/false);

    // Step 4. get outputs
    details::ShareVarsFromScope(input_grad_vars, input_grad_var_names, &scope);
    details::ShareVarsFromScope(param_grad_vars, param_grad_names, &scope);

    // Step 5. drop current scope
    global_inner_scope->DeleteScope(&scope);
    VLOG(2) << "The number of sub scopes after backward: "
            << global_inner_scope->kids().size();
  }
};

}  // namespace operators
}  // namespace paddle