Add pre-condition check for fuse optimizer op pass (#21005)

* add pre condition check for fuse optimizer op pass, test=develop

* add log & set init to zero, test=develop

* fix test_fuse_all_reduce_pass failed, test=develop

* polish details, test=develop

* refine PADDLE_ENFORCE & remove needless VLOG, test=develop

* refactor op check method, test=develop
custom_op_abi
Chen Weihang 6 years ago committed by GitHub
parent 6cc544aa28
commit 826254f664
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -55,25 +55,26 @@ class FuseOptimizerOpPass : public ir::Pass {
const std::unordered_map<std::string, std::string> &fused_vars_name,
const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const = 0;
void GetSpecifiedOpsAndVars(
void GetFusingVarNamesMap(
const std::vector<std::string> &aux_vars_name,
const std::vector<ir::Node *> &opt_nodes,
std::unordered_map<std::string, std::vector<std::string>> *aux_args_name)
const;
void AppendAllocContinuousSpace(const std::vector<std::string> &in_args,
const std::vector<std::string> &out_args,
const std::string &fused_out_arg,
const proto::VarType::Type &dtype,
BlockDesc *global_block, bool copy_data,
bool check_name = true) const;
void AppendCoalesceTensorOp(const std::vector<std::string> &in_args,
const std::vector<std::string> &out_args,
const std::string &fused_out_arg,
const proto::VarType::Type &dtype,
BlockDesc *global_block, bool copy_data,
bool check_name = true) const;
void InitFusedGradsAndAllocSpaceForGrads(
const std::vector<std::string> &params,
const std::vector<std::string> &grads, const std::string &fused_grad_name,
const proto::VarType::Type &dtype, ir::Graph *result) const;
void FuseGradientsToContinuousSpace(const std::vector<std::string> &params,
const std::vector<std::string> &grads,
const std::string &fused_grad_name,
const proto::VarType::Type &dtype,
ir::Graph *result) const;
void InitFusedVarsAndAllocSpaceForVars(
void FuseVarsToContinuousSpace(
const std::vector<std::string> &aux_var_names,
const std::unordered_map<std::string, std::vector<std::string>>
&aux_var_set,
@ -83,6 +84,12 @@ class FuseOptimizerOpPass : public ir::Pass {
std::unordered_map<std::string, std::vector<Node *>> GetVarInfo(
const Graph &result) const;
bool OpWithKernelSupportCPUAndGPU(const std::string &op_type) const;
bool GradGeneratedOpKernelCheck(
const std::unordered_map<std::string, std::vector<ir::Node *>> &vars_info,
const std::string &grad_var_name) const;
proto::VarType::Type GetDtypeOfVar(
const std::unordered_map<std::string, std::vector<ir::Node *>> &vars_info,
const std::string &name) const;

@ -24,7 +24,7 @@ namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class CoalesceTensorOp : public framework::OpKernel<T> {
class CoalesceTensorOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
auto &in_var_names = context.Inputs("Input");
@ -32,24 +32,39 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
auto &in_vars = context.MultiInputVar("Input");
auto out_vars = context.MultiOutputVar("Output");
PADDLE_ENFORCE_GT(in_var_names.size(), static_cast<size_t>(0));
PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size());
PADDLE_ENFORCE_GT(in_var_names.size(), static_cast<size_t>(0),
"The CoalesceTensorOp has no input.");
PADDLE_ENFORCE_EQ(
in_var_names.size(), out_var_names.size(),
"The number of CoalesceTensorOp's input and output is not match.");
// Input & Output check: only support LoDTensor
for (size_t i = 0; i < in_var_names.size(); ++i) {
// Only support LoDTensor
PADDLE_ENFORCE_NOT_NULL(in_vars[i], "%s should not be nullptr,",
in_var_names[i]);
PADDLE_ENFORCE_NOT_NULL(out_vars[i], "%s should not be nullptr,",
out_var_names[i]);
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensor>());
PADDLE_ENFORCE(out_vars[i]->IsType<framework::LoDTensor>());
PADDLE_ENFORCE_NOT_NULL(
in_vars[i],
"The input variable %s of CoalesceTensorOp does not exist.",
in_var_names[i]);
PADDLE_ENFORCE_NOT_NULL(
out_vars[i],
"The output variable %s of CoalesceTensorOp does not exist.",
out_var_names[i]);
PADDLE_ENFORCE_EQ(
in_vars[i]->IsType<framework::LoDTensor>(), true,
"The input variable %s of CoalesceTensorOp is not LoDTensor.",
in_var_names[i]);
// The output-type check must report the output variable's own name. Input
// and output names can differ when check_name is false, so in_var_names[i]
// would name the wrong variable here.
PADDLE_ENFORCE_EQ(
out_vars[i]->IsType<framework::LoDTensor>(), true,
"The output variable %s of CoalesceTensorOp is not LoDTensor.",
out_var_names[i]);
}
auto in_tensors = context.MultiInput<framework::LoDTensor>("Input");
if (context.Attr<bool>("check_name")) {
for (size_t i = 0; i < in_var_names.size(); ++i) {
PADDLE_ENFORCE_EQ(in_var_names[i], out_var_names[i]);
PADDLE_ENFORCE_EQ(
in_var_names[i], out_var_names[i],
"The input and output variable of CoalesceTensorOp is different.");
}
} else {
// Init the output as input
@ -124,8 +139,8 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
std::stringstream ss;
ss << "alloc_space_for_vars: ";
for (size_t i = 0; i < var_names.size(); ++i) {
PADDLE_ENFORCE(lod_tensors[i]->IsInitialized(), "%s is not initialized.",
var_names[i]);
PADDLE_ENFORCE_EQ(lod_tensors[i]->IsInitialized(), true,
"%s is not initialized.", var_names[i]);
auto size = lod_tensors[i]->numel();
PADDLE_ENFORCE_GT(size, 0);
@ -140,14 +155,14 @@ class CoalesceTensorOp : public framework::OpKernel<T> {
}
};
class AllocContinuousSpaceOp : public framework::OperatorWithKernel {
class CoalesceTensorOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {}
};
class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
class CoalesceTensorOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Input",
@ -179,7 +194,7 @@ class AllocContinuousSpaceOpMaker : public framework::OpProtoAndCheckerMaker {
"they are the same separately.")
.SetDefault(false);
AddComment(R"DOC(
AllocContinuousSpace Operator.
CoalesceTensor Operator.
coalesce_tensor is used to make the address of Output
continuous according to the Input. This Op will alloc a big tensor
@ -200,22 +215,22 @@ setting the Output with a constant value.
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(coalesce_tensor, paddle::operators::AllocContinuousSpaceOp,
paddle::operators::AllocContinuousSpaceOpMaker);
REGISTER_OPERATOR(coalesce_tensor, paddle::operators::CoalesceTensorOp,
paddle::operators::CoalesceTensorOpMaker);
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CPU_KERNEL(
coalesce_tensor,
ops::CoalesceTensorOp<paddle::platform::CPUDeviceContext, plat::float16>,
ops::CoalesceTensorOp<paddle::platform::CPUDeviceContext, int>,
ops::CoalesceTensorOp<paddle::platform::CPUDeviceContext, float>,
ops::CoalesceTensorOp<paddle::platform::CPUDeviceContext, double>);
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::CoalesceTensorOpKernel<paddle::platform::CPUDeviceContext, double>);
#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL(
coalesce_tensor,
ops::CoalesceTensorOp<paddle::platform::CUDADeviceContext, plat::float16>,
ops::CoalesceTensorOp<paddle::platform::CUDADeviceContext, int>,
ops::CoalesceTensorOp<paddle::platform::CUDADeviceContext, float>,
ops::CoalesceTensorOp<paddle::platform::CUDADeviceContext, double>);
ops::CoalesceTensorOpKernel<paddle::platform::CUDADeviceContext,
plat::float16>,
ops::CoalesceTensorOpKernel<paddle::platform::CUDADeviceContext, int>,
ops::CoalesceTensorOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::CoalesceTensorOpKernel<paddle::platform::CUDADeviceContext, double>);
#endif

Loading…
Cancel
Save