Add More Complex Tests and Fix Bugs for Control Flow cond API (#21532)
Add tests that use dy/dx to make sure the gradient values calculated by the control flow backward pass are correct, and fix the bugs those tests detected (a sketch of such a dy/dx check is given after the lists below).

Bug fixes:

1. Unlike sum_op, optimizer ops don't allow an uninitialized input tensor. But in conditional_block_grad_op, since the conditional_block may not run, the output gradient tensor may be uninitialized, which causes an optimizer op error. To fix it, we could either let optimizer ops support uninitialized inputs like sum_op does, or assign the uninitialized gradient to 0 when the conditional_block_grad_op doesn't run. I found there are 10+ optimizer ops. **To keep it simple, I just assign the output gradient of conditional_block_grad_op to 0 in this PR.** It can be further explored whether we can make optimizer ops support uninitialized input tensors like sum_op, because theoretically we could speed things up by skipping the assignment in conditional_block_grad_op.
2. Infer parameter gradient shapes during append_backward. All our parameters live in the global block, so when an op_desc infers shapes in a sub-block, it may not know the shapes of gradients of parameters whose shape information is in the global block. I fixed it by inferring the gradient shapes from the corresponding forward variables.

This PR also does some code cleanup:

1. Print the variable name when sgd_op catches a shape error, so that it is easier to debug.
2. Fix a typo: dicta -> dict.
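Not part of the PR diff, but a minimal Python sketch of the kind of dy/dx check described above, assuming the fluid 1.x API (`fluid.layers.cond`, `fluid.gradients`); the variable names are illustrative only. Because `pred` is always false, the branch that reads `x` never runs, and with this fix `dx` comes back as zeros instead of an uninitialized tensor:

```python
import numpy as np
import paddle.fluid as fluid

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.layers.data(name='x', shape=[1], dtype='float32',
                          append_batch_size=False)
    x.stop_gradient = False
    # The predicate is always False, so the true branch (the only
    # branch that reads x) never runs.
    pred = fluid.layers.fill_constant(shape=[1], dtype='bool', value=False)
    out = fluid.layers.cond(
        pred,
        lambda: x * 2.0,  # true branch: uses x
        lambda: fluid.layers.fill_constant(shape=[1], dtype='float32',
                                           value=3.0))  # false branch
    loss = fluid.layers.mean(out)
    dx = fluid.gradients([loss], [x])[0]

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
dx_val, = exe.run(main,
                  feed={'x': np.ones([1], dtype='float32')},
                  fetch_list=[dx])
# Before this fix dx could be left uninitialized and break optimizer
# ops; after it, conditional_block_grad_op fills dx with zeros.
np.testing.assert_allclose(dx_val, np.zeros([1], dtype='float32'))
```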
parent c5aec2fe68
commit 1dcf6a7212
@@ -0,0 +1,80 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/controlflow/conditional_block_op.h"
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/var_type.h"

USE_NO_KERNEL_OP(conditional_block);
USE_NO_KERNEL_OP(conditional_block_grad);

using LoDTensor = paddle::framework::LoDTensor;
using LoDTensorArray = paddle::framework::LoDTensorArray;
using Scope = paddle::framework::Scope;
using Variable = paddle::framework::Variable;
using Place = paddle::platform::Place;

TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
  Place place = paddle::platform::CPUPlace();
  Scope scope;

  // A false scalar condition, so the conditional_block never runs.
  Variable* cond_var = scope.Var("condition");
  LoDTensor* cond_tensor = cond_var->GetMutable<LoDTensor>();
  paddle::framework::DDim cond_dims = paddle::framework::make_ddim({1});
  bool* cond_data = cond_tensor->mutable_data<bool>(cond_dims, place);
  cond_data[0] = false;

  // Forward input: a LoDTensorArray of 5 tensors with different shapes.
  Variable* input_var = scope.Var("input_lod_tensor_array");
  LoDTensorArray* input_tensors = input_var->GetMutable<LoDTensorArray>();
  for (int i = 0; i < 5; ++i) {
    paddle::framework::DDim in_dims =
        paddle::framework::make_ddim({i + 1, i + 2});
    LoDTensor lod_tensor;
    float* in_data = lod_tensor.mutable_data<float>(in_dims, place);
    for (int j = 0; j < (i + 1) * (i + 2); ++j) {
      in_data[j] = static_cast<float>(j);
    }
    input_tensors->push_back(lod_tensor);
  }

  // The gradient array starts out holding 5 uninitialized tensors.
  Variable* input_grad_var = scope.Var("input_lod_tensor_array@GRAD");
  LoDTensorArray* grad_tensors = input_grad_var->GetMutable<LoDTensorArray>();
  grad_tensors->resize(5);

  paddle::framework::AttributeMap attrs;
  attrs.insert({"is_scalar_condition", true});

  auto conditional_grad_op = paddle::framework::OpRegistry::CreateOp(
      "conditional_block_grad",
      {{"Input", {"input_lod_tensor_array"}}, {"Cond", {"condition"}}},
      {{"Input@GRAD", {"input_lod_tensor_array@GRAD"}}}, attrs);

  conditional_grad_op->Run(scope, place);

  // Since the block did not run, each input gradient must be filled with
  // zeros of the same shape as the corresponding forward tensor.
  const LoDTensorArray& out_tensors = input_grad_var->Get<LoDTensorArray>();
  for (int i = 0; i < 5; ++i) {
    paddle::framework::DDim out_dims = out_tensors[i].dims();
    EXPECT_EQ(paddle::framework::make_ddim({i + 1, i + 2}), out_dims);
    const float* out_data = out_tensors[i].data<float>();
    for (int j = 0; j < (i + 1) * (i + 2); ++j) {
      EXPECT_EQ(0.0f, out_data[j]);
    }
  }
}