Add double grad for conv_transpose (#29706)

* add double grad for conv_transpose
revert-31562-mean
LielinJiang 4 years ago committed by GitHub
parent 224f3bcbb1
commit e5af650b71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

File diff suppressed because it is too large Load Diff

@ -513,6 +513,85 @@ class ConvTransposeGradOpMaker : public framework::SingleGradOpMaker<T> {
}
};
/*
 * Grad-op maker attached to the conv_transpose grad op; it describes the
 * double-grad op, whose type is the forward op type plus "_grad".
 *
 * Inputs : Input (I), Filter (W), DOutput (dO), DDInput (ddI), DDFilter (ddW)
 * Outputs: DDOutput (ddO), DFilter (dW), DInput (dI)
 */
template <typename T>
class ConvTransposeDoubleGradMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");

    // Names of the gradient variables of the op this maker is attached to.
    const auto grad_of_output = framework::GradVarName("Output");
    const auto grad_of_input = framework::GradVarName("Input");
    const auto grad_of_filter = framework::GradVarName("Filter");

    // Inputs: I, W, dO, ddI, ddW
    op->SetInput("Input", this->Input("Input"));
    op->SetInput("Filter", this->Input("Filter"));
    op->SetInput("DOutput", this->Input(grad_of_output));
    op->SetInput("DDInput", this->OutputGrad(grad_of_input));
    op->SetInput("DDFilter", this->OutputGrad(grad_of_filter));

    // Outputs: ddO, dW, dI
    // Unlike the grad op, the double grad op does not use name@GRAD@GRAD
    // as the key of its inputs and outputs.
    auto ddx = this->OutputGrad(grad_of_input);
    auto ddw = this->OutputGrad(grad_of_filter);

    // DDOutput and DFilter are only produced when ddI is available.
    if (ddx.empty()) {
      op->SetOutput("DDOutput", this->EmptyInputGrad());
      op->SetOutput("DFilter", this->EmptyInputGrad());
    } else {
      op->SetOutput("DDOutput", this->InputGrad(grad_of_output));
      op->SetOutput("DFilter", this->InputGrad("Filter"));
    }

    // DInput is only produced when ddW is available.
    if (ddw.empty()) {
      op->SetOutput("DInput", this->EmptyInputGrad());
    } else {
      op->SetOutput("DInput", this->InputGrad("Input"));
    }

    op->SetAttrMap(this->Attrs());
  }
};
// Infers output shapes of the conv_transpose double-grad op:
//   DDOutput takes the dims of DOutput (needs DDInput or DDFilter),
//   DFilter  takes the dims of Filter  (needs DDInput),
//   DInput   takes the dims of Input   (needs DDFilter).
// An output's dims are set only if that output is present on the op.
void ConvTransposeOpDoubleGrad::InferShape(
    framework::InferShapeContext* ctx) const {
  const auto input_dims = ctx->GetInputDim("Input");
  const auto filter_dims = ctx->GetInputDim("Filter");
  const auto dout_dims = ctx->GetInputDim("DOutput");

  const bool has_dd_input = ctx->HasInput("DDInput");
  const bool has_dd_filter = ctx->HasInput("DDFilter");

  if ((has_dd_input || has_dd_filter) && ctx->HasOutput("DDOutput")) {
    ctx->SetOutputDim("DDOutput", dout_dims);
  }

  if (has_dd_input && ctx->HasOutput("DFilter")) {
    ctx->SetOutputDim("DFilter", filter_dims);
  }

  if (has_dd_filter && ctx->HasOutput("DInput")) {
    ctx->SetOutputDim("DInput", input_dims);
  }
}
// Chooses the kernel type for the double-grad op.
// The cuDNN library is selected only when the "use_cudnn" attribute is set,
// the op runs on a GPU place and (in CUDA builds) the device context has a
// non-null cuDNN handle; otherwise the plain library is used. The data type
// is taken from "Input" and the layout is kAnyLayout.
framework::OpKernelType ConvTransposeOpDoubleGrad::GetExpectedKernelType(
    const framework::ExecutionContext& ctx) const {
  bool cudnn_enabled = ctx.Attr<bool>("use_cudnn");
  if (!platform::is_gpu_place(ctx.GetPlace())) {
    cudnn_enabled = false;
  }
#ifdef PADDLE_WITH_CUDA
  if (platform::is_gpu_place(ctx.GetPlace())) {
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    // Query the handle unconditionally on GPU, as the flag is combined after.
    const bool has_cudnn_handle = dev_ctx.cudnn_handle() != nullptr;
    cudnn_enabled = cudnn_enabled && has_cudnn_handle;
  }
#endif
  const framework::LibraryType library = cudnn_enabled
                                             ? framework::LibraryType::kCUDNN
                                             : framework::LibraryType::kPlain;
  const framework::DataLayout layout = framework::DataLayout::kAnyLayout;
  return framework::OpKernelType(
      OperatorWithKernel::IndicateVarDataType(ctx, "Input"), ctx.GetPlace(),
      layout, library);
}
} // namespace operators
} // namespace paddle
@ -523,7 +602,11 @@ REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp,
ops::Conv2DTransposeOpMaker,
ops::ConvTransposeGradOpMaker<paddle::framework::OpDesc>,
ops::ConvTransposeGradOpMaker<paddle::imperative::OpBase>);
// NOTE(review): conv2d_transpose_grad is registered here and again in the
// next statement; presumably this line is the pre-change side of the diff and
// only the registration below is meant to remain — confirm.
REGISTER_OPERATOR(conv2d_transpose_grad, ops::ConvTransposeOpGrad);
// Registers conv2d_transpose_grad together with the double-grad makers for
// both the OpDesc and the imperative OpBase instantiations.
REGISTER_OPERATOR(
conv2d_transpose_grad, ops::ConvTransposeOpGrad,
ops::ConvTransposeDoubleGradMaker<paddle::framework::OpDesc>,
ops::ConvTransposeDoubleGradMaker<paddle::imperative::OpBase>);
// Registers the double-grad operator itself (no grad maker is attached).
REGISTER_OPERATOR(conv2d_transpose_grad_grad, ops::ConvTransposeOpDoubleGrad);
REGISTER_OP_CPU_KERNEL(
conv2d_transpose,

@ -24,6 +24,9 @@ REGISTER_OP_CUDA_KERNEL(conv2d_transpose,
// CUDA kernels (float and double) for the conv2d_transpose grad op.
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad,
ops::GemmConvTransposeGradKernel<CUDA, float>,
ops::GemmConvTransposeGradKernel<CUDA, double>);
// CUDA kernels (float and double) for the conv2d_transpose double-grad op.
// NOTE(review): this registers the same GemmConvTransposeGradKernel that is
// used for conv2d_transpose_grad above rather than a dedicated double-grad
// kernel — confirm this is intended.
REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad_grad,
ops::GemmConvTransposeGradKernel<CUDA, float>,
ops::GemmConvTransposeGradKernel<CUDA, double>);
// conv3d
REGISTER_OP_CUDA_KERNEL(conv3d_transpose,

@ -114,6 +114,16 @@ class ConvTransposeOpGrad : public framework::OperatorWithKernel {
const framework::ExecutionContext& ctx) const override;
};
// Operator class for the conv_transpose double-grad op.
// Reuses the OperatorWithKernel constructors and overrides shape inference
// and kernel-type selection; both are defined out of line.
class ConvTransposeOpDoubleGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Sets the dims of the outputs that are present on the op.
  void InferShape(framework::InferShapeContext* ctx) const override;

 protected:
  // Returns the kernel type (data type, place, layout, library) to dispatch.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override;
};
template <typename DeviceContext, typename T>
class GemmConvTransposeKernel : public framework::OpKernel<T> {
public:

@ -0,0 +1,159 @@
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.core as core
import gradient_checker
from decorator_helper import prog_scope
class TestConvTransposeDoubleGradCheck(unittest.TestCase):
    """Double-grad check for conv2d_transpose with a 1x1 filter and groups=1.

    Subclasses override ``func`` to build other configurations and inherit
    ``test_grad``, which runs ``func`` on each place under test.
    """

    @prog_scope()
    def func(self, place):
        """Build a conv2d_transpose program and run double_grad_check on it.

        Args:
            place: the place passed through to ``double_grad_check``.
        """
        shape = [2, 4, 3, 3]
        eps = 0.005
        dtype = np.float64
        x = layers.data('x', shape, False, dtype)
        y = layers.conv2d_transpose(
            x, 2, filter_size=1, groups=1, bias_attr=False)
        # Random initial value for the input, drawn before the parameters.
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)

        # The parameters created by the layer are checked along with x, each
        # with its own random initial value.
        w = fluid.default_main_program().global_block().all_parameters()
        w_arr = [
            np.random.uniform(-1, 1, p.shape).astype(dtype) for p in w
        ]
        gradient_checker.double_grad_check(
            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)

    def test_grad(self):
        # Only CUDA places are checked. Without a CUDA build there is nothing
        # to run, so report an explicit skip instead of a pass that verified
        # nothing.
        if not core.is_compiled_with_cuda():
            self.skipTest(
                "conv2d_transpose double grad check only runs on CUDA places")
        places = [fluid.CUDAPlace(0)]
        for p in places:
            self.func(p)
class TestConvTranspose2DoubleGradCheck_AsyPadding(
        TestConvTransposeDoubleGradCheck):
    """Double-grad check for conv2d_transpose with padding=[1, 0, 0, 1]."""

    @prog_scope()
    def func(self, place):
        """Build the program with 4-element padding and check it on `place`."""
        input_shape = [2, 2, 3, 3]
        float_type = np.float64
        step = 0.005

        data = layers.data('x', input_shape, False, float_type)
        out = layers.conv2d_transpose(
            input=data,
            num_filters=2,
            filter_size=1,
            padding=[1, 0, 0, 1],
            bias_attr=False,
            use_cudnn=True)

        # Input values are drawn first, then one array per layer parameter.
        data_init = np.random.uniform(-1, 1, input_shape).astype(float_type)
        params = fluid.default_main_program().global_block().all_parameters()
        param_inits = [
            np.random.uniform(-1, 1, param.shape).astype(float_type)
            for param in params
        ]

        gradient_checker.double_grad_check(
            [data] + params,
            out,
            x_init=[data_init] + param_inits,
            place=place,
            eps=step)
class TestConvTranspose2DoubleGradCheck_PaddingSAME(
        TestConvTransposeDoubleGradCheck):
    """Double-grad check for conv2d_transpose with padding="SAME"."""

    @prog_scope()
    def func(self, place):
        """Build the program with "SAME" padding and check it on `place`."""
        input_shape = [2, 2, 3, 3]
        float_type = np.float64
        step = 0.005

        data = layers.data('x', input_shape, False, float_type)
        out = layers.conv2d_transpose(
            input=data,
            num_filters=2,
            filter_size=1,
            padding="SAME",
            bias_attr=False,
            use_cudnn=True)

        # Input values are drawn first, then one array per layer parameter.
        data_init = np.random.uniform(-1, 1, input_shape).astype(float_type)
        params = fluid.default_main_program().global_block().all_parameters()
        param_inits = [
            np.random.uniform(-1, 1, param.shape).astype(float_type)
            for param in params
        ]

        gradient_checker.double_grad_check(
            [data] + params,
            out,
            x_init=[data_init] + param_inits,
            place=place,
            eps=step)
class TestConvTranspose2DoubleGradCheck_PaddingVALID(
        TestConvTransposeDoubleGradCheck):
    """Double-grad check for conv2d_transpose with padding="VALID"."""

    @prog_scope()
    def func(self, place):
        """Build the program with "VALID" padding and check it on `place`."""
        input_shape = [2, 2, 3, 3]
        float_type = np.float64
        step = 0.005

        data = layers.data('x', input_shape, False, float_type)
        out = layers.conv2d_transpose(
            input=data,
            num_filters=2,
            filter_size=1,
            padding="VALID",
            bias_attr=False,
            use_cudnn=True)

        # Input values are drawn first, then one array per layer parameter.
        data_init = np.random.uniform(-1, 1, input_shape).astype(float_type)
        params = fluid.default_main_program().global_block().all_parameters()
        param_inits = [
            np.random.uniform(-1, 1, param.shape).astype(float_type)
            for param in params
        ]

        gradient_checker.double_grad_check(
            [data] + params,
            out,
            x_init=[data_init] + param_inits,
            place=place,
            eps=step)
class TestConvTranspose2DoubleGradCheck_ChannelLast(
        TestConvTransposeDoubleGradCheck):
    """Double-grad check for conv2d_transpose with data_format="NHWC"."""

    @prog_scope()
    def func(self, place):
        """Build the NHWC program with padding=[1, 1] and check it on `place`."""
        input_shape = [2, 3, 3, 2]
        float_type = np.float64
        step = 0.005

        data = layers.data('x', input_shape, False, float_type)
        out = layers.conv2d_transpose(
            input=data,
            num_filters=2,
            filter_size=1,
            padding=[1, 1],
            bias_attr=False,
            use_cudnn=True,
            groups=1,
            data_format="NHWC")

        # Input values are drawn first, then one array per layer parameter.
        data_init = np.random.uniform(-1, 1, input_shape).astype(float_type)
        params = fluid.default_main_program().global_block().all_parameters()
        param_inits = [
            np.random.uniform(-1, 1, param.shape).astype(float_type)
            for param in params
        ]

        gradient_checker.double_grad_check(
            [data] + params,
            out,
            x_init=[data_init] + param_inits,
            place=place,
            eps=step)
# Run the test cases when this file is executed as a script.
if "__main__" == __name__:
    unittest.main()

@ -108,6 +108,7 @@ STATIC_MODE_TESTING_LIST = [
'test_conv3d_transpose_layer',
'test_conv3d_transpose_part2_op',
'test_conv_nn_grad',
'test_conv_transpose_nn_grad',
'test_conv_shift_op',
'test_cos_sim_op',
'test_create_global_var',

Loading…
Cancel
Save