Double backward of conv2d. (#17211)

* Add conv2d_grad_grad_op
* Extract the cuDNN conv algorithm-searching code into conv_cudnn_helper.h.
    - Now used by conv2d_grad_grad.
    - Will simplify the searching code in conv2d and conv2d_grad in the next PR.
* Enhance gradient_checker and fix bugs in its unit tests.
* Support fetching empty variables; return None in Python.
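
For intuition: conv2d is bilinear in its input and filter, so the double-backward output DDOutput is the directional derivative of the forward output along (ddI, ddW). Below is a minimal NumPy sketch of that identity (stride 1, no padding, single channel); it models the math only, not the cuDNN kernel added in this commit:

```python
import numpy as np

def corr2d(x, w):
    # Plain valid cross-correlation; x: (H, W), w: (kH, kW).
    kH, kW = w.shape
    out = np.zeros((x.shape[0] - kH + 1, x.shape[1] - kW + 1))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            out[i, j] = np.sum(x[i:i + kH, j:j + kW] * w)
    return out

rng = np.random.RandomState(0)
x, w = rng.randn(6, 6), rng.randn(3, 3)
ddx, ddw = rng.randn(6, 6), rng.randn(3, 3)

# DDOutput = corr(ddx, w) + corr(x, ddw), by bilinearity of corr2d.
dd_out = corr2d(ddx, w) + corr2d(x, ddw)

# Check against a finite difference along the (ddx, ddw) direction.
eps = 1e-6
fd = (corr2d(x + eps * ddx, w + eps * ddw) - corr2d(x, w)) / eps
assert np.allclose(dd_out, fd, atol=1e-4)  # residual is eps * corr(ddx, ddw)
```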

@@ -386,9 +386,10 @@ class ExecutionContext {
   template <typename T>
   T& GetKernelConfig(int idx) const {
-    PADDLE_ENFORCE(kernel_configs_ && kernel_configs_->size() > idx,
-                   "%s selected kernel doesn't have kernel config %lu <= %d",
-                   op_.Type().c_str(), kernel_configs_->size(), idx);
+    PADDLE_ENFORCE(
+        kernel_configs_ && kernel_configs_->size() > static_cast<size_t>(idx),
+        "%s selected kernel doesn't have kernel config %lu <= %d",
+        op_.Type().c_str(), kernel_configs_->size(), idx);
     return *boost::get<std::shared_ptr<T>>(kernel_configs_->at(idx));
   }

@@ -644,6 +644,7 @@ class LeakyReluDoubleGrad : public framework::OperatorWithKernel {
 //
 // ReluGrad: dx = dy if y >= 0 else 0
 // ReluGradGrad: ddy = ddx if y >= 0 else 0
+//               dy = 0
 //
 class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
  public:
@@ -655,11 +656,12 @@ class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpDescMaker {
     op->SetType("relu_grad_grad");
     // input1: Out
     op->SetInput("Out", Input("Out"));
-    // X@GRAD@GRAD: ddx
+    // input2: ddx
     op->SetInput("DDX", OutputGrad(framework::GradVarName("X")));
     op->SetAttrMap(Attrs());
-    // Out@GRAD@GRAD: ddy
+    // output1: dy
     op->SetOutput("DOut", InputGrad("Out"));
+    // output2: ddy
     op->SetOutput("DDOut", InputGrad(framework::GradVarName("Out")));
     return std::unique_ptr<::paddle::framework::OpDesc>(op);
   }
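
The formulas in the comment block above are small enough to state directly. A hedged NumPy sketch of relu_grad_grad (a model of the math, not the CUDA kernel; it uses y > 0, treating the measure-zero point y == 0 by the usual subgradient convention):

```python
import numpy as np

def relu_grad_grad(y, ddx):
    # ddy = ddx wherever relu was active in the forward pass, and dy = 0
    # because relu's second derivative vanishes almost everywhere.
    ddy = np.where(y > 0, ddx, 0.0)
    dy = np.zeros_like(y)
    return dy, ddy

y = np.array([0.0, 0.5, 0.0, 2.0])     # forward outputs, y = relu(x)
ddx = np.array([1.0, -2.0, 3.0, 0.5])  # gradient flowing into dx
print(relu_grad_grad(y, ddx))
```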

@@ -54,7 +54,13 @@ class FetchOp : public framework::OperatorBase {
     // FIXME(yuyang18): Should we assume the fetch operator always generate
     // CPU outputs?
-    TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
+    if (src_item.IsInitialized() && src_item.numel() > 0) {
+      TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
+    } else {
+      // Do not copy if the src tensor is empty.
+      dst_item.clear();
+      dst_item.Resize({0});
+    }
     dst_item.set_lod(src_item.lod());

     VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name;

File diff suppressed because it is too large.

File diff suppressed because it is too large.

@@ -506,13 +506,100 @@ class Conv3DGradMaker : public framework::SingleGradOpDescMaker {
   }
 };

+/*
+ * Inputs:  I, W, dO, ddI, ddW
+ * Outputs: ddO, dW, dI
+ */
+class Conv2DDoubleGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op = new framework::OpDesc();
+    op->SetType(this->ForwardOpType() + "_grad");
+    // I, W, dO, ddI, ddW
+    op->SetInput("Input", Input("Input"));
+    op->SetInput("Filter", Input("Filter"));
+    op->SetInput("DOutput", Input(framework::GradVarName("Output")));
+    op->SetInput("DDInput", OutputGrad(framework::GradVarName("Input")));
+    op->SetInput("DDFilter", OutputGrad(framework::GradVarName("Filter")));
+
+    // ddO, dI, dW
+    // Unlike the grad op, the double grad op does not use name@GRAD@GRAD
+    // as the key of its inputs and outputs.
+    op->SetOutput("DDOutput", InputGrad(framework::GradVarName("Output")));
+    op->SetOutput("DFilter", InputGrad("Filter"));
+    op->SetOutput("DInput", InputGrad("Input"));
+    op->SetAttrMap(Attrs());
+
+    return std::unique_ptr<framework::OpDesc>(op);
+  }
+};
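
The three outputs named above have closed forms in the single-channel, stride-1, no-padding case. A hedged SciPy sketch mapping each output to its formula (assuming scipy.signal is available; this models the math, not the cuDNN implementation — DDInput and DDFilter are the gradients flowing into the first-order outputs dI and dW):

```python
import numpy as np
from scipy.signal import convolve2d, correlate2d

def conv2d_grad_grad(I, W, dO, ddI, ddW):
    # DDOutput: corr(I, W) is bilinear, so perturbing (I, W) by (ddI, ddW)
    # perturbs the forward output by corr(ddI, W) + corr(I, ddW).
    ddO = correlate2d(ddI, W, mode='valid') + correlate2d(I, ddW, mode='valid')
    # DFilter: adjoint of W -> dI = conv_full(dO, W), applied to ddI.
    dW = correlate2d(ddI, dO, mode='valid')
    # DInput: adjoint of I -> dW = corr_valid(I, dO), applied to ddW.
    dI = convolve2d(dO, ddW, mode='full')
    return ddO, dW, dI

rng = np.random.RandomState(0)
I, W = rng.randn(8, 8), rng.randn(3, 3)
dO = rng.randn(6, 6)                     # same shape as the forward output
ddI, ddW = rng.randn(8, 8), rng.randn(3, 3)
ddO, dW, dI = conv2d_grad_grad(I, W, dO, ddI, ddW)
assert ddO.shape == (6, 6) and dW.shape == W.shape and dI.shape == I.shape
```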
+
+void ConvOpDoubleGrad::InferShape(framework::InferShapeContext* ctx) const {
+  auto x_dims = ctx->GetInputDim("Input");
+  auto w_dims = ctx->GetInputDim("Filter");
+  auto do_dims = ctx->GetInputDim("DOutput");
+
+  if (ctx->HasOutput("DDOutput")) {
+    ctx->SetOutputDim("DDOutput", do_dims);
+  }
+  if (ctx->HasOutput("DFilter")) {
+    ctx->SetOutputDim("DFilter", w_dims);
+  }
+  if (ctx->HasOutput("DInput")) {
+    ctx->SetOutputDim("DInput", x_dims);
+  }
+}
+
+framework::OpKernelType ConvOpDoubleGrad::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  int customized_type_value =
+      framework::OpKernelType::kDefaultCustomizedTypeValue;
+  framework::LibraryType library_{framework::LibraryType::kPlain};
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+
+#ifdef PADDLE_WITH_CUDA
+  if (platform::CanCUDNNBeUsed(ctx)) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    PADDLE_THROW("Now ConvDoubleGrad only supports cuDNN.");
+  }
+#endif
+  auto type = framework::OpKernelType(ctx.Input<Tensor>("Input")->type(),
+                                      ctx.GetPlace(), layout_, library_,
+                                      customized_type_value);
+#ifdef PADDLE_WITH_CUDA
+  if (library_ == framework::LibraryType::kCUDNN) {
+    std::vector<framework::KernelConfig>& configs = kernel_configs_map_[type];
+    if (configs.empty()) {
+      std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>> p0(
+          new framework::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>());
+      configs.push_back(p0);
+
+      std::shared_ptr<
+          framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>>
+          p1(new framework::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>());
+      configs.push_back(p1);
+
+      std::shared_ptr<framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>>
+          p2(new framework::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>());
+      configs.push_back(p2);
+    }
+  }
+#endif
+  return type;
+}
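
The three caches pushed above line up with the three cuDNN calls the double-grad kernel makes (forward, backward-filter, backward-data) and are later fetched by index via ctx.GetKernelConfig<T>(idx). A hedged Python sketch of the pattern (illustrative names, not Paddle's API):

```python
class AlgorithmsCache:
    """Memoizes the result of an expensive algorithm search per config key."""

    def __init__(self):
        self._cache = {}

    def get(self, key, search_fn):
        # `key` would encode shapes, strides, dilations, etc.; the expensive
        # search runs only on a cache miss.
        if key not in self._cache:
            self._cache[key] = search_fn()
        return self._cache[key]


# One entry per kernel type; index 0 holds the forward-algo cache, 1 the
# backward-filter cache, 2 the backward-data cache, matching the push order
# above and the idx later passed to GetKernelConfig<T>(idx).
kernel_configs_map = {}

def get_kernel_configs(kernel_type):
    if kernel_type not in kernel_configs_map:
        kernel_configs_map[kernel_type] = [AlgorithmsCache() for _ in range(3)]
    return kernel_configs_map[kernel_type]
```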
 }  // namespace operators
 }  // namespace paddle

 namespace ops = paddle::operators;
 REGISTER_OPERATOR(conv2d, ops::ConvOp, ops::Conv2DOpMaker,
                   ops::ConvOpInferVarType, ops::Conv2DGradMaker);
-REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad);
+REGISTER_OPERATOR(conv2d_grad, ops::ConvOpGrad, ops::Conv2DDoubleGradMaker);
+REGISTER_OPERATOR(conv2d_grad_grad, ops::ConvOpDoubleGrad);

 // depthwise convolution op
 REGISTER_OPERATOR(depthwise_conv2d, ops::ConvOp, ops::Conv2DOpMaker,

@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once

 #include <string>
 #include <unordered_map>
+#include <vector>

 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -107,6 +108,16 @@ class ConvOpGrad : public framework::OperatorWithKernel {
       const framework::ExecutionContext& ctx) const override;
 };

+class ConvOpDoubleGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+  void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
+};
+
 template <typename DeviceContext, typename T>
 class GemmConvKernel : public framework::OpKernel<T> {
  public:

@@ -29,13 +29,14 @@ namespace platform {
 using framework::Tensor;

 template <typename T>
-cudnnDataType_t ToCudnnDataType(const T& t) {
+inline cudnnDataType_t ToCudnnDataType(const T& t) {
   auto type = framework::ToDataType(t);
   return ToCudnnDataType(type);
 }

 template <>
-cudnnDataType_t ToCudnnDataType(const framework::proto::VarType::Type& t) {
+inline cudnnDataType_t ToCudnnDataType(
+    const framework::proto::VarType::Type& t) {
   cudnnDataType_t type = CUDNN_DATA_FLOAT;
   switch (t) {
     case framework::proto::VarType::FP16:
@@ -59,14 +60,14 @@ class ActivationDescriptor {
   struct Deleter {
     void operator()(T* t) {
       if (t != nullptr) {
-        PADDLE_ENFORCE(dynload::cudnnDestroyActivationDescriptor(t));
+        CUDNN_ENFORCE(dynload::cudnnDestroyActivationDescriptor(t));
         t = nullptr;
       }
     }
   };
   ActivationDescriptor() {
     T* raw_ptr;
-    PADDLE_ENFORCE(dynload::cudnnCreateActivationDescriptor(&raw_ptr));
+    CUDNN_ENFORCE(dynload::cudnnCreateActivationDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   template <typename T>
@@ -88,14 +89,14 @@ class TensorDescriptor {
   struct Deleter {
     void operator()(T* t) {
       if (t != nullptr) {
-        PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t));
+        CUDNN_ENFORCE(dynload::cudnnDestroyTensorDescriptor(t));
         t = nullptr;
       }
     }
   };
   TensorDescriptor() {
     T* raw_ptr;
-    PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
+    CUDNN_ENFORCE(dynload::cudnnCreateTensorDescriptor(&raw_ptr));
     desc_.reset(raw_ptr);
   }
   T* desc() { return desc_.get(); }
@@ -111,7 +112,7 @@ class TensorDescriptor {
     if (groups > 1) {
       dims_with_group[1] = dims_with_group[1] / groups;
     }
-    PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
+    CUDNN_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
         desc_.get(), ToCudnnDataType(tensor.type()), dims_with_group.size(),
         dims_with_group.data(), strides.data()));
   }
@@ -120,5 +121,83 @@ class TensorDescriptor {
   std::unique_ptr<T, Deleter> desc_;
 };

+class FilterDescriptor {
+ public:
+  using T = cudnnFilterStruct;
+  struct Deleter {
+    void operator()(T* t) {
+      if (t != nullptr) {
+        CUDNN_ENFORCE(dynload::cudnnDestroyFilterDescriptor(t));
+        t = nullptr;
+      }
+    }
+  };
+  FilterDescriptor() {
+    T* raw_ptr;
+    CUDNN_ENFORCE(dynload::cudnnCreateFilterDescriptor(&raw_ptr));
+    desc_.reset(raw_ptr);
+  }
+  T* desc() { return desc_.get(); }
+  T* desc() const { return desc_.get(); }
+
+  void set(const Tensor& tensor, const cudnnTensorFormat_t format,
+           const int groups = 1) {
+    auto dims = framework::vectorize2int(tensor.dims());
+    if (groups > 1) {
+      dims[1] = dims[1] / groups;
+    }
+    CUDNN_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
+        desc_.get(), ToCudnnDataType(tensor.type()), format, dims.size(),
+        dims.data()));
+  }
+
+ private:
+  std::unique_ptr<T, Deleter> desc_;
+};
+
+class ConvolutionDescriptor {
+ public:
+  using T = cudnnConvolutionStruct;
+  struct Deleter {
+    void operator()(T* t) {
+      if (t != nullptr) {
+        CUDNN_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(t));
+        t = nullptr;
+      }
+    }
+  };
+  ConvolutionDescriptor() {
+    T* raw_ptr;
+    CUDNN_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&raw_ptr));
+    desc_.reset(raw_ptr);
+  }
+  T* desc() { return desc_.get(); }
+  T* desc() const { return desc_.get(); }
+
+  void set(cudnnDataType_t dtype, const std::vector<int>& pads,
+           const std::vector<int>& strides, const std::vector<int>& dilations,
+           const int groups = 1) {
+    cudnnDataType_t compute_type =
+        (dtype == CUDNN_DATA_DOUBLE) ? CUDNN_DATA_DOUBLE : CUDNN_DATA_FLOAT;
+    T* desc = desc_.get();
+    CUDNN_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
+        desc, pads.size(), pads.data(), strides.data(), dilations.data(),
+        CUDNN_CROSS_CORRELATION, compute_type));
+    CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+        desc, CUDNN_DEFAULT_MATH));
+#if CUDNN_VERSION_MIN(7, 0, 1)
+    CUDNN_ENFORCE(
+        platform::dynload::cudnnSetConvolutionGroupCount(desc, groups));
+    if (dtype == CUDNN_DATA_HALF) {
+      CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
+          desc, CUDNN_TENSOR_OP_MATH));
+    }
+#endif
+  }
+
+ private:
+  std::unique_ptr<T, Deleter> desc_;
+};
+
 }  // namespace platform
 }  // namespace paddle
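
Each wrapper above pairs a cudnnCreate*/cudnnDestroy* call through a unique_ptr with a custom Deleter, so a descriptor cannot leak even if setup throws. A rough Python analog of that ownership pattern (generic sketch, not tied to any cuDNN binding):

```python
import contextlib

@contextlib.contextmanager
def managed_descriptor(create, destroy):
    # `create` and `destroy` stand in for a cudnnCreate*/cudnnDestroy* pair;
    # the finally-block plays the role of Deleter::operator().
    handle = create()
    try:
        yield handle
    finally:
        destroy(handle)

# Usage sketch (hypothetical functions):
# with managed_descriptor(create_filter_desc, destroy_filter_desc) as desc:
#     set_filter_nd(desc, dtype, layout, dims)
```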

@@ -472,6 +472,9 @@ inline std::string TensorDTypeToPyDTypeStr(
 }  // namespace details

 inline py::array TensorToPyArray(const framework::Tensor &tensor) {
+  if (!tensor.IsInitialized()) {
+    return py::array();
+  }
   bool is_gpu_tensor = platform::is_gpu_place(tensor.place());
   const auto &tensor_dims = tensor.dims();
   auto tensor_dtype = tensor.type();

@@ -119,7 +119,10 @@ def as_numpy(tensor):
             They can not be completely cast to Python ndarray. \
             Please set the parameter 'return_numpy' as 'False' to \
             return LoDTensor itself directly.")
-    return np.array(tensor)
+    if tensor._is_initialized():
+        return np.array(tensor)
+    else:
+        return None


 def has_feed_operators(block, feed_targets, feed_holder_name):
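
Together with the FetchOp and TensorToPyArray changes above, a fetched variable whose tensor was never written (for example, a gradient that does not reach some input) now comes back as None rather than failing during the copy. A hedged usage sketch, where exe, prog, feed, and fetch_vars are placeholders for an executor, program, feed dict, and fetch targets built elsewhere:

```python
results = exe.run(prog, feed=feed, fetch_list=fetch_vars)

# Callers must now be None-aware, exactly as _compute_analytical_jacobian
# is below: a None entry means "this value is absent / identically zero".
arrays = [r.flatten() if r is not None else None for r in results]
```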

@@ -82,6 +82,10 @@ def set_var_in_scope(scope, place, name, value, recursive_seq_len=None):
     return t


+def var_to_np_array_in_scope(scope, place, name):
+    return np.array(scope.var(name).get_tensor())
+
+
 def make_jacobian(x, y_size, np_dtype):
     if isinstance(x, fluid.framework.Variable):
         return np.zeros((_product(x.shape), y_size), dtype=np_dtype)
@@ -192,14 +196,18 @@ def _compute_analytical_jacobian(program, x, y, place, scope):
     x = _as_list(x)
     jacobian = make_jacobian(x, y_size, np_type)

     dx = _as_list(dx)
     for i in six.moves.xrange(y_size):
         _set_item(dy_t, i, 1, np_type)

         dx_res = exe.run(program, scope=scope, fetch_list=dx)

         for j in six.moves.xrange(len(x)):
-            jacobian[j][:, i] = dx_res[j].flatten()
+            if dx_res[j] is not None:
+                jacobian[j][:, i] = dx_res[j].flatten()
+            else:
+                jacobian[j][:, i] = np.zeros(
+                    dx[j].shape, dtype=np_type).flatten()

         _set_item(dy_t, i, 0, np_type)

     return jacobian
@@ -242,6 +250,7 @@ def grad_check(x,
     # check input arguments
     x = _as_list(x)
     y = _as_list(y)
+
     for v in x:
         v.stop_gradient = False
         v.persistable = True
@@ -274,9 +283,24 @@ def grad_check(x,
     ]

     # [y_idx, x_idx]
-    analytical = [
-        _compute_analytical_jacobian(program, x, yi, place, scope) for yi in y
-    ]
+    analytical = []
+    for yi in y:
+        prog = program.clone()
+        clone_x = []
+        clone_y = None
+        for b in prog.blocks:
+            if b.has_var(yi.name):
+                clone_y = b.var(yi.name)
+                break
+        for xi in x:
+            for b in prog.blocks:
+                if b.has_var(xi.name):
+                    clone_x.append(b.var(xi.name))
+                    break
+        analytical.append(
+            _compute_analytical_jacobian(prog, clone_x, clone_y, place, scope))

     for i, (x_idx,
             y_idx) in enumerate(product(*[range(len(x)), range(len(y))])):
@@ -334,6 +358,7 @@ def double_grad_check(x,
     if y_grads is None:
         scope = fluid.executor.global_scope()
         y_grads = []
+        y_grads_init = []
         for yi in y:
             dyi_name = _append_grad_suffix_(yi.name)
             np_type = dtype_to_np_dtype(yi.dtype)
@@ -343,9 +368,20 @@ def double_grad_check(x,
             v = np.random.random(size=yi.shape).astype(np_type)
             set_var_in_scope(scope, place, dyi_name, v)
             y_grads.append(dy)
+            y_grads_init.append(v)
     else:
         y_grads = _as_list(y_grads)
+        y_grads_init = [
+            var_to_np_array_in_scope(scope, place, v.name) for v in y_grads
+        ]

     # append first order grads
     target_grads = calc_gradient(y, x, y_grads)
+
+    # y_grads are inputs of the first-order backward,
+    # so they are also inputs of the second-order backward.
+    x += y_grads
+    x_init = _as_list(x_init)
+    x_init += y_grads_init
+
     grad_check(x, target_grads, x_init, place, program, eps, atol, rtol)
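
The reason for x += y_grads: the first-order gradients dx depend linearly on the injected dy, so the second-order check must perturb dy like any other input. A scalar model of this bookkeeping, with y = x**2 (illustrative only, not the checker's code):

```python
import numpy as np

x0, dy0 = 1.5, 0.7
dx = lambda x, dy: 2.0 * x * dy  # first-order backward of y = x**2

# double_grad_check effectively differentiates dx w.r.t. both x and dy:
eps = 1e-6
d_dx_dx = (dx(x0 + eps, dy0) - dx(x0, dy0)) / eps   # analytically 2 * dy0
d_dx_ddy = (dx(x0, dy0 + eps) - dx(x0, dy0)) / eps  # analytically 2 * x0
assert abs(d_dx_dx - 2 * dy0) < 1e-4
assert abs(d_dx_ddy - 2 * x0) < 1e-4
```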

@@ -46,7 +46,6 @@ class TestMulGradCheck(unittest.TestCase):
 class TestReluDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
-        # the shape of input variable shoule be clearly specified, not inlcude -1.
         shape = [2, 8]
         eps = 0.005
         dtype = np.float64
@@ -71,7 +70,6 @@ class TestReluDoubleGradCheck(unittest.TestCase):
 class TestLeakyReluDoubleGradCheck(unittest.TestCase):
     @prog_scope()
     def func(self, place):
-        # the shape of input variable shoule be clearly specified, not inlcude -1.
         shape = [3, 7]
         eps = 0.005
         alpha = 0.2
@@ -79,6 +77,7 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase):
         x = layers.data('x', shape, False, dtype)
         x.persistable = True
         y = layers.leaky_relu(x, alpha=alpha)
+
         x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
         x_arr[np.abs(x_arr) < 0.005] = 0.02
@@ -90,8 +89,30 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase):
         places = [fluid.CPUPlace()]
         if core.is_compiled_with_cuda():
             places.append(fluid.CUDAPlace(0))
         for p in places:
             self.func(p)


+class TestConvDoubleGradCheck(unittest.TestCase):
+    @prog_scope()
+    def func(self, place):
+        shape = [2, 4, 14, 16]
+        eps = 0.005
+        dtype = np.float64
+        x = layers.data('x', shape, False, dtype)
+        y = layers.conv2d(x, 4, 1, bias_attr=False)
+        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
+
+        w = fluid.default_main_program().global_block().all_parameters()
+        w_arr = []
+        for p in w:
+            w_arr.append(np.random.uniform(-1, 1, p.shape).astype(dtype))
+        gradient_checker.double_grad_check(
+            [x] + w, y, x_init=[x_arr] + w_arr, place=place, eps=eps)
+
+    def test_grad(self):
+        if core.is_compiled_with_cuda():
+            places = [fluid.CUDAPlace(0)]
+            for p in places:
+                self.func(p)
+
+
 if __name__ == "__main__":
