@@ -111,14 +111,16 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
         context.Output<Tensor>(framework::GradVarName("Input"));
     Tensor* filter_grad_ =
         context.Output<Tensor>(framework::GradVarName("Filter"));
-    input_grad->mutable_data<T>(context.GetPlace());
-    filter_grad_->mutable_data<T>(context.GetPlace());
 
     // The filter and filter_grad will be reshaped in the calculations,
     // so here use an assignment operation,
     // that avoids modifying the variable in the Scope.
     Tensor filter = *context.Input<Tensor>("Filter");
-    Tensor filter_grad = *filter_grad_;
+    Tensor filter_grad;
+    if (filter_grad_) {
+      filter_grad_->mutable_data<T>(context.GetPlace());
+      filter_grad = *filter_grad_;
+    }
 
     std::vector<int> strides = context.Attr<std::vector<int>>("strides");
     std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
@@ -162,12 +164,20 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
     framework::DDim filter_matrix_shape = {filter.dims()[0],
                                            filter.numel() / filter.dims()[0]};
     filter.Resize(filter_matrix_shape);
-    filter_grad.Resize(filter_matrix_shape);
 
-    auto t1 = framework::EigenVector<T>::Flatten(filter_grad);
-    t1.device(context.GetEigenDevice<Place>()) = t1.constant(static_cast<T>(0));
-    auto t2 = framework::EigenVector<T>::Flatten(*input_grad);
-    t2.device(context.GetEigenDevice<Place>()) = t2.constant(static_cast<T>(0));
+    if (filter_grad_) {
+      filter_grad.Resize(filter_matrix_shape);
+      auto t1 = framework::EigenVector<T>::Flatten(filter_grad);
+      t1.device(context.GetEigenDevice<Place>()) =
+          t1.constant(static_cast<T>(0));
+    }
+
+    if (input_grad) {
+      input_grad->mutable_data<T>(context.GetPlace());
+      auto t2 = framework::EigenVector<T>::Flatten(*input_grad);
+      t2.device(context.GetEigenDevice<Place>()) =
+          t2.constant(static_cast<T>(0));
+    }
 
     auto* device_context =
         const_cast<platform::DeviceContext*>(context.device_context_);
@@ -176,35 +186,47 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
     // convolution backward weight operator: im2col + gemm
     int in_step = input_channels / groups;
     int out_step = output_channels / groups;
+    Tensor in_grad_batch;
+    Tensor in_batch;
     for (int i = 0; i < batch_size; i++) {
       Tensor out_grad_batch =
           output_grad->Slice<T>(i, i + 1).Resize(output_matrix_shape);
-      Tensor in_grad_batch = input_grad->Slice<T>(i, i + 1).Resize(input_shape);
-      Tensor in_batch = input->Slice<T>(i, i + 1).Resize(input_shape);
+      if (input_grad) {
+        in_grad_batch = input_grad->Slice<T>(i, i + 1).Resize(input_shape);
+      }
+      if (filter_grad_) {
+        in_batch = input->Slice<T>(i, i + 1).Resize(input_shape);
+      }
       for (int g = 0; g < groups; g++) {
-        // gemm
         Tensor out_grad_slice =
             out_grad_batch.Slice<T>(g * out_step, (g + 1) * out_step);
-        Tensor filter_slice = filter.Slice<T>(g * out_step, (g + 1) * out_step);
-        math::matmul<Place, T>(filter_slice, true, out_grad_slice, false,
-                               T(1.0), &col_matrix, T(0.0), device_context);
-
-        // col2im
-        Tensor in_grad_slice =
-            in_grad_batch.Slice<T>(g * in_step, (g + 1) * in_step);
-        col2im(in_grad_slice, col, strides[0], strides[1], paddings[0],
-               paddings[1], device_context);
-
-        // im2col
-        Tensor in_slice = in_batch.Slice<T>(g * in_step, (g + 1) * in_step);
-        im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1],
-               device_context);
-
-        // gemm
-        Tensor filter_grad_slice =
-            filter_grad.Slice<T>(g * out_step, (g + 1) * out_step);
-        math::matmul<Place, T>(out_grad_slice, false, col_matrix, true, T(1.0),
-                               &filter_grad_slice, T(1.0), device_context);
+        if (input_grad) {
+          // gemm
+          Tensor filter_slice =
+              filter.Slice<T>(g * out_step, (g + 1) * out_step);
+          math::matmul<Place, T>(filter_slice, true, out_grad_slice, false,
+                                 T(1.0), &col_matrix, T(0.0), device_context);
+
+          // col2im
+          Tensor in_grad_slice =
+              in_grad_batch.Slice<T>(g * in_step, (g + 1) * in_step);
+          col2im(in_grad_slice, col, strides[0], strides[1], paddings[0],
+                 paddings[1], device_context);
+        }
+
+        if (filter_grad_) {
+          // im2col
+          Tensor in_slice = in_batch.Slice<T>(g * in_step, (g + 1) * in_step);
+          im2col(in_slice, col, strides[0], strides[1], paddings[0],
+                 paddings[1], device_context);
+
+          // gemm
+          Tensor filter_grad_slice =
+              filter_grad.Slice<T>(g * out_step, (g + 1) * out_step);
+          math::matmul<Place, T>(out_grad_slice, false, col_matrix, true,
+                                 T(1.0), &filter_grad_slice, T(1.0),
+                                 device_context);
+        }
       }
     }
   }
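
// For context, the pattern this patch applies is independent of the kernel
// details: each gradient output of the backward op may be absent, so its
// allocation, zeroing, and per-group gemm/col2im/im2col work are guarded by
// a null check. Below is a minimal standalone sketch of that pattern; the
// Tensor struct and ConvBackward function here are hypothetical stand-ins
// for illustration, not PaddlePaddle APIs.

#include <iostream>
#include <vector>

// Hypothetical stand-in for a framework tensor.
struct Tensor {
  std::vector<float> data;
};

// Backward pass with optional outputs: callers pass nullptr for any
// gradient they do not need, and the matching work is skipped entirely.
void ConvBackward(const Tensor& out_grad, Tensor* input_grad,
                  Tensor* filter_grad) {
  if (input_grad) {
    // Allocate and zero only when the data gradient is requested
    // (stands in for mutable_data + the col2im branch in the patch).
    input_grad->data.assign(out_grad.data.size(), 0.0f);
  }
  if (filter_grad) {
    // Allocate and zero only when the weight gradient is requested
    // (stands in for mutable_data + the im2col branch in the patch).
    filter_grad->data.assign(out_grad.data.size(), 0.0f);
  }
}

int main() {
  Tensor out_grad{{1.0f, 2.0f, 3.0f}};
  Tensor input_grad;
  // Ask for the input gradient only; the filter-gradient work is skipped.
  ConvBackward(out_grad, &input_grad, /*filter_grad=*/nullptr);
  std::cout << input_grad.data.size() << std::endl;  // prints 3
  return 0;
}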