commit
						7a891a3321
					
				@ -0,0 +1,133 @@
 | 
				
			|||||||
 | 
					/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					   you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					   You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					   distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					   See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					   limitations under the License. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "paddle/operators/gemm_conv2d_op.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace paddle {
 | 
				
			||||||
 | 
					namespace operators {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Computes the extent of the convolution output along one spatial axis as
					// (input_size - filter_size + 2 * padding) / stride + 1, with integer
					// division. The caller must pass a non-zero stride.
					int outputSize(int input_size, int filter_size, int padding, int stride) {
					  const int padded_span = input_size - filter_size + 2 * padding;
					  return padded_span / stride + 1;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Conv2DOp : public framework::OperatorWithKernel {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  using framework::OperatorWithKernel::OperatorWithKernel;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 protected:
 | 
				
			||||||
 | 
					  void InferShape(const framework::InferShapeContext &ctx) const override {
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Input"),
 | 
				
			||||||
 | 
					                            "Input(Input) of Conv2DOp should not be null.");
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Filter"),
 | 
				
			||||||
 | 
					                            "Input(Filter) of Conv2DOp should not be null.");
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Output"),
 | 
				
			||||||
 | 
					                            "Output(Output) of Conv2DOp should not be null.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto in = ctx.Input<Tensor>("Input");
 | 
				
			||||||
 | 
					    auto filter = ctx.Input<Tensor>("Filter");
 | 
				
			||||||
 | 
					    auto out = ctx.Output<framework::LoDTensor>("Output");
 | 
				
			||||||
 | 
					    std::vector<int> strides = Attr<std::vector<int>>("strides");
 | 
				
			||||||
 | 
					    std::vector<int> paddings = Attr<std::vector<int>>("paddings");
 | 
				
			||||||
 | 
					    int groups = Attr<int>("groups");
 | 
				
			||||||
 | 
					    int input_channels = in->dims()[1];
 | 
				
			||||||
 | 
					    int output_channels = filter->dims()[0];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_EQ(in->dims().size(), 4, "Conv2DOp input should be 4-D.");
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_EQ(filter->dims().size(), 4,
 | 
				
			||||||
 | 
					                      "Conv2DOp filter should be 4-D.");
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_EQ(input_channels, filter->dims()[1] * groups,
 | 
				
			||||||
 | 
					                      "The number of input channels should be equal to filter "
 | 
				
			||||||
 | 
					                      "channels * groups.");
 | 
				
			||||||
 | 
					    PADDLE_ENFORCE_EQ(
 | 
				
			||||||
 | 
					        output_channels % groups, 0,
 | 
				
			||||||
 | 
					        "The number of output channels should be divided by groups.");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    auto output_height =
 | 
				
			||||||
 | 
					        outputSize(in->dims()[2], filter->dims()[2], paddings[0], strides[0]);
 | 
				
			||||||
 | 
					    auto output_width =
 | 
				
			||||||
 | 
					        outputSize(in->dims()[3], filter->dims()[3], paddings[1], strides[1]);
 | 
				
			||||||
 | 
					    out->Resize(
 | 
				
			||||||
 | 
					        {in->dims()[0], filter->dims()[0], output_height, output_width});
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Declares the inputs, output, attributes and user-facing documentation of
					// the conv2d operator.
					class Conv2DOpMaker : public framework::OpProtoAndCheckerMaker {
					 public:
					  Conv2DOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
					      : OpProtoAndCheckerMaker(proto, op_checker) {
					    AddInput(
					        "Input",
					        "The input tensor of convolution operator. "
					        "The format of input tensor is NCHW. Where N is batch size, C is the "
					        "number of channels, H and W is the height and width of image.");
					    // Adjacent literals are concatenated verbatim, so every sentence except
					    // the last one needs its own trailing space.
					    AddInput(
					        "Filter",
					        "The filter tensor of convolution operator. "
					        "The format of the filter tensor is MCHW, where M is the number of "
					        "output image channels, C is the number of input image channels, "
					        "H and W is height and width of filter. "
					        "If the groups attribute is greater than 1, C equal the number of "
					        "input image channels divided by the groups.");
					    AddOutput("Output",
					              "The output tensor of convolution operator. "
					              "The format of output tensor is also NCHW.");
					    AddAttr<std::vector<int>>("strides", "strides of convolution operator.")
					        .SetDefault({1, 1});
					    AddAttr<std::vector<int>>("paddings", "paddings of convolution operator.")
					        .SetDefault({0, 0});
					    AddAttr<int>(
					        "groups",
					        "group size of convolution operator. "
					        "Refer to grouped convolution in Alex Krizhevsky's paper: "
					        "when group=2, the first half of the filters are only connected to the "
					        "first half of the input channels, and the second half only connected "
					        "to the second half.")
					        .SetDefault(1);
					    AddComment(R"DOC(
					The convolution operation calculates the output based on the input, filter
					and strides, paddings, groups parameters. The size of each dimension of the
					parameters is checked in the infer-shape.
					)DOC");
					  }
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Conv2DOpGrad : public framework::OperatorWithKernel {
 | 
				
			||||||
 | 
					 public:
 | 
				
			||||||
 | 
					  using framework::OperatorWithKernel::OperatorWithKernel;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 protected:
 | 
				
			||||||
 | 
					  void InferShape(const framework::InferShapeContext &ctx) const override {
 | 
				
			||||||
 | 
					    auto in = ctx.Input<Tensor>("Input");
 | 
				
			||||||
 | 
					    auto filter = ctx.Input<Tensor>("Filter");
 | 
				
			||||||
 | 
					    auto d_in =
 | 
				
			||||||
 | 
					        ctx.Output<framework::LoDTensor>(framework::GradVarName("Input"));
 | 
				
			||||||
 | 
					    auto d_filter =
 | 
				
			||||||
 | 
					        ctx.Output<framework::LoDTensor>(framework::GradVarName("Filter"));
 | 
				
			||||||
 | 
					    if (d_in) d_in->Resize(in->dims());
 | 
				
			||||||
 | 
					    if (d_filter) d_filter->Resize(filter->dims());
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace operators
 | 
				
			||||||
 | 
					}  // namespace paddle
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace ops = paddle::operators;
 | 
				
			||||||
 | 
					REGISTER_OP(conv2d, ops::Conv2DOp, ops::Conv2DOpMaker, conv2d_grad,
 | 
				
			||||||
 | 
					            ops::Conv2DOpGrad);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					REGISTER_OP_CPU_KERNEL(
 | 
				
			||||||
 | 
					    conv2d, ops::GemmConv2DKernel<paddle::platform::CPUPlace, float>);
 | 
				
			||||||
 | 
					REGISTER_OP_CPU_KERNEL(
 | 
				
			||||||
 | 
					    conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::CPUPlace, float>);
 | 
				
			||||||
@ -0,0 +1,22 @@
 | 
				
			|||||||
 | 
					/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					   you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					   You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					   distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					   See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					   limitations under the License. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "paddle/operators/gemm_conv2d_op.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace ops = paddle::operators;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					REGISTER_OP_GPU_KERNEL(
 | 
				
			||||||
 | 
					    conv2d, ops::GemmConv2DKernel<paddle::platform::GPUPlace, float>);
 | 
				
			||||||
 | 
					REGISTER_OP_GPU_KERNEL(
 | 
				
			||||||
 | 
					    conv2d_grad, ops::GemmConvGrad2DKernel<paddle::platform::GPUPlace, float>);
 | 
				
			||||||
@ -0,0 +1,231 @@
 | 
				
			|||||||
 | 
					/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||||
 | 
					you may not use this file except in compliance with the License.
 | 
				
			||||||
 | 
					You may obtain a copy of the License at
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Unless required by applicable law or agreed to in writing, software
 | 
				
			||||||
 | 
					distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||||
 | 
					WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
 | 
					See the License for the specific language governing permissions and
 | 
				
			||||||
 | 
					limitations under the License. */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#pragma once
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "paddle/framework/eigen.h"
 | 
				
			||||||
 | 
					#include "paddle/framework/op_registry.h"
 | 
				
			||||||
 | 
					#include "paddle/operators/math/im2col.h"
 | 
				
			||||||
 | 
					#include "paddle/operators/math/math_function.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					namespace paddle {
 | 
				
			||||||
 | 
					namespace operators {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Shorthand for framework::Tensor, used by the kernels in this header.
					using Tensor = framework::Tensor;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Place, typename T>
 | 
				
			||||||
 | 
					// Forward conv2d kernel: for every sample and every group the input slice is
					// unfolded with im2col and multiplied by the matching filter rows (GEMM).
					class GemmConv2DKernel : public framework::OpKernel {
					 public:
					  void Compute(const framework::ExecutionContext& context) const override {
					    const Tensor* input = context.Input<Tensor>("Input");
					    // The filter is resized to a matrix further down. Working on a copy of
					    // the Tensor object keeps the variable stored in the Scope untouched.
					    Tensor filter = *context.Input<Tensor>("Filter");
					    Tensor* output = context.Output<Tensor>("Output");
					    output->mutable_data<T>(context.GetPlace());
					
					    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
					    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
					    int groups = context.Attr<int>("groups");
					
					    int num_samples = input->dims()[0];
					    int input_channels = input->dims()[1];
					    int filter_height = filter.dims()[filter.dims().size() - 2];
					    int filter_width = filter.dims()[filter.dims().size() - 1];
					    int output_channels = output->dims()[1];
					    int output_height = output->dims()[2];
					    int output_width = output->dims()[3];
					
					    // Number of input channels / output channels handled by one group.
					    int in_step = input_channels / groups;
					    int out_step = output_channels / groups;
					
					    math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
					
					    // Shape of the im2col buffer as seen by im2col itself.
					    framework::DDim col_shape = {in_step, filter_height, filter_width,
					                                 output_height, output_width};
					    // Shape of the same buffer as seen by the GEMM.
					    framework::DDim col_matrix_shape = {
					        in_step * filter_height * filter_width, output_height * output_width};
					    Tensor col;
					    col.mutable_data<T>(col_shape, context.GetPlace());
					    // col_matrix shares its data with col; only the shape differs, so that
					    // the buffer can be handed to the matrix multiplication interface.
					    Tensor col_matrix = col;
					    col_matrix.Resize(col_matrix_shape);
					
					    framework::DDim input_shape = {input->dims()[1], input->dims()[2],
					                                   input->dims()[3]};
					    framework::DDim filter_matrix_shape = {filter.dims()[0],
					                                           filter.numel() / filter.dims()[0]};
					    filter.Resize(filter_matrix_shape);
					
					    framework::DDim output_matrix_shape = {output_channels,
					                                           output_height * output_width};
					
					    auto* device_context =
					        const_cast<platform::DeviceContext*>(context.device_context_);
					
					    // convolution = im2col + gemm, one (sample, group) pair at a time
					    for (int n = 0; n < num_samples; ++n) {
					      Tensor sample_in = input->Slice<T>(n, n + 1).Resize(input_shape);
					      Tensor sample_out =
					          output->Slice<T>(n, n + 1).Resize(output_matrix_shape);
					      for (int g = 0; g < groups; ++g) {
					        const int in_begin = g * in_step;
					        const int out_begin = g * out_step;
					
					        // unfold the input channels of this group into col
					        Tensor in_slice = sample_in.Slice<T>(in_begin, in_begin + in_step);
					        im2col(in_slice, col, strides[0], strides[1], paddings[0], paddings[1],
					               device_context);
					
					        // out_slice = filter_slice * col_matrix
					        Tensor out_slice =
					            sample_out.Slice<T>(out_begin, out_begin + out_step);
					        Tensor filter_slice =
					            filter.Slice<T>(out_begin, out_begin + out_step);
					        math::matmul<Place, T>(filter_slice, false, col_matrix, false, T(1.0),
					                               &out_slice, T(0.0), device_context);
					      }
					    }
					  }
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					template <typename Place, typename T>
 | 
				
			||||||
 | 
					// Backward conv2d kernel.
					//   input gradient : gemm (filter^T * output_grad) followed by col2im
					//   filter gradient: im2col followed by gemm (output_grad * col^T),
					//                    accumulated over all samples
					class GemmConvGrad2DKernel : public framework::OpKernel {
					 public:
					  void Compute(const framework::ExecutionContext& context) const override {
					    const Tensor* input = context.Input<Tensor>("Input");
					    const Tensor* output_grad =
					        context.Input<Tensor>(framework::GradVarName("Output"));
					    Tensor* input_grad =
					        context.Output<Tensor>(framework::GradVarName("Input"));
					    Tensor* filter_grad =
					        context.Output<Tensor>(framework::GradVarName("Filter"));
					
					    // The filter (and later the filter gradient) is resized to a matrix.
					    // Working on copies of the Tensor objects keeps the variables stored in
					    // the Scope untouched.
					    Tensor filter = *context.Input<Tensor>("Filter");
					
					    std::vector<int> strides = context.Attr<std::vector<int>>("strides");
					    std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
					    int groups = context.Attr<int>("groups");
					
					    int num_samples = input->dims()[0];
					    int input_channels = input->dims()[1];
					    int filter_height = filter.dims()[filter.dims().size() - 2];
					    int filter_width = filter.dims()[filter.dims().size() - 1];
					    int output_channels = output_grad->dims()[1];
					    int output_height = output_grad->dims()[2];
					    int output_width = output_grad->dims()[3];
					
					    // Number of input channels / output channels handled by one group.
					    int in_step = input_channels / groups;
					    int out_step = output_channels / groups;
					
					    math::Col2ImFunctor<math::ColFormat::kCFO, Place, T> col2im;
					    math::Im2ColFunctor<math::ColFormat::kCFO, Place, T> im2col;
					
					    // Shape of the column buffer as seen by im2col / col2im.
					    framework::DDim col_shape = {in_step, filter_height, filter_width,
					                                 output_height, output_width};
					    // Shape of the same buffer as seen by the GEMM.
					    framework::DDim col_matrix_shape = {
					        in_step * filter_height * filter_width, output_height * output_width};
					    Tensor col;
					    col.mutable_data<T>(col_shape, context.GetPlace());
					    // col_matrix shares its data with col; only the shape differs, so that
					    // the buffer can be handed to the matrix multiplication interface.
					    Tensor col_matrix = col;
					    col_matrix.Resize(col_matrix_shape);
					
					    framework::DDim input_shape = {input->dims()[1], input->dims()[2],
					                                   input->dims()[3]};
					    framework::DDim output_matrix_shape = {
					        output_grad->dims()[1],
					        output_grad->dims()[2] * output_grad->dims()[3]};
					
					    framework::DDim filter_matrix_shape = {filter.dims()[0],
					                                           filter.numel() / filter.dims()[0]};
					    filter.Resize(filter_matrix_shape);
					
					    auto* device_context =
					        const_cast<platform::DeviceContext*>(context.device_context_);
					
					    if (input_grad) {
					      input_grad->mutable_data<T>(context.GetPlace());
					      // Zero the whole gradient before the per-group col2im calls.
					      auto flat_grad = framework::EigenVector<T>::Flatten(*input_grad);
					      flat_grad.device(context.GetEigenDevice<Place>()) =
					          flat_grad.constant(static_cast<T>(0));
					
					      for (int n = 0; n < num_samples; ++n) {
					        Tensor sample_out_grad =
					            output_grad->Slice<T>(n, n + 1).Resize(output_matrix_shape);
					        Tensor sample_in_grad =
					            input_grad->Slice<T>(n, n + 1).Resize(input_shape);
					        for (int g = 0; g < groups; ++g) {
					          const int in_begin = g * in_step;
					          const int out_begin = g * out_step;
					
					          // col_matrix = filter_slice^T * out_grad_slice
					          Tensor out_grad_slice =
					              sample_out_grad.Slice<T>(out_begin, out_begin + out_step);
					          Tensor filter_slice =
					              filter.Slice<T>(out_begin, out_begin + out_step);
					          math::matmul<Place, T>(filter_slice, true, out_grad_slice, false,
					                                 T(1.0), &col_matrix, T(0.0), device_context);
					
					          // fold the columns back into the input gradient of this group
					          Tensor in_grad_slice =
					              sample_in_grad.Slice<T>(in_begin, in_begin + in_step);
					          col2im(in_grad_slice, col, strides[0], strides[1], paddings[0],
					                 paddings[1], device_context);
					        }
					      }
					    }
					
					    if (filter_grad) {
					      filter_grad->mutable_data<T>(context.GetPlace());
					      Tensor filter_grad_matrix = *filter_grad;
					      filter_grad_matrix.Resize(filter_matrix_shape);
					      // Zero first: the GEMM below accumulates (beta = 1) over all samples.
					      auto flat_grad = framework::EigenVector<T>::Flatten(filter_grad_matrix);
					      flat_grad.device(context.GetEigenDevice<Place>()) =
					          flat_grad.constant(static_cast<T>(0));
					
					      for (int n = 0; n < num_samples; ++n) {
					        Tensor sample_out_grad =
					            output_grad->Slice<T>(n, n + 1).Resize(output_matrix_shape);
					        Tensor sample_in = input->Slice<T>(n, n + 1).Resize(input_shape);
					        for (int g = 0; g < groups; ++g) {
					          const int in_begin = g * in_step;
					          const int out_begin = g * out_step;
					
					          // unfold the input channels of this group into col
					          Tensor out_grad_slice =
					              sample_out_grad.Slice<T>(out_begin, out_begin + out_step);
					          Tensor in_slice = sample_in.Slice<T>(in_begin, in_begin + in_step);
					          im2col(in_slice, col, strides[0], strides[1], paddings[0],
					                 paddings[1], device_context);
					
					          // filter_grad_slice += out_grad_slice * col_matrix^T
					          Tensor filter_grad_slice =
					              filter_grad_matrix.Slice<T>(out_begin, out_begin + out_step);
					          math::matmul<Place, T>(out_grad_slice, false, col_matrix, true,
					                                 T(1.0), &filter_grad_slice, T(1.0),
					                                 device_context);
					        }
					      }
					    }
					  }
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}  // namespace operators
 | 
				
			||||||
 | 
					}  // namespace paddle
 | 
				
			||||||
@ -0,0 +1,94 @@
 | 
				
			|||||||
 | 
					import unittest
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					from op_test import OpTest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestConv2dOp(OpTest):
					    """Checks the conv2d operator against a naive loop-based reference.

					    Subclasses override ``init_groups`` to exercise grouped convolution.
					    """

					    def setUp(self):
					        """Builds random inputs and the expected output for ``conv2d``."""
					        self.init_groups()
					        self.op_type = "conv2d"
					        batch_size = 2
					        input_channels = 3
					        input_height = 5
					        input_width = 5
					        output_channels = 6
					        filter_height = 3
					        filter_width = 3
					        stride = 1
					        padding = 0
					        # Floor division keeps every size an int on Python 2 and Python 3;
					        # plain ``/`` would yield floats on Python 3 and break the shapes.
					        output_height = (
					            input_height - filter_height + 2 * padding) // stride + 1
					        output_width = (
					            input_width - filter_width + 2 * padding) // stride + 1
					        output_group_channels = output_channels // self.groups
					        input_group_channels = input_channels // self.groups

					        input_data = np.random.random(
					            (batch_size, input_channels, input_height,
					             input_width)).astype("float32")
					        filter_data = np.random.random(
					            (output_channels, input_group_channels, filter_height,
					             filter_width)).astype("float32")
					        # Every element is assigned by the loops below.
					        output = np.ndarray(
					            (batch_size, output_channels, output_height, output_width))

					        self.inputs = {'Input': input_data, 'Filter': filter_data}
					        self.attrs = {
					            'strides': [stride, stride],
					            'paddings': [padding, padding],
					            'groups': self.groups
					        }

					        for batchid in range(batch_size):
					            for group in range(self.groups):
					                in_begin = group * input_group_channels
					                in_end = in_begin + input_group_channels
					                out_begin = group * output_group_channels
					                out_end = out_begin + output_group_channels
					                for outchannelid in range(out_begin, out_end):
					                    for rowid in range(output_height):
					                        for colid in range(output_width):
					                            start_h = (rowid * stride) - padding
					                            start_w = (colid * stride) - padding
					                            output_value = 0.0
					                            for inchannelid in range(in_begin, in_end):
					                                for frowid in range(filter_height):
					                                    for fcolid in range(filter_width):
					                                        inrowid = start_h + frowid
					                                        incolid = start_w + fcolid
					                                        # Positions in the padding area
					                                        # contribute zero.
					                                        input_value = 0.0
					                                        if (0 <= inrowid < input_height and
					                                                0 <= incolid < input_width):
					                                            input_value = input_data[batchid][
					                                                inchannelid][inrowid][incolid]
					                                        # The filter only stores the channels
					                                        # of its own group.
					                                        filter_value = filter_data[
					                                            outchannelid][
					                                                inchannelid %
					                                                input_group_channels][
					                                                    frowid][fcolid]
					                                        output_value += input_value * filter_value
					                            output[batchid][outchannelid][rowid][
					                                colid] = output_value

					        self.outputs = {'Output': output}

					    def test_check_output(self):
					        self.check_output()

					    def test_check_grad(self):
					        self.check_grad(set(['Input', 'Filter']), 'Output')

					    def test_check_grad_no_filter(self):
					        self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))

					    def test_check_grad_no_input(self):
					        self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))

					    def init_groups(self):
					        """Sets the ``groups`` attribute used by ``setUp``."""
					        self.groups = 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestWithGroup(TestConv2dOp):
					    """Repeats the TestConv2dOp checks with three convolution groups."""

					    def init_groups(self):
					        self.groups = 3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Run the test cases when this file is executed as a script.
					if '__main__' == __name__:
					    unittest.main()
 | 
				
			||||||
					Loading…
					
					
				
		Reference in new issue