commit
6d2cfe9220
@ -0,0 +1,157 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/im2sequence_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
class Im2SequenceOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||
PADDLE_ENFORCE(ctx->HasInput("X"),
|
||||
"Input(X) of Im2SequenceOp should not be null.");
|
||||
PADDLE_ENFORCE(ctx->HasOutput("Out"),
|
||||
"Output(Out) of Im2SequenceOp op should not be null.");
|
||||
|
||||
auto in_dim = ctx->GetInputDim("X");
|
||||
PADDLE_ENFORCE_EQ(in_dim.size(), 4,
|
||||
"Input(X) format must be 4D tensor, eg., NCHW.");
|
||||
|
||||
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
|
||||
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
|
||||
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
|
||||
|
||||
int batch_size = in_dim[0];
|
||||
int img_channels = in_dim[1];
|
||||
int img_height = in_dim[2];
|
||||
int img_width = in_dim[3];
|
||||
|
||||
int output_height = OutputSize(img_height, kernels[0], paddings[0],
|
||||
paddings[2], strides[0]);
|
||||
int output_width =
|
||||
OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
|
||||
|
||||
ctx->SetOutputDim("Out", {batch_size * output_height * output_width,
|
||||
img_channels * kernels[0] * kernels[1]});
|
||||
}
|
||||
};
|
||||
|
||||
// Declares the inputs, outputs, attributes and user-facing documentation of
// the im2sequence operator.
class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  Im2SequenceOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    // Adjacent string literals are concatenated, so every fragment carries its
    // own separator to keep the rendered description readable.
    AddInput("X",
             "(Tensor) The input tensor has NCHW format. "
             "N: batch size, "
             "C: channels, "
             "H: height, "
             "W: width.");
    AddOutput("Out", "(LoDTensor) The output data of im2sequence op.");
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
    // The size formulas below mirror OutputSize() (integer division), and the
    // worked example yields 8 rows of 2 * 2 * 2 = 8 values each.
    AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences.
After expanding, the number of time steps is output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:

output_height =
    1 + (padding_up + padding_down + img_height - kernel_height) /
            stride_height;
output_width =
    1 + (padding_left + padding_right + img_width - kernel_width) /
            stride_width;

Both divisions are integer divisions.

This op can be used after convolution neural network, and before recurrent neural network.

Given:

x = [[[[ 6.  2.  1.]
       [ 8.  3.  5.]
       [ 0.  2.  6.]]

      [[ 2.  4.  4.]
       [ 6.  3.  0.]
       [ 6.  4.  7.]]]

     [[[ 6.  7.  1.]
       [ 5.  7.  9.]
       [ 2.  4.  8.]]

      [[ 1.  2.  1.]
       [ 1.  3.  5.]
       [ 9.  0.  8.]]]]
x.dims = {2, 2, 3, 3}

And:

kernels = [2, 2]
strides = [1, 1]
paddings = [0, 0, 0, 0]

Then:

output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
               [ 2.  1.  3.  5.  4.  4.  3.  0.]
               [ 8.  3.  0.  2.  6.  3.  6.  4.]
               [ 3.  5.  2.  6.  3.  0.  4.  7.]
               [ 6.  7.  5.  7.  1.  2.  1.  3.]
               [ 7.  1.  7.  9.  2.  1.  3.  5.]
               [ 5.  7.  2.  4.  1.  3.  9.  0.]
               [ 7.  9.  4.  8.  3.  5.  0.  8.]]
output.dims = {8, 8}
output.lod = [[0, 4, 8]]

)DOC");
  }
};
|
||||
|
||||
class Im2SequenceGradOp : public framework::OperatorWithKernel {
|
||||
public:
|
||||
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||
|
||||
protected:
|
||||
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
|
||||
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
|
||||
"Input(Out@GRAD) shouldn't be null.");
|
||||
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
REGISTER_OP(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
|
||||
im2sequence_grad, ops::Im2SequenceGradOp);
|
||||
REGISTER_OP_CPU_KERNEL(
|
||||
im2sequence,
|
||||
ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
|
||||
REGISTER_OP_CPU_KERNEL(
|
||||
im2sequence_grad,
|
||||
ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
|
@ -0,0 +1,25 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#define EIGEN_USE_GPU
|
||||
#include "paddle/operators/im2sequence_op.h"
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
|
||||
REGISTER_OP_CUDA_KERNEL(
|
||||
im2sequence,
|
||||
ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
|
||||
REGISTER_OP_CUDA_KERNEL(
|
||||
im2sequence_grad,
|
||||
ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);
|
@ -0,0 +1,135 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
You may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/framework/data_layout.h"
|
||||
#include "paddle/framework/eigen.h"
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/operators/math/im2col.h"
|
||||
#include "paddle/operators/math/math_function.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
using Tensor = framework::Tensor;
|
||||
using LoDTensor = framework::LoDTensor;
|
||||
|
||||
// Returns the number of window positions along one spatial axis:
//   (input_size + padding_0 + padding_1 - filter_size) / stride + 1
// using integer division. padding_0 and padding_1 are the paddings applied
// before and after the axis. The caller must pass a non-zero stride.
inline int OutputSize(int input_size, int filter_size, int padding_0,
                      int padding_1, int stride) {
  const int padded_size = input_size + padding_0 + padding_1;
  return (padded_size - filter_size) / stride + 1;
}
|
||||
|
||||
template <typename DeviceContext, typename T>
|
||||
class Im2SequenceKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
const Tensor* in = ctx.Input<Tensor>("X");
|
||||
LoDTensor* out = ctx.Output<LoDTensor>("Out");
|
||||
out->mutable_data<T>(ctx.GetPlace());
|
||||
// TODO(wanghaoshuang): Add layout checker after 'set_layout'
|
||||
// being available for python API
|
||||
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
|
||||
// "Input(X) layout must be NCHW");
|
||||
auto in_dim = in->dims();
|
||||
int batch_size = in_dim[0];
|
||||
int img_channels = in_dim[1];
|
||||
int img_height = in_dim[2];
|
||||
int img_width = in_dim[3];
|
||||
|
||||
auto kernels = ctx.Attr<std::vector<int>>("kernels");
|
||||
auto strides = ctx.Attr<std::vector<int>>("strides");
|
||||
auto paddings = ctx.Attr<std::vector<int>>("paddings");
|
||||
int output_height = OutputSize(img_height, kernels[0], paddings[0],
|
||||
paddings[2], strides[0]);
|
||||
int output_width =
|
||||
OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
|
||||
|
||||
const std::vector<int> dilations({1, 1});
|
||||
|
||||
auto out_dims = out->dims();
|
||||
out->Resize({batch_size, out->numel() / batch_size});
|
||||
for (int i = 0; i < batch_size; i++) {
|
||||
const Tensor src =
|
||||
in->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
|
||||
Tensor dst = out->Slice(i, i + 1).Resize(
|
||||
{output_height, output_width, img_channels, kernels[0], kernels[1]});
|
||||
|
||||
math::Im2ColFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
|
||||
auto& dev_ctx = ctx.template device_context<DeviceContext>();
|
||||
f(dev_ctx, src, dilations, strides, paddings, &dst);
|
||||
}
|
||||
out->Resize(out_dims);
|
||||
|
||||
// set lod information
|
||||
// TODO(wanghaoshuang): Move this to InferShape
|
||||
framework::LoD lod(1);
|
||||
lod[0].reserve(batch_size + 1);
|
||||
for (int i = 0, offset = 0; i < batch_size + 1; ++i) {
|
||||
lod[0][i] = offset;
|
||||
offset += output_height * output_width;
|
||||
}
|
||||
out->set_lod(lod);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename DeviceContext, typename T>
|
||||
class Im2SequenceGradKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& ctx) const override {
|
||||
auto* in = ctx.Input<Tensor>("X");
|
||||
Tensor* d_out =
|
||||
const_cast<Tensor*>(ctx.Input<Tensor>(framework::GradVarName("Out")));
|
||||
auto* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
|
||||
d_x->mutable_data<T>(ctx.GetPlace());
|
||||
|
||||
auto x_v = framework::EigenVector<T>::Flatten(*d_x);
|
||||
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
|
||||
x_v.device(place) = x_v.constant(0.0);
|
||||
|
||||
auto in_dim = in->dims();
|
||||
int batch_size = in_dim[0];
|
||||
int img_channels = in_dim[1];
|
||||
int img_height = in_dim[2];
|
||||
int img_width = in_dim[3];
|
||||
|
||||
auto kernels = ctx.Attr<std::vector<int>>("kernels");
|
||||
auto strides = ctx.Attr<std::vector<int>>("strides");
|
||||
auto paddings = ctx.Attr<std::vector<int>>("paddings");
|
||||
int output_height = OutputSize(img_height, kernels[0], paddings[0],
|
||||
paddings[2], strides[0]);
|
||||
int output_width =
|
||||
OutputSize(img_width, kernels[1], paddings[1], paddings[3], strides[1]);
|
||||
|
||||
const std::vector<int> dilations({1, 1});
|
||||
|
||||
auto d_out_dims = d_out->dims();
|
||||
d_out->Resize({batch_size, d_out->numel() / batch_size});
|
||||
for (int i = 0; i < batch_size; i++) {
|
||||
Tensor dst =
|
||||
d_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
|
||||
const Tensor src = d_out->Slice(i, i + 1).Resize(
|
||||
{output_height, output_width, img_channels, kernels[0], kernels[1]});
|
||||
math::Col2ImFunctor<math::ColFormat::kOCF, DeviceContext, T> f;
|
||||
auto& dev_ctx = ctx.template device_context<DeviceContext>();
|
||||
f(dev_ctx, src, dilations, strides, paddings, &dst);
|
||||
}
|
||||
d_out->Resize(d_out_dims);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
@ -0,0 +1,167 @@
|
||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||
#you may not use this file except in compliance with the License.
|
||||
#You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#Unless required by applicable law or agreed to in writing, software
|
||||
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
#See the License for the specific language governing permissions and
|
||||
#limitations under the License.
|
||||
import unittest
|
||||
import numpy as np
|
||||
from op_test import OpTest
|
||||
|
||||
|
||||
def get_output_shape(attrs, in_shape):
    """Return (output_height, output_width) of the expanded image.

    attrs: dict with 'paddings' (up, left, down, right), 'kernels'
        (height, width) and 'strides' (height, width).
    in_shape: shape of the input batch; index 2 is read as the image height
        and index 3 as the image width.
    """
    img_height = in_shape[2]
    img_width = in_shape[3]

    paddings = attrs['paddings']
    kernels = attrs['kernels']
    strides = attrs['strides']

    # Floor division keeps the sizes integral on both Python 2 and Python 3;
    # with `/` Python 3 would return floats, which cannot be used as array
    # dimensions by the callers.
    output_height = 1 + (img_height + paddings[0] + paddings[2] - kernels[0] +
                         strides[0] - 1) // strides[0]

    output_width = 1 + (img_width + paddings[1] + paddings[3] - kernels[1] +
                        strides[1] - 1) // strides[1]

    return output_height, output_width
|
||||
|
||||
|
||||
def im2col(attrs, im, col):
    """Fill `col` in place with the sliding-window patches of `im`.

    im: {CHW}
    col:
        {outputHeight, outputWidth, inputChannels, filterHeight, filterWidth}

    attrs['strides'] is (stride_height, stride_width). Only the first two
    entries of attrs['paddings'] (up, left) are needed to locate a window;
    positions that fall outside the image are written as 0.0.
    """
    input_channels, input_height, input_width = im.shape
    output_height, output_width, _, filter_height, filter_width = col.shape

    stride_height, stride_width = attrs['strides']
    padding_height, padding_width = attrs['paddings'][0:2]

    for col_row_idx in range(output_height):
        for col_col_idx in range(output_width):
            for channel in range(input_channels):
                for filter_row_idx in range(filter_height):
                    for filter_col_idx in range(filter_width):
                        # Image coordinates covered by this filter tap.
                        im_row_offset = col_row_idx * stride_height \
                            + filter_row_idx - padding_height
                        im_col_offset = col_col_idx * stride_width \
                            + filter_col_idx - padding_width

                        inside = (0 <= im_row_offset < input_height and
                                  0 <= im_col_offset < input_width)
                        if inside:
                            value = im[channel][im_row_offset][im_col_offset]
                        else:
                            # The tap lies in the padded border.
                            value = 0.0

                        col[col_row_idx][col_col_idx][channel][
                            filter_row_idx][filter_col_idx] = value
|
||||
|
||||
|
||||
def Im2Sequence(inputs, attrs):
    """Reference implementation of the im2sequence op.

    inputs: 4-D array; axis 0 is the batch and axis 1 the channels.
    attrs: dict with 'kernels', 'strides' and 'paddings'.

    Returns a float32 array of shape
    [batch_size * output_height * output_width,
     img_channels * kernel_height * kernel_width].
    """
    batch_size, img_channels = inputs.shape[0], inputs.shape[1]
    kernel_height, kernel_width = attrs['kernels'][0], attrs['kernels'][1]
    output_height, output_width = get_output_shape(attrs, inputs.shape)

    expanded_shape = [
        batch_size, output_height, output_width, img_channels, kernel_height,
        kernel_width
    ]
    out = np.zeros(expanded_shape).astype("float32")

    # Expand every image into its own slice of `out` (filled in place).
    for idx in range(len(inputs)):
        im2col(attrs, inputs[idx], out[idx])

    rows = batch_size * output_height * output_width
    cols = img_channels * kernel_height * kernel_width
    return out.reshape([rows, cols])
|
||||
|
||||
|
||||
class TestBlockExpandOp(OpTest):
    """Checks the im2sequence op against the Im2Sequence reference.

    Subclasses override config() to exercise other shapes and attributes.
    """

    def config(self):
        """Input shape and op attributes for this case."""
        self.attrs = {
            'kernels': [2, 2],
            'strides': [1, 1],
            'paddings': [1, 1, 1, 1]
        }
        self.batch_size = 1
        self.img_channels = 3
        self.img_height = 4
        self.img_width = 4

    def setUp(self):
        self.config()
        self.op_type = "im2sequence"

        in_shape = [
            self.batch_size, self.img_channels, self.img_height, self.img_width
        ]
        x = np.random.uniform(0.1, 1, in_shape).astype("float32")
        expected = Im2Sequence(x, self.attrs)

        self.inputs = {'X': x}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X'], 'Out')
|
||||
|
||||
|
||||
class TestBlockExpandOpCase2(TestBlockExpandOp):
    """Batch of two 3x4x5 images, 2x1 kernel, stride (2, 1), padded borders."""

    def config(self):
        self.attrs = {
            'kernels': [2, 1],
            'strides': [2, 1],
            'paddings': [2, 1, 2, 1]
        }
        self.batch_size, self.img_channels = 2, 3
        self.img_height, self.img_width = 4, 5
|
||||
|
||||
|
||||
class TestBlockExpandOpCase3(TestBlockExpandOp):
    """Batch of three 1x4x5 images, padded along the height axis only."""

    def config(self):
        self.attrs = {
            'kernels': [2, 1],
            'strides': [2, 1],
            'paddings': [2, 0, 2, 0]
        }
        self.batch_size, self.img_channels = 3, 1
        self.img_height, self.img_width = 4, 5
|
||||
|
||||
|
||||
class TestBlockExpandOpCase4(TestBlockExpandOp):
    """Batch of two 2x3x3 images, 2x2 kernel, unit stride, no padding."""

    def config(self):
        self.attrs = {
            'kernels': [2, 2],
            'strides': [1, 1],
            'paddings': [0, 0, 0, 0]
        }
        self.batch_size, self.img_channels = 2, 2
        self.img_height, self.img_width = 3, 3
|
||||
|
||||
|
||||
# Run every test case in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Reference in new issue