You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
195 lines
6.8 KiB
195 lines
6.8 KiB
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
|
|
|
|
#include "paddle/fluid/operators/im2sequence_op.h"
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace paddle {
|
|
namespace operators {
|
|
|
|
class Im2SequenceOp : public framework::OperatorWithKernel {
|
|
public:
|
|
using framework::OperatorWithKernel::OperatorWithKernel;
|
|
|
|
protected:
|
|
void InferShape(framework::InferShapeContext* ctx) const override {
|
|
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
|
|
platform::errors::NotFound(
|
|
"The input 'X' of Im2SequenceOp is not found."));
|
|
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
|
|
platform::errors::NotFound(
|
|
"The output 'Out' of Im2SequenceOp is not found."));
|
|
auto in_dim = ctx->GetInputDim("X");
|
|
|
|
PADDLE_ENFORCE_EQ(
|
|
in_dim.size(), 4,
|
|
platform::errors::InvalidArgument(
|
|
"The dimesions size of input 'X' in Im2SequenceOp should be 4. But "
|
|
"received dimesions size=[%d], dimesions=[%s].",
|
|
in_dim.size(), in_dim));
|
|
auto img_channels = in_dim[1];
|
|
|
|
auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
|
|
auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
|
|
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
|
|
if (!ctx->IsRuntime()) {
|
|
// set lod level for compile-time
|
|
framework::VarDesc* out_desc =
|
|
BOOST_GET(framework::VarDesc*, ctx->GetOutputVarPtrs("Out")[0]);
|
|
out_desc->SetLoDLevel(1);
|
|
}
|
|
|
|
ctx->SetOutputDim("Out",
|
|
{in_dim[0], img_channels * kernels[0] * kernels[1]});
|
|
}
|
|
};
|
|
|
|
// Declares the inputs, output, attributes and user-facing documentation of
// the im2sequence operator.
class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Registers the operator's proto description. Adjacent string literals are
  // concatenated by the compiler, so every fragment carries its own
  // separator to keep the generated documentation readable.
  void Make() override {
    AddInput("X",
             "(Tensor) The input tensor has NCHW format. "
             "N: batch size, "
             "C: channels, "
             "H: height, "
             "W: width.");
    AddInput("Y",
             "(Tensor) The input tensor of image real size(H, W). "
             "2-D with shape [batchsize, 2].")
        .AsDispensable();
    AddOutput("Out", "(LodTensor) The output data of im2sequence op.");
    AddAttr<std::vector<int>>("kernels",
                              "(vector<int>), the "
                              "kernels(kernel_height, kernel_width)");
    AddAttr<std::vector<int>>("strides",
                              "(vector<int> default:{1, 1}), the "
                              "strides(h_stride, w_stride)")
        .SetDefault({1, 1});
    AddAttr<std::vector<int>>("paddings",
                              "(vector<int> default:{0, 0, 0, 0}), the "
                              "paddings(up_pad, left_pad, down_pad, right_pad)")
        .SetDefault({0, 0, 0, 0});
    AddAttr<std::vector<int>>("out_stride",
                              "(vector<int> default:{1, 1}), the "
                              "out_stride(out_stride_height, "
                              "out_stride_width). This attribute is valid "
                              "only when input(Y) is not NULL. It represents "
                              "the scaling of the pic through the CNN.")
        .SetDefault({1, 1});
    AddComment(R"DOC(
This op uses kernels to scan images and converts these images to sequences.
After expanding, the number of time steps is output_height * output_width
and the dimension of each time step is kernel_height * kernel_width * channels,
in which:

output_height =
    1 + (padding_up + padding_down + img_height - kernel_height + stride_height - 1) /
            stride_height;
output_width =
    1 + (padding_left + padding_right + img_width - kernel_width + stride_width - 1) /
            stride_width;

This op can be used after convolution neural network, and before recurrent neural network.

Given:

    x = [[[[ 6.  2.  1.]
           [ 8.  3.  5.]
           [ 0.  2.  6.]]

          [[ 2.  4.  4.]
           [ 6.  3.  0.]
           [ 6.  4.  7.]]]

         [[[ 6.  7.  1.]
           [ 5.  7.  9.]
           [ 2.  4.  8.]]

          [[ 1.  2.  1.]
           [ 1.  3.  5.]
           [ 9.  0.  8.]]]]
    x.dims = {2, 2, 3, 3}

And:

    kernels = [2, 2]
    strides = [1, 1]
    paddings = [0, 0, 0, 0]

Then:

    output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
                   [ 2.  1.  3.  5.  4.  4.  3.  0.]
                   [ 8.  3.  0.  2.  6.  3.  6.  4.]
                   [ 3.  5.  2.  6.  3.  0.  4.  7.]
                   [ 6.  7.  5.  7.  1.  2.  1.  3.]
                   [ 7.  1.  7.  9.  2.  1.  3.  5.]
                   [ 5.  7.  2.  4.  1.  3.  9.  0.]
                   [ 7.  9.  4.  8.  3.  5.  0.  8.]]
    output.dims = {8, 8}
    output.lod = [[0, 4, 8]]

)DOC");
  }
};
|
|
|
|
class Im2SequenceGradOp : public framework::OperatorWithKernel {
|
|
public:
|
|
using framework::OperatorWithKernel::OperatorWithKernel;
|
|
|
|
protected:
|
|
void InferShape(framework::InferShapeContext* ctx) const override {
|
|
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
|
|
platform::errors::NotFound(
|
|
"The input 'X' of Im2SequenceGradOp is not found."));
|
|
PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true,
|
|
platform::errors::NotFound(
|
|
"The input %s of Im2SequenceGradOp is not found.",
|
|
framework::GradVarName("Out")));
|
|
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
class Im2SequenceGradMaker : public framework::SingleGradOpMaker<T> {
|
|
public:
|
|
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
|
|
|
|
protected:
|
|
void Apply(GradOpPtr<T> op) const override {
|
|
op->SetType("im2sequence_grad");
|
|
op->SetInput("X", this->Input("X"));
|
|
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
|
|
op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
|
|
op->SetAttrMap(this->Attrs());
|
|
}
|
|
};
|
|
|
|
} // namespace operators
|
|
} // namespace paddle
|
|
|
|
namespace ops = paddle::operators;
|
|
REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
|
|
ops::Im2SequenceGradMaker<paddle::framework::OpDesc>,
|
|
ops::Im2SequenceGradMaker<paddle::imperative::OpBase>);
|
|
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
|
|
REGISTER_OP_CPU_KERNEL(
|
|
im2sequence,
|
|
ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
|
|
REGISTER_OP_CPU_KERNEL(
|
|
im2sequence_grad,
|
|
ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
|