You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							195 lines
						
					
					
						
							6.8 KiB
						
					
					
				
			
		
		
	
	
							195 lines
						
					
					
						
							6.8 KiB
						
					
					
				| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | |
| 
 | |
| Licensed under the Apache License, Version 2.0 (the "License");
 | |
| you may not use this file except in compliance with the License.
 | |
| You may obtain a copy of the License at
 | |
| 
 | |
|     http://www.apache.org/licenses/LICENSE-2.0
 | |
| 
 | |
| Unless required by applicable law or agreed to in writing, software
 | |
| distributed under the License is distributed on an "AS IS" BASIS,
 | |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| See the License for the specific language governing permissions and
 | |
| limitations under the License. */
 | |
| 
 | |
| #include "paddle/fluid/operators/im2sequence_op.h"
 | |
| #include <memory>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| 
 | |
| namespace paddle {
 | |
| namespace operators {
 | |
| 
 | |
| class Im2SequenceOp : public framework::OperatorWithKernel {
 | |
|  public:
 | |
|   using framework::OperatorWithKernel::OperatorWithKernel;
 | |
| 
 | |
|  protected:
 | |
|   void InferShape(framework::InferShapeContext* ctx) const override {
 | |
|     PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
 | |
|                       platform::errors::NotFound(
 | |
|                           "The input 'X' of Im2SequenceOp is not found."));
 | |
|     PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
 | |
|                       platform::errors::NotFound(
 | |
|                           "The output 'Out' of Im2SequenceOp is not found."));
 | |
|     auto in_dim = ctx->GetInputDim("X");
 | |
| 
 | |
|     PADDLE_ENFORCE_EQ(
 | |
|         in_dim.size(), 4,
 | |
|         platform::errors::InvalidArgument(
 | |
|             "The dimesions size of input 'X' in Im2SequenceOp should be 4. But "
 | |
|             "received dimesions size=[%d], dimesions=[%s].",
 | |
|             in_dim.size(), in_dim));
 | |
|     auto img_channels = in_dim[1];
 | |
| 
 | |
|     auto kernels = ctx->Attrs().Get<std::vector<int>>("kernels");
 | |
|     auto strides = ctx->Attrs().Get<std::vector<int>>("strides");
 | |
|     auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
 | |
|     if (!ctx->IsRuntime()) {
 | |
|       // set lod level for compile-time
 | |
|       framework::VarDesc* out_desc =
 | |
|           BOOST_GET(framework::VarDesc*, ctx->GetOutputVarPtrs("Out")[0]);
 | |
|       out_desc->SetLoDLevel(1);
 | |
|     }
 | |
| 
 | |
|     ctx->SetOutputDim("Out",
 | |
|                       {in_dim[0], img_channels * kernels[0] * kernels[1]});
 | |
|   }
 | |
| };
 | |
| 
 | |
| class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
 | |
|  public:
 | |
|   void Make() override {
 | |
|     AddInput("X",
 | |
|              "(Tensor) The input tensor has NCHW format."
 | |
|              "N: batch size"
 | |
|              "C: channels"
 | |
|              "H: height"
 | |
|              "W: width");
 | |
|     AddInput("Y",
 | |
|              "(Tensor) The input tensor of image real size(H, W)."
 | |
|              "2-D with shape [batchsize, 2]")
 | |
|         .AsDispensable();
 | |
|     AddOutput("Out", "(LodTensor) The output data of im2sequence op,");
 | |
|     AddAttr<std::vector<int>>("kernels",
 | |
|                               "(vector<int>), the "
 | |
|                               "kernels(kernel_height, kernel_width)");
 | |
|     AddAttr<std::vector<int>>("strides",
 | |
|                               "(vector<int> default:{1, 1}), the "
 | |
|                               "strides(h_stride, w_stride)")
 | |
|         .SetDefault({1, 1});
 | |
|     AddAttr<std::vector<int>>("paddings",
 | |
|                               "(vector<int> default:{0, 0, 0, 0}), the "
 | |
|                               "paddings(up_pad, left_pad, down_pad, right_pad)")
 | |
|         .SetDefault({0, 0, 0, 0});
 | |
|     AddAttr<std::vector<int>>("out_stride",
 | |
|                               "the attribute is valid only when input(Y)"
 | |
|                               "is not NULL.this attribute represents the"
 | |
|                               "scaling of the pic through the CNN"
 | |
|                               "(vector<int> dedault:{1,1}),the out_stride"
 | |
|                               " (out_stride_height, out_stride_width)")
 | |
|         .SetDefault({1, 1});
 | |
|     AddComment(R"DOC(
 | |
| This op uses kernels to scan images and converts these images to sequences.
 | |
| After expanding, The number of time steps are output_height * output_width
 | |
| and the dimension of each time step is kernel_height * kernel_width * channels,
 | |
| in which:
 | |
| 
 | |
| output_height =
 | |
|     1 + (padding_height + padding_down + img_height - kernel_height + stride_height - 1) /
 | |
|             stride_height;
 | |
| output_width =
 | |
|     1 + (padding_left + padding+right + img_width - kernel_width + stride_width - 1) /
 | |
|             stride_width;
 | |
| 
 | |
| This op can be used after convolution neural network, and before recurrent neural network.
 | |
| 
 | |
| Given:
 | |
| 
 | |
| x = [[[[ 6.  2.  1.]
 | |
|        [ 8.  3.  5.]
 | |
|        [ 0.  2.  6.]]
 | |
| 
 | |
|       [[ 2.  4.  4.]
 | |
|        [ 6.  3.  0.]
 | |
|        [ 6.  4.  7.]]]
 | |
| 
 | |
|      [[[ 6.  7.  1.]
 | |
|        [ 5.  7.  9.]
 | |
|        [ 2.  4.  8.]]
 | |
| 
 | |
|       [[ 1.  2.  1.]
 | |
|        [ 1.  3.  5.]
 | |
|        [ 9.  0.  8.]]]]
 | |
| x.dims = {2, 2, 3, 3}
 | |
| 
 | |
| And:
 | |
| 
 | |
| kernels = [2, 2]
 | |
| strides = [1, 1]
 | |
| paddings = [0, 0, 0, 0]
 | |
| 
 | |
| Then:
 | |
| 
 | |
| output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
 | |
|                [ 2.  1.  3.  5.  4.  4.  3.  0.]
 | |
|                [ 8.  3.  0.  2.  6.  3.  6.  4.]
 | |
|                [ 3.  5.  2.  6.  3.  0.  4.  7.]
 | |
|                [ 6.  7.  5.  7.  1.  2.  1.  3.]
 | |
|                [ 7.  1.  7.  9.  2.  1.  3.  5.]
 | |
|                [ 5.  7.  2.  4.  1.  3.  9.  0.]
 | |
|                [ 7.  9.  4.  8.  3.  5.  0.  8.]]
 | |
| output.dims = {8, 8}
 | |
| output.lod = [[0, 4, 8]]
 | |
| 
 | |
| )DOC");
 | |
|   }
 | |
| };
 | |
| 
 | |
| class Im2SequenceGradOp : public framework::OperatorWithKernel {
 | |
|  public:
 | |
|   using framework::OperatorWithKernel::OperatorWithKernel;
 | |
| 
 | |
|  protected:
 | |
|   void InferShape(framework::InferShapeContext* ctx) const override {
 | |
|     PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
 | |
|                       platform::errors::NotFound(
 | |
|                           "The input 'X' of Im2SequenceGradOp is not found."));
 | |
|     PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true,
 | |
|                       platform::errors::NotFound(
 | |
|                           "The input %s of Im2SequenceGradOp is not found.",
 | |
|                           framework::GradVarName("Out")));
 | |
|     ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
 | |
|   }
 | |
| };
 | |
| 
 | |
| template <typename T>
 | |
| class Im2SequenceGradMaker : public framework::SingleGradOpMaker<T> {
 | |
|  public:
 | |
|   using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
 | |
| 
 | |
|  protected:
 | |
|   void Apply(GradOpPtr<T> op) const override {
 | |
|     op->SetType("im2sequence_grad");
 | |
|     op->SetInput("X", this->Input("X"));
 | |
|     op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
 | |
|     op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
 | |
|     op->SetAttrMap(this->Attrs());
 | |
|   }
 | |
| };
 | |
| 
 | |
| }  // namespace operators
 | |
| }  // namespace paddle
 | |
| 
 | |
// Short alias used by the registration macros below.
namespace ops = paddle::operators;

// Register the forward op together with its proto maker and the grad-op
// maker, the latter instantiated once for framework::OpDesc and once for
// imperative::OpBase.
REGISTER_OPERATOR(im2sequence, ops::Im2SequenceOp, ops::Im2SequenceOpMaker,
                  ops::Im2SequenceGradMaker<paddle::framework::OpDesc>,
                  ops::Im2SequenceGradMaker<paddle::imperative::OpBase>);
// The backward op only needs its operator class.
REGISTER_OPERATOR(im2sequence_grad, ops::Im2SequenceGradOp);
// CPU kernels: only the float instantiation is listed for the forward op...
REGISTER_OP_CPU_KERNEL(
    im2sequence,
    ops::Im2SequenceKernel<paddle::platform::CPUDeviceContext, float>);
// ...and likewise for the backward op.
REGISTER_OP_CPU_KERNEL(
    im2sequence_grad,
    ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
 |