You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
161 lines
4.7 KiB
161 lines
4.7 KiB
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License. */
|
|
|
|
|
|
#pragma once
|
|
|
|
#include "Layer.h"
|
|
#include "paddle/math/MathUtils.h"
|
|
namespace paddle {
|
|
|
|
/**
|
|
* @brief A Base Convolution Layer, which convolves the input image
|
|
* with learned filters and (optionally) adds biases.
|
|
*/
|
|
|
|
class ConvBaseLayer : public Layer {
|
|
protected:
|
|
typedef std::vector<int> IntV;
|
|
|
|
/// True if it's convolution layer, false if it's deconv layer
|
|
bool isConv_;
|
|
|
|
/// The number of filters.
|
|
int numFilters_;
|
|
/// The x dimension of the padding.
|
|
IntV padding_;
|
|
/// The y dimension of the padding.
|
|
IntV paddingY_;
|
|
/// The x dimension of the stride.
|
|
IntV stride_;
|
|
/// The y dimension of the stride.
|
|
IntV strideY_;
|
|
/// The x dimension of a filter kernel.
|
|
IntV filterSize_;
|
|
/// The y dimension of a filter kernel.
|
|
IntV filterSizeY_;
|
|
/// The spatial dimensions of the convolution input.
|
|
IntV channels_;
|
|
/// The spatial dimensions of input feature map height.
|
|
IntV imgSizeH_;
|
|
/// The spatial dimensions of input feature map width.
|
|
IntV imgSizeW_;
|
|
/// filterPixels_ = filterSizeX_ * filterSizeY_.
|
|
IntV filterPixels_;
|
|
/// filterChannels_ = channels_/groups_.
|
|
IntV filterChannels_;
|
|
/// The spatial dimensions of output feature map height.
|
|
IntV outputH_;
|
|
/// The spatial dimensions of output feature map width.
|
|
IntV outputW_;
|
|
/// Group size, refer to grouped convolution in
|
|
/// Alex Krizhevsky's paper: when group=2, the first half of the
|
|
/// filters are only connected to the first half of the input channels,
|
|
/// and the second half only connected to the second half.
|
|
IntV groups_;
|
|
/// Whether the bias is shared for feature in each channel.
|
|
bool sharedBiases_;
|
|
|
|
/// shape of weight: (numChannels * filterPixels_, numFilters)
|
|
WeightList weights_;
|
|
/// If shared_biases is false shape of bias: (numFilters_, 1)
|
|
/// If shared_biases is ture shape of bias:
|
|
/// (numFilters_ * outputX * outputY, 1)
|
|
std::unique_ptr<Weight> biases_;
|
|
|
|
/// True by default. The only difference is the calculation
|
|
/// of output size.
|
|
bool caffeMode_;
|
|
|
|
/*The expandInput_ and transOutValue_ are used for CPU expand conv calc*/
|
|
/// Expand one sample at a time. shape:
|
|
/// (numChannels * filterPixels_, outputSizeH * outputSizeW)
|
|
MatrixPtr expandInput_;
|
|
/// The transpose of output, which is an auxiliary matrix.
|
|
MatrixPtr transOutValue_;
|
|
|
|
public:
|
|
explicit ConvBaseLayer(const LayerConfig& config) : Layer(config) {}
|
|
|
|
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
|
|
|
|
/**
|
|
* imgSizeH_ and imgSizeW_ will be set according to the previous input layers
|
|
* in this function. Then it will calculate outputH_ and outputW_ and set them
|
|
* into output argument.
|
|
*/
|
|
virtual size_t calOutputSize();
|
|
|
|
Weight& getWeight(int idx) { return *weights_[idx]; }
|
|
|
|
/**
|
|
* Calculate output size based on caffeMode_.
|
|
* - input(+padding): 0123456789
|
|
* - imageSize(+padding) = 10;
|
|
* - filterSize = 3;
|
|
* - stride = 2;
|
|
* - caffeMode_ is true:
|
|
- output: (012), (234), (456), (678)
|
|
- outputSize = 4;
|
|
* - caffeMode_ is false:
|
|
* - output: (012), (234), (456), (678), (9)
|
|
* - outputSize = 5;
|
|
*/
|
|
int outputSize(int imageSize, int filterSize, int padding, int stride) {
|
|
int outputSize;
|
|
if (!caffeMode_) {
|
|
outputSize =
|
|
(imageSize - filterSize + 2 * padding + stride - 1) / stride + 1;
|
|
} else {
|
|
outputSize = (imageSize - filterSize + 2 * padding) / stride + 1;
|
|
}
|
|
CHECK_GE(outputSize, 1);
|
|
return outputSize;
|
|
}
|
|
|
|
int imageSize(int outputSize, int filterSize, int padding, int stride) {
|
|
int imageSize;
|
|
if (!caffeMode_) {
|
|
imageSize =
|
|
(outputSize - 1) * stride + filterSize - 2 * padding - stride + 1;
|
|
} else {
|
|
imageSize = (outputSize - 1) * stride + filterSize - 2 * padding;
|
|
}
|
|
CHECK_GE(imageSize, 1);
|
|
return imageSize;
|
|
}
|
|
|
|
/**
|
|
* Create or resize expandInput_.
|
|
*/
|
|
void resetExpandInput(size_t height, size_t width);
|
|
|
|
/**
|
|
* Create or resize transOutValue_.
|
|
*/
|
|
void resetConvOutput(size_t batchSize, int inIdx);
|
|
|
|
/**
|
|
* Add shared bias.
|
|
*/
|
|
void addSharedBias();
|
|
|
|
/**
|
|
* Add unshared bias.
|
|
*/
|
|
void addUnsharedBias();
|
|
};
|
|
|
|
} // namespace paddle
|