You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
112 lines
3.7 KiB
112 lines
3.7 KiB
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License. */
|
|
|
|
|
|
#pragma once
|
|
|
|
#include "ConvBaseLayer.h"
|
|
#include "paddle/math/Matrix.h"
|
|
#include <vector>
|
|
|
|
namespace paddle {
|
|
|
|
/**
|
|
* @brief A subclass of ConvBaseLayer by cuDNN implementation. It only
|
|
* supports GPU mode. We automatic select CudnnConvLayer for GPU
|
|
* mode and ExpandConvLayer for CPU mode if you set type of "conv".
|
|
* User also can specfiy type of "exconv" or "cudnn_conv" for
|
|
* particular type.
|
|
*
|
|
* The config file api is img_conv_layer.
|
|
*/
|
|
class CudnnConvLayer : public ConvBaseLayer {
|
|
private:
|
|
/// resize Cudnn workspace size
|
|
void allocConvWorkSpace(size_t maxWorkSpace);
|
|
|
|
protected:
|
|
int imageH_, imageW_, outputH_, outputW_;
|
|
/// Cudnn tensor descriptor for bias.
|
|
hl_tensor_descriptor biasDesc_;
|
|
/// Cudnn tensor descriptor for input.
|
|
std::vector<hl_tensor_descriptor> inputDesc_;
|
|
/// Cudnn tensor descriptor for output.
|
|
std::vector<hl_tensor_descriptor> outputDesc_;
|
|
/// Cudnn tensor descriptor for filter.
|
|
std::vector<hl_filter_descriptor> filterDesc_;
|
|
/// Cudnn tensor descriptor for a convolution operation.
|
|
std::vector<hl_convolution_descriptor> convDesc_;
|
|
/// One sample offset of input data.
|
|
IntV inputOffset_;
|
|
/// One sample offset of output data.
|
|
IntV outputOffset_;
|
|
/// One group offset of weight.
|
|
IntV weightOffset_;
|
|
/// One group offset of bias.
|
|
int biasOffset_;
|
|
|
|
/// Save the algorithm for forward convolution, which is obtained by cudnn
|
|
/// api to search the best suited algorithm.
|
|
std::vector<int> fwdAlgo_;
|
|
/// Save the algorithm for computing convolution gradient with respect to
|
|
/// filter coefficients.
|
|
std::vector<int> bwdFilterAlgo_;
|
|
/// Save the algorithm for computing convolution gradient with respect to
|
|
/// the output.
|
|
std::vector<int> bwdDataAlgo_;
|
|
/// Amount of GPU memory needed as workspace to be able to execute a
|
|
/// forward convolution with the specified algo.
|
|
std::vector<size_t> fwdLimitBytes_;
|
|
/// Amount of GPU memory needed as workspace to be able to execute a
|
|
/// backwardFilter with the specified algo.
|
|
std::vector<size_t> bwdFilterLimitBytes_;
|
|
/// Amount of GPU memory needed as workspace to be able to execute a
|
|
/// backwardData with the specified algo.
|
|
std::vector<size_t> bwdDataLimitBytes_;
|
|
|
|
/// Device work space address for each group.
|
|
std::vector<void*> workSpace_;
|
|
/// Max number of groups.
|
|
int maxGroups_;
|
|
/// Total work space address in device for all groups.
|
|
void* workSpaceData_;
|
|
/// Size of total work space.
|
|
size_t workSpaceInBytes_;
|
|
|
|
/// Is or not select conv algorihtm.
|
|
bool isSelectAlgo_;
|
|
|
|
public:
|
|
explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
|
|
|
|
~CudnnConvLayer();
|
|
|
|
/**
|
|
* Intialization. Initialize member variables and create tenor descriptor.
|
|
*/
|
|
bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
|
|
/**
|
|
* Reshape is done each forward. Reshape tensor decriptor
|
|
* inputDesc_, outputDesc_, convDesc_. And search the faster algo
|
|
* or the fastest algo within a given memeory limit.
|
|
*/
|
|
void reshape(int batchSize);
|
|
void forward(PassType passType);
|
|
void backward(const UpdateCallback& callback);
|
|
void addBiases();
|
|
void bpropBiases();
|
|
};
|
|
|
|
} // namespace paddle
|