feature/design_of_v2_layer_converter
commit
9c1c19b6d0
@ -1 +0,0 @@
|
||||
.gitignore
|
@ -0,0 +1,15 @@
|
||||
*.DS_Store
|
||||
build/
|
||||
*.user
|
||||
.vscode
|
||||
.idea
|
||||
.project
|
||||
.cproject
|
||||
.pydevproject
|
||||
Makefile
|
||||
.test_env/
|
||||
third_party/
|
||||
*~
|
||||
bazel-*
|
||||
|
||||
!build/*.deb
|
@ -1,17 +0,0 @@
|
||||
PADDLE_BUILD_DIR="@CMAKE_CURRENT_BINARY_DIR@/../"
|
||||
WITH_GPU="@WITH_GPU@"
|
||||
PROTOBUF_LIBRARY="@PROTOBUF_LIBRARY@"
|
||||
ZLIB_LIBRARIES="@ZLIB_LIBRARIES@"
|
||||
CMAKE_THREAD_LIB="@CMAKE_THREAD_LIBS_INIT@"
|
||||
CMAKE_DL_LIBS="@CMAKE_DL_LIBS@"
|
||||
|
||||
|
||||
WITH_PYTHON="@WITH_PYTHON@"
|
||||
PYTHON_LIBRARIES="@PYTHON_LIBRARIES@"
|
||||
GLOG_LIBRARIES="@GLOG_LIBRARIES@"
|
||||
GFLAGS_LIBRARIES="@GFLAGS_LIBRARIES@"
|
||||
GFLAGS_LOCATION="@GFLAGS_LOCATION@"
|
||||
CBLAS_LIBRARIES="@CBLAS_LIBRARIES@"
|
||||
|
||||
CUDA_LIBRARIES="@CUDA_CUDART_LIBRARY@"
|
||||
WITH_COVERALLS="@ON_COVERALLS@"
|
@ -1,157 +0,0 @@
|
||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
try:
    # paddle_api_config is generated by CMake; it supplies the upper-case
    # configuration strings (PADDLE_BUILD_DIR, WITH_GPU, ...) used below.
    from paddle_api_config import *
    import os.path
    import platform
except ImportError:

    class PaddleLDFlag(object):
        """Placeholder used when the build configuration cannot be imported.

        Both queries return ``None``.
        """

        def ldflag_str(self):
            pass

        def c_flag(self):
            pass
else:
    system = platform.system().lower()
    is_osx = (system == 'darwin')
    is_win = (system == 'windows')
    is_lin = (system == 'linux')

    if is_lin:
        whole_start = "-Wl,--whole-archive"
        whole_end = "-Wl,--no-whole-archive"
    else:
        # macOS and every other platform: no whole-archive wrapping. Defining
        # the names unconditionally keeps libs_str() from raising NameError
        # on platforms that are neither Linux nor macOS.
        whole_start = ""
        whole_end = ""

    LIB_DIRS = [
        "math", 'function', 'utils', 'parameter', "gserver", "api", "cuda",
        "pserver", "trainer"
    ]
    PARENT_LIB_DIRS = ['proto']

    class PaddleLDFlag(object):
        """Turn the CMake-generated configuration strings into link and
        compile flags for building against the Paddle libraries."""

        # Constants that CMake evaluates as false (compared case-insensitively).
        _CMAKE_FALSE_CONSTANTS = frozenset(
            ["", "0", "FALSE", "OFF", "NO", "N", "IGNORE", "NOTFOUND"])

        def __init__(self):
            self.paddle_build_dir = os.path.abspath(PADDLE_BUILD_DIR)
            self.with_gpu = PaddleLDFlag.cmake_bool(WITH_GPU)
            self.protolib = PROTOBUF_LIBRARY
            self.zlib = ZLIB_LIBRARIES
            self.thread = CMAKE_THREAD_LIB
            self.dl_libs = CMAKE_DL_LIBS
            self.with_python = PaddleLDFlag.cmake_bool(WITH_PYTHON)
            self.python_libs = PYTHON_LIBRARIES

            self.glog_libs = GLOG_LIBRARIES

            self.with_coverage = PaddleLDFlag.cmake_bool(WITH_COVERALLS)
            self.gflags_libs = GFLAGS_LIBRARIES
            self.gflags_location = GFLAGS_LOCATION
            self.cblas_libs = CBLAS_LIBRARIES
            self.curt = CUDA_LIBRARIES

        def ldflag_str(self):
            """Return the complete linker flag string: library search paths
            followed by the libraries themselves."""
            return " ".join(
                [self.libs_dir_str(), self.parent_dir_str(), self.libs_str()])

        def libs_dir_str(self):
            """Return one ``-L`` flag per entry of LIB_DIRS, rooted at the
            build directory."""
            return " ".join(
                "-L" + os.path.join(self.paddle_build_dir, d)
                for d in LIB_DIRS)

        def parent_dir_str(self):
            """Return one ``-L`` flag per entry of PARENT_LIB_DIRS, rooted one
            level above the build directory."""
            return " ".join(
                "-L" + os.path.join(self.paddle_build_dir, '..', d)
                for d in PARENT_LIB_DIRS)

        def libs_str(self):
            """Return the ``-l``/path flags for the Paddle libraries and the
            third-party libraries named in the configuration."""
            libs = [
                whole_start,
                "-lpaddle_gserver",
                "-lpaddle_function",
                whole_end,
                "-lpaddle_pserver",
                "-lpaddle_trainer_lib",
                "-lpaddle_network",
                '-lpaddle_parameter',
                "-lpaddle_math",
                '-lpaddle_utils',
                "-lpaddle_proto",
                "-lpaddle_cuda",
                "-lpaddle_api",
                self.normalize_flag(self.protolib),
                self.normalize_flag(self.glog_libs),
                self.normalize_flag(self.gflags_libs),
                self.normalize_flag(self.zlib),
                self.normalize_flag(self.thread),
                self.normalize_flag(self.dl_libs),
                self.normalize_flag(self.cblas_libs),
            ]

            if self.with_python:
                libs.append(self.normalize_flag(self.python_libs))
            if self.with_gpu:
                libs.append(self.normalize_flag(self.curt))
            if self.with_coverage:
                libs.append("-fprofile-arcs")
            # Empty entries (e.g. unset libraries) must not add extra spaces.
            return " ".join(lib for lib in libs if lib)

        def normalize_flag(self, cmake_flag):
            """
            CMake flag string to ld flag

            A ``;``-separated CMake list is converted item by item; absolute
            paths and ``-l`` flags pass through unchanged; the gflags target
            names map to the configured gflags location; any other non-empty
            name gets a ``-l`` prefix.

            :type cmake_flag: str
            :rtype: str
            """
            if ";" in cmake_flag:
                parts = [
                    self.normalize_flag(item)
                    for item in cmake_flag.split(";")
                ]
                # Skip empty list items so they do not leave stray spaces.
                return " ".join(part for part in parts if part)
            if cmake_flag.startswith("/"):  # is a path
                return cmake_flag
            elif cmake_flag.startswith("-l"):  # normal link command
                return cmake_flag
            elif cmake_flag in [
                    "gflags-shared", "gflags-static",
                    "gflags_nothreads-shared", "gflags_nothreads-static"
            ]:  # special for gflags
                assert PaddleLDFlag.cmake_bool(self.gflags_location), \
                    "gflags target requested but GFLAGS_LOCATION is not set"
                return self.gflags_location
            elif cmake_flag:
                return "-l" + cmake_flag
            else:
                return ""

        @staticmethod
        def cmake_bool(cmake_str):
            """
            CMake bool string to bool

            Follows CMake's rules for constants: ``0``, ``OFF``, ``NO``,
            ``FALSE``, ``N``, ``IGNORE``, ``NOTFOUND`` (in any letter case),
            the empty string, and anything ending in ``-NOTFOUND`` are false;
            every other string is treated as true.

            :param cmake_str: cmake boolean string
            :type cmake_str: str
            :rtype: bool
            """
            if cmake_str.upper() in PaddleLDFlag._CMAKE_FALSE_CONSTANTS:
                return False
            return not cmake_str.endswith("-NOTFOUND")

        def c_flag(self):
            """Return the list of compiler flags; coverage builds add
            profiling flags and disable optimization."""
            if self.with_coverage:
                return [
                    "-fprofile-arcs", "-ftest-coverage", "-O0", "-g",
                    "-std=c++11"
                ]
            else:
                return ["-std=c++11"]
|
@ -0,0 +1,150 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "ConvBaseOperator.h"
|
||||
#include "paddle/math/MathUtils.h"
|
||||
#include "paddle/math/Matrix.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* @brief ConvBaseOperator takes two inputs to perform the convolution.
|
||||
* The first input is the image, and the second input is the convolution kernel.
|
||||
* The height of data for two inputs are the same. Each data of the first input
|
||||
* is convolved with each data of the second input independently.
|
||||
*
|
||||
* The config file api is conv_operator.
|
||||
*/
|
||||
|
||||
/**
 * Construct the operator from its configuration.
 *
 * Requires useGpu to be true and the config to list exactly two input
 * indices (both enforced with CHECK). Reads the convolution parameters,
 * creates the descriptors, and leaves the algorithm selection and the
 * workspace in their "nothing chosen / nothing allocated" state.
 */
ConvBaseOperator::ConvBaseOperator(const OperatorConfig &config, bool useGpu)
    : Operator(config, useGpu) {
  CHECK(useGpu);
  CHECK_EQ(config_.input_indices_size(), 2L);

  caffeMode_ = true;

  // Default algorithms, no workspace limits, and no workspace allocated yet.
  fwdAlgo_ = bwdFilterAlgo_ = bwdDataAlgo_ = 0;
  fwdLimitBytes_ = bwdDataLimitBytes_ = bwdFilterLimitBytes_ = 0;
  workSpaceInBytes_ = 0;
  workSpace_ = nullptr;
  isSelectAlgo_ = false;

  // Parameters must be read before the descriptors that depend on them.
  getConvParams();
  computeConvSizes();
}
|
||||
|
||||
void ConvBaseOperator::allocConvWorkSpace() {
|
||||
hl_conv_workspace(imageDesc_,
|
||||
outputDesc_,
|
||||
filterDesc_,
|
||||
convDesc_,
|
||||
&fwdAlgo_,
|
||||
&fwdLimitBytes_,
|
||||
&bwdDataAlgo_,
|
||||
&bwdDataLimitBytes_,
|
||||
&bwdFilterAlgo_,
|
||||
&bwdFilterLimitBytes_);
|
||||
|
||||
size_t maxWorkSpace = 0;
|
||||
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
|
||||
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
|
||||
|
||||
if (maxWorkSpace > workSpaceInBytes_) {
|
||||
if (workSpaceInBytes_ != 0) {
|
||||
hl_free_mem_device(workSpace_);
|
||||
}
|
||||
// total amount of storage needed
|
||||
workSpace_ = hl_malloc_device(maxWorkSpace);
|
||||
workSpaceInBytes_ = maxWorkSpace;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Create the filter, image, output and convolution descriptors from the
 * parameters read by getConvParams(). The image and output descriptors are
 * only created here; their shapes are set in reshapeImageDescriptors().
 */
void ConvBaseOperator::computeConvSizes() {
  hl_create_filter_descriptor(
      &filterDesc_, channels_, numFilters_, filterSizeY_, filterSize_);

  hl_create_tensor_descriptor(&imageDesc_);
  hl_create_tensor_descriptor(&outputDesc_);

  // The convolution descriptor is built from the image and filter
  // descriptors, so it has to come last.
  hl_create_convolution_descriptor(&convDesc_,
                                   imageDesc_,
                                   filterDesc_,
                                   paddingY_,
                                   padding_,
                                   strideY_,
                                   stride_);
}
|
||||
|
||||
void ConvBaseOperator::reshapeImageDescriptors() {
|
||||
hl_tensor_reshape(imageDesc_,
|
||||
1,
|
||||
channels_,
|
||||
imageH_,
|
||||
imageW_,
|
||||
channels_ * imageH_ * imageW_,
|
||||
imageH_ * imageW_,
|
||||
imageW_,
|
||||
1);
|
||||
hl_tensor_reshape(outputDesc_,
|
||||
1,
|
||||
numFilters_,
|
||||
outputH_,
|
||||
outputW_,
|
||||
numFilters_ * outputH_ * outputW_,
|
||||
outputH_ * outputW_,
|
||||
outputW_,
|
||||
1);
|
||||
hl_reset_convolution_descriptor(convDesc_,
|
||||
imageDesc_,
|
||||
filterDesc_,
|
||||
paddingY_,
|
||||
padding_,
|
||||
strideY_,
|
||||
stride_);
|
||||
}
|
||||
|
||||
void ConvBaseOperator::getConvParams() {
|
||||
configNumFilters_ = config_.num_filters();
|
||||
const ConvConfig &conf = config_.conv_conf();
|
||||
padding_ = conf.padding();
|
||||
stride_ = conf.stride();
|
||||
filterSize_ = conf.filter_size();
|
||||
paddingY_ = conf.padding_y();
|
||||
strideY_ = conf.stride_y();
|
||||
filterSizeY_ = conf.filter_size_y();
|
||||
filterPixels_ = filterSize_ * filterSizeY_;
|
||||
configChannels_ = conf.channels();
|
||||
imgSize_ = conf.img_size();
|
||||
imgSizeY_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
|
||||
imgPixels_ = imgSize_ * imgSizeY_;
|
||||
CHECK_EQ(conf.groups(), 1U);
|
||||
filterChannels_ = conf.filter_channels();
|
||||
outputX_ = conf.output_x();
|
||||
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
|
||||
outputs_ = outputX_ * outputX_;
|
||||
|
||||
isDeconv_ = (config_.type() == "conv") ? false : true;
|
||||
if (isDeconv_) {
|
||||
channels_ = configNumFilters_;
|
||||
numFilters_ = configChannels_;
|
||||
} else {
|
||||
channels_ = configChannels_;
|
||||
numFilters_ = configNumFilters_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,112 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
#pragma once
|
||||
|
||||
#include "Operator.h"
|
||||
#include "paddle/math/MathUtils.h"
|
||||
#include "paddle/math/Matrix.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* @brief ConvBaseOperator takes two inputs to perform the convolution.
|
||||
* The first input is the image, and the second input is the convolution kernel.
|
||||
* The height of data for two inputs are the same. Each data of the first input
|
||||
* is convolved with each data of the second input independently.
|
||||
*
|
||||
* The config file api is conv_operator.
|
||||
*/
|
||||
|
||||
class ConvBaseOperator : public Operator {
|
||||
public:
|
||||
ConvBaseOperator(const OperatorConfig &config, bool useGpu);
|
||||
/**
|
||||
* Free workspace in device and destroy cudnn tensor descriptor.
|
||||
*/
|
||||
virtual ~ConvBaseOperator() {
|
||||
if (workSpaceInBytes_ != 0) {
|
||||
hl_free_mem_device(workSpace_);
|
||||
workSpaceInBytes_ = 0;
|
||||
}
|
||||
|
||||
hl_destroy_tensor_descriptor(imageDesc_);
|
||||
hl_destroy_tensor_descriptor(outputDesc_);
|
||||
hl_destroy_filter_descriptor(filterDesc_);
|
||||
hl_destroy_convolution_descriptor(convDesc_);
|
||||
}
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Get convolution parameters from layer config and
|
||||
* initialize member variables.
|
||||
*/
|
||||
void getConvParams();
|
||||
|
||||
/**
|
||||
* Allocate Gpu Memory for cudnn convolution algorithms.
|
||||
*/
|
||||
void allocConvWorkSpace();
|
||||
|
||||
/**
|
||||
* Create cudnn tensor descriptor for convolution operation.
|
||||
*/
|
||||
void computeConvSizes();
|
||||
|
||||
/**
|
||||
* Reshape cudnn tensor descriptor.
|
||||
*/
|
||||
void reshapeImageDescriptors();
|
||||
|
||||
/**
|
||||
* Reshape cudnn tensor descriptor.
|
||||
*/
|
||||
virtual void reshape(int batchSize) = 0;
|
||||
|
||||
/**
|
||||
* Check filter size is equal to the size calculated by parameters from
|
||||
* layer config.
|
||||
*/
|
||||
void checkFilterSize(const MatrixPtr &filter) {
|
||||
CHECK_EQ(static_cast<int>(filter->getWidth()),
|
||||
filterSize_ * filterSizeY_ * channels_ * numFilters_);
|
||||
}
|
||||
|
||||
/// Most of member variables are same with CudnnConvLayer.
|
||||
/// There is no explanation here.
|
||||
bool isDeconv_;
|
||||
int imageH_, imageW_, outputH_, outputW_;
|
||||
hl_tensor_descriptor imageDesc_;
|
||||
hl_tensor_descriptor outputDesc_;
|
||||
hl_filter_descriptor filterDesc_;
|
||||
hl_convolution_descriptor convDesc_;
|
||||
bool caffeMode_;
|
||||
int inputOffset_, outputOffset_, weightOffset_;
|
||||
int numFilters_, channels_;
|
||||
|
||||
/// from parsing config
|
||||
int configNumFilters_, configChannels_;
|
||||
int padding_, stride_, filterSize_, imgSize_, imgSizeY_;
|
||||
int paddingY_, strideY_, filterSizeY_;
|
||||
int imgPixels_, filterPixels_, filterChannels_, outputX_, outputY_, outputs_;
|
||||
|
||||
/// Following member variables are same with CudnnConvLayer.
|
||||
/// There is no explanation here.
|
||||
int fwdAlgo_, bwdFilterAlgo_, bwdDataAlgo_;
|
||||
size_t fwdLimitBytes_, bwdDataLimitBytes_, bwdFilterLimitBytes_;
|
||||
size_t workSpaceInBytes_;
|
||||
void *workSpace_;
|
||||
bool isSelectAlgo_;
|
||||
};
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,195 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "ConvBaseProjection.h"
|
||||
#include "paddle/utils/Stat.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
ThreadLocalD<std::vector<MemoryHandle *>> ConvBaseProjection::convMem_;
|
||||
|
||||
/**
 * Construct the projection: read the convolution parameters, create the
 * descriptors, and wrap `parameter` in a Weight of
 * (filterH_ * filterW_ * channels_ / groups_) rows by numFilters_ columns.
 * GPU only (enforced with CHECK).
 */
ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config,
                                       ParameterPtr parameter,
                                       bool useGpu)
    : Projection(config, parameter, useGpu) {
  CHECK(useGpu);  // only support GPU
  getConvParams();
  initCudnn();

  const size_t weightRows = filterH_ * filterW_ * channels_ / groups_;
  const size_t weightCols = numFilters_;
  weight_.reset(new Weight(weightRows, weightCols, parameter));

  // Number of weight elements belonging to one group.
  weightOffset_ = weightRows * weightCols / groups_;
}
|
||||
|
||||
void ConvBaseProjection::getConvParams() {
|
||||
const ConvConfig &conf = config_.conv_conf();
|
||||
paddingH_ = conf.padding_y();
|
||||
paddingW_ = conf.padding();
|
||||
|
||||
strideH_ = conf.stride_y();
|
||||
strideW_ = conf.stride();
|
||||
|
||||
filterH_ = conf.filter_size_y();
|
||||
filterW_ = conf.filter_size();
|
||||
|
||||
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
|
||||
configImgW_ = conf.img_size();
|
||||
|
||||
configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
|
||||
configOutW_ = conf.output_x();
|
||||
|
||||
configChannels_ = conf.channels();
|
||||
configNumFilters_ = config_.num_filters();
|
||||
|
||||
isDeconv_ = (config_.type() == "conv") ? false : true;
|
||||
|
||||
channels_ = (isDeconv_) ? configNumFilters_ : configChannels_;
|
||||
numFilters_ = (isDeconv_) ? configChannels_ : configNumFilters_;
|
||||
|
||||
groups_ = conf.groups();
|
||||
CHECK_EQ(channels_ % groups_, 0);
|
||||
CHECK_EQ(numFilters_ % groups_, 0);
|
||||
}
|
||||
|
||||
void ConvBaseProjection::initCudnn() {
|
||||
hl_create_filter_descriptor(&filterDesc_,
|
||||
channels_ / groups_,
|
||||
numFilters_ / groups_,
|
||||
filterH_,
|
||||
filterW_);
|
||||
hl_create_tensor_descriptor(&imageDesc_);
|
||||
hl_create_tensor_descriptor(&outputDesc_);
|
||||
hl_create_convolution_descriptor(&convDesc_,
|
||||
imageDesc_,
|
||||
filterDesc_,
|
||||
paddingH_,
|
||||
paddingW_,
|
||||
strideH_,
|
||||
strideW_);
|
||||
|
||||
// initialize all to default algorithms
|
||||
fwdAlgo_ = 0;
|
||||
bwdFilterAlgo_ = 0;
|
||||
bwdDataAlgo_ = 0;
|
||||
fwdLimitBytes_ = 0;
|
||||
bwdDataLimitBytes_ = 0;
|
||||
bwdFilterLimitBytes_ = 0;
|
||||
workSpaceInBytes_ = 0;
|
||||
|
||||
batchNum_ = 0;
|
||||
isSelectAlgo_ = false;
|
||||
}
|
||||
|
||||
void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
|
||||
// The stride between two consecutive samples in the output of ConvProjection
|
||||
// may not be numFilters_ * outputH_ * outputW_ (conv) or
|
||||
// channels_ * imageH_ * imageW_ (deconv)
|
||||
// for example, in the case of layer ConcatenateLayer2 with two
|
||||
// ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
|
||||
// So the calculation of nStride is different from CudnnConvLayer.
|
||||
size_t nStrideImage, nStrideOutput;
|
||||
if (isDeconv_) {
|
||||
nStrideImage = out_->value->getStride();
|
||||
nStrideOutput = numFilters_ * outputH_ * outputW_;
|
||||
} else {
|
||||
nStrideImage = channels_ * imageH_ * imageW_;
|
||||
nStrideOutput = out_->value->getStride();
|
||||
}
|
||||
|
||||
hl_tensor_reshape(imageDesc_,
|
||||
batchSize,
|
||||
channels_ / groups_,
|
||||
imageH_,
|
||||
imageW_,
|
||||
nStrideImage,
|
||||
imageH_ * imageW_,
|
||||
imageW_,
|
||||
1);
|
||||
|
||||
hl_tensor_reshape(outputDesc_,
|
||||
batchSize,
|
||||
numFilters_ / groups_,
|
||||
outputH_,
|
||||
outputW_,
|
||||
nStrideOutput,
|
||||
outputH_ * outputW_,
|
||||
outputW_,
|
||||
1);
|
||||
|
||||
hl_reset_convolution_descriptor(convDesc_,
|
||||
imageDesc_,
|
||||
filterDesc_,
|
||||
paddingH_,
|
||||
paddingW_,
|
||||
strideH_,
|
||||
strideW_);
|
||||
}
|
||||
|
||||
void ConvBaseProjection::reshape(int batchSize) {
|
||||
size_t width = calOutputSize();
|
||||
CHECK_EQ(width, out_->value->getWidth());
|
||||
CHECK_EQ(calInputSize(), in_->value->getWidth());
|
||||
|
||||
isSelectAlgo_ = (batchSize == batchNum_);
|
||||
batchNum_ = batchSize;
|
||||
|
||||
if (!isSelectAlgo_) {
|
||||
reshapeTensorDesc(batchSize);
|
||||
hl_conv_workspace(imageDesc_,
|
||||
outputDesc_,
|
||||
filterDesc_,
|
||||
convDesc_,
|
||||
&fwdAlgo_,
|
||||
&fwdLimitBytes_,
|
||||
&bwdDataAlgo_,
|
||||
&bwdDataLimitBytes_,
|
||||
&bwdFilterAlgo_,
|
||||
&bwdFilterLimitBytes_);
|
||||
|
||||
size_t maxWorkSpace = 0;
|
||||
maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
|
||||
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
|
||||
workSpaceInBytes_ = maxWorkSpace;
|
||||
|
||||
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_
|
||||
<< " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_;
|
||||
}
|
||||
|
||||
isSelectAlgo_ = true;
|
||||
}
|
||||
|
||||
void *ConvBaseProjection::getSpaceBytes(size_t size) {
|
||||
std::vector<MemoryHandle *> &convMem = *convMem_;
|
||||
if (convMem.empty()) {
|
||||
int numDevices = hl_get_device_count();
|
||||
convMem.resize(numDevices);
|
||||
}
|
||||
|
||||
int devId = hl_get_device();
|
||||
MemoryHandle **localMem = &(convMem[devId]);
|
||||
if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
|
||||
*localMem = new GpuMemoryHandle(size);
|
||||
}
|
||||
return (*localMem)->getBuf();
|
||||
}
|
||||
|
||||
/**
 * Destroy the descriptors created in initCudnn(). The shared workspace
 * buffers held in convMem_ are not touched here.
 */
ConvBaseProjection::~ConvBaseProjection() {
  // Tensor descriptors first, then the filter and convolution descriptors.
  hl_destroy_tensor_descriptor(imageDesc_);
  hl_destroy_tensor_descriptor(outputDesc_);

  hl_destroy_filter_descriptor(filterDesc_);
  hl_destroy_convolution_descriptor(convDesc_);
}
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,116 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Projection.h"
|
||||
#include "paddle/math/MathUtils.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* @brief Base class for ConvProjection and ConvTransProjection.
|
||||
*/
|
||||
class ConvBaseProjection : public Projection {
|
||||
public:
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
ConvBaseProjection(const ProjectionConfig& config,
|
||||
ParameterPtr parameter,
|
||||
bool useGpu);
|
||||
|
||||
~ConvBaseProjection();
|
||||
|
||||
protected:
|
||||
void getConvParams();
|
||||
void initCudnn();
|
||||
|
||||
void reshapeTensorDesc(int batchSize);
|
||||
void reshape(int batchSize);
|
||||
|
||||
virtual size_t calOutputSize() = 0;
|
||||
virtual size_t calInputSize() = 0;
|
||||
|
||||
static void* getSpaceBytes(size_t size);
|
||||
|
||||
/// True if it's deconv projection layer, false if it's ConvProjection layer
|
||||
bool isDeconv_;
|
||||
/// imageH_ and imageW_ / outputH_ and outputW_
|
||||
/// is calculated from the input layer.
|
||||
int imageH_, imageW_;
|
||||
int outputH_, outputW_;
|
||||
/// configImgH_ and configImgW_ / configOutH_ and configOutW_
|
||||
/// is obtained from config.
|
||||
int configImgH_, configImgW_;
|
||||
int configOutH_, configOutW_;
|
||||
/// channels_ and numFilters_ are defined in terms of convolution semantics
|
||||
int channels_, numFilters_;
|
||||
/// configChannels and configNumFilters_ are obtained from config
|
||||
/// For Conv they are the same as channels_ and numFilters
|
||||
/// For ConvTrans they are opposite to channels_ and numFilters
|
||||
int configChannels_, configNumFilters_;
|
||||
int paddingH_, paddingW_;
|
||||
int strideH_, strideW_;
|
||||
int filterH_, filterW_;
|
||||
/// One group offset of input data.
|
||||
int inputOffset_;
|
||||
/// One group offset of output data.
|
||||
int outputOffset_;
|
||||
/// One group offset of weight.
|
||||
int weightOffset_;
|
||||
int groups_;
|
||||
|
||||
/// Cudnn tensor descriptor for input.
|
||||
hl_tensor_descriptor imageDesc_;
|
||||
/// Cudnn tensor descriptor for output.
|
||||
hl_tensor_descriptor outputDesc_;
|
||||
/// Cudnn tensor descriptor for filter.
|
||||
hl_filter_descriptor filterDesc_;
|
||||
/// Cudnn tensor descriptor for a convolution operation.
|
||||
hl_convolution_descriptor convDesc_;
|
||||
|
||||
/// Record the algorithm for forward convolution, which is obtained by cudnn
|
||||
/// api to search the best suited algorithm.
|
||||
int fwdAlgo_;
|
||||
/// Record the algorithm for computing convolution gradient with respect to
|
||||
/// filter coefficients.
|
||||
int bwdFilterAlgo_;
|
||||
/// Record the algorithm for computing convolution gradient with respect to
|
||||
/// the output.
|
||||
int bwdDataAlgo_;
|
||||
/// Amount of GPU memory needed as workspace to be able to execute a
|
||||
/// forward convolution with the specified algo.
|
||||
size_t fwdLimitBytes_;
|
||||
/// Amount of GPU memory needed as workspace to be able to execute a
|
||||
/// backwardFilter with the specified algo.
|
||||
size_t bwdDataLimitBytes_;
|
||||
/// Amount of GPU memory needed as workspace to be able to execute a
|
||||
/// backwardData with the specified algo.
|
||||
size_t bwdFilterLimitBytes_;
|
||||
/// Size of total work space.
|
||||
size_t workSpaceInBytes_;
|
||||
|
||||
/// Whether to call cuDNN api to choose conv algorithm.
|
||||
bool isSelectAlgo_;
|
||||
/// batchNum is used to record batch size. If the batch size is changed,
|
||||
/// the selection algorithm will be called.
|
||||
int batchNum_;
|
||||
bool bias_;
|
||||
|
||||
std::unique_ptr<Weight> weight_;
|
||||
static ThreadLocalD<std::vector<MemoryHandle*>> convMem_;
|
||||
};
|
||||
|
||||
} // namespace paddle
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,44 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
#pragma once
|
||||
|
||||
#include "ConvBaseOperator.h"
|
||||
#include "paddle/math/MathUtils.h"
|
||||
#include "paddle/math/Matrix.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* @brief ConvOperator takes two inputs to perform the convolution.
|
||||
* The first input is the image, and the second input is the convolution kernel.
|
||||
* The height of data for two inputs are the same. Each data of the first input
|
||||
* is convolved with each data of the second input independently.
|
||||
*
|
||||
* The config file api is conv_operator.
|
||||
*/
|
||||
|
||||
class ConvOperator : public ConvBaseOperator {
|
||||
public:
|
||||
ConvOperator(const OperatorConfig &config, bool useGpu)
|
||||
: ConvBaseOperator(config, useGpu) {}
|
||||
/**
|
||||
* Free workspace in device and destroy cudnn tensor descriptor.
|
||||
*/
|
||||
virtual ~ConvOperator() {}
|
||||
void forward() override;
|
||||
void backward() override;
|
||||
void reshape(int batchSize) override;
|
||||
};
|
||||
|
||||
} // namespace paddle
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue