support rectangle padding, stride, window and input for PoolProjection (#115)

* support rectangle padding, stride, window and input for PoolProjection

* Address review comments:
1. Remove start.
2. Refine img_pool_a/b.conf for test_NetworkCompare.
3. Split the unit test.

* Modify the test in img_layers.py
qingqing01 authored 8 years ago, committed by hedaoyuan
parent 8a044d2e2d
commit 191fafe355

@@ -84,16 +84,23 @@ extern void hl_expand_feature2col(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
*
*/
extern void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData);
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/**
* @brief Maximum pool backward.
@@ -107,21 +114,28 @@ extern void hl_maxpool_forward(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[out] targetGrad output grad.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] scaleA scale.
* @param[in] scaleB scale.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
*
*/
extern void hl_maxpool_backward(
int frameCnt, const real* inputData,
const int frameCnt, const real* inputData,
const real* outData, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* targetGrad,
real scaleA, real scaleB);
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad);
/**
* @brief Average pool forward.
@@ -133,16 +147,23 @@ extern void hl_maxpool_backward(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data.
*
*/
extern void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData);
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/**
* @brief Average pool backward.
@@ -154,20 +175,27 @@ extern void hl_avgpool_forward(
* @param[in] width image width.
* @param[in] pooledH output image height.
* @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window.
* @param[in] stride pooling stride.
* @param[in] start pooling start.
* @param[out] backGrad output grad.
* @param[in] sizeX width of pooling window.
* @param[in] sizeY height of pooling window.
* @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[in] scaleA scale.
* @param[in] scaleB scale.
* @param[out] backGrad output grad.
*
*/
extern void hl_avgpool_backward(
int frameCnt, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* backGrad,
real scaleA, real scaleB);
const int frameCnt, const real* outGrad,
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad);
/**
* @brief Cross-map-response normalize forward.

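The coordinate mapping implied by the new rectangular parameters (sizeX/sizeY for window width/height, strideH/strideW, paddingH/paddingW) is easiest to see as a plain CPU loop. Below is a minimal single-channel sketch, assuming padded windows are clipped to the valid input region; the function name and the real typedef are illustrative, and this is not the actual CUDA kernel:

#include <algorithm>
#include <limits>

typedef float real;  // stand-in for Paddle's real type (single precision assumed)

// Illustrative reference for one channel: rectangular max pooling.
// Output cell (ph, pw) reads the window whose top-left corner is
// (ph * strideH - paddingH, pw * strideW - paddingW), clipped to the image.
void maxPoolRef(const real* in, int height, int width,
                int pooledH, int pooledW,
                int sizeX, int sizeY, int strideH, int strideW,
                int paddingH, int paddingW, real* out) {
  for (int ph = 0; ph < pooledH; ++ph) {
    for (int pw = 0; pw < pooledW; ++pw) {
      int hstart = std::max(ph * strideH - paddingH, 0);
      int wstart = std::max(pw * strideW - paddingW, 0);
      int hend = std::min(ph * strideH - paddingH + sizeY, height);
      int wend = std::min(pw * strideW - paddingW + sizeX, width);
      real maxv = -std::numeric_limits<real>::max();
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          maxv = std::max(maxv, in[h * width + w]);
        }
      }
      out[ph * pooledW + pw] = maxv;
    }
  }
}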
@@ -38,29 +38,45 @@ inline void hl_expand_feature2col(
real* dataCol) {}
inline void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData) {}
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_maxpool_backward(
int frameCnt, const real* inputData,
const int frameCnt, const real* inputData,
const real* outData, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* targetGrad,
real scaleA, real scaleB) {}
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {}
inline void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels,
int height, int width, int pooledH, int pooledW,
int sizeX, int stride, int start, real* tgtData) {}
const int frameCnt, const real* inputData,
const int channels,
const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_avgpool_backward(
int frameCnt, const real* outGrad,
int channels, int height, int width,
int pooledH, int pooledW, int sizeX,
int stride, int start, real* backGrad,
real scaleA, real scaleB) {}
const int frameCnt, const real* outGrad,
const int channels, const int height,
const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad) {}
inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out,

File diff suppressed because it is too large.

@@ -51,7 +51,6 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
PoolLayer::init(layerMap, parameterMap);
CHECK(useGpu_) << "CudnnPoolLayer only supports GPU";
CHECK_EQ(start_, 0) << poolType_ << " does not support 'start'";
hl_create_tensor_descriptor(&inputDesc_);
hl_create_tensor_descriptor(&outputDesc_);

@@ -56,16 +56,6 @@ public:
void reshape(int batchSize);
virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr);
/**
* Calculate the output size according to the pooling window size.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
};
} // namespace paddle

@@ -35,7 +35,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
poolType_ = conf.pool_type();
channels_ = conf.channels();
sizeX_ = conf.size_x();
start_ = conf.start();
stride_ = conf.stride();
outputX_ = conf.output_x();
imgSize_ = conf.img_size();
@@ -47,22 +46,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
bool cudnnTypeCheck = true;
#ifndef PADDLE_ONLY_CPU
cudnnTypeCheck = !CudnnPoolLayer::typeCheck(poolType_);
#endif
if ((sizeY_ != sizeX_ || imgSizeY_ != imgSize_ || strideY_ != stride_ ||
confPaddingY_ != confPadding_ || outputY_ != outputX_) &&
cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not support non-square "
"filter, image, stride or padding";
}
if (confPadding_ != 0 && cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not support 'padding'";
}
return true;
}

@@ -28,7 +28,7 @@ namespace paddle {
class PoolLayer : public Layer {
protected:
size_t channels_, sizeX_, stride_, outputX_, imgSize_;
int start_, confPadding_;
int confPadding_;
size_t sizeY_;
size_t imgSizeY_;
@@ -47,6 +47,16 @@ public:
static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
/**
* Calculate the output size according to the window size and padding size.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
};
} // namespace paddle
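As a quick check of this formula, take the values used by setPoolConfig in the layer test below: imageSize = 14, windowSize = 3, padding = 0, stride = 2 gives

outputSize = (14 - 3 + 2*0 + 2 - 1) / 2 + 1 = 12 / 2 + 1 = 7

so a 14x14 input maps to a 7x7 pooled output. The "+ stride - 1" term makes the integer division round up, so a partial window at the border still produces an output element.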

@@ -25,13 +25,15 @@ size_t PoolProjectionLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) {
imgSizeH_ = imgSize_;
imgSizeH_ = imgSizeY_;
}
if (imgSizeW_ == 0) {
imgSizeW_ = imgSize_;
}
outputH_ = 1 + (imgSizeH_ - start_ - sizeX_ + stride_ - 1) / stride_;
outputW_ = 1 + (imgSizeW_ - start_ - sizeX_ + stride_ - 1) / stride_;
outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_);
layerSize = outputH_ * outputW_ * channels_;
getOutput().setFrameHeight(outputH_);
@@ -51,8 +53,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_,
stride_, outputH_, outputW_);
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
}
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
@@ -69,7 +72,9 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, start_, stride_, outputH_, outputW_, 1, 1);
sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::forward(PassType passType) {
@@ -84,8 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_,
stride_, outputH_, outputW_);
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
}
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
@@ -97,7 +103,9 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
/* Do derivation */
MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, start_,
stride_, outputH_, outputW_, 1, 1);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_,
sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
}
} // namespace paddle

@@ -0,0 +1,46 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8, num_filters=8, stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=MaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=AvgPooling(),
)
outputs([maxpool, avgpool])
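Plugging these settings into the outputSize formula above (the conv output is 16x16; window 3 wide by 5 high, stride 1x2, padding 1x2):

outputW = (16 - 3 + 2*1 + 1 - 1) / 1 + 1 = 16
outputH = (16 - 5 + 2*2 + 2 - 1) / 2 + 1 = 9

so each pooling branch should emit 9x16 maps over 8 channels. The cudnn configuration below is expected to produce the same shapes, which is what the test_NetworkCompare img_pool case verifies.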

@@ -0,0 +1,44 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8, num_filters=8, stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnMaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnAvgPooling(),
)
outputs([maxpool, avgpool])

@@ -791,21 +791,24 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
(*config).biasSize = 0;
(*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(16);
(*config).layerConfig.set_partial_sum(1);
(*config).layerConfig.set_shared_biases(true);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType);
pool->set_channels(16);
pool->set_size_x(3);
if (poolType == "cudnn-max-pool" || poolType == "cudnn-avg-pool") {
pool->set_padding(0);
} else {
pool->set_size_x(kw);
pool->set_size_y(kh);
pool->set_start(0);
}
pool->set_stride(2);
pool->set_output_x((pool->img_size() - pool->start() - pool->size_x()) /
((float)pool->stride()) +
1.5);
pool->set_padding(pw);
pool->set_padding_y(ph);
pool->set_stride(sw);
pool->set_stride_y(sh);
int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1;
int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1;
pool->set_output_x(ow);
pool->set_output_y(oh);
}
void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
@@ -814,9 +817,10 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_img_size(14);
config.layerConfig.set_size(pool->output_x() * pool->output_x() *
pool->set_img_size_y(14);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels());
testLayerGrad(config, "pool", 100, trans, useGpu);
@@ -829,11 +833,11 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_size_y(4);
pool->set_stride_y(3);
pool->set_img_size(10);
pool->set_img_size_y(20);
setPoolConfig(&config, pool, poolType);
pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) /
((float)pool->stride_y()) +
1.5);
@@ -1252,8 +1256,6 @@ TEST(Layer, MultiplexLayer) {
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);

@@ -116,6 +116,8 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) {
gradientMachine->start(trainer.getConfig(), nullptr);
gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN);
for (size_t i = 0; i < in.outGrads.size(); i++) {
// If all the layers in the config have no parameters and NeedGradient()
// is not set, outArgs[i].grad will be nullptr.
outArgs[i].grad->copyFrom(*in.outGrads[i]);
}
gradientMachine->backward();
@@ -225,6 +227,18 @@ TEST(Compare, concat_table) {
compareNetwork(config_file_a, config_file_b);
}
#ifndef PADDLE_ONLY_CPU
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
std::string config_file_b = "./gserver/tests/img_pool_b.conf";
bool useGpu = FLAGS_use_gpu;
FLAGS_use_gpu = true;
compareNetwork(config_file_a, config_file_b);
FLAGS_use_gpu = useGpu;
}
#endif
P_DEFINE_string(config_file_a, "", "config of one network to compare");
P_DEFINE_string(config_file_b, "", "config of another network to compare");
TEST(Compare, network) {

File diff suppressed because it is too large.

@@ -742,31 +742,37 @@ public:
*/
virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX,
int start_, size_t stride, size_t outputH,
size_t outputW) {
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
/// Pooling backward operation.
virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX,
int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets,
real scaleOutput) {
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
/// Pooling forward operation; calculates the average over the sizeX * sizeY window.
virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW) {
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) {
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemented";
}
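The average variant has the same loop structure as the max-pool sketch earlier, with the max replaced by a sum. A single-channel sketch follows; note that dividing by the clipped window area is an assumption here, and the actual kernel may instead divide by the full sizeX * sizeY window:

#include <algorithm>

typedef float real;  // stand-in for Paddle's real type (single precision assumed)

// Illustrative reference for one channel: rectangular average pooling.
void avgPoolRef(const real* in, int height, int width,
                int pooledH, int pooledW,
                int sizeX, int sizeY, int strideH, int strideW,
                int paddingH, int paddingW, real* out) {
  for (int ph = 0; ph < pooledH; ++ph) {
    for (int pw = 0; pw < pooledW; ++pw) {
      int hstart = std::max(ph * strideH - paddingH, 0);
      int wstart = std::max(pw * strideW - paddingW, 0);
      int hend = std::min(ph * strideH - paddingH + sizeY, height);
      int wend = std::min(pw * strideW - paddingW + sizeX, width);
      real sum = 0;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          sum += in[h * width + w];
        }
      }
      // Assumption: average over the clipped window; see caveat above.
      out[ph * pooledW + pw] = sum / ((hend - hstart) * (wend - wstart));
    }
  }
}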
@@ -1131,21 +1137,30 @@ public:
real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput);
Matrix& outGrad, Matrix& outV, size_t sizeX,
size_t sizeY, size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets, real scaleOutput);
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX,
@@ -1242,21 +1257,31 @@ public:
real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start,
size_t stride, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput);
Matrix& outGrad, Matrix& outV,
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride,
size_t outputH, size_t outputW);
size_t channels, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH,
size_t outputW, real scaleTargets, real scaleOutput);
size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX,

@@ -1846,6 +1846,159 @@ TEST(Matrix, classificationError) {
}
}
void testMaxPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->maxPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->maxPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
checkMatrixEqual(target, targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->maxPoolBackward(*input, imgSizeH, imgSizeW,
*targetGrad, *target,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->maxPoolBackward(*inputGpu, imgSizeH, imgSizeW,
*targetGpuGrad, *targetGpu,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
checkMatrixEqual(inputGrad, targetBwdCheck);
}
void testAvgPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->avgPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->avgPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
MatrixCheckErr(*target, *targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->avgPoolBackward(*targetGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->avgPoolBackward(*targetGpuGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
MatrixCheckErr(*inputGrad, *targetBwdCheck);
}
TEST(Matrix, PoolFwdBwd) {
for (auto numSamples : {5, 32}) {
for (auto channels : {1, 9, 32}) {
for (auto imgSizeH : {14, 28}) {
for (auto imgSizeW : {16, 30}) {
for (auto sizeX : {2, 5}) {
for (auto sizeY : {2, 5}) {
for (auto sH : {1, 2}) {
for (auto sW : {1, 2}) {
for (auto pH : {0, (sizeY - 1)/2}) {
for (auto pW : {0, (sizeX - 1)/2}) {
VLOG(3) << " numSamples=" << numSamples
<< " channels=" << channels
<< " imgSizeH=" << imgSizeH
<< " imgSizeW=" << imgSizeW
<< " sizeX=" << sizeX
<< " sizeY=" << sizeY
<< " strideH=" << sH
<< " strideW=" << sW
<< " padingH=" << pH
<< " padingW=" << pW;
testMaxPoolFwdBwd(numSamples, channels, imgSizeH,
imgSizeW, sizeX, sizeY, sH, sW, pH, pW);
testAvgPoolFwdBwd(numSamples, channels, imgSizeH,
imgSizeW, sizeX, sizeY, sH, sW, pH, pW);
}
}
}
}
}
}
}
}
}
}
}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);

@@ -88,7 +88,8 @@ message PoolConfig {
required uint32 size_x = 3;
// Tell the net where in the input image to start the pooling.
required uint32 start = 4;
// start is deprecated now.
optional uint32 start = 4;
// Defines the stride size between successive pooling squares.
required uint32 stride = 5;

@@ -961,10 +961,6 @@ def parse_pool(pool, input_layer_name, pool_conf):
"['max-projection', 'avg-projection', "
"'cudnn-max-pool', 'cudnn-avg-pool']"
% pool.pool_type)
if pool.size_y or pool.stride_y or pool.img_width or pool.padding_y:
config_assert(pool.pool_type.startswith('cudnn'),
"'size_y', 'stride_y' and 'img_width' and 'padding_y'"
"can only be used for cudnn")
pool_conf.channels = pool.channels
pool_conf.size_x = pool.size_x
@@ -974,36 +970,25 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.stride_y = default(pool.stride_y, pool_conf.stride)
img_pixels = g_layer_map[input_layer_name].size / pool.channels
# img_width may be removed later,
# once it can be calculated automatically.
pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5))
pool_conf.img_size_y = img_pixels / pool_conf.img_size
config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels,
"Incorrect input image size %d for input image pixels %d"
% (pool_conf.img_size, img_pixels))
if pool.start is not None:
config_assert(pool.padding is None,
'At most one of start and padding can be set.')
pool_conf.start = pool.start
pool_conf.padding = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size - \
pool_conf.start - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
config_assert(not pool.start, "start is deprecated in pooling.")
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y - \
pool_conf.start - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
elif pool.padding is not None:
if pool.padding is not None:
pool_conf.padding = pool.padding
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.start = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size + \
2*pool_conf.padding - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \
2*pool_conf.padding_y - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
else:
raise ValueError('At least one of start and padding should be set.')
def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels
@@ -1603,7 +1588,7 @@ class PoolLayer(LayerBase):
pool_conf = self.config.inputs[input_index].pool_conf
print("output size for %s is %d*%d " % (
name, pool_conf.output_y, pool_conf.output_x))
self.set_layer_size((pool_conf.output_x ** 2) * pool_conf.channels)
self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
@config_layer('batch_norm')
class BatchNormLayer(LayerBase):

@@ -1627,7 +1627,9 @@ def img_conv_layer(input, filter_size, num_filters,
@layer_support()
def img_pool_layer(input, pool_size, name=None,
num_channels=None, pool_type=None,
stride=1, start=None, padding=0, layer_attr=None):
stride=1, start=None, padding=0, layer_attr=None,
pool_size_y=None, stride_y=None, padding_y=None,
img_width=None):
"""
Image pooling Layer.
@@ -1635,25 +1637,34 @@ def img_pool_layer(input, pool_size, name=None,
.. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
:param padding: pooling padding
:param padding: pooling padding width.
:type padding: int
:param padding_y: pooling padding height. It's equal to padding by default.
:type padding_y: int|None
:param name: name of pooling layer
:type name: basestring.
:param input: layer's input
:type input: LayerOutput
:param pool_size: pooling size
:param pool_size: pooling window width
:type pool_size: int
:param pool_size_y: pooling window height. It's equal to pool_size by default.
:type pool_size_y: int|None
:param num_channels: number of input channel.
:type num_channels: int
:param pool_type: pooling type. MaxPooling or AveragePooling. Default is
MaxPooling.
:type pool_type: BasePoolingType
:param stride: stride of pooling.
:param stride: stride width of pooling.
:type stride: int
:param start: start position of pooling operation.
:type start: int
:param stride_y: stride height of pooling. It is equal to stride by default.
:type stride_y: int|None
:param start: start position of pooling operation. Note that it is deprecated now.
:type start: int|None
:param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute
:param img_width: the width of the input feature map. If it is None, the input feature
map should be square.
:type img_width: int|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
@@ -1666,17 +1677,29 @@ def img_pool_layer(input, pool_size, name=None,
elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y
padding_y = padding if padding_y is None else padding_y
Layer(
name=name,
type=LayerType.POOL_LAYER,
inputs=[Input(input.name,
pool=Pool(
pool_type=''.join([pool_type.name, '-projection']),
pool_type=type_name,
channels=num_channels,
size_x=pool_size,
start=start,
stride=stride,
padding=padding
padding=padding,
size_y=pool_size_y,
stride_y=stride_y,
padding_y=padding_y,
img_width=img_width
))],
**ExtraLayerAttribute.to_kwargs(layer_attr)
)

@@ -170,13 +170,13 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None,
:type shared_bias: bool
:param conv_layer_attr: see img_conv_layer for details
:type conv_layer_attr: ExtraLayerAttribute
:param pool_stride: see img_conv_layer for details
:param pool_stride: see img_pool_layer for details
:type pool_stride: int
:param pool_start: see img_conv_layer for details
:param pool_start: see img_pool_layer for details. It is deprecated now.
:type pool_start: int
:param pool_padding: see img_conv_layer for details
:param pool_padding: see img_pool_layer for details
:type pool_padding: int
:param pool_layer_attr: see img_conv_layer for details
:param pool_layer_attr: see img_pool_layer for details
:type pool_layer_attr: ExtraLayerAttribute
:return: Layer's output
:rtype: LayerOutput
@@ -243,7 +243,7 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None,
:param bn_layer_attr: ParameterAttribute.
:param pool_stride: see img_pool_layer's document.
:type pool_stride: int
:param pool_start: see img_pool_layer's document.
:param pool_start: see img_pool_layer's document. It is deprecated now.
:type pool_start: int
:param pool_padding: see img_pool_layer's document.
:type pool_padding: int

@@ -19,6 +19,8 @@ __all__ = [
"BasePoolingType",
"MaxPooling",
"AvgPooling",
"CudnnMaxPooling",
"CudnnAvgPooling",
"SumPooling",
"SquareRootNPooling"
]
@@ -57,6 +59,23 @@ class MaxPooling(BasePoolingType):
self.output_max_index = output_max_index
class CudnnMaxPooling(BasePoolingType):
"""
Cudnn max pooling only supports GPU. Returns the maximum value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool")
class CudnnAvgPooling(BasePoolingType):
"""
Cudnn average pooling only supports GPU. Returns the average value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool")
class AvgPooling(BasePoolingType):
"""
Average pooling.

@@ -1,4 +1,4 @@
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr
86c0815275a9d5eb902e23c6a592f58a img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr

@@ -7,8 +7,10 @@ settings(
img = data_layer(name='image', size=256*256)
# parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square, so a square filter_size is set here.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
filter_size=(32, 64), padding=(1, 0), stride=(1, 1),
filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
