support rectangle padding, stride, window and input for PoolProjection (#115)

* support rectangle padding, stride, window and input for PoolProjection

* Follow comments.
1. Remove start
2. refine img_pool_a/b.conf for test_NetworkCompare
3. Split unit test

* Modify the test in img_layers.py
avx_docs
qingqing01 8 years ago committed by hedaoyuan
parent 8a044d2e2d
commit 191fafe355

@ -84,16 +84,23 @@ extern void hl_expand_feature2col(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* *
*/ */
extern void hl_maxpool_forward( extern void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData); const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
@ -107,21 +114,28 @@ extern void hl_maxpool_forward(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[out] targetGrad output grad. * @param[in] strideW pooling stride width.
* @param[in] scaleA scale. * @param[in] scaleA scale.
* @param[in] scaleB scale. * @param[in] scaleB scale.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
* *
*/ */
extern void hl_maxpool_backward( extern void hl_maxpool_backward(
int frameCnt, const real* inputData, const int frameCnt, const real* inputData,
const real* outData, const real* outGrad, const real* outData, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* targetGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB); const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad);
/** /**
* @brief Averge pool forward. * @brief Averge pool forward.
@ -133,16 +147,23 @@ extern void hl_maxpool_backward(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* *
*/ */
extern void hl_avgpool_forward( extern void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData); const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
@ -154,20 +175,27 @@ extern void hl_avgpool_forward(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[out] backGrad output grad. * @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[in] scaleA scale. * @param[in] scaleA scale.
* @param[in] scaleB scale. * @param[in] scaleB scale.
* @param[out] backGrad output grad.
* *
*/ */
extern void hl_avgpool_backward( extern void hl_avgpool_backward(
int frameCnt, const real* outGrad, const int frameCnt, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* backGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB); const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad);
/** /**
* @brief Cross-map-respose normalize forward. * @brief Cross-map-respose normalize forward.

@ -38,29 +38,45 @@ inline void hl_expand_feature2col(
real* dataCol) {} real* dataCol) {}
inline void hl_maxpool_forward( inline void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData) {} const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_maxpool_backward( inline void hl_maxpool_backward(
int frameCnt, const real* inputData, const int frameCnt, const real* inputData,
const real* outData, const real* outGrad, const real* outData, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* targetGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB) {} const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {}
inline void hl_avgpool_forward( inline void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData) {} const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_avgpool_backward( inline void hl_avgpool_backward(
int frameCnt, const real* outGrad, const int frameCnt, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* backGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB) {} const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad) {}
inline void hl_CMRNorm_forward( inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out, size_t frameCnt, const real* in, real* scale, real* out,

File diff suppressed because it is too large Load Diff

@ -51,7 +51,6 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
PoolLayer::init(layerMap, parameterMap); PoolLayer::init(layerMap, parameterMap);
CHECK(useGpu_) << "CudnnPoolLayer only support gpu"; CHECK(useGpu_) << "CudnnPoolLayer only support gpu";
CHECK_EQ(start_, 0) << poolType_ << " dose not support 'start'";
hl_create_tensor_descriptor(&inputDesc_); hl_create_tensor_descriptor(&inputDesc_);
hl_create_tensor_descriptor(&outputDesc_); hl_create_tensor_descriptor(&outputDesc_);

@ -56,16 +56,6 @@ public:
void reshape(int batchSize); void reshape(int batchSize);
virtual void forward(PassType passType); virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr); virtual void backward(const UpdateCallback& callback = nullptr);
/**
* Calculate output size according window size of pooling.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
}; };
} // namespace paddle } // namespace paddle

@ -35,7 +35,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
poolType_ = conf.pool_type(); poolType_ = conf.pool_type();
channels_ = conf.channels(); channels_ = conf.channels();
sizeX_ = conf.size_x(); sizeX_ = conf.size_x();
start_ = conf.start();
stride_ = conf.stride(); stride_ = conf.stride();
outputX_ = conf.output_x(); outputX_ = conf.output_x();
imgSize_ = conf.img_size(); imgSize_ = conf.img_size();
@ -47,22 +46,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
bool cudnnTypeCheck = true;
#ifndef PADDLE_ONLY_CPU
cudnnTypeCheck = !CudnnPoolLayer::typeCheck(poolType_);
#endif
if ((sizeY_ != sizeX_ || imgSizeY_ != imgSize_ || strideY_ != stride_ ||
confPaddingY_ != confPadding_ || outputY_ != outputX_) &&
cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not supported non-square "
"filter, image, stride or padding";
}
if (confPadding_ != 0 && cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not supported 'padding'";
}
return true; return true;
} }

@ -28,7 +28,7 @@ namespace paddle {
class PoolLayer : public Layer { class PoolLayer : public Layer {
protected: protected:
size_t channels_, sizeX_, stride_, outputX_, imgSize_; size_t channels_, sizeX_, stride_, outputX_, imgSize_;
int start_, confPadding_; int confPadding_;
size_t sizeY_; size_t sizeY_;
size_t imgSizeY_; size_t imgSizeY_;
@ -47,6 +47,16 @@ public:
static Layer* create(const LayerConfig& config); static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
/**
* Calculate output size according window size and padding size.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
}; };
} // namespace paddle } // namespace paddle

@ -25,13 +25,15 @@ size_t PoolProjectionLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) { if (imgSizeH_ == 0) {
imgSizeH_ = imgSize_; imgSizeH_ = imgSizeY_;
} }
if (imgSizeW_ == 0) { if (imgSizeW_ == 0) {
imgSizeW_ = imgSize_; imgSizeW_ = imgSize_;
} }
outputH_ = 1 + (imgSizeH_ - start_ - sizeX_ + stride_ - 1) / stride_;
outputW_ = 1 + (imgSizeW_ - start_ - sizeX_ + stride_ - 1) / stride_; outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_);
layerSize = outputH_ * outputW_ * channels_; layerSize = outputH_ * outputW_ * channels_;
getOutput().setFrameHeight(outputH_); getOutput().setFrameHeight(outputH_);
@ -51,8 +53,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_, outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
stride_, outputH_, outputW_); sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
} }
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
@ -69,7 +72,9 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputGrad = getInputGrad(0); MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV, inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, start_, stride_, outputH_, outputW_, 1, 1); sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
} }
void AvgPoolProjectionLayer::forward(PassType passType) { void AvgPoolProjectionLayer::forward(PassType passType) {
@ -84,8 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_, outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
stride_, outputH_, outputW_); sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
} }
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
@ -97,7 +103,9 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ /* Do derivation */
MatrixPtr outputGrad = getOutputGrad(); MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0); MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, start_, inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_,
stride_, outputH_, outputW_, 1, 1); sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
} }
} // namespace paddle } // namespace paddle

@ -0,0 +1,46 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=8,stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=MaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=AvgPooling(),
)
outputs([maxpool, avgpool])

@ -0,0 +1,44 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8, num_filters=8, stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnMaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnAvgPooling(),
)
outputs([maxpool, avgpool])

@ -791,21 +791,24 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
(*config).biasSize = 0; (*config).biasSize = 0;
(*config).layerConfig.set_type("pool"); (*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(16); (*config).layerConfig.set_num_filters(16);
(*config).layerConfig.set_partial_sum(1);
(*config).layerConfig.set_shared_biases(true);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType); pool->set_pool_type(poolType);
pool->set_channels(16); pool->set_channels(16);
pool->set_size_x(3); pool->set_size_x(kw);
if (poolType == "cudnn-max-pool" || poolType == "cudnn-avg-pool") { pool->set_size_y(kh);
pool->set_padding(0); pool->set_start(0);
} else { pool->set_padding(pw);
pool->set_start(0); pool->set_padding_y(ph);
} pool->set_stride(sw);
pool->set_stride(2); pool->set_stride_y(sh);
pool->set_output_x((pool->img_size() - pool->start() - pool->size_x()) /
((float)pool->stride()) + int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1;
1.5); int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1;
pool->set_output_x(ow);
pool->set_output_y(oh);
} }
void testPoolLayer(const string& poolType, bool trans, bool useGpu) { void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
@ -814,9 +817,10 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf(); PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_img_size(14); pool->set_img_size(14);
config.layerConfig.set_size(pool->output_x() * pool->output_x() * pool->set_img_size_y(14);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels()); pool->channels());
testLayerGrad(config, "pool", 100, trans, useGpu); testLayerGrad(config, "pool", 100, trans, useGpu);
@ -829,11 +833,11 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf(); PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_size_y(4); pool->set_size_y(4);
pool->set_stride_y(3); pool->set_stride_y(3);
pool->set_img_size(10); pool->set_img_size(10);
pool->set_img_size_y(20); pool->set_img_size_y(20);
setPoolConfig(&config, pool, poolType);
pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) /
((float)pool->stride_y()) + ((float)pool->stride_y()) +
1.5); 1.5);
@ -1252,8 +1256,6 @@ TEST(Layer, MultiplexLayer) {
} }
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);

@ -116,6 +116,8 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) {
gradientMachine->start(trainer.getConfig(), nullptr); gradientMachine->start(trainer.getConfig(), nullptr);
gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN); gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN);
for (size_t i = 0; i < in.outGrads.size(); i++) { for (size_t i = 0; i < in.outGrads.size(); i++) {
// If the all the layers in the config have no parameters, also
// not set NeedGradient(), the outArgs[i] will be nullptr.
outArgs[i].grad->copyFrom(*in.outGrads[i]); outArgs[i].grad->copyFrom(*in.outGrads[i]);
} }
gradientMachine->backward(); gradientMachine->backward();
@ -225,6 +227,18 @@ TEST(Compare, concat_table) {
compareNetwork(config_file_a, config_file_b); compareNetwork(config_file_a, config_file_b);
} }
#ifndef PADDLE_ONLY_CPU
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
std::string config_file_b = "./gserver/tests/img_pool_b.conf";
bool useGpu = FLAGS_use_gpu;
FLAGS_use_gpu = true;
compareNetwork(config_file_a, config_file_b);
FLAGS_use_gpu = useGpu;
}
#endif
P_DEFINE_string(config_file_a, "", "config of one network to compare"); P_DEFINE_string(config_file_a, "", "config of one network to compare");
P_DEFINE_string(config_file_b, "", "config of another network to compare"); P_DEFINE_string(config_file_b, "", "config of another network to compare");
TEST(Compare, network) { TEST(Compare, network) {

File diff suppressed because it is too large Load Diff

@ -742,31 +742,37 @@ public:
*/ */
virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH, virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX, size_t imgSizeW, size_t channels, size_t sizeX,
int start_, size_t stride, size_t outputH, size_t sizeY, size_t strideH, size_t strideW,
size_t outputW) { size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
/// Pooling backward operation. /// Pooling backward operation.
virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, Matrix& outGrad, Matrix& outV, size_t sizeX,
int start, size_t stride, size_t outputH, size_t sizeY, size_t strideH, size_t strideW,
size_t outputW, real scaleTargets, size_t outputH, size_t outputW,
real scaleOutput) { real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
/// Pooling forward operation, caculate the average of sizeX elements. /// Pooling forward operation, caculate the average of sizeX elements.
virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t channels, size_t sizeX, size_t sizeY,
size_t stride, size_t outputH, size_t outputW) { size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) { real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
@ -1131,21 +1137,30 @@ public:
real alpha = 1.0f, real beta = 0.0f); real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start, Matrix& outGrad, Matrix& outV, size_t sizeX,
size_t stride, size_t outputH, size_t outputW, size_t sizeY, size_t strideH, size_t strideW,
real scaleTargets, real scaleOutput); size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH, size_t sizeX, size_t sizeY,
size_t outputW, real scaleTargets, real scaleOutput); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW, void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX, Matrix& denoms, size_t channels, size_t sizeX,
@ -1242,21 +1257,31 @@ public:
real alpha = 1.0f, real beta = 0.0f); real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start, Matrix& outGrad, Matrix& outV,
size_t stride, size_t outputH, size_t outputW, size_t sizeX, size_t sizeY,
real scaleTargets, real scaleOutput); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH, size_t sizeX, size_t sizeY,
size_t outputW, real scaleTargets, real scaleOutput); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW, void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX, Matrix& denoms, size_t channels, size_t sizeX,

@ -1846,6 +1846,159 @@ TEST(Matrix, classificationError) {
} }
} }
void testMaxPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->maxPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->maxPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
checkMatrixEqual(target, targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->maxPoolBackward(*input, imgSizeH, imgSizeW,
*targetGrad, *target,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->maxPoolBackward(*inputGpu, imgSizeH, imgSizeW,
*targetGpuGrad, *targetGpu,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
checkMatrixEqual(inputGrad, targetBwdCheck);
}
void testAvgPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->avgPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->avgPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
MatrixCheckErr(*target, *targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->avgPoolBackward(*targetGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->avgPoolBackward(*targetGpuGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
MatrixCheckErr(*inputGrad, *targetBwdCheck);
}
// Sweeps a grid of pooling configurations (batch size, channels, image
// height/width, rectangular window, rectangular stride, padding) and
// cross-checks the CPU and GPU paths for both max and average pooling.
// Padding candidates are derived from the window so padding < window.
TEST(Matrix, PoolFwdBwd) {
  for (auto numSamples : {5, 32}) {
    for (auto channels : {1, 9, 32}) {
      for (auto imgSizeH : {14, 28}) {
        for (auto imgSizeW : {16, 30}) {
          for (auto sizeX : {2, 5}) {
            for (auto sizeY : {2, 5}) {
              for (auto strideH : {1, 2}) {
                for (auto strideW : {1, 2}) {
                  for (auto padH : {0, (sizeY - 1) / 2}) {
                    for (auto padW : {0, (sizeX - 1) / 2}) {
                      // Log the configuration so failures are reproducible.
                      VLOG(3) << " numSamples=" << numSamples
                              << " channels=" << channels
                              << " imgSizeH=" << imgSizeH
                              << " imgSizeW=" << imgSizeW
                              << " sizeX=" << sizeX
                              << " sizeY=" << sizeY
                              << " strideH=" << strideH
                              << " strideW=" << strideW
                              << " padingH=" << padH
                              << " padingW=" << padW;
                      testMaxPoolFwdBwd(numSamples, channels, imgSizeH,
                                        imgSizeW, sizeX, sizeY,
                                        strideH, strideW, padH, padW);
                      testAvgPoolFwdBwd(numSamples, channels, imgSizeH,
                                        imgSizeW, sizeX, sizeY,
                                        strideH, strideW, padH, padW);
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);

@ -88,7 +88,8 @@ message PoolConfig {
required uint32 size_x = 3; required uint32 size_x = 3;
// Tell the net where in the input image to start the pooling. // Tell the net where in the input image to start the pooling.
required uint32 start = 4; // start is deprecated now.
optional uint32 start = 4;
// Defines the stride size between successive pooling squares. // Defines the stride size between successive pooling squares.
required uint32 stride = 5; required uint32 stride = 5;

@ -961,10 +961,6 @@ def parse_pool(pool, input_layer_name, pool_conf):
"['max-projection', 'avg-projection', " "['max-projection', 'avg-projection', "
"'cudnn-max-pool', 'cudnn-avg-pool']" "'cudnn-max-pool', 'cudnn-avg-pool']"
% pool.pool_type) % pool.pool_type)
if pool.size_y or pool.stride_y or pool.img_width or pool.padding_y:
config_assert(pool.pool_type.startswith('cudnn'),
"'size_y', 'stride_y' and 'img_width' and 'padding_y'"
"can only be used for cudnn")
pool_conf.channels = pool.channels pool_conf.channels = pool.channels
pool_conf.size_x = pool.size_x pool_conf.size_x = pool.size_x
@ -974,36 +970,25 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.stride_y = default(pool.stride_y, pool_conf.stride); pool_conf.stride_y = default(pool.stride_y, pool_conf.stride);
img_pixels = g_layer_map[input_layer_name].size / pool.channels img_pixels = g_layer_map[input_layer_name].size / pool.channels
# the img_width may be removed,
# and it can be calculated automatically later.
pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5)) pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5))
pool_conf.img_size_y = img_pixels / pool_conf.img_size pool_conf.img_size_y = img_pixels / pool_conf.img_size
config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels, config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels,
"Incorrect input image size %d for input image pixels %d" "Incorrect input image size %d for input image pixels %d"
% (pool_conf.img_size, img_pixels)) % (pool_conf.img_size, img_pixels))
if pool.start is not None: config_assert(not pool.start, "start is deprecated in pooling.")
config_assert(pool.padding is None,
'At most one of start and padding can be set.')
pool_conf.start = pool.start
pool_conf.padding = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size - \
pool_conf.start - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y - \ if pool.padding is not None:
pool_conf.start - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
elif pool.padding is not None:
pool_conf.padding = pool.padding pool_conf.padding = pool.padding
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.start = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size + \ pool_conf.output_x = int(math.ceil((pool_conf.img_size + \
2*pool_conf.padding - pool_conf.size_x) / \ 2*pool_conf.padding - pool_conf.size_x) / \
float(pool_conf.stride))) + 1 float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \ pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \
2*pool_conf.padding_y - pool_conf.size_y) / \ 2*pool_conf.padding_y - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1 float(pool_conf.stride_y))) + 1
else:
raise ValueError('At least one of start and padding should be set.')
def parse_image(image, input_layer_name, image_conf): def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels image_conf.channels = image.channels
@ -1603,7 +1588,7 @@ class PoolLayer(LayerBase):
pool_conf = self.config.inputs[input_index].pool_conf pool_conf = self.config.inputs[input_index].pool_conf
print("output size for %s is %d*%d " % ( print("output size for %s is %d*%d " % (
name, pool_conf.output_y, pool_conf.output_x)) name, pool_conf.output_y, pool_conf.output_x))
self.set_layer_size((pool_conf.output_x ** 2) * pool_conf.channels) self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
@config_layer('batch_norm') @config_layer('batch_norm')
class BatchNormLayer(LayerBase): class BatchNormLayer(LayerBase):

@ -210,7 +210,7 @@ DEVICE = 'device'
def layer_support(*attrs): def layer_support(*attrs):
attrs_list = list(attrs) attrs_list = list(attrs)
attrs_list.append(DEVICE) attrs_list.append(DEVICE)
def decorator(method): def decorator(method):
@functools.wraps(method) @functools.wraps(method)
@ -1627,7 +1627,9 @@ def img_conv_layer(input, filter_size, num_filters,
@layer_support() @layer_support()
def img_pool_layer(input, pool_size, name=None, def img_pool_layer(input, pool_size, name=None,
num_channels=None, pool_type=None, num_channels=None, pool_type=None,
stride=1, start=None, padding=0, layer_attr=None): stride=1, start=None, padding=0, layer_attr=None,
pool_size_y=None, stride_y=None, padding_y=None,
img_width=None):
""" """
Image pooling Layer. Image pooling Layer.
@ -1635,25 +1637,34 @@ def img_pool_layer(input, pool_size, name=None,
.. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
:param padding: pooling padding :param padding: pooling padding width.
:type padding: int :type padding: int
:param padding_y: pooling padding height. It's equal to padding by default.
:type padding_y: int|None
:param name: name of pooling layer :param name: name of pooling layer
:type name: basestring. :type name: basestring.
:param input: layer's input :param input: layer's input
:type input: LayerOutput :type input: LayerOutput
:param pool_size: pooling size :param pool_size: pooling window width
:type pool_size: int :type pool_size: int
:param pool_size_y: pooling window height. It's eaqual to pool_size by default.
:type pool_size_y: int|None
:param num_channels: number of input channel. :param num_channels: number of input channel.
:type num_channels: int :type num_channels: int
:param pool_type: pooling type. MaxPooling or AveragePooling. Default is :param pool_type: pooling type. MaxPooling or AveragePooling. Default is
MaxPooling. MaxPooling.
:type pool_type: BasePoolingType :type pool_type: BasePoolingType
:param stride: stride of pooling. :param stride: stride width of pooling.
:type stride: int :type stride: int
:param start: start position of pooling operation. :param stride_y: stride height of pooling. It is equal to stride by default.
:type start: int :type stride_y: int|None
:param start: start position of pooling operation. Note it is deprecated now.
:type start: int|None
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param img_width: the width of input feature map. If it is None, the input feature
map should be square.
:type img_width: int|None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
@ -1666,17 +1677,29 @@ def img_pool_layer(input, pool_size, name=None,
elif isinstance(pool_type, AvgPooling): elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg' pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y
padding_y = padding if padding_y is None else padding_y
Layer( Layer(
name=name, name=name,
type=LayerType.POOL_LAYER, type=LayerType.POOL_LAYER,
inputs=[Input(input.name, inputs=[Input(input.name,
pool=Pool( pool=Pool(
pool_type=''.join([pool_type.name, '-projection']), pool_type=type_name,
channels=num_channels, channels=num_channels,
size_x=pool_size, size_x=pool_size,
start=start, start=start,
stride=stride, stride=stride,
padding=padding padding=padding,
size_y=pool_size_y,
stride_y=stride_y,
padding_y=padding_y,
img_width=img_width
))], ))],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr)
) )
@ -2751,7 +2774,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False, tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False,
name=name) name=name)
return tmp return tmp

@ -170,13 +170,13 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None,
:type shared_bias: bool :type shared_bias: bool
:param conv_layer_attr: see img_conv_layer for details :param conv_layer_attr: see img_conv_layer for details
:type conv_layer_attr: ExtraLayerAttribute :type conv_layer_attr: ExtraLayerAttribute
:param pool_stride: see img_conv_layer for details :param pool_stride: see img_pool_layer for details
:type pool_stride: int :type pool_stride: int
:param pool_start: see img_conv_layer for details :param pool_start: see img_pool_layer for details. It is deprecated now.
:type pool_start: int :type pool_start: int
:param pool_padding: see img_conv_layer for details :param pool_padding: see img_pool_layer for details
:type pool_padding: int :type pool_padding: int
:param pool_layer_attr: see img_conv_layer for details :param pool_layer_attr: see img_pool_layer for details
:type pool_layer_attr: ExtraLayerAttribute :type pool_layer_attr: ExtraLayerAttribute
:return: Layer's output :return: Layer's output
:rtype: LayerOutput :rtype: LayerOutput
@ -243,7 +243,7 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None,
:param bn_layer_attr: ParameterAttribute. :param bn_layer_attr: ParameterAttribute.
:param pool_stride: see img_pool_layer's document. :param pool_stride: see img_pool_layer's document.
:type pool_stride: int :type pool_stride: int
:param pool_start: see img_pool_layer's document. :param pool_start: see img_pool_layer's document. It is deprecated now.
:type pool_start: int :type pool_start: int
:param pool_padding: see img_pool_layer's document. :param pool_padding: see img_pool_layer's document.
:type pool_padding: int :type pool_padding: int
@ -555,7 +555,7 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: lstm state activiation type. :param state_act: lstm state activiation type.
:type state_act: BaseActivation :type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer. :param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias. False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False :type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer. :param lstm_bias_attr: bias parameter attribute of lstm layer.

@ -19,6 +19,8 @@ __all__ = [
"BasePoolingType", "BasePoolingType",
"MaxPooling", "MaxPooling",
"AvgPooling", "AvgPooling",
"CudnnMaxPooling",
"CudnnAvgPooling",
"SumPooling", "SumPooling",
"SquareRootNPooling" "SquareRootNPooling"
] ]
@ -26,7 +28,7 @@ __all__ = [
class BasePoolingType(object): class BasePoolingType(object):
""" """
Base Pooling Type. Base Pooling Type.
Note these pooling types are used for sequence input, not for images. Note these pooling types are used for sequence input, not for images.
Each PoolingType contains one parameter: Each PoolingType contains one parameter:
@ -55,7 +57,24 @@ class MaxPooling(BasePoolingType):
def __init__(self, output_max_index=None): def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max") BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index self.output_max_index = output_max_index
class CudnnMaxPooling(BasePoolingType):
"""
Cudnn max pooling only support GPU. Return the maxinum value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool")
class CudnnAvgPooling(BasePoolingType):
"""
Cudnn average pooling only support GPU. Return the average value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool")
class AvgPooling(BasePoolingType): class AvgPooling(BasePoolingType):
""" """

@ -1,4 +1,4 @@
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr 86c0815275a9d5eb902e23c6a592f58a img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr 9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr 5913f87b39cee3b2701fa158270aca26 projections.protostr

@ -7,8 +7,10 @@ settings(
img = data_layer(name='image', size=256*256) img = data_layer(name='image', size=256*256)
# the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
filter_size=(32, 64), padding=(1, 0), stride=(1, 1), filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
act=LinearActivation()) act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
@ -17,4 +19,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm) outputs(img_pool, img_norm)

Loading…
Cancel
Save