support rectangle padding, stride, window and input for PoolProjection (#115)

* support rectangle padding, stride, window and input for PoolProjection

* Follow comments.
1. Remove start
2. refine img_pool_a/b.conf for test_NetworkCompare
3. Split unit test

* Modify the test in img_layers.py
avx_docs
qingqing01 8 years ago committed by hedaoyuan
parent 8a044d2e2d
commit 191fafe355

@ -84,16 +84,23 @@ extern void hl_expand_feature2col(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* *
*/ */
extern void hl_maxpool_forward( extern void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData); const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
@ -107,21 +114,28 @@ extern void hl_maxpool_forward(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[out] targetGrad output grad. * @param[in] strideW pooling stride width.
* @param[in] scaleA scale. * @param[in] scaleA scale.
* @param[in] scaleB scale. * @param[in] scaleB scale.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] targetGrad output grad.
* *
*/ */
extern void hl_maxpool_backward( extern void hl_maxpool_backward(
int frameCnt, const real* inputData, const int frameCnt, const real* inputData,
const real* outData, const real* outGrad, const real* outData, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* targetGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB); const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad);
/** /**
* @brief Averge pool forward. * @brief Averge pool forward.
@ -133,16 +147,23 @@ extern void hl_maxpool_backward(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[out] tgtData output data. * @param[out] tgtData output data.
* *
*/ */
extern void hl_avgpool_forward( extern void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData); const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData);
/** /**
* @brief Maximum pool backward. * @brief Maximum pool backward.
@ -154,20 +175,27 @@ extern void hl_avgpool_forward(
* @param[in] width image width. * @param[in] width image width.
* @param[in] pooledH output image height. * @param[in] pooledH output image height.
* @param[in] pooledW output image width. * @param[in] pooledW output image width.
* @param[in] sizeX size of pooling window. * @param[in] sizeX width of pooling window.
* @param[in] stride pooling stride. * @param[in] sizeY height of pooling window.
* @param[in] start pooling start. * @param[in] strideH pooling stride height.
* @param[out] backGrad output grad. * @param[in] strideW pooling stride width.
* @param[in] paddingH padding height.
* @param[in] paddingW padding width.
* @param[in] scaleA scale. * @param[in] scaleA scale.
* @param[in] scaleB scale. * @param[in] scaleB scale.
* @param[out] backGrad output grad.
* *
*/ */
extern void hl_avgpool_backward( extern void hl_avgpool_backward(
int frameCnt, const real* outGrad, const int frameCnt, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* backGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB); const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad);
/** /**
* @brief Cross-map-respose normalize forward. * @brief Cross-map-respose normalize forward.

@ -38,29 +38,45 @@ inline void hl_expand_feature2col(
real* dataCol) {} real* dataCol) {}
inline void hl_maxpool_forward( inline void hl_maxpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData) {} const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_maxpool_backward( inline void hl_maxpool_backward(
int frameCnt, const real* inputData, const int frameCnt, const real* inputData,
const real* outData, const real* outGrad, const real* outData, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* targetGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB) {} const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW,
real scaleA, real scaleB,
real* targetGrad) {}
inline void hl_avgpool_forward( inline void hl_avgpool_forward(
int frameCnt, const real* inputData, int channels, const int frameCnt, const real* inputData,
int height, int width, int pooledH, int pooledW, const int channels,
int sizeX, int stride, int start, real* tgtData) {} const int height, const int width,
const int pooledH, const int pooledW,
const int sizeX, const int sizeY,
const int strideH, const int strideW,
const int paddingH, const int paddingW, real* tgtData) {}
inline void hl_avgpool_backward( inline void hl_avgpool_backward(
int frameCnt, const real* outGrad, const int frameCnt, const real* outGrad,
int channels, int height, int width, const int channels, const int height,
int pooledH, int pooledW, int sizeX, const int width,
int stride, int start, real* backGrad, const int pooledH, const int pooledW,
real scaleA, real scaleB) {} const int sizeX, const int sizeY,
const int strideH, const int strideW,
int paddingH, int paddingW,
real scaleA, real scaleB,
real* backGrad) {}
inline void hl_CMRNorm_forward( inline void hl_CMRNorm_forward(
size_t frameCnt, const real* in, real* scale, real* out, size_t frameCnt, const real* in, real* scale, real* out,

File diff suppressed because it is too large Load Diff

@ -51,7 +51,6 @@ bool CudnnPoolLayer::init(const LayerMap &layerMap,
PoolLayer::init(layerMap, parameterMap); PoolLayer::init(layerMap, parameterMap);
CHECK(useGpu_) << "CudnnPoolLayer only support gpu"; CHECK(useGpu_) << "CudnnPoolLayer only support gpu";
CHECK_EQ(start_, 0) << poolType_ << " dose not support 'start'";
hl_create_tensor_descriptor(&inputDesc_); hl_create_tensor_descriptor(&inputDesc_);
hl_create_tensor_descriptor(&outputDesc_); hl_create_tensor_descriptor(&outputDesc_);

@ -56,16 +56,6 @@ public:
void reshape(int batchSize); void reshape(int batchSize);
virtual void forward(PassType passType); virtual void forward(PassType passType);
virtual void backward(const UpdateCallback& callback = nullptr); virtual void backward(const UpdateCallback& callback = nullptr);
/**
* Calculate output size according window size of pooling.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
}; };
} // namespace paddle } // namespace paddle

@ -35,7 +35,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
poolType_ = conf.pool_type(); poolType_ = conf.pool_type();
channels_ = conf.channels(); channels_ = conf.channels();
sizeX_ = conf.size_x(); sizeX_ = conf.size_x();
start_ = conf.start();
stride_ = conf.stride(); stride_ = conf.stride();
outputX_ = conf.output_x(); outputX_ = conf.output_x();
imgSize_ = conf.img_size(); imgSize_ = conf.img_size();
@ -47,22 +46,6 @@ bool PoolLayer::init(const LayerMap& layerMap,
confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding(); confPaddingY_ = conf.has_padding_y() ? conf.padding_y() : conf.padding();
outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); outputY_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
bool cudnnTypeCheck = true;
#ifndef PADDLE_ONLY_CPU
cudnnTypeCheck = !CudnnPoolLayer::typeCheck(poolType_);
#endif
if ((sizeY_ != sizeX_ || imgSizeY_ != imgSize_ || strideY_ != stride_ ||
confPaddingY_ != confPadding_ || outputY_ != outputX_) &&
cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not supported non-square "
"filter, image, stride or padding";
}
if (confPadding_ != 0 && cudnnTypeCheck) {
LOG(FATAL) << poolType_ << " does not supported 'padding'";
}
return true; return true;
} }

@ -28,7 +28,7 @@ namespace paddle {
class PoolLayer : public Layer { class PoolLayer : public Layer {
protected: protected:
size_t channels_, sizeX_, stride_, outputX_, imgSize_; size_t channels_, sizeX_, stride_, outputX_, imgSize_;
int start_, confPadding_; int confPadding_;
size_t sizeY_; size_t sizeY_;
size_t imgSizeY_; size_t imgSizeY_;
@ -47,6 +47,16 @@ public:
static Layer* create(const LayerConfig& config); static Layer* create(const LayerConfig& config);
virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
/**
* Calculate output size according window size and padding size.
*/
int outputSize(int imageSize, int windowSize, int padding, int stride) {
int outputSize;
outputSize =
(imageSize - windowSize + 2 * padding + stride - 1) / stride + 1;
return outputSize;
}
}; };
} // namespace paddle } // namespace paddle

@ -25,13 +25,15 @@ size_t PoolProjectionLayer::getSize() {
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight(); imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth(); imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
if (imgSizeH_ == 0) { if (imgSizeH_ == 0) {
imgSizeH_ = imgSize_; imgSizeH_ = imgSizeY_;
} }
if (imgSizeW_ == 0) { if (imgSizeW_ == 0) {
imgSizeW_ = imgSize_; imgSizeW_ = imgSize_;
} }
outputH_ = 1 + (imgSizeH_ - start_ - sizeX_ + stride_ - 1) / stride_;
outputW_ = 1 + (imgSizeW_ - start_ - sizeX_ + stride_ - 1) / stride_; outputH_ = outputSize(imgSizeH_, sizeY_, confPaddingY_, strideY_);
outputW_ = outputSize(imgSizeW_, sizeX_, confPadding_, stride_);
layerSize = outputH_ * outputW_ * channels_; layerSize = outputH_ * outputW_ * channels_;
getOutput().setFrameHeight(outputH_); getOutput().setFrameHeight(outputH_);
@ -51,8 +53,9 @@ void MaxPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_, outV->maxPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
stride_, outputH_, outputW_); sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
} }
void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) { void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
@ -69,7 +72,9 @@ void MaxPoolProjectionLayer::backward(const UpdateCallback& callback) {
MatrixPtr inputGrad = getInputGrad(0); MatrixPtr inputGrad = getInputGrad(0);
inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV, inputGrad->maxPoolBackward(*inputV, imgSizeH_, imgSizeW_, *outGrad, *outV,
sizeX_, start_, stride_, outputH_, outputW_, 1, 1); sizeX_, sizeY_,
strideY_, stride_, outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
} }
void AvgPoolProjectionLayer::forward(PassType passType) { void AvgPoolProjectionLayer::forward(PassType passType) {
@ -84,8 +89,9 @@ void AvgPoolProjectionLayer::forward(PassType passType) {
MatrixPtr outV = getOutputValue(); MatrixPtr outV = getOutputValue();
outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_, sizeX_, start_, outV->avgPoolForward(*input, imgSizeH_, imgSizeW_, channels_,
stride_, outputH_, outputW_); sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, confPaddingY_, confPadding_);
} }
void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) { void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
@ -97,7 +103,9 @@ void AvgPoolProjectionLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ /* Do derivation */
MatrixPtr outputGrad = getOutputGrad(); MatrixPtr outputGrad = getOutputGrad();
MatrixPtr inputGrad = getInputGrad(0); MatrixPtr inputGrad = getInputGrad(0);
inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_, sizeX_, start_, inputGrad->avgPoolBackward(*outputGrad, imgSizeH_, imgSizeW_,
stride_, outputH_, outputW_, 1, 1); sizeX_, sizeY_, strideY_, stride_,
outputH_, outputW_, 1, 1,
confPaddingY_, confPadding_);
} }
} // namespace paddle } // namespace paddle

@ -0,0 +1,46 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8,
num_filters=8,stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=MaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
img_width=16,
pool_type=AvgPooling(),
)
outputs([maxpool, avgpool])

@ -0,0 +1,44 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=10)
data = data_layer(name ="input", size=8*16*16)
conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
num_channels=8, num_filters=8, stride=1)
maxpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnMaxPooling(),
)
avgpool = img_pool_layer(input=conv,
pool_size=3,
pool_size_y=5,
num_channels=8,
stride=1,
stride_y=2,
padding=1,
padding_y=2,
pool_type=CudnnAvgPooling(),
)
outputs([maxpool, avgpool])

@ -791,21 +791,24 @@ void setPoolConfig(TestConfig* config, PoolConfig* pool,
(*config).biasSize = 0; (*config).biasSize = 0;
(*config).layerConfig.set_type("pool"); (*config).layerConfig.set_type("pool");
(*config).layerConfig.set_num_filters(16); (*config).layerConfig.set_num_filters(16);
(*config).layerConfig.set_partial_sum(1);
(*config).layerConfig.set_shared_biases(true);
int kw = 3, kh = 3;
int pw = 0, ph = 0;
int sw = 2, sh = 2;
pool->set_pool_type(poolType); pool->set_pool_type(poolType);
pool->set_channels(16); pool->set_channels(16);
pool->set_size_x(3); pool->set_size_x(kw);
if (poolType == "cudnn-max-pool" || poolType == "cudnn-avg-pool") { pool->set_size_y(kh);
pool->set_padding(0); pool->set_start(0);
} else { pool->set_padding(pw);
pool->set_start(0); pool->set_padding_y(ph);
} pool->set_stride(sw);
pool->set_stride(2); pool->set_stride_y(sh);
pool->set_output_x((pool->img_size() - pool->start() - pool->size_x()) /
((float)pool->stride()) + int ow = (pool->img_size() - kw + 2 * pw + sw - 1) / sw + 1;
1.5); int oh = (pool->img_size_y() - kh + 2 * ph + sh - 1) / sh + 1;
pool->set_output_x(ow);
pool->set_output_y(oh);
} }
void testPoolLayer(const string& poolType, bool trans, bool useGpu) { void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
@ -814,9 +817,10 @@ void testPoolLayer(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf(); PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_img_size(14); pool->set_img_size(14);
config.layerConfig.set_size(pool->output_x() * pool->output_x() * pool->set_img_size_y(14);
setPoolConfig(&config, pool, poolType);
config.layerConfig.set_size(pool->output_x() * pool->output_y() *
pool->channels()); pool->channels());
testLayerGrad(config, "pool", 100, trans, useGpu); testLayerGrad(config, "pool", 100, trans, useGpu);
@ -829,11 +833,11 @@ void testPoolLayer2(const string& poolType, bool trans, bool useGpu) {
LayerInputConfig* input = config.layerConfig.add_inputs(); LayerInputConfig* input = config.layerConfig.add_inputs();
PoolConfig* pool = input->mutable_pool_conf(); PoolConfig* pool = input->mutable_pool_conf();
setPoolConfig(&config, pool, poolType);
pool->set_size_y(4); pool->set_size_y(4);
pool->set_stride_y(3); pool->set_stride_y(3);
pool->set_img_size(10); pool->set_img_size(10);
pool->set_img_size_y(20); pool->set_img_size_y(20);
setPoolConfig(&config, pool, poolType);
pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) /
((float)pool->stride_y()) + ((float)pool->stride_y()) +
1.5); 1.5);
@ -1252,8 +1256,6 @@ TEST(Layer, MultiplexLayer) {
} }
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv); testing::InitGoogleTest(&argc, argv);
initMain(argc, argv); initMain(argc, argv);

@ -116,6 +116,8 @@ void calcGradient(DataIn& in, DataOut& out, const std::string& configPath) {
gradientMachine->start(trainer.getConfig(), nullptr); gradientMachine->start(trainer.getConfig(), nullptr);
gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN); gradientMachine->forward(in.inArgs, &outArgs, PASS_TRAIN);
for (size_t i = 0; i < in.outGrads.size(); i++) { for (size_t i = 0; i < in.outGrads.size(); i++) {
// If the all the layers in the config have no parameters, also
// not set NeedGradient(), the outArgs[i] will be nullptr.
outArgs[i].grad->copyFrom(*in.outGrads[i]); outArgs[i].grad->copyFrom(*in.outGrads[i]);
} }
gradientMachine->backward(); gradientMachine->backward();
@ -225,6 +227,18 @@ TEST(Compare, concat_table) {
compareNetwork(config_file_a, config_file_b); compareNetwork(config_file_a, config_file_b);
} }
#ifndef PADDLE_ONLY_CPU
TEST(Compare, img_pool) {
std::string config_file_a = "./gserver/tests/img_pool_a.conf";
std::string config_file_b = "./gserver/tests/img_pool_b.conf";
bool useGpu = FLAGS_use_gpu;
FLAGS_use_gpu = true;
compareNetwork(config_file_a, config_file_b);
FLAGS_use_gpu = useGpu;
}
#endif
P_DEFINE_string(config_file_a, "", "config of one network to compare"); P_DEFINE_string(config_file_a, "", "config of one network to compare");
P_DEFINE_string(config_file_b, "", "config of another network to compare"); P_DEFINE_string(config_file_b, "", "config of another network to compare");
TEST(Compare, network) { TEST(Compare, network) {

File diff suppressed because it is too large Load Diff

@ -742,31 +742,37 @@ public:
*/ */
virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH, virtual void maxPoolForward(Matrix& inputMat, size_t imgSizeH,
size_t imgSizeW, size_t channels, size_t sizeX, size_t imgSizeW, size_t channels, size_t sizeX,
int start_, size_t stride, size_t outputH, size_t sizeY, size_t strideH, size_t strideW,
size_t outputW) { size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
/// Pooling backward operation. /// Pooling backward operation.
virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, virtual void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, Matrix& outGrad, Matrix& outV, size_t sizeX,
int start, size_t stride, size_t outputH, size_t sizeY, size_t strideH, size_t strideW,
size_t outputW, real scaleTargets, size_t outputH, size_t outputW,
real scaleOutput) { real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
/// Pooling forward operation, caculate the average of sizeX elements. /// Pooling forward operation, caculate the average of sizeX elements.
virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, virtual void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t channels, size_t sizeX, size_t sizeY,
size_t stride, size_t outputH, size_t outputW) { size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, virtual void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t sizeX, size_t sizeY,
size_t strideH, size_t strideW,
size_t outputH, size_t outputW, size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput) { real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW) {
LOG(FATAL) << "Not implemeted"; LOG(FATAL) << "Not implemeted";
} }
@ -1131,21 +1137,30 @@ public:
real alpha = 1.0f, real beta = 0.0f); real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start, Matrix& outGrad, Matrix& outV, size_t sizeX,
size_t stride, size_t outputH, size_t outputW, size_t sizeY, size_t strideH, size_t strideW,
real scaleTargets, real scaleOutput); size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH, size_t sizeX, size_t sizeY,
size_t outputW, real scaleTargets, real scaleOutput); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW, void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX, Matrix& denoms, size_t channels, size_t sizeX,
@ -1242,21 +1257,31 @@ public:
real alpha = 1.0f, real beta = 0.0f); real alpha = 1.0f, real beta = 0.0f);
void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW, void maxPoolForward(Matrix& inputMat, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start_, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW, void maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
Matrix& outGrad, Matrix& outV, size_t sizeX, int start, Matrix& outGrad, Matrix& outV,
size_t stride, size_t outputH, size_t outputW, size_t sizeX, size_t sizeY,
real scaleTargets, real scaleOutput); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolForward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t channels, size_t sizeX, int start, size_t stride, size_t channels, size_t sizeX, size_t sizeY,
size_t outputH, size_t outputW); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
size_t paddingH, size_t paddingW);
void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW, void avgPoolBackward(Matrix& input, size_t imgSizeH, size_t imgSizeW,
size_t sizeX, int start, size_t stride, size_t outputH, size_t sizeX, size_t sizeY,
size_t outputW, real scaleTargets, real scaleOutput); size_t strideH, size_t strideW,
size_t outputH, size_t outputW,
real scaleTargets, real scaleOutput,
size_t paddingH, size_t paddingW);
void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW, void crossMapNormalFwd(Matrix& input, size_t imgSizeH, size_t imgSizeW,
Matrix& denoms, size_t channels, size_t sizeX, Matrix& denoms, size_t channels, size_t sizeX,

@ -1846,6 +1846,159 @@ TEST(Matrix, classificationError) {
} }
} }
void testMaxPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->maxPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->maxPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
checkMatrixEqual(target, targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->maxPoolBackward(*input, imgSizeH, imgSizeW,
*targetGrad, *target,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->maxPoolBackward(*inputGpu, imgSizeH, imgSizeW,
*targetGpuGrad, *targetGpu,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
checkMatrixEqual(inputGrad, targetBwdCheck);
}
void testAvgPoolFwdBwd(int numSamples, int channels,
int imgSizeH, int imgSizeW,
int ksizeH, int ksizeW,
int strideH, int strideW,
int padH, int padW) {
int outH = 0, outW = 0;
outH = (imgSizeH - ksizeH + 2 * padH + strideH - 1) / strideH + 1;
outW = (imgSizeW - ksizeW + 2 * padW + strideW - 1) / strideW + 1;
int inWidth = imgSizeH * imgSizeW * channels;
MatrixPtr input = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpu = GpuMatrix::create(numSamples, inWidth, false, true);
int outWidth = channels * outH * outW;
MatrixPtr target = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpu = GpuMatrix::create(numSamples, outWidth, false, true);
input->randomizeUniform();
target->randomizeUniform();
inputGpu->copyFrom(*input);
targetGpu->copyFrom(*target);
target->avgPoolForward(*input, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
targetGpu->avgPoolForward(*inputGpu, imgSizeH, imgSizeW,
channels, ksizeW, ksizeH,
strideH, strideW, outH, outW, padH, padW);
MatrixPtr targetCheck = CpuMatrix::create(numSamples, outWidth, false, false);
targetCheck->copyFrom(*targetGpu);
MatrixCheckErr(*target, *targetCheck);
MatrixPtr inputGrad = CpuMatrix::create(numSamples, inWidth, false, false);
MatrixPtr inputGpuGrad = GpuMatrix::create(numSamples, inWidth, false, true);
MatrixPtr targetGrad = CpuMatrix::create(numSamples, outWidth, false, false);
MatrixPtr targetGpuGrad = GpuMatrix::create(numSamples, outWidth,
false, true);
inputGrad->randomizeUniform();
targetGrad->randomizeUniform();
inputGpuGrad->copyFrom(*inputGrad);
targetGpuGrad->copyFrom(*targetGrad);
inputGrad->avgPoolBackward(*targetGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
inputGpuGrad->avgPoolBackward(*targetGpuGrad, imgSizeH, imgSizeW,
ksizeW, ksizeH,
strideH, strideW,
outH, outW, 1.0, 1.0, padH, padW);
MatrixPtr targetBwdCheck = CpuMatrix::create(numSamples, inWidth,
false, false);
targetBwdCheck->copyFrom(*inputGpuGrad);
MatrixCheckErr(*inputGrad, *targetBwdCheck);
}
// Sweeps a grid of pooling configurations (batch size, channels, image
// height/width, rectangular window, rectangular stride, padding) and
// cross-checks the CPU and GPU paths for both max and average pooling.
// Padding candidates are derived from the window so padding < window.
TEST(Matrix, PoolFwdBwd) {
  for (auto numSamples : {5, 32}) {
    for (auto channels : {1, 9, 32}) {
      for (auto imgSizeH : {14, 28}) {
        for (auto imgSizeW : {16, 30}) {
          for (auto sizeX : {2, 5}) {
            for (auto sizeY : {2, 5}) {
              for (auto strideH : {1, 2}) {
                for (auto strideW : {1, 2}) {
                  for (auto padH : {0, (sizeY - 1) / 2}) {
                    for (auto padW : {0, (sizeX - 1) / 2}) {
                      // Log the configuration so failures are reproducible.
                      VLOG(3) << " numSamples=" << numSamples
                              << " channels=" << channels
                              << " imgSizeH=" << imgSizeH
                              << " imgSizeW=" << imgSizeW
                              << " sizeX=" << sizeX
                              << " sizeY=" << sizeY
                              << " strideH=" << strideH
                              << " strideW=" << strideW
                              << " padingH=" << padH
                              << " padingW=" << padW;
                      testMaxPoolFwdBwd(numSamples, channels, imgSizeH,
                                        imgSizeW, sizeX, sizeY,
                                        strideH, strideW, padH, padW);
                      testAvgPoolFwdBwd(numSamples, channels, imgSizeH,
                                        imgSizeW, sizeX, sizeY,
                                        strideH, strideW, padH, padW);
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);

@ -88,7 +88,8 @@ message PoolConfig {
required uint32 size_x = 3; required uint32 size_x = 3;
// Tell the net where in the input image to start the pooling. // Tell the net where in the input image to start the pooling.
required uint32 start = 4; // start is deprecated now.
optional uint32 start = 4;
// Defines the stride size between successive pooling squares. // Defines the stride size between successive pooling squares.
required uint32 stride = 5; required uint32 stride = 5;

@ -961,10 +961,6 @@ def parse_pool(pool, input_layer_name, pool_conf):
"['max-projection', 'avg-projection', " "['max-projection', 'avg-projection', "
"'cudnn-max-pool', 'cudnn-avg-pool']" "'cudnn-max-pool', 'cudnn-avg-pool']"
% pool.pool_type) % pool.pool_type)
if pool.size_y or pool.stride_y or pool.img_width or pool.padding_y:
config_assert(pool.pool_type.startswith('cudnn'),
"'size_y', 'stride_y' and 'img_width' and 'padding_y'"
"can only be used for cudnn")
pool_conf.channels = pool.channels pool_conf.channels = pool.channels
pool_conf.size_x = pool.size_x pool_conf.size_x = pool.size_x
@ -974,36 +970,25 @@ def parse_pool(pool, input_layer_name, pool_conf):
pool_conf.stride_y = default(pool.stride_y, pool_conf.stride); pool_conf.stride_y = default(pool.stride_y, pool_conf.stride);
img_pixels = g_layer_map[input_layer_name].size / pool.channels img_pixels = g_layer_map[input_layer_name].size / pool.channels
# the img_width may be removed,
# and it can be calculated automatically later.
pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5)) pool_conf.img_size = default(pool.img_width, int(img_pixels ** 0.5))
pool_conf.img_size_y = img_pixels / pool_conf.img_size pool_conf.img_size_y = img_pixels / pool_conf.img_size
config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels, config_assert(pool_conf.img_size * pool_conf.img_size_y == img_pixels,
"Incorrect input image size %d for input image pixels %d" "Incorrect input image size %d for input image pixels %d"
% (pool_conf.img_size, img_pixels)) % (pool_conf.img_size, img_pixels))
if pool.start is not None: config_assert(not pool.start, "start is deprecated in pooling.")
config_assert(pool.padding is None,
'At most one of start and padding can be set.')
pool_conf.start = pool.start
pool_conf.padding = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size - \
pool_conf.start - pool_conf.size_x) / \
float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y - \ if pool.padding is not None:
pool_conf.start - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1
elif pool.padding is not None:
pool_conf.padding = pool.padding pool_conf.padding = pool.padding
pool_conf.padding_y = default(pool.padding_y, pool_conf.padding) pool_conf.padding_y = default(pool.padding_y, pool_conf.padding)
pool_conf.start = 0
pool_conf.output_x = int(math.ceil((pool_conf.img_size + \ pool_conf.output_x = int(math.ceil((pool_conf.img_size + \
2*pool_conf.padding - pool_conf.size_x) / \ 2*pool_conf.padding - pool_conf.size_x) / \
float(pool_conf.stride))) + 1 float(pool_conf.stride))) + 1
pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \ pool_conf.output_y = int(math.ceil((pool_conf.img_size_y + \
2*pool_conf.padding_y - pool_conf.size_y) / \ 2*pool_conf.padding_y - pool_conf.size_y) / \
float(pool_conf.stride_y))) + 1 float(pool_conf.stride_y))) + 1
else:
raise ValueError('At least one of start and padding should be set.')
def parse_image(image, input_layer_name, image_conf): def parse_image(image, input_layer_name, image_conf):
image_conf.channels = image.channels image_conf.channels = image.channels
@ -1603,7 +1588,7 @@ class PoolLayer(LayerBase):
pool_conf = self.config.inputs[input_index].pool_conf pool_conf = self.config.inputs[input_index].pool_conf
print("output size for %s is %d*%d " % ( print("output size for %s is %d*%d " % (
name, pool_conf.output_y, pool_conf.output_x)) name, pool_conf.output_y, pool_conf.output_x))
self.set_layer_size((pool_conf.output_x ** 2) * pool_conf.channels) self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
@config_layer('batch_norm') @config_layer('batch_norm')
class BatchNormLayer(LayerBase): class BatchNormLayer(LayerBase):

@ -210,7 +210,7 @@ DEVICE = 'device'
def layer_support(*attrs): def layer_support(*attrs):
attrs_list = list(attrs) attrs_list = list(attrs)
attrs_list.append(DEVICE) attrs_list.append(DEVICE)
def decorator(method): def decorator(method):
@functools.wraps(method) @functools.wraps(method)
@ -1627,7 +1627,9 @@ def img_conv_layer(input, filter_size, num_filters,
@layer_support() @layer_support()
def img_pool_layer(input, pool_size, name=None, def img_pool_layer(input, pool_size, name=None,
num_channels=None, pool_type=None, num_channels=None, pool_type=None,
stride=1, start=None, padding=0, layer_attr=None): stride=1, start=None, padding=0, layer_attr=None,
pool_size_y=None, stride_y=None, padding_y=None,
img_width=None):
""" """
Image pooling Layer. Image pooling Layer.
@ -1635,25 +1637,34 @@ def img_pool_layer(input, pool_size, name=None,
.. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/ .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
:param padding: pooling padding :param padding: pooling padding width.
:type padding: int :type padding: int
:param padding_y: pooling padding height. It's equal to padding by default.
:type padding_y: int|None
:param name: name of pooling layer :param name: name of pooling layer
:type name: basestring. :type name: basestring.
:param input: layer's input :param input: layer's input
:type input: LayerOutput :type input: LayerOutput
:param pool_size: pooling size :param pool_size: pooling window width
:type pool_size: int :type pool_size: int
:param pool_size_y: pooling window height. It's eaqual to pool_size by default.
:type pool_size_y: int|None
:param num_channels: number of input channel. :param num_channels: number of input channel.
:type num_channels: int :type num_channels: int
:param pool_type: pooling type. MaxPooling or AveragePooling. Default is :param pool_type: pooling type. MaxPooling or AveragePooling. Default is
MaxPooling. MaxPooling.
:type pool_type: BasePoolingType :type pool_type: BasePoolingType
:param stride: stride of pooling. :param stride: stride width of pooling.
:type stride: int :type stride: int
:param start: start position of pooling operation. :param stride_y: stride height of pooling. It is equal to stride by default.
:type start: int :type stride_y: int|None
:param start: start position of pooling operation. Note it is deprecated now.
:type start: int|None
:param layer_attr: Extra Layer attribute. :param layer_attr: Extra Layer attribute.
:type layer_attr: ExtraLayerAttribute :type layer_attr: ExtraLayerAttribute
:param img_width: the width of input feature map. If it is None, the input feature
map should be square.
:type img_width: int|None
:return: LayerOutput object. :return: LayerOutput object.
:rtype: LayerOutput :rtype: LayerOutput
""" """
@ -1666,17 +1677,29 @@ def img_pool_layer(input, pool_size, name=None,
elif isinstance(pool_type, AvgPooling): elif isinstance(pool_type, AvgPooling):
pool_type.name = 'avg' pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y
padding_y = padding if padding_y is None else padding_y
Layer( Layer(
name=name, name=name,
type=LayerType.POOL_LAYER, type=LayerType.POOL_LAYER,
inputs=[Input(input.name, inputs=[Input(input.name,
pool=Pool( pool=Pool(
pool_type=''.join([pool_type.name, '-projection']), pool_type=type_name,
channels=num_channels, channels=num_channels,
size_x=pool_size, size_x=pool_size,
start=start, start=start,
stride=stride, stride=stride,
padding=padding padding=padding,
size_y=pool_size_y,
stride_y=stride_y,
padding_y=padding_y,
img_width=img_width
))], ))],
**ExtraLayerAttribute.to_kwargs(layer_attr) **ExtraLayerAttribute.to_kwargs(layer_attr)
) )
@ -2751,7 +2774,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False, tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False,
name=name) name=name)
return tmp return tmp

@ -170,13 +170,13 @@ def simple_img_conv_pool(input, filter_size, num_filters, pool_size, name=None,
:type shared_bias: bool :type shared_bias: bool
:param conv_layer_attr: see img_conv_layer for details :param conv_layer_attr: see img_conv_layer for details
:type conv_layer_attr: ExtraLayerAttribute :type conv_layer_attr: ExtraLayerAttribute
:param pool_stride: see img_conv_layer for details :param pool_stride: see img_pool_layer for details
:type pool_stride: int :type pool_stride: int
:param pool_start: see img_conv_layer for details :param pool_start: see img_pool_layer for details. It is deprecated now.
:type pool_start: int :type pool_start: int
:param pool_padding: see img_conv_layer for details :param pool_padding: see img_pool_layer for details
:type pool_padding: int :type pool_padding: int
:param pool_layer_attr: see img_conv_layer for details :param pool_layer_attr: see img_pool_layer for details
:type pool_layer_attr: ExtraLayerAttribute :type pool_layer_attr: ExtraLayerAttribute
:return: Layer's output :return: Layer's output
:rtype: LayerOutput :rtype: LayerOutput
@ -243,7 +243,7 @@ def img_conv_bn_pool(input, filter_size, num_filters, pool_size, name=None,
:param bn_layer_attr: ParameterAttribute. :param bn_layer_attr: ParameterAttribute.
:param pool_stride: see img_pool_layer's document. :param pool_stride: see img_pool_layer's document.
:type pool_stride: int :type pool_stride: int
:param pool_start: see img_pool_layer's document. :param pool_start: see img_pool_layer's document. It is deprecated now.
:type pool_start: int :type pool_start: int
:param pool_padding: see img_pool_layer's document. :param pool_padding: see img_pool_layer's document.
:type pool_padding: int :type pool_padding: int
@ -555,7 +555,7 @@ def lstmemory_unit(input, name=None, size=None, param_attr=None,
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param state_act: lstm state activiation type. :param state_act: lstm state activiation type.
:type state_act: BaseActivation :type state_act: BaseActivation
:param mixed_bias_attr: bias parameter attribute of mixed layer. :param mixed_bias_attr: bias parameter attribute of mixed layer.
False means no bias, None means default bias. False means no bias, None means default bias.
:type mixed_bias_attr: ParameterAttribute|False :type mixed_bias_attr: ParameterAttribute|False
:param lstm_bias_attr: bias parameter attribute of lstm layer. :param lstm_bias_attr: bias parameter attribute of lstm layer.

@ -19,6 +19,8 @@ __all__ = [
"BasePoolingType", "BasePoolingType",
"MaxPooling", "MaxPooling",
"AvgPooling", "AvgPooling",
"CudnnMaxPooling",
"CudnnAvgPooling",
"SumPooling", "SumPooling",
"SquareRootNPooling" "SquareRootNPooling"
] ]
@ -26,7 +28,7 @@ __all__ = [
class BasePoolingType(object): class BasePoolingType(object):
""" """
Base Pooling Type. Base Pooling Type.
Note these pooling types are used for sequence input, not for images. Note these pooling types are used for sequence input, not for images.
Each PoolingType contains one parameter: Each PoolingType contains one parameter:
@ -55,7 +57,24 @@ class MaxPooling(BasePoolingType):
def __init__(self, output_max_index=None): def __init__(self, output_max_index=None):
BasePoolingType.__init__(self, "max") BasePoolingType.__init__(self, "max")
self.output_max_index = output_max_index self.output_max_index = output_max_index
class CudnnMaxPooling(BasePoolingType):
"""
Cudnn max pooling only support GPU. Return the maxinum value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-max-pool")
class CudnnAvgPooling(BasePoolingType):
"""
Cudnn average pooling only support GPU. Return the average value in the
pooling window.
"""
def __init__(self):
BasePoolingType.__init__(self, "cudnn-avg-pool")
class AvgPooling(BasePoolingType): class AvgPooling(BasePoolingType):
""" """

@ -1,4 +1,4 @@
7e6919d17562516e9a1d9a88de1fb3b9 img_layers.protostr 86c0815275a9d5eb902e23c6a592f58a img_layers.protostr
a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr a5d9259ff1fd7ca23d0ef090052cb1f2 last_first_seq.protostr
9c038249ec8ff719753a746cdb04c026 layer_activations.protostr 9c038249ec8ff719753a746cdb04c026 layer_activations.protostr
5913f87b39cee3b2701fa158270aca26 projections.protostr 5913f87b39cee3b2701fa158270aca26 projections.protostr

@ -7,8 +7,10 @@ settings(
img = data_layer(name='image', size=256*256) img = data_layer(name='image', size=256*256)
# the parse_conv in config_parse.py is not strictly accurate when filter_size
# is not square. So here set square filter_size.
img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64,
filter_size=(32, 64), padding=(1, 0), stride=(1, 1), filter_size=(32, 32), padding=(1, 1), stride=(1, 1),
act=LinearActivation()) act=LinearActivation())
img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation())
@ -17,4 +19,4 @@ img_norm = img_cmrnorm_layer(input=img_bn, size=32)
img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling())
outputs(img_pool, img_norm) outputs(img_pool, img_norm)

Loading…
Cancel
Save