parent aa2cd2ce8f
commit 2575b74fee

ConvBaseLayerCpu.cpp
@@ -0,0 +1,241 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/utils/Logging.h"
#include "ConvBaseLayerCpu.h"

namespace paddle {

bool ConvBaseLayerCpu::init(const LayerMap &layerMap,
                            const ParameterMap &parameterMap) {
  /* Initialize the basic convolutional parent class */
  ConvBaseLayer::init(layerMap, parameterMap);

  int channel;
  /* Initialize the projection */
  for (auto &inputConfig : config_.inputs()) {
    const ConvConfig &conf = inputConfig.conv_conf();
    subM_.push_back(numFilters_ / conf.groups());
    subN_.push_back(conf.output_x() * conf.output_x());
    channel = isConv_ ? conf.channels() : numFilters_;
    subK_.push_back(channel * conf.filter_size() * conf.filter_size() /
                    conf.groups());
    /* Consistent caffe mode for multiple input */
    caffeMode_ = conf.caffe_mode();
  }

  return true;
}
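
// Illustrative note (values assumed, not taken from any config in this diff):
// with numFilters_ = 64, groups = 2, channels = 32, filter_size = 5 and
// output_x = 28, the loop above yields subM = 64 / 2 = 32,
// subN = 28 * 28 = 784 and subK = 32 * 5 * 5 / 2 = 400, so each per-group
// GEMM in expandFwdOnce() below is (32 x 400) * (400 x 784) -> (32 x 784).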

void ConvBaseLayerCpu::resetExpandInput(size_t height, size_t width) {
  Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}

void ConvBaseLayerCpu::addSharedBias() {
  size_t mapW = getSize() / numFilters_;
  size_t mapH = getOutputValue()->getElementCnt() / mapW;
  MatrixPtr out =
      Matrix::create(getOutputValue()->getData(), mapH, mapW, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);

  out->transpose(transOutValue_, false);  // false means no memory allocation
  transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
                          numFilters_);

  MatrixPtr bias =
      Matrix::create(biases_->getW()->getData(), 1,
                     biases_->getW()->getElementCnt(), false, useGpu_);
  transOutValue_->addBias(*bias, 1.0f);

  transOutValue_->reshape(mapW, mapH);
  transOutValue_->transpose(out, false);  // false means no memory allocation

  out->clear();
  bias->clear();
}
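
// The transpose/reshape pair above rearranges the output so that each row
// holds one (sample, spatial position) and each column one filter; addBias()
// can then broadcast the per-filter bias across all positions before the
// layout is transposed back into the output buffer.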

void ConvBaseLayerCpu::addUnsharedBias() {
  MatrixPtr outValue = getOutputValue();
  MatrixPtr bias =
      Matrix::create(biases_->getW()->getData(), 1,
                     biases_->getW()->getElementCnt(), false, useGpu_);
  outValue->addBias(*bias, 1.0f);
}

void ConvBaseLayerCpu::expandOneFrame(MatrixPtr image, size_t startIdx,
                                      int inIdx) {
  int channel = isConv_ ? channels_[inIdx] : numFilters_;

  resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
  real *imgData = image->getData() + startIdx * image->getWidth();
  MatrixPtr imageTmp = Matrix::create(
      imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel, false,
      useGpu_);
  expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
                           channel, filterSize_[inIdx],
                           filterSize_[inIdx], stride_[inIdx], stride_[inIdx],
                           padding_[inIdx], padding_[inIdx],
                           outputH_[inIdx], outputW_[inIdx]);
  imageTmp->clear();
}
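
// convExpand() is the im2col step: it unrolls every filter-sized patch of
// one input frame into a column of expandInput_, giving the
// (channel * filterSize^2, outputH * outputW) layout documented in
// ConvBaseLayerCpu.h, so that convolution reduces to the GEMMs below.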

void ConvBaseLayerCpu::expandFwdOnce(MatrixPtr image, MatrixPtr out,
                                     int inIdx, int startIdx) {
  int subM = subM_[inIdx];
  int subN = subN_[inIdx];
  int subK = subK_[inIdx];

  expandOneFrame(image, startIdx, inIdx);

  int nf = isConv_ ? numFilters_ : channels_[inIdx];

  real *outData = out->getData() + startIdx * subN * nf;

  real *wgtData = weights_[inIdx]->getW()->getData();
  real *expInData = expandInput_->getData();
  for (int g = 0; g < groups_[inIdx]; ++g) {
    MatrixPtr A =
        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
    MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
    MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
    C->mul(A, B, 1, 1);

    A->clear();
    B->clear();
    C->clear();
    wgtData += subK * subM;
    expInData += subK * subN;
    outData += subM * subN;
  }
}
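
// Per group this computes C += A^T * B: the (subK x subM) weight block,
// marked transposed, times the (subK x subN) expanded-input block yields the
// (subM x subN) slice of the output; the raw pointers then step to the next
// group's blocks.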

void ConvBaseLayerCpu::bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx) {
  int channel = isConv_ ? channels_[inpIdx] : numFilters_;

  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];
  int subK = subK_[inpIdx];
  size_t batchSize = image->getHeight();
  MatrixPtr tgtGrad = out;

  /* reset the expand-grad memory */
  resetExpandInput(subK * groups_[inpIdx], subN);

  real *localGradData = image->getData();
  real *tgtGradData = tgtGrad->getData();
  for (size_t n = 0; n < batchSize; n++) {
    real *wgtData = weights_[inpIdx]->getW()->getData();
    real *expandInData = expandInput_->getData();

    for (int g = 0; g < groups_[inpIdx]; g++) {
      // create temporary matrix
      MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
      MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
      C->mul(A, B);  // mul

      // clear the temporary matrix
      A->clear();
      B->clear();
      C->clear();

      expandInData += subK * subN;
      localGradData += subM * subN;
      wgtData += subK * subM;
    }

    // shrink one frame outGrad
    MatrixPtr oneGradTmp = Matrix::create(
        expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
    MatrixPtr vTmp = Matrix::create(
        tgtGradData, 1,
        imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel, false,
        useGpu_);
    vTmp->convShrink(*oneGradTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
                     channel, filterSize_[inpIdx],
                     filterSize_[inpIdx], stride_[inpIdx], stride_[inpIdx],
                     padding_[inpIdx], padding_[inpIdx],
                     outputH_[inpIdx], outputW_[inpIdx], 1.0f, 1.0f);
    vTmp->clear();
    oneGradTmp->clear();

    // move the data-pointer
    tgtGradData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel;
  }
}
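
// The backward pass for the activations mirrors the forward GEMM: per group,
// C = A * B maps the (subK x subM) weight block times the (subM x subN)
// gradient slice back into column space, and convShrink() (the col2im
// counterpart of convExpand) then scatters those columns into the target
// gradient, one frame at a time.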

void ConvBaseLayerCpu::bpropWeights(MatrixPtr image, MatrixPtr out,
                                    int inpIdx) {
  MatrixPtr weightGrad = weights_[inpIdx]->getWGrad();

  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];
  int subK = subK_[inpIdx];
  size_t batchSize = image->getHeight();
  resetExpandInput(subK * groups_[inpIdx], subN);

  real *gradData = out->getData();

  for (size_t n = 0; n < batchSize; n++) {  // frame by frame
    // expand
    expandOneFrame(image, n, inpIdx);
    real *wGradData = weightGrad->getData();
    real *expandInData = expandInput_->getData();

    // expand-mul one-group by one
    for (int g = 0; g < groups_[inpIdx]; g++) {
      MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_);
      MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
      C->mul(A, B, 1, 1);

      A->clear();
      B->clear();
      C->clear();
      gradData += subM * subN;
      wGradData += subK * subM;
      expandInData += subK * subN;
    }
  }
}
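
// Here the weight gradient accumulates C += A * B^T per group and frame: the
// (subK x subN) expanded input times the transposed (subM x subN) output
// gradient gives the (subK x subM) block, matching the weight layout used in
// expandFwdOnce().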

void ConvBaseLayerCpu::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
  size_t mapW = getSize() / numFilters_;
  size_t mapH = v->getElementCnt() / mapW;
  MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);

  vTmp->transpose(transOutValue_, false);  // false means no memory allocation
  transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
                          numFilters_);
  biases->collectBias(*transOutValue_, 1.0f);
}

void ConvBaseLayerCpu::bpropBiases(MatrixPtr v) {
  MatrixPtr biases =
      Matrix::create(biases_->getWGrad()->getData(), 1,
                     biases_->getWGrad()->getElementCnt(), false, useGpu_);
  if (sharedBiases_) {
    bpropSharedBias(biases, v);
  } else {
    biases->collectBias(*v, 1.0f);
  }
  biases->clear();
}

}  // namespace paddle
ConvBaseLayerCpu.h
@@ -0,0 +1,91 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "ConvBaseLayer.h"
#include "paddle/math/Matrix.h"
#include <vector>

namespace paddle {

/**
 * @brief A subclass of ConvBaseLayer that is a superclass of both
 * ExpandConvLayer and ExpandConvTransLayer
 */
class ConvBaseLayerCpu : public ConvBaseLayer {
protected:
  /// For expand convolution.
  /// subM_ = numFilters_ / groups_.
  IntV subM_;
  /// subN_ = outputH_ * outputW_.
  IntV subN_;
  /// subK_ = channels_ * filterPixels_ / groups_.
  IntV subK_;
  /// The spatial height dimension of the input feature map.
  IntV imgSizeH_;
  /// The spatial width dimension of the input feature map.
  IntV imgSizeW_;
  /// The spatial height dimension of the output feature map.
  IntV outputH_;
  /// The spatial width dimension of the output feature map.
  IntV outputW_;

  /* The expandInput_ and transOutValue_ are used for CPU expand conv calc. */
  /// Expand one sample at a time. shape:
  /// (numChannels * filterPixels_, outputSizeH * outputSizeW)
  MatrixPtr expandInput_;
  /// The transpose of output, which is an auxiliary matrix.
  MatrixPtr transOutValue_;

public:
  explicit ConvBaseLayerCpu(const LayerConfig& config)
      : ConvBaseLayer(config) {}

  ~ConvBaseLayerCpu() {}

  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  /**
   * Create or resize expandInput_.
   */
  void resetExpandInput(size_t height, size_t width);

  /**
   * Add shared bias.
   */
  void addSharedBias();

  /**
   * Add unshared bias.
   */
  void addUnsharedBias();

  /**
   * Expand one input sample.
   */
  void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx);

  /**
   * Expand one input sample and perform matrix multiplication.
   */
  void expandFwdOnce(MatrixPtr image, MatrixPtr out, int inIdx, int startIdx);

  void bpropSharedBias(MatrixPtr biases, MatrixPtr v);
  void bpropBiases(MatrixPtr v);
  void bpropWeights(MatrixPtr image, MatrixPtr out, int inpIdx);
  void bpropActs(MatrixPtr image, MatrixPtr out, int inpIdx);
};
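
// Illustrative forward-pass sequence for a subclass (a sketch under assumed
// Layer accessors, not code from this diff): for each input i and each sample
// n of the batch, call expandFwdOnce(getInputValue(i), getOutputValue(), i, n),
// then finish with addSharedBias() or addUnsharedBias() according to
// sharedBiases_.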

}  // namespace paddle
ConvTransBaseLayer.cpp
@@ -1,88 +0,0 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/utils/Logging.h"
#include "ConvTransBaseLayer.h"

namespace paddle {

bool ConvTransBaseLayer::init(const LayerMap& layerMap,
                              const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  Layer::init(layerMap, parameterMap);

  /* Initialize the convolutional layer parameter */
  /* Everything is the same as ConvBaseLayer.cpp except that the meaning of
   * num_filters and channel is switched.
   *
   * In the config, num_filters refers to the number of feature maps in the
   * output of convTransLayer, and channel refers to the number of feature
   * maps in the input of convTransLayer.
   *
   * However, within the convTrans class, the channel is related to the output
   * and num_filters is related to the input, so that it is consistent with
   * the settings in convLayer.
   */
  channel_ = config_.num_filters();
  sharedBiases_ = config_.shared_biases();
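  // For example (assumed numbers): a convTrans layer turning 64 input feature
  // maps into 3-channel images would have num_filters = 3 and channels = 64
  // in its config, so channel_ = 3 here while numFilters_[i] picks up 64 from
  // conf.channels() in the loop below.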
  for (auto& inputConfig : config_.inputs()) {
    const ConvConfig& conf = inputConfig.conv_conf();
    padding_.push_back(conf.padding());
    stride_.push_back(conf.stride());
    filterSize_.push_back(conf.filter_size());
    paddingY_.push_back(conf.padding_y());
    strideY_.push_back(conf.stride_y());
    filterSizeY_.push_back(conf.filter_size_y());
    filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
    numFilters_.push_back(conf.channels());
    imgSize_.push_back(conf.img_size());
    imgPixels_.push_back(imgSize_.back() * imgSize_.back());
    groups_.push_back(conf.groups());
    filterChannels_.push_back(conf.filter_channels());
    outputX_.push_back(conf.output_x());
    outputs_.push_back(outputX_.back() * outputX_.back());
  }

  /* initialize the weightList */
  CHECK(inputLayers_.size() == parameters_.size());
  for (size_t i = 0; i < inputLayers_.size(); i++) {
    size_t height, width;
    height = filterPixels_[i] * filterChannels_[i];
    width = numFilters_[i];

    // create a new weight
    CHECK_EQ(parameters_[i]->getSize(), width * height);
    Weight* w = new Weight(height, width, parameters_[i]);
    weights_.emplace_back(w);
  }
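  // Each weight block is therefore (filterPixels_[i] * filterChannels_[i]) x
  // numFilters_[i]; with an assumed 5x5 filter, filter_channels = 32 and
  // conf.channels() = 64, that is an 800 x 64 parameter matrix, matching the
  // (numChannels * filterPixels_, numFilters) shape documented in the header.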

  /* initialize the biases_ */
  if (biasParameter_.get() != NULL) {
    if (sharedBiases_) {
      CHECK_EQ((size_t)channel_, biasParameter_->getSize());
      biases_ =
          std::unique_ptr<Weight>(new Weight(channel_, 1, biasParameter_));
    } else {
      biases_ =
          std::unique_ptr<Weight>(new Weight(getSize(), 1, biasParameter_));
    }
  }

  // default caffe model
  caffeMode_ = true;

  return true;
}

}  // namespace paddle
ConvTransBaseLayer.h
@@ -1,117 +0,0 @@
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Layer.h"
namespace paddle {

/**
 * @brief A Base Convolution Layer, which convolves the input image
 * with learned filters and (optionally) adds biases.
 */
class ConvTransBaseLayer : public Layer {
protected:
  typedef std::vector<int> IntV;

  /// The number of channels in the image (the output of the deconv layer).
  int channel_;
  /// The x dimension of the padding.
  IntV padding_;
  /// The y dimension of the padding.
  IntV paddingY_;
  /// The x dimension of the stride.
  IntV stride_;
  /// The y dimension of the stride.
  IntV strideY_;
  /// The x dimension of a filter kernel.
  IntV filterSize_;
  /// The y dimension of a filter kernel.
  IntV filterSizeY_;
  /// The number of filters (i.e. the number of channels of the deconv layer
  /// input).
  IntV numFilters_;
  /// The spatial dimensions of input feature map.
  IntV imgSize_;
  /// The total pixel size of input feature map.
  /// imgPixels_ = imgSizeX_ * imgSizeY_.
  IntV imgPixels_;
  /// filterPixels_ = filterSizeX_ * filterSizeY_.
  IntV filterPixels_;
  /// filterChannels_ = channels_ / groups_.
  IntV filterChannels_;
  /// The spatial dimensions of output feature map.
  IntV outputX_;
  /// The total pixel size of output feature map.
  /// outputs_ = outputX_ * outputX_.
  IntV outputs_;
  /// Group size; refer to grouped convolution in
  /// Alex Krizhevsky's paper: when group = 2, the first half of the
  /// filters are only connected to the first half of the input channels,
  /// and the second half are only connected to the second half.
  IntV groups_;
  /// Whether each filter's bias is shared across all positions of its
  /// feature map.
  bool sharedBiases_;

  /// shape of weight: (numChannels * filterPixels_, numFilters)
  WeightList weights_;
  /// If shared_biases is true, shape of bias: (channel_, 1).
  /// If shared_biases is false, shape of bias: (getSize(), 1),
  /// i.e. one bias per output element.
  std::unique_ptr<Weight> biases_;

  /// True by default. The only difference is the calculation
  /// of output size.
  bool caffeMode_;

public:
  explicit ConvTransBaseLayer(const LayerConfig& config) : Layer(config) {}

  virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);

  Weight& getWeight(int idx) { return *weights_[idx]; }

  /**
   * Calculate image size based on caffeMode_ from outputSize.
   * - input(+padding): 0123456789
   * - imageSize(+padding) = 10;
   * - filterSize = 3;
   * - stride = 2;
   * - caffeMode_ is true:
   *   - output: (012), (234), (456), (678)
   *   - outputSize = 4;
   * - caffeMode_ is false:
   *   - output: (012), (234), (456), (678), (9)
   *   - outputSize = 5;
   */

  /*
   * In order to be consistent with the convLayer, here the outputSize is
   * actually the size of the input image of convTransLayer, and the image
   * size is actually the size of the output image of convTransLayer.
   */
  int imageSize(int outputSize, int filterSize, int padding, int stride) {
    int imageSize;
    if (!caffeMode_) {
      imageSize =
          (outputSize - 1) * stride + filterSize - 2 * padding - stride + 1;
    } else {
      imageSize = (outputSize - 1) * stride + filterSize - 2 * padding;
    }
    CHECK_GE(imageSize, 1);
    return imageSize;
  }
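
  // Worked example (numbers from the comment above): with filterSize = 3,
  // stride = 2 and padding = 0, the caffeMode_ branch maps outputSize = 4 to
  // imageSize = 3 * 2 + 3 = 9, while the non-caffe branch maps outputSize = 5
  // to imageSize = 4 * 2 + 3 - 2 + 1 = 10.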
};

}  // namespace paddle

File diff suppressed because it is too large