initial take on deconv layers

9 years ago · 5c88f07262
parent 05204af1f2
commit 5c88f07262
5 changed files with 670 additions and 0 deletions
--- a/paddle/gserver/layers/ConvTransBaseLayer.cpp
+++ b/paddle/gserver/layers/ConvTransBaseLayer.cpp
@ -0,0 +1,77 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+
+#include "paddle/utils/Logging.h"
+#include "ConvTransBaseLayer.h"
+namespace paddle {
+
+/**
+ * Initialize the state shared by transposed-convolution layers.
+ *
+ * Steps, in order:
+ *  - run Layer::init and stop if it reports failure;
+ *  - copy the geometry of every input's ConvConfig into the per-input
+ *    vectors (one entry is appended per configured input);
+ *  - wrap each input's parameter in a Weight of shape
+ *    (filterPixels_ * filterChannels_, numFilters_);
+ *  - create the bias Weight when a bias parameter is present.
+ *
+ * @param layerMap      forwarded unchanged to Layer::init.
+ * @param parameterMap  forwarded unchanged to Layer::init.
+ * @return false when Layer::init returns false, true otherwise. Size
+ *         mismatches are reported through CHECK_EQ.
+ */
+bool ConvTransBaseLayer::init(const LayerMap& layerMap,
+                              const ParameterMap& parameterMap) {
+  /* Initialize the basic parent class; do not continue if it failed. */
+  if (!Layer::init(layerMap, parameterMap)) {
+    return false;
+  }
+
+  /* Initialize the convolutional layer parameter */
+  channel_ = config_.num_filters();
+  sharedBiases_ = config_.shared_biases();
+  for (const auto& inputConfig : config_.inputs()) {
+    const ConvConfig& conf = inputConfig.conv_conf();
+    padding_.push_back(conf.padding());
+    stride_.push_back(conf.stride());
+    filterSize_.push_back(conf.filter_size());
+    paddingY_.push_back(conf.padding_y());
+    strideY_.push_back(conf.stride_y());
+    filterSizeY_.push_back(conf.filter_size_y());
+    filterPixels_.push_back(filterSize_.back() * filterSizeY_.back());
+    // numFilters_ is filled from the ConvConfig "channels" field, while
+    // channel_ above comes from the layer-level num_filters.
+    numFilters_.push_back(conf.channels());
+    imgSize_.push_back(conf.img_size());
+    // Only one image/output dimension is configured, so it is squared.
+    imgPixels_.push_back(imgSize_.back() * imgSize_.back());
+    groups_.push_back(conf.groups());
+    filterChannels_.push_back(conf.filter_channels());
+    outputX_.push_back(conf.output_x());
+    outputs_.push_back(outputX_.back() * outputX_.back());
+  }
+
+  /* initialize the weightList */
+  CHECK_EQ(inputLayers_.size(), parameters_.size());
+  for (size_t i = 0; i < inputLayers_.size(); i++) {
+    const size_t height = filterPixels_[i] * filterChannels_[i];
+    const size_t width = numFilters_[i];
+
+    // create a new weight; the parameter must hold exactly height * width
+    // values
+    CHECK_EQ(parameters_[i]->getSize(), width * height);
+    weights_.emplace_back(new Weight(height, width, parameters_[i]));
+  }
+
+  /* initialize the biases_ (left unset when there is no bias parameter) */
+  if (biasParameter_.get() != nullptr) {
+    if (sharedBiases_) {
+      // Shared bias: channel_ rows, and the parameter size must match.
+      CHECK_EQ(static_cast<size_t>(channel_), biasParameter_->getSize());
+      biases_.reset(new Weight(channel_, 1, biasParameter_));
+    } else {
+      // Unshared bias: getSize() rows.
+      biases_.reset(new Weight(getSize(), 1, biasParameter_));
+    }
+  }
+
+  // default caffe mode
+  caffeMode_ = true;
+
+  return true;
+}
+
+}  // namespace paddle
--- a/paddle/gserver/layers/ConvTransBaseLayer.h
+++ b/paddle/gserver/layers/ConvTransBaseLayer.h
@ -0,0 +1,112 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+
+#pragma once
+
+#include "Layer.h"
+namespace paddle {
+
+/**
+ * @brief Base class for transposed-convolution ("deconv") layers.
+ *
+ * Stores the per-input convolution geometry that init() reads from each
+ * input's ConvConfig, the per-input weights and the optional bias, and
+ * offers imageSize() to map an output size back to an image size.
+ *
+ * NOTE(review): several member descriptions were written in
+ * forward-convolution terms; which side of the deconv they describe
+ * should be confirmed against the subclasses, which are not visible here.
+ */
+
+class ConvTransBaseLayer : public Layer {
+protected:
+  typedef std::vector<int> IntV;
+
+  /// The number of channels in the image (the output of the deconv layer).
+  int channel_;
+  /// The x dimension of the padding (one entry per input).
+  IntV padding_;
+  /// The y dimension of the padding (one entry per input).
+  IntV paddingY_;
+  /// The x dimension of the stride (one entry per input).
+  IntV stride_;
+  /// The y dimension of the stride (one entry per input).
+  IntV strideY_;
+  /// The x dimension of a filter kernel (one entry per input).
+  IntV filterSize_;
+  /// The y dimension of a filter kernel (one entry per input).
+  IntV filterSizeY_;
+  /// The number of filters (i.e. the number of channels of the deconv input).
+  IntV numFilters_;
+  /**
+   * Image size taken from ConvConfig::img_size(), one entry per input.
+   * NOTE(review): originally described as the "input feature map"; for a
+   * deconv this is presumably the output side -- confirm.
+   */
+  IntV imgSize_;
+  /// The total pixel count of that image, computed in init() as
+  /// imgPixels_ = imgSize_ * imgSize_ (only one dimension is configured).
+  IntV imgPixels_;
+  /// filterPixels_ = filterSize_ * filterSizeY_.
+  IntV filterPixels_;
+  /// Taken from ConvConfig::filter_channels() (described as channels/groups).
+  IntV filterChannels_;
+  /**
+   * Output size taken from ConvConfig::output_x(), one entry per input.
+   * NOTE(review): named in forward-convolution terms; for a deconv this
+   * is presumably the input side -- confirm.
+   */
+  IntV outputX_;
+  /// Pixel count of that map: outputs_ = outputX_ * outputX_ (see init()).
+  IntV outputs_;
+  /// Group size, refer to grouped convolution in
+  /// Alex Krizhevsky's paper: when group=2, the first half of the
+  /// filters are only connected to the first half of the input channels,
+  /// and the second half only connected to the second half.
+  IntV groups_;
+  /// Whether the bias is shared across the features of each channel.
+  bool sharedBiases_;
+
+  /// shape of weight: (filterChannels_ * filterPixels_, numFilters_)
+  WeightList weights_;
+  /// If shared_biases is true, shape of bias: (channel_, 1).
+  /// If shared_biases is false, shape of bias: (getSize(), 1).
+  /// Only created by init() when the layer has a bias parameter.
+  std::unique_ptr<Weight> biases_;
+
+  /// Set to true by init(). Selects which formula imageSize() uses
+  /// to convert an output size into an image size.
+  bool caffeMode_;
+
+public:
+  explicit ConvTransBaseLayer(const LayerConfig& config) : Layer(config) {}
+
+  virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  Weight& getWeight(int idx) { return *weights_[idx]; }  // idx is not checked
+
+  /**
+   * Calculate the image size from outputSize, based on caffeMode_. This is
+   * the inverse direction of the forward-convolution example below.
+   * - input(+padding): 0123456789
+   * - imageSize(+padding) = 10;
+   * - filterSize = 3;
+   * - stride = 2;
+   * - caffeMode_ is true:
+   *   - output: (012), (234), (456), (678)
+   *   - outputSize = 4;
+   * - caffeMode_ is false:
+   *   - output: (012), (234), (456), (678), (9)
+   *   - outputSize = 5;
+   *
+   * With padding = 0 the formulas below return 9 for outputSize = 4 when
+   * caffeMode_ is true, and 10 for outputSize = 5 when it is false; i.e. in
+   * caffe mode the trailing, uncovered pixel of the example is not
+   * recovered.
+   *
+   * @param outputSize  size on the convolution-output side.
+   * @param filterSize  filter extent along the same dimension.
+   * @param padding     padding on one side; it is subtracted twice.
+   * @param stride      stride along the same dimension.
+   * @return the computed image size; CHECK_GE requires it to be >= 1.
+   */
+
+  int imageSize(int outputSize, int filterSize, int padding, int stride) {
+    int imageSize;
+    if (!caffeMode_) {
+     imageSize =
+         (outputSize - 1) * stride + filterSize - 2 * padding - stride + 1;
+    } else {
+     imageSize = (outputSize - 1) * stride + filterSize - 2 * padding;
+    }
+    CHECK_GE(imageSize, 1);
+    return imageSize;
+  }
+};
+
+}  // namespace paddle
--- a/paddle/gserver/layers/ExpandConvTransLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvTransLayer.cpp
--- a/paddle/gserver/layers/ExpandConvTransLayer.h
+++ b/paddle/gserver/layers/ExpandConvTransLayer.h
@ -0,0 +1,106 @@
+/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+
+#pragma once
+
+#include "ConvTransBaseLayer.h"
+#include "paddle/math/Matrix.h"
+#include <vector>
+
+namespace paddle {
+
+/**
+ * @brief A subclass of ConvTransBaseLayer (transposed convolution).
+ * This layer expands its data and uses matrix multiplication to
+ * calculate the convolution operation.
+ *
+ * The config file api is img_conv_layer.
+ * NOTE(review): the sentence above and the subM_/subN_/subK_ formulas
+ * below read as if copied from the forward convolution layer. The
+ * implementation (.cpp) is not visible here, so confirm the config API
+ * name and those formulas. forward(), backward() and the bprop* helpers
+ * are declared without documentation for the same reason.
+ */
+class ExpandConvTransLayer : public ConvTransBaseLayer {
+protected:
+  /// For expand convolution.
+  /// subM_ = numFilters_ / groups_.
+  IntV subM_;
+  /// subN_ = outputH_ * outputW_.
+  IntV subN_;
+  /// subK_ = channels_ * filterPixels_ * groups_.
+  IntV subK_;
+  /// The height of the input feature map (one entry per input).
+  IntV imgSizeH_;
+  /// The width of the input feature map (one entry per input).
+  IntV imgSizeW_;
+  /// The height of the output feature map (one entry per input).
+  IntV outputH_;
+  /// The width of the output feature map (one entry per input).
+  IntV outputW_;
+  /// Expand one sample at a time. shape:
+  /// (numChannels * filterPixels_, outputSizeH * outputSizeW)
+  MatrixPtr expandInput_;
+  /// The transpose of output, which is an auxiliary matrix.
+  MatrixPtr transOutValue_;
+
+public:
+  explicit ExpandConvTransLayer(const LayerConfig& config) :
+      ConvTransBaseLayer(config) {}
+
+  ~ExpandConvTransLayer() {}
+
+  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
+
+  size_t getSize();
+
+  /**
+   * Create or resize expandInput_.
+   *
+   * @param height  requested number of rows.
+   * @param width   requested number of columns.
+   */
+  void resetExpandInput(size_t height, size_t width);
+
+  /**
+   * Create or resize transOutValue_.
+   *
+   * @param batchSize  number of samples in the current batch.
+   * @param inIdx      index of the input the buffer is prepared for.
+   */
+  void resetConvOutput(size_t batchSize, int inIdx);
+
+  /**
+   * Expand one input sample.
+   *
+   * @param image     matrix holding the sample(s) to expand.
+   * @param startIdx  offset of the sample inside image
+   *                  (presumably a row index -- confirm in the .cpp).
+   * @param inIdx     index of the input being processed.
+   */
+  void expandOneFrame(MatrixPtr image, size_t startIdx, int inIdx);
+
+  /**
+   * Expand one output image and perform matrix multiplication.
+   * Note the argument order differs from expandOneFrame: inIdx comes
+   * before startIdx here.
+   */
+  void expandBackOnce(MatrixPtr image, int inIdx, int startIdx);
+
+  /**
+   * Perform matrix multiplication on one output and then shrink.
+   *
+   * @param output  matrix the result is written to or taken from
+   *                (direction not visible here -- confirm in the .cpp).
+   * @param inpIdx  index of the input being processed.
+   */
+  void shrinkFwd(MatrixPtr output, int inpIdx);
+
+  /**
+   * Add shared bias.
+   */
+  void addSharedBias();
+
+  /**
+   * Add unshared bias.
+   */
+  void addUnsharedBias();
+  void forward(PassType passType);
+  void bpropSharedBias(MatrixPtr biases, MatrixPtr v);
+  void bpropBiases(MatrixPtr v);
+  void backward(const UpdateCallback& callback);
+  void bpropWeights(MatrixPtr v, int inpIdx);
+  void bpropActs(MatrixPtr v, int inpIdx);
+};
+
+}  // namespace paddle
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@ -312,6 +312,49 @@ TEST(Layer, convLayer) {
 #endif
 }

+
+/**
+ * Build a single-input layer config of the given type with transposed-
+ * convolution geometry and run testLayerGrad on it.
+ *
+ * Configuration used: 3 filters with a shared bias of size 3; one input
+ * "layer_0" with a 2x3 filter, 16 channels, padding 0/1, stride 2/2,
+ * one group and img_size 16. The layer size is img_size * img_size *
+ * num_filters.
+ * NOTE(review): 1024 and 288 in the input definition presumably are
+ * 16 channels * 8 * 8 and 2 * 3 * 3 * 16 (the weight size that
+ * ConvTransBaseLayer::init checks) -- confirm against TestConfig.
+ *
+ * @param type    layer type string stored in the layer config.
+ * @param trans   forwarded to testLayerGrad.
+ * @param useGpu  forwarded to testLayerGrad.
+ */
+void testConvTransLayer(const string& type, bool trans, bool useGpu) {
+  TestConfig config;
+  config.biasSize = 3;
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 288});
+
+  // Layer-level settings.
+  auto& layer = config.layerConfig;
+  layer.set_type(type);
+  layer.set_num_filters(3);
+  layer.set_partial_sum(1);
+  layer.set_shared_biases(true);
+
+  // Convolution geometry of the single input.
+  auto& conv = *layer.add_inputs()->mutable_conv_conf();
+  conv.set_channels(16);
+  conv.set_groups(1);
+  conv.set_filter_channels(3 / conv.groups());
+  conv.set_img_size(16);
+  conv.set_filter_size(2);
+  conv.set_filter_size_y(3);
+  conv.set_padding(0);
+  conv.set_padding_y(1);
+  conv.set_stride(2);
+  conv.set_stride_y(2);
+
+  // Output size from the x-direction geometry only; with the values above
+  // the expression evaluates to 8.5 before it is handed to set_output_x.
+  const auto paddedSpan =
+      2 * conv.padding() + conv.img_size() - conv.filter_size();
+  conv.set_output_x(paddedSpan / static_cast<float>(conv.stride()) + 1.5);
+
+  const auto imgSide = conv.img_size();
+  layer.set_size(imgSide * imgSide * layer.num_filters());
+
+  testLayerGrad(config, "convTrans", 100, trans, useGpu);
+}
+
+TEST(Layer, convTransLayer) {  // only the CPU "exconvt" case is exercised
+  testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ false);
+/*
+ * GPU variants are disabled.
+ * NOTE(review): the block below still calls testConvLayer with the types
+ * "exconv" and "cudnn_conv" (presumably copied from the convLayer test);
+ * it has to be adapted to testConvTransLayer before it is enabled.
+#ifndef PADDLE_ONLY_CPU
+  testConvLayer("exconv",  trans=  false,  useGpu=  true);
+  testConvLayer("cudnn_conv",  trans=  false,  useGpu=  true);
+#endif
+*/
+}
+
 TEST(Layer, blockExpandLayer) {
  TestConfig config;
  config.biasSize = 0;