Add GPU acceleration for MobileNet (depthwise convolution); the CPU implementation is still in progress

8 years ago · efae51ce24
parent eeb17c26fd
commit efae51ce24
5 changed files with 130 additions and 102 deletions
--- a/paddle/function/DepthwiseConvOp.cpp
+++ b/paddle/function/DepthwiseConvOp.cpp
@ -18,11 +18,6 @@ limitations under the License. */

 namespace paddle {

-/*
- * imData = [input_channels, input_height, input_width]
- * colData = [input_channels, filter_height, filter_width,
- *            output_height, output_width]
- */
 template <class T>
 class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
 public:
@ -33,6 +28,8 @@ public:
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputHeight,
+                  int inputWidth,
                  int filterHeight,
                  int filterWidth,
                  int strideH,
@ -40,7 +37,7 @@ public:
                  int paddingH,
                  int paddingW,
                  T* outputData) {
-    // NO_IMPLEMENTATION
+    // TODO(zhaolong): implement the CPU version of depthwise convolution.
  }
 };

@ -118,8 +115,8 @@ public:

    size_t batchSize = input[0];
    // size_t inputChannels = input[1];
-    // size_t inputHeight = input[2];
-    // size_t inputWidth = input[3];
+    size_t inputHeight = input[2];
+    size_t inputWidth = input[3];
    size_t filterHeight = getFilterHeight(filter);
    size_t filterWidth = getFilterWidth(filter);
    size_t outputChannels = output[1];
@ -139,6 +136,8 @@ public:
                  outputChannels,
                  outputHeight,
                  outputWidth,
+                  inputHeight,
+                  inputWidth,
                  filterHeight,
                  filterWidth,
                  strideH(),
@ -233,8 +232,8 @@ public:
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    CHECK_EQ(numInputs_, inputs.size());
-    CHECK_EQ(numOutputs_, outputs.size());
+    // CHECK_EQ(numInputs_, inputs.size());
+    // CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
    const TensorShape& output = inputs[0].shape();
    const TensorShape& input = inputs[1].shape();
--- a/paddle/function/DepthwiseConvOp.h
+++ b/paddle/function/DepthwiseConvOp.h
@ -18,11 +18,6 @@ limitations under the License. */

 namespace paddle {

-/*
- * imData = [input_channels, input_height, input_width]
- * colData = [input_channels, filter_height, filter_width,
- *            output_height, output_width]
- */
 template <DeviceType Device, class T>
 class DepthwiseConvFunctor {
 public:
@ -33,6 +28,8 @@ public:
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputHeight,
+                  int intputWidth,
                  int filterHeight,
                  int filterWidth,
                  int strideH,
--- a/paddle/function/DepthwiseConvOpGpu.cu
+++ b/paddle/function/DepthwiseConvOpGpu.cu
--- a/paddle/gserver/layers/ConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ConvBaseLayer.cpp
@ -21,7 +21,8 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
                         const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  Layer::init(layerMap, parameterMap);
-  isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv")
+  isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv" ||
+               config_.type() == "depthwise_conv")
                  ? false
                  : true;

--- a/paddle/gserver/layers/DepthwiseConvLayer.cpp
+++ b/paddle/gserver/layers/DepthwiseConvLayer.cpp
@ -15,6 +15,7 @@ limitations under the License. */
 #include "DepthwiseConvLayer.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
+#include <iostream>

 namespace paddle {

@ -79,6 +80,7 @@ void DepthwiseConvLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
+  // std::cout << "outputSize" << getOutputSize() <<std::endl;
  resetOutput(batchSize, getOutputSize());

  // Calculate the shape of the input, output, and filter.