modify the format

8 years ago · dbb658805e
parent 44927bf70a
commit dbb658805e
2 changed files with 61 additions and 64 deletions
--- a/paddle/function/DepthwiseConvOp.cpp
+++ b/paddle/function/DepthwiseConvOp.cpp
@ -99,8 +99,7 @@ public:
    ConvFunctionBase::init(config);
  }
-  virtual void check(const BufferArgs& inputs,
+  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
-                     const BufferArgs& outputs) override {
    const TensorShape& input = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
    const TensorShape& output = outputs[0].shape();
@ -162,8 +161,7 @@ public:
    ConvFunctionBase::init(config);
  }
-  virtual void check(const BufferArgs& inputs,
+  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
-                     const BufferArgs& outputs) override {
    const TensorShape& output = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
    const TensorShape& input = outputs[0].shape();
@ -225,8 +223,7 @@ public:
    ConvFunctionBase::init(config);
  }
-  virtual void check(const BufferArgs& inputs,
+  void check(const BufferArgs& inputs, const BufferArgs& outputs) override {
-                     const BufferArgs& outputs) override {
    const TensorShape& output = inputs[0].shape();
    const TensorShape& input = inputs[1].shape();
    const TensorShape& filter = outputs[0].shape();
--- a/paddle/function/DepthwiseConvOpGpu.cu
+++ b/paddle/function/DepthwiseConvOpGpu.cu
@ -24,10 +24,10 @@ __global__
 void ConvolutionDepthwiseForward(const int nthreads,
    const T* const inputData, const T* const filterData,
    const int batchSize, const int outputChannels, const int outputHeight,
-    const int outputWidth,const int inputChannels, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight,
-    const int filterMultiplier, const int filterHeight, const int filterWidth, const int strideH,
+    const int inputWidth, const int filterMultiplier, const int filterHeight,
-    const int strideW, const int paddingH, const int paddingW,
+    const int filterWidth, const int strideH, const int strideW,
-    T* const outputData) {
+    const int paddingH, const int paddingW, T* const outputData) {
  int index =
    (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
@ -51,8 +51,8 @@ void ConvolutionDepthwiseForward(const int nthreads,
            for (int kw = 0; kw < filterWidth; ++kw) {
                const int h_in = -paddingH + h_out * strideH + kh;
                const int w_in = -paddingW + w_out * strideW + kw;
-                const int offset = ((batch * inputChannels + c_in) * inputHeight + h_in)
+                const int offset = ((batch * inputChannels + c_in)
-					* inputWidth + w_in;
+                    * inputHeight + h_in) * inputWidth + w_in;
                value += (*weight) * inputData[offset];
                ++weight;
            }
@ -64,8 +64,8 @@ void ConvolutionDepthwiseForward(const int nthreads,
                const int w_in = -paddingW + w_out * strideW + kw;
                if ((h_in >= 0) && (h_in < inputHeight)
                   && (w_in >= 0) && (w_in < inputWidth)) {
-                    const int offset = ((batch * inputChannels + c_in) * inputHeight + h_in)
+                    const int offset = ((batch * inputChannels + c_in)
-                        * inputWidth + w_in;
+                        * inputHeight + h_in) * inputWidth + w_in;
                    value += (*weight) * inputData[offset];
                }
                ++weight;
@ -82,10 +82,10 @@ __global__
 void ConvolutionDepthwiseInputBackward(const int nthreads,
    const T* const top_diff, const T* const weight_data,
    const int num, const int outputChannels, const int outputHeight,
-    const int outputWidth,const int inputChannels, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight,
-    const int filterMultiplier, const int filterHeight, const int filterWidth, const int strideH,
+    const int inputWidth, const int filterMultiplier, const int filterHeight,
-    const int strideW, const int paddingH, const int paddingW,
+    const int filterWidth, const int strideH, const int strideW,
-     T* const bottom_diff) {
+    const int paddingH, const int paddingW, T* const bottom_diff) {
  int index =
    (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  if (index < nthreads) {
@ -95,8 +95,8 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
    const int w_in = index % inputWidth;
    const int c_out_start = c_in * filterMultiplier;
    T value = 0;
-	for(int c_out = c_out_start; c_out < c_out_start + filterMultiplier; c_out ++){
+    for (int c_out = c_out_start;
-	//weight bixu c_out
+         c_out < c_out_start + filterMultiplier; c_out ++) {
        const T* weight = weight_data + c_out * filterHeight * filterWidth;
        for (int kh = 0; kh < filterHeight; ++kh) {
            for (int kw = 0; kw < filterWidth; ++kw) {
@ -105,11 +105,12 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
                if (((h_out_s % strideH) == 0) && ((w_out_s % strideW) == 0)) {
                    const int h_out = h_out_s / strideH;
                    const int w_out = w_out_s / strideW;
-	                // TODO(zhaolong) : the 'if' affect the effectiveness, it needs to optimize
+                    // TODO(zhaolong) : the 'if' affect the effectiveness,
+                    // it needs to optimize
                    if ((h_out >= 0) && (h_out < outputHeight)
                        && (w_out >= 0) && (w_out < outputWidth)) {
-                        const int offset = ((batch * outputChannels + c_out) * outputHeight + h_out)
+                        const int offset = ((batch * outputChannels + c_out)
-                           * outputWidth + w_out;
+                            * outputHeight + h_out) * outputWidth + w_out;
                        value += (*weight) * top_diff[offset];
                    }
                }
@ -127,10 +128,10 @@ __global__
 void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
    const T* const top_diff, const T* const inputData,
    const int num, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputChannels, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight,
-    const int filterMultiplier, const int filterHeight, const int filterWidth, const int strideH,
+    const int inputWidth, const int filterMultiplier, const int filterHeight,
-    const int strideW, const int paddingH, const int paddingW,
+    const int filterWidth, const int strideH, const int strideW,
-    T* const buffer_data) {
+    const int paddingH, const int paddingW, T* const buffer_data) {
  int index =
    (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  if (index < nthreads) {
@ -143,13 +144,14 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
    const int w_in = -paddingW + w_out * strideW + kw;
    if ((h_in >= 0) && (h_in < inputHeight)
          && (w_in >= 0) && (w_in < inputWidth)) {
-      const int c_out = index / filterHeight / filterWidth / outputHeight / outputWidth;
+      const int c_out = index /
+            (filterHeight * filterWidth * outputHeight * outputWidth);
      const int c_in = c_out / filterMultiplier;
      const int batch = num_i;
-      const int top_offset = ((batch * outputChannels + c_out) * outputHeight + h_out)
+      const int top_offset = ((batch * outputChannels + c_out) *
-            * outputWidth + w_out;
+            outputHeight + h_out) * outputWidth + w_out;
-      const int bottom_offset = ((batch * inputChannels + c_in) * inputHeight + h_in)
+      const int bottom_offset = ((batch * inputChannels + c_in)
-            * inputWidth + w_in;
+            * inputHeight + h_in) * inputWidth + w_in;
      buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset];
    } else {
      buffer_data[index] = 0;
@ -177,7 +179,6 @@ public:
            int paddingH,
            int paddingW,
            T* outputData){
    int outputSize = batchSize * outputChannels * outputHeight * outputWidth;
    size_t blocks = (outputSize + 1024 -1) / 1024;
@ -229,7 +230,6 @@ public:
            int paddingH,
            int paddingW,
            T* inputGrad){
    int inputSize = batchSize * inputChannels * inputHeight * inputWidth;
    size_t blocks = (inputSize + 1024 -1) / 1024;
@ -284,15 +284,16 @@ public:
                int paddingW,
                T* colData,
                T* filterGrad){
-
+        int colDataSize = outputChannels * filterHeight * filterWidth
-        int colDataSize = outputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
+            * outputHeight * outputWidth;
        size_t blocks = (colDataSize + 1024 -1) / 1024;
        size_t blockX = 512;
        size_t blockY = (blocks+512-1)/512;
        dim3 threads(1024, 1);
        dim3 grid(blockX, blockY);
-        BaseMatrix filterGradMatrix(outputChannels * filterHeight * filterWidth, 1, filterGrad, false, true);
+        BaseMatrix filterGradMatrix(outputChannels * filterHeight * filterWidth,
+            1, filterGrad, false, true);
        for (int i = 0; i < batchSize; i++) {
            ConvolutionDepthwiseFilterBackward<T>
@ -315,8 +316,7 @@ public:
                    strideW,
                    paddingH,
                    paddingW,
-                    colData
+                    colData);
-            );
            int K = outputHeight * outputWidth;
            int M = colDataSize / K;