|
|
|
@ -24,7 +24,7 @@ __global__
|
|
|
|
|
void ConvolutionDepthwiseForward(const int nthreads,
|
|
|
|
|
const T* const inputData, const T* const filterData,
|
|
|
|
|
const int batchSize, const int outputChannels, const int outputHeight,
|
|
|
|
|
const int outputWidth, const int inputHeight, const int inputWidth,
|
|
|
|
|
const int outputWidth,const int inputChannels, const int inputHeight, const int inputWidth,
|
|
|
|
|
const int filterHeight, const int filterWidth, const int strideH,
|
|
|
|
|
const int strideW, const int paddingH, const int paddingW,
|
|
|
|
|
T* const outputData) {
|
|
|
|
@ -49,7 +49,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
|
|
|
|
|
for (int kw = 0; kw < filterWidth; ++kw) {
|
|
|
|
|
const int h_in = -paddingH + h * strideH + kh;
|
|
|
|
|
const int w_in = -paddingW + w * strideW + kw;
|
|
|
|
|
const int offset = ((n * outputChannels + c) * inputHeight + h_in)
|
|
|
|
|
const int offset = ((n * inputChannels + c) * inputHeight + h_in)
|
|
|
|
|
* inputWidth + w_in;
|
|
|
|
|
value += (*weight) * inputData[offset];
|
|
|
|
|
++weight;
|
|
|
|
@ -80,15 +80,15 @@ __global__
|
|
|
|
|
void ConvolutionDepthwiseInputBackward(const int nthreads,
|
|
|
|
|
const T* const top_diff, const T* const weight_data,
|
|
|
|
|
const int num, const int outputChannels, const int outputHeight,
|
|
|
|
|
const int outputWidth, const int inputHeight, const int inputWidth,
|
|
|
|
|
const int outputWidth,const int inputChannels, const int inputHeight, const int inputWidth,
|
|
|
|
|
const int filterHeight, const int filterWidth, const int strideH,
|
|
|
|
|
const int strideW, const int paddingH, const int paddingW,
|
|
|
|
|
T* const bottom_diff) {
|
|
|
|
|
int index =
|
|
|
|
|
(blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
|
|
|
|
|
if(index < nthreads) {
|
|
|
|
|
const int n = index / outputChannels / inputHeight / inputWidth;
|
|
|
|
|
const int c = (index / inputHeight / inputWidth) % outputChannels;
|
|
|
|
|
const int n = index / inputChannels / inputHeight / inputWidth;
|
|
|
|
|
const int c = (index / inputHeight / inputWidth) % inputChannels;
|
|
|
|
|
const int h = (index / inputWidth) % inputHeight;
|
|
|
|
|
const int w = index % inputWidth;
|
|
|
|
|
const T* weight = weight_data + c * filterHeight * filterWidth;
|
|
|
|
@ -121,7 +121,7 @@ __global__
|
|
|
|
|
void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
|
|
|
|
|
const T* const top_diff, const T* const inputData,
|
|
|
|
|
const int num, const int outputChannels, const int outputHeight,
|
|
|
|
|
const int outputWidth, const int inputHeight, const int inputWidth,
|
|
|
|
|
const int outputWidth, const int inputChannels, const int inputHeight, const int inputWidth,
|
|
|
|
|
const int filterHeight, const int filterWidth, const int strideH,
|
|
|
|
|
const int strideW, const int paddingH, const int paddingW,
|
|
|
|
|
T* const buffer_data) {
|
|
|
|
@ -141,7 +141,7 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
|
|
|
|
|
const int n = num_i;
|
|
|
|
|
const int top_offset = ((n * outputChannels + c) * outputHeight + h)
|
|
|
|
|
* outputWidth + w;
|
|
|
|
|
const int bottom_offset = ((n * outputChannels + c) * inputHeight + h_in)
|
|
|
|
|
const int bottom_offset = ((n * inputChannels + c) * inputHeight + h_in)
|
|
|
|
|
* inputWidth + w_in;
|
|
|
|
|
buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset];
|
|
|
|
|
} else {
|
|
|
|
@ -159,6 +159,7 @@ public:
|
|
|
|
|
int outputChannels,
|
|
|
|
|
int outputHeight,
|
|
|
|
|
int outputWidth,
|
|
|
|
|
int inputChannels,
|
|
|
|
|
int inputHeight,
|
|
|
|
|
int inputWidth,
|
|
|
|
|
int filterHeight,
|
|
|
|
@ -186,6 +187,7 @@ public:
|
|
|
|
|
outputChannels,
|
|
|
|
|
outputHeight,
|
|
|
|
|
outputWidth,
|
|
|
|
|
inputChannels,
|
|
|
|
|
inputHeight,
|
|
|
|
|
inputWidth,
|
|
|
|
|
filterHeight,
|
|
|
|
@ -237,6 +239,7 @@ public:
|
|
|
|
|
outputChannels,
|
|
|
|
|
outputHeight,
|
|
|
|
|
outputWidth,
|
|
|
|
|
inputChannels,
|
|
|
|
|
inputHeight,
|
|
|
|
|
inputWidth,
|
|
|
|
|
filterHeight,
|
|
|
|
@ -290,6 +293,7 @@ public:
|
|
|
|
|
outputChannels,
|
|
|
|
|
outputHeight,
|
|
|
|
|
outputWidth,
|
|
|
|
|
inputChannels,
|
|
|
|
|
inputHeight,
|
|
|
|
|
inputWidth,
|
|
|
|
|
filterHeight,
|
|
|
|
|