Skip im2col when it is not necessary (1 x 1 convolution)

cblas_new
xzl 8 years ago
parent f70e807756
commit 5229df52a5

@ -109,6 +109,13 @@ protected:
return filter[filter.ndims() - 1]; return filter[filter.ndims() - 1];
} }
// Reports whether the im2col step can be skipped for the given filter shape.
// Returns true only when the filter is 1 x 1, both strides equal 1 and both
// paddings equal 0; any other configuration yields false.
inline bool isSkipIm2col(const TensorShape& filter) const {
  // The filter must cover a single element in each spatial direction.
  if (getFilterHeight(filter) != 1 || getFilterWidth(filter) != 1) {
    return false;
  }
  // A stride other than 1 rules the shortcut out.
  if (strideH() != 1 || strideW() != 1) {
    return false;
  }
  // Finally, no padding may be applied in either direction.
  return paddingH() == 0 && paddingW() == 0;
}
std::vector<size_t> strides_; std::vector<size_t> strides_;
std::vector<size_t> paddings_; std::vector<size_t> paddings_;

@ -66,16 +66,23 @@ public:
real* inputData = inputs[0].data<real>(); real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>(); real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>(); real* outputData = outputs[0].data<real>();
bool skipIm2col = isSkipIm2col(filter);
TensorShape imShape = TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth}); TensorShape({inputChannels / groups_, inputHeight, inputWidth});
TensorShape colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements()); TensorShape colShape;
real* colData = reinterpret_cast<real*>(memory_->getBuf()); real *colBuffer, *colData = NULL;
if (!skipIm2col) {
colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements());
colData = reinterpret_cast<real*>(memory_->getBuf());
}
Im2ColFunctor<kCFO, Device, real> im2col; Im2ColFunctor<kCFO, Device, real> im2col;
GemmFunctor<Device, real> gemm; GemmFunctor<Device, real> gemm;
@ -86,15 +93,18 @@ public:
for (size_t i = 0; i < batchSize; i++) { for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) { for (size_t g = 0; g < groups_; g++) {
im2col(inputData + g * inputOffset, colBuffer = inputData + g * inputOffset;
imShape, if (!skipIm2col) {
colData, im2col(inputData + g * inputOffset,
colShape, imShape,
strideH(), colData,
strideW(), colShape,
paddingH(), strideH(),
paddingW()); strideW(),
paddingH(),
paddingW());
colBuffer = colData;
}
int M = outputChannels / groups_; int M = outputChannels / groups_;
int N = outputHeight * outputWidth; int N = outputHeight * outputWidth;
int K = inputChannels / groups_ * filterHeight * filterWidth; int K = inputChannels / groups_ * filterHeight * filterWidth;
@ -106,7 +116,7 @@ public:
1.0f, 1.0f,
filterData + g * filterOffset, filterData + g * filterOffset,
K, K,
colData, colBuffer,
N, N,
beta, beta,
outputData + g * outputOffset, outputData + g * outputOffset,
@ -159,19 +169,27 @@ public:
real* outputGrad = inputs[0].data<real>(); real* outputGrad = inputs[0].data<real>();
real* filterData = inputs[1].data<real>(); real* filterData = inputs[1].data<real>();
real* inputGrad = outputs[0].data<real>(); real* inputGrad = outputs[0].data<real>();
bool skipIm2col = isSkipIm2col(filter);
TensorShape imShape = TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth}); TensorShape({inputChannels / groups_, inputHeight, inputWidth});
TensorShape colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements()); TensorShape colShape;
real* colData = reinterpret_cast<real*>(memory_->getBuf()); real *colBuffer, *colData = NULL;
if (!skipIm2col) {
colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements());
colData = reinterpret_cast<real*>(memory_->getBuf());
}
Col2ImFunctor<kCFO, Device, real> col2im; Col2ImFunctor<kCFO, Device, real> col2im;
GemmFunctor<Device, real> gemm; GemmFunctor<Device, real> gemm;
size_t inputOffset = imShape.getElements(); size_t inputOffset = imShape.getElements();
size_t outputOffset = size_t outputOffset =
(outputChannels / groups_) * outputHeight * outputWidth; (outputChannels / groups_) * outputHeight * outputWidth;
@ -182,6 +200,12 @@ public:
int K = outputChannels / groups_; int K = outputChannels / groups_;
int N = outputHeight * outputWidth; int N = outputHeight * outputWidth;
int M = inputChannels / groups_ * filterHeight * filterWidth; int M = inputChannels / groups_ * filterHeight * filterWidth;
colBuffer = colData;
real scale = 0.0f;
if (skipIm2col) {
colBuffer = inputGrad + g * inputOffset;
scale = 1.0f;
}
gemm(CblasTrans, gemm(CblasTrans,
CblasNoTrans, CblasNoTrans,
M, M,
@ -192,17 +216,19 @@ public:
M, M,
outputGrad + g * outputOffset, outputGrad + g * outputOffset,
N, N,
0.0f, scale,
colData, colBuffer,
N); N);
col2im(inputGrad + g * inputOffset, if (!skipIm2col) {
imShape, col2im(inputGrad + g * inputOffset,
colData, imShape,
colShape, colBuffer,
strideH(), colShape,
strideW(), strideH(),
paddingH(), strideW(),
paddingW()); paddingH(),
paddingW());
}
} }
inputGrad += inputChannels * inputHeight * inputWidth; inputGrad += inputChannels * inputHeight * inputWidth;
outputGrad += outputChannels * outputHeight * outputWidth; outputGrad += outputChannels * outputHeight * outputWidth;
@ -255,16 +281,23 @@ public:
real* outputGrad = inputs[0].data<real>(); real* outputGrad = inputs[0].data<real>();
real* inputData = inputs[1].data<real>(); real* inputData = inputs[1].data<real>();
real* filterGrad = outputs[0].data<real>(); real* filterGrad = outputs[0].data<real>();
bool skipIm2col = isSkipIm2col(filter);
TensorShape imShape = TensorShape imShape =
TensorShape({inputChannels / groups_, inputHeight, inputWidth}); TensorShape({inputChannels / groups_, inputHeight, inputWidth});
TensorShape colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements()); TensorShape colShape;
real* colData = reinterpret_cast<real*>(memory_->getBuf()); real *colBuffer, *colData = NULL;
if (!skipIm2col) {
colShape = TensorShape({inputChannels / groups_,
filterHeight,
filterWidth,
outputHeight,
outputWidth});
resizeBuffer<Device>(colShape.getElements());
colData = reinterpret_cast<real*>(memory_->getBuf());
}
Im2ColFunctor<kCFO, Device, real> im2col; Im2ColFunctor<kCFO, Device, real> im2col;
GemmFunctor<Device, real> gemm; GemmFunctor<Device, real> gemm;
@ -274,15 +307,18 @@ public:
size_t filterOffset = filter.getElements() / groups_; size_t filterOffset = filter.getElements() / groups_;
for (size_t i = 0; i < batchSize; i++) { for (size_t i = 0; i < batchSize; i++) {
for (size_t g = 0; g < groups_; g++) { for (size_t g = 0; g < groups_; g++) {
im2col(inputData + g * inputOffset, colBuffer = inputData + g * inputOffset;
imShape, if (!skipIm2col) {
colData, im2col(inputData + g * inputOffset,
colShape, imShape,
strideH(), colData,
strideW(), colShape,
paddingH(), strideH(),
paddingW()); strideW(),
paddingH(),
paddingW());
colBuffer = colData;
}
int M = outputChannels / groups_; int M = outputChannels / groups_;
int K = outputHeight * outputWidth; int K = outputHeight * outputWidth;
int N = inputChannels / groups_ * filterHeight * filterWidth; int N = inputChannels / groups_ * filterHeight * filterWidth;
@ -294,7 +330,7 @@ public:
1.0f, 1.0f,
outputGrad + g * outputOffset, outputGrad + g * outputOffset,
K, K,
colData, colBuffer,
K, K,
i == 0 ? beta : 1.0f, i == 0 ? beta : 1.0f,
filterGrad + g * filterOffset, filterGrad + g * filterOffset,

Loading…
Cancel
Save