|
|
@ -25,40 +25,32 @@ enum TestType {
|
|
|
|
kBackwardFilterTest = 2,
|
|
|
|
kBackwardFilterTest = 2,
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
enum LayerType {
|
|
|
|
|
|
|
|
convolutionType = 0,
|
|
|
|
|
|
|
|
depthwiseConvolutionType = 1,
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
template <DeviceType DType1, DeviceType DType2>
|
|
|
|
template <DeviceType DType1, DeviceType DType2>
|
|
|
|
class ConvolutionTest {
|
|
|
|
class ConvolutionTest {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ConvolutionTest(const std::string& conv1,
|
|
|
|
ConvolutionTest(const std::string& conv1,
|
|
|
|
const std::string& conv2,
|
|
|
|
const std::string& conv2,
|
|
|
|
LayerType layerType,
|
|
|
|
|
|
|
|
TestType type,
|
|
|
|
TestType type,
|
|
|
|
|
|
|
|
bool useGroups = true,
|
|
|
|
std::string algo = "auto") {
|
|
|
|
std::string algo = "auto") {
|
|
|
|
for (size_t batchSize : {1, 32}) {
|
|
|
|
for (size_t batchSize : {1, 32}) {
|
|
|
|
for (size_t inputSize : {7, 14, 54}) {
|
|
|
|
for (size_t inputSize : {7, 14, 54}) {
|
|
|
|
for (size_t filterSize : {1, 3, 5}) {
|
|
|
|
for (size_t filterSize : {1, 3, 5}) {
|
|
|
|
for (size_t inputChannels : {3, 64}) {
|
|
|
|
for (size_t inputChannels : {3, 64}) {
|
|
|
|
for (size_t outputChannels : {3, 64, 128}) {
|
|
|
|
for (size_t outputChannels : {3, 64, 128}) {
|
|
|
|
|
|
|
|
for (size_t groups : {1, 3, 64}) {
|
|
|
|
if (inputChannels > outputChannels) break;
|
|
|
|
if (inputChannels > outputChannels) break;
|
|
|
|
if (layerType == depthwiseConvolutionType &&
|
|
|
|
if (groups != 1 &&
|
|
|
|
outputChannels % inputChannels != 0)
|
|
|
|
(inputChannels != groups || outputChannels % groups != 0))
|
|
|
|
break;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!useGroups) groups = 1;
|
|
|
|
size_t groups = 1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (layerType == depthwiseConvolutionType) {
|
|
|
|
|
|
|
|
groups = inputChannels;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (size_t stride : {1, 2}) {
|
|
|
|
for (size_t stride : {1, 2}) {
|
|
|
|
for (size_t padding : {0, 1}) {
|
|
|
|
for (size_t padding : {0, 1}) {
|
|
|
|
if (padding >= filterSize) break;
|
|
|
|
if (padding >= filterSize) break;
|
|
|
|
size_t outputSize =
|
|
|
|
size_t outputSize =
|
|
|
|
(inputSize - filterSize + 2 * padding + stride) / stride;
|
|
|
|
(inputSize - filterSize + 2 * padding + stride) /
|
|
|
|
|
|
|
|
stride;
|
|
|
|
VLOG(3) << " batchSize=" << batchSize
|
|
|
|
VLOG(3) << " batchSize=" << batchSize
|
|
|
|
<< " inputChannels=" << inputChannels
|
|
|
|
<< " inputChannels=" << inputChannels
|
|
|
|
<< " inputHeight=" << inputSize
|
|
|
|
<< " inputHeight=" << inputSize
|
|
|
@ -85,10 +77,10 @@ public:
|
|
|
|
batchSize, inputChannels, inputSize, inputSize};
|
|
|
|
batchSize, inputChannels, inputSize, inputSize};
|
|
|
|
|
|
|
|
|
|
|
|
TensorShape filter;
|
|
|
|
TensorShape filter;
|
|
|
|
if (layerType == depthwiseConvolutionType)
|
|
|
|
if (groups > 1)
|
|
|
|
filter = TensorShape({groups,
|
|
|
|
filter = TensorShape({groups,
|
|
|
|
outputChannels / groups,
|
|
|
|
outputChannels / groups,
|
|
|
|
(size_t)1,
|
|
|
|
inputChannels / groups,
|
|
|
|
filterSize,
|
|
|
|
filterSize,
|
|
|
|
filterSize});
|
|
|
|
filterSize});
|
|
|
|
else
|
|
|
|
else
|
|
|
@ -107,7 +99,8 @@ public:
|
|
|
|
} else if (type == kBackwardInputTest) {
|
|
|
|
} else if (type == kBackwardInputTest) {
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
|
|
|
|
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
|
|
|
|
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input),
|
|
|
|
|
|
|
|
ADD_TO);
|
|
|
|
test.run();
|
|
|
|
test.run();
|
|
|
|
} else if (type == kBackwardFilterTest) {
|
|
|
|
} else if (type == kBackwardFilterTest) {
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
@ -123,6 +116,7 @@ public:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Mainly used to test cases where the height and width (input, filter)
|
|
|
|
// Mainly used to test cases where the height and width (input, filter)
|
|
|
@ -132,8 +126,8 @@ class ConvolutionTest2 {
|
|
|
|
public:
|
|
|
|
public:
|
|
|
|
ConvolutionTest2(const std::string& conv1,
|
|
|
|
ConvolutionTest2(const std::string& conv1,
|
|
|
|
const std::string& conv2,
|
|
|
|
const std::string& conv2,
|
|
|
|
LayerType layerType,
|
|
|
|
|
|
|
|
TestType type,
|
|
|
|
TestType type,
|
|
|
|
|
|
|
|
bool useGroups = true,
|
|
|
|
std::string algo = "auto") {
|
|
|
|
std::string algo = "auto") {
|
|
|
|
for (size_t batchSize : {16}) {
|
|
|
|
for (size_t batchSize : {16}) {
|
|
|
|
for (size_t inputHeight : {7, 31}) {
|
|
|
|
for (size_t inputHeight : {7, 31}) {
|
|
|
@ -142,15 +136,13 @@ public:
|
|
|
|
for (size_t filterWidth : {3, 7}) {
|
|
|
|
for (size_t filterWidth : {3, 7}) {
|
|
|
|
for (size_t inputChannels : {7}) {
|
|
|
|
for (size_t inputChannels : {7}) {
|
|
|
|
for (size_t outputChannels : {7, 32}) {
|
|
|
|
for (size_t outputChannels : {7, 32}) {
|
|
|
|
if (layerType == depthwiseConvolutionType &&
|
|
|
|
for (size_t groups : {1, 7}) {
|
|
|
|
outputChannels % inputChannels != 0)
|
|
|
|
if (!useGroups && groups != 1 &&
|
|
|
|
break;
|
|
|
|
(inputChannels != groups ||
|
|
|
|
|
|
|
|
outputChannels % groups != 0))
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (!useGroups) groups = 1;
|
|
|
|
|
|
|
|
|
|
|
|
size_t groups = 1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (layerType == depthwiseConvolutionType) {
|
|
|
|
|
|
|
|
groups = inputChannels;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t stride = 1;
|
|
|
|
size_t stride = 1;
|
|
|
|
size_t padding = 0;
|
|
|
|
size_t padding = 0;
|
|
|
|
size_t outputHeight =
|
|
|
|
size_t outputHeight =
|
|
|
@ -185,10 +177,10 @@ public:
|
|
|
|
batchSize, inputChannels, inputHeight, inputWidth};
|
|
|
|
batchSize, inputChannels, inputHeight, inputWidth};
|
|
|
|
|
|
|
|
|
|
|
|
TensorShape filter;
|
|
|
|
TensorShape filter;
|
|
|
|
if (layerType == depthwiseConvolutionType)
|
|
|
|
if (groups > 1)
|
|
|
|
filter = TensorShape({groups,
|
|
|
|
filter = TensorShape({groups,
|
|
|
|
outputChannels / groups,
|
|
|
|
outputChannels / groups,
|
|
|
|
(size_t)1,
|
|
|
|
inputChannels / groups,
|
|
|
|
filterHeight,
|
|
|
|
filterHeight,
|
|
|
|
filterWidth});
|
|
|
|
filterWidth});
|
|
|
|
else
|
|
|
|
else
|
|
|
@ -207,7 +199,8 @@ public:
|
|
|
|
} else if (type == kBackwardInputTest) {
|
|
|
|
} else if (type == kBackwardInputTest) {
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, filter));
|
|
|
|
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input), ADD_TO);
|
|
|
|
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, input),
|
|
|
|
|
|
|
|
ADD_TO);
|
|
|
|
test.run();
|
|
|
|
test.run();
|
|
|
|
} else if (type == kBackwardFilterTest) {
|
|
|
|
} else if (type == kBackwardFilterTest) {
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
|
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, output));
|
|
|
@ -223,109 +216,37 @@ public:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ======Start Convolution TEST======
|
|
|
|
|
|
|
|
TEST(Forward, GEMM) {
|
|
|
|
TEST(Forward, GEMM) {
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
|
|
|
|
"NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
|
|
|
|
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false);
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test2(
|
|
|
|
"NaiveConv-CPU", "GemmConv-CPU", convolutionType, kForwardTest);
|
|
|
|
"NaiveConv-CPU", "GemmConv-CPU", kForwardTest, false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef PADDLE_ONLY_CPU
|
|
|
|
#ifndef PADDLE_ONLY_CPU
|
|
|
|
TEST(Forward, GEMM2) {
|
|
|
|
TEST(Forward, GEMM2) {
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
"GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
|
|
|
|
"GemmConv-CPU", "GemmConv-GPU", kForwardTest);
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
"GemmConv-CPU", "GemmConv-GPU", convolutionType, kForwardTest);
|
|
|
|
"GemmConv-CPU", "GemmConv-GPU", kForwardTest);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
TEST(BackwardInput, GEMM) {
|
|
|
|
TEST(BackwardInput, GEMM) {
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
"GemmConvGradInput-CPU",
|
|
|
|
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
|
|
|
|
"GemmConvGradInput-GPU",
|
|
|
|
|
|
|
|
convolutionType,
|
|
|
|
|
|
|
|
kBackwardInputTest);
|
|
|
|
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
"GemmConvGradInput-CPU",
|
|
|
|
"GemmConvGradInput-CPU", "GemmConvGradInput-GPU", kBackwardInputTest);
|
|
|
|
"GemmConvGradInput-GPU",
|
|
|
|
|
|
|
|
convolutionType,
|
|
|
|
|
|
|
|
kBackwardInputTest);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
TEST(BackwardFilter, GEMM) {
|
|
|
|
TEST(BackwardFilter, GEMM) {
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
"GemmConvGradFilter-CPU",
|
|
|
|
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
|
|
|
|
"GemmConvGradFilter-GPU",
|
|
|
|
|
|
|
|
convolutionType,
|
|
|
|
|
|
|
|
kBackwardFilterTest);
|
|
|
|
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
|
|
|
|
"GemmConvGradFilter-CPU",
|
|
|
|
|
|
|
|
"GemmConvGradFilter-GPU",
|
|
|
|
|
|
|
|
convolutionType,
|
|
|
|
|
|
|
|
kBackwardFilterTest);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// ======End Convolution TEST======
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// ======Start DepthwiseConvolution TEST======
|
|
|
|
|
|
|
|
// TODO(zhaolong) The depthwise convolution cpu test will be added when the cpu
|
|
|
|
|
|
|
|
// version of depthwiseConv is implemented.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef PADDLE_ONLY_CPU
|
|
|
|
|
|
|
|
TEST(DepthwiseConvForward, GEMM) {
|
|
|
|
|
|
|
|
ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
|
|
|
|
|
|
|
|
"GemmConv-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConv-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kForwardTest);
|
|
|
|
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
|
|
|
|
"GemmConv-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConv-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kForwardTest);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TEST(DepthwiseConvForward, GEMM2) {
|
|
|
|
|
|
|
|
ConvolutionTest<DEVICE_TYPE_GPU, DEVICE_TYPE_GPU> test(
|
|
|
|
|
|
|
|
"DepthwiseConv-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConv-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kForwardTest);
|
|
|
|
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
|
|
|
|
"DepthwiseConv-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConv-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kForwardTest);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TEST(DepthwiseConvBackwardInput, GEMM) {
|
|
|
|
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
|
|
|
|
"DepthwiseConvGradInput-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConvGradInput-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kBackwardInputTest);
|
|
|
|
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
|
|
|
|
"DepthwiseConvGradInput-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConvGradInput-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kBackwardInputTest);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TEST(DepthwiseConvBackwardFilter, GEMM) {
|
|
|
|
|
|
|
|
ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
|
|
|
|
|
|
|
|
"DepthwiseConvGradFilter-GPU",
|
|
|
|
|
|
|
|
"DepthwiseConvGradFilter-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kBackwardFilterTest);
|
|
|
|
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
ConvolutionTest2<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test2(
|
|
|
|
"DepthwiseConvGradFilter-GPU",
|
|
|
|
"GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", kBackwardFilterTest);
|
|
|
|
"DepthwiseConvGradFilter-GPU",
|
|
|
|
|
|
|
|
depthwiseConvolutionType,
|
|
|
|
|
|
|
|
kBackwardFilterTest);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
// ======End DepthwiseConvolution TEST======
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace paddle
|
|
|
|
} // namespace paddle
|
|
|
|