Add MobileNet GPU acceleration; the CPU implementation is still in progress

cblas_new
xzl 8 years ago
parent eeb17c26fd
commit efae51ce24

@ -18,11 +18,6 @@ limitations under the License. */
namespace paddle {
/*
* imData = [input_channels, input_height, input_width]
* colData = [input_channels, filter_height, filter_width,
* output_height, output_width]
*/
template <class T>
class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
public:
@ -33,6 +28,8 @@ public:
int outputChannels,
int outputHeight,
int outputWidth,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideH,
@ -40,7 +37,7 @@ public:
int paddingH,
int paddingW,
T* outputData) {
// NO_IMPLEMENTATION
// TODO(zhaolong) : cpu implementation of depthwise convolution
}
};
@ -118,8 +115,8 @@ public:
size_t batchSize = input[0];
// size_t inputChannels = input[1];
// size_t inputHeight = input[2];
// size_t inputWidth = input[3];
size_t inputHeight = input[2];
size_t inputWidth = input[3];
size_t filterHeight = getFilterHeight(filter);
size_t filterWidth = getFilterWidth(filter);
size_t outputChannels = output[1];
@ -139,6 +136,8 @@ public:
outputChannels,
outputHeight,
outputWidth,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH(),
@ -233,8 +232,8 @@ public:
}
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(numInputs_, inputs.size());
CHECK_EQ(numOutputs_, outputs.size());
// CHECK_EQ(numInputs_, inputs.size());
// CHECK_EQ(numOutputs_, outputs.size());
check(inputs, outputs);
const TensorShape& output = inputs[0].shape();
const TensorShape& input = inputs[1].shape();

@ -18,11 +18,6 @@ limitations under the License. */
namespace paddle {
/*
* imData = [input_channels, input_height, input_width]
* colData = [input_channels, filter_height, filter_width,
* output_height, output_width]
*/
template <DeviceType Device, class T>
class DepthwiseConvFunctor {
public:
@ -33,6 +28,8 @@ public:
int outputChannels,
int outputHeight,
int outputWidth,
int inputHeight,
int intputWidth,
int filterHeight,
int filterWidth,
int strideH,

File diff suppressed because it is too large Load Diff

@ -21,7 +21,8 @@ bool ConvBaseLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);
isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv")
isDeconv_ = (config_.type() == "exconv" || config_.type() == "cudnn_conv" ||
config_.type() == "depthwise_conv")
? false
: true;

@ -15,6 +15,7 @@ limitations under the License. */
#include "DepthwiseConvLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include <iostream>
namespace paddle {
@ -79,6 +80,7 @@ void DepthwiseConvLayer::forward(PassType passType) {
Layer::forward(passType);
size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
// std::cout << "outputSize" << getOutputSize() <<std::endl;
resetOutput(batchSize, getOutputSize());
// Calculate the shape of the input, output, and filter.

Loading…
Cancel
Save