You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
253 lines
6.9 KiB
253 lines
6.9 KiB
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
|
|
|
|
#pragma once
|
|
|
|
#include <vector>
|
|
#include "Layer.h"
|
|
#include "MKLDNNBase.h"
|
|
#include "mkldnn.hpp"
|
|
#include "paddle/math/MKLDNNMatrix.h"
|
|
|
|
DECLARE_bool(use_mkldnn);
|
|
|
|
namespace paddle {
|
|
|
|
// Forward declaration so the shared-pointer alias can be named before the
// class itself is defined.
class MKLDNNLayer;
// Shared-ownership handle to an MKLDNNLayer.
using MKLDNNLayerPtr = std::shared_ptr<MKLDNNLayer>;
|
|
|
|
/**
 * @brief Base class of MKLDNN layers.
 */
|
|
class MKLDNNLayer : public Layer {
|
|
protected:
|
|
// batch size
|
|
int bs_;
|
|
// input image channel, height and width
|
|
int ic_, ih_, iw_;
|
|
// output image channel, height and width
|
|
int oc_, oh_, ow_;
|
|
|
|
// backward also need reset after reset forward handle
|
|
bool needResetBwd_;
|
|
|
|
// mkldnn engine, stream and primivtives
|
|
mkldnn::engine engine_;
|
|
std::shared_ptr<MKLDNNStream> stream_;
|
|
std::shared_ptr<mkldnn::primitive> fwd_;
|
|
std::shared_ptr<mkldnn::primitive> bwdWgt_;
|
|
std::shared_ptr<mkldnn::primitive> bwdData_;
|
|
std::vector<mkldnn::primitive> pipelineFwd_;
|
|
std::vector<mkldnn::primitive> pipelineBwd_;
|
|
|
|
// MKLDNNMatrixPtr
|
|
MKLDNNMatrixPtr inVal_;
|
|
MKLDNNMatrixPtr inGrad_;
|
|
MKLDNNMatrixPtr outVal_;
|
|
MKLDNNMatrixPtr outGrad_;
|
|
MKLDNNMatrixPtr wgtVal_;
|
|
MKLDNNMatrixPtr wgtGrad_;
|
|
MKLDNNMatrixPtr biasVal_;
|
|
MKLDNNMatrixPtr biasGrad_;
|
|
|
|
public:
|
|
explicit MKLDNNLayer(const LayerConfig& config)
|
|
: Layer(config),
|
|
bs_(0),
|
|
ic_(0),
|
|
ih_(0),
|
|
iw_(0),
|
|
oc_(0),
|
|
oh_(0),
|
|
ow_(0),
|
|
needResetBwd_(true),
|
|
engine_(mkldnn::engine::cpu, 0),
|
|
stream_(nullptr),
|
|
fwd_(nullptr),
|
|
bwdWgt_(nullptr),
|
|
bwdData_(nullptr) {}
|
|
|
|
~MKLDNNLayer() {}
|
|
|
|
virtual bool init(const LayerMap& layerMap,
|
|
const ParameterMap& parameterMap) {
|
|
CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn."
|
|
<< "Please set WITH_MKLDNN=ON "
|
|
<< "and set use_mkldnn=True";
|
|
CHECK(!useGpu_) << "Do not support GPU yet";
|
|
|
|
// set device id before Layer::init
|
|
setDevice(MKLDNN_DEVICE);
|
|
// change param device to MKLDNN device
|
|
setParamsDevice(MKLDNN_DEVICE, parameterMap);
|
|
if (!Layer::init(layerMap, parameterMap)) {
|
|
return false;
|
|
}
|
|
|
|
stream_.reset(new MKLDNNStream());
|
|
engine_ = CPUEngine::Instance().getEngine();
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* convert weight from paddle format to mkldnn format
|
|
* weight_ will be override
|
|
*/
|
|
virtual void convertWeightsFromPaddle() {}
|
|
|
|
/**
|
|
* convert mkldnn weight to paddle format
|
|
* weight_ will be override
|
|
*/
|
|
virtual void convertWeightsToPaddle() {}
|
|
|
|
/**
|
|
* convert MKLDNN output to other device.
|
|
* only support CPU device yet
|
|
*/
|
|
virtual void convertOutputToOtherDevice() {}
|
|
|
|
/**
|
|
* print info about sizes
|
|
*/
|
|
virtual void printSizeInfo() {
|
|
VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_
|
|
<< ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_
|
|
<< ", oh: " << oh_ << ", ow: " << ow_;
|
|
}
|
|
|
|
/**
|
|
* Print the mkldnn memory format flow of value
|
|
*/
|
|
virtual void printValueFormatFlow() {
|
|
if (inVal_ && outVal_) {
|
|
VLOG(MKLDNN_FMTS) << "value format flow --- " << inVal_->getFormat()
|
|
<< " >>> " << outVal_->getFormat();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Print the mkldnn memory format flow of grad
|
|
*/
|
|
virtual void printGradFormatFlow() {
|
|
if (inGrad_ && outGrad_) {
|
|
VLOG(MKLDNN_FMTS) << "grad format flow --- " << inGrad_->getFormat()
|
|
<< " <<< " << outGrad_->getFormat();
|
|
}
|
|
}
|
|
|
|
protected:
|
|
/**
|
|
* copy image size and sequence info to other device
|
|
* @note: can not directly use Layer::copyOutputToOtherDevice since here only
|
|
* copy base info and do not copy data value
|
|
*/
|
|
void copyOutputInfoToOtherDevice() {
|
|
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
|
|
outputOtherDevice_[i].setFrameHeight(output_.getFrameHeight());
|
|
outputOtherDevice_[i].setFrameWidth(output_.getFrameWidth());
|
|
outputOtherDevice_[i].sequenceStartPositions =
|
|
output_.sequenceStartPositions;
|
|
outputOtherDevice_[i].subSequenceStartPositions =
|
|
output_.subSequenceStartPositions;
|
|
outputOtherDevice_[i].cpuSequenceDims = output_.cpuSequenceDims;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* If input only has MKLDNN device.
|
|
* Otherwise, only support the previous layer using CPU device.
|
|
*/
|
|
bool inputIsOnlyMKLDNN(int index = 0) {
|
|
int prevDevice = getPrev(index)->getDeviceId();
|
|
if (prevDevice == MKLDNN_DEVICE) {
|
|
return true;
|
|
} else {
|
|
// do not support GPU yet
|
|
CHECK_EQ(prevDevice, CPU_DEVICE) << "Only support CPU yet";
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* If output only has MKLDNN device.
|
|
* Otherwise, other devices should only using CPU device.
|
|
*/
|
|
bool outputIsOnlyMKLDNN() {
|
|
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
|
|
CHECK_EQ(outputOtherDevice_[i].deviceId, CPU_DEVICE)
|
|
<< "Only support other device is CPU yet";
|
|
}
|
|
return outputOtherDevice_.size() == 0;
|
|
}
|
|
|
|
/**
|
|
* Sync input value data
|
|
*/
|
|
void syncInputValue() {
|
|
if (inputIsOnlyMKLDNN()) {
|
|
return;
|
|
}
|
|
real* iData = getInputValue(0, CPU_DEVICE)->getData();
|
|
// update input data
|
|
// since it might be changed if this is after data layer
|
|
inVal_->updateData(iData);
|
|
}
|
|
|
|
/**
|
|
* Sync output grad data
|
|
*/
|
|
void syncOutputGrad() {
|
|
if (outputIsOnlyMKLDNN()) {
|
|
return;
|
|
}
|
|
|
|
// update diff
|
|
real* oDiff = getOutput(CPU_DEVICE).grad->getData();
|
|
outGrad_->updateData(oDiff);
|
|
}
|
|
|
|
/**
|
|
* Set deviceId of this layer.
|
|
*/
|
|
void setDevice(int id) { deviceId_ = id; }
|
|
|
|
/**
|
|
* Set deviceId of the params used in this layer.
|
|
*/
|
|
void setParamsDevice(int id, const ParameterMap& parameterMap) {
|
|
for (auto& inputConfig : config_.inputs()) {
|
|
if (inputConfig.has_input_parameter_name()) {
|
|
ParameterPtr parameter;
|
|
std::string name = inputConfig.input_parameter_name();
|
|
CHECK(mapGet(name, parameterMap, ¶meter))
|
|
<< "Cannot find input parameter " << name << " for layer "
|
|
<< getName();
|
|
parameter->setDevice(id);
|
|
}
|
|
}
|
|
if (config_.has_bias_parameter_name()) {
|
|
ParameterPtr parameter;
|
|
std::string name = config_.bias_parameter_name();
|
|
CHECK(mapGet(name, parameterMap, ¶meter))
|
|
<< "Cannot find bias parameter " << name << " for layer "
|
|
<< getName();
|
|
parameter->setDevice(id);
|
|
}
|
|
}
|
|
};
|
|
|
|
} // namespace paddle
|