@@ -65,6 +65,17 @@ protected:
  MKLDNNMatrixPtr biasVal_;
  MKLDNNMatrixPtr biasGrad_;
  // merge grad primitive
  std::shared_ptr<mkldnn::primitive> mergeGrad_;
  std::vector<mkldnn::primitive> pipelineMergeGrad_;
  // temporary input Argument to save the input grad, only used when merging gradients
  Argument tmpInArg_;
  // since mkldnn sum does not support different formats
  // (see https://github.com/01org/mkl-dnn/issues/134),
  // we need to create the reorder manually and save a temporary MKLDNNMatrix
  MKLDNNMatrixPtr tmpOutGrad_;
  std::shared_ptr<mkldnn::primitive> tmpCvt_;

 public:
  explicit MKLDNNLayer(const LayerConfig& config)
      : Layer(config),
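For context, a minimal plain-C++ sketch of what these merge-grad members implement conceptually; `mergeGradNaive` is a hypothetical name, and the real code below performs the same element-wise sum with an mkldnn::sum primitive instead of a loop:

#include <cstddef>
#include <vector>

// When a layer's output feeds several consumers, the gradients coming back
// from each branch must be summed element-wise before backward continues.
void mergeGradNaive(std::vector<float>& dst,
                    const std::vector<std::vector<float>>& branchGrads) {
  for (auto& v : dst) v = 0.f;
  for (const auto& g : branchGrads) {
    for (size_t i = 0; i < dst.size(); ++i) {
      dst[i] += g[i];  // same math as mkldnn::sum with all scales = 1.0
    }
  }
}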
@@ -99,6 +110,7 @@ public:
    if (!Layer::init(layerMap, parameterMap)) {
      return false;
    }
    setOutputMap();
    checkCPUOutputsNumber();
    stream_.reset(new MKLDNNStream());
@@ -118,12 +130,9 @@ public:
        VLOG(MKLDNN_BASE) << getName() << " reset mkldnn forward";
        // reset when the total input size changes, not only the batch size
        inputElemenCnt_ = elemenCnt;
        pipelineFwd_.clear();
        reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
        resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
        if (outVal_) {
          // change the original output value to the mkldnn output value
          output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
        }
        convertWeightsFromPaddle();
        needResetBwd_ = true;
      }
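The guard above follows a simple rebuild-on-resize caching pattern; a stand-alone sketch with hypothetical names:

#include <cstddef>

// Rebuild the MKL-DNN primitives only when the total input element count
// changes; comparing element counts instead of the batch size also catches
// shape changes that keep the batch size constant.
struct FwdCache {
  size_t cachedCnt = 0;
  bool needRebuild(size_t elemenCnt) {
    if (cachedCnt == elemenCnt) return false;  // unchanged: reuse primitives
    cachedCnt = elemenCnt;
    return true;  // caller clears pipelines, reshapes, and rebuilds
  }
};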
@@ -144,9 +153,18 @@ public:
  void backward(const UpdateCallback& callback) override {
    if (needResetBwd_) {
      VLOG(MKLDNN_BASE) << getName() << " reset mkldnn backward";
      pipelineBwd_.clear();
      pipelineMergeGrad_.clear();
      mergeGrad_ = nullptr;
      resetBwd(pipelineBwd_, inGrad_, wgtGrad_, biasGrad_, outGrad_);
      needResetBwd_ = false;
    }
    // merging the gradients must happen before the backward activation,
    // since backwardActivation() reads the complete output grad
    if (mergeGrad_) {
      REGISTER_TIMER_INFO("MergeBpGrad", getName().c_str());
      stream_->submit(pipelineMergeGrad_);
    }
    {
      REGISTER_TIMER_INFO("BpActTimer", getName().c_str());
      backwardActivation();
@@ -247,6 +265,76 @@ protected:
    }
  }
  /**
   * Reset the output grad matrix from the primitive desc,
   * and reset the merge grad primitive if needed.
   * Note: when this layer has several outputs,
   * it cannot be mixed with a CPU device,
   * since the memory desc cannot be obtained from a CPU device.
   */
  virtual void resetOutGrad(MKLDNNMatrixPtr& out,
                            mkldnn::memory::primitive_desc pd) {
    CHECK(outputIsOnlyMKLDNN()) << "do not support mixing with other devices yet";
    mergeGrad_ = nullptr;
    pipelineMergeGrad_.clear();
    out = MKLDNNMatrix::create(output_.grad, pd);
    if (outputMap_.size() <= 1) {
      return;
    }
    std::vector<double> scales(outputMap_.size(), 1.0);
    std::vector<mkldnn::memory::primitive_desc> srcPDs;
    std::vector<mkldnn::primitive::at> srcs;
    for (auto it = outputMap_.begin(); it != outputMap_.end(); ++it) {
      MKLDNNMatrixPtr src =
          std::dynamic_pointer_cast<MKLDNNMatrix>(it->second->grad);
      VLOG(MKLDNN_BASE) << getName() << " has output grad " << it->first;
      CHECK(src) << "should be MKLDNNMatrix";
      auto srcDims = src->getDims();
      auto dstDims = out->getDims();
      CHECK_EQ(srcDims.size(), dstDims.size());
      for (size_t i = 0; i < srcDims.size(); ++i) {
        CHECK_EQ(srcDims[i], dstDims[i]);
      }
      srcPDs.push_back(src->getPrimitiveDesc());
      srcs.push_back(*src);
    }

    // TODO(TJ): remove me when mkldnn sum supports different formats
    for (size_t i = 1; i < srcPDs.size(); ++i) {
      CHECK(srcPDs[0] == srcPDs[i]);
    }
    tmpOutGrad_ = nullptr;
    tmpCvt_ = nullptr;
    if (out->getPrimitiveDesc() != srcPDs[0]) {
      tmpOutGrad_ = MKLDNNMatrix::create(nullptr, srcPDs[0]);
      tmpCvt_ = MKLDNNMatrix::createReorder(tmpOutGrad_, out);
      CHECK(tmpCvt_);
      pipelineMergeGrad_.push_back(*tmpCvt_);
    } else {
      tmpOutGrad_ = out;
    }

    auto sumPD = mkldnn::sum::primitive_desc(
        tmpOutGrad_->getMemoryDesc(), scales, srcPDs);
    mergeGrad_.reset(new mkldnn::sum(sumPD, srcs, *tmpOutGrad_));
    pipelineMergeGrad_.insert(pipelineMergeGrad_.begin(), *mergeGrad_);
  }
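For reference, a self-contained sketch of the same sum-then-reorder pattern against the MKL-DNN v0.x C++ API used above; the dimensions and formats are made up, and since source and destination share a format here the reorder is a plain copy, whereas resetOutGrad uses it to convert between formats:

#include <vector>
#include "mkldnn.hpp"

int main() {
  using namespace mkldnn;
  engine eng(engine::cpu, 0);

  // two branch gradients and one destination, all 2x3 in nc format
  memory::desc md({2, 3}, memory::data_type::f32, memory::format::nc);
  memory::primitive_desc pd(md, eng);

  std::vector<float> a(6, 1.f), b(6, 2.f), tmp(6, 0.f), dst(6, 0.f);
  memory srcA(pd, a.data()), srcB(pd, b.data());
  memory tmpOut(pd, tmp.data()), out(pd, dst.data());

  std::vector<double> scales(2, 1.0);  // plain addition, no scaling
  std::vector<memory::primitive_desc> srcPDs{pd, pd};
  std::vector<primitive::at> srcs{srcA, srcB};

  // same pattern as mergeGrad_/tmpCvt_: sum into a tmp, then reorder to dst
  auto sumPD = sum::primitive_desc(md, scales, srcPDs);
  std::vector<primitive> pipeline;
  pipeline.push_back(sum(sumPD, srcs, tmpOut));
  pipeline.push_back(reorder(tmpOut, out));
  stream(stream::kind::eager).submit(pipeline).wait();
  // every element of dst is now 1.f + 2.f == 3.f
  return 0;
}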
  /**
   * Reset the input grad from the primitive desc.
   * This function is available when the input is MKLDNN only,
   * or when the input does not care about the CPU device.
   * If the input layer feeds several consumers, the grad is saved into the
   * Argument registered under this layer's name, so the input layer can
   * merge all of the branch grads later in its resetOutGrad.
   */
  virtual void resetInGrad(MKLDNNMatrixPtr& in,
                           mkldnn::memory::primitive_desc pd) {
    LayerPtr& input = inputLayers_[0];
    const MatrixPtr& grad =
        input->getOutputMapSize() > 1 ? nullptr : input->getOutput().grad;
    in = MKLDNNMatrix::create(grad, pd);
    Argument& arg = input->getOutput(this->getName());
    arg.grad = std::dynamic_pointer_cast<Matrix>(in);
  }
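A minimal sketch (hypothetical `pickGradBuffer`, plain shared_ptr buffers) of the share-or-allocate rule the ternary above implements:

#include <cstddef>
#include <memory>

using BufferPtr = std::shared_ptr<float>;

// Single consumer: write the input grad straight into the producer's own
// grad buffer. Several consumers: allocate a private buffer, which the
// producer later merges in its resetOutGrad.
BufferPtr pickGradBuffer(size_t consumerCount,
                         const BufferPtr& producerGrad,
                         size_t len) {
  if (consumerCount <= 1) {
    return producerGrad;  // share directly
  }
  return BufferPtr(new float[len], std::default_delete<float[]>());
}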
  /**
   * Print info about sizes.
   */
@@ -334,6 +422,16 @@ private:
    }
  }
  /**
   * Set the output map of the previous layers, registering this layer
   * as one of their consumers.
   */
  void setOutputMap() {
    outputMap_.clear();
    for (size_t i = 0; i < inputLayers_.size(); ++i) {
      inputLayers_[i]->setOutput(getName(), &tmpInArg_);
    }
  }
  /**
   * Check the number of CPU devices in outputOtherDevice_;
   * there should be at most one.