@@ -20,6 +20,8 @@ limitations under the License. */
 #include "paddle/math/SparseRowMatrix.h"
 #include "paddle/utils/Thread.h"
 
+P_DECLARE_int32(trainer_count);
+
 namespace paddle {
 
 SgdThreadUpdater::SgdThreadUpdater(const OptimizationConfig& optConfig)
@@ -48,6 +50,13 @@ void SgdThreadUpdater::init(std::vector<ParameterPtr>& parameters) {
                                               false /*inPserver*/));
     size_t numRows = para->isGradSparseUpdate() ? para->getConfig().dims(0) : 0;
     optimizers_[pid]->init(numRows, &para->getConfig());
+    if (para->isGradSparseUpdate() && FLAGS_trainer_count == 1) {
+      // For trainer_count=1, the gradient machine is NeuralNetwork, which does
+      // not create parameter buf for PARAMETER_GRADIENT for sparse update in
+      // Parameter::enableType(). But gradient parameter buf is still used
+      // in SgdThreadUpdater. We need to explicitly create it.
+      para->enableBufType(PARAMETER_GRADIENT);
+    }
   }
 }
 
@@ -211,7 +220,7 @@ void SgdThreadUpdater::threadUpdateSparse(
     // From MultiGradientMachine
     SparseRowIdsCpuMatrix* mainMat = dynamic_cast<SparseRowIdsCpuMatrix*>(
         para->getMat(PARAMETER_GRADIENT).get());
-    const std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
+    std::vector<uint32_t>& sparseIds = mainMat->getIds(tid);
 
     for (auto id : sparseIds) {
       // setup sub bufs
@@ -221,6 +230,7 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    sparseIds.clear();
   } else if (dynamic_cast<SparseRowCpuMatrix*>(
                  para->getMat(PARAMETER_GRADIENT).get())) {
     // From NeuralNetwork
@@ -246,6 +256,10 @@ void SgdThreadUpdater::threadUpdateSparse(
       optimizer->update(vecs, para->getConfig(), id);
       vecs[PARAMETER_GRADIENT]->zeroMem();
     }
+    // For numThreads > 1, MultiGradientMachine is used, which goes
+    // to the above branch.
+    CHECK_EQ(numThreads, 1);
+    mainMat->clearIndices();
   } else {
     auto& m = *para->getMat(PARAMETER_GRADIENT).get();
     LOG(FATAL) << "Internal error: " << para->getName() << " "