add OMP SGD to speed up on CPUs

update-doc-pybind
tensor-tang 7 years ago
parent 330e9929ec
commit d6a27ade54

@@ -92,6 +92,28 @@ public:
  const T* getData() const { return this->data_; }
  T* getData() { return this->data_; }
#ifdef PADDLE_USE_MKLDNN
  /**
   * SGD update with OpenMP to speed up on CPUs.
   */
  void sgdUpdateWithOMP(VectorT& gradVec,
                        VectorT& momVec,
                        T learningRate,
                        T momentum,
                        T decayRate) {
    size_t size = this->getSize();
    T* val = this->getData();
    T* grd = gradVec.getData();
    T* mom = momVec.getData();
    decayRate *= learningRate;
#pragma omp parallel for
    for (size_t i = 0; i < size; ++i) {
      mom[i] = momentum * mom[i] - learningRate * grd[i] - decayRate * val[i];
      val[i] += mom[i];
    }
  }
#endif
  virtual void zeroMem() = 0;
  // set all elements to value
  virtual void reset(const T& value) = 0;

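For reference, the loop in sgdUpdateWithOMP applies the usual momentum-SGD update with L2 weight decay; per element i, with \eta = learningRate, \mu = momentum and \lambda = decayRate (the code folds \eta into the decay term up front):

  mom_i \leftarrow \mu \cdot mom_i - \eta \cdot grd_i - \eta\lambda \cdot val_i
  val_i \leftarrow val_i + mom_i
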
@@ -37,6 +37,15 @@ public:
    real torch_learningRate = optConfig_.learning_method() == "torch_momentum"
                                  ? 1.0 - paraConfig.momentum()
                                  : 1.0;
#ifdef PADDLE_USE_MKLDNN
    vecs[PARAMETER_VALUE]->sgdUpdateWithOMP(
        *vecs[PARAMETER_GRADIENT],
        *vecs[PARAMETER_MOMENTUM],
        learningRate_ * paraConfig.learning_rate() *
            (firstTime_ ? 1.0 : torch_learningRate),
        paraConfig.momentum(),
        applyDecay_ ? paraConfig.decay_rate() : 0);
#else
    vecs[PARAMETER_VALUE]->sgdUpdate(
        *vecs[PARAMETER_GRADIENT],
        *vecs[PARAMETER_MOMENTUM],
@@ -44,6 +53,7 @@ public:
        (firstTime_ ? 1.0 : torch_learningRate),
        paraConfig.momentum(),
        applyDecay_ ? paraConfig.decay_rate() : 0);
#endif
  }
  virtual void finishBatch() { firstTime_ = false; }
};

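As a standalone illustration (not part of this commit), here is a minimal sketch of the same OpenMP-parallelized update on raw arrays; sgdUpdateOmpSketch and all the values below are made up for the example, and the pragma only takes effect when the file is built with OpenMP enabled (e.g. -fopenmp for GCC/Clang):

#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical standalone version of the loop in sgdUpdateWithOMP / sgdUpdateCpu.
void sgdUpdateOmpSketch(float* val, const float* grd, float* mom,
                        std::size_t size, float learningRate,
                        float momentum, float decayRate) {
  decayRate *= learningRate;  // fold the learning rate into the decay term, as the commit does
#pragma omp parallel for
  for (std::size_t i = 0; i < size; ++i) {
    mom[i] = momentum * mom[i] - learningRate * grd[i] - decayRate * val[i];
    val[i] += mom[i];
  }
}

int main() {
  std::vector<float> val(8, 1.0f), grd(8, 0.5f), mom(8, 0.0f);
  sgdUpdateOmpSketch(val.data(), grd.data(), mom.data(), val.size(),
                     /*learningRate=*/0.1f, /*momentum=*/0.9f, /*decayRate=*/0.01f);
  std::printf("val[0] = %f\n", val[0]);  // expected 0.949 = 1 - 0.1*0.5 - 0.1*0.01*1
  return 0;
}
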
@@ -30,6 +30,9 @@ void sgdUpdateCpu(real learningRate,
                  const real* grad,
                  real* momentumVec) {
  decayRate *= learningRate;
#ifdef PADDLE_USE_MKLDNN
#pragma omp parallel for
#endif
  for (size_t i = 0; i < size; ++i) {
    momentumVec[i] = momentum * momentumVec[i] - learningRate * grad[i] -
                     decayRate * value[i];

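A note on the guarded pragma above: when PADDLE_USE_MKLDNN is not defined the pragma is compiled out and sgdUpdateCpu stays serial, and even when it is defined the pragma only has an effect if the translation unit is built with OpenMP support; compilers without it simply ignore the unknown pragma, so the loop computes the same result either way.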