commit 01bee86df5
@@ -0,0 +1,28 @@
| Github account | name |
|---|---|
| reyoung | Yang Yu |
| gangliao | Gang Liao |
| luotao01 | Tao Luo |
| jacquesqiao | Long-Fei Qiao |
| qingqing01 | Qing-Qing Dang |
| hedaoyuan | Dao-Yuan He |
| wangyang59 | Yang Wang |
| QiJune | Jun Qi |
| tianbingsz | Tian-Bing Xu |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| typhoonzero | Yi Wu |
| backyes | Yan-Fei Wang |
| pengli09 | Peng Li |
| livc | Zhao Li |
| Xreki | Yi-Qun Liu |
| Yancey1989 | Xu Yan |
| emailweixu | Wei Xu |
| wen-bo-yang | Wen-Bo Yang |
| helinwang | He-Lin Wang |
| lcy-seso | Ying Cao |
| Zrachel | Rui-Qing Zhang |
| Haichao-Zhang | Hai-Chao Zhang |
| gongweibao | Wei-Bao Gong |
| lzhao4ever | Liang Zhao |
| zhouxiao-coder | Xiao Zhou |
| lipeng-unisound | Peng Li |
@@ -1,56 +0,0 @@
Cao, Ying
Cheng, Yujuan
Dang, Qingqing
Dong, Tengfei
Du, Dalong
Feng, Shouqiang
Gao, Haoyuan
Han, Baochang
Han, Jinchen
Hao, Nanyu
He, Daoyuan
He, Zhengyan
Hou, Jue
Huang, Chang
Huang, Zhiheng
Hu, Na
Kong, Qi
Liao, Gang
Li, Bo
Li, Jiajie
Li, Jing
Li, Lei
Li, Peng
Liu, Sheng
Liu, Yuan
Li, Yuze
Luo, Heng
Luo, Tao
Lyu, Qin
Mao, Hongyue
Qian, Xiaojun
Qiao, Longfei
Qi, Jun
Qin, Duohao
Shen, Guolong
Shi, Guangchuan
Song, Xiang
Wang, Helin
Wang, Jiang
Wang, Yanfei
Wang, Yi
Wang, Yong
Weng, Renliang
Xu, Tianbing
Xu, Wei
Xu, Xingyu
Yan, Chong
Yan, Chunwei
Yang, Yi
Yu, Yang
Yu, Yinan
Zhang, Jian
Zhang, Ruiqing
Zhang, Weide
Zhao, Liang
Zhou, Jie
File diff suppressed because it is too large
@@ -1,209 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <fstream>
#include "paddle/utils/Logging.h"

#include "ParallelParameter.h"

namespace paddle {

UpdateFunction paramUpdateFunctions[UPDATE_TYPE_NUM] = {
    nullptr,  // &ParallelParameter::singleUpdate,  /* single thread */
    nullptr,  // &ParallelParameter::controlUpdate, /* controller thread */
    &ParallelParameter::majorUpdate,  /* major thread */
    &ParallelParameter::minorUpdate,  /* minor thread */

    nullptr,                          /* master */
    &ParallelParameter::slaveUpdate,  /* slave */
};

ParallelParameterPtr ParallelParameter::create(TrainerRole role,
                                               ParameterPtr localParam,
                                               int asyncCount) {
  ParallelParameterPtr ptr = nullptr;
  switch (role) {
    case TRAINER_ROLE_CONTROL:
    case TRAINER_ROLE_MAJOR:
    case TRAINER_ROLE_MINOR:
      ptr = std::make_shared<SyncParameter>(role, localParam);
      break;
    case TRAINER_ROLE_MASTER:
    case TRAINER_ROLE_SLAVE:
      ptr = std::make_shared<AsyncParameter>(role, asyncCount, localParam);
      break;
    default:
      LOG(FATAL) << "unknown role " << role << "\n";
  }
  return ptr;
}

void ParallelParameter::syncUpdate(TrainerRole role, real learnRate) {
  if (paramUpdateFunctions[role]) {
    (this->*paramUpdateFunctions[role])(learnRate);
  }
}

void SyncParameter::attachControlParam(ParallelParameterPtr controler) {
  controlParam_ = controler;
}

void SyncParameter::attachMajorParam(ParallelParameterPtr partner) {
  majorPartners_.push_back(partner);
  if (role_ == TRAINER_ROLE_CONTROL) {
    localParam_->setSharedCount(majorPartners_.size());
  }
  // partnerParam_ = partner;
}

void SyncParameter::attachMinorParam(ParallelParameterPtr partner,
                                     int deviceId) {
  minorPartners_.push_back(partner);
  minorDeviceIds_.push_back(deviceId);
  // partnerParam_ = partner;
}

void SyncParameter::waitAllMajorGradReady() {
  for (size_t i = 0; i < majorPartners_.size(); i++) {
    majorPartners_[i]->waitGradReady();
    partnerParam_ = majorPartners_[i]->getLocalParameter();
    VectorPtr localGrad = localParam_->getBuf(PARAMETER_GRADIENT);
    VectorPtr patnrGrad = partnerParam_->getBuf(PARAMETER_GRADIENT);
    if (FLAGS_use_gpu) hl_set_device(minorDeviceIds_[i]);
    localGrad->add(*patnrGrad);
  }
}

void SyncParameter::synchronizeParamter() {
  valueSem_->wait();
  if (role_ == TRAINER_ROLE_MINOR) {
    /* copy the value from controller */
    VectorPtr cntrlVec =
        (controlParam_->getLocalParameter())->getBuf(PARAMETER_VALUE);
    VectorPtr localVec = localParam_->getBuf(PARAMETER_VALUE);
    localVec->copyFrom(*cntrlVec);

    /* dispatch the value to major */
    for (size_t i = 0; i < majorPartners_.size(); i++) {
      VectorPtr majorVec =
          (majorPartners_[i]->getLocalParameter())->getBuf(PARAMETER_VALUE);
      majorVec->copyFrom(*localVec);
      majorPartners_[i]->postValueReady();
    }
  }
}

void SyncParameter::singleUpdate(real learnRate) {
  CHECK(role_ == TRAINER_ROLE_SINGLE);
  localParam_->updateWithGradient(learnRate);
}

void SyncParameter::controlUpdate(const UpdateCallback &callBack) {
  CHECK(role_ == TRAINER_ROLE_CONTROL);
  CHECK(gradSem_ != NULL && valueSem_ != NULL);
  CHECK(majorPartners_.size());

  /* update */
  if (callBack) {
    callBack(localParam_.get());
    localParam_->clearGradient();
  }

  for (size_t i = 0; i < minorPartners_.size(); i++) {
    minorPartners_[i]->postValueReady();
  }
}

void SyncParameter::majorUpdate(real learnRate) {
  (void)learnRate;
  CHECK(role_ == TRAINER_ROLE_MAJOR);
  CHECK(gradSem_ != NULL && valueSem_ != NULL);
  CHECK(minorPartners_.size() && controlParam_);

  /* wait the minor-Gradient is ready */
  for (size_t i = 0; i < minorPartners_.size(); i++) {
    minorPartners_[i]->waitGradReady();
    partnerParam_ = minorPartners_[i]->getLocalParameter();
    VectorPtr localGrad = localParam_->getBuf(PARAMETER_GRADIENT);
    VectorPtr minorGrad = partnerParam_->getBuf(PARAMETER_GRADIENT);
    localGrad->add(*minorGrad);
  }

  /* notice the controller that the gradient is ready */
  gradSem_->post();
}

void SyncParameter::minorUpdate(real learnRate) {
  (void)learnRate;
  CHECK(role_ == TRAINER_ROLE_MINOR);
  CHECK(gradSem_ != NULL && valueSem_ != NULL);

  // notice the major that the gradient is ready
  gradSem_->post();
}

AsyncParameter::AsyncParameter(TrainerRole role,
                               int asyncCount,
                               ParameterPtr localParam)
    : ParallelParameter(role, localParam) {
  asyncCount_ = asyncCount;
  accumCounter_ = 0;
  gradientAccum_ = Vector::create(localParam->getSize(), localParam->useGpu());
  gradientAccum_->zeroMem();
}

void AsyncParameter::slaveUpdate(real learnRate) {
  /* increase the accumCounter_ */
  accumCounter_++;

  /* accumulate the gradient to the buffer */
  VectorPtr grad = localParam_->getBuf(PARAMETER_GRADIENT);
  gradientAccum_->add(*grad);

  /* if need to be synchronized with the master */
  if (accumCounter_ == asyncCount_) {
    gradSem_->post();
    // accumCounter_ = 0; NOTICE: the upper-function need to reset the counter
  } else {  // self update
    localParam_->updateWithGradient(learnRate);
  }
  localParam_->clearGradient();
}

bool AsyncParameter::masterUpdate(ParallelParameterPtr slaveParam,
                                  const UpdateCallback &callback) {
  CHECK(slaveParam && callback);

  /* wait the slave is ready */
  if (!slaveParam->timeWaitGradReady(5)) {
    return false;
  }

  AsyncParameter *asyncParam = dynamic_cast<AsyncParameter *>(slaveParam.get());

  /* get the accum-gradient to update local parameter */
  VectorPtr slaveVec = asyncParam->getAccum();
  localParam_->getBuf(PARAMETER_GRADIENT)->copyFrom(*slaveVec);
  callback(localParam_.get());
  // slaveVec->zeroMem();

  /* copy the newest parameter-value to the slave */
  slaveVec = (slaveParam->getLocalParameter())->getBuf(PARAMETER_VALUE);
  slaveVec->copyFrom(*(localParam_->getBuf(PARAMETER_VALUE)));

  /* release the semphore */
  slaveParam->postValueReady();

  return true;
}

}  // namespace paddle
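The removed implementation above coordinates threads with two semaphores: gradSem_ signals that a gradient is ready to be consumed, and valueSem_ signals that a fresh parameter value has been pushed back. As a reading aid only (not part of this commit), the sketch below shows how a slave thread and a master thread might have driven the asynchronous path; slaveLoopSketch, masterLoopSketch, and forwardBackwardOneBatch() are hypothetical names, and the optimizer callback body is a placeholder.

// Sketch only; assumes a master/slave pair built with
// ParallelParameter::create(TRAINER_ROLE_MASTER, ...) and
// ParallelParameter::create(TRAINER_ROLE_SLAVE, ..., asyncCount).
#include "ParallelParameter.h"

namespace paddle {

// Slave thread: trains on data; every asyncCount-th step it posts gradSem_
// and then blocks in synchronizeParamter() until the master pushes a value.
void slaveLoopSketch(ParallelParameterPtr slave, real learnRate) {
  for (;;) {
    // forwardBackwardOneBatch();    // hypothetical: fills PARAMETER_GRADIENT
    slave->slaveUpdate(learnRate);   // self-update, or hand off to the master
    slave->synchronizeParamter();    // when the counter is full: wait + reset
  }
}

// Master thread: never trains; it polls the slave, merges the accumulated
// gradient, runs the update callback, copies the new value back, and
// releases the slave via postValueReady() inside masterUpdate().
void masterLoopSketch(AsyncParameter* master, ParallelParameterPtr slave) {
  for (;;) {
    master->masterUpdate(slave, [](Parameter* p) {
      // optimizer step on p (placeholder)
    });
  }
}

}  // namespace paddle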
@@ -1,244 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <stdint.h>

#include <sys/time.h>
#include <unistd.h>
#include <iostream>
#include <string>
#include <vector>

#include "hl_gpu.h"
#include "paddle/math/Vector.h"
#include "paddle/parameter/Parameter.h"
#include "paddle/parameter/ParameterUpdateFunctions.h"
#include "paddle/utils/Common.h"
#include "paddle/utils/Flags.h"
#include "paddle/utils/Locks.h"

#include "ParameterConfig.pb.h"

namespace paddle {

class ParallelParameter;
class SyncParameter;
class AsyncParameter;

typedef std::shared_ptr<ParallelParameter> ParallelParameterPtr;

const int UPDATE_TYPE_NUM = 32;

/**
 * TrainerRole denotes the role of the current trainer; different roles have
 * different jobs.
 *
 * control, major, and minor are three kinds of role that support multi-GPU
 * parallel SGD training. The GPUs are organized into groups, and each group
 * consists of a major and a minor.
 *
 * @param single  single GPU card, single-thread training.
 *
 * @param control the current parameter is updated via the control role and
 *                does not participate in real training. The control role is
 *                responsible for merging all majors' gradients and updating
 *                the parameter value.
 *
 * @param major   the major role participates in real training; when its local
 *                gradient is ready, it merges its corresponding minor's
 *                gradient and notifies the controller that this group's
 *                gradient is ready.
 *
 * @param minor   the minor role participates in real training; when its local
 *                gradient is ready, it only notifies its corresponding major.
 *                To spread the work evenly, after the controller updates the
 *                parameter value, each group's minor is responsible for
 *                dispatching the latest model to itself and its major.
 */
enum TrainerRole {
  TRAINER_ROLE_SINGLE,
  TRAINER_ROLE_CONTROL,
  TRAINER_ROLE_MAJOR,
  TRAINER_ROLE_MINOR,
  TRAINER_ROLE_MASTER,
  TRAINER_ROLE_SLAVE
};
typedef void (ParallelParameter::*UpdateFunction)(real learnRate);

class ParallelParameter {
public:
  static ParallelParameterPtr create(TrainerRole role,
                                     ParameterPtr localParam,
                                     int asyncCount = 1);

  ParallelParameter(TrainerRole role, ParameterPtr localParam) {
    role_ = role;
    gradSem_.reset(new Semaphore(0));
    valueSem_.reset(new Semaphore(0));
    localParam_ = localParam;
  }

  virtual ~ParallelParameter() {}

  ParameterPtr getLocalParameter() { return localParam_; }
  bool timeWaitGradReady(int sec) {
    struct timespec ts;
    ts.tv_nsec = 0;
    ts.tv_sec = time(NULL) + sec;
    return gradSem_->timeWait(&ts);
  }
  void waitGradReady() { gradSem_->wait(); }
  void postValueReady() { valueSem_->post(); }

  void syncUpdate(TrainerRole role, real learnRate);

  virtual void synchronizeParamter() = 0;

  /**
   * for synchronous training
   */
  virtual void singleUpdate(real learnRate) { (void)learnRate; }

  virtual void controlUpdate(const UpdateCallback& callback) { (void)callback; }

  virtual void majorUpdate(real learnRate) { (void)learnRate; }

  virtual void minorUpdate(real learnRate) { (void)learnRate; }

  /**
   * for asynchronous training
   */
  virtual void slaveUpdate(real learnRate) { (void)learnRate; }

protected:
  TrainerRole role_;
  ParameterPtr localParam_;
  std::unique_ptr<Semaphore>
      gradSem_;  /// whether the local parameter-gradient is ready
  std::unique_ptr<Semaphore>
      valueSem_;  /// whether the local parameter-value is updated
};

/**
 * This class is designed for multi-threaded training.
 *
 * "Synchronous" means that multiple GPUs each calculate a fraction
 * (e.g. 1/4) of the mini-batch, but only one merged gradient is produced.
 */
class SyncParameter : public ParallelParameter {
public:
  SyncParameter(TrainerRole role, ParameterPtr localParam)
      : ParallelParameter(role, localParam) {
    controlParam_ = nullptr;
    majorPartners_.clear();
    minorPartners_.clear();
  }
  ~SyncParameter() {
    majorPartners_.clear();
    minorPartners_.clear();
  }
  void attachControlParam(ParallelParameterPtr controler);

  void attachMajorParam(ParallelParameterPtr partner);

  void attachMinorParam(ParallelParameterPtr partner, int deviceId);

  void waitAllMajorGradReady();

  void synchronizeParamter();

  void singleUpdate(real learnRate);

  void controlUpdate(const UpdateCallback& callback);

  void majorUpdate(real learnRate);

  void minorUpdate(real learnRate);

  std::vector<ParallelParameterPtr>& getMajorPartners() {
    return majorPartners_;
  }

  std::vector<ParallelParameterPtr>& getMinorPartners() {
    return minorPartners_;
  }

private:
  // The following variables are used in a multithreaded training situation.
  // partnerParam_ is the local parameter's partner.
  // controlParam_ is the controller thread's parameter.
  ParameterPtr partnerParam_;
  std::vector<ParallelParameterPtr> majorPartners_;
  std::vector<ParallelParameterPtr> minorPartners_;
  std::vector<int> minorDeviceIds_;
  ParallelParameterPtr controlParam_;
};

class AsyncParameter : public ParallelParameter {
public:
  AsyncParameter(TrainerRole role, int asyncCount, ParameterPtr localParam);

  void clearCounter() { accumCounter_ = 0; }

  VectorPtr getAccum() { return gradientAccum_; }

  void synchronizeParamter() {
    if (accumCounter_ == asyncCount_) {
      valueSem_->wait();
      clearCounter();
      gradientAccum_->zeroMem();
    }
  }

  /**
   * In asynchronous training, the update strategy involves a slave and a
   * master.
   *
   * slave: within asyncCount steps it updates itself; once asyncCount is
   *        reached it waits for the master to update.
   */
  void slaveUpdate(real learnRate);

  /**
   * In asynchronous training, the update strategy involves a slave and a
   * master.
   *
   * master: it only polls the slaves and does not train on data.
   *         When a slave's gradient is ready, it fetches it, updates the
   *         master's parameter, and then copies the new value back into the
   *         corresponding slave.
   */
  bool masterUpdate(ParallelParameterPtr slaveParam,
                    const UpdateCallback& callback);

private:
  /**
   * In asynchronous training, every async trainer needs to accumulate the
   * gradients of a number of batches.
   *
   * gradientAccum_ is used to save the sum of those gradients.
   */
  VectorPtr gradientAccum_;

  /// Asynchronous count.
  int asyncCount_;
  /// Accumulate counter of current gradients.
  int accumCounter_;
};

typedef std::map<std::string, ParallelParameterPtr> ParallelParameterMap;

}  // namespace paddle
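The wiring between the control, major, and minor roles declared above lived in the trainer code and is not shown in this diff. As an illustration only, the sketch below gives one plausible setup for a single group, inferred from the removed implementation; syncGroupWiringSketch, the three Parameter handles, and minorDeviceId are assumed inputs, not names from this commit.

// Sketch only, inferred from the removed ParallelParameter code; the actual
// trainer wiring is not part of this commit.
#include "ParallelParameter.h"

namespace paddle {

void syncGroupWiringSketch(ParameterPtr ctrlParam,
                           ParameterPtr majorParam,
                           ParameterPtr minorParam,
                           int minorDeviceId) {
  // create() routes CONTROL/MAJOR/MINOR roles to SyncParameter.
  ParallelParameterPtr control =
      ParallelParameter::create(TRAINER_ROLE_CONTROL, ctrlParam);
  ParallelParameterPtr major =
      ParallelParameter::create(TRAINER_ROLE_MAJOR, majorParam);
  ParallelParameterPtr minor =
      ParallelParameter::create(TRAINER_ROLE_MINOR, minorParam);

  auto* ctrl = dynamic_cast<SyncParameter*>(control.get());
  auto* maj = dynamic_cast<SyncParameter*>(major.get());
  auto* mnr = dynamic_cast<SyncParameter*>(minor.get());

  // The controller waits on every major's gradient and pushes the updated
  // value to every minor; each major pulls its minor's gradient and reports
  // to the controller; each minor copies the value from the controller and
  // forwards it to its major.
  ctrl->attachMajorParam(major);
  ctrl->attachMinorParam(minor, minorDeviceId);

  maj->attachMinorParam(minor, minorDeviceId);
  maj->attachControlParam(control);

  mnr->attachMajorParam(major);
  mnr->attachControlParam(control);
}

}  // namespace paddle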
@@ -1,7 +0,0 @@
FROM paddledev/paddle:cpu-devel-latest
COPY build.sh /
RUN pip install sphinx &&\
    pip install sphinx_rtd_theme &&\
    apt install -y doxygen graphviz &&\
    pip install recommonmark numpy protobuf==2.6.1
CMD /build.sh
@@ -1,13 +0,0 @@
#!/bin/bash
set -ex

mkdir -p /build
cd /build
cmake /paddle -DWITH_DOC=ON
make paddle_docs paddle_docs_cn -j `nproc`
mkdir -p /output/doc
mkdir -p /output/doc_cn
cp -r doc/html/* /output/doc/
cp -r doc_cn/html/* /output/doc_cn/
cd /
rm -rf /paddle/build
@@ -1,4 +1,36 @@
 #!/bin/bash
 set -e
-docker build . -t paddle_build_doc
-docker run --rm -v $PWD/../../../../:/paddle -v $PWD:/output paddle_build_doc
+function usage(){
+    echo "usage: build_doc [--help] [<args>]"
+    echo "This script generates doc and doc_cn in the script's directory."
+    echo "These are common commands used in various situations:"
+    echo " with_docker build doc and doc_cn with docker"
+    echo " local build doc and doc_cn locally"
+}
+
+
+case "$1" in
+    "with_docker")
+        docker run --rm -v $PWD/../../../../:/paddle \
+            -e "WITH_GPU=OFF" -e "WITH_AVX=ON" -e "WITH_DOC=ON" paddledev/paddle:dev
+        ;;
+    "local")
+        mkdir -p doc
+        mkdir -p doc_cn
+        PADDLE_SOURCE_DIR=$PWD/../../../../
+        mkdir -p $PADDLE_SOURCE_DIR/build_doc
+        pushd $PADDLE_SOURCE_DIR/build_doc
+        cmake .. -DWITH_DOC=ON
+        make paddle_docs paddle_docs_cn
+        popd
+        cp -r $PADDLE_SOURCE_DIR/build_doc/doc/en/html/* doc
+        cp -r $PADDLE_SOURCE_DIR/build_doc/doc/cn/html/* doc_cn
+        rm -rf $PADDLE_SOURCE_DIR/build_doc
+        ;;
+    "--help")
+        usage
+        ;;
+    *)
+        usage
+        ;;
+esac
Some files were not shown because too many files have changed in this diff