@@ -100,25 +100,21 @@ bool RequestGetHandler::Handle(const std::string& varname,
     }
   } else {
     if (varname != FETCH_BARRIER_MESSAGE && varname != COMPLETE_MESSAGE) {
-      PADDLE_THROW(
-          "async mode should not send FETCH_BARRIER_MESSAGE or "
-          "COMPLETE_MESSAGE");
-    }
-    if (enable_dc_asgd_) {
-      // NOTE: the format is determined by distributed_transpiler.py
-      std::string param_bak_name =
-          string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
-      VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id;
-      auto var = scope_->FindVar(varname);
-      auto t_orig = var->Get<framework::LoDTensor>();
-      auto param_bak = scope_->Var(param_bak_name);
-      auto t = param_bak->GetMutable<framework::LoDTensor>();
-      t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
-      VLOG(3) << "copying " << varname << " to " << param_bak_name;
-      framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
+      if (enable_dc_asgd_) {
+        // NOTE: the format is determined by distributed_transpiler.py
+        std::string param_bak_name =
+            string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
+        VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id;
+        auto var = scope_->FindVar(varname);
+        auto t_orig = var->Get<framework::LoDTensor>();
+        auto param_bak = scope_->Var(param_bak_name);
+        auto t = param_bak->GetMutable<framework::LoDTensor>();
+        t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
+        VLOG(3) << "copying " << varname << " to " << param_bak_name;
+        framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
+      }
+      *outvar = scope_->FindVar(varname);
     }
-    *outvar = scope_->FindVar(varname);
   }
   return true;
 }