【paddle.distributed.fleet】Optimize ParameterServer's Async Mode (#28442)

* test=develop, optimize global_step
musl/disable_test_yolov3_temporarily
123malin 4 years ago committed by GitHub
parent 98adc8f054
commit fbf9564f6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

File diff suppressed because it is too large Load Diff

@@ -302,16 +302,13 @@ class AsyncCommunicator : public Communicator {
const std::vector<std::string> &var_tables,
const framework::Scope &scope) override;
virtual void SendByCommunicator(int batches);
virtual void SendByCommunicator();
virtual void SendGlobalStep(int batches);
virtual void RecvByCommunicator();
virtual void RecvNoBarrier();
virtual int BatchesCounter();
virtual void BarrierSend() {}
virtual void BarrierRecv() {}
@@ -359,6 +356,10 @@ class HalfAsyncCommunicator : public AsyncCommunicator {
VLOG(0) << "HalfAsyncCommunicator Initialized";
}
void MainThread() override;
void SendByCommunicator() override;
void Clean() override;
void Barrier() override;
@@ -438,7 +439,7 @@ class GeoCommunicator : public AsyncCommunicator {
const std::vector<std::string> &var_tables,
const framework::Scope &scope) override;
void SendByCommunicator(int batches) { return; }
void SendByCommunicator() { return; }
std::vector<int64_t> MergeSparseIds(const std::string &send_varname);
@@ -475,6 +476,7 @@ class GeoCommunicator : public AsyncCommunicator {
std::shared_ptr<Scope> pserver_scope_;
int send_var_nums_ = 0;
std::unordered_map<std::string, std::shared_ptr<SparseValue>> old_sparses_;
std::unordered_map<

@@ -207,6 +207,7 @@ class ParameterServerRuntime(RuntimeBase):
SyncStrategy, GeoStrategy
trainer_config = self.async_strategy.get_trainer_runtime_config()
print(trainer_config)
dist_strategy = self.context["valid_strategy"]
launch_barrier = dist_strategy.a_sync_configs["launch_barrier"]

Loading…
Cancel
Save