diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index 03323e3da7..26c09eb8eb 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -66,6 +66,7 @@ void FetchOpHandle::RunImpl() {
     if (platform::is_gpu_place(var->place_)) {
 #ifdef PADDLE_WITH_CUDA
       TensorCopy(t, cpu, *dev_ctx_[t.place()], &tensors_[i]);
+      dev_ctx_[t.place()]->Wait();
 #endif
     } else {
       tensors_[i].ShareDataWith(t);
diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc
index 07a4b89217..63affb7054 100644
--- a/paddle/fluid/framework/details/op_handle_base.cc
+++ b/paddle/fluid/framework/details/op_handle_base.cc
@@ -33,9 +33,6 @@ std::string OpHandleBase::DebugString() const {
 
 OpHandleBase::~OpHandleBase() {
 #ifdef PADDLE_WITH_CUDA
-  for (auto &ctx : dev_ctx_) {
-    ctx.second->Wait();
-  }
   for (auto &ev : events_) {
     PADDLE_ENFORCE(cudaEventDestroy(ev.second));
   }
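
Note (not part of the patch): the change puts the synchronization right next to the asynchronous device-to-host copy in FetchOpHandle::RunImpl, instead of relying on a blanket Wait() over every device context in the OpHandleBase destructor, so the fetched CPU tensor is only read after the copy has completed. Below is a minimal standalone CUDA sketch of that ordering; the names (stream, d_src, h_dst) are illustrative and none of it is Paddle code.

// Standalone sketch (not Paddle code): an asynchronous device-to-host copy
// must be followed by a wait on the issuing stream before the host buffer
// is read -- the same ordering the patch enforces by calling
// dev_ctx_[t.place()]->Wait() right after TensorCopy.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
  const size_t n = 1 << 20;
  const size_t bytes = n * sizeof(float);

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  float *d_src = nullptr;
  float *h_dst = nullptr;
  cudaMalloc(reinterpret_cast<void **>(&d_src), bytes);
  // Pinned host memory, so the copy below is truly asynchronous.
  cudaMallocHost(reinterpret_cast<void **>(&h_dst), bytes);

  cudaMemsetAsync(d_src, 0, bytes, stream);

  // Enqueue the copy; the call returns before the data reaches h_dst.
  cudaMemcpyAsync(h_dst, d_src, bytes, cudaMemcpyDeviceToHost, stream);

  // Reading h_dst before this point would race with the copy. Waiting on
  // the issuing stream (analogous to the added dev_ctx_[t.place()]->Wait())
  // makes the read safe.
  cudaStreamSynchronize(stream);
  std::printf("first element after sync: %f\n", h_dst[0]);

  cudaFreeHost(h_dst);
  cudaFree(d_src);
  cudaStreamDestroy(stream);
  return 0;
}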