Always synchronize when copying data on GPU from C++ to a NumPy array. (#9110)

7 years ago · 45073b7c39
parent d284cf88e5
commit 45073b7c39
1 changed file with 1 addition and 0 deletions
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -72,6 +72,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
        paddle::platform::GpuMemcpyAsync(
            dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(),
            cudaMemcpyDeviceToHost, dev_ctx->stream());
        dev_ctx->Wait();
 #else
        PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
 #endif