|
|
|
@ -190,6 +190,10 @@ void PyCUDATensorSetFromArray(
|
|
|
|
|
static_cast<const platform::CUDADeviceContext *>(pool.Get(place));
|
|
|
|
|
paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(),
|
|
|
|
|
cudaMemcpyHostToDevice, dev_ctx->stream());
|
|
|
|
|
// NOTE: For safety, wait here for the copy to complete.
|
|
|
|
|
// This is because the CPU array.data() could be destroyed after this method.
|
|
|
|
|
// If we made this method async, it could copy data from a memory buffer
|
|
|
|
|
// that has been freed.
|
|
|
|
|
dev_ctx->Wait();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -217,6 +221,11 @@ void PyCUDATensorSetFromArray(
|
|
|
|
|
paddle::platform::GpuMemcpyAsync(dst, array.data(),
|
|
|
|
|
sizeof(uint16_t) * array.size(),
|
|
|
|
|
cudaMemcpyHostToDevice, dev_ctx->stream());
|
|
|
|
|
// NOTE: For safety, wait here for the copy to complete.
|
|
|
|
|
// This is because the CPU array.data() could be destroyed after this method.
|
|
|
|
|
// If we made this method async, it could copy data from a memory buffer
|
|
|
|
|
// that has been freed.
|
|
|
|
|
dev_ctx->Wait();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|