diff --git a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc b/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc index 69e35585cf..d416d7df67 100644 --- a/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc +++ b/mindspore/ccsrc/kernel/gpu/data/dataset_iterator_kernel.cc @@ -64,7 +64,7 @@ bool DatasetIteratorKernel::Init(const CNodePtr &kernel_node) { void DatasetIteratorKernel::InitSizeLists() { return; } bool DatasetIteratorKernel::Launch(const std::vector &, const std::vector &, - const std::vector &outputs, void *) { + const std::vector &outputs, void *stream) { void *addr = nullptr; size_t len = 0; @@ -96,11 +96,14 @@ bool DatasetIteratorKernel::Launch(const std::vector &, const std::v } for (size_t i = 0; i < output_size_list_.size(); i++) { - CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpy(outputs[i]->addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice), + CHECK_CUDA_RET_WITH_EXCEPT(cudaMemcpyAsync(outputs[i]->addr, addr, output_size_list_[i], cudaMemcpyDeviceToDevice, + reinterpret_cast(stream)), "Cuda Memcpy Failed"); addr = reinterpret_cast(addr) + output_size_list_[i]; } + CHECK_CUDA_RET_WITH_EXCEPT(cudaStreamSynchronize(reinterpret_cast(stream)), + "cudaStreamSynchronize failed"); (void)GpuBufferMgr::GetInstance().Pop(handle_); return true; }