Add comments and clean code

wangkuiyi-patch-2
Yu Yang 7 years ago
parent a822f8ddbb
commit 2ab12ca248

@ -505,13 +505,15 @@ All parameter, weight, gradient are variables in Paddle.
scope, local_scopes, allow_op_delay);
})
.def("bcast_params", &ParallelExecutor::BCastParamsToGPUs)
// NOTE: Even if we return a vec<Scope*>* to Python using the reference
// policy, we still cannot get a local_scope from this vector, since the
// elements of vec<Scope*> will be freed by the Python GC. We can only return
// each Scope* one by one and mark it as a reference.
.def("local_scopes",
[](ParallelExecutor &self) -> std::vector<Scope *> * {
return &self.GetLocalScopes();
},
py::return_value_policy::reference)
.def("local_scopes_len",
[](ParallelExecutor &self) { return self.GetLocalScopes().size(); })
.def("local_scope", [](ParallelExecutor &self,
size_t i) { return self.GetLocalScopes()[i]; },
py::return_value_policy::reference)

@ -190,6 +190,10 @@ void PyCUDATensorSetFromArray(
static_cast<const platform::CUDADeviceContext *>(pool.Get(place));
paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(),
cudaMemcpyHostToDevice, dev_ctx->stream());
// NOTE: For safety, wait here for the copy to complete.
// This is because the CPU array.data() could be destroyed after this method
// returns. If this method were async, it could copy data from a memory buffer
// that has already been freed.
dev_ctx->Wait();
}
@ -217,6 +221,11 @@ void PyCUDATensorSetFromArray(
paddle::platform::GpuMemcpyAsync(dst, array.data(),
sizeof(uint16_t) * array.size(),
cudaMemcpyHostToDevice, dev_ctx->stream());
// NOTE: For safety, wait here for the copy to complete.
// This is because the CPU array.data() could be destroyed after this method
// returns. If this method were async, it could copy data from a memory buffer
// that has already been freed.
dev_ctx->Wait();
}
template <typename T>

Loading…
Cancel
Save