|
|
|
@ -54,17 +54,6 @@ void OpHandleBase::Run(bool use_event) {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
RunImpl();
|
|
|
|
|
|
|
|
|
|
#ifdef PADDLE_WITH_CUDA
|
|
|
|
|
if (use_event) {
|
|
|
|
|
for (auto &p : dev_ctxes_) {
|
|
|
|
|
int dev_id = boost::get<platform::CUDAPlace>(p.first).device;
|
|
|
|
|
auto stream =
|
|
|
|
|
static_cast<platform::CUDADeviceContext *>(p.second)->stream();
|
|
|
|
|
PADDLE_ENFORCE(cudaEventRecord(events_.at(dev_id), stream));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void OpHandleBase::Wait(platform::DeviceContext *waited_dev) {
|
|
|
|
@ -97,6 +86,27 @@ void OpHandleBase::AddOutput(VarHandleBase *out) {
|
|
|
|
|
out->generated_op_ = this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Runs `callback`, routing it through RecordEvent on every device context in
// dev_ctxes_ when this handle owns events.
//
// With PADDLE_WITH_CUDA defined and events_ non-empty, a chain of closures is
// built: each entry of dev_ctxes_ wraps the closure built so far in a call to
// its CUDADeviceContext::RecordEvent, passing the event stored in events_
// under that entry's CUDA device id. The entry visited last ends up outermost
// and is therefore invoked first; `callback` sits at the innermost level.
// In every other case (non-CUDA build, or events_ is empty) `callback` is
// simply called directly.
//
// NOTE(review): presumably RecordEvent invokes the callable it is given and
// then records the event on that context's stream -- confirm against
// CUDADeviceContext::RecordEvent. The nesting order follows the iteration
// order of dev_ctxes_; keep it stable if RecordEvent has ordering
// requirements.
void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA
  if (!events_.empty()) {
    std::function<void()> chain = callback;
    for (auto &ctx_entry : dev_ctxes_) {
      // The closure captures a copy of the chain built so far (and of the
      // place/context pair), so reassigning `chain` below is safe.
      chain = [chain, ctx_entry, this]() {
        auto *cuda_ctx =
            static_cast<platform::CUDADeviceContext *>(ctx_entry.second);
        const auto device_id =
            boost::get<platform::CUDAPlace>(ctx_entry.first).device;
        cuda_ctx->RecordEvent(events_.at(device_id), chain);
      };
    }
    chain();
    return;
  }
#endif
  // No events to record: run the callback as-is.
  callback();
}
|
|
|
|
|
|
|
|
|
|
} // namespace details
|
|
|
|
|
} // namespace framework
|
|
|
|
|
} // namespace paddle
|
|
|
|
|