DeviceContext Split, test=develop (#23737)
* supports thread-binding stream, test=develop
* avoid using thread_local variables in dtor, test=develop
* modify the stream priority enum, test=develop
revert-22778-infer_var_type
parent
8af85922d0
commit
2d01cc85c4
@ -0,0 +1,3 @@
|
||||
# The CUDA stream wrapper is only built when GPU support is enabled.
if(WITH_GPU)
  cc_library(cuda_stream SRCS cuda_stream.cc DEPS enforce)
endif()
|
@ -0,0 +1,78 @@
|
||||
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/fluid/platform/stream/cuda_stream.h"
|
||||
#include "paddle/fluid/platform/cuda_device_guard.h"
|
||||
#include "paddle/fluid/platform/enforce.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
namespace stream {
|
||||
|
||||
// Creation flags passed to cudaStreamCreateWithPriority for every stream that
// CUDAStream::Init creates.
constexpr unsigned int kDefaultFlag = cudaStreamDefault;
|
||||
|
||||
bool CUDAStream::Init(const Place& place, const enum Priority& priority) {
|
||||
PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
|
||||
platform::errors::InvalidArgument(
|
||||
"Cuda stream must be created using cuda place."));
|
||||
place_ = place;
|
||||
CUDADeviceGuard guard(boost::get<CUDAPlace>(place_).device);
|
||||
if (priority == Priority::kHigh) {
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
cudaStreamCreateWithPriority(&stream_, kDefaultFlag, -1),
|
||||
platform::errors::Fatal("High priority cuda stream creation failed."));
|
||||
} else if (priority == Priority::kNormal) {
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
cudaStreamCreateWithPriority(&stream_, kDefaultFlag, 0),
|
||||
platform::errors::Fatal(
|
||||
"Normal priority cuda stream creation failed."));
|
||||
}
|
||||
callback_manager_.reset(new StreamCallbackManager(stream_));
|
||||
VLOG(3) << "CUDAStream Init stream: " << stream_
|
||||
<< ", priority: " << static_cast<int>(priority);
|
||||
return true;
|
||||
}
|
||||
|
||||
void CUDAStream::Destroy() {
|
||||
CUDADeviceGuard guard(boost::get<CUDAPlace>(place_).device);
|
||||
Wait();
|
||||
WaitCallback();
|
||||
if (stream_) {
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
cudaStreamDestroy(stream_),
|
||||
platform::errors::Fatal("Cuda stream destruction failed."));
|
||||
}
|
||||
stream_ = nullptr;
|
||||
}
|
||||
|
||||
void CUDAStream::Wait() const {
|
||||
cudaError_t e_sync = cudaSuccess;
|
||||
#if !defined(_WIN32)
|
||||
e_sync = cudaStreamSynchronize(stream_);
|
||||
#else
|
||||
while (e_sync = cudaStreamQuery(stream_)) {
|
||||
if (e_sync == cudaErrorNotReady) continue;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
e_sync, platform::errors::Fatal(
|
||||
"cudaStreamSynchronize raises error: %s, errono: %d",
|
||||
cudaGetErrorString(e_sync), static_cast<int>(e_sync)));
|
||||
}
|
||||
|
||||
} // namespace stream
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
@ -0,0 +1,92 @@
|
||||
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
#include <memory>
#include <utility>
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream_callback_manager.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace platform {
|
||||
namespace stream {
|
||||
|
||||
#ifdef PADDLE_WITH_CUDA
|
||||
|
||||
// Priority requested for a CUDAStream, stored in a single byte.
enum class Priority : uint8_t {
  kNull = 0,    // CUDAStream::Init creates no stream for this value
  kHigh = 1,    // CUDAStream::Init creates the stream with CUDA priority -1
  kNormal = 2,  // CUDAStream::Init creates the stream with CUDA priority 0
};
|
||||
|
||||
class CUDAStream final {
|
||||
public:
|
||||
CUDAStream() = default;
|
||||
CUDAStream(const Place& place,
|
||||
const enum Priority& priority = Priority::kNormal) {
|
||||
Init(place, priority);
|
||||
}
|
||||
virtual ~CUDAStream() { Destroy(); }
|
||||
|
||||
bool Init(const Place& place,
|
||||
const enum Priority& priority = Priority::kNormal);
|
||||
|
||||
template <typename Callback>
|
||||
void AddCallback(Callback&& callback) const {
|
||||
callback_manager_->AddCallback(callback);
|
||||
}
|
||||
|
||||
template <typename Callback>
|
||||
void RecordEvent(cudaEvent_t ev, Callback callback) const {
|
||||
callback();
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
cudaEventRecord(ev, stream_),
|
||||
platform::errors::Fatal("CUDA event recording failed."));
|
||||
}
|
||||
|
||||
void RecordEvent(cudaEvent_t ev) const {
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
cudaEventRecord(ev, stream_),
|
||||
platform::errors::Fatal("CUDA event recording failed."));
|
||||
}
|
||||
|
||||
void WaitEvent(cudaEvent_t ev) const {
|
||||
PADDLE_ENFORCE_CUDA_SUCCESS(
|
||||
cudaStreamWaitEvent(stream_, ev, 0),
|
||||
platform::errors::Fatal("Failed to wait event."));
|
||||
}
|
||||
|
||||
void Wait() const;
|
||||
void WaitCallback() const { callback_manager_->Wait(); }
|
||||
|
||||
const cudaStream_t& raw_stream() const { return stream_; }
|
||||
void Destroy();
|
||||
|
||||
private:
|
||||
Place place_;
|
||||
cudaStream_t stream_{nullptr};
|
||||
Priority priority_{Priority::kNormal};
|
||||
std::unique_ptr<StreamCallbackManager> callback_manager_;
|
||||
|
||||
DISABLE_COPY_AND_ASSIGN(CUDAStream);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace stream
|
||||
} // namespace platform
|
||||
} // namespace paddle
|
Loading…
Reference in new issue