@@ -24,6 +24,8 @@ limitations under the License. */
+#include <gtest/gtest.h>
+#include <unordered_map>
 
 // This unit test is an example comparing the performance between using pinned
 // memory and not. In general, using pinned memory will be faster.
 template <typename T>
 __global__ void Kernel(T* output, int dim) {
   int tid = blockIdx.x * blockDim.x + threadIdx.x;
@@ -33,7 +35,7 @@ __global__ void Kernel(T* output, int dim) {
 }
 
 template <typename Place>
-void test_pinned_memory() {
+float test_pinned_memory() {
   Place cpu_place;
   paddle::platform::CUDAPlace cuda_place;
 
@@ -133,12 +135,14 @@ void test_pinned_memory() {
     paddle::memory::Free(cpu_place, output_pinned_mem[j]);
     paddle::memory::Free(cuda_place, gpu_mem[j]);
   }
+  return elapsedTime / 30;
 }
 
-TEST(CPUANDCUDAPinned, CPUAllocator) {
-  test_pinned_memory<paddle::platform::CPUPlace>();
+TEST(CPUANDCUDAPinned, CPUAllocatorAndCUDAPinnedAllocator) {
+  // Generally speaking, operation on pinned_memory is faster than that on
+  // unpinned-memory, but if this unit test fails frequently, please close this
+  // test for the time being.
+  float time1 = test_pinned_memory<paddle::platform::CPUPlace>();
+  float time2 = test_pinned_memory<paddle::platform::CUDAPinnedPlace>();
+  EXPECT_GT(time1, time2);
 }
-
-TEST(CPUANDCUDAPinned, CUDAPinnedAllocator) {
-  test_pinned_memory<paddle::platform::CUDAPinnedPlace>();
-}
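
The diff above turns the two standalone allocator tests into a single comparison: test_pinned_memory now returns the average elapsed time over 30 runs, and EXPECT_GT asserts that the unpinned (CPUPlace) path is slower than the pinned (CUDAPinnedPlace) path. For readers who want to see the underlying effect without the Paddle allocator (paddle::memory, CUDAPinnedPlace), here is a minimal standalone sketch using only the raw CUDA runtime API. The file name, buffer size, and iteration count are illustrative and not taken from the test; it simply times repeated host-to-device copies from pageable malloc memory versus page-locked cudaMallocHost memory using CUDA events.

// pinned_vs_pageable.cu -- standalone illustration, separate from the Paddle test above.
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cuda_runtime.h>

// Time `iters` host-to-device copies of `bytes` from `src` and return the
// average time per copy in milliseconds, measured with CUDA events.
static float TimeH2DCopy(void* dst, const void* src, size_t bytes, int iters) {
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice);  // warm-up copy
  cudaEventRecord(start);
  for (int i = 0; i < iters; ++i) {
    cudaMemcpyAsync(dst, src, bytes, cudaMemcpyHostToDevice, 0);
  }
  cudaEventRecord(stop);
  cudaEventSynchronize(stop);
  float ms = 0.0f;
  cudaEventElapsedTime(&ms, start, stop);
  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  return ms / iters;
}

int main() {
  const size_t bytes = 64 << 20;  // 64 MiB per copy (illustrative size)
  const int iters = 30;           // same averaging count as the test above

  void* d_buf = nullptr;
  cudaMalloc(&d_buf, bytes);

  // Pageable host memory: the driver must stage each copy through an internal
  // pinned buffer before the DMA, so transfers are slower and cudaMemcpyAsync
  // cannot be truly asynchronous.
  void* pageable = malloc(bytes);
  memset(pageable, 1, bytes);  // fault the pages in before timing
  float pageable_ms = TimeH2DCopy(d_buf, pageable, bytes, iters);

  // Pinned (page-locked) host memory: the GPU DMA engine reads it directly.
  void* pinned = nullptr;
  cudaMallocHost(&pinned, bytes);
  memset(pinned, 1, bytes);
  float pinned_ms = TimeH2DCopy(d_buf, pinned, bytes, iters);

  printf("pageable: %.3f ms/copy   pinned: %.3f ms/copy\n",
         pageable_ms, pinned_ms);

  cudaFreeHost(pinned);
  free(pageable);
  cudaFree(d_buf);
  return 0;
}

On most systems the pinned figure is noticeably lower, but the margin depends on the platform, which is why the diff's comment allows disabling the comparison test if it turns out to be flaky.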