commit 09d9794c8d
paddle/memory/CMakeLists.txt
@@ -1 +1,7 @@
 add_subdirectory(detail)
+
+if(${WITH_GPU})
+  nv_library(memory SRCS memory.cc)
+else(${WITH_GPU})
+  cc_library(memory SRCS memory.cc)
+endif(${WITH_GPU})
paddle/memory/detail/CMakeLists.txt
@@ -1,2 +1,5 @@
-cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
-nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc)
+if(${WITH_GPU})
+  nv_test(system_allocator_test SRCS system_allocator_test.cc)
+else(${WITH_GPU})
+  cc_test(system_allocator_test SRCS system_allocator_test.cc)
+endif(${WITH_GPU})
paddle/memory/detail/cpu_allocator.h
@@ -1,71 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <stddef.h>  // for size_t
-#include <cstdlib>   // for malloc and free
-
-#ifndef _WIN32
-#include <sys/mman.h>  // for mlock and munlock
-#endif
-
-namespace paddle {
-namespace memory {
-namespace detail {
-
-// CPUAllocator<staging=true> calls mlock, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device. Allocates too much would reduce the
-// amount of memory available to the system for paging. So, by
-// default, we should use CPUAllocator<staging=false>.
-template <bool staging>
-class CPUAllocator {
- public:
-  void* Alloc(size_t size);
-  void Free(void* p, size_t size);
-};
-
-template <>
-class CPUAllocator<false> {
- public:
-  void* Alloc(size_t size) { return std::malloc(size); }
-  void Free(void* p, size_t size) { std::free(p); }
-};
-
-template <>
-class CPUAllocator<true> {
- public:
-  void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p == nullptr) {
-      return p;
-    }
-#ifndef _WIN32
-    mlock(p, size);
-#endif
-    return p;
-  }
-
-  void Free(void* p, size_t size) {
-#ifndef _WIN32
-    munlock(p, size);
-#endif
-    std::free(p);
-  }
-};
-
-}  // namespace detail
-}  // namespace memory
-}  // namespace paddle
paddle/memory/detail/cpu_allocator_test.cc
@@ -1,30 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/memory/detail/cpu_allocator.h"
-#include "gtest/gtest.h"
-
-TEST(CPUAllocator, NonStaging) {
-  paddle::memory::detail::CPUAllocator<false> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
-
-TEST(CPUAllocator, Staging) {
-  paddle::memory::detail::CPUAllocator<true> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
paddle/memory/detail/gpu_allocator.h
@@ -1,92 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <stddef.h>  // for size_t
-
-#include <thrust/system/cuda/error.h>
-#include <thrust/system_error.h>
-
-namespace paddle {
-namespace memory {
-namespace detail {
-
-inline void throw_on_error(cudaError_t e, const char* message) {
-  if (e) {
-    throw thrust::system_error(e, thrust::cuda_category(), message);
-  }
-}
-
-// GPUAllocator<staging=true> calls cudaHostMalloc, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device. Allocates too much would reduce the
-// amount of memory available to the system for paging. So, by
-// default, we should use GPUAllocator<staging=false>.
-template <bool staging>
-class GPUAllocator {
- public:
-  void* Alloc(size_t size);
-  void Free(void* p, size_t size);
-};
-
-template <>
-class GPUAllocator<false> {
- public:
-  void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = cudaMalloc(&p, size);
-    if (result == cudaSuccess) {
-      return p;
-    }
-    // clear last error
-    cudaGetLastError();
-    return nullptr;
-  }
-
-  void Free(void* p, size_t size) {
-    // Purposefully allow cudaErrorCudartUnloading, because
-    // that is returned if you ever call cudaFree after the
-    // driver has already shutdown. This happens only if the
-    // process is terminating, in which case we don't care if
-    // cudaFree succeeds.
-    auto err = cudaFree(p);
-    if (err != cudaErrorCudartUnloading) {
-      throw_on_error(err, "cudaFree failed");
-    }
-  }
-};
-
-template <>
-class GPUAllocator<true> {
- public:
-  void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = cudaMallocHost(&p, size);
-    if (result == cudaSuccess) {
-      return p;
-    }
-    // clear last error
-    cudaGetLastError();
-    return nullptr;
-  }
-
-  void Free(void* p, size_t size) {
-    throw_on_error(cudaFreeHost(p), "cudaFreeHost failed");
-  }
-};
-
-}  // namespace detail
-}  // namespace memory
-}  // namespace paddle
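
The error-handling pattern in the header above wraps a cudaError_t in thrust::system_error, so CUDA failures surface as ordinary, catchable C++ exceptions rather than status codes the caller must remember to check. A minimal sketch of how a caller could observe that with this (now removed) interface; the main function here is illustrative, not part of the commit:

#include <iostream>

#include <thrust/system_error.h>

#include "paddle/memory/detail/gpu_allocator.h"

int main() {
  paddle::memory::detail::GPUAllocator<true> a;
  void* p = a.Alloc(4096);  // pinned host memory via cudaMallocHost
  try {
    a.Free(p, 4096);  // throws thrust::system_error if cudaFreeHost fails
  } catch (const thrust::system_error& e) {
    std::cerr << e.what() << std::endl;  // CUDA error code plus our message
    return 1;
  }
  return 0;
}
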
paddle/memory/detail/gpu_allocator_test.cc
@@ -1,30 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/memory/detail/gpu_allocator.h"
-#include "gtest/gtest.h"
-
-TEST(GPUAllocator, NonStaging) {
-  paddle::memory::detail::GPUAllocator<false> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
-
-TEST(GPUAllocator, Staging) {
-  paddle::memory::detail::GPUAllocator<true> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
paddle/memory/detail/system_allocator.h
@@ -0,0 +1,129 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stddef.h>    // for size_t
+#include <sys/mman.h>  // for mlock and munlock
+#include <cstdlib>     // for malloc and free
+
+#ifndef PADDLE_ONLY_CPU
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+#endif  // PADDLE_ONLY_CPU
+
+#include "paddle/platform/assert.h"
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+class CPUDeleter {
+ public:
+  CPUDeleter(void* ptr, size_t size, bool locked)
+      : ptr_(ptr), size_(size), locked_(locked) {}
+
+  void* Ptr() { return ptr_; }
+
+  void operator()(void* ptr) {
+    PADDLE_ASSERT(ptr == ptr_);
+    if (ptr_ != nullptr && locked_) {
+      munlock(ptr_, size_);
+    }
+    std::free(ptr_);
+  }
+
+ private:
+  void* ptr_;
+  size_t size_;
+  bool locked_;
+};
+
+// CPUAllocator<lock_memory=true> calls mlock, which pins and locks
+// the allocated memory as a staging area for data exchange between
+// host and device. Allocating too much would reduce the amount of
+// memory available to the system for paging. So, by default, we
+// should use CPUAllocator<lock_memory=false>.
+template <bool lock_memory>
+class CPUAllocator {
+ public:
+  static CPUDeleter Alloc(size_t size) {
+    void* p = std::malloc(size);
+    if (p != nullptr && lock_memory) {
+      mlock(p, size);
+    }
+    return CPUDeleter(p, size, lock_memory);
+  }
+};
+
+#ifndef PADDLE_ONLY_CPU  // The following code is for CUDA.
+
+namespace {
+inline void throw_on_error(cudaError_t e, const char* message) {
+  if (e) {
+    throw thrust::system_error(e, thrust::cuda_category(), message);
+  }
+}
+}  // namespace
+
+class GPUDeleter {
+ public:
+  GPUDeleter(void* ptr, size_t size, bool staging)
+      : ptr_(ptr), size_(size), staging_(staging) {}
+
+  void* Ptr() { return ptr_; }
+
+  void operator()(void* ptr) {
+    PADDLE_ASSERT(ptr == ptr_);
+    // Purposefully allow cudaErrorCudartUnloading, because
+    // that is returned if you ever call cudaFree after the
+    // driver has already shut down. This happens only if the
+    // process is terminating, in which case we don't care if
+    // cudaFree succeeds.
+    cudaError_t err = staging_ ? cudaFreeHost(ptr) : cudaFree(ptr);
+    if (err != cudaErrorCudartUnloading) {
+      throw_on_error(err, "cudaFree{Host} failed");
+    }
+  }
+
+ private:
+  void* ptr_;
+  size_t size_;
+  bool staging_;
+};
+
+// GPUAllocator<staging=true> calls cudaMallocHost, which returns
+// pinned and locked memory as staging areas for data exchange
+// between host and device. Allocating too much would reduce the
+// amount of memory available to the system for paging. So, by
+// default, we should use GPUAllocator<staging=false>.
+template <bool staging>
+class GPUAllocator {
+ public:
+  static GPUDeleter Alloc(size_t size) {
+    void* p = 0;
+    cudaError_t result =
+        staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size);
+    if (result != cudaSuccess) {
+      cudaGetLastError();  // clear error if there is any.
+    }
+    return GPUDeleter(result == cudaSuccess ? p : nullptr, size, staging);
+  }
+};
+
+#endif  // PADDLE_ONLY_CPU
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
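
The key interface change in this header: Alloc no longer returns a raw pointer to be paired with a later Free call; it returns a deleter object that carries the pointer, the size, and the deallocation policy, so it plugs straight into a standard smart pointer. A minimal CPU-side usage sketch (illustrative only, not part of the commit):

#include <memory>

#include "paddle/memory/detail/system_allocator.h"

int main() {
  using paddle::memory::detail::CPUAllocator;
  using paddle::memory::detail::CPUDeleter;

  // Alloc hands back a CPUDeleter that remembers what it allocated.
  CPUDeleter d = CPUAllocator<false>::Alloc(1024);

  // The deleter doubles as the cleanup functor for std::unique_ptr, so
  // the munlock/free logic runs automatically at scope exit.
  std::unique_ptr<char, CPUDeleter> p(static_cast<char*>(d.Ptr()), d);

  return p != nullptr ? 0 : 1;
}
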
paddle/memory/detail/system_allocator_test.cc
@@ -0,0 +1,50 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/system_allocator.h"
+
+#include <memory>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+template <typename Allocator>
+void TestAllocator() {
+  {
+    auto d = Allocator::Alloc(sizeof(int));
+    EXPECT_NE(d.Ptr(), nullptr);
+    std::unique_ptr<int, decltype(d)> p(static_cast<int*>(d.Ptr()), d);
+  }
+  {
+    auto d = Allocator::Alloc(0);
+    EXPECT_EQ(d.Ptr(), nullptr);
+    std::unique_ptr<int, decltype(d)> p(static_cast<int*>(d.Ptr()), d);
+  }
+}
+
+TEST(CPUAllocator, NoLockMem) {
+  TestAllocator<paddle::memory::detail::CPUAllocator<false>>();
+}
+TEST(CPUAllocator, LockMem) {
+  TestAllocator<paddle::memory::detail::CPUAllocator<true>>();
+}
+
+#ifndef PADDLE_ONLY_CPU
+TEST(GPUAllocator, NoStaging) {
+  TestAllocator<paddle::memory::detail::GPUAllocator<false>>();
+}
+TEST(GPUAllocator, Staging) {
+  TestAllocator<paddle::memory::detail::GPUAllocator<true>>();
+}
+#endif  // PADDLE_ONLY_CPU
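
For completeness: the "staging" comments in system_allocator.h refer to pinned host buffers that speed up host-to-device copies. A hedged sketch of that intended flow, using only standard CUDA runtime calls; the StagedCopyToDevice helper is hypothetical and not part of the commit:

#ifndef PADDLE_ONLY_CPU
#include "paddle/memory/detail/system_allocator.h"

// Hypothetical helper: copy n bytes from a pinned host staging buffer
// to device memory. Pinned pages let cudaMemcpyAsync run as a true
// asynchronous DMA transfer instead of staging through an internal
// pageable-memory buffer.
void StagedCopyToDevice(size_t n) {
  using namespace paddle::memory::detail;

  GPUDeleter host = GPUAllocator<true>::Alloc(n);  // cudaMallocHost
  GPUDeleter dev = GPUAllocator<false>::Alloc(n);  // cudaMalloc
  if (host.Ptr() == nullptr || dev.Ptr() == nullptr) return;

  // ... fill host.Ptr() with the data to upload ...

  cudaStream_t stream;
  cudaStreamCreate(&stream);
  cudaMemcpyAsync(dev.Ptr(), host.Ptr(), n, cudaMemcpyHostToDevice, stream);
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);

  dev(dev.Ptr());    // GPUDeleter releases via cudaFree
  host(host.Ptr());  // ... and via cudaFreeHost for the staging buffer
}
#endif  // PADDLE_ONLY_CPU
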