diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 573bd937a3..0cddb95244 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(trainer) add_subdirectory(scripts) add_subdirectory(optimizer) add_subdirectory(strings) +add_subdirectory(memory) # Do not build go directory until go cmake is working smoothly. # if(CMAKE_Go_COMPILER) diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt new file mode 100644 index 0000000000..3943c3cfad --- /dev/null +++ b/paddle/memory/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(detail) diff --git a/paddle/memory/README.md b/paddle/memory/README.md index fd32d07ef4..e5f7880e4c 100644 --- a/paddle/memory/README.md +++ b/paddle/memory/README.md @@ -31,7 +31,7 @@ In `paddle/memory/memory.h` we have: namespace memory { template <typename Place> void* Alloc(Place, size_t); template <typename Place> void Free(Place, void*); -template <typename Place> void Used(Place); +template <typename Place> size_t Used(Place); } // namespace memory ``` @@ -39,7 +39,7 @@ These function templates have specializations on either `platform::CPUPlace` or ```cpp template<> -void Alloc<CPUPlace>(CPUPlace p, size_t size) { +void* Alloc<CPUPlace>(CPUPlace p, size_t size) { return GetCPUBuddyAllocator()->Alloc(size); } ``` @@ -102,15 +102,11 @@ class BuddyAllocator { }; ``` -#### System Allocators - -The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. A system allocator holds information about a device, including the amount of memory has been allocated, so we can call +Because BuddyAllocator has the meta-data of each block, it can trace the used memory -- record the amount returned by `Alloc` freed in `Free`. Instead, `CPUAllocator` and `GPUAllocator` doesn't know the size of freed memory block and cannot do the trace. -- `GPUAllocator::Used()` and -- `CPUAllocator::Used()` - -to get the amount of memory that has been allocated so far. +#### System Allocators +The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. ## Justification diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt new file mode 100644 index 0000000000..fb8a11062d --- /dev/null +++ b/paddle/memory/detail/CMakeLists.txt @@ -0,0 +1 @@ +cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h new file mode 100644 index 0000000000..8a872d3800 --- /dev/null +++ b/paddle/memory/detail/cpu_allocator.h @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include <malloc.h> // for malloc and free +#include <stddef.h> // for size_t + +namespace paddle { +namespace memory { +namespace detail { + +// CPUAllocator<staging=true> calls cudaMallocHost, which returns +// pinned and mlocked memory as staging areas for data exchange +// between host and device. Allocates too much would reduce the +// amount of memory available to the system for paging. So, by +// default, we should use CPUAllocator<staging=false>. +template <bool staging> +class CPUAllocator { +public: + void* Alloc(size_t size); + void Free(void* p); +}; + +template <> +class CPUAllocator<false> { +public: + void* Alloc(size_t size) { return malloc(size); } + void Free(void* p) { free(p); } +}; + +// If CMake macro WITH_GPU is OFF, C++ compiler won't generate the +// following specialization that depends on the CUDA library. +#ifdef WITH_GPU +template <> +class CPUAllocator<true> { +public: + void* Alloc(size_t size) { + void* p; + if (cudaMallocHost(&p, size) != cudaSuccess) { + return NULL; + } + return *p; + } + + void Free(void* p) { cudaFreeHost(p); } +}; +#endif // WITH_GPU + +} // namespace detail +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc new file mode 100644 index 0000000000..0aa33a22fd --- /dev/null +++ b/paddle/memory/detail/cpu_allocator_test.cc @@ -0,0 +1,32 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/detail/cpu_allocator.h" +#include "gtest/gtest.h" + +TEST(CPUAllocator, NonStaging) { + paddle::memory::detail::CPUAllocator<false> a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p); +} + +#ifdef WITH_GPU +TEST(CPUAllocator, Staging) { + paddle::memory::detail::CPUAllocator<true> a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p); +} +#endif // WITH_GPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc new file mode 100644 index 0000000000..5f1253ede6 --- /dev/null +++ b/paddle/memory/memory.cc @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/memory/memory.h" + +namespace paddle { +namespace memory { + +template <> +void* Alloc<CPUPlace>(CPUPlace, size_t size) { + return GetCPUBuddyAllocator()->Alloc(size); +} + +template <> +void* Alloc<GPUPlace>(GPUPlace pl, size_t size) { + return GetGPUBuddyAllocator(pl.device)->Alloc(size); +} + +template <> +void Free<CPUPlace>(CPUPlace, void* p) { + return GetCPUBuddyAllocator()->Free(p); +} + +template <> +void* Alloc<GPUPlace>(GPUPlace pl, void* p) { + return GetGPUBuddyAllocator(pl.device)->Free(p); +} + +template <> +size_t Used<CPUPlace>(CPUPlace) { + return GetCPUBuddyAllocator()->Used(); +} + +template <> +size_t Alloc<GPUPlace>(GPUPlace pl) { + return GetGPUBuddyAllocator(pl.device)->Used(); +} + +} // namespace memory +} // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h new file mode 100644 index 0000000000..ae8ac6ca52 --- /dev/null +++ b/paddle/memory/memory.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/frameowork/place.h" + +namespace paddle { +namespace memory { + +typename<typename paddle::framework::Place> void* Alloc(Place, size_t); +typename<typename paddle::framework::Place> void Free(Place, void*); +typename<typename paddle::framework::Place> size_t Used(Place); + +} // namespace memory +} // namespace paddle diff --git a/paddle/platform/place.cc b/paddle/platform/place.cc index 1afd03c011..0704820aa0 100644 --- a/paddle/platform/place.cc +++ b/paddle/platform/place.cc @@ -8,8 +8,8 @@ namespace detail { class PlacePrinter : public boost::static_visitor<> { public: PlacePrinter(std::ostream &os) : os_(os) {} - void operator()(const CpuPlace &) { os_ << "CpuPlace"; } - void operator()(const GpuPlace &p) { os_ << "GpuPlace(" << p.device << ")"; } + void operator()(const CPUPlace &) { os_ << "CPUPlace"; } + void operator()(const GPUPlace &p) { os_ << "GPUPlace(" << p.device << ")"; } private: std::ostream &os_; @@ -22,14 +22,14 @@ static Place the_default_place; void set_place(const Place &place) { the_default_place = place; } const Place &get_place() { return the_default_place; } -const GpuPlace default_gpu() { return GpuPlace(0); } -const CpuPlace default_cpu() { return CpuPlace(); } +const GPUPlace default_gpu() { return GPUPlace(0); } +const CPUPlace default_cpu() { return CPUPlace(); } bool is_gpu_place(const Place &p) { - return boost::apply_visitor(IsGpuPlace(), p); + return boost::apply_visitor(IsGPUPlace(), p); } bool is_cpu_place(const Place &p) { - return !boost::apply_visitor(IsGpuPlace(), p); + return !boost::apply_visitor(IsGPUPlace(), p); } bool places_are_same_class(const Place &p1, const Place &p2) { diff --git a/paddle/platform/place.h b/paddle/platform/place.h index 489572c526..7cead18388 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -1,43 +1,58 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + #pragma once + #include <boost/variant.hpp> #include <iostream> namespace paddle { namespace platform { -struct CpuPlace { +struct CPUPlace { // WORKAROUND: for some reason, omitting this constructor // causes errors with boost 1.59 and OSX - CpuPlace() {} + CPUPlace() {} // needed for variant equality comparison - inline bool operator==(const CpuPlace &) const { return true; } - inline bool operator!=(const CpuPlace &) const { return false; } + inline bool operator==(const CPUPlace &) const { return true; } + inline bool operator!=(const CPUPlace &) const { return false; } }; -struct GpuPlace { - GpuPlace() : GpuPlace(0) {} - GpuPlace(int d) : device(d) {} +struct GPUPlace { + GPUPlace() : GPUPlace(0) {} + GPUPlace(int d) : device(d) {} // needed for variant equality comparison - inline bool operator==(const GpuPlace &o) const { return device == o.device; } - inline bool operator!=(const GpuPlace &o) const { return !(*this == o); } + inline bool operator==(const GPUPlace &o) const { return device == o.device; } + inline bool operator!=(const GPUPlace &o) const { return !(*this == o); } int device; }; -struct IsGpuPlace : public boost::static_visitor<bool> { - bool operator()(const CpuPlace &) const { return false; } - bool operator()(const GpuPlace &gpu) const { return true; } +struct IsGPUPlace : public boost::static_visitor<bool> { + bool operator()(const CPUPlace &) const { return false; } + bool operator()(const GPUPlace &gpu) const { return true; } }; -typedef boost::variant<GpuPlace, CpuPlace> Place; +typedef boost::variant<GPUPlace, CPUPlace> Place; void set_place(const Place &); const Place &get_place(); -const GpuPlace default_gpu(); -const CpuPlace default_cpu(); +const GPUPlace default_gpu(); +const CPUPlace default_cpu(); bool is_gpu_place(const Place &); bool is_cpu_place(const Place &); diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc index 73fccceedf..33e2e5a439 100644 --- a/paddle/platform/place_test.cc +++ b/paddle/platform/place_test.cc @@ -3,8 +3,8 @@ #include "gtest/gtest.h" TEST(Place, Equality) { - paddle::platform::CpuPlace cpu; - paddle::platform::GpuPlace g0(0), g1(1), gg0(0); + paddle::platform::CPUPlace cpu; + paddle::platform::GPUPlace g0(0), g1(1), gg0(0); EXPECT_EQ(cpu, cpu); EXPECT_EQ(g0, g0); @@ -22,19 +22,19 @@ TEST(Place, Default) { EXPECT_TRUE(paddle::platform::is_gpu_place(paddle::platform::default_gpu())); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::default_cpu())); - paddle::platform::set_place(paddle::platform::CpuPlace()); + paddle::platform::set_place(paddle::platform::CPUPlace()); EXPECT_TRUE(paddle::platform::is_cpu_place(paddle::platform::get_place())); } TEST(Place, Print) { { std::stringstream ss; - ss << paddle::platform::GpuPlace(1); - EXPECT_EQ("GpuPlace(1)", ss.str()); + ss << paddle::platform::GPUPlace(1); + EXPECT_EQ("GPUPlace(1)", ss.str()); } { std::stringstream ss; - ss << paddle::platform::CpuPlace(); - EXPECT_EQ("CpuPlace", ss.str()); + ss << paddle::platform::CPUPlace(); + EXPECT_EQ("CPUPlace", ss.str()); } }