Merge pull request #3534 from gangliao/mem_release

FIX: Release CPU/GPU memory at the end of the Program
8 years ago · 812a64c083
parent 47f380bb47 94b58a29d6
commit 812a64c083
4 changed files with 49 additions and 20 deletions
--- a/paddle/memory/detail/system_allocator.cc
+++ b/paddle/memory/detail/system_allocator.cc
@ -27,7 +27,7 @@ limitations under the License. */
 // between host and device.  Allocates too much would reduce the amount
 // of memory available to the system for paging.  So, by default, we
 // should set false to use_pinned_memory.
-DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory.");
+DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");

 namespace paddle {
 namespace memory {
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@ -13,22 +13,32 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/memory/memory.h"
+
+#include <algorithm>  // for transform
+#include <cstring>    // for memcpy
+#include <mutex>      // for call_once
+
 #include "paddle/memory/detail/buddy_allocator.h"
 #include "paddle/memory/detail/system_allocator.h"

-#include <cstring>  // for memcpy
-
 namespace paddle {
 namespace memory {

-detail::BuddyAllocator* GetCPUBuddyAllocator() {
-  static detail::BuddyAllocator* a = nullptr;
-  if (a == nullptr) {
-    a = new detail::BuddyAllocator(new detail::CPUAllocator,
-                                   platform::CpuMinChunkSize(),
-                                   platform::CpuMaxChunkSize());
-  }
-  return a;
+using BuddyAllocator = detail::BuddyAllocator;
+
+std::once_flag cpu_allocator_flag;
+std::once_flag gpu_allocator_flag;
+
+BuddyAllocator* GetCPUBuddyAllocator() {
+  static std::unique_ptr<BuddyAllocator> a{nullptr};
+
+  std::call_once(cpu_allocator_flag, [&]() {
+    a.reset(new BuddyAllocator(new detail::CPUAllocator,
+                               platform::CpuMinChunkSize(),
+                               platform::CpuMaxChunkSize()));
+  });
+
+  return a.get();
 }

 template <>
@ -48,20 +58,31 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {

 #ifndef PADDLE_ONLY_CPU

-detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
-  static detail::BuddyAllocator** as = NULL;
-  if (as == NULL) {
+BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
+  using BuddyAllocVec = std::vector<BuddyAllocator*>;
+  static std::unique_ptr<BuddyAllocVec, void (*)(BuddyAllocVec * p)> as{
+      new BuddyAllocVec, [](BuddyAllocVec* p) {
+        std::for_each(p->begin(), p->end(),
+                      [](BuddyAllocator* p) { delete p; });
+      }};
+
+  // GPU buddy allocators
+  auto& allocators = *as.get();
+
+  // GPU buddy allocator initialization
+  std::call_once(gpu_allocator_flag, [&]() {
    int gpu_num = platform::GetDeviceCount();
-    as = new detail::BuddyAllocator*[gpu_num];
+    allocators.reserve(gpu_num);
    for (int gpu = 0; gpu < gpu_num; gpu++) {
      platform::SetDeviceId(gpu);
-      as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator,
-                                           platform::GpuMinChunkSize(),
-                                           platform::GpuMaxChunkSize());
+      allocators.emplace_back(new BuddyAllocator(new detail::GPUAllocator,
+                                                 platform::GpuMinChunkSize(),
+                                                 platform::GpuMaxChunkSize()));
    }
-  }
+  });
+
  platform::SetDeviceId(gpu_id);
-  return as[gpu_id];
+  return allocators[gpu_id];
 }

 template <>
--- a/paddle/operators/gather_test.cc
+++ b/paddle/operators/gather_test.cc
@ -45,4 +45,8 @@ TEST(Gather, GatherData) {

  for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
  for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
+
+  delete src;
+  delete index;
+  delete output;
 }
--- a/paddle/operators/scatter_test.cc
+++ b/paddle/operators/scatter_test.cc
@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) {
    EXPECT_EQ(output->data<float>()[i], float(i - 4));
  for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0));
  for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0));
+
+  delete src;
+  delete index;
+  delete output;
 }