Use OO style to rewrite memory allocation.panyx0718-patch-1
							parent
							
								
									643b6faa0c
								
							
						
					
					
						commit
						58ed412f68
					
				@ -1,15 +1,12 @@
 | 
				
			||||
add_subdirectory(detail)
 | 
				
			||||
 | 
				
			||||
cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce)
 | 
				
			||||
add_subdirectory(allocation)
 | 
				
			||||
cc_library(malloc SRCS malloc.cc DEPS allocator_facade)
 | 
				
			||||
cc_library(memcpy SRCS memcpy.cc DEPS place)
 | 
				
			||||
 | 
				
			||||
cc_library(memory
 | 
				
			||||
        DEPS
 | 
				
			||||
        malloc
 | 
				
			||||
        memcpy)
 | 
				
			||||
 | 
				
			||||
cc_test(malloc_test SRCS malloc_test.cc DEPS malloc)
 | 
				
			||||
 | 
				
			||||
#if (WITH_GPU)
 | 
				
			||||
#   nv_test(pinned_memory_test SRCS pinned_memory_test.cu  DEPS place memory)
 | 
				
			||||
#endif()
 | 
				
			||||
 | 
				
			||||
@ -0,0 +1,43 @@
 | 
				
			||||
# Base interfaces shared by every allocator in this directory.
cc_library(allocator SRCS allocator.cc DEPS place)

# Concrete allocator implementations.
cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
nv_library(cuda_allocator SRCS cuda_allocator.cc DEPS allocator gpu_info)

# The best-fit test gains a CUDA source and extra dependencies on GPU builds.
if (WITH_GPU)
    nv_test(best_fit_allocator_test
            SRCS best_fit_allocator_test.cc best_fit_allocator_test.cu
            DEPS best_fit_allocator locked_allocator cpu_allocator
                 cuda_allocator device_context memcpy)
else()
    cc_test(best_fit_allocator_test
            SRCS best_fit_allocator_test.cc
            DEPS best_fit_allocator locked_allocator cpu_allocator)
endif()

cc_library(naive_managed_allocator
           SRCS naive_managed_allocator.cc
           DEPS allocator)
cc_test(naive_managed_allocator_test
        SRCS naive_managed_allocator_test.cc
        DEPS naive_managed_allocator)

# Extra facade dependencies that only exist on GPU builds.
if (WITH_GPU)
    set(AllocatorFacadeDeps gpu_info cuda_allocator)
else ()
    set(AllocatorFacadeDeps)
endif()

cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)

cc_library(allocator_facade
           SRCS allocator_facade.cc
           DEPS ${AllocatorFacadeDeps}
                cpu_allocator
                locked_allocator
                best_fit_allocator
                naive_managed_allocator
                aligned_allocator)
 | 
				
			||||
@ -0,0 +1,26 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
// Takes shared ownership of the allocator that this wrapper forwards to.
ThinAlignedAllocator::ThinAlignedAllocator(
    std::shared_ptr<ManagedAllocator> wrapped)
    : underlying_allocator_(std::move(wrapped)) {}
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,68 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#pragma once
 | 
				
			||||
#include <memory>
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
template <size_t kAlignment>
 | 
				
			||||
class AlignedAllocation : public Allocation {
 | 
				
			||||
 public:
 | 
				
			||||
  AlignedAllocation(std::unique_ptr<Allocation>&& underlying_allocation,
 | 
				
			||||
                    size_t size)
 | 
				
			||||
      : Allocation(AlignedPtr(underlying_allocation->ptr()), size,
 | 
				
			||||
                   underlying_allocation->place()),
 | 
				
			||||
        underlying_allocation_(std::move(underlying_allocation)) {}
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  static void* AlignedPtr(void* ptr) {
 | 
				
			||||
    auto ptr_addr = reinterpret_cast<uintptr_t>(ptr);
 | 
				
			||||
    ptr_addr = (ptr_addr & ~(kAlignment - 1)) + kAlignment;
 | 
				
			||||
    return reinterpret_cast<void*>(ptr_addr);
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
  std::unique_ptr<Allocation> underlying_allocation_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
class ThinAlignedAllocator : public ManagedAllocator {
 | 
				
			||||
 public:
 | 
				
			||||
  explicit ThinAlignedAllocator(
 | 
				
			||||
      std::shared_ptr<ManagedAllocator> underlyning_allocator);
 | 
				
			||||
 | 
				
			||||
 protected:
 | 
				
			||||
  std::shared_ptr<ManagedAllocator> underlying_allocator_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
template <size_t kAlignment>
 | 
				
			||||
class AlignedAllocator : public ThinAlignedAllocator {
 | 
				
			||||
 public:
 | 
				
			||||
  using ThinAlignedAllocator::ThinAlignedAllocator;
 | 
				
			||||
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
 | 
				
			||||
    auto raw_allocation =
 | 
				
			||||
        underlying_allocator_->Allocate(size + kAlignment, attr);
 | 
				
			||||
    return std::unique_ptr<Allocation>(
 | 
				
			||||
        new AlignedAllocation<kAlignment>(std::move(raw_allocation), size));
 | 
				
			||||
  }
 | 
				
			||||
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
 | 
				
			||||
    return std::shared_ptr<Allocation>(Allocate(size, attr).release());
 | 
				
			||||
  }
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,29 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
// Out-of-line virtual destructor; there is nothing to release here.
Allocation::~Allocation() = default;
 | 
				
			||||
 | 
				
			||||
// Out-of-line virtual destructor; there is nothing to release here.
Allocator::~Allocator() = default;
 | 
				
			||||
 | 
				
			||||
// Default answer for allocators that do not override this: not thread-safe.
bool Allocator::IsAllocThreadSafe() const {
  return false;
}
 | 
				
			||||
 | 
				
			||||
// Returns the message passed at construction. The pointer refers to the
// string stored inside this exception object.
const char* BadAlloc::what() const noexcept {
  return msg_.c_str();
}
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,93 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#pragma once
 | 
				
			||||
#include <memory>
 | 
				
			||||
#include <string>
 | 
				
			||||
#include "paddle/fluid/platform/place.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
// Exception type that carries a message string. what() is defined out of
// line and returns that message.
class BadAlloc : public std::exception {
 private:
  std::string msg_;

 public:
  // Copies `msg` into the exception.
  explicit BadAlloc(const std::string& msg) : msg_(msg) {}

  const char* what() const noexcept override;
};
 | 
				
			||||
 | 
				
			||||
class Allocation {
 | 
				
			||||
 public:
 | 
				
			||||
  Allocation(void* ptr, size_t size, platform::Place place)
 | 
				
			||||
      : ptr_(ptr), size_(size), place_(place) {}
 | 
				
			||||
 | 
				
			||||
  Allocation(const Allocation& o) = delete;
 | 
				
			||||
  Allocation& operator=(const Allocation& o) = delete;
 | 
				
			||||
 | 
				
			||||
  void* ptr() const { return ptr_; }
 | 
				
			||||
 | 
				
			||||
  size_t size() const { return size_; }
 | 
				
			||||
 | 
				
			||||
  const platform::Place& place() const { return place_; }
 | 
				
			||||
 | 
				
			||||
  virtual ~Allocation();
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  void* ptr_;
 | 
				
			||||
  size_t size_;
 | 
				
			||||
  platform::Place place_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
class Allocator {
 | 
				
			||||
 public:
 | 
				
			||||
  enum Attr {
 | 
				
			||||
    kDefault = 0,
 | 
				
			||||
    kTiny = 1,
 | 
				
			||||
    kFixedHuge = 2,
 | 
				
			||||
    kFluxHuge = 3,
 | 
				
			||||
    kTmp = 4,
 | 
				
			||||
    NumOfAttrs = 5
 | 
				
			||||
  };
 | 
				
			||||
 | 
				
			||||
  virtual ~Allocator();
 | 
				
			||||
  virtual std::unique_ptr<Allocation> Allocate(
 | 
				
			||||
      size_t size, Allocator::Attr attr = kDefault) = 0;
 | 
				
			||||
 | 
				
			||||
  virtual bool IsAllocThreadSafe() const;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
// User need to invoke `Free` or `FreeUniquePtr` manually if allocated by
 | 
				
			||||
// a manally managed allocator.
 | 
				
			||||
class UnmanagedAllocator : public Allocator {
 | 
				
			||||
 public:
 | 
				
			||||
  virtual void Free(Allocation* allocation) = 0;
 | 
				
			||||
 | 
				
			||||
  void FreeUniquePtr(std::unique_ptr<Allocation> allocation) {
 | 
				
			||||
    Free(allocation.get());
 | 
				
			||||
  }
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
// The allocation will be managed by smart pointers
 | 
				
			||||
class ManagedAllocator : public Allocator {
 | 
				
			||||
 public:
 | 
				
			||||
  virtual std::shared_ptr<Allocation> AllocateShared(
 | 
				
			||||
      size_t size, Allocator::Attr attr = kDefault) = 0;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,102 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
#include <map>
 | 
				
			||||
#include <vector>
 | 
				
			||||
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator_facade.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/locked_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
 | 
				
			||||
#include "paddle/fluid/platform/gpu_info.h"
 | 
				
			||||
#include "paddle/fluid/platform/place.h"
 | 
				
			||||
#ifdef PADDLE_WITH_CUDA
 | 
				
			||||
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
 | 
				
			||||
#endif
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
class AllocatorFacadePrivate {
 | 
				
			||||
 public:
 | 
				
			||||
  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
 | 
				
			||||
  std::vector<std::unique_ptr<Allocation>> pre_allocations_;
 | 
				
			||||
  std::vector<std::shared_ptr<Allocator>> holding_allocators_;
 | 
				
			||||
 | 
				
			||||
  ~AllocatorFacadePrivate() {
 | 
				
			||||
    // Specify destruct order.
 | 
				
			||||
    pre_allocations_.clear();
 | 
				
			||||
    allocators_.clear();
 | 
				
			||||
    holding_allocators_.clear();
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
  AllocatorFacadePrivate() {
 | 
				
			||||
    InitCPUAllocator();
 | 
				
			||||
    InitCUDAAllocator();
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  void InitCPUAllocator() {
 | 
				
			||||
    auto all = NaiveManagedAllocator::Create(
 | 
				
			||||
        std::unique_ptr<Allocator>(new CPUAllocator()));
 | 
				
			||||
 | 
				
			||||
    allocators_[platform::CPUPlace()] = all;
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
  void InitCUDAAllocator() {
 | 
				
			||||
#ifdef PADDLE_WITH_CUDA
 | 
				
			||||
    for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
 | 
				
			||||
      auto cuda_allocator =
 | 
				
			||||
          NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
 | 
				
			||||
              new CUDAAllocator(platform::CUDAPlace(dev_id))));
 | 
				
			||||
 | 
				
			||||
      auto allocation = cuda_allocator->Allocate(platform::GpuMaxChunkSize());
 | 
				
			||||
      auto allocator = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
 | 
				
			||||
          new LockedAllocator(std::unique_ptr<Allocator>(
 | 
				
			||||
              new BestFitAllocator(allocation.get())))));
 | 
				
			||||
 | 
				
			||||
      pre_allocations_.emplace_back(std::move(allocation));
 | 
				
			||||
      holding_allocators_.emplace_back(cuda_allocator);
 | 
				
			||||
      allocators_[platform::CUDAPlace(dev_id)] =
 | 
				
			||||
          std::make_shared<AlignedAllocator<64>>(std::move(allocator));
 | 
				
			||||
    }
 | 
				
			||||
#endif
 | 
				
			||||
  }
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
// Builds the private implementation; it is released in the destructor.
AllocatorFacade::AllocatorFacade()
    : m_(new AllocatorFacadePrivate()) {}
 | 
				
			||||
// Destroys the private implementation created by the constructor.
AllocatorFacade::~AllocatorFacade() {
  delete m_;
}
 | 
				
			||||
 | 
				
			||||
// Returns the single facade object, which is a function-local static and is
// therefore constructed on first call.
AllocatorFacade& AllocatorFacade::Instance() {
  static AllocatorFacade singleton;
  return singleton;
}
 | 
				
			||||
 | 
				
			||||
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
 | 
				
			||||
    const platform::Place& place, size_t size, Allocator::Attr attr) {
 | 
				
			||||
  return m_->allocators_[place]->AllocateShared(size, attr);
 | 
				
			||||
}
 | 
				
			||||
 | 
				
			||||
// Allocates `size` bytes on `place` and returns a uniquely owned allocation.
//
// Throws BadAlloc when no allocator is registered for `place`. The lookup
// uses find() rather than operator[] so that an unknown place neither inserts
// a null entry into the map nor leads to a null dereference.
std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
                                                   size_t size,
                                                   Allocator::Attr attr) {
  auto found = m_->allocators_.find(place);
  if (found == m_->allocators_.end() || found->second == nullptr) {
    throw BadAlloc("No allocator is registered for the requested place");
  }
  return found->second->Allocate(size, attr);
}
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,47 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#pragma once
 | 
				
			||||
#include <memory>
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
#include "paddle/fluid/platform/place.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
class AllocatorFacadePrivate;
 | 
				
			||||
class AllocatorFacade {
 | 
				
			||||
 public:
 | 
				
			||||
  ~AllocatorFacade();
 | 
				
			||||
  AllocatorFacade(const AllocatorFacade& o) = delete;
 | 
				
			||||
  const AllocatorFacade& operator=(const AllocatorFacade& o) = delete;
 | 
				
			||||
 | 
				
			||||
  static AllocatorFacade& Instance();
 | 
				
			||||
 | 
				
			||||
  std::shared_ptr<Allocation> AllocShared(
 | 
				
			||||
      const platform::Place& place, size_t size,
 | 
				
			||||
      Allocator::Attr attr = Allocator::kDefault);
 | 
				
			||||
 | 
				
			||||
  std::unique_ptr<Allocation> Alloc(const platform::Place& place, size_t size,
 | 
				
			||||
                                    Allocator::Attr attr = Allocator::kDefault);
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  AllocatorFacade();
 | 
				
			||||
  AllocatorFacadePrivate* m_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,169 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 | 
				
			||||
#include <bits/stdc++.h>
 | 
				
			||||
#include <list>
 | 
				
			||||
#include <map>
 | 
				
			||||
#include <string>
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
// Returns the 1-based position of the highest set bit of N, or 0 when N is 0.
// Examples: 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3.
//
// This is computed with integer shifts rather than std::log2: converting a
// large size_t to double can round it up to the next power of two, which
// would yield a position one too high (and past the last free-chunk bin for
// values close to the maximum).
static int HighestBitPos(size_t N) {
  int pos = 0;
  while (N != 0) {
    ++pos;
    N >>= 1;
  }
  return pos;
}
 | 
				
			||||
 | 
				
			||||
// Builds the allocator over `allocation`. The pointer is stored as-is, and
// the whole range starts out as a single free chunk at offset 0.
BestFitAllocator::BestFitAllocator(Allocation* allocation)
    : allocation_(allocation) {
  details::Chunk whole;
  whole.is_free = true;
  whole.offset_ = 0;
  whole.size_ = allocation_->size();
  chunks_.emplace_back(whole);

  // Register the chunk in the bin selected by its highest set bit.
  const auto bin = HighestBitPos(whole.size_);
  free_chunks_[bin].insert({whole.size_, chunks_.begin()});
}
 | 
				
			||||
 | 
				
			||||
std::unique_ptr<Allocation> BestFitAllocator::Allocate(size_t size, Attr attr) {
 | 
				
			||||
  auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
 | 
				
			||||
  MapIt map_it;
 | 
				
			||||
  for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
 | 
				
			||||
    map_it = free_chunks_[highest_set_bit].lower_bound(size);
 | 
				
			||||
    if (map_it != free_chunks_[highest_set_bit].end()) {
 | 
				
			||||
      break;
 | 
				
			||||
    }
 | 
				
			||||
  }
 | 
				
			||||
  if (UNLIKELY(highest_set_bit == free_chunks_.size())) {
 | 
				
			||||
    throw BadAlloc(string::Sprintf(
 | 
				
			||||
        "Cannot allocate %d, All fragments size is %d", size, FreeSize()));
 | 
				
			||||
  }
 | 
				
			||||
  auto chunk_it = SplitChunk(size, highest_set_bit, map_it);
 | 
				
			||||
  return std::unique_ptr<Allocation>(new BestFitAllocation(this, chunk_it));
 | 
				
			||||
}
 | 
				
			||||
 | 
				
			||||
size_t BestFitAllocator::FreeSize() const {
 | 
				
			||||
  size_t acc = 0;
 | 
				
			||||
  for (auto& array_item : free_chunks_) {
 | 
				
			||||
    for (auto& pair : array_item) {
 | 
				
			||||
      acc += pair.second->size_;
 | 
				
			||||
    }
 | 
				
			||||
  }
 | 
				
			||||
  return acc;
 | 
				
			||||
}
 | 
				
			||||
 | 
				
			||||
// Carves `request_size` bytes off the front of the free chunk referenced by
// `bin_iterator`, which must be an entry of free_chunks_[free_chunk_offset].
// Any remainder goes back into the free bins as a new free chunk. Returns the
// list iterator of the chunk now in use.
BestFitAllocator::ListIt BestFitAllocator::SplitChunk(size_t request_size,
                                                      size_t free_chunk_offset,
                                                      MapIt bin_iterator) {
  // Take the chunk out of its free bin before replacing it.
  auto victim = bin_iterator->second;
  free_chunks_[free_chunk_offset].erase(bin_iterator);

  PADDLE_ENFORCE(victim->is_free);
  PADDLE_ENFORCE_GE(victim->size_, request_size);

  // Front part: handed to the caller.
  details::Chunk head;
  head.is_free = false;
  head.offset_ = victim->offset_;
  head.size_ = request_size;

  // Back part: whatever is left, still free.
  details::Chunk tail;
  tail.is_free = true;
  tail.offset_ = head.offset_ + head.size_;
  tail.size_ = victim->size_ - request_size;

  // Both parts are inserted in front of the old node, which is then removed,
  // so the list keeps its offset order.
  auto head_it = chunks_.insert(victim, head);
  if (tail.size_ != 0) {
    auto tail_it = chunks_.insert(victim, tail);
    auto tail_bin = static_cast<size_t>(HighestBitPos(tail.size_));
    free_chunks_[tail_bin].insert({tail.size_, tail_it});
  }
  chunks_.erase(victim);
  return head_it;
}
 | 
				
			||||
 | 
				
			||||
// Marks the chunk behind `allocation` as free, merges it with a free
// neighbour on either side, and registers the result in the free bins.
//
// `allocation` must be a BestFitAllocation; any other type (or a null
// pointer) fails the enforce check instead of being dereferenced.
void BestFitAllocator::Free(Allocation* allocation) {
  auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation);
  PADDLE_ENFORCE(bf_allocation != nullptr);
  auto chunk_it = bf_allocation->ChunkIterator();
  PADDLE_ENFORCE(!chunk_it->is_free);
  chunk_it->is_free = true;
  if (chunk_it != chunks_.begin()) {
    auto prev_it = chunk_it;
    --prev_it;

    if (prev_it->is_free) {
      // Merge Left. The left neighbour leaves its bin first because its size
      // (and therefore its bin key) is about to change.
      EraseFreeNode(prev_it);
      prev_it->size_ += chunk_it->size_;
      chunks_.erase(chunk_it);
      chunk_it = prev_it;
    }
  }

  auto next_it = chunk_it;
  ++next_it;
  if (next_it != chunks_.end() && next_it->is_free) {
    // Merge Right. The right neighbour is absorbed into the current chunk.
    EraseFreeNode(next_it);
    chunk_it->size_ += next_it->size_;
    chunks_.erase(next_it);
  }

  InsertFreeNode(chunk_it);
}
 | 
				
			||||
 | 
				
			||||
void BestFitAllocator::InsertFreeNode(const ListIt& it) {
 | 
				
			||||
  auto pos = static_cast<size_t>(HighestBitPos(it->size_));
 | 
				
			||||
  auto& free_map = free_chunks_[pos];
 | 
				
			||||
  free_map.insert({it->size_, it});
 | 
				
			||||
}
 | 
				
			||||
void BestFitAllocator::EraseFreeNode(const ListIt& it) {
 | 
				
			||||
  size_t pos = static_cast<size_t>(HighestBitPos(it->size_));
 | 
				
			||||
  auto& free_map = free_chunks_[pos];
 | 
				
			||||
  auto map_it = free_map.find(it->size_);
 | 
				
			||||
  while (map_it->second != it && map_it != free_map.end()) {
 | 
				
			||||
    ++map_it;
 | 
				
			||||
  }
 | 
				
			||||
  PADDLE_ENFORCE(map_it != free_map.end());
 | 
				
			||||
  free_map.erase(map_it);
 | 
				
			||||
}
 | 
				
			||||
size_t BestFitAllocator::NumFreeChunks() const {
 | 
				
			||||
  size_t num = 0;
 | 
				
			||||
  for (auto& array_item : free_chunks_) {
 | 
				
			||||
    num += array_item.size();
 | 
				
			||||
  }
 | 
				
			||||
  return num;
 | 
				
			||||
}
 | 
				
			||||
 | 
				
			||||
// Describes the chunk at `chunk_it` of `allocator`. The address is the
// allocator's base pointer plus the chunk offset; size and place come from
// the chunk and the allocator respectively.
BestFitAllocation::BestFitAllocation(BestFitAllocator* allocator,
                                     details::ChunkList::iterator chunk_it)
    : Allocation(
          reinterpret_cast<void*>(
              reinterpret_cast<uintptr_t>(allocator->BasePtr()) +
              chunk_it->offset_),
          chunk_it->size_, allocator->Place()),
      allocator_(allocator),
      chunk_it_(chunk_it) {}
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,132 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#pragma once
 | 
				
			||||
#include <array>
 | 
				
			||||
#include <list>
 | 
				
			||||
#include <map>
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
namespace details {
 | 
				
			||||
// One contiguous piece of the base allocation.
// Every field has a default value, so a default-constructed Chunk is a free,
// empty chunk at offset 0 and never holds indeterminate values.
struct Chunk {
  // True while the chunk is not handed out.
  bool is_free{true};
  // Offset to the base allocation.
  uintptr_t offset_{0};
  // Size of the chunk.
  size_t size_{0};
};
 | 
				
			||||
 | 
				
			||||
// Here we use std::list to maintain chunk list.
 | 
				
			||||
// NOTE(yy): The traditional implementation of ChunkList is to add `prev`/`next`
 | 
				
			||||
// pointers in `Chunk`, and split the allocation as `ChunkHeader` and
 | 
				
			||||
// `Payload`. Such as
 | 
				
			||||
//   *-------*---------------*---------------*--------------*
 | 
				
			||||
//   | Chunk | prev_ pointer | next_ pointer | payload .... |
 | 
				
			||||
//   *-------*---------------*---------------*--------------*
 | 
				
			||||
// This implementation can just return a raw pointer, and we can get the list
 | 
				
			||||
// structure by it. However, we cannot use the same code on GPU since CPU
 | 
				
			||||
// cannot access GPU memory directly.
 | 
				
			||||
//
 | 
				
			||||
// So we choose to use `std::list` and return an allocation instance, which
 | 
				
			||||
// contains the list node iterator, then we can unify CPU/GPU code.
 | 
				
			||||
//
 | 
				
			||||
// Returning an allocation is not a bad idea, since Tensor/Vector should hold
 | 
				
			||||
// an allocation instead of raw pointer directly.
 | 
				
			||||
// Node-based list of chunks; allocations keep an iterator into this list.
using ChunkList = std::list<Chunk>;
 | 
				
			||||
 | 
				
			||||
// Here we use a multi-level map of free chunks.
 | 
				
			||||
// the map is
 | 
				
			||||
//      MSB offset --> size --> [ChunkList::iterator]
 | 
				
			||||
//
 | 
				
			||||
// The time complexities:
 | 
				
			||||
//     find a free chunk:
 | 
				
			||||
//          O(logN),
 | 
				
			||||
//               where N is the number of free nodes with the same MSB offset.
 | 
				
			||||
//     find the position of a chunk iterator:
 | 
				
			||||
//          O(logN + K),
 | 
				
			||||
//               where N is the number of free nodes with the same MSB offset.
 | 
				
			||||
//               where K is the number of free nodes with the same size.
 | 
				
			||||
//     insert a free chunk:
 | 
				
			||||
//          O(logN),
 | 
				
			||||
//               where N is the number of free nodes with the same MSB offset.
 | 
				
			||||
//     erase a free chunk:
 | 
				
			||||
//          O(1)
 | 
				
			||||
using FreeChunkBin =
 | 
				
			||||
    std::array<std::multimap<size_t, ChunkList::iterator>, sizeof(size_t) * 8>;
 | 
				
			||||
}  // namespace details
 | 
				
			||||
 | 
				
			||||
// Forward declaration: BestFitAllocation stores a pointer to its allocator.
class BestFitAllocator;
 | 
				
			||||
 | 
				
			||||
// The BestFitAllocation maintains the list node iterator.
 | 
				
			||||
class BestFitAllocation : public Allocation {
 | 
				
			||||
 private:
 | 
				
			||||
  using ListIt = typename details::ChunkList::iterator;
 | 
				
			||||
 | 
				
			||||
 public:
 | 
				
			||||
  BestFitAllocation(BestFitAllocator* allocator, ListIt chunk_it);
 | 
				
			||||
 | 
				
			||||
  const ListIt& ChunkIterator() const { return chunk_it_; }
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  BestFitAllocator* allocator_;
 | 
				
			||||
  typename details::ChunkList::iterator chunk_it_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
// TODO(yy): Current BestFitAllocator is not thread-safe. To make it thread
 | 
				
			||||
// safe, we must wrap a locked_allocator. However, we can implement a thread
 | 
				
			||||
// safe allocator by locking each bin and chunks list independently. It will
 | 
				
			||||
// make BestFitAllocator faster in multi-thread situation.
 | 
				
			||||
//
 | 
				
			||||
// This allocator implements a best-fit allocator with merging the free nodes.
 | 
				
			||||
//
 | 
				
			||||
// To allocate a buffer, it will find the best-fit chunk. If the best-fit chunk
 | 
				
			||||
// is larger than request size, the original block will be split into two
 | 
				
			||||
// chunks. The first block will be used and the second block will be put into
 | 
				
			||||
// free chunks.
 | 
				
			||||
//
 | 
				
			||||
// To free an allocation, it will set the chunk of allocation to free and merge
 | 
				
			||||
// the prev-chunk and the next-chunk when possible.
 | 
				
			||||
class BestFitAllocator : public UnmanagedAllocator {
 | 
				
			||||
 public:
 | 
				
			||||
  explicit BestFitAllocator(Allocation* allocation);
 | 
				
			||||
 | 
				
			||||
  void* BasePtr() const { return allocation_->ptr(); }
 | 
				
			||||
 | 
				
			||||
  const platform::Place& Place() const { return allocation_->place(); }
 | 
				
			||||
 | 
				
			||||
  std::unique_ptr<Allocation> Allocate(size_t size,
 | 
				
			||||
                                       Attr attr = kDefault) override;
 | 
				
			||||
  void Free(Allocation* allocation) override;
 | 
				
			||||
 | 
				
			||||
  size_t NumFreeChunks() const;
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  size_t FreeSize() const;
 | 
				
			||||
  using MapIt = typename details::FreeChunkBin::value_type::iterator;
 | 
				
			||||
  using ListIt = typename details::ChunkList::iterator;
 | 
				
			||||
 | 
				
			||||
  ListIt SplitChunk(size_t request_size, size_t free_chunk_offset,
 | 
				
			||||
                    MapIt bin_iterator);
 | 
				
			||||
  void EraseFreeNode(const ListIt& it);
 | 
				
			||||
  void InsertFreeNode(const ListIt& it);
 | 
				
			||||
 | 
				
			||||
  Allocation* allocation_;  // not owned
 | 
				
			||||
  details::ChunkList chunks_;
 | 
				
			||||
  details::FreeChunkBin free_chunks_;
 | 
				
			||||
};
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,144 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 | 
				
			||||
#include <thread>  // NOLINT
 | 
				
			||||
#include <vector>
 | 
				
			||||
#include "gtest/gtest.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/locked_allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
class StubAllocation : public Allocation {
 | 
				
			||||
 public:
 | 
				
			||||
  explicit StubAllocation(size_t size)
 | 
				
			||||
      : Allocation(0, size, platform::CPUPlace()) {}
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
// Single-threaded walk through allocate/free sequences on a stub buffer,
// checking chunk offsets after splits and that all chunks merge back into one.
TEST(BestFitAllocator, test_allocation) {
  StubAllocation stub(4UL * 1024 * 1024 * 1024);
  BestFitAllocator allocator(&stub);
  {
    auto allocation = allocator.Allocate(64);
    allocator.FreeUniquePtr(std::move(allocation));
  }

  {
    auto allocation = allocator.Allocate(80);

    {
      auto* best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation.get());
      ASSERT_NE(best_fit_allocation, nullptr);
      ASSERT_FALSE(best_fit_allocation->ChunkIterator()->is_free);
      // Unsigned literals avoid signed/unsigned comparisons inside gtest.
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0U);
      ASSERT_EQ(allocation->size(), 80U);
      ASSERT_EQ(allocation->ptr(), nullptr);
    }

    auto allocation2 = allocator.Allocate(60);
    auto allocation3 = allocator.Allocate(90);
    allocator.FreeUniquePtr(std::move(allocation2));
    allocation2 = allocator.Allocate(30);

    {
      auto* best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation2.get());
      // Fail the test instead of dereferencing a null pointer on a bad cast.
      ASSERT_NE(best_fit_allocation, nullptr);
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80U);
    }
    allocator.FreeUniquePtr(std::move(allocation2));

    allocation2 = allocator.Allocate(60);

    {
      auto* best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation2.get());
      ASSERT_NE(best_fit_allocation, nullptr);
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80U);
    }

    allocator.FreeUniquePtr(std::move(allocation));
    allocator.FreeUniquePtr(std::move(allocation2));

    allocation = allocator.Allocate(80 + 60);
    {
      auto* best_fit_allocation =
          dynamic_cast<BestFitAllocation*>(allocation.get());
      ASSERT_NE(best_fit_allocation, nullptr);
      ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 0U);
    }

    allocator.FreeUniquePtr(std::move(allocation));

    allocation = allocator.Allocate(80);
    allocation2 = allocator.Allocate(60);
    allocator.FreeUniquePtr(std::move(allocation));
    allocator.FreeUniquePtr(std::move(allocation3));
    allocator.FreeUniquePtr(std::move(allocation2));

    // Everything has been released, so a single free chunk must remain.
    ASSERT_EQ(allocator.NumFreeChunks(), 1U);
  }
}
 | 
				
			||||
 | 
				
			||||
// Hammers a LockedAllocator-wrapped BestFitAllocator from many threads. Each
// thread repeatedly allocates a random-sized buffer, writes a pattern, yields,
// and verifies the pattern is intact before freeing.
TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
  CPUAllocator allocator;
  auto global_allocation = allocator.Allocate(256UL * 1024 * 1024);

  LockedAllocator locked_allocator(std::unique_ptr<Allocator>(
      new BestFitAllocator(global_allocation.get())));

  auto worker = [&] {
    std::random_device seed_source;
    std::default_random_engine rng(seed_source());
    std::uniform_int_distribution<size_t> count_dist(1U, 1024U);

    for (size_t round = 0; round < 128; ++round) {
      size_t count = count_dist(rng);
      auto allocation = locked_allocator.Allocate(sizeof(size_t) * count);
      size_t* data = reinterpret_cast<size_t*>(allocation->ptr());

      // Write the pattern, give other threads a chance to run, then check
      // that nobody else scribbled over this buffer.
      for (size_t j = 0; j < count; ++j) {
        data[j] = j;
      }
      std::this_thread::yield();
      for (size_t j = 0; j < count; ++j) {
        ASSERT_EQ(data[j], j);
      }

      locked_allocator.FreeUniquePtr(std::move(allocation));
    }
  };

  {
    std::vector<std::thread> workers;
    for (size_t t = 0; t < 1024; ++t) {
      workers.emplace_back(worker);
    }
    for (auto& w : workers) {
      w.join();
    }
  }

  allocator.FreeUniquePtr(std::move(global_allocation));
}
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,88 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include <thread>  // NOLINT
 | 
				
			||||
#include <vector>
 | 
				
			||||
#include "gtest/gtest.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/allocation/locked_allocator.h"
 | 
				
			||||
#include "paddle/fluid/memory/memcpy.h"
 | 
				
			||||
#include "paddle/fluid/platform/for_range.h"
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
struct ForEachFill {
 | 
				
			||||
  size_t* ptr_;
 | 
				
			||||
 | 
				
			||||
  explicit ForEachFill(size_t* ptr) : ptr_(ptr) {}
 | 
				
			||||
 | 
				
			||||
  __device__ void operator()(size_t i) { ptr_[i] = i; }
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
// CUDA counterpart of the concurrent CPU test. Each thread allocates a
// random-sized buffer from a locked best-fit allocator over one device
// allocation, fills it on the device, copies it back, and checks the contents.
TEST(BestFitAllocator, concurrent_cuda) {
  CUDAAllocator allocator(platform::CUDAPlace(0));
  // 256 MB
  auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024);

  std::unique_ptr<Allocator> best_fit(
      new BestFitAllocator(cuda_allocation.get()));
  LockedAllocator concurrent_allocator(std::move(best_fit));

  auto worker = [&] {
    std::random_device seed_source;
    std::default_random_engine rng(seed_source());
    std::uniform_int_distribution<size_t> count_dist(1U, 1024U);
    platform::CUDAPlace gpu(0);
    platform::CUDADeviceContext dev_ctx(gpu);
    // Host staging buffer sized for the largest possible request.
    std::array<size_t, 1024> buf;

    for (size_t round = 0; round < 128; ++round) {
      size_t count = count_dist(rng);
      auto allocation = concurrent_allocator.Allocate(sizeof(size_t) * count);
      size_t* data = reinterpret_cast<size_t*>(allocation->ptr());

      // Fill data[i] = i on the device.
      ForEachFill fill(data);
      platform::ForRange<platform::CUDADeviceContext> for_range(dev_ctx,
                                                                count);
      for_range(fill);

      // Copy back on the same stream and wait before reading the result.
      memory::Copy(platform::CPUPlace(), buf.data(), gpu, data,
                   sizeof(size_t) * count, dev_ctx.stream());
      dev_ctx.Wait();

      for (size_t j = 0; j < count; ++j) {
        ASSERT_EQ(buf[j], j);
      }

      concurrent_allocator.FreeUniquePtr(std::move(allocation));
    }
  };

  {
    std::vector<std::thread> workers;
    for (size_t t = 0; t < 1024; ++t) {
      workers.emplace_back(worker);
    }
    for (auto& w : workers) {
      w.join();
    }
  }
  allocator.FreeUniquePtr(std::move(cuda_allocation));
}
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,40 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
 | 
				
			||||
#include <stdlib.h>
 | 
				
			||||
#include <string>
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
std::unique_ptr<Allocation> CPUAllocator::Allocate(size_t size, Attr attr) {
 | 
				
			||||
  void* ptr;
 | 
				
			||||
  auto status = posix_memalign(&ptr, kAlignment, size);
 | 
				
			||||
  if (UNLIKELY(status) != 0) {
 | 
				
			||||
    throw BadAlloc(string::Sprintf("Cannot allocate cpu memory %d. Errno is %d",
 | 
				
			||||
                                   size, status));
 | 
				
			||||
  }
 | 
				
			||||
  return std::unique_ptr<Allocation>(new CPUAllocation(ptr, size));
 | 
				
			||||
}
 | 
				
			||||
// Releases the memory behind `allocation` with free(). The allocation must be
// a CPUAllocation; any other dynamic type trips the enforce check.
void CPUAllocator::Free(Allocation* allocation) {
  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUAllocation*>(allocation));
  void* raw = allocation->ptr();
  free(raw);
}
 | 
				
			||||
 | 
				
			||||
// Always true: LockedAllocator skips its mutex for allocators reporting true.
bool CPUAllocator::IsAllocThreadSafe() const { return true; }
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,38 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#pragma once
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
class CPUAllocation : public Allocation {
 | 
				
			||||
 public:
 | 
				
			||||
  CPUAllocation(void* ptr, size_t size)
 | 
				
			||||
      : Allocation(ptr, size, platform::CPUPlace()) {}
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
class CPUAllocator : public UnmanagedAllocator {
 | 
				
			||||
 public:
 | 
				
			||||
  constexpr static size_t kAlignment = 64u;
 | 
				
			||||
  std::unique_ptr<Allocation> Allocate(size_t size,
 | 
				
			||||
                                       Attr attr = kDefault) override;
 | 
				
			||||
  void Free(Allocation* allocation) override;
 | 
				
			||||
  bool IsAllocThreadSafe() const override;
 | 
				
			||||
};
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,69 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
 | 
				
			||||
#include <cuda.h>
 | 
				
			||||
#include <cuda_runtime.h>
 | 
				
			||||
#include <string>
 | 
				
			||||
#include "paddle/fluid/platform/gpu_info.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
class CUDADeviceGuard {
 | 
				
			||||
 public:
 | 
				
			||||
  explicit CUDADeviceGuard(int dev_id) {
 | 
				
			||||
    int prev_id = platform::GetCurrentDeviceId();
 | 
				
			||||
    if (prev_id != dev_id) {
 | 
				
			||||
      prev_id_ = prev_id;
 | 
				
			||||
      platform::SetDeviceId(dev_id);
 | 
				
			||||
    }
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
  ~CUDADeviceGuard() {
 | 
				
			||||
    if (prev_id_ != -1) {
 | 
				
			||||
      platform::SetDeviceId(prev_id_);
 | 
				
			||||
    }
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  int prev_id_{-1};
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
std::unique_ptr<Allocation> CUDAAllocator::Allocate(size_t size, Attr attr) {
 | 
				
			||||
  CUDADeviceGuard guard(place_.device);
 | 
				
			||||
  void* ptr;
 | 
				
			||||
  auto status = cudaMalloc(&ptr, size);
 | 
				
			||||
  if (UNLIKELY(status != cudaSuccess)) {
 | 
				
			||||
    throw BadAlloc(string::Sprintf(
 | 
				
			||||
        "Cannot allocate %d on GPU %d, cuda status %d, %s", size, place_.device,
 | 
				
			||||
        status, cudaGetErrorString(status)));
 | 
				
			||||
  }
 | 
				
			||||
 | 
				
			||||
  return std::unique_ptr<Allocation>(
 | 
				
			||||
      new CUDAAllocation(ptr, size, platform::Place(place_)));
 | 
				
			||||
}
 | 
				
			||||
 | 
				
			||||
// Releases `allocation` with cudaFree. The allocation must be a
// CUDAAllocation located on this allocator's place; both conditions are
// enforced before the memory is released.
void CUDAAllocator::Free(Allocation* allocation) {
  auto* cuda_allocation = dynamic_cast<CUDAAllocation*>(allocation);
  PADDLE_ENFORCE_NOT_NULL(cuda_allocation);
  PADDLE_ENFORCE_EQ(boost::get<platform::CUDAPlace>(cuda_allocation->place()),
                    place_);

  PADDLE_ENFORCE(cudaFree(allocation->ptr()));
}
 | 
				
			||||
// Always true: LockedAllocator skips its mutex for allocators reporting true.
bool CUDAAllocator::IsAllocThreadSafe() const { return true; }
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,45 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#pragma once
 | 
				
			||||
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
#include "paddle/fluid/platform/place.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
// Just a flag type.
 | 
				
			||||
class CUDAAllocation : public Allocation {
 | 
				
			||||
 public:
 | 
				
			||||
  using Allocation::Allocation;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
class CUDAAllocator : public UnmanagedAllocator {
 | 
				
			||||
 public:
 | 
				
			||||
  explicit CUDAAllocator(const platform::CUDAPlace& place) : place_(place) {}
 | 
				
			||||
  explicit CUDAAllocator(const platform::Place& place)
 | 
				
			||||
      : place_(boost::get<platform::CUDAPlace>(place)) {}
 | 
				
			||||
  std::unique_ptr<Allocation> Allocate(size_t size,
 | 
				
			||||
                                       Attr attr = kDefault) override;
 | 
				
			||||
  void Free(Allocation* allocation) override;
 | 
				
			||||
  bool IsAllocThreadSafe() const override;
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  platform::CUDAPlace place_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,49 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
 | 
				
			||||
#include "paddle/fluid/memory/allocation/locked_allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
std::unique_ptr<Allocation> LockedAllocator::Allocate(size_t size, Attr attr) {
 | 
				
			||||
  if (underlying_allocator_->IsAllocThreadSafe()) {
 | 
				
			||||
    return underlying_allocator_->Allocate(size, attr);
 | 
				
			||||
  } else {
 | 
				
			||||
    std::lock_guard<std::mutex> guard(mtx_);
 | 
				
			||||
    return underlying_allocator_->Allocate(size, attr);
 | 
				
			||||
  }
 | 
				
			||||
}
 | 
				
			||||
// Forwards to the wrapped allocator's Free. The mutex is taken only when the
// wrapped allocator does not report itself as thread-safe.
void LockedAllocator::Free(Allocation *allocation) {
  std::unique_lock<std::mutex> lock(mtx_, std::defer_lock);
  if (!underlying_allocator_->IsAllocThreadSafe()) {
    lock.lock();
  }
  underlying_allocator_->Free(allocation);
}
 | 
				
			||||
// Always true: Allocate/Free take mtx_ unless the wrapped allocator is itself
// thread-safe.
bool LockedAllocator::IsAllocThreadSafe() const { return true; }
 | 
				
			||||
 | 
				
			||||
// Takes ownership of `underlying_allocator`, which must actually be an
// UnmanagedAllocator. If it is not, the enforce check fails and the caller's
// unique_ptr keeps ownership.
LockedAllocator::LockedAllocator(
    std::unique_ptr<Allocator> &&underlying_allocator) {
  auto *allocator =
      dynamic_cast<UnmanagedAllocator *>(underlying_allocator.get());
  PADDLE_ENFORCE_NOT_NULL(allocator);

  // Hand the object over: detach it from the incoming pointer first, then
  // adopt it through the down-cast pointer.
  underlying_allocator.release();
  underlying_allocator_.reset(allocator);
}
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
@ -0,0 +1,38 @@
 | 
				
			||||
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
//
 | 
				
			||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
// you may not use this file except in compliance with the License.
 | 
				
			||||
// You may obtain a copy of the License at
 | 
				
			||||
//
 | 
				
			||||
//     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
//
 | 
				
			||||
// Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
// See the License for the specific language governing permissions and
 | 
				
			||||
// limitations under the License.
 | 
				
			||||
#pragma once
 | 
				
			||||
#include <memory>
#include <mutex>   // NOLINT
#include <thread>  // NOLINT
#include "paddle/fluid/memory/allocation/allocator.h"
 | 
				
			||||
 | 
				
			||||
namespace paddle {
 | 
				
			||||
namespace memory {
 | 
				
			||||
namespace allocation {
 | 
				
			||||
 | 
				
			||||
class LockedAllocator : public UnmanagedAllocator {
 | 
				
			||||
 public:
 | 
				
			||||
  explicit LockedAllocator(std::unique_ptr<Allocator>&& underlying_allocator);
 | 
				
			||||
  std::unique_ptr<Allocation> Allocate(size_t size,
 | 
				
			||||
                                       Attr attr = kDefault) override;
 | 
				
			||||
  void Free(Allocation* allocation) override;
 | 
				
			||||
  bool IsAllocThreadSafe() const override;
 | 
				
			||||
 | 
				
			||||
 private:
 | 
				
			||||
  std::unique_ptr<UnmanagedAllocator> underlying_allocator_;
 | 
				
			||||
  std::mutex mtx_;
 | 
				
			||||
};
 | 
				
			||||
 | 
				
			||||
}  // namespace allocation
 | 
				
			||||
}  // namespace memory
 | 
				
			||||
}  // namespace paddle
 | 
				
			||||
Some files were not shown because too many files have changed in this diff Show More
					Loading…
					
					
				
		Reference in new issue