add unittest for allocator_facade.cc

7 years ago · 21fdf8e87d
parent 64d94596ab
commit 21fdf8e87d
8 changed files with 161 additions and 9 deletions
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@ -168,7 +168,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
-    strategy.num_threads = args.cpus
+    strategy.num_threads = 0  #args.cpus
    strategy.allow_op_delay = False
    build_strategy = fluid.BuildStrategy()
    if args.reduce_strategy == "reduce":
@ -187,6 +187,8 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
        num_trainers = 1
        trainer_id = 0
    print('Use parallel_executor')
    strategy.type = 2
    exe = fluid.ParallelExecutor(
        True,
        avg_loss.name,
--- a/benchmark/fluid/models/resnet.py
+++ b/benchmark/fluid/models/resnet.py
@ -172,7 +172,7 @@ def get_model(args, is_train, main_prog, startup_prog):
    reader, dshape, class_dim = _model_reader_dshape_classdim(args, is_train)
    pyreader = None
-    trainer_count = int(os.getenv("PADDLE_TRAINERS"))
+    trainer_count = int(os.getenv("PADDLE_TRAINERS", 1))
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if args.use_reader_op:
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@ -48,8 +48,11 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
        auto_increment_allocator
        zero_size_allocator
        conditional_allocator
        retry_allocator
        cuda_device_guard)
 # Unit-test targets; each names its source file (SRCS) and the libraries it
 # links against (DEPS).
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator naive_managed_allocator best_fit_allocator locked_allocator cpu_allocator)
 # Facade smoke test added by this change.
 cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
--- a/paddle/fluid/memory/allocation/aligned_allocator.cc
+++ b/paddle/fluid/memory/allocation/aligned_allocator.cc
@ -26,6 +26,11 @@ std::shared_ptr<Allocation> ThinAlignedAllocator::AllocateShared(
    size_t size, Allocator::Attr attr) {
  return std::shared_ptr<Allocation>(Allocate(size, attr).release());
 }
 // The wrapper reports exactly what the wrapped allocator reports.
 bool ThinAlignedAllocator::IsAllocThreadSafe() const {
  const bool underlying_is_safe = underlying_allocator_->IsAllocThreadSafe();
  return underlying_is_safe;
 }
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
--- a/paddle/fluid/memory/allocation/aligned_allocator.h
+++ b/paddle/fluid/memory/allocation/aligned_allocator.h
@ -77,6 +77,8 @@ class ThinAlignedAllocator : public ManagedAllocator {
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
  bool IsAllocThreadSafe() const;
 protected:
  std::shared_ptr<ManagedAllocator> underlying_allocator_;
 };
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@ -13,7 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/memory/allocation/allocator.h"
 #include <gflags/gflags.h>
 #include <map>
 #include <unordered_map>
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
@ -24,6 +26,7 @@
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
 #include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
 #include "paddle/fluid/memory/allocation/pinned_allocator.h"
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/gpu_info.h"
@ -32,6 +35,11 @@
 #include "paddle/fluid/memory/allocation/cuda_allocator.h"
 #endif
 // gflags definition of FLAGS_gpu_allocator_retry_time (default 0).
 // BestFitAllocatorCreator reads it to choose between the plain and the
 // retrying GPU allocator.
 DEFINE_int32(
    gpu_allocator_retry_time, 0,
    "The retry time (milliseconds) when allocator fails "
    "to allocate memory. No retry if this value is not greater than 0");
 namespace paddle {
 namespace memory {
 namespace allocation {
@ -60,6 +68,7 @@ class CPUManagedAllocator : public ManagedAllocator {
      return normal_allocator_->AllocateShared(size, attr);
    }
  }
  // Unconditionally reports this allocator as thread safe.
  bool IsAllocThreadSafe() const override { return true; }
 private:
@ -86,8 +95,12 @@ class CUDAManagedAllocator : public ManagedAllocator {
      size_t capacity = available / max_chunk_size_;
      if (capacity == 1) {
        VLOG(10) << "Create BestFitAllocator with chunk_size "
                 << max_chunk_size_;
        default_allocator_ = BestFitAllocatorCreator();
      } else {
        VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
                 << max_chunk_size_ << " and capacity " << capacity;
        default_allocator_ = std::make_shared<AutoIncrementAllocator>(
            [this] { return std::move(BestFitAllocatorCreator()); }, capacity);
      }
@ -116,6 +129,7 @@ class CUDAManagedAllocator : public ManagedAllocator {
  // Forwards the request unchanged to default_allocator_ and returns the
  // uniquely-owned allocation it produces.
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    auto& backend = *default_allocator_;
    return backend.Allocate(size, attr);
  }
  // Shared-ownership counterpart of Allocate: forwards the request unchanged
  // to default_allocator_.
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    auto& backend = *default_allocator_;
    return backend.AllocateShared(size, attr);
  }
@ -123,10 +137,20 @@ class CUDAManagedAllocator : public ManagedAllocator {
  // Allocates one more raw chunk of max_chunk_size_ bytes, stores it in
  // chunks_, and builds an allocator stack on top of that chunk:
  //   BestFitAllocator -> LockedAllocator ->
  //   (NaiveManagedAllocator | RetryAllocator) -> AlignedAllocator<64u>.
  // RetryAllocator is used only when FLAGS_gpu_allocator_retry_time > 0; the
  // flag value is handed to it as the retry time (milliseconds per the flag's
  // help text).
  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
    chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
    auto* allocation = chunks_.back().get();
-    return std::make_shared<AlignedAllocator<64u>>(
+    std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
-        NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
+        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
-            new LockedAllocator(std::unique_ptr<Allocator>(
+
-                new BestFitAllocator(allocation))))));
+    if (FLAGS_gpu_allocator_retry_time <= 0) {
      VLOG(10) << "Create NaiveManagedAllocator without retry";
      return std::make_shared<AlignedAllocator<64u>>(
          NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
    } else {
      VLOG(10) << "Create RetryAllocator with retry_time "
               << FLAGS_gpu_allocator_retry_time << "ms";
      return std::make_shared<AlignedAllocator<64u>>(RetryAllocator::Create(
          std::move(unmanaged_allocator),
          static_cast<size_t>(FLAGS_gpu_allocator_retry_time)));
    }
  }
  // Unconditionally reports true. Every allocator built by
  // BestFitAllocatorCreator sits on top of a LockedAllocator.
  bool IsAllocThreadSafe() const override { return true; }
@ -141,7 +165,8 @@ class CUDAManagedAllocator : public ManagedAllocator {
 class AllocatorFacadePrivate {
 public:
-  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
+  std::unordered_map<platform::Place, std::shared_ptr<ManagedAllocator>>
      allocators_;
  ~AllocatorFacadePrivate() = default;
@ -184,13 +209,13 @@ AllocatorFacade& AllocatorFacade::Instance() {
 // Finds the allocator stored for `place` and asks it for a shared allocation
 // of `size` bytes. The lookup uses at(), which throws std::out_of_range when
 // no allocator is stored for `place`; operator[] would instead insert an
 // empty pointer and dereference it.
 std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
    const platform::Place& place, size_t size, Allocator::Attr attr) {
-  return m_->allocators_[place]->AllocateShared(size, attr);
+  return m_->allocators_.at(place)->AllocateShared(size, attr);
 }
 // Finds the allocator stored for `place` and asks it for a uniquely-owned
 // allocation of `size` bytes. The lookup uses at(), which throws
 // std::out_of_range when no allocator is stored for `place`; operator[] would
 // instead insert an empty pointer and dereference it.
 std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
                                                   size_t size,
                                                   Allocator::Attr attr) {
-  return m_->allocators_[place]->Allocate(size, attr);
+  return m_->allocators_.at(place)->Allocate(size, attr);
 }
 }  // namespace allocation
--- a/paddle/fluid/memory/allocation/allocator_facade_test.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade_test.cc
@ -0,0 +1,54 @@
 // Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include <gflags/gflags.h>
 #include <gtest/gtest.h>
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DECLARE_int32(gpu_allocator_retry_time);
 namespace paddle {
 namespace memory {
 namespace allocation {
 // Smoke test for AllocatorFacade: every allocation request must return a
 // non-null allocation.
 //
 // The CPU allocation always runs. The GPU flags and GPU allocations are
 // compiled only for CUDA builds, because this target is a plain cc_test and
 // would otherwise ask the facade for a CUDAPlace allocator in CPU-only builds.
 // NOTE(review): PADDLE_WITH_CUDA is assumed to be the macro guarding the CUDA
 // code in allocator_facade.cc -- confirm.
 TEST(allocator, allocator) {
 #ifdef PADDLE_WITH_CUDA
  // Set before the first Instance() call, as in the original ordering;
  // presumably the facade reads these flags when it builds its allocators.
  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
  FLAGS_gpu_allocator_retry_time = 500;
 #endif

  auto &instance = AllocatorFacade::Instance();

  {
    auto cpu_allocation = instance.Alloc(platform::CPUPlace(), 1024);
    ASSERT_NE(cpu_allocation, nullptr);
  }

 #ifdef PADDLE_WITH_CUDA
  {
    auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0), 1024);
    ASSERT_NE(gpu_allocation, nullptr);
  }

  {
    // Allocate 2GB gpu memory
    auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0),
                                         2 * static_cast<size_t>(1 << 30));
    ASSERT_NE(gpu_allocation, nullptr);
  }
 #endif
 }
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
--- a/paddle/fluid/platform/place.h
+++ b/paddle/fluid/platform/place.h
@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
 #include <functional>
 #include <iostream>
 #include <vector>
@ -130,5 +131,65 @@ typename Visitor::result_type VisitPlace(const Place &place,
  return boost::apply_visitor(PlaceVisitorWrapper<Visitor>(visitor), place);
 }
 // Visitor returning size_t: for whatever concrete place object it is applied
 // to, it yields the result of that object's hash() member.
 struct PlaceHashVisitor : boost::static_visitor<size_t> {
  template <typename ConcretePlace>
  size_t operator()(const ConcretePlace &concrete_place) const {
    const size_t hashed = concrete_place.hash();
    return hashed;
  }
 };
 }  // namespace platform
 }  // namespace paddle
 namespace std {
 // std::hash for CPUPlace: every CPUPlace hashes to the same constant,
 // size_t(-1).
 template <>
 struct hash<::paddle::platform::CPUPlace> {
  using result_type = size_t;
  using argument_type = ::paddle::platform::CPUPlace;

  constexpr result_type operator()(const argument_type & /*place*/) const {
    return static_cast<result_type>(-1);
  }
 };
 template <>
 struct hash<::paddle::platform::CUDAPlace> {
  using argument_type = ::paddle::platform::CUDAPlace;
  using result_type = size_t;
  inline result_type operator()(const argument_type &place) const {
    return static_cast<result_type>(place.device);
  }
 };
 // std::hash for CUDAPinnedPlace: every CUDAPinnedPlace hashes to the same
 // constant, size_t(-2).
 template <>
 struct hash<::paddle::platform::CUDAPinnedPlace> {
  using result_type = size_t;
  using argument_type = ::paddle::platform::CUDAPinnedPlace;

  constexpr result_type operator()(const argument_type & /*place*/) const {
    return static_cast<result_type>(-2);
  }
 };
 namespace {  // NOLINT
 struct PlaceHashVisitor : public boost::static_visitor<size_t> {
  template <typename Place>
  inline size_t operator()(const Place &place) const {
    return std::hash<Place>()(place);
  }
 };
 }
 template <>
 struct hash<::paddle::platform::Place> {
  using argument_type = ::paddle::platform::Place;
  using result_type = size_t;
  inline result_type operator()(const argument_type &place) const {
    return boost::apply_visitor(PlaceHashVisitor(), place);
  }
 };
 }  // namespace std