Use std::vector instead of std::list to improve performance of the parallel module

5 years ago · 3bb48ffee1
parent f1b722297e
commit 3bb48ffee1
73 changed files with 141 additions and 160 deletions
--- a/mindspore/ccsrc/parallel/context.cc
+++ b/mindspore/ccsrc/parallel/context.cc
@@ -21,7 +21,6 @@
 #include <utility>
 #include <numeric>
 #include <functional>
-#include <list>
 #include <memory>

 #include "parallel/device_manager.h"
--- a/mindspore/ccsrc/parallel/context.h
+++ b/mindspore/ccsrc/parallel/context.h
@@ -20,7 +20,6 @@
 #include <cstdint>
 #include <string>
 #include <vector>
-#include <list>
 #include <memory>

 #include "parallel/status.h"
--- a/mindspore/ccsrc/parallel/device.h
+++ b/mindspore/ccsrc/parallel/device.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_DEVICE_H_

 #include <cstdint>
-#include <list>
 #include <string>
 #include <utility>

--- a/mindspore/ccsrc/parallel/device_manager.cc
+++ b/mindspore/ccsrc/parallel/device_manager.cc
@@ -30,7 +30,7 @@ namespace mindspore {
 namespace parallel {
 DeviceManagerPtr g_device_manager = nullptr;

-Stage::Stage(const std::list<mindspore::parallel::Device>& devices, int num, int rank)
+Stage::Stage(const std::vector<mindspore::parallel::Device>& devices, int num, int rank)
    : devices_(devices), number_(num), rank_(rank) {
  gm_ = GroupManager();
 }
@@ -104,7 +104,7 @@ int32_t GetListMemberByIndex(size_t index, const RankList& devices) {
  return result;
 }

-std::shared_ptr<Device> GetListMemberByIndex(size_t index, const std::list<std::shared_ptr<Device>>& device_list) {
+std::shared_ptr<Device> GetListMemberByIndex(size_t index, const std::vector<std::shared_ptr<Device>>& device_list) {
  size_t i = 0;
  std::shared_ptr<Device> result;
  if ((device_list.empty()) || (index >= device_list.size())) {
@@ -178,7 +178,7 @@ Status DeviceManager::Init(const RankList& devices, int32_t global_device_rank,
      MS_LOG(ERROR) << "The number of 'devices' in a stage must be positive";
      return Status::FAILED;
    }
-    std::list<Device> curr_dev_list;
+    std::vector<Device> curr_dev_list;
    for (int i = 0; i < num_device; ++i) {
      curr_dev_list.push_back(*GetListMemberByIndex(global_index, devices_));
      global_index++;
@@ -278,8 +278,8 @@ RankList DeviceManager::global_device_list(int32_t stage_id, int32_t rank, int32

 Device DeviceManager::CreateNewDeviceByRank(int32_t rank) const { return Device(rank); }

-std::list<Device> DeviceManager::CreateDeviceListByRankList(RankList ranks) {
-  std::list<Device> dev_list;
+std::vector<Device> DeviceManager::CreateDeviceListByRankList(RankList ranks) {
+  std::vector<Device> dev_list;
  for (auto& rank : ranks) {
    Device one = CreateNewDeviceByRank(rank);
    dev_list.push_back(one);
@@ -312,8 +312,8 @@ std::string HashName(const std::string& origin_name) { return std::to_string(std
 // is '0-1-3-5-7'.
 std::string DeviceManager::GenerateGroupNameByRanks(RankList ranks) {
  std::string rank_list_name;
-  std::list<int32_t>::iterator it;
-  ranks.sort();  // sorted in increasing order
+  std::vector<int32_t>::iterator it;
+  std::sort(ranks.begin(), ranks.end());  // sorted in increasing order
  for (it = ranks.begin(); it != ranks.end(); ++it) {
    if (it == ranks.begin()) {
      rank_list_name = std::to_string(*it);
@@ -343,7 +343,8 @@ std::string DeviceManager::GenerateGroupNameByRanks(RankList ranks) {
 // Create the group with the given devices and the given name. The GroupManager
 // gm_ will create a new group only if there does not exit a group with the same
 // name. Otherwise, let the pointer g point to that group.
-Group DeviceManager::CreateGroup(const std::string& group_name, const std::list<mindspore::parallel::Device>& devices) {
+Group DeviceManager::CreateGroup(const std::string& group_name,
+                                 const std::vector<mindspore::parallel::Device>& devices) {
  if ((world_group() == NCCL_WORLD_GROUP) && (devices.size() != devices_.size())) {
    MS_LOG(EXCEPTION) << "Do not support sub group for nccl";
  }
@@ -360,7 +361,7 @@ Group DeviceManager::CreateGroup(const RankList& dev_ranks) {
  }

  std::string group_name = GenerateGroupNameByRanks(dev_ranks);
-  std::list<Device> dev_list = CreateDeviceListByRankList(dev_ranks);
+  auto dev_list = CreateDeviceListByRankList(dev_ranks);
  return CreateGroup(group_name, dev_list);
 }

--- a/mindspore/ccsrc/parallel/device_manager.h
+++ b/mindspore/ccsrc/parallel/device_manager.h
@@ -19,7 +19,7 @@

 #include <cstdint>
 #include <cstring>
-#include <list>
+#include <vector>
 #include <map>
 #include <memory>
 #include <string>
@@ -50,19 +50,19 @@ class Stage {
  // This class is used in pipeline-parallelization. Available devices are partitioned into multiple stages.
  // Currently, the function of pipeline-parallelization and this class are NOT implemented.
 public:
-  explicit Stage(std::list<Device> devices) : devices_(std::move(devices)), number_(0), rank_(0) {
+  explicit Stage(std::vector<Device> devices) : devices_(std::move(devices)), number_(0), rank_(0) {
    gm_ = GroupManager();
  }
-  Stage(const std::list<mindspore::parallel::Device>& devices, int num, int rank);
+  Stage(const std::vector<mindspore::parallel::Device>& devices, int num, int rank);
  ~Stage() = default;

  int GetStageNum() const { return number_; }
  size_t GetDevicesNum() const { return devices_.size(); }
-  std::list<Device> GetDevicesList() { return devices_; }
+  std::vector<Device> GetDevicesList() { return devices_; }
  int global_rank(Group* g) const;

 private:
-  std::list<Device> devices_;
+  std::vector<Device> devices_;
  int number_;
  int32_t rank_;
  GroupManager gm_;
@@ -89,10 +89,10 @@ class DeviceManager {
  RankList global_device_list(int32_t stage_id, int32_t rank, int32_t split_num) const;

  Device CreateNewDeviceByRank(int32_t rank) const;
-  std::list<Device> CreateDeviceListByRankList(RankList ranks);
+  std::vector<Device> CreateDeviceListByRankList(RankList ranks);

  std::string GenerateGroupNameByRanks(RankList dev_ranks);
-  Group CreateGroup(const std::string& group_name, const std::list<Device>& devices);
+  Group CreateGroup(const std::string& group_name, const std::vector<Device>& devices);
  Group CreateGroup(const RankList& dev_ranks);
  std::shared_ptr<Stage> GetStageById(int32_t stage_id);

@@ -108,11 +108,11 @@ class DeviceManager {
  std::string FindRankListNameByHashName(const std::string& hash_name);

 private:
-  std::list<std::shared_ptr<Device>> devices_;
+  std::vector<std::shared_ptr<Device>> devices_;
  // each stage has a list of devices
-  std::list<std::list<int32_t>> stage_devices_;
+  std::vector<std::vector<int32_t>> stage_devices_;
  std::shared_ptr<Device> device_;
-  std::list<std::shared_ptr<Stage>> stages_;
+  std::vector<std::shared_ptr<Stage>> stages_;
  GroupManager gm_;
  std::string backend_;

--- a/mindspore/ccsrc/parallel/device_matrix.cc
+++ b/mindspore/ccsrc/parallel/device_matrix.cc
@@ -21,7 +21,7 @@
 #include <utility>
 #include <numeric>
 #include <functional>
-#include <list>
+#include <vector>

 #include "parallel/status.h"
 #include "parallel/ops_info/operator_info.h"
@@ -64,7 +64,7 @@ Status DeviceMatrix::GetDevicesAlongDim(const uint32_t& dim, RankList* devices)
  }

  RankList group;
-  std::list<RankList> local_group_list;
+  std::vector<RankList> local_group_list;

  // lower than dim
  int32_t step = 1;
@@ -160,7 +160,7 @@ std::string ShapeToString(const Shape& shape) {
  return str + "]";
 }

-std::string ListToString(const std::list<int32_t>& list) {
+std::string ListToString(const std::vector<int32_t>& list) {
  std::string str = "[";
  for (auto& element : list) {
    str += std::to_string(element) + ", ";
--- a/mindspore/ccsrc/parallel/device_matrix.h
+++ b/mindspore/ccsrc/parallel/device_matrix.h
@@ -20,7 +20,6 @@
 #include <cstdint>
 #include <string>
 #include <vector>
-#include <list>

 #include "parallel/status.h"
 #include "utils/convert_utils.h"
@@ -28,7 +27,7 @@
 namespace mindspore {
 namespace parallel {

-using RankList = std::list<int32_t>;
+using RankList = std::vector<int32_t>;
 using Shape = std::vector<int32_t>;

 class DeviceMatrix {
@@ -36,7 +35,7 @@ class DeviceMatrix {
  DeviceMatrix(int32_t rank, RankList devices, Shape dev_shape);
  DeviceMatrix() = default;
  ~DeviceMatrix() = default;
-  std::list<RankList> group_list() const { return group_list_; }
+  std::vector<RankList> group_list() const { return group_list_; }
  Status CreateGroupList();
  Status GetDevicesByTensorMap(const Shape& tensor_map, RankList* rank_list);
  Status GetDevicesAlongDim(const uint32_t& dim, RankList* devices);
@@ -46,11 +45,11 @@ class DeviceMatrix {
  RankList dev_list_;
  // From low dim to high dim. eg: [D0 D1 D2 D3]
  Shape dev_shape_;
-  std::list<RankList> group_list_;
+  std::vector<RankList> group_list_;
 };

 std::string ShapeToString(const Shape& shape);
-std::string ListToString(const std::list<int32_t>& list);
+std::string ListToString(const std::vector<int32_t>& list);
 }  // namespace parallel
 }  // namespace mindspore

--- a/mindspore/ccsrc/parallel/graph_util/generate_graph.cc
+++ b/mindspore/ccsrc/parallel/graph_util/generate_graph.cc
@@ -17,7 +17,6 @@
 #include "parallel/graph_util/generate_graph.h"

 #include <algorithm>
-#include <list>
 #include <memory>
 #include <string>
 #include <utility>
--- a/mindspore/ccsrc/parallel/graph_util/generate_graph.h
+++ b/mindspore/ccsrc/parallel/graph_util/generate_graph.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_GRAPH_UTIL_GENERATE_GRAPH_H_

 #include <vector>
-#include <list>
 #include <memory>
 #include <unordered_map>
 #include <map>
--- a/mindspore/ccsrc/parallel/group_manager.cc
+++ b/mindspore/ccsrc/parallel/group_manager.cc
@@ -30,13 +30,13 @@ Group::Group() {
  devices_.clear();
 }

-Status Group::Init(const std::string &name, const std::list<Device> &devices) {
+Status Group::Init(const std::string &name, const std::vector<Device> &devices) {
  this->name_ = name;
  this->devices_ = devices;
  return Status::SUCCESS;
 }

-std::list<Device> Group::GetDevicesList() const { return devices_; }
+std::vector<Device> Group::GetDevicesList() const { return devices_; }

 bool Group::IsInThisGroup(int32_t device_rank) {
  for (auto &device : devices_) {
@@ -66,7 +66,7 @@ Status Group::GetIndex(size_t *index) {

 GroupManager::GroupManager() { groups_.clear(); }

-Status GroupManager::CreateGroup(const std::string &group_name, const std::list<Device> &devices,
+Status GroupManager::CreateGroup(const std::string &group_name, const std::vector<Device> &devices,
                                 mindspore::parallel::Group *const group) {
  // it is simple to use size to determine whether it is a world group
  uint32_t world_size = 0;
--- a/mindspore/ccsrc/parallel/group_manager.h
+++ b/mindspore/ccsrc/parallel/group_manager.h
@@ -18,7 +18,7 @@
 #define MINDSPORE_CCSRC_PARALLEL_GROUP_MANAGER_H_

 #include <cstdint>
-#include <list>
+#include <vector>
 #include <map>
 #include <string>

@@ -37,8 +37,8 @@ class Group {
 public:
  Group();
  ~Group() = default;
-  Status Init(const std::string& name, const std::list<Device>& devices);
-  std::list<Device> GetDevicesList() const;
+  Status Init(const std::string& name, const std::vector<Device>& devices);
+  std::vector<Device> GetDevicesList() const;
  std::string name() const { return name_; }
  bool IsInThisGroup(int32_t device_rank);
  Status GetIndex(size_t* index);
@@ -46,7 +46,7 @@ class Group {

 private:
  std::string name_;
-  std::list<Device> devices_;
+  std::vector<Device> devices_;
 };

 class GroupManager {
@@ -54,7 +54,7 @@ class GroupManager {
  GroupManager();
  ~GroupManager() = default;

-  Status CreateGroup(const std::string& name, const std::list<Device>& devices, Group* group);
+  Status CreateGroup(const std::string& name, const std::vector<Device>& devices, Group* group);
  Status DestroyGroup(Group* group);
  Status DestroyAllGroups();
  Status GetRankID(const std::string& name, unsigned int* rank_id);
--- a/mindspore/ccsrc/parallel/ops_info/activation_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/activation_info.h
@@ -19,7 +19,6 @@

 #include <ir/value.h>
 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/arithmetic_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_ARITHMETIC_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/batch_parallel_info.h
@@ -17,7 +17,6 @@
 #ifndef MINDSPORE_CCSRC_PARALLEL_OPS_INFO_BATCH_PARALLEL_INFO_H_
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_BATCH_PARALLEL_INFO_H_

-#include <list>
 #include <string>
 #include <unordered_map>
 #include <vector>
--- a/mindspore/ccsrc/parallel/ops_info/bias_add_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/bias_add_info.h
@@ -18,7 +18,7 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_BIAS_ADD_INFO_H_

 #include <string>
-#include <list>
+
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/comparison_function_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/comparison_function_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_COMPARISON_FUNCTION_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include "ir/value.h"
--- a/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/dropout_do_mask_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_DROPOUT_DO_MASK_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/elementary_function_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/elementary_function_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_ELEMENTARY_FUNCTION_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include "ir/value.h"
--- a/mindspore/ccsrc/parallel/ops_info/generator_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/generator_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_GENERATOR_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/get_next_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/get_next_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_GETNEXT_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/l2_normalize_info.h
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_L2_NORMALIZE_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
--- a/mindspore/ccsrc/parallel/ops_info/loss_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/loss_info.h
@@ -18,10 +18,10 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_LOSS_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
+
 #include "ir/value.h"
 #include "parallel/ops_info/operator_info.h"
 #include "parallel/ops_info/activation_info.h"
--- a/mindspore/ccsrc/parallel/ops_info/matmul_info.cc
+++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.cc
@@ -397,7 +397,7 @@ Status MatMulBase::GenerateStrategies(int32_t stage_id) {
    return FAILED;
  }
  CheckGlobalDeviceManager();
-  std::list<int32_t> dev_list = g_device_manager->GetDeviceListByStageId(stage_id);
+  std::vector<int32_t> dev_list = g_device_manager->GetDeviceListByStageId(stage_id);
  size_t dev_num = dev_list.size();
  Shape input0_shape = inputs_shape_[0], input1_shape = inputs_shape_[1];
  if (transpose_a_) {
--- a/mindspore/ccsrc/parallel/ops_info/matmul_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/matmul_info.h
@@ -18,10 +18,10 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_MATMUL_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
+
 #include "ir/value.h"
 #include "parallel/ops_info/operator_info.h"
 #include "parallel/strategy.h"
--- a/mindspore/ccsrc/parallel/ops_info/onehot_info.h
+++ b/mindspore/ccsrc/parallel/ops_info/onehot_info.h
@@ -18,10 +18,10 @@
 #define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_ONEHOT_INFO_H_

 #include <string>
-#include <list>
 #include <unordered_map>
 #include <vector>
 #include <memory>
+
 #include "ir/value.h"
 #include "parallel/ops_info/operator_info.h"
 #include "parallel/auto_parallel/operator_costmodel.h"
[Diff truncated here: the remaining changed files of this commit are not shown.]