Use std::vector instead of std::list to improve the performance of the parallel module

pull/45/head
c00425699 5 years ago
parent f1b722297e
commit 3bb48ffee1

@ -21,7 +21,6 @@
#include <utility>
#include <numeric>
#include <functional>
#include <list>
#include <memory>
#include "parallel/device_manager.h"

@ -20,7 +20,6 @@
#include <cstdint>
#include <string>
#include <vector>
#include <list>
#include <memory>
#include "parallel/status.h"

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_DEVICE_H_
#include <cstdint>
#include <list>
#include <string>
#include <utility>

@ -30,7 +30,7 @@ namespace mindspore {
namespace parallel {
DeviceManagerPtr g_device_manager = nullptr;
Stage::Stage(const std::list<mindspore::parallel::Device>& devices, int num, int rank)
Stage::Stage(const std::vector<mindspore::parallel::Device>& devices, int num, int rank)
: devices_(devices), number_(num), rank_(rank) {
gm_ = GroupManager();
}
@ -104,7 +104,7 @@ int32_t GetListMemberByIndex(size_t index, const RankList& devices) {
return result;
}
std::shared_ptr<Device> GetListMemberByIndex(size_t index, const std::list<std::shared_ptr<Device>>& device_list) {
std::shared_ptr<Device> GetListMemberByIndex(size_t index, const std::vector<std::shared_ptr<Device>>& device_list) {
size_t i = 0;
std::shared_ptr<Device> result;
if ((device_list.empty()) || (index >= device_list.size())) {
@ -178,7 +178,7 @@ Status DeviceManager::Init(const RankList& devices, int32_t global_device_rank,
MS_LOG(ERROR) << "The number of 'devices' in a stage must be positive";
return Status::FAILED;
}
std::list<Device> curr_dev_list;
std::vector<Device> curr_dev_list;
for (int i = 0; i < num_device; ++i) {
curr_dev_list.push_back(*GetListMemberByIndex(global_index, devices_));
global_index++;
@ -278,8 +278,8 @@ RankList DeviceManager::global_device_list(int32_t stage_id, int32_t rank, int32
Device DeviceManager::CreateNewDeviceByRank(int32_t rank) const { return Device(rank); }
std::list<Device> DeviceManager::CreateDeviceListByRankList(RankList ranks) {
std::list<Device> dev_list;
std::vector<Device> DeviceManager::CreateDeviceListByRankList(RankList ranks) {
std::vector<Device> dev_list;
for (auto& rank : ranks) {
Device one = CreateNewDeviceByRank(rank);
dev_list.push_back(one);
@ -312,8 +312,8 @@ std::string HashName(const std::string& origin_name) { return std::to_string(std
// is '0-1-3-5-7'.
std::string DeviceManager::GenerateGroupNameByRanks(RankList ranks) {
std::string rank_list_name;
std::list<int32_t>::iterator it;
ranks.sort(); // sorted in increasing order
std::vector<int32_t>::iterator it;
std::sort(ranks.begin(), ranks.end()); // sorted in increasing order
for (it = ranks.begin(); it != ranks.end(); ++it) {
if (it == ranks.begin()) {
rank_list_name = std::to_string(*it);
@ -343,7 +343,8 @@ std::string DeviceManager::GenerateGroupNameByRanks(RankList ranks) {
// Create the group with the given devices and the given name. The GroupManager
// gm_ will create a new group only if there does not exist a group with the same
// name. Otherwise, let the pointer g point to that group.
Group DeviceManager::CreateGroup(const std::string& group_name, const std::list<mindspore::parallel::Device>& devices) {
Group DeviceManager::CreateGroup(const std::string& group_name,
const std::vector<mindspore::parallel::Device>& devices) {
if ((world_group() == NCCL_WORLD_GROUP) && (devices.size() != devices_.size())) {
MS_LOG(EXCEPTION) << "Do not support sub group for nccl";
}
@ -360,7 +361,7 @@ Group DeviceManager::CreateGroup(const RankList& dev_ranks) {
}
std::string group_name = GenerateGroupNameByRanks(dev_ranks);
std::list<Device> dev_list = CreateDeviceListByRankList(dev_ranks);
auto dev_list = CreateDeviceListByRankList(dev_ranks);
return CreateGroup(group_name, dev_list);
}

@ -19,7 +19,7 @@
#include <cstdint>
#include <cstring>
#include <list>
#include <vector>
#include <map>
#include <memory>
#include <string>
@ -50,19 +50,19 @@ class Stage {
// This class is used in pipeline-parallelization. Available devices are partitioned into multiple stages.
// Currently, the function of pipeline-parallelization and this class are NOT implemented.
public:
explicit Stage(std::list<Device> devices) : devices_(std::move(devices)), number_(0), rank_(0) {
explicit Stage(std::vector<Device> devices) : devices_(std::move(devices)), number_(0), rank_(0) {
gm_ = GroupManager();
}
Stage(const std::list<mindspore::parallel::Device>& devices, int num, int rank);
Stage(const std::vector<mindspore::parallel::Device>& devices, int num, int rank);
~Stage() = default;
int GetStageNum() const { return number_; }
size_t GetDevicesNum() const { return devices_.size(); }
std::list<Device> GetDevicesList() { return devices_; }
std::vector<Device> GetDevicesList() { return devices_; }
int global_rank(Group* g) const;
private:
std::list<Device> devices_;
std::vector<Device> devices_;
int number_;
int32_t rank_;
GroupManager gm_;
@ -89,10 +89,10 @@ class DeviceManager {
RankList global_device_list(int32_t stage_id, int32_t rank, int32_t split_num) const;
Device CreateNewDeviceByRank(int32_t rank) const;
std::list<Device> CreateDeviceListByRankList(RankList ranks);
std::vector<Device> CreateDeviceListByRankList(RankList ranks);
std::string GenerateGroupNameByRanks(RankList dev_ranks);
Group CreateGroup(const std::string& group_name, const std::list<Device>& devices);
Group CreateGroup(const std::string& group_name, const std::vector<Device>& devices);
Group CreateGroup(const RankList& dev_ranks);
std::shared_ptr<Stage> GetStageById(int32_t stage_id);
@ -108,11 +108,11 @@ class DeviceManager {
std::string FindRankListNameByHashName(const std::string& hash_name);
private:
std::list<std::shared_ptr<Device>> devices_;
std::vector<std::shared_ptr<Device>> devices_;
// each stage has a list of devices
std::list<std::list<int32_t>> stage_devices_;
std::vector<std::vector<int32_t>> stage_devices_;
std::shared_ptr<Device> device_;
std::list<std::shared_ptr<Stage>> stages_;
std::vector<std::shared_ptr<Stage>> stages_;
GroupManager gm_;
std::string backend_;

@ -21,7 +21,7 @@
#include <utility>
#include <numeric>
#include <functional>
#include <list>
#include <vector>
#include "parallel/status.h"
#include "parallel/ops_info/operator_info.h"
@ -64,7 +64,7 @@ Status DeviceMatrix::GetDevicesAlongDim(const uint32_t& dim, RankList* devices)
}
RankList group;
std::list<RankList> local_group_list;
std::vector<RankList> local_group_list;
// lower than dim
int32_t step = 1;
@ -160,7 +160,7 @@ std::string ShapeToString(const Shape& shape) {
return str + "]";
}
std::string ListToString(const std::list<int32_t>& list) {
std::string ListToString(const std::vector<int32_t>& list) {
std::string str = "[";
for (auto& element : list) {
str += std::to_string(element) + ", ";

@ -20,7 +20,6 @@
#include <cstdint>
#include <string>
#include <vector>
#include <list>
#include "parallel/status.h"
#include "utils/convert_utils.h"
@ -28,7 +27,7 @@
namespace mindspore {
namespace parallel {
using RankList = std::list<int32_t>;
using RankList = std::vector<int32_t>;
using Shape = std::vector<int32_t>;
class DeviceMatrix {
@ -36,7 +35,7 @@ class DeviceMatrix {
DeviceMatrix(int32_t rank, RankList devices, Shape dev_shape);
DeviceMatrix() = default;
~DeviceMatrix() = default;
std::list<RankList> group_list() const { return group_list_; }
std::vector<RankList> group_list() const { return group_list_; }
Status CreateGroupList();
Status GetDevicesByTensorMap(const Shape& tensor_map, RankList* rank_list);
Status GetDevicesAlongDim(const uint32_t& dim, RankList* devices);
@ -46,11 +45,11 @@ class DeviceMatrix {
RankList dev_list_;
// From low dim to high dim. eg: [D0 D1 D2 D3]
Shape dev_shape_;
std::list<RankList> group_list_;
std::vector<RankList> group_list_;
};
std::string ShapeToString(const Shape& shape);
std::string ListToString(const std::list<int32_t>& list);
std::string ListToString(const std::vector<int32_t>& list);
} // namespace parallel
} // namespace mindspore

@ -17,7 +17,6 @@
#include "parallel/graph_util/generate_graph.h"
#include <algorithm>
#include <list>
#include <memory>
#include <string>
#include <utility>

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_GRAPH_UTIL_GENERATE_GRAPH_H_
#include <vector>
#include <list>
#include <memory>
#include <unordered_map>
#include <map>

@ -30,13 +30,13 @@ Group::Group() {
devices_.clear();
}
Status Group::Init(const std::string &name, const std::list<Device> &devices) {
Status Group::Init(const std::string &name, const std::vector<Device> &devices) {
this->name_ = name;
this->devices_ = devices;
return Status::SUCCESS;
}
std::list<Device> Group::GetDevicesList() const { return devices_; }
std::vector<Device> Group::GetDevicesList() const { return devices_; }
bool Group::IsInThisGroup(int32_t device_rank) {
for (auto &device : devices_) {
@ -66,7 +66,7 @@ Status Group::GetIndex(size_t *index) {
GroupManager::GroupManager() { groups_.clear(); }
Status GroupManager::CreateGroup(const std::string &group_name, const std::list<Device> &devices,
Status GroupManager::CreateGroup(const std::string &group_name, const std::vector<Device> &devices,
mindspore::parallel::Group *const group) {
// it is simple to use size to determine whether it is a world group
uint32_t world_size = 0;

@ -18,7 +18,7 @@
#define MINDSPORE_CCSRC_PARALLEL_GROUP_MANAGER_H_
#include <cstdint>
#include <list>
#include <vector>
#include <map>
#include <string>
@ -37,8 +37,8 @@ class Group {
public:
Group();
~Group() = default;
Status Init(const std::string& name, const std::list<Device>& devices);
std::list<Device> GetDevicesList() const;
Status Init(const std::string& name, const std::vector<Device>& devices);
std::vector<Device> GetDevicesList() const;
std::string name() const { return name_; }
bool IsInThisGroup(int32_t device_rank);
Status GetIndex(size_t* index);
@ -46,7 +46,7 @@ class Group {
private:
std::string name_;
std::list<Device> devices_;
std::vector<Device> devices_;
};
class GroupManager {
@ -54,7 +54,7 @@ class GroupManager {
GroupManager();
~GroupManager() = default;
Status CreateGroup(const std::string& name, const std::list<Device>& devices, Group* group);
Status CreateGroup(const std::string& name, const std::vector<Device>& devices, Group* group);
Status DestroyGroup(Group* group);
Status DestroyAllGroups();
Status GetRankID(const std::string& name, unsigned int* rank_id);

@ -19,7 +19,6 @@
#include <ir/value.h>
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_ARITHMETIC_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -17,7 +17,6 @@
#ifndef MINDSPORE_CCSRC_PARALLEL_OPS_INFO_BATCH_PARALLEL_INFO_H_
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_BATCH_PARALLEL_INFO_H_
#include <list>
#include <string>
#include <unordered_map>
#include <vector>

@ -18,7 +18,7 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_BIAS_ADD_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_COMPARISON_FUNCTION_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include "ir/value.h"

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_DROPOUT_DO_MASK_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_ELEMENTARY_FUNCTION_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include "ir/value.h"

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_GENERATOR_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_GETNEXT_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_L2_NORMALIZE_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>

@ -18,10 +18,10 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_LOSS_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>
#include "ir/value.h"
#include "parallel/ops_info/operator_info.h"
#include "parallel/ops_info/activation_info.h"

@ -397,7 +397,7 @@ Status MatMulBase::GenerateStrategies(int32_t stage_id) {
return FAILED;
}
CheckGlobalDeviceManager();
std::list<int32_t> dev_list = g_device_manager->GetDeviceListByStageId(stage_id);
std::vector<int32_t> dev_list = g_device_manager->GetDeviceListByStageId(stage_id);
size_t dev_num = dev_list.size();
Shape input0_shape = inputs_shape_[0], input1_shape = inputs_shape_[1];
if (transpose_a_) {

@ -18,10 +18,10 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_MATMUL_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>
#include "ir/value.h"
#include "parallel/ops_info/operator_info.h"
#include "parallel/strategy.h"

@ -18,10 +18,10 @@
#define MINDSPORE_CCSRC_PARALLEL_OPS_INFO_ONEHOT_INFO_H_
#include <string>
#include <list>
#include <unordered_map>
#include <vector>
#include <memory>
#include "ir/value.h"
#include "parallel/ops_info/operator_info.h"
#include "parallel/auto_parallel/operator_costmodel.h"

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save