commit
cacebd1211
@ -0,0 +1,220 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include "minddata/dataset/engine/cache/cache_hw.h"
|
||||||
|
#ifdef NUMA_ENABLED
|
||||||
|
#include <numa.h>
|
||||||
|
#endif
|
||||||
|
#include <sched.h>
|
||||||
|
#include <cstdlib>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cctype>
|
||||||
|
#include <fstream>
|
||||||
|
#include <regex>
|
||||||
|
#include <thread>
|
||||||
|
#include "utils/log_adapter.h"
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
// Probe basic hardware information at construction time: logical cpu count and,
// when numa support is compiled in and available, the memory size of each node.
CacheServerHW::CacheServerHW() {
  num_cpus_ = std::thread::hardware_concurrency();
  MS_LOG(DEBUG) << "Number of cpu(s) : " << num_cpus_;
#ifdef NUMA_ENABLED
  if (numa_enabled()) {
    MS_LOG(WARNING) << "Numa support enabled";
    // Log total and free physical memory of every numa node.
    const auto last_node = numa_max_node();
    for (auto node = 0; node <= last_node; ++node) {
      int64_t free_avail;
      const int64_t mem_avail = numa_node_size(node, &free_avail);
      MS_LOG(INFO) << "Total physical/free RAM in bytes at node " << node << " : " << mem_avail << "/" << free_avail;
    }
  }
#endif
}
|
||||||
|
|
||||||
|
// Query the physical RAM size from sysconf: page count times page size.
int64_t CacheServerHW::GetTotalSystemMemory() {
  const int64_t num_pages = sysconf(_SC_PHYS_PAGES);
  const int64_t page_size = sysconf(_SC_PAGE_SIZE);
  const int64_t total = num_pages * page_size;
  MS_LOG(INFO) << "Total physical RAM in bytes: " << total;
  return total;
}
|
||||||
|
|
||||||
|
/// \brief Set the process-wide default numa memory allocation policy.
/// No-op when numa support is not compiled in or not available at runtime.
/// \param policy Only kLocal and kInterleave are supported as process defaults;
///               kOnNode/kPreferred are rejected, kNone leaves the policy untouched.
/// \return Status::OK, or an error status for an unsupported policy.
Status CacheServerHW::SetDefaultMemoryPolicy(CachePoolPolicy policy) {
#ifdef NUMA_ENABLED
  if (numa_enabled()) {
    // Set our default memory policy.
    switch (policy) {
      case kLocal:
        numa_set_localalloc();
        MS_LOG(DEBUG) << "Setting memory default policy to local node. Low level code may override the setting";
        break;
      case kInterleave:
        numa_set_interleave_mask(numa_all_nodes_ptr);
        MS_LOG(DEBUG) << "Numa affinity is turned off. Use interleave memory policy as default.";
        break;
      case kOnNode:
      case kPreferred:
        // The macro below returns from the function, so no break is needed
        // (the original had an unreachable break here).
        RETURN_STATUS_UNEXPECTED("Unsupported memory policy");
      case kNone:
      default:
        // No action taken.
        break;
    }
  }
#endif
  return Status::OK();
}
|
||||||
|
|
||||||
|
/// \brief Scan /sys/devices/system/node to discover the numa topology without the
/// numa library. Populates numa_cpuset_ and numa_cpu_cnt_ with the cpu mask and
/// cpu count of every node found.
/// \return Status::OK even when no topology is found (the server can start without
///         it); an error only on file open/read failures of a cpulist file.
Status CacheServerHW::GetNumaNodeInfo() {
  std::set<Path> numa_nodes_;
  Path node(kSysNodePath);
  auto it = Path::DirIterator::OpenDirectory(&node);
  if (it == nullptr) {
    MS_LOG(WARNING) << "Unable to open directory " << kSysNodePath << ". Skip scanning hardware info";
    return Status::OK();
  }
  // True iff str is a non-empty string of decimal digits. Rejecting the empty
  // string prevents a bare "node" entry from being treated as numa node 0.
  auto isdigit_string = [](const char *str) -> bool {
    if (*str == '\0') {
      return false;
    }
    for (size_t i = 0; i < strlen(str); ++i) {
      // Cast avoids UB for negative char values passed to std::isdigit.
      if (!std::isdigit(static_cast<unsigned char>(str[i]))) {
        return false;
      }
    }
    return true;
  };
  // Look for name starts with 'node' and followed by digits.
  const char kNodeName[] = "node";
  // Use the actual prefix length instead of a hard-coded 4 so the prefix and
  // its length cannot drift apart.
  const size_t kNodeNameLen = strlen(kNodeName);
  while (it->hasNext()) {
    auto p = it->next();
    const std::string entry = p.Basename();
    const char *name = entry.data();
    if (strncmp(name, kNodeName, kNodeNameLen) == 0 && isdigit_string(name + kNodeNameLen)) {
      numa_nodes_.insert(p);
    }
  }
  // There should be at least one. But if not found in any case, just move on the
  // rest of the server start up.
  if (numa_nodes_.empty()) {
    MS_LOG(WARNING) << "No numa nodes ? Skip scanning hardware info";
    return Status::OK();
  }
  // For each numa node, get a list of CPU that is associated with it.
  const char kCpuList[] = "cpulist";
  auto r = std::regex("[0-9]*-[0-9]*");
  for (Path p : numa_nodes_) {
    // Keep the basename in a named string: calling .data() on the temporary
    // returned by Basename() could leave the pointer dangling.
    const std::string node_dir = p.Basename();
    numa_id_t numa_node = strtol(node_dir.data() + kNodeNameLen, nullptr, 10);
    Path f = p / kCpuList;
    std::ifstream fs(f.toString());
    CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + f.toString());
    std::string cpu_string;
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    int32_t cpu_cnt = 0;
    while (getline(fs, cpu_string)) {
      // Parse cpu_string (e.g. "0-7,16-23"): each regex match is an inclusive
      // "min-max" range of cpu ids belonging to this node.
      std::sregex_iterator iter(cpu_string.begin(), cpu_string.end(), r);
      std::sregex_iterator end;
      while (iter != end) {
        auto match = iter->str();
        auto pos = match.find_first_of('-');
        std::string min = match.substr(0, pos);
        std::string max = match.substr(pos + 1);
        cpu_id_t cpu_min = strtol(min.data(), nullptr, 10);
        cpu_id_t cpu_max = strtol(max.data(), nullptr, 10);
        MS_LOG(DEBUG) << "Numa node " << numa_node << " CPU(s) : " << cpu_min << "-" << cpu_max;
        for (int i = cpu_min; i <= cpu_max; ++i) {
          CPU_SET(i, &cpuset);
          ++cpu_cnt;
        }
        ++iter;
      }
    }
    CHECK_FAIL_RETURN_UNEXPECTED(!fs.bad(), "Fail to read file: " + f.toString());
    fs.close();
    // Remember which cpu is attached to this numa node.
    numa_cpuset_.emplace(numa_node, cpuset);
    numa_cpu_cnt_.emplace(numa_node, cpu_cnt);
  }
  MS_LOG(DEBUG) << "Number of numa nodes : " << numa_cpuset_.size();
  return Status::OK();
}
|
||||||
|
|
||||||
|
/// \brief Pin a task's native thread to the cpu set of the given numa node.
/// \param tk Task whose native thread handle is pinned.
/// \param numa_node Node whose cpu mask (from GetNumaNodeInfo) is applied.
/// \return error Status if the node is unknown or pthread_setaffinity_np fails.
Status CacheServerHW::SetAffinity(const Task &tk, numa_id_t numa_node) {
  auto r = numa_cpuset_.find(numa_node);
  if (r != numa_cpuset_.end()) {
    auto err = pthread_setaffinity_np(tk.GetNativeHandle(), sizeof(r->second), &r->second);
    if (err) {
      // pthread functions return the error number directly and do NOT set errno,
      // so report 'err' (the original reported a stale errno). Also fixed the
      // "affiity" typo in the message.
      std::string errMsg = "Unable to set affinity. Errno = " + std::to_string(err);
      RETURN_STATUS_UNEXPECTED(errMsg);
    }
  } else {
    RETURN_STATUS_UNEXPECTED("Numa node " + std::to_string(numa_node) + " not found");
  }
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Return the ids of all cpus attached to the given numa node, in ascending
// order. An unknown node yields an empty list.
std::vector<cpu_id_t> CacheServerHW::GetCpuList(numa_id_t numa_id) {
  std::vector<cpu_id_t> cpu_list;
  auto entry = numa_cpuset_.find(numa_id);
  if (entry == numa_cpuset_.end()) {
    return cpu_list;
  }
  auto &mask = entry->second;
  for (auto cpu = 0; cpu < num_cpus_; ++cpu) {
    if (CPU_ISSET(cpu, &mask)) {
      cpu_list.push_back(cpu);
    }
  }
  return cpu_list;
}
|
||||||
|
|
||||||
|
/// \brief Return the numa node the calling thread is currently running on.
/// With libnuma the kernel is asked directly; otherwise the cpu id is matched
/// against the cpu sets scanned from sysfs (defaults to node 0 if not found).
numa_id_t CacheServerHW::GetMyNode() const {
  numa_id_t node_id = 0;
  auto cpu = sched_getcpu();
#ifdef NUMA_ENABLED
  node_id = numa_node_of_cpu(cpu);
#else
  bool found = false;
  // Iterate by const reference: the original `for (auto it : numa_cpuset_)`
  // copied a full pair<numa_id_t, cpu_set_t> on every iteration.
  for (const auto &kv : numa_cpuset_) {
    if (CPU_ISSET(cpu, &kv.second)) {
      node_id = kv.first;
      found = true;
      break;
    }
  }
  MS_LOG(DEBUG) << "cpu id " << cpu << " found : " << std::boolalpha << found;
#endif
  return node_id;
}
|
||||||
|
|
||||||
|
// Ask the kernel to interleave the pages of [ptr, ptr+sz) across all numa
// nodes. Does nothing when numa is unavailable.
void CacheServerHW::InterleaveMemory(void *ptr, size_t sz) {
#ifdef NUMA_ENABLED
  if (!numa_enabled()) {
    return;
  }
  numa_interleave_memory(ptr, sz, numa_all_nodes_ptr);
#endif
}
|
||||||
|
|
||||||
|
// True only when built with NUMA_ENABLED and libnuma reports the system
// supports the numa API at runtime.
bool CacheServerHW::numa_enabled() {
#ifdef NUMA_ENABLED
  return numa_available() != -1;
#else
  return false;
#endif
}
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
@ -0,0 +1,81 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CACHE_HW_H_
|
||||||
|
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CACHE_HW_H_
|
||||||
|
|
||||||
|
#ifdef NUMA_ENABLED
|
||||||
|
#include <numa.h>
|
||||||
|
#endif
|
||||||
|
#include <sched.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "minddata/dataset/engine/cache/cache_common.h"
|
||||||
|
#include "minddata/dataset/util/memory_pool.h"
|
||||||
|
#include "minddata/dataset/util/path.h"
|
||||||
|
#include "minddata/dataset/util/status.h"
|
||||||
|
#include "minddata/dataset/util/task.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
/// \brief Hardware abstraction for the cache server: cpu count, numa topology,
/// thread affinity and default memory policy. Works with or without libnuma;
/// without it, the topology is scanned from /sys/devices/system/node.
class CacheServerHW {
 public:
  /// \brief Probe the number of cpu(s); with numa available, also log per-node memory.
  CacheServerHW();
  ~CacheServerHW() = default;

  /// \brief Get Numa node info without using numa library
  /// \return Status object
  Status GetNumaNodeInfo();

  /// \brief Set thread affinity
  /// \param tk Task whose native thread handle is pinned.
  /// \param numa_node Node whose cpu set (from GetNumaNodeInfo) is applied.
  Status SetAffinity(const Task &tk, numa_id_t numa_node);

  /// \brief Get total number of cpu(s)
  int32_t GetCpuCount() const { return num_cpus_; }

  /// \brief Get total number of numa nodes; reports 1 when none were discovered.
  int32_t GetNumaNodeCount() const { return numa_cpuset_.empty() ? 1 : numa_cpuset_.size(); }

  /// \brief Get a list of cpu for a given numa node.
  std::vector<cpu_id_t> GetCpuList(numa_id_t numa_id);

  /// \brief True iff compiled with NUMA_ENABLED and numa_available() succeeds at runtime.
  static bool numa_enabled();

  /// \brief Return the numa the current thread is running on.
  numa_id_t GetMyNode() const;

  /// \brief Interleave a given memory block. Used by shared memory only.
  static void InterleaveMemory(void *ptr, size_t sz);

  /// \brief Set default memory policy.
  static Status SetDefaultMemoryPolicy(CachePoolPolicy);

  /// \brief This returns the size (in bytes) of the physical RAM on the machine.
  /// \return the size (in bytes) of the physical RAM on the machine.
  static int64_t GetTotalSystemMemory();

 private:
  // Root of the sysfs numa topology consumed by GetNumaNodeInfo().
  constexpr static char kSysNodePath[] = "/sys/devices/system/node";
  int32_t num_cpus_;                            // logical processor count
  std::map<numa_id_t, cpu_set_t> numa_cpuset_;  // per-node cpu mask
  std::map<numa_id_t, int32_t> numa_cpu_cnt_;   // per-node cpu count
};
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CACHE_HW_H_
|
@ -0,0 +1,224 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
#include <limits>
|
||||||
|
#include "minddata/dataset/engine/cache/cache_hw.h"
|
||||||
|
#include "minddata/dataset/engine/cache/cache_numa.h"
|
||||||
|
#include "minddata/dataset/util/random.h"
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
// Construct the pool: carve the permitted fraction of physical RAM into a set of
// arenas (at most one per cpu), optionally pinning each arena to a numa node.
// memory_cap_ reflects only the arenas that were actually allocated.
NumaMemoryPool::NumaMemoryPool(std::shared_ptr<CacheServerHW> hw, float memory_cap_ratio)
    : hw_(std::move(hw)), memory_cap_ratio_(memory_cap_ratio) {
  int64_t total_avail = 0;
  // We will create a number of small Arenas to spread out the server threads so it
  // will be less contention. If we link with the numa library, i.e. if
  // NUMA_ENABLED is defined, we will make use of the low level numa library such that
  // each Arena solely comes from one particular socket.
  // The total number of Arenas will be controlled under the number of cpus.
  auto num_cpus = hw_->GetCpuCount();
  memory_segments_.reserve(num_cpus);
  arena_list_.reserve(num_cpus);
  // One mutex per potential arena slot; slots index both vectors above.
  mux_ = std::make_unique<std::mutex[]>(num_cpus);
  auto num_memory_nodes = num_cpus;
  int64_t max_avail = CacheServerHW::GetTotalSystemMemory() * memory_cap_ratio_;
  int64_t arena_sz = max_avail / num_memory_nodes;
  // If arena_sz is too small, lower the number of Arenas.
  // Minimum arena size is int32 max rounded up to a 4K boundary; shrink the
  // arena count to compensate, falling back to one arena holding everything.
  if (arena_sz < std::numeric_limits<int32_t>::max()) {
    arena_sz = round_up_4K(std::numeric_limits<int32_t>::max());
    num_memory_nodes = max_avail / arena_sz;
    if (num_memory_nodes == 0) {
      num_memory_nodes = 1;
      arena_sz = max_avail;
    }
  }
  MS_LOG(INFO) << "Creating " << num_memory_nodes << " number of arena. Each one of size " << arena_sz;

#ifdef NUMA_ENABLED
  if (numa_available() != -1) {
    // Round-robin the arenas across the numa nodes so memory is spread evenly.
    auto num_numa_nodes = hw_->GetNumaNodeCount();
    numa_id_t node_id = 0;
    for (auto i = 0; i < num_memory_nodes; ++i) {
      auto success = CreateMultipleArenas(arena_sz, node_id++ % num_numa_nodes, 1);
      total_avail += success * arena_sz;
    }
  } else {
    auto success = CreateMultipleArenas(arena_sz, 0, num_memory_nodes);
    total_avail += success * arena_sz;
  }
#else
  auto success = CreateMultipleArenas(arena_sz, 0, num_memory_nodes);
  total_avail += success * arena_sz;
#endif
  // Only successfully allocated arenas count toward the cap.
  memory_cap_ = total_avail;
  MS_LOG(WARNING) << "Memory pool created. Total available memory " << memory_cap_ << " spread in " << nodes_.size()
                  << " arenas";
  int32_t slot = 0;
  // Set up a map for future easy access.
  // numa_map_[node] lists the arena slots that live on that node.
  for (auto node_id : nodes_) {
    numa_map_[node_id].push_back(slot);
    ++slot;
  }
}
|
||||||
|
|
||||||
|
// Allocate up to repeat_count arenas of segment_sz bytes on node_id (plain
// malloc when numa is not compiled in). Stops at the first allocation failure.
// Returns the number of arenas actually created.
int32_t NumaMemoryPool::CreateMultipleArenas(int64_t segment_sz, numa_id_t node_id, int32_t repeat_count) {
  int32_t success = 0;
  for (auto i = 0; i < repeat_count; ++i) {
#ifdef NUMA_ENABLED
    void *ptr = numa_alloc_onnode(segment_sz, node_id);
#else
    void *ptr = malloc(segment_sz);
#endif
    if (ptr != nullptr) {
      // Record the raw segment, its arena wrapper, and its node in parallel
      // vectors; the shared index is the pool's "slot".
      memory_segments_.emplace_back(ptr, segment_sz);
      arena_list_.push_back(std::make_unique<ArenaImpl>(ptr, segment_sz));
      nodes_.push_back(node_id);
      ++success;
    } else {
      // Skip the rest.
      break;
    }
  }
  MS_LOG(DEBUG) << "Allocate " << success << " arenas from node " << node_id;
  return success;
}
|
||||||
|
|
||||||
|
// Give every memory segment back to the allocator that produced it
// (numa_free when numa is compiled in, plain free otherwise).
NumaMemoryPool::~NumaMemoryPool() {
  for (auto &segment : memory_segments_) {
#ifdef NUMA_ENABLED
    numa_free(segment.first, segment.second);
#else
    free(segment.first);
#endif
  }
}
|
||||||
|
|
||||||
|
// Carve n bytes out of one of the arenas. When numa aware, the search starts at
// the caller's own node and walks the nodes round-robin; within a node a random
// slot is tried first to spread lock contention. When not numa aware, a random
// slot is tried first across all arenas. Returns kOutOfMemory only after every
// arena has been tried.
Status NumaMemoryPool::Allocate(size_t n, void **p) {
  RETURN_UNEXPECTED_IF_NULL(p);
  auto mt = GetRandomDevice();
  Status rc;
  void *ptr = nullptr;
  auto num_segments = memory_segments_.size();
  CHECK_FAIL_RETURN_UNEXPECTED(num_segments > 0, "No numa nodes available");
  if (NumaAware()) {
    auto num_numa_nodes = hw_->GetNumaNodeCount();
    // We will start from the numa node this worker id is running on and do a round robin search.
    numa_id_t start = hw_->GetMyNode();
    numa_id_t node_id = start;
    do {
      auto it = numa_map_.find(node_id);
      if (it != numa_map_.end()) {
        auto &slots = it->second;
        auto num_slots = slots.size();
        // Random starting slot on this node, then try its arenas in order.
        std::uniform_int_distribution<int32_t> distribution(0, num_slots - 1);
        auto start_slot = distribution(mt);
        int32_t inx = start_slot;
        do {
          int32_t k = slots.at(inx);
          // Each arena has its own lock; only the chosen slot is serialized.
          std::unique_lock lock_x(mux_[k]);
          auto &impl = arena_list_.at(k);
          rc = impl->Allocate(n, &ptr);
          if (rc.IsOk()) {
            *p = ptr;
            break;
          } else if (rc.IsOutofMemory()) {
            // This arena is full; try the next slot on the same node.
            inx = (inx + 1) % num_slots;
          } else {
            return rc;
          }
        } while (inx != start_slot);
      }
      // We have done searching for this numa node. If not found, move to the next node.
      if (ptr == nullptr) {
        node_id = (node_id + 1) % num_numa_nodes;
      } else {
        break;
      }
    } while (node_id != start);
  } else {
    // If not numa aware, just randomly pick a slot.
    std::uniform_int_distribution<int32_t> distribution(0, num_segments - 1);
    auto start_slot = distribution(mt);
    int32_t slot = start_slot;
    do {
      std::unique_lock lock_x(mux_[slot]);
      auto &impl = arena_list_.at(slot);
      rc = impl->Allocate(n, &ptr);
      if (rc.IsOk()) {
        *p = ptr;
        break;
      } else if (rc.IsOutofMemory()) {
        // Make the next arena and continue.
        slot = (slot + 1) % num_segments;
      } else {
        return rc;
      }
    } while (slot != start_slot);
  }
  // Handle the case we have done one round robin search.
  if (ptr == nullptr) {
    return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__);
  }
  return rc;
}
|
||||||
|
|
||||||
|
void NumaMemoryPool::Deallocate(void *p) {
|
||||||
|
// Find out which numa slot it comes from.
|
||||||
|
auto slot = Locate(p);
|
||||||
|
MS_ASSERT(slot != -1);
|
||||||
|
std::unique_lock lock_x(mux_[slot]);
|
||||||
|
auto &impl = arena_list_.at(slot);
|
||||||
|
impl->Deallocate(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
int NumaMemoryPool::PercentFree() const {
|
||||||
|
int percent_free = 0;
|
||||||
|
int num_arena = 0;
|
||||||
|
for (auto const &p : arena_list_) {
|
||||||
|
percent_free += p->PercentFree();
|
||||||
|
num_arena++;
|
||||||
|
}
|
||||||
|
if (num_arena) {
|
||||||
|
return percent_free / num_arena;
|
||||||
|
} else {
|
||||||
|
return 100;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Return the slot index of the memory segment containing p.
/// \param p Pointer previously handed out by this pool.
/// \return slot index into memory_segments_/arena_list_, or -1 if p is not
///         inside any segment.
int32_t NumaMemoryPool::Locate(void *p) const {
  const char *mem = reinterpret_cast<const char *>(p);
  // size_t loop index avoids the signed/unsigned comparison in the original.
  for (size_t slot = 0; slot < memory_segments_.size(); ++slot) {
    // Bind by const reference: the original copied the pair on every iteration.
    const auto &elem = memory_segments_.at(slot);
    const char *q = reinterpret_cast<const char *>(elem.first);
    if (mem >= q && mem < q + elem.second) {
      return static_cast<int32_t>(slot);
    }
  }
  return -1;
}
|
||||||
|
|
||||||
|
std::vector<numa_id_t> NumaMemoryPool::GetAvailableNodes() const {
|
||||||
|
std::vector<numa_id_t> v;
|
||||||
|
std::transform(numa_map_.begin(), numa_map_.end(), std::back_inserter(v),
|
||||||
|
[](const std::pair<numa_id_t, std::vector<int32_t>> &v) { return v.first; });
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
@ -0,0 +1,195 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CACHE_NUMA_H_
|
||||||
|
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CACHE_NUMA_H_
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include "minddata/dataset/engine/cache/cache_hw.h"
|
||||||
|
#include "minddata/dataset/util/arena.h"
|
||||||
|
#include "minddata/dataset/util/memory_pool.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
/// \brief An allocator but for a particular numa node.
|
||||||
|
/// \brief An allocator but for a particular numa node.
/// Models the standard Allocator requirements so it can back STL containers.
/// When numa is unavailable (not compiled in, or numa_available() fails at
/// runtime) it degrades to plain malloc/free.
template <typename T>
class NumaAllocator {
 public:
  /// \param node_id Numa node this allocator draws memory from (policy permitting).
  /// \param policy Placement policy used by allocate(): preferred/local/interleave/on-node.
  explicit NumaAllocator(numa_id_t node_id, CachePoolPolicy policy)
      : policy_(policy), numa_enabled_(false), node_id_(node_id) {
#ifdef NUMA_ENABLED
    numa_enabled_ = numa_available() != -1;
#endif
  }
  ~NumaAllocator() = default;

  /// Rebinding copy constructor required by the Allocator concept.
  template <typename U>
  explicit NumaAllocator(NumaAllocator<U> const &rhs)
      : policy_(rhs.policy_), numa_enabled_(rhs.numa_enabled_), node_id_(rhs.node_id_) {}

  // NOTE(review): comparison is against Allocator<U>, not NumaAllocator<U>, and
  // only the node id participates (policy_ is ignored) — confirm this asymmetry
  // is intentional.
  template <typename U>
  bool operator==(Allocator<U> const &rhs) const {
    return node_id_ == rhs.node_id_;
  }

  template <typename U>
  bool operator!=(Allocator<U> const &rhs) const {
    return node_id_ != rhs.node_id_;
  }

  template <typename U>
  friend class NumaAllocator;

  // Standard allocator member types.
  using value_type = T;
  using pointer = T *;
  using const_pointer = const T *;
  using reference = T &;
  using const_reference = const T &;
  using size_type = uint64_t;
  using difference_type = std::ptrdiff_t;

  // NOTE(review): rebind names Allocator<U> rather than NumaAllocator<U>, so a
  // container that rebinds will not get numa placement — verify this is intended.
  template <typename U>
  struct rebind {
    using other = Allocator<U>;
  };

  using propagate_on_container_copy_assignment = std::true_type;
  using propagate_on_container_move_assignment = std::true_type;
  using propagate_on_container_swap = std::true_type;

  /// Allocate memory on this node only. Return nullptr if no memory on this numa node.
  /// \note. This version will not throw if we can't allocate memory from this node.
  /// User must check if the pointer returned is null or not.
  pointer allocate(std::size_t n) noexcept {
    auto sz = n * sizeof(T);
    void *p = nullptr;
#ifdef NUMA_ENABLED
    if (numa_enabled_) {
      // Dispatch on the placement policy chosen at construction time.
      switch (policy_) {
        case kPreferred:
          numa_set_preferred(node_id_);
          p = numa_alloc(sz);
          break;
        case kLocal:
          p = numa_alloc_local(sz);
          break;
        case kInterleave:
          p = numa_alloc_interleaved(sz);
          break;
        case kOnNode:
          p = numa_alloc_onnode(sz, node_id_);
          break;
        case kNone:
        default:
          p = numa_alloc(sz);
          break;
      }
    } else {
      p = malloc(sz);
    }
#else
    p = malloc(sz);
#endif
    return reinterpret_cast<pointer>(p);
  }

  /// Free a memory allocated on this node.
  /// Must mirror allocate(): numa-allocated memory goes back through numa_free.
  void deallocate(pointer p, std::size_t n) noexcept {
#ifdef NUMA_ENABLED
    if (numa_enabled_) {
      numa_free(p, n * sizeof(T));
    } else {
      free(p);
    }
#else
    free(p);
#endif
  }

  /// \brief Allow one to change to another numa node
  void SetNodeId(numa_id_t node_id) { node_id_ = node_id; }

  /// \brief Getter for node_id
  numa_id_t GetNodeId() const { return node_id_; }

  /// \brief Getter for policy
  CachePoolPolicy GetPolicy() const { return policy_; }

 private:
  CachePoolPolicy policy_;  // placement policy applied by allocate()
  bool numa_enabled_;       // runtime numa availability, probed in the ctor
  numa_id_t node_id_;       // target numa node
};
|
||||||
|
|
||||||
|
/// \brief A NumaMemoryPool is like a CircularPool but all the arenas have already been allocated
|
||||||
|
/// and each one comes from a numa socket. Memory is allocated using OnNode policy. That is,
|
||||||
|
/// it is solely comes from one particular numa node, and is not interleaved.
|
||||||
|
/// \brief A NumaMemoryPool is like a CircularPool but all the arenas have already been allocated
/// and each one comes from a numa socket. Memory is allocated using OnNode policy. That is,
/// it is solely comes from one particular numa node, and is not interleaved.
class NumaMemoryPool : public MemoryPool {
 public:
  /// \param hw Hardware info provider (cpu count, numa topology).
  /// \param memory_cap_ratio Fraction of physical RAM the pool may claim.
  explicit NumaMemoryPool(std::shared_ptr<CacheServerHW> hw, float memory_cap_ratio);
  ~NumaMemoryPool() override;

  // As a derived class, we override the following functions
  Status Allocate(size_t size, void **pVoid) override;
  void Deallocate(void *pVoid) override;
  // Reallocation is intentionally unsupported for this pool.
  Status Reallocate(void **pVoid, size_t old_sz, size_t new_sz) override { RETURN_STATUS_UNEXPECTED("Not supported"); }
  uint64_t get_max_size() const override { return std::numeric_limits<uint64_t>::max(); }
  int PercentFree() const override;

  /// \brief Return if the memory pool is numa aware
  bool NumaAware() const { return CacheServerHW::numa_enabled(); }

  /// \brief. This returns all the numa nodes that we are able to allocate memory from.
  std::vector<numa_id_t> GetAvailableNodes() const;

  /// \brief. Given a pointer (allocated from this pool), return the numa node where it is located.
  /// \note. -1 is returned if not found.
  numa_id_t FindNode(void *p) const {
    auto slot = Locate(p);
    if (slot != -1) {
      return nodes_.at(slot);
    } else {
      return -1;
    }
  }

  /// \brief Return maximum available memory
  int64_t GetAvailableMemory() const { return memory_cap_; }

 private:
  std::shared_ptr<CacheServerHW> hw_;                        // hardware/topology info
  float memory_cap_ratio_;                                   // requested fraction of physical RAM
  int64_t memory_cap_;                                       // bytes actually reserved
  std::vector<std::pair<void *, int64_t>> memory_segments_;  // raw segment + size, one per arena slot
  std::vector<std::unique_ptr<ArenaImpl>> arena_list_;       // arena per segment (parallel to memory_segments_)
  std::unique_ptr<std::mutex[]> mux_;                        // one lock per arena slot
  std::vector<numa_id_t> nodes_;                             // numa node of each slot (parallel to arena_list_)
  std::map<numa_id_t, std::vector<int32_t>> numa_map_;       // node id -> arena slots on that node

  /// \brief. Returns the slot that a given memory comes from.
  /// \return slot from numa_segments. -1 if not found.
  int32_t Locate(void *p) const;

  /// If numa library is not linked, or numa_available() return -1, we will fall back to this method.
  int32_t CreateMultipleArenas(int64_t segment_sz, numa_id_t node_id, int32_t repeat_count);
};
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_CACHE_NUMA_H_
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,32 @@
|
|||||||
|
# Tag every source in this directory with the MD submodule id used by the logger.
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)

# The perf executables are built only when the cache feature itself is enabled.
if (ENABLE_CACHE)
    # Generate protobuf sources shared by both executables.
    ms_protobuf_generate(CACHE_PERF_PROTO_SRCS CACHE_PERF_PROTO_HDRS cache_perf.proto)

    add_executable(cache_perf cache_perf.cc cache_msg.cc cache_perf_run.cc ${CACHE_PERF_PROTO_SRCS})
    target_link_libraries(cache_perf
            _c_dataengine
            _c_mindrecord
            mindspore::protobuf
            mindspore_gvar
            ${PYTHON_LIBRARIES}
            pthread)

    if (USE_GLOG)
        target_link_libraries(cache_perf mindspore::glog)
    endif ()

    add_executable(cache_pipeline cache_pipeline.cc cache_msg.cc cache_pipeline_run.cc ${CACHE_PERF_PROTO_SRCS})
    target_link_libraries(cache_pipeline
            _c_dataengine
            _c_mindrecord
            mindspore::protobuf
            mindspore_gvar
            ${PYTHON_LIBRARIES}
            pthread)

    if (USE_GLOG)
        target_link_libraries(cache_pipeline mindspore::glog)
    endif ()
endif ()
|
@ -0,0 +1,48 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/cache/perf/cache_msg.h"
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/ipc.h>
|
||||||
|
#include <sys/msg.h>
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
// Post the message to the System V queue without blocking; any failure
// (including a full queue) is surfaced to the caller as an error status.
Status CachePerfMsg::Send(int32_t qID) {
  if (msgsnd(qID, reinterpret_cast<void *>(&small_msg_), sizeof(small_msg_.body.msg), IPC_NOWAIT) == -1) {
    std::string errMsg = "Failed to call msgsnd. Errno = " + std::to_string(errno);
    RETURN_STATUS_UNEXPECTED(errMsg);
  }
  return Status::OK();
}
|
||||||
|
|
||||||
|
// Block until a message arrives or the queue is removed (which happens when the
// destructor is called); queue removal maps to kInterrupted rather than an error.
Status CachePerfMsg::Receive(int32_t qID) {
  auto rc = msgrcv(qID, reinterpret_cast<void *>(&small_msg_), sizeof(small_msg_.body.msg), 0, MSG_NOERROR);
  if (rc == -1) {
    if (errno == EIDRM) {
      return Status(StatusCode::kInterrupted);
    }
    std::string errMsg = "Failed to call msgrcv. Errno = " + std::to_string(errno);
    RETURN_STATUS_UNEXPECTED(errMsg);
  }
  return Status::OK();
}
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue