Support data type int64 in NCCL. (#9818)

fea/docker_cudnn7
qingqing01 7 years ago committed by GitHub
parent 1d88ebe44d
commit 129859e732
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -14,8 +14,9 @@
#pragma once #pragma once
#include <thread> #include <thread> // NOLINT
#include <typeindex> #include <typeindex>
#include <vector>
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
@ -29,6 +30,8 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) {
return ncclDouble; return ncclDouble;
} else if (type == typeid(int)) { // NOLINT } else if (type == typeid(int)) { // NOLINT
return ncclInt; return ncclInt;
} else if (type == typeid(int64_t)) { // NOLINT
return ncclInt64;
} else { } else {
PADDLE_THROW("Not supported"); PADDLE_THROW("Not supported");
} }
@ -66,23 +69,23 @@ struct NCCLContext {
return boost::get<platform::CUDAPlace>(ctx_->GetPlace()).device; return boost::get<platform::CUDAPlace>(ctx_->GetPlace()).device;
} }
static void InitNCCLContext(std::unordered_map<int, NCCLContext> &contexts, static void InitNCCLContext(std::unordered_map<int, NCCLContext> *contexts,
const std::vector<platform::Place> &places) { const std::vector<platform::Place> &places) {
std::vector<ncclComm_t> comms; std::vector<ncclComm_t> comms;
std::vector<int> devs; std::vector<int> devs;
comms.resize(contexts.size()); comms.resize(contexts->size());
devs.reserve(contexts.size()); devs.reserve(contexts->size());
for (auto &p : places) { for (auto &p : places) {
devs.push_back(boost::get<platform::CUDAPlace>(p).device); devs.push_back(boost::get<platform::CUDAPlace>(p).device);
} }
PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
&comms[0], static_cast<int>(contexts.size()), &devs[0])); &comms[0], static_cast<int>(contexts->size()), &devs[0]));
int i = 0; int i = 0;
for (auto &dev_id : devs) { for (auto &dev_id : devs) {
contexts.at(dev_id).comm_ = comms[i++]; contexts->at(dev_id).comm_ = comms[i++];
} }
} }
}; };
@ -91,7 +94,7 @@ struct NCCLContextMap {
std::unordered_map<int, NCCLContext> contexts_; std::unordered_map<int, NCCLContext> contexts_;
std::vector<int> order_; std::vector<int> order_;
NCCLContextMap(const std::vector<platform::Place> &places) { explicit NCCLContextMap(const std::vector<platform::Place> &places) {
order_.reserve(places.size()); order_.reserve(places.size());
for (auto &p : places) { for (auto &p : places) {
int dev_id = boost::get<CUDAPlace>(p).device; int dev_id = boost::get<CUDAPlace>(p).device;

Loading…
Cancel
Save