Merge pull request #16887 from guru4elephant/add_nccl_context_pybind
Add nccl context pybind
shanyi15-patch-1
commit
bbc6c5714f
@ -0,0 +1,78 @@
|
|||||||
|
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "paddle/fluid/framework/fleet/nccl_wrapper.h"
|
||||||
|
#include <utility>
|
||||||
|
#include "paddle/fluid/framework/data_feed.h"
|
||||||
|
#include "paddle/fluid/framework/scope.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
|
||||||
|
std::shared_ptr<NCCLWrapper> NCCLWrapper::s_instance_ = NULL;
|
||||||
|
bool NCCLWrapper::is_initialized_ = false;
|
||||||
|
|
||||||
|
// Initializes nccl_info_.comm_ through ncclCommInitRank, using the rank count,
// unique id and global rank currently held in nccl_info_. Any failure reported
// by NCCL is surfaced through PADDLE_ENFORCE. When PADDLE_WITH_CUDA is not
// defined the function body is empty.
void NCCLWrapper::InitNCCL() {
#ifdef PADDLE_WITH_CUDA
  auto* comm_slot = &(nccl_info_.comm_);
  PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
      comm_slot, nccl_info_.global_ranks_, nccl_info_.nccl_id_,
      nccl_info_.my_global_rank_));
#endif
}
|
||||||
|
|
||||||
|
// Copies the NCCL unique id out of `nccl_info` into this wrapper's own
// nccl_info_. Only the id is taken; every other field of the argument is
// ignored. Does nothing when PADDLE_WITH_CUDA is not defined.
void NCCLWrapper::SetNCCLId(const NCCLInfo& nccl_info) {
#ifdef PADDLE_WITH_CUDA
  const auto& incoming_id = nccl_info.nccl_id_;
  nccl_info_.nccl_id_ = incoming_id;
#endif
}
|
||||||
|
|
||||||
|
// Asks NCCL for a unique id (ncclGetUniqueId), stores it in
// nccl_info_.nccl_id_, and returns a copy of the whole nccl_info_.
// Every call overwrites the previously stored id. Without PADDLE_WITH_CUDA
// the stored info is returned unchanged.
NCCLInfo NCCLWrapper::GetNCCLId() {
#ifdef PADDLE_WITH_CUDA
  auto* id_slot = &(nccl_info_.nccl_id_);
  PADDLE_ENFORCE(platform::dynload::ncclGetUniqueId(id_slot));
#endif
  return nccl_info_;
}
|
||||||
|
|
||||||
|
// Records the rank layout in nccl_info_, selects the CUDA device whose index
// is `local_rank`, and creates the CUDA stream stored in nccl_info_.stream_.
//
//   local_rank   device index handed to cudaSetDevice
//   global_rank  stored as my_global_rank_ (later passed to ncclCommInitRank)
//   ranks        stored as global_ranks_ (later passed to ncclCommInitRank)
//
// Does nothing when PADDLE_WITH_CUDA is not defined.
void NCCLWrapper::SetRankInfo(const int local_rank, const int global_rank,
                              const int ranks) {
#ifdef PADDLE_WITH_CUDA
  NCCLInfo& info = nccl_info_;
  info.local_rank_ = local_rank;
  info.my_global_rank_ = global_rank;
  info.global_ranks_ = ranks;
  PADDLE_ENFORCE(cudaSetDevice(local_rank));
  PADDLE_ENFORCE(cudaStreamCreate(&(info.stream_)));
#endif
}
|
||||||
|
|
||||||
|
// Broadcasts each named variable from `root_rank` over nccl_info_.comm_.
//
//   root_rank  rank passed to ncclBcast as the broadcast root
//   scope      scope in which every entry of `var_names` is looked up
//   var_names  names of the variables to broadcast, processed in order
//
// Each variable is accessed as a LoDTensor and its buffer is sent as
// ncclFloat, so the tensors are treated as float data. The stream is
// synchronized after every broadcast. Any missing variable or NCCL/CUDA
// failure is reported through PADDLE_ENFORCE. Does nothing when
// PADDLE_WITH_CUDA is not defined.
void NCCLWrapper::SyncVar(const int root_rank, const Scope& scope,
                          const std::vector<std::string>& var_names) {
#ifdef PADDLE_WITH_CUDA
  for (const auto& name : var_names) {
    auto* var = scope.FindVar(name);
    // Fail with a readable message rather than dereferencing a null pointer
    // when the name is not present in the scope.
    PADDLE_ENFORCE(var != nullptr, "Variable %s is not found in scope.",
                   name.c_str());
    LoDTensor* tensor = var->GetMutable<LoDTensor>();
    // Keep the element count at the width numel() returns; narrowing it to
    // int32_t would silently truncate very large tensors.
    const auto elem_count = tensor->numel();
    PADDLE_ENFORCE(platform::dynload::ncclBcast(
        static_cast<void*>(tensor->data<float>()),
        static_cast<size_t>(elem_count), ncclFloat, root_rank,
        nccl_info_.comm_, nccl_info_.stream_));
    // Check the synchronization result like every other CUDA call here, so
    // asynchronous failures are not silently dropped.
    PADDLE_ENFORCE(cudaStreamSynchronize(nccl_info_.stream_));
  }
#endif
}
|
||||||
|
|
||||||
|
} // end namespace framework
|
||||||
|
} // end namespace paddle
|
@ -0,0 +1,83 @@
|
|||||||
|
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
|
#include <ctime>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <random>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include "paddle/fluid/framework/program_desc.h"
|
||||||
|
#include "paddle/fluid/framework/scope.h"
|
||||||
|
#include "paddle/fluid/framework/variable_helper.h"
|
||||||
|
#ifdef PADDLE_WITH_CUDA
|
||||||
|
#include "paddle/fluid/platform/dynload/nccl.h"
|
||||||
|
#endif
|
||||||
|
#include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
|
||||||
|
// Value holder for the rank layout and, in CUDA builds, the NCCL/CUDA handles
// used by NCCLWrapper. Every member has a default initializer so that a
// freshly constructed (or copied) NCCLInfo never exposes indeterminate values.
class NCCLInfo {
 public:
  NCCLInfo() {}
  virtual ~NCCLInfo() {}

  // Device index that NCCLWrapper::SetRankInfo passes to cudaSetDevice.
  int local_rank_ = 0;
  // Rank count that NCCLWrapper::InitNCCL passes to ncclCommInitRank.
  int global_ranks_ = 0;
  // This process's rank, passed to ncclCommInitRank by InitNCCL.
  int my_global_rank_ = 0;
#ifdef PADDLE_WITH_CUDA
  // Unique id filled by ncclGetUniqueId or copied in via SetNCCLId.
  ncclUniqueId nccl_id_{};
  // Communicator handle written by ncclCommInitRank.
  ncclComm_t comm_ = nullptr;
  // Stream created in SetRankInfo and used for broadcasts in SyncVar.
  cudaStream_t stream_ = nullptr;
#endif
};
|
||||||
|
|
||||||
|
class NCCLWrapper {
|
||||||
|
public:
|
||||||
|
virtual ~NCCLWrapper() {}
|
||||||
|
NCCLWrapper() {}
|
||||||
|
|
||||||
|
void InitNCCL();
|
||||||
|
void SetNCCLId(const NCCLInfo& nccl_info);
|
||||||
|
NCCLInfo GetNCCLId();
|
||||||
|
void SetRankInfo(const int local_rank, const int global_rank,
|
||||||
|
const int ranks);
|
||||||
|
void SyncVar(const int root_rank, const Scope& scope,
|
||||||
|
const std::vector<std::string>& var_names);
|
||||||
|
|
||||||
|
static std::shared_ptr<NCCLWrapper> GetInstance() {
|
||||||
|
if (NULL == s_instance_) {
|
||||||
|
s_instance_.reset(new paddle::framework::NCCLWrapper());
|
||||||
|
}
|
||||||
|
return s_instance_;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
NCCLInfo nccl_info_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static std::shared_ptr<NCCLWrapper> s_instance_;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
static bool is_initialized_;
|
||||||
|
DISABLE_COPY_AND_ASSIGN(NCCLWrapper);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace framework
|
||||||
|
} // end namespace paddle
|
@ -0,0 +1,53 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#ifdef _POSIX_C_SOURCE
|
||||||
|
#undef _POSIX_C_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef _XOPEN_SOURCE
|
||||||
|
#undef _XOPEN_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "google/protobuf/io/zero_copy_stream_impl.h"
|
||||||
|
#include "google/protobuf/text_format.h"
|
||||||
|
#include "paddle/fluid/framework/async_executor.h"
|
||||||
|
#include "paddle/fluid/framework/data_feed.h"
|
||||||
|
#include "paddle/fluid/framework/data_feed.pb.h"
|
||||||
|
#include "paddle/fluid/framework/fleet/nccl_wrapper.h"
|
||||||
|
#include "paddle/fluid/framework/scope.h"
|
||||||
|
#include "paddle/fluid/inference/io.h"
|
||||||
|
#include "paddle/fluid/platform/place.h"
|
||||||
|
#include "paddle/fluid/platform/variant.h"
|
||||||
|
#include "paddle/fluid/pybind/nccl_wrapper_py.h"
|
||||||
|
|
||||||
|
namespace py = pybind11;
|
||||||
|
namespace pd = paddle::framework;
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace pybind {
|
||||||
|
// Registers framework::NCCLWrapper on module `*m` as the Python class "Nccl",
// exposing a default constructor plus init_nccl, set_nccl_id, set_rank_info
// and sync_var.
void BindNCCLWrapper(py::module* m) {
  using framework::NCCLWrapper;

  py::class_<NCCLWrapper> nccl_class(*m, "Nccl");
  nccl_class.def(py::init());
  nccl_class.def("init_nccl", &NCCLWrapper::InitNCCL);
  nccl_class.def("set_nccl_id", &NCCLWrapper::SetNCCLId);
  nccl_class.def("set_rank_info", &NCCLWrapper::SetRankInfo);
  nccl_class.def("sync_var", &NCCLWrapper::SyncVar);
}  // end BindNCCLWrapper
|
||||||
|
} // end namespace pybind
|
||||||
|
} // end namespace paddle
|
@ -0,0 +1,28 @@
|
|||||||
|
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "pybind11/pybind11.h"
|
||||||
|
#include "pybind11/stl.h"
|
||||||
|
|
||||||
|
namespace py = pybind11;
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace pybind {
|
||||||
|
|
||||||
|
// Registers the NCCLWrapper Python bindings on the pybind11 module `*m`.
void BindNCCLWrapper(py::module* m);
|
||||||
|
|
||||||
|
} // namespace pybind
|
||||||
|
} // namespace paddle
|
Loading…
Reference in new issue