[api 2.0] add collective op for cpu using gloo and paddle.distributed.* apis (#26552)

add collective op for cpu using gloo and paddle.distributed.* apis
revert-26856-strategy_example2
lilong12 5 years ago committed by GitHub
parent 07973c577e
commit 1c68138327
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -105,6 +105,11 @@ enum GlooStoreType { HDFS, HTTP };
class GlooWrapper {
public:
static std::shared_ptr<GlooWrapper> GetInstance() {
static auto s_instance = std::make_shared<GlooWrapper>();
return s_instance;
}
// Default constructor with an empty body.
GlooWrapper() {}
// Virtual destructor with an empty body.
virtual ~GlooWrapper() {}
@ -153,6 +158,11 @@ class GlooWrapper {
#endif
}
// Returns the current value of is_initialized_.
bool IsInitialized() { return is_initialized_; }
#ifdef PADDLE_WITH_GLOO
// Returns a copy of the context_ shared pointer.
std::shared_ptr<gloo::Context> GetContext() { return context_; }
#endif
template <typename T>
std::vector<T> AllReduce(std::vector<T>& sendbuf, // NOLINT
const std::string& mode = "sum") { // NOLINT

@ -35,5 +35,9 @@ if(WITH_NCCL)
op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS} nccl_common)
endif()
# When Gloo is enabled, append gloo_wrapper to the collective dependencies.
if(WITH_GLOO)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper)
endif()
# Publish the collective dependencies: merged into OPERATOR_DEPS in the parent
# scope and stored in the cache as GLOB_COLLECTIVE_DEPS.
set(OPERATOR_DEPS ${OPERATOR_DEPS} ${COLLECTIVE_DEPS} PARENT_SCOPE)
set(GLOB_COLLECTIVE_DEPS ${COLLECTIVE_DEPS} CACHE INTERNAL "collective dependency")

@ -0,0 +1,47 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/collective/barrier_op.h"
#include <memory>
namespace paddle {
namespace operators {
class BarrierOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {}
};
// Declares the inputs, outputs and attributes of the `barrier` operator.
class BarrierOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Registers the op proto:
  //   X       - input tensor (only used by the CUDA kernel).
  //   Out     - output tensor (only used by the CUDA kernel).
  //   ring_id - communication ring id, defaults to 0.
  void Make() override {
    AddInput("X", "(Tensor) Input data (only used in CUDAKernel).");
    AddOutput("Out", "(Tensor) Output data (only used in CUDAKernel).");
    AddAttr<int>("ring_id", "(int default 0) communication ring id.")
        .SetDefault(0);
    AddComment(R"DOC(
Barrier Operator - Barrier among all participants.)DOC");
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_WITHOUT_GRADIENT(barrier, ops::BarrierOp, ops::BarrierOpMaker);
REGISTER_OP_CPU_KERNEL(barrier, ops::BarrierOpCPUKernel<int>);

@ -0,0 +1,64 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/collective/barrier_op.h"
#include <memory>
#if defined(PADDLE_WITH_NCCL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace paddle {
namespace operators {
// CUDA kernel for the `barrier` op. The barrier is realized as an NCCL
// all-reduce (sum) of X into Out followed by a host-side wait on the stream
// that carries the all-reduce.
template <typename T>
class BarrierOpCUDAKernel : public framework::OpKernel<T> {
 public:
  // Runs the all-reduce on the communicator selected by the "ring_id"
  // attribute and blocks until it has completed.
  // Throws Unavailable when built without NCCL.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_NCCL)
    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");

    auto place = ctx.GetPlace();
    ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
    int64_t numel = in->numel();
    const void* sendbuff = in->data<void>();
    void* recvbuff = out->mutable_data<T>(place);

    int rid = ctx.Attr<int>("ring_id");
    auto comm = platform::NCCLCommContext::Instance().Get(rid, place);
    auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
    auto stream = static_cast<platform::CUDADeviceContext*>(dev_ctx)->stream();
    ncclRedOp_t nccl_red_type = ncclSum;
    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce(
        sendbuff, recvbuff, numel, dtype, nccl_red_type, comm->comm(), stream));
    // Wait on the same stream the all-reduce was enqueued on; waiting on a
    // different stream would let the host continue before the barrier is done.
    PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream));
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with NCCL."));
#endif
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(barrier, ops::BarrierOpCUDAKernel<int>);

@ -0,0 +1,54 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#if defined(PADDLE_WITH_GLOO)
#include <gloo/barrier.h>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
namespace paddle {
namespace operators {
// CPU kernel for the `barrier` op, implemented with gloo::barrier on the
// context held by the GlooWrapper instance.
template <typename T>
class BarrierOpCPUKernel : public framework::OpKernel<T> {
 public:
  // Requires an initialized GlooWrapper; throws Unavailable when built
  // without Gloo.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_GLOO)
    auto gloo_wrapper = paddle::framework::GlooWrapper::GetInstance();
    PADDLE_ENFORCE_EQ(
        gloo_wrapper->IsInitialized(), true,
        platform::errors::PreconditionNotMet(
            "You must initialize the gloo environment first to use it."));

    gloo::BarrierOptions barrier_opts(gloo_wrapper->GetContext());
    gloo::barrier(barrier_opts);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON"));
#endif
  }
};
} // namespace operators
} // namespace paddle

@ -23,6 +23,11 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#if defined(PADDLE_WITH_GLOO)
#include <gloo/allgather.h>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
namespace paddle {
namespace operators {
@ -30,7 +35,31 @@ template <typename T>
// CPU kernel for c_allgather, implemented with gloo::allgather.
class CAllGatherOpCPUKernel : public framework::OpKernel<T> {
 public:
  // Gathers X from every rank into Out. Out takes the dims of X with the
  // first dimension multiplied by the number of ranks.
  // Requires an initialized GlooWrapper; throws Unavailable when built
  // without Gloo.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_GLOO)
    auto gloo_wrapper = paddle::framework::GlooWrapper::GetInstance();
    // Check initialization before reading anything from the wrapper.
    PADDLE_ENFORCE_EQ(
        gloo_wrapper->IsInitialized(), true,
        platform::errors::PreconditionNotMet(
            "You must initialize the gloo environment first to use it."));

    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");
    auto place = ctx.GetPlace();

    auto nranks = gloo_wrapper->Size();
    framework::DDim out_dims = in->dims();
    out_dims[0] *= nranks;

    int64_t send_numel = in->numel();
    const T* send_buff = in->data<T>();
    T* recv_buff = out->mutable_data<T>(out_dims, place);

    gloo::AllgatherOptions opts(gloo_wrapper->GetContext());
    // setInput takes a non-const pointer, hence the const_cast.
    opts.setInput(const_cast<T*>(send_buff), send_numel);
    opts.setOutput(recv_buff, send_numel * nranks);
    gloo::allgather(opts);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON"));
#endif
  }
};

@ -25,6 +25,11 @@ limitations under the License. */
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#if defined(PADDLE_WITH_GLOO)
#include <gloo/allreduce.h>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
namespace paddle {
namespace operators {
@ -50,7 +55,53 @@ template <ReduceType red_type, typename T>
// CPU kernel for the c_allreduce_* ops, implemented with gloo::allreduce.
// The reduction is selected by the red_type template parameter.
class CAllReduceOpCPUKernel : public framework::OpKernel<T> {
 public:
  // Reduces X across ranks into Out (same dims as X).
  // Requires an initialized GlooWrapper; throws InvalidArgument for an
  // unknown reduce type and Unavailable when built without Gloo.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_GLOO)
    // Signature used to pick the type-erased overload of the gloo reducers.
    using ReduceFn = void (*)(void*, const void*, const void*, size_t);

    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");

    auto place = ctx.GetPlace();
    int64_t send_numel = in->numel();
    const T* send_buff = in->data<T>();
    T* recv_buff = out->mutable_data<T>(in->dims(), place);

    auto gloo_wrapper = paddle::framework::GlooWrapper::GetInstance();
    PADDLE_ENFORCE_EQ(
        gloo_wrapper->IsInitialized(), true,
        platform::errors::PreconditionNotMet(
            "You must initialize the gloo environment first to use it."));

    gloo::AllreduceOptions opts(gloo_wrapper->GetContext());
    // setInput takes a non-const pointer, hence the const_cast.
    opts.setInput(const_cast<T*>(send_buff), send_numel);
    opts.setOutput(recv_buff, send_numel);
    switch (red_type) {
      case kRedSum:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::sum<T>));
        break;
      case kRedMax:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::max<T>));
        break;
      case kRedMin:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::min<T>));
        break;
      case kRedProd:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::product<T>));
        break;
      default:
        PADDLE_ENFORCE_EQ(true, false,
                          platform::errors::InvalidArgument(
                              "Invalid reduce type: %d.", red_type));
    }
    gloo::allreduce(opts);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON"));
#endif
  }
};

@ -22,6 +22,11 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#if defined(PADDLE_WITH_GLOO)
#include <gloo/broadcast.h>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
namespace paddle {
namespace operators {
@ -29,7 +34,27 @@ template <typename T>
// CPU kernel for c_broadcast, implemented with gloo::broadcast.
class CBroadcastOpCPUKernel : public framework::OpKernel<T> {
 public:
  // Broadcasts from the rank given by the "root" attribute. Out is allocated
  // with the dims of X and is the only buffer handed to gloo.
  // Requires an initialized GlooWrapper; throws Unavailable when built
  // without Gloo.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_GLOO)
    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");
    auto root = ctx.Attr<int>("root");

    auto place = ctx.GetPlace();
    int64_t send_numel = in->numel();
    T* recv_buff = out->mutable_data<T>(in->dims(), place);

    auto gloo_wrapper = paddle::framework::GlooWrapper::GetInstance();
    PADDLE_ENFORCE_EQ(
        gloo_wrapper->IsInitialized(), true,
        platform::errors::PreconditionNotMet(
            "You must initialize the gloo environment first to use it."));

    gloo::BroadcastOptions opts(gloo_wrapper->GetContext());
    opts.setOutput(recv_buff, send_numel);
    opts.setRoot(root);
    gloo::broadcast(opts);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON"));
#endif
  }
};

@ -28,6 +28,10 @@ limitations under the License. */
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif
#if defined(PADDLE_WITH_GLOO)
#include <gloo/reduce.h>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
namespace paddle {
namespace operators {
@ -54,9 +58,55 @@ template <ReduceType red_type, typename T>
// CPU kernel for the c_reduce_* ops, implemented with gloo::reduce.
// The reduction is selected by the red_type template parameter.
class CReduceOpCPUKernel : public framework::OpKernel<T> {
 public:
  // Reduces X across ranks towards the rank given by the "root_id"
  // attribute; Out is allocated with the dims of X.
  // Requires an initialized GlooWrapper; throws InvalidArgument for an
  // unknown reduce type and Unavailable when built without Gloo.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_GLOO)
    // Signature used to pick the type-erased overload of the gloo reducers.
    using ReduceFn = void (*)(void*, const void*, const void*, size_t);

    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");
    auto root_id = ctx.Attr<int>("root_id");

    auto place = ctx.GetPlace();
    int64_t send_numel = in->numel();
    const T* send_buff = in->data<T>();
    T* recv_buff = out->mutable_data<T>(in->dims(), place);

    auto gloo_wrapper = paddle::framework::GlooWrapper::GetInstance();
    PADDLE_ENFORCE_EQ(
        gloo_wrapper->IsInitialized(), true,
        platform::errors::PreconditionNotMet(
            "You must initialize the gloo environment first to use it."));

    gloo::ReduceOptions opts(gloo_wrapper->GetContext());
    // setInput takes a non-const pointer, hence the const_cast.
    opts.setInput(const_cast<T*>(send_buff), send_numel);
    opts.setOutput(recv_buff, send_numel);
    opts.setRoot(root_id);
    switch (red_type) {
      case kRedSum:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::sum<T>));
        break;
      case kRedMax:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::max<T>));
        break;
      case kRedMin:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::min<T>));
        break;
      case kRedProd:
        opts.setReduceFunction(static_cast<ReduceFn>(&gloo::product<T>));
        break;
      default:
        PADDLE_ENFORCE_EQ(true, false,
                          platform::errors::InvalidArgument(
                              "Invalid reduce type: %d.", red_type));
    }
    gloo::reduce(opts);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON"));
#endif
  }
};

@ -22,6 +22,11 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#if defined(PADDLE_WITH_GLOO)
#include <gloo/scatter.h>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
#endif
namespace paddle {
namespace operators {
@ -29,9 +34,39 @@ template <typename T>
// CPU kernel for c_scatter, implemented with gloo::scatter.
class CScatterOpCPUKernel : public framework::OpKernel<T> {
 public:
  // Scatters X from the rank given by the "root" attribute. Each rank
  // receives Out->numel() elements; on the root, X is split into one
  // consecutive chunk of that size per rank.
  // Requires an initialized GlooWrapper; throws Unavailable when built
  // without Gloo.
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_GLOO)
    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");
    auto root_id = ctx.Attr<int>("root");

    auto gloo_wrapper = paddle::framework::GlooWrapper::GetInstance();
    PADDLE_ENFORCE_EQ(
        gloo_wrapper->IsInitialized(), true,
        platform::errors::PreconditionNotMet(
            "You must initialize the gloo environment first to use it."));

    // Per-rank element count, taken from the output tensor.
    int64_t send_numel = out->numel();
    auto nranks = gloo_wrapper->Size();
    auto rank = gloo_wrapper->Rank();
    T* recv_buff = out->data<T>();

    gloo::ScatterOptions opts(gloo_wrapper->GetContext());
    if (root_id == rank) {
      // Only the root supplies inputs: one pointer per destination rank.
      T* send_buff = const_cast<T*>(in->data<T>());
      std::vector<T*> ptrs(nranks);
      for (int i = 0; i < nranks; ++i) {
        ptrs[i] = send_buff;
        send_buff += send_numel;
      }
      opts.setInputs(ptrs, send_numel);
    }
    opts.setOutput(recv_buff, send_numel);
    opts.setRoot(root_id);
    gloo::scatter(opts);
#else
    PADDLE_THROW(platform::errors::Unavailable(
        "PaddlePaddle should compile with GLOO by setting WITH_GLOO=ON"));
#endif
  }
};

@ -88,6 +88,10 @@ ELSE()
set(STREAM_CALLBACK_DEPS)
ENDIF()
# Build the gloo_context library only when Gloo support is enabled.
if(WITH_GLOO)
cc_library(gloo_context SRCS gloo_context.cc DEPS framework_proto gloo_wrapper enforce)
endif()
cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)
# memcpy depends on device_context, here add deps individually for

@ -0,0 +1,33 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/gloo_context.h"
namespace paddle {
namespace platform {
#if defined(PADDLE_WITH_GLOO)
// Copies every field of strategy_ into the GlooWrapper instance and then
// calls its Init().
void GlooParallelContext::Init() {
  const auto wrapper = paddle::framework::GlooWrapper::GetInstance();

  // Rank layout.
  wrapper->SetRank(strategy_.rank);
  wrapper->SetSize(strategy_.rank_num);

  // Key prefix, network interface and timeouts.
  wrapper->SetPrefix(strategy_.prefix);
  wrapper->SetIface(strategy_.iface);
  wrapper->SetTimeoutSeconds(strategy_.init_seconds, strategy_.run_seconds);

  // HDFS store settings.
  wrapper->SetHdfsStore(strategy_.path, strategy_.fs_name, strategy_.fs_ugi);

  wrapper->Init();
}
#endif
} // namespace platform
} // namespace paddle

@ -0,0 +1,51 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/fleet/gloo_wrapper.h"
namespace paddle {
namespace platform {
#if defined(PADDLE_WITH_GLOO)
// Plain settings bundle consumed by GlooParallelContext. Strings default to
// empty; numeric defaults are given inline.
struct GlooParallelStrategy {
  int rank = 0;      // passed to GlooWrapper::SetRank
  int rank_num = 1;  // passed to GlooWrapper::SetSize
  std::string iface;
  std::string prefix;
  int init_seconds = 9999999;
  int run_seconds = 9999999;
  std::string path;
  std::string fs_name;
  std::string fs_ugi;
};
// Holds a copy of a GlooParallelStrategy; Init() is declared here and
// defined out of line.
class GlooParallelContext {
 public:
  // Stores a copy of `strategy`.
  explicit GlooParallelContext(const GlooParallelStrategy& strategy)
      : strategy_(strategy) {}

  virtual ~GlooParallelContext() = default;

  virtual void Init();

 protected:
  GlooParallelStrategy strategy_;
};
#endif
} // namespace platform
} // namespace paddle

@ -40,6 +40,11 @@ set(PYBIND_SRCS
inference_api.cc
generator_py.cc)
# When Gloo is enabled, add the gloo_context dependency and its binding source.
if(WITH_GLOO)
set(PYBIND_DEPS ${PYBIND_DEPS} gloo_context)
set(PYBIND_SRCS ${PYBIND_SRCS} gloo_context_py.cc)
endif(WITH_GLOO)
if (WITH_CRYPTO)
set(PYBIND_DEPS ${PYBIND_DEPS} paddle_crypto)
set(PYBIND_SRCS ${PYBIND_SRCS} crypto.cc)

@ -0,0 +1,111 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/pybind/gloo_context_py.h"
#include <Python.h>
#include <pybind11/chrono.h>
#include <pybind11/complex.h>
#include <pybind11/functional.h>
#include <pybind11/stl.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/mmap_allocator.h"
#include "paddle/fluid/platform/gloo_context.h"
namespace paddle {
namespace pybind {
namespace py = ::pybind11;
// Bind Methods
// Registers GlooParallelStrategy (all fields as read/write properties) and
// GlooParallelContext (constructor plus `init`) on module `m`.
// The body is compiled only when PADDLE_WITH_GLOO is defined; otherwise the
// function does nothing.
void BindGlooContext(py::module *m) {
#if defined(PADDLE_WITH_GLOO)
  using Strategy = platform::GlooParallelStrategy;
  using Context = platform::GlooParallelContext;

  // Every strategy field is a plain int or std::string, so each one is
  // exposed directly as a read/write attribute.
  py::class_<Strategy> strategy_cls(*m, "GlooParallelStrategy", "");
  strategy_cls.def(py::init())
      .def_readwrite("rank_num", &Strategy::rank_num)
      .def_readwrite("rank", &Strategy::rank)
      .def_readwrite("iface", &Strategy::iface)
      .def_readwrite("prefix", &Strategy::prefix)
      .def_readwrite("init_seconds", &Strategy::init_seconds)
      .def_readwrite("run_seconds", &Strategy::run_seconds)
      .def_readwrite("path", &Strategy::path)
      .def_readwrite("fs_name", &Strategy::fs_name)
      .def_readwrite("fs_ugi", &Strategy::fs_ugi);

  py::class_<Context> context_cls(*m, "GlooParallelContext");
  context_cls.def(py::init<const Strategy &>())
      .def("init", [](Context &self) { self.Init(); });
#endif
}
} // namespace pybind
} // namespace paddle

@ -0,0 +1,26 @@
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <Python.h>
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
namespace paddle {
namespace pybind {
// Adds the Gloo context bindings to the pybind11 module `m`.
void BindGlooContext(pybind11::module* m);
} // namespace pybind
} // namespace paddle

@ -86,6 +86,19 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
{"accuracy", {"Correct", "Total"}},
{"fill_constant", {"Out"}},
{"matmul", {"Out"}},
{"c_broadcast", {"Out"}},
{"c_allreduce_sum", {"Out"}},
{"c_allreduce_max", {"Out"}},
{"c_allreduce_min", {"Out"}},
{"c_allreduce_prod", {"Out"}},
{"c_reduce_sum", {"Out"}},
{"c_reduce_max", {"Out"}},
{"c_reduce_min", {"Out"}},
{"c_reduce_prod", {"Out"}},
{"c_reduce", {"Out"}},
{"c_allgather", {"Out"}},
{"c_scatter", {"Out"}},
{"barrier", {"Out"}},
{"fake_quantize_dequantize_moving_average_abs_max",
{"Out", "OutScale", "OutAccum", "OutState"}},
{"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}},

@ -66,6 +66,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/fleet_wrapper_py.h"
#include "paddle/fluid/pybind/generator_py.h"
#include "paddle/fluid/pybind/global_value_getter_setter.h"
#include "paddle/fluid/pybind/gloo_context_py.h"
#include "paddle/fluid/pybind/gloo_wrapper_py.h"
#include "paddle/fluid/pybind/heter_wrapper_py.h"
#include "paddle/fluid/pybind/imperative.h"
@ -2611,6 +2612,9 @@ All parameter, weight, gradient are variables in Paddle.
#endif
#ifdef PADDLE_WITH_NCCL
BindNCCLWrapper(&m);
#endif
#ifdef PADDLE_WITH_GLOO
BindGlooContext(&m);
#endif
BindGraph(&m);
BindNode(&m);

@ -11,3 +11,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .collective import *

File diff suppressed because it is too large Load Diff

@ -58,6 +58,12 @@ if(NOT WITH_GPU OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_broadcast)
LIST(REMOVE_ITEM TEST_OPS test_collective_reduce)
LIST(REMOVE_ITEM TEST_OPS test_collective_scatter)
LIST(REMOVE_ITEM TEST_OPS test_collective_reduce_api)
LIST(REMOVE_ITEM TEST_OPS test_collective_scatter_api)
LIST(REMOVE_ITEM TEST_OPS test_collective_barrier_api)
LIST(REMOVE_ITEM TEST_OPS test_collective_allreduce_api)
LIST(REMOVE_ITEM TEST_OPS test_collective_broadcast_api)
LIST(REMOVE_ITEM TEST_OPS test_collective_allgather_api)
LIST(REMOVE_ITEM TEST_OPS test_reducescatter)
LIST(REMOVE_ITEM TEST_OPS test_reducescatter_api)
endif()

@ -0,0 +1,53 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import argparse
import os
import sys
import signal
import time
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main
class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase):
    """Runner whose program calls paddle.distributed.all_gather."""

    def __init__(self):
        # global_ring_id is fixed at 0 for this runner.
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        """Build the all_gather program and return the list passed to it."""
        with fluid.program_guard(main_prog, startup_program):
            gathered = []
            feed_var = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            paddle.distributed.all_gather(gathered, feed_var)
            return gathered


if __name__ == "__main__":
    runtime_main(TestCollectiveAllgatherAPI, "allgather")

@ -0,0 +1,52 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import argparse
import os
import sys
import signal
import time
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main
class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase):
    """Runner whose program calls paddle.distributed.all_reduce."""

    def __init__(self):
        # global_ring_id is fixed at 0 for this runner.
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        """Build the all_reduce program and return the reduced variable in a list."""
        with fluid.program_guard(main_prog, startup_program):
            feed_var = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            paddle.distributed.all_reduce(feed_var)
            return [feed_var]


if __name__ == "__main__":
    runtime_main(TestCollectiveAllreduceAPI, "allreduce")

@ -0,0 +1,50 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import argparse
import os
import sys
import signal
import time
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main
class TestCollectiveBarrierAPI(TestCollectiveAPIRunnerBase):
    """Runner whose program calls paddle.distributed.barrier."""

    def __init__(self):
        # global_ring_id is fixed at 0 for this runner.
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        """Build the barrier program; there are no variables to return."""
        with fluid.program_guard(main_prog, startup_program):
            paddle.distributed.barrier()
            return list()


if __name__ == "__main__":
    runtime_main(TestCollectiveBarrierAPI, "barrier")

@ -0,0 +1,52 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import argparse
import os
import sys
import signal
import time
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main
class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase):
    """Runner whose program calls paddle.distributed.broadcast with src=1."""

    def __init__(self):
        # global_ring_id is fixed at 0 for this runner.
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program, rank):
        """Build the broadcast program and return the broadcast variable in a list."""
        with fluid.program_guard(main_prog, startup_program):
            feed_var = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            paddle.distributed.broadcast(feed_var, src=1)
            return [feed_var]


if __name__ == "__main__":
    runtime_main(TestCollectiveBroadcastAPI, "broadcast")

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save