add allreduce and broadcast without test (#31024)
parent 5618f14047
commit 9fcdaeba5e
@@ -0,0 +1,31 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_allreduce_op.h"

namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
}  // namespace platform
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_NPU_KERNEL(c_allreduce_max,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMax, float>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMax, int>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMax, int8_t>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMax, plat::float16>)
@@ -0,0 +1,31 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_allreduce_op.h"

namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
}  // namespace platform
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_NPU_KERNEL(c_allreduce_min,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMin, float>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMin, int>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMin, int8_t>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedMin, plat::float16>)
@@ -0,0 +1,31 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_allreduce_op.h"

namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
}  // namespace platform
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_NPU_KERNEL(c_allreduce_prod,
                       ops::CAllReduceOpASCENDKernel<ops::kRedProd, float>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedProd, int>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedProd, int8_t>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedProd, plat::float16>)
@@ -0,0 +1,31 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_allreduce_op.h"

namespace paddle {
namespace platform {
struct ASCENDPlace;
struct float16;
}  // namespace platform
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_NPU_KERNEL(c_allreduce_sum,
                       ops::CAllReduceOpASCENDKernel<ops::kRedSum, float>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedSum, int>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedSum, int8_t>,
                       ops::CAllReduceOpASCENDKernel<ops::kRedSum, plat::float16>)
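Note: the four files above differ only in the reduction tag passed to CAllReduceOpASCENDKernel. The dispatch from these tags to HCCL's reduction enum happens inside c_allreduce_op.h, which this diff does not show; the following is a minimal sketch of what that mapping plausibly looks like, using the tag names from the registrations above and the hcclRedOp_t enum from base.h later in this diff. It is an assumption for illustration, not the actual header.

// Hypothetical sketch only; c_allreduce_op.h is not part of this diff.
inline hcclRedOp_t ToHCCLRedOp(int red_type) {
  switch (red_type) {
    case kRedSum:  return HCCL_REP_OP_SUM;   // c_allreduce_sum
    case kRedMax:  return HCCL_REP_OP_MAX;   // c_allreduce_max
    case kRedMin:  return HCCL_REP_OP_MIN;   // c_allreduce_min
    case kRedProd: return HCCL_REP_OP_PROD;  // c_allreduce_prod
    default:
      PADDLE_THROW(platform::errors::InvalidArgument(
          "Invalid reduce type: %d", red_type));
  }
}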
@@ -0,0 +1,94 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_broadcast_op.h"

#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#endif

namespace paddle {
namespace operators {

template <typename T>
class CBroadcastOpASCENDKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_ASCEND_CL)
    auto x = ctx.Input<framework::LoDTensor>("X");
    void* ptr = reinterpret_cast<void*>(const_cast<T*>(x->data<T>()));
    int numel = x->numel();
    hcclDataType_t dtype = platform::ToHCCLDataType(x->type());

    auto out = ctx.Output<framework::LoDTensor>("Out");

    auto place = ctx.GetPlace();
    auto comm = paddle::platform::HCCLCommContext::Instance().Get();

    aclrtStream stream = nullptr;
    if (ctx.Attr<bool>("use_calc_stream")) {
      auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
      stream = static_cast<platform::NPUDeviceContext*>(dev_ctx)->stream();
    } else {
      stream = comm->stream();
    }

    int root = ctx.Attr<int>("root");
    int ring_id = ctx.Attr<int>("ring_id");
    std::string group = std::string(HCOM_GROUP_PREFIX) + std::to_string(ring_id);
    std::string tag = ctx.Attr<std::string>("tag");

    VLOG(3) << "begin hccl broadcast, parameter is: root " << root
            << ", group is " << group
            << ", tag is " << tag;

    if (root == static_cast<int>(comm->rank())) {
      PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::hcom_broadcast(tag.c_str(), ptr, numel,
          dtype, (uint32_t)root, group.c_str(), (void*)stream));
      VLOG(3) << "rank " << comm->rank() << " invoke Bcast. sent "
              << x->numel();
    } else {
      PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::hcom_broadcast(tag.c_str(), ptr, numel,
          dtype, (uint32_t)root, group.c_str(), (void*)stream));
      VLOG(3) << "rank " << comm->rank() << " invoke Bcast. received "
              << framework::product(out->dims());
    }
    if (out != x) {
      framework::TensorCopy(
          *static_cast<const framework::Tensor*>(x), place,
          *platform::DeviceContextPool::Instance().Get(place),
          static_cast<framework::Tensor*>(out));
    }

    out->Resize(x->dims());
    out->set_lod(x->lod());
#else
    PADDLE_THROW(platform::errors::PreconditionNotMet(
        "PaddlePaddle should be compiled with NPU."));
#endif
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_NPU_KERNEL(c_broadcast,
                       ops::CBroadcastOpASCENDKernel<float>,
                       ops::CBroadcastOpASCENDKernel<int>,
                       ops::CBroadcastOpASCENDKernel<int8_t>,
                       ops::CBroadcastOpASCENDKernel<plat::float16>);
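Note: the kernel above reads root, ring_id, and tag from the op's attributes, broadcasts the buffer of X within the HCOM group, then copies X into Out. A condensed usage sketch, assuming a scope and place already set up and a communicator already initialized via c_comm_init_hccl (the full version is TestHCCLBroadcastOp in the test file later in this diff):

// Condensed from the test below; not standalone code.
f::AttributeMap attrs;
attrs["tag"] = std::string("tagx");
attrs["root"] = 0;
attrs["ring_id"] = 0;
auto op = f::OpRegistry::CreateOp("c_broadcast", {{"X", {"X"}}},
                                  {{"Out", {"Out"}}}, attrs);
op->Run(scope, place);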
@@ -0,0 +1,79 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"

#include <string>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace framework {
class Scope;
}  // namespace framework
}  // namespace paddle

namespace paddle {
namespace operators {

class CCommInitOpNPU : public framework::OperatorBase {
 public:
  CCommInitOpNPU(const std::string& type,
                 const framework::VariableNameMap& inputs,
                 const framework::VariableNameMap& outputs,
                 const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  void RunImpl(const framework::Scope& scope,
               const platform::Place& place) const override {
    std::string rank_table_file = Attr<std::string>("rank_table_file");
    uint32_t rank_id = Attr<int>("rank_id");
    uint32_t device_id = Attr<int>("device_id");

    VLOG(3) << "begin init hccl, parameter is: "
            << "rank_table_file " << rank_table_file
            << " rank_id " << rank_id
            << " device_id " << device_id;

    platform::HCCLCommContext::Instance().CreateHCCLComm(rank_table_file, rank_id, device_id);
  }
};

class CCommInitOpNPUMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddComment(R"DOC(
CCommInit operator on NPU

Initialize collective communication context within this trainer
)DOC");
    AddAttr<std::string>("rank_table_file",
                         "(string) path to rank_table_file");
    AddAttr<int>("rank_id", "(int) world rank id of the process");
    AddAttr<int>("device_id", "(int) device id of the process/thread");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(c_comm_init_hccl, ops::CCommInitOpNPU,
                  ops::CCommInitOpNPUMaker);

#endif
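Note: a condensed sketch of driving this operator, assuming a scope and place are already available; the rank table path below is a placeholder, and the full environment-driven sequence is Prepare() in the test file later in this diff:

// Condensed from the test below; the path is hypothetical.
f::AttributeMap attrs;
attrs["rank_table_file"] = std::string("rank_table.json");  // placeholder
attrs["rank_id"] = 0;
attrs["device_id"] = 0;
auto comm_init_op = f::OpRegistry::CreateOp("c_comm_init_hccl", {}, {}, attrs);
comm_init_op->Run(scope, place);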
@@ -0,0 +1,76 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_ASCEND_CL
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"

#include <string>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace framework {
class Scope;
}  // namespace framework
}  // namespace paddle

namespace paddle {
namespace operators {

class CCreateGroupOpNPU : public framework::OperatorBase {
 public:
  CCreateGroupOpNPU(const std::string& type,
                    const framework::VariableNameMap& inputs,
                    const framework::VariableNameMap& outputs,
                    const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  void RunImpl(const framework::Scope& scope,
               const platform::Place& place) const override {
    std::string group_name = Attr<std::string>("group_name");
    int nranks = Attr<int>("nranks");
    std::vector<int> rank_ids = Attr<std::vector<int>>("rank_ids");
    paddle::platform::HCCLCommContext::Instance().CreateHCCLGroup(
        group_name, (uint32_t)nranks,
        std::vector<uint32_t>(rank_ids.begin(), rank_ids.end()));
  }
};

class CCreateGroupOpNPUMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddComment(R"DOC(
CCreateGroup operator on NPU

Create collective communication group on NPU
)DOC");
    AddAttr<std::string>("group_name",
                         "(string) name of the collective communication group");
    AddAttr<int>("nranks", "(int) number of ranks in the group");
    AddAttr<std::vector<int>>("rank_ids",
                              "(list of int) the world rank ids of the group members");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(c_create_group, ops::CCreateGroupOpNPU,
                  ops::CCreateGroupOpNPUMaker);

#endif
@@ -0,0 +1,192 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef _WIN32
#include <unistd.h>
#endif

#include <stdio.h>

#include <string>
#include <thread>  // NOLINT
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/operators/collective/c_broadcast_op.h"
#include "paddle/fluid/operators/collective/c_allreduce_op.h"

#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/hccl_helper.h"
#endif

namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;

USE_OP(c_broadcast);
USE_OP(c_allreduce_sum);
USE_NO_KERNEL_OP(c_comm_init_hccl);
USE_NO_KERNEL_OP(c_create_group);
USE_OP_DEVICE_KERNEL(c_broadcast, NPU);
USE_OP_DEVICE_KERNEL(c_allreduce_sum, NPU);

void Prepare(f::Scope* scope, const p::DeviceContext& ctx) {

  std::string rank_table_file = getenv("RANK_TABLE_FILE");
  int rank_id = atoi(getenv("RANK_ID"));
  int device_id = atoi(getenv("DEVICE_ID"));

  printf("rank_table_file: %s, rank_id = %d, device_id = %d\n", rank_table_file.c_str(), rank_id, device_id);

  f::AttributeMap attrs;
  attrs["rank_table_file"] = rank_table_file;
  attrs["rank_id"] = rank_id;
  attrs["device_id"] = device_id;
  auto comm_init_op =
      f::OpRegistry::CreateOp("c_comm_init_hccl", {}, {}, attrs);
  auto place = ctx.GetPlace();
  comm_init_op->Run(*scope, place);
  ctx.Wait();

  f::AttributeMap create_attrs;
  create_attrs["group_name"] = HCOM_GROUP_PREFIX + std::to_string(0);
  create_attrs["nranks"] = 2;
  std::vector<int> rank_ids{0, 1};
  create_attrs["rank_ids"] = rank_ids;
  auto create_group_op = f::OpRegistry::CreateOp("c_create_group", {}, {}, create_attrs);
  create_group_op->Run(*scope, place);
  ctx.Wait();
}
void TestHCCLBroadcastOp(f::Scope* scope, const p::DeviceContext& ctx) {
  std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;
  // init
  auto x = scope->Var("X");
  auto tensor_x = x->GetMutable<f::LoDTensor>();
  int num = 2;
  std::vector<float> init;
  int rank_id = atoi(getenv("RANK_ID"));
  std::cout << "rank_id:" << rank_id << std::endl;
  for (int64_t i = 0; i < num * num; ++i) {
    init.push_back(1.0 + rank_id);
    std::cout << init[0];
  }
  std::cout << std::endl;

  TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num, num});

  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("Out");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num, num});
  tensor_out->mutable_data<float>(place);  // allocate

  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["root"] = 0;
  attrs["ring_id"] = 0;

  auto op =
      f::OpRegistry::CreateOp("c_broadcast", {{"X", {"X"}}},
                              {{"Out", {"Out"}}}, attrs);

  op->Run(*scope, place);

  std::vector<float> out_vec;
  TensorToVector(*tensor_out, ctx, &out_vec);

  ctx.Wait();

  EXPECT_EQ(out_vec.size(), init.size());
  for (uint32_t i = 0; i < out_vec.size(); i++) {
    EXPECT_EQ(out_vec[i], 1.0);
  }
}

void TestHCCLAllReduceOp(f::Scope* scope, const p::DeviceContext& ctx) {
  std::cout << "BEGIN TEST:" << __FUNCTION__ << std::endl;
  // init
  auto x = scope->Var("X");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  std::vector<float> init;
  int rank_id = atoi(getenv("RANK_ID"));
  std::cout << "rank_id:" << rank_id << std::endl;

  int num1 = 1;
  int num2 = 4;

  for (int64_t i = 0; i < num1 * num2; ++i) {
    init.push_back(1.0);
    // init.push_back(1.0 + rank_id * 3);
    std::cout << init[0];
  }
  std::cout << std::endl;

  TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({num1, num2});

  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("Out");
  auto tensor_out = out->GetMutable<f::LoDTensor>();
  tensor_out->Resize({num1, num2});
  tensor_out->mutable_data<float>(place);  // allocate

  ctx.Wait();

  // run
  f::AttributeMap attrs;
  attrs["tag"] = std::string("tagx");
  attrs["ring_id"] = 0;

  auto op =
      f::OpRegistry::CreateOp("c_allreduce_sum", {{"X", {"X"}}},
                              {{"Out", {"Out"}}}, attrs);

  op->Run(*scope, place);

  std::vector<float> out_vec;
  TensorToVector(*tensor_out, ctx, &out_vec);

  ctx.Wait();

  EXPECT_EQ(out_vec.size(), init.size());
  for (uint32_t i = 0; i < out_vec.size(); i++) {
    EXPECT_EQ(out_vec[i], 2.0);
  }
}
TEST(c_broadcast, NPU) {
  f::Scope scope;
  char* npu_id = getenv("FLAGS_selected_npus");

  p::NPUDeviceContext ctx(p::NPUPlace(atoi(npu_id)));

  Prepare(&scope, ctx);
  // TestHCCLBroadcastOp(&scope, ctx);
  TestHCCLAllReduceOp(&scope, ctx);
}
@@ -0,0 +1,111 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#if defined(PADDLE_WITH_ASCEND_CL)
#include "paddle/fluid/platform/collective_helper.h"
#include <utility>

namespace paddle {
namespace platform {

class HCCLCommImpl : public HCCLComm {
 public:
  void set_rank_table_file(const std::string& rank_table_file) { rank_table_file_ = rank_table_file; }
  std::string rank_table_file() const override { return rank_table_file_; }

  void set_rank(uint32_t rank) { rank_ = rank; }
  uint32_t rank() const override { return rank_; }

  void set_device_id(uint32_t device_id) { device_id_ = device_id; }
  uint32_t device_id() const override { return device_id_; }

  aclrtStream stream() const override { return dev_ctx_->stream(); }

  void set_dev_ctx(std::unique_ptr<NPUDeviceContext>&& dev_ctx) {
    dev_ctx_ = std::move(dev_ctx);
  }
  NPUDeviceContext* dev_context() const override { return dev_ctx_.get(); }

 private:
  std::string rank_table_file_;
  uint32_t rank_;
  uint32_t device_id_;
  std::unique_ptr<NPUDeviceContext> dev_ctx_;
};

HCCLComm* HCCLCommContext::CreateHCCLComm(const std::string& rank_table_file,
                                          uint32_t rank, uint32_t device_id) {
  /*
  PADDLE_ENFORCE_NOT_NULL(rank_table_file,
                          platform::errors::InvalidArgument(
                              "The rank table file should not be null."));

  PADDLE_ENFORCE_GE(rank, 0,
                    platform::errors::InvalidArgument(
                        "Expected rank >= 0. But received rank is %d.", rank));

  PADDLE_ENFORCE_GE(device_id, 0,
                    platform::errors::InvalidArgument(
                        "Expected dev_id >= 0. But received dev_id is %d.", device_id));
  */
  auto* comm_wrapper = AssignHCCLComm(rank_table_file, rank, device_id);

  platform::dynload::hcom_init(rank_table_file.c_str(), std::to_string(rank).c_str());
  platform::dynload::hcom_bind_model(comm_wrapper->stream(), comm_wrapper->stream());

  VLOG(1) << "hccl communicator of rank " << rank << " has been created";
  return comm_wrapper;
}

HCCLComm* HCCLCommContext::AssignHCCLComm(const std::string& rank_table_file,
                                          uint32_t rank, uint32_t device_id) {

  std::unique_ptr<NPUDeviceContext> dev_ctx(
      new NPUDeviceContext(NPUPlace(device_id)));

  VLOG(3) << "device_id" << device_id;
  VLOG(3) << "dev_ctx->stream()" << dev_ctx->stream();

  HCCLCommImpl* c = new HCCLCommImpl;
  c->set_rank_table_file(rank_table_file);
  c->set_rank(rank);
  c->set_device_id(device_id);
  c->set_dev_ctx(std::move(dev_ctx));
  // comm_ = c
  comm_.reset(c);
  return c;
}

void HCCLCommContext::CreateHCCLGroup(const std::string& group_name, uint32_t nranks,
                                      const std::vector<uint32_t>& rank_ids) {
  /*
  PADDLE_ENFORCE_NOT_NULL(group_name,
                          platform::errors::InvalidArgument(
                              "The group name should not be null."));
  PADDLE_ENFORCE_GT(nranks, 0,
                    platform::errors::InvalidArgument(
                        "Expected nranks > 0. But received nranks is %d.", nranks));
  PADDLE_ENFORCE_NOT_NULL(rank_ids,
                          platform::errors::InvalidArgument(
                              "The rank ids should not be null."));
  */
  platform::dynload::hcom_create_group(group_name.c_str(), nranks, (unsigned int*)rank_ids.data());

  VLOG(1) << "hccl group with name " << group_name << " has been created";
}

}  // namespace platform
}  // namespace paddle

#endif
@@ -0,0 +1,127 @@
/**
 * Copyright 2019-2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file base.h
 * @brief HCOM data type definition
 *
 */

#ifndef HCCL_BASE_H_
#define HCCL_BASE_H_

#define HCOM_GROUP_PREFIX "HCOM_GROUP_"

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

typedef signed char s8;
typedef signed short s16;
typedef signed int s32;
typedef signed long long s64;
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

/**
 * @brief HCOM functions return value definition
 */
typedef enum tagHcclResult {
  HCCL_SUCCESS = 0,               /**< success */
  HCCL_E_PARA = 1,                /**< parameter error */
  HCCL_E_PTR = 2,                 /**< empty pointer */
  HCCL_E_MEMORY = 3,              /**< memory error */
  HCCL_E_INTERNAL = 4,            /**< internal error */
  HCCL_E_NOT_SUPPORT = 5,         /**< unsupported feature */
  HCCL_E_NOT_FOUND = 6,           /**< specific resource not found */
  HCCL_E_UNAVAIL = 7,             /**< resource unavailable */
  HCCL_E_SYSCALL = 8,             /**< system call error */
  HCCL_E_TIMEOUT = 9,             /**< timeout */
  HCCL_E_OPEN_FILE_FAILURE = 10,  /**< open file failed */
  HCCL_E_TCP_CONNECT = 11,        /**< tcp connect failed */
  HCCL_E_ROCE_CONNECT = 12,       /**< roce connect failed */
  HCCL_E_TCP_TRANSFER = 13,       /**< tcp transfer failed */
  HCCL_E_ROCE_TRANSFER = 14,      /**< roce transfer failed */
  HCCL_E_RUNTIME = 15,            /**< runtime api call failed */
  HCCL_E_DRV = 16,                /**< driver api call failed */
  HCCL_E_PROFILING = 17,          /**< profiling api call failed */
  HCCL_E_CCE = 18,                /**< cce api call failed */
  HCCL_E_NETWORK = 19,            /**< network api call failed */
  HCCL_E_RESERVED                 /**< reserved */
} hcclResult_t;

/* handle to communicator */
typedef void *hcclComm_t;

/**
 * @brief HCCL reduction operation
 */
typedef enum tagHcclRedOp {
  HCCL_REP_OP_SUM = 0,  /**< sum */
  HCCL_REP_OP_PROD = 1, /**< prod */
  HCCL_REP_OP_MAX = 2,  /**< max */
  HCCL_REP_OP_MIN = 3,  /**< min */
  HCCL_REP_OP_RESERVED  /**< reserved */
} hcclRedOp_t;

/**
 * @brief HCCL data type
 */
typedef enum tagHcclDataType {
  HCCL_DATA_TYPE_INT8 = 0,   /**< int8 */
  HCCL_DATA_TYPE_INT16 = 1,  /**< int16 */
  HCCL_DATA_TYPE_INT32 = 2,  /**< int32 */
  HCCL_DATA_TYPE_FP16 = 3,   /**< fp16 */
  HCCL_DATA_TYPE_FP32 = 4,   /**< fp32 */
  HCCL_DATA_TYPE_INT64 = 5,  /**< int64 */
  HCCL_DATA_TYPE_UINT64 = 6, /**< uint64 */
  HCCL_DATA_TYPE_RESERVED    /**< reserved */
} hcclDataType_t;

const u32 HCCL_MAX_SEGMENT_NUM = 8;  // The max number of gradient segments.

/**
 * @brief the feature of the model
 */
struct model_feature {
  const char *model_name; /**< The model name */
  u32 gradient_num;       /**< The number of gradients */
  float *gradient_size;   /**< The size of each gradient */
  float *gradient_time;   /**< The BP computation time of each gradient */
};

enum GradSplitForceMode {
  FORCE_NONE,     /**< no force */
  FORCE_SIZE,     /**< force split gradient by size */
  FORCE_RESERVED  /**< reserved */
};

/**
 * @brief stream handle.
 */
typedef void *rtStream_t;

/**
 * @brief model handle.
 */
typedef void *rtModel_t;

#ifdef __cplusplus
}
#endif  // __cplusplus
#endif  // HCCL_BASE_H_
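Note: the broadcast kernel earlier in this diff calls platform::ToHCCLDataType(x->type()). That helper lives in hccl_helper.h, which this diff does not show; given the hcclDataType_t enum above, it plausibly maps framework types like this. A sketch under that assumption, not the shipped helper:

// Hypothetical sketch of ToHCCLDataType; hccl_helper.h is not in this diff.
inline hcclDataType_t ToHCCLDataType(framework::proto::VarType::Type type) {
  switch (type) {
    case framework::proto::VarType::FP32:  return HCCL_DATA_TYPE_FP32;
    case framework::proto::VarType::FP16:  return HCCL_DATA_TYPE_FP16;
    case framework::proto::VarType::INT32: return HCCL_DATA_TYPE_INT32;
    case framework::proto::VarType::INT8:  return HCCL_DATA_TYPE_INT8;
    default:
      PADDLE_THROW(platform::errors::Unimplemented(
          "HCCL does not support data type %d.", static_cast<int>(type)));
  }
}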
@@ -0,0 +1,38 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/platform/dynload/hccl.h"

namespace paddle {
namespace platform {
namespace dynload {

std::once_flag hccl_dso_flag;
void *hccl_dso_handle;

#define DEFINE_WRAP(__name) DynLoad__##__name __name

HCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);

#if HCCL_VERSION_CODE >= 2212
HCCL_RAND_ROUTINE_EACH_AFTER_2212(DEFINE_WRAP)
#endif

#if HCCL_VERSION_CODE >= 2703
HCCL_RAND_ROUTINE_EACH_AFTER_2703(DEFINE_WRAP)
#endif

}  // namespace dynload
}  // namespace platform
}  // namespace paddle
@@ -0,0 +1,84 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once

// #include <hccl/hccl.h>
// #include <hccl/hccl_types.h>
#include <mutex>  // NOLINT

#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/dynload/hcom.h"
#include "paddle/fluid/platform/dynload/dynamic_loader.h"

namespace paddle {
namespace platform {
namespace dynload {

extern std::once_flag hccl_dso_flag;
extern void* hccl_dso_handle;

#define DECLARE_DYNAMIC_LOAD_HCCL_WRAP(__name)                           \
  struct DynLoad__##__name {                                             \
    template <typename... Args>                                          \
    auto operator()(Args... args) -> decltype(__name(args...)) {         \
      using HCCL_func = decltype(&::__name);                             \
      std::call_once(hccl_dso_flag, []() {                               \
        hccl_dso_handle = paddle::platform::dynload::GetHCCLDsoHandle(); \
      });                                                                \
      static void* p_##__name = dlsym(hccl_dso_handle, #__name);         \
      return reinterpret_cast<HCCL_func>(p_##__name)(args...);           \
    }                                                                    \
  };                                                                     \
  extern DynLoad__##__name __name

#define HCCL_RAND_ROUTINE_EACH(__macro)         \
  __macro(hcom_init);                           \
  __macro(hcom_destroy);                        \
  __macro(hcom_bind_model);                     \
  __macro(hcom_unbind_model);                   \
  __macro(hcom_send);                           \
  __macro(hcom_receive);                        \
  __macro(hcom_broadcast);                      \
  __macro(hcom_all_gather);                     \
  __macro(hcom_all_reduce);                     \
  __macro(hcom_reduce_scatter);                 \
  __macro(hcom_create_group);                   \
  __macro(hcom_destroy_group);                  \
  __macro(hcom_get_rank_id);                    \
  __macro(hcom_get_local_rank_id);              \
  __macro(hcom_get_local_rank_size);            \
  __macro(hcom_get_split_strategy);             \
  __macro(hcom_set_split_strategy_by_size);     \
  __macro(hcom_set_split_strategy_by_index);    \
  __macro(hcom_get_group_rank_from_world_rank); \
  __macro(hcom_get_world_rank_from_group_rank);


HCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_HCCL_WRAP)

#if HCCL_VERSION_CODE >= 2212
#define HCCL_RAND_ROUTINE_EACH_AFTER_2212(__macro) __macro(HCCLBroadcast);
HCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_HCCL_WRAP)
#endif

#if HCCL_VERSION_CODE >= 2703
#define HCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \
  __macro(HCCLSend);                               \
  __macro(HCCLRecv);
HCCL_RAND_ROUTINE_EACH_AFTER_2703(DECLARE_DYNAMIC_LOAD_HCCL_WRAP)
#endif

}  // namespace dynload
}  // namespace platform
}  // namespace paddle
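Note: each __macro(name) above expands DECLARE_DYNAMIC_LOAD_HCCL_WRAP into a functor that opens the HCCL shared library once and resolves the symbol via dlsym on first use, so call sites read like ordinary function calls. A usage sketch, with the argument list following the hcom_broadcast call in the c_broadcast kernel earlier in this diff (the values are placeholders):

// Usage sketch: the wrapper forwards its arguments to the dlsym-resolved
// symbol; the signature follows the call made in CBroadcastOpASCENDKernel.
void BroadcastSketch(void* ptr, int numel, hcclDataType_t dtype, uint32_t root,
                     const char* group, void* stream) {
  PADDLE_ENFORCE_NPU_SUCCESS(paddle::platform::dynload::hcom_broadcast(
      "tag0", ptr, numel, dtype, root, group, stream));
}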