Add macro BOOST_GET to enrich the error information of boost::get (#24175)

* add new macro BOOST_GET_SAFELY & unittests, test=develop

* add different macro types, test=develop

* fix get macro type in executor, test=develop

* backup of the four-macro version of this change

* use one macro for all cases, test=develop

* revert attribute change, test=develop

* change to three functions to work around a gcc 4.8 bug, test=develop

* polish some details, test=develop
Branch: release/2.0-alpha
Author: Chen Weihang, committed via GitHub 5 years ago
Parent: 9c073bbd53
Commit: aa0f254fbe

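The hunks below mechanically replace direct boost::get<T>(x) calls with the new BOOST_GET(T, x) and BOOST_GET_CONST(T, x) macros: BOOST_GET_CONST is used for read-only access, and BOOST_GET where the fetched reference is mutated afterwards (for example the FetchList references in the fetch op handles). The macro definitions themselves are not among the hunks shown on this page. As a minimal, hypothetical sketch of the idea (SafeGetConst and BOOST_GET_CONST_SKETCH below are illustrative names, not Paddle's actual implementation), a wrapper of this shape turns boost's generic bad_get exception into an error that reports the requested type, the type actually held by the variant, and the call site:

// Hypothetical sketch, not the macro added by this commit: wrap boost::get so
// that a failed access reports the requested type, the held type, and the
// file/line of the call instead of boost's bare bad_get message.
#include <boost/variant.hpp>
#include <sstream>
#include <stdexcept>
#include <typeinfo>

template <typename T, typename VariantT>
const T& SafeGetConst(const VariantT& v, const char* file, int line) {
  try {
    return boost::get<T>(v);
  } catch (const boost::bad_get&) {
    std::ostringstream msg;
    msg << "boost::get failed at " << file << ":" << line
        << ": requested type " << typeid(T).name()
        << ", but the variant holds type " << v.type().name();
    throw std::runtime_error(msg.str());
  }
}

#define BOOST_GET_CONST_SKETCH(TYPE, VALUE) \
  SafeGetConst<TYPE>((VALUE), __FILE__, __LINE__)

// Assumed usage:
//   boost::variant<int, float> attr = 3.0f;
//   int v = BOOST_GET_CONST_SKETCH(int, attr);  // throws with type and location info

Taking the type as the first macro argument mirrors the call sites in this diff; a type whose name contains a comma would need extra handling, but none of the types used below do.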
@@ -60,10 +60,10 @@ struct ExtractAttribute<bool> {
bool* operator()(Attribute& attr) const {
if (attr.type() == typeid(int)) { // NOLINT
int val = boost::get<int>(attr);
int val = BOOST_GET_CONST(int, attr);
attr = static_cast<bool>(val);
} else if (attr.type() == typeid(float)) { // NOLINT
float val = boost::get<float>(attr);
float val = BOOST_GET_CONST(float, attr);
attr = static_cast<bool>(val);
}
bool* attr_value = nullptr;
@@ -86,10 +86,10 @@ struct ExtractAttribute<int64_t> {
int64_t* operator()(Attribute& attr) const {
if (attr.type() == typeid(int)) { // NOLINT
int val = boost::get<int>(attr);
int val = BOOST_GET_CONST(int, attr);
attr = static_cast<int64_t>(val);
} else if (attr.type() == typeid(float)) { // NOLINT
int val = boost::get<float>(attr);
int val = BOOST_GET_CONST(float, attr);
attr = static_cast<int64_t>(val);
}
int64_t* attr_value = nullptr;
@@ -112,11 +112,11 @@ struct ExtractAttribute<std::vector<int64_t>> {
std::vector<int64_t>* operator()(Attribute& attr) const {
if (attr.type() == typeid(std::vector<int>)) { // NOLINT
std::vector<int> val = boost::get<std::vector<int>>(attr);
std::vector<int> val = BOOST_GET_CONST(std::vector<int>, attr);
std::vector<int64_t> vec(val.begin(), val.end());
attr = vec;
} else if (attr.type() == typeid(std::vector<float>)) { // NOLINT
std::vector<float> val = boost::get<std::vector<float>>(attr);
std::vector<float> val = BOOST_GET_CONST(std::vector<float>, attr);
std::vector<int64_t> vec(val.begin(), val.end());
attr = vec;
}
@@ -140,10 +140,10 @@ struct ExtractAttribute<float> {
float* operator()(Attribute& attr) const {
if (attr.type() == typeid(int)) { // NOLINT
int val = boost::get<int>(attr);
int val = BOOST_GET_CONST(int, attr);
attr = static_cast<float>(val);
} else if (attr.type() == typeid(int64_t)) { // NOLINT
int64_t val = boost::get<int64_t>(attr);
int64_t val = BOOST_GET_CONST(int64_t, attr);
attr = static_cast<float>(val);
}
float* attr_value = nullptr;

@@ -182,7 +182,7 @@ void AllReduceOpHandle::NCCLAllReduceFunc(
void AllReduceOpHandle::SyncNCCLAllReduce() {
if (FLAGS_sync_nccl_allreduce) {
for (auto &p : places_) {
int dev_id = boost::get<platform::CUDAPlace>(p).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p).device;
auto *nccl_ctxs =
nccl_ctxs_->GetRunEnvNCCLCtx(run_order_, use_hierarchical_allreduce_);
auto &nccl_ctx = nccl_ctxs->at(dev_id);

@@ -54,21 +54,22 @@ void ProcessGraph(std::vector<ir::Graph *> graphs, Scope *scope) {
if (node && node->IsOp()) {
if (node->Name() == "send") {
auto send_var_name = node->Op()->Input("X")[0];
auto send_varnames = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("send_varnames"));
auto epmap = boost::get<std::vector<std::string>>(
node->Op()->GetNullableAttr("epmap"));
auto height_section = boost::get<std::vector<int64_t>>(
node->Op()->GetNullableAttr("sections"));
auto send_varnames =
BOOST_GET_CONST(std::vector<std::string>,
node->Op()->GetNullableAttr("send_varnames"));
auto epmap = BOOST_GET_CONST(std::vector<std::string>,
node->Op()->GetNullableAttr("epmap"));
auto height_section = BOOST_GET_CONST(
std::vector<int64_t>, node->Op()->GetNullableAttr("sections"));
auto trainer_id =
boost::get<int>(node->Op()->GetNullableAttr("trainer_id"));
BOOST_GET_CONST(int, node->Op()->GetNullableAttr("trainer_id"));
auto merge_add =
boost::get<bool>(node->Op()->GetNullableAttr("merge_add"));
BOOST_GET_CONST(bool, node->Op()->GetNullableAttr("merge_add"));
if (!merge_add) {
merge_add = FLAGS_communicator_is_sgd_optimizer;
}
auto use_send_handler =
boost::get<bool>(node->Op()->GetNullableAttr("use_send_handler"));
auto use_send_handler = BOOST_GET_CONST(
bool, node->Op()->GetNullableAttr("use_send_handler"));
send_varname_to_ctx[send_var_name] = operators::distributed::RpcContext(
send_var_name, send_varnames, epmap, height_section, trainer_id,
merge_add, use_send_handler);
@@ -198,16 +199,16 @@ FetchResultType AsyncSSAGraphExecutor::Run(
HandleException();
FetchList ret;
auto &val = boost::get<FetchList>(fetch_data);
auto &val = BOOST_GET(FetchList, fetch_data);
for (size_t fetch_idx = 0; fetch_idx < fetch_tensors.size(); ++fetch_idx) {
if (data_is_lod_tensor(val.at(fetch_idx))) {
std::vector<const LoDTensor *> lodtensor_ptrs;
lodtensor_ptrs.push_back(&(boost::get<LoDTensor>(val.at(fetch_idx))));
lodtensor_ptrs.push_back(&(BOOST_GET(LoDTensor, val.at(fetch_idx))));
LoDTensor var;
var.MergeLoDTensor(lodtensor_ptrs, platform::CPUPlace());
ret.emplace_back(var);
} else {
auto array = boost::get<LoDTensorArray>(val.at(fetch_idx));
auto array = BOOST_GET(LoDTensorArray, val.at(fetch_idx));
LoDTensorArray item_array;
item_array.reserve(array.size());
for (size_t i = 0; i < array.size(); ++i) {

@@ -75,7 +75,8 @@ void BroadcastOpHandle::BroadcastOneVar(
} else {
#if defined(PADDLE_WITH_NCCL)
VarHandle *out_handle = nullptr;
int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
int root_id =
BOOST_GET_CONST(platform::CUDAPlace, in_tensor.place()).device;
std::vector<std::function<void()>> broadcast_calls;
int type = platform::ToNCCLDataType(in_tensor.type());
@@ -86,7 +87,7 @@ void BroadcastOpHandle::BroadcastOneVar(
->FindVar(out_var_handle->name());
int dst_id =
boost::get<platform::CUDAPlace>(out_var_handle->place()).device;
BOOST_GET_CONST(platform::CUDAPlace, out_var_handle->place()).device;
auto &nccl_ctx = nccl_ctxs_->at(dst_id);

@@ -46,7 +46,7 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
platform::DeviceContextPool::Instance().Get(place));
if (dynamic_cast<StreamGarbageCollector *>(gc_)) {
platform::CUDADeviceGuard guard(
boost::get<platform::CUDAPlace>(place).device);
BOOST_GET_CONST(platform::CUDAPlace, place).device);
PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
PADDLE_ENFORCE_NOT_NULL(event_);
}
@@ -62,7 +62,7 @@ EagerDeletionOpHandle::EagerDeletionOpHandle(
EagerDeletionOpHandle::~EagerDeletionOpHandle() {
#ifdef PADDLE_WITH_CUDA
if (event_) {
auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx_->GetPlace());
auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx_->GetPlace());
platform::CUDADeviceGuard guard(gpu_place.device);
PADDLE_ENFORCE(cudaEventDestroy(event_));
}
@@ -72,7 +72,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() {
void EagerDeletionOpHandle::InitCUDA() {
#ifdef PADDLE_WITH_CUDA
int dev_id =
boost::get<platform::CUDAPlace>(dev_ctxes_.begin()->first).device;
BOOST_GET_CONST(platform::CUDAPlace, dev_ctxes_.begin()->first).device;
events_[dev_id] = nullptr;
#endif
}

@@ -68,22 +68,22 @@ static void CheckDims(const framework::DDim &tensor_dims,
void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
if (return_merged_) {
if (data_is_lod_tensor(tensors_[0])) {
const auto &tensor_dims = boost::get<LoDTensor>(tensors_[0]).dims();
const auto &tensor_dims = BOOST_GET_CONST(LoDTensor, tensors_[0]).dims();
for (size_t i = 1; i < tensors_.size(); i++) {
const auto &ele_dims = boost::get<LoDTensor>(tensors_[i]).dims();
const auto &ele_dims = BOOST_GET_CONST(LoDTensor, tensors_[i]).dims();
CheckDims(tensor_dims, ele_dims, offset_);
}
std::vector<const LoDTensor *> tensors_ptr;
tensors_ptr.reserve(tensors_.size());
for (auto &t : tensors_) {
tensors_ptr.emplace_back(&boost::get<LoDTensor>(t));
tensors_ptr.emplace_back(&BOOST_GET_CONST(LoDTensor, t));
}
auto &val = boost::get<FetchList>(*data_);
auto &val = BOOST_GET(FetchList, *data_);
LoDTensor var;
var.MergeLoDTensor(tensors_ptr, platform::CPUPlace());
val.at(offset_) = std::move(var);
} else {
auto &array = boost::get<LoDTensorArray>(tensors_[0]);
auto &array = BOOST_GET_CONST(LoDTensorArray, tensors_[0]);
LoDTensorArray tmp_array;
tmp_array.reserve(array.size());
for (size_t i = 0; i < array.size(); ++i) {
@@ -92,7 +92,7 @@ void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
tensors_ptr.reserve(tensors_.size());
tensors_ptr.push_back(&array[i]);
for (size_t j = 1; j < tensors_.size(); ++j) {
auto &element = boost::get<LoDTensorArray>(tensors_[j]);
auto &element = BOOST_GET_CONST(LoDTensorArray, tensors_[j]);
const auto &ele_dims = element[i].dims();
CheckDims(tensor_dims, ele_dims, offset_);
tensors_ptr.push_back(&element[i]);
@@ -100,11 +100,11 @@ void FetchOpHandle::WaitAndMergeCPUFetchVars() const {
tmp_array.emplace_back();
tmp_array.back().MergeLoDTensor(tensors_ptr, platform::CPUPlace());
}
auto &val = boost::get<FetchList>(*data_);
auto &val = BOOST_GET(FetchList, *data_);
val.at(offset_) = std::move(tmp_array);
}
} else {
auto &val = boost::get<FetchUnmergedList>(*data_);
auto &val = BOOST_GET(FetchUnmergedList, *data_);
val.at(offset_) = std::move(tensors_);
}
}
@@ -142,13 +142,13 @@ void FetchOpHandle::RunImpl() {
if (var->IsType<LoDTensor>()) {
auto &t = var->Get<framework::LoDTensor>();
auto &item = boost::get<LoDTensor>(tensors_[i]);
auto &item = BOOST_GET(LoDTensor, tensors_[i]);
TransData(t, &item);
} else {
auto &t = var->Get<framework::LoDTensorArray>();
LoDTensorArray tmp(t.size());
tensors_[i] = tmp;
auto &item = boost::get<LoDTensorArray>(tensors_[i]);
auto &item = BOOST_GET(LoDTensorArray, tensors_[i]);
for (size_t j = 0; j < t.size(); ++j) {
TransData(t[j], &item[j]);
}

@@ -84,7 +84,7 @@ inline bool IsOpRole(const OpDesc &op, OpRole role) {
const auto &attrs = op.GetAttrMap();
auto iter = attrs.find(OpProtoAndCheckerMaker::OpRoleAttrName());
if (iter == attrs.end()) return false;
return static_cast<bool>(boost::get<int>(iter->second) &
return static_cast<bool>(BOOST_GET_CONST(int, iter->second) &
static_cast<int>(role));
}
@@ -92,13 +92,13 @@ inline std::vector<std::string> GetOpRoleVarsOrEmpty(const OpDesc &op) {
const auto &attrs = op.GetAttrMap();
auto iter = attrs.find(OpProtoAndCheckerMaker::OpRoleVarAttrName());
if (iter == attrs.end()) return {};
auto &ret = boost::get<std::vector<std::string>>(iter->second);
auto &ret = BOOST_GET_CONST(std::vector<std::string>, iter->second);
PADDLE_ENFORCE_EQ(
ret.size() % 2, 0,
platform::errors::InvalidArgument(
"The size of attribute %s must be an even number, but got %d",
OpProtoAndCheckerMaker::OpRoleVarAttrName(), ret.size()));
return boost::get<std::vector<std::string>>(iter->second);
return BOOST_GET_CONST(std::vector<std::string>, iter->second);
}
bool IsDataParallelInferenceGraph(const ir::Graph &graph);

@@ -122,7 +122,7 @@ void TensorCheckerVisitor<platform::CUDADeviceContext>::apply(
auto* dev_ctx = reinterpret_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(tensor_.place()));
int dev_id = boost::get<platform::CUDAPlace>(tensor_.place()).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, tensor_.place()).device;
PADDLE_ENFORCE_EQ(
(dev_id >= 0 && dev_id < multi_op_var2gpu_str_mutex().size()), true,
platform::errors::OutOfRange("GPU dev_id must >=0 and < dev_count=%d",

@@ -83,7 +83,7 @@ class NCCLOpHandleBase : public OpHandleBase {
}
for (auto& p : dev_ctxes_) {
int dev_id = boost::get<platform::CUDAPlace>(p.first).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device;
if (inter_events_.find(dev_id) != inter_events_.end()) {
continue;
}
@@ -104,7 +104,7 @@ class NCCLOpHandleBase : public OpHandleBase {
ncclRedOp_t op) {
PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0");
auto flat_nccl_ctxs = nccl_ctxs_->GetFlatCtx(run_order_);
int dev_id = boost::get<platform::CUDAPlace>(place).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
auto& nccl_ctx = flat_nccl_ctxs->at(dev_id);
auto stream = nccl_ctx.stream();
auto comm = nccl_ctx.comm_;
@@ -146,7 +146,7 @@ class NCCLOpHandleBase : public OpHandleBase {
void InterReduce(platform::Place place, const void* sendbuff, void* recvbuff,
size_t count, ncclDataType_t datatype, ncclRedOp_t op) {
auto nccl_ctxs = nccl_ctxs_->GetHierarchicalInterCtx(run_order_);
int dev_id = boost::get<platform::CUDAPlace>(place).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
auto& nccl_ctx = nccl_ctxs->at(dev_id);
auto stream = nccl_ctx.stream();
auto comm = nccl_ctx.comm_;
@@ -173,7 +173,7 @@ class NCCLOpHandleBase : public OpHandleBase {
ncclRedOp_t op) {
auto nccl_ctxs = nccl_ctxs_->GetHierarchicalExterCtx(run_order_);
PADDLE_ENFORCE(nccl_ctxs_, "can't get exter %d nccl_ctxs", run_order_);
int dev_id = boost::get<platform::CUDAPlace>(place).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
auto& nccl_ctx = nccl_ctxs->at(dev_id);
auto stream = nccl_ctx.stream();
auto comm = nccl_ctx.comm_;
@@ -199,7 +199,7 @@ class NCCLOpHandleBase : public OpHandleBase {
void InterBroadCast(platform::Place place, void* sendbuff, size_t count,
ncclDataType_t datatype, ncclRedOp_t op) {
auto nccl_ctxs = nccl_ctxs_->GetHierarchicalInterCtx(run_order_);
int dev_id = boost::get<platform::CUDAPlace>(place).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
auto& nccl_ctx = nccl_ctxs->at(dev_id);
auto stream = nccl_ctx.stream();
auto comm = nccl_ctx.comm_;

@@ -45,7 +45,7 @@ OpHandleBase::~OpHandleBase() PADDLE_MAY_THROW {
void OpHandleBase::InitCUDA() {
#ifdef PADDLE_WITH_CUDA
for (auto &p : dev_ctxes_) {
int dev_id = boost::get<platform::CUDAPlace>(p.first).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device;
PADDLE_ENFORCE(cudaSetDevice(dev_id));
PADDLE_ENFORCE(
cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming));
@@ -55,7 +55,8 @@ void OpHandleBase::InitCUDA() {
auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
if (out_var_handle) {
int dev_id =
boost::get<platform::CUDAPlace>(out_var_handle->place()).device;
BOOST_GET_CONST(platform::CUDAPlace, out_var_handle->place())
.device;
out_var_handle->SetGenerateEvent(events_.at(dev_id));
}
}
@@ -63,7 +64,7 @@ void OpHandleBase::InitCUDA() {
PADDLE_ENFORCE_EQ(dev_ctxes_.size(), 1UL,
"%s should have only one dev_ctx.", Name());
auto &place = dev_ctxes_.begin()->first;
int dev_id = boost::get<platform::CUDAPlace>(place).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
for (auto &out_var : outputs_) {
auto *out_var_handle = dynamic_cast<VarHandle *>(out_var);
if (out_var_handle) {
@@ -192,7 +193,7 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA
if (!events_.empty()) { // Use event
for (auto &p : dev_ctxes_) {
auto dev_id = boost::get<platform::CUDAPlace>(p.first).device;
auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device;
auto *cuda_dev_ctx = static_cast<platform::CUDADeviceContext *>(p.second);
VLOG(10) << "cudadevicecontext:" << cuda_dev_ctx << ", dev_id:" << dev_id;
PADDLE_ENFORCE_CUDA_SUCCESS(
@@ -210,8 +211,8 @@ void OpHandleBase::RunAndRecordEvent(platform::Place p,
} else {
auto *ctx = dev_ctxes_.at(p);
auto *cuda_ctx = static_cast<platform::CUDADeviceContext *>(ctx);
cuda_ctx->RecordEvent(events_.at(boost::get<platform::CUDAPlace>(p).device),
callback);
cuda_ctx->RecordEvent(
events_.at(BOOST_GET_CONST(platform::CUDAPlace, p).device), callback);
}
#else
callback();

@@ -43,7 +43,7 @@ static std::vector<std::unique_ptr<ir::Graph>> SeparateMultiDevicesGraph(
for (auto &op : op_handles) {
auto &dev_ctx = op->DeviceContext();
auto &p = dev_ctx.begin()->first;
int dev_id = boost::get<platform::CUDAPlace>(p).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p).device;
auto &dev_dummys = graphs[dev_id]->Get<GraphDepVars>(kGraphDepVars);
graphs[dev_id]->AddNode(graph->RemoveNode(op->Node()).release());
@@ -256,13 +256,14 @@ FetchResultType ParallelSSAGraphExecutor::Run(
if (!is_valid[scope_idx]) {
continue;
}
const auto &fetch_list = boost::get<FetchList>(fetch_data[scope_idx]);
const auto &fetch_list =
BOOST_GET_CONST(FetchList, fetch_data[scope_idx]);
if (data_is_lod_tensor(fetch_list[fetch_idx])) {
lodtensor_ptrs.push_back(
&(boost::get<LoDTensor>(fetch_list[fetch_idx])));
&(BOOST_GET_CONST(LoDTensor, fetch_list[fetch_idx])));
} else {
lodtensorarray_ptrs.push_back(
&(boost::get<LoDTensorArray>(fetch_list[fetch_idx])));
&(BOOST_GET_CONST(LoDTensorArray, fetch_list[fetch_idx])));
}
}
if (lodtensor_ptrs.size() != 0) {
@@ -295,7 +296,7 @@ FetchResultType ParallelSSAGraphExecutor::Run(
continue;
}
const auto &fetch_list =
boost::get<FetchUnmergedList>(fetch_data[scope_idx]);
BOOST_GET_CONST(FetchUnmergedList, fetch_data[scope_idx]);
PADDLE_ENFORCE_EQ(
fetch_list[fetch_idx].size(), 1,
platform::errors::Fatal("Each place must have only one fetched "

@@ -271,13 +271,13 @@ void ReduceOpHandle::RunImpl() {
out_var_handle->place(), pre_in.type());
auto out_p = out_var_handle->place();
int root_id = boost::get<platform::CUDAPlace>(out_p).device;
int root_id = BOOST_GET_CONST(platform::CUDAPlace, out_p).device;
std::vector<std::function<void()>> all_reduce_calls;
for (size_t i = 0; i < var_scopes.size(); ++i) {
auto &p = in_places[i];
auto &lod_tensor = *lod_tensors[i];
int dev_id = boost::get<platform::CUDAPlace>(p).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p).device;
auto &nccl_ctx = nccl_ctxs_->at(dev_id);
void *buffer = const_cast<void *>(lod_tensor.data<void>());

@@ -54,7 +54,7 @@ struct ScaleLossGradFunctor {
#ifdef PADDLE_WITH_CUDA
OutT cast_coeff = static_cast<OutT>(coeff_);
auto stream = static_cast<platform::CUDADeviceContext *>(ctx_)->stream();
memory::Copy(boost::get<platform::CUDAPlace>(place_), out_data,
memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, place_), out_data,
platform::CPUPlace(), &cast_coeff, SizeOfType(out_dtype_),
stream);
VLOG(10) << place_ << "RUN Scale loss grad op";

@@ -66,7 +66,7 @@ void ShareTensorBufferOpHandle::AddReuseVarPair(
void ShareTensorBufferOpHandle::InitCUDA() {
#ifdef PADDLE_WITH_CUDA
int dev_id =
boost::get<platform::CUDAPlace>(dev_ctxes_.begin()->first).device;
BOOST_GET_CONST(platform::CUDAPlace, dev_ctxes_.begin()->first).device;
events_[dev_id] = nullptr;
#endif
}

@@ -127,7 +127,7 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
PADDLE_ENFORCE(in_numel / 2 == static_cast<size_t>(k));
out_numel = (out_numel == 0) ? static_cast<size_t>(out.numel()) : out_numel;
int dev_id = boost::get<platform::CUDAPlace>(place).device;
int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device;
auto *nccl_ctxs = nccl_ctxs_->GetRunEnvNCCLCtx(run_order_, false);
auto &nccl_ctx = nccl_ctxs->at(dev_id);
auto stream = nccl_ctx.stream();

@@ -49,7 +49,7 @@ void TestMain(const platform::Place &place, uint16_t lanes) {
CHECK_EQ(0, dl_tensor.ctx.device_id);
} else if (platform::is_gpu_place(place)) {
CHECK_EQ(kDLGPU, dl_tensor.ctx.device_type);
CHECK_EQ(boost::get<platform::CUDAPlace>(place).device,
CHECK_EQ(BOOST_GET_CONST(platform::CUDAPlace, place).device,
dl_tensor.ctx.device_id);
} else if (platform::is_cuda_pinned_place(place)) {
CHECK_EQ(kDLCPUPinned, dl_tensor.ctx.device_type);

@@ -452,15 +452,15 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
if (platform::is_gpu_place(place_)) {
if (IsFastEagerDeletionModeEnabled()) {
gc.reset(new UnsafeFastGPUGarbageCollector(
boost::get<platform::CUDAPlace>(place_), max_memory_size));
BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size));
} else {
gc.reset(new DefaultStreamGarbageCollector(
boost::get<platform::CUDAPlace>(place_), max_memory_size));
BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size));
}
} else if (platform::is_cpu_place(place_)) {
#endif
gc.reset(new CPUGarbageCollector(boost::get<platform::CPUPlace>(place_),
max_memory_size));
gc.reset(new CPUGarbageCollector(
BOOST_GET_CONST(platform::CPUPlace, place_), max_memory_size));
#ifdef PADDLE_WITH_CUDA
}
#endif
@@ -522,7 +522,7 @@ void Executor::RunPreparedContext(
for (auto* op : global_block.AllOps()) {
if (op->Type() == kFeedOpType) {
std::string feed_target_name = op->Output("Out")[0];
int idx = boost::get<int>(op->GetAttr("col"));
int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
SetFeedVariable(scope, *(*feed_targets)[feed_target_name],
feed_holder_name, idx);
}
@@ -534,7 +534,7 @@ void Executor::RunPreparedContext(
for (auto* op : global_block.AllOps()) {
if (op->Type() == kFetchOpType) {
std::string fetch_target_name = op->Input("X")[0];
int idx = boost::get<int>(op->GetAttr("col"));
int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
*(*fetch_targets)[fetch_target_name] =
GetFetchVariable(*scope, fetch_holder_name, idx);
}

@@ -151,7 +151,7 @@ void BoxWrapper::PullSparse(const paddle::platform::Place& place,
} else if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
VLOG(3) << "Begin copy keys, key_num[" << total_length << "]";
int device_id = boost::get<platform::CUDAPlace>(place).GetDeviceId();
int device_id = BOOST_GET_CONST(platform::CUDAPlace, place).GetDeviceId();
LoDTensor& total_keys_tensor = keys_tensor[device_id];
uint64_t* total_keys = reinterpret_cast<uint64_t*>(
total_keys_tensor.mutable_data<int64_t>({total_length, 1}, place));
@@ -224,7 +224,7 @@ void BoxWrapper::PushSparseGrad(const paddle::platform::Place& place,
"Warning:: CPUPlace is not supported in PaddleBox now."));
} else if (platform::is_gpu_place(place)) {
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
int device_id = boost::get<platform::CUDAPlace>(place).GetDeviceId();
int device_id = BOOST_GET_CONST(platform::CUDAPlace, place).GetDeviceId();
LoDTensor& cached_total_keys_tensor = keys_tensor[device_id];
uint64_t* total_keys =
reinterpret_cast<uint64_t*>(cached_total_keys_tensor.data<int64_t>());
@@ -236,7 +236,7 @@ void BoxWrapper::PushSparseGrad(const paddle::platform::Place& place,
push_boxps_timer.Start();
int ret = boxps_ptr_->PushSparseGPU(
total_keys, total_grad_values_gpu, static_cast<int>(total_length),
boost::get<platform::CUDAPlace>(place).GetDeviceId());
BOOST_GET_CONST(platform::CUDAPlace, place).GetDeviceId());
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
"PushSparseGPU failed in BoxPS."));
push_boxps_timer.Pause();

@@ -116,7 +116,7 @@ void BoxWrapper::CopyForPull(const paddle::platform::Place& place,
const int64_t total_length) {
auto stream = dynamic_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(
boost::get<platform::CUDAPlace>(place)))
BOOST_GET_CONST(platform::CUDAPlace, place)))
->stream();
auto buf_value = memory::AllocShared(place, values.size() * sizeof(float*));
float** gpu_values = reinterpret_cast<float**>(buf_value->ptr());
@@ -134,7 +134,7 @@ void BoxWrapper::CopyKeys(const paddle::platform::Place& place,
const int64_t* gpu_len, int slot_num, int total_len) {
auto stream = dynamic_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(
boost::get<platform::CUDAPlace>(place)))
BOOST_GET_CONST(platform::CUDAPlace, place)))
->stream();
CopyKeysKernel<<<(total_len + 512 - 1) / 512, 512, 0, stream>>>(
origin_keys, total_keys, gpu_len, slot_num, total_len);
@@ -149,7 +149,7 @@ void BoxWrapper::CopyForPush(const paddle::platform::Place& place,
const int batch_size) {
auto stream = dynamic_cast<platform::CUDADeviceContext*>(
platform::DeviceContextPool::Instance().Get(
boost::get<platform::CUDAPlace>(place)))
BOOST_GET_CONST(platform::CUDAPlace, place)))
->stream();
auto slot_lengths_lod = slot_lengths;
for (int i = 1; i < slot_lengths_lod.size(); i++) {

@@ -84,7 +84,7 @@ StreamGarbageCollector::StreamGarbageCollector(const platform::CUDAPlace &place,
}
StreamGarbageCollector::~StreamGarbageCollector() {
auto place = boost::get<platform::CUDAPlace>(this->dev_ctx_->GetPlace());
auto place = BOOST_GET_CONST(platform::CUDAPlace, this->dev_ctx_->GetPlace());
platform::CUDADeviceGuard guard(place.device);
PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
PADDLE_ENFORCE(cudaStreamDestroy(stream_));

@@ -161,7 +161,7 @@ class GradOpDescMakerBase {
template <typename T>
inline const T& Attr(const std::string& name) const {
return boost::get<T>(GetAttr(name));
return BOOST_GET_CONST(T, GetAttr(name));
}
std::string ForwardOpType() const { return this->fwd_op_.Type(); }

@@ -170,7 +170,8 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const {
eltwise_y_in_tensor->numel(), 0.0f);
// update weights and biases
float epsilon = boost::get<float>(batch_norm->Op()->GetAttr("epsilon"));
float epsilon =
BOOST_GET_CONST(float, batch_norm->Op()->GetAttr("epsilon"));
recompute_bias_and_weights(scope, conv_weight, *bn_scale, *bn_bias_tensor,
*bn_mean, *bn_variance, eltwise_y_in_tensor,
epsilon, conv_type());
@@ -275,7 +276,8 @@ void ConvEltwiseAddBNFusePass::ApplyImpl(ir::Graph* graph) const {
scope->FindVar(bn_bias->Name())->GetMutable<LoDTensor>();
// update weights and biases
float epsilon = boost::get<float>(batch_norm->Op()->GetAttr("epsilon"));
float epsilon =
BOOST_GET_CONST(float, batch_norm->Op()->GetAttr("epsilon"));
recompute_bias_and_weights(scope, conv_weight, *bn_scale, *bn_bias_tensor,
*bn_mean, *bn_variance, eltwise_y_in_tensor,
epsilon, conv_type());

@@ -90,7 +90,7 @@ class PlacementPassTest {
if (node->IsOp() && node->Op()) {
auto* op = node->Op();
if (op->HasAttr("use_cudnn") &&
boost::get<bool>(op->GetAttr("use_cudnn"))) {
BOOST_GET_CONST(bool, op->GetAttr("use_cudnn"))) {
++use_cudnn_true_count;
}
}

@@ -192,9 +192,10 @@ static int BuildFusion(Graph* graph, const std::string& name_scope,
GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
// TODO(jczaja): Add support for is_sparse / is_distributed
auto is_sparse = boost::get<bool>(lookup_table->Op()->GetAttr("is_sparse"));
auto is_sparse =
BOOST_GET_CONST(bool, lookup_table->Op()->GetAttr("is_sparse"));
auto is_distributed =
boost::get<bool>(lookup_table->Op()->GetAttr("is_distributed"));
BOOST_GET_CONST(bool, lookup_table->Op()->GetAttr("is_distributed"));
if (is_sparse == true || is_distributed == true) {
return;

@@ -173,7 +173,7 @@ void FCElementwiseLayerNormFusePass::ApplyImpl(ir::Graph *graph) const {
}
int begin_norm_axis =
boost::get<int>(layer_norm->Op()->GetAttr("begin_norm_axis"));
BOOST_GET_CONST(int, layer_norm->Op()->GetAttr("begin_norm_axis"));
auto layer_norm_x_dims = fc_out->Var()->GetShape();
auto layer_norm_x_mat_dims = framework::flatten_to_2d(
framework::make_ddim(layer_norm_x_dims), begin_norm_axis);

Some files were not shown because too many files have changed in this diff.
