You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
201 lines
6.7 KiB
201 lines
6.7 KiB
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
|
|
|
|
#include "paddle/fluid/framework/device_worker.h"
|
|
#include "paddle/fluid/framework/device_worker_factory.h"
|
|
#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
|
|
#include "paddle/fluid/framework/fleet/heter_wrapper.h"
|
|
#include "paddle/fluid/platform/cpu_helper.h"
|
|
#include "paddle/fluid/string/string_helper.h"
|
|
|
|
#if (defined PADDLE_WITH_NCCL) && (defined PADDLE_WITH_PSLIB)
|
|
#include "paddle/fluid/platform/cuda_device_guard.h"
|
|
|
|
#if defined _WIN32 || defined __APPLE__
|
|
#else
|
|
#define _LINUX
|
|
#endif
|
|
|
|
namespace paddle {
|
|
namespace framework {
|
|
|
|
void PSGPUWorker::Initialize(const TrainerDesc& desc) {
|
|
param_ = desc.downpour_param();
|
|
mpi_rank_ = desc.mpi_rank();
|
|
trainer_desc_ = desc;
|
|
/*
|
|
for (int i = 0; i < trainer_desc_.xpu_recv_list_size(); ++i) {
|
|
send_var_list_.push_back(trainer_desc_.xpu_recv_list(i));
|
|
}
|
|
*/
|
|
for (int i = 0; i < param_.sparse_table_size(); ++i) {
|
|
uint64_t table_id =
|
|
static_cast<uint64_t>(param_.sparse_table(i).table_id());
|
|
TableParameter table = param_.sparse_table(i);
|
|
sparse_key_names_[table_id].resize(table.sparse_key_name_size());
|
|
for (int j = 0; j < table.sparse_key_name_size(); ++j) {
|
|
sparse_key_names_[table_id][j] = table.sparse_key_name(j);
|
|
}
|
|
sparse_value_names_[table_id].resize(table.sparse_value_name_size());
|
|
for (int j = 0; j < table.sparse_value_name_size(); ++j) {
|
|
sparse_value_names_[table_id][j] = table.sparse_value_name(j);
|
|
}
|
|
sparse_grad_names_[table_id].resize(table.sparse_grad_name_size());
|
|
for (int j = 0; j < table.sparse_grad_name_size(); ++j) {
|
|
sparse_grad_names_[table_id][j] = table.sparse_grad_name(j);
|
|
}
|
|
label_var_name_[table_id] = table.label_var_name();
|
|
sparse_push_keys_[table_id] = std::vector<uint64_t>();
|
|
}
|
|
|
|
for (int i = 0; i < param_.dense_table_size(); ++i) {
|
|
uint64_t table_id = static_cast<uint64_t>(param_.dense_table(i).table_id());
|
|
auto table = param_.dense_table(i);
|
|
dense_value_names_[table_id].resize(table.dense_value_name_size());
|
|
for (int j = 0; j < table.dense_value_name_size(); ++j) {
|
|
dense_value_names_[table_id][j] = table.dense_value_name(j);
|
|
}
|
|
dense_grad_names_[table_id].resize(table.dense_grad_name_size());
|
|
for (int j = 0; j < table.dense_grad_name_size(); ++j) {
|
|
dense_grad_names_[table_id][j] = table.dense_grad_name(j);
|
|
}
|
|
}
|
|
|
|
skip_ops_.resize(param_.skip_ops_size());
|
|
for (int i = 0; i < param_.skip_ops_size(); ++i) {
|
|
skip_ops_[i] = param_.skip_ops(i);
|
|
}
|
|
for (int i = 0; i < param_.stat_var_names_size(); ++i) {
|
|
stat_var_name_map_[param_.stat_var_names(i)] = 1;
|
|
}
|
|
|
|
need_to_push_sparse_ = param_.push_sparse();
|
|
need_to_push_dense_ = param_.push_dense();
|
|
|
|
fetch_config_ = desc.fetch_config();
|
|
use_cvm_ = desc.use_cvm();
|
|
// for sparse value accessor, embedding only
|
|
no_cvm_ = desc.no_cvm();
|
|
scale_datanorm_ = desc.scale_datanorm();
|
|
dump_slot_ = desc.dump_slot();
|
|
dump_fields_.resize(desc.dump_fields_size());
|
|
for (int i = 0; i < desc.dump_fields_size(); ++i) {
|
|
dump_fields_[i] = desc.dump_fields(i);
|
|
}
|
|
adjust_ins_weight_config_ = desc.adjust_ins_weight_config();
|
|
need_dump_param_ = false;
|
|
dump_param_.resize(desc.dump_param_size());
|
|
for (int i = 0; i < desc.dump_param_size(); ++i) {
|
|
dump_param_[i] = desc.dump_param(i);
|
|
}
|
|
if (desc.dump_param_size() != 0) {
|
|
need_dump_param_ = true;
|
|
}
|
|
for (int i = 0; i < desc.check_nan_var_names_size(); ++i) {
|
|
check_nan_var_names_.push_back(desc.check_nan_var_names(i));
|
|
}
|
|
copy_table_config_ = desc.copy_table_config();
|
|
for (int i = 0; i < copy_table_config_.src_sparse_tables_size(); ++i) {
|
|
uint64_t src_table = copy_table_config_.src_sparse_tables(i);
|
|
uint64_t dest_table = copy_table_config_.dest_sparse_tables(i);
|
|
VLOG(3) << "copy_sparse_tables_ push back " << src_table << "->"
|
|
<< dest_table;
|
|
copy_sparse_tables_.push_back(std::make_pair(src_table, dest_table));
|
|
}
|
|
for (int i = 0; i < copy_table_config_.src_dense_tables_size(); ++i) {
|
|
uint64_t src_table = copy_table_config_.src_dense_tables(i);
|
|
uint64_t dest_table = copy_table_config_.dest_dense_tables(i);
|
|
VLOG(3) << "copy_dense_tables_ push back " << src_table << "->"
|
|
<< dest_table;
|
|
copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
|
|
}
|
|
for (auto& m : copy_table_config_.table_denpendency_map()) {
|
|
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
|
|
// currently only support one dependency
|
|
for (auto& value : m.values()) {
|
|
table_dependency_[m.key()] = value;
|
|
}
|
|
}
|
|
}
|
|
// pull_queue_ = paddle::framework::MakeChannel<std::shared_ptr<HeterTask>>();
|
|
// push_queue_ = paddle::framework::MakeChannel<std::shared_ptr<HeterTask>>();
|
|
}
|
|
|
|
// Re-targets writer_ at the given string channel by calling its Reset().
//
// @param queue channel object handed to writer_.Reset().
void PSGPUWorker::SetChannelWriter(ChannelObject<std::string>* queue) {
  this->writer_.Reset(queue);
}
|
|
|
|
void PSGPUWorker::SetNeedDump(bool need_dump_field) {
|
|
need_dump_field_ = need_dump_field;
|
|
}
|
|
|
|
// Intentionally empty: this worker performs no work in DumpParam().
void PSGPUWorker::DumpParam() {
}
|
|
|
|
void PSGPUWorker::TrainFiles() {
|
|
platform::SetNumThreads(1);
|
|
platform::Timer timeline;
|
|
timeline.Start();
|
|
|
|
int total_ins_num = 0;
|
|
|
|
// how to accumulate fetched values here
|
|
device_reader_->Start();
|
|
int cur_batch;
|
|
while ((cur_batch = device_reader_->Next()) > 0) {
|
|
total_ins_num += cur_batch;
|
|
for (auto& op : ops_) {
|
|
bool need_skip = false;
|
|
for (auto t = 0u; t < skip_ops_.size(); ++t) {
|
|
if (op->Type().find(skip_ops_[t]) != std::string::npos) {
|
|
need_skip = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!need_skip) {
|
|
op->Run(*thread_scope_, place_);
|
|
}
|
|
}
|
|
|
|
PrintFetchVars();
|
|
thread_scope_->DropKids();
|
|
}
|
|
timeline.Pause();
|
|
VLOG(1) << "GpuPs worker " << thread_id_ << " train cost "
|
|
<< timeline.ElapsedSec() << " seconds, ins_num: " << total_ins_num;
|
|
return;
|
|
}
|
|
|
|
// Sets the worker's statistic members (every *_time_ field listed below and
// total_inst_) back to 0.
void PSGPUWorker::ResetStat() {
  // Instance counter and overall time.
  total_inst_ = 0;
  total_time_ = 0;

  // Reading / packing / label collection.
  read_time_ = 0;
  pack_time_ = 0;
  collect_label_time_ = 0;

  // Sparse pull / fill / push.
  pull_sparse_local_time_ = 0;
  fill_sparse_time_ = 0;
  push_sparse_time_ = 0;

  // Op execution.
  op_all_time_ = 0;
  cpu_op_time_ = 0;
  xpu_op_time_ = 0;
  xpu_wait_time_ = 0;

  // Host <-> device transfers.
  cpu_2_gpu_time_ = 0;
  gpu_2_cpu_time_ = 0;
}
|
|
|
|
// Intentionally empty: this worker performs no work in ProduceTasks().
void PSGPUWorker::ProduceTasks() {
}
|
|
|
|
} // end namespace framework
|
|
} // end namespace paddle
|
|
#endif
|