@@ -27,20 +27,6 @@ void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service) {
   VLOG(4) << "RunServer thread end";
 }
 
-static void CreateTensorFromMessageType(framework::Variable *var,
-                                        sendrecv::VarType var_type) {
-  if (var_type == sendrecv::VarType::LOD_TENSOR) {
-    var->GetMutable<framework::LoDTensor>();
-  } else if (var_type == sendrecv::VarType::SELECTED_ROWS) {
-    var->GetMutable<framework::SelectedRows>();
-  } else {
-    PADDLE_THROW(
-        "VariableMessage type %d is not in "
-        "[LoDTensor, SelectedRows]",
-        var_type);
-  }
-}
-
 static void ParallelExecuteBlocks(
     const std::vector<size_t> &parallel_blkids, framework::Executor *executor,
     const std::vector<std::shared_ptr<framework::ExecutorPrepareContext>>
@@ -77,59 +63,37 @@ void ListenAndServOp::Stop() {
   server_thread_->join();
 }
 
-void ListenAndServOp::RunImpl(const framework::Scope &scope,
-                              const platform::Place &dev_place) const {
-  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-  auto &dev_ctx = *pool.Get(dev_place);
-  framework::Scope &recv_scope = scope.NewScope();
-
-  if (!rpc_service_) {
-    std::string endpoint = Attr<std::string>("endpoint");
-    rpc_service_.reset(new detail::AsyncGRPCServer(endpoint));
-  }
+void ListenAndServOp::PreparePrefetchCtx(
+    framework::Executor *executor, framework::BlockDesc *prefetch_block,
+    framework::ProgramDesc *program) const {
+  // TODO(qiao) set proper fields for table lookup and update
+  rpc_service_->SetExecutor(executor);
+  VLOG(3) << "prefetch block id is " << prefetch_block->ID();
+  auto prefetch_prepared = executor->Prepare(*program, prefetch_block->ID());
+  rpc_service_->SetPrefetchBlkdId(prefetch_block->ID());
+  rpc_service_->SetPrefetchPreparedCtx(prefetch_prepared.get());
+  prefetch_prepared.release();
+}
 
-  auto ins = Inputs("X");
+void ListenAndServOp::RunSyncUpdate(
+    framework::Executor *executor, framework::ProgramDesc *program,
+    framework::Scope *recv_scope, framework::BlockDesc *prefetch_block) const {
   auto fan_in = Attr<int>("Fanin");
-  auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock);
-  auto *prefetch_block = Attr<framework::BlockDesc *>(kPrefetchBlock);
-  auto *program = optimize_block->Program();
+
   size_t num_blocks = program->Size();
   PADDLE_ENFORCE_GE(num_blocks, 2,
                     "server program should have at least 2 blocks");
 
-  framework::Executor executor(dev_place);
   std::vector<int> block_list;
   for (size_t blkid = 1; blkid < num_blocks; ++blkid) {
-    if (blkid != static_cast<size_t>(prefetch_block->ID())) {
-      block_list.push_back(blkid);
-    }
+    block_list.push_back(blkid);
   }
-  auto optimize_prepared = executor.Prepare(*program, block_list);
+  auto optimize_prepared = executor->Prepare(*program, block_list);
   // Insert placeholder for block0 which holds current op itself.
   optimize_prepared.insert(
       optimize_prepared.begin(),
       std::shared_ptr<framework::ExecutorPrepareContext>(nullptr));
 
-  rpc_service_->SetScope(&recv_scope);
-  rpc_service_->SetDevCtx(&dev_ctx);
-  // TODO(qiao) set proper fields for table lookup and update
-  rpc_service_->SetExecutor(&executor);
-  VLOG(3) << "prefetch block id is " << prefetch_block->ID();
-  auto prefetch_prepared = executor.Prepare(*program, prefetch_block->ID());
-  rpc_service_->SetPrefetchBlkdId(prefetch_block->ID());
-  rpc_service_->SetPrefetchPreparedCtx(prefetch_prepared.get());
-  prefetch_prepared.release();
-  rpc_service_->SetProgram(program);
-  // start the server listening after all member initialized.
-  server_thread_.reset(new std::thread(RunServer, rpc_service_));
-  VLOG(3) << "wait server thread to become ready...";
-  sleep(5);
-  // Write to a file of server selected port for python use.
-  std::ofstream port_file;
-  port_file.open("/tmp/paddle.selected_port");
-  port_file << rpc_service_->GetSelectedPort();
-  port_file.close();
-
   bool exit_flag = false;
   // Record received sparse variables, so that
   // we could reset those after execute optimize program
@@ -170,7 +134,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
       break;
     }
 
-    // NOTE: if is_gpu_place, CUDA kernels are laugched by multiple threads
+    // NOTE: if is_gpu_place, CUDA kernels are launched by multiple threads
     // and this will still work.
 
     // The optimize blocks which have the same parent ID would run parallel
@@ -182,16 +146,16 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
     for (size_t blkid = 2; blkid < num_blocks; ++blkid) {
       if (blkid != static_cast<size_t>(prefetch_block->ID())) {
         if (program->Block(blkid).Parent() != last_parent_blkid) {
-          ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared,
-                                program, &recv_scope);
+          ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared,
+                                program, recv_scope);
           parallel_blkids.clear();
           last_parent_blkid = program->Block(blkid).Parent();
         }
         parallel_blkids.push_back(blkid);
       }
     }
-    ParallelExecuteBlocks(parallel_blkids, &executor, optimize_prepared,
-                          program, &recv_scope);
+    ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
+                          recv_scope);
     VLOG(2) << "run all blocks spent " << detail::GetTimestamp() - ts << "(ms)";
 
     // Reset the received sparse variables, the sum operator would not
@@ -209,6 +173,42 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   }  // while(true)
 }
 
+static void SavePort(std::shared_ptr<detail::AsyncGRPCServer> rpc_service) {
+  std::ofstream port_file;
+  port_file.open("/tmp/paddle.selected_port");
+  port_file << rpc_service->GetSelectedPort();
+  port_file.close();
+}
+
+void ListenAndServOp::RunImpl(const framework::Scope &scope,
+                              const platform::Place &dev_place) const {
+  platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+  auto &dev_ctx = *pool.Get(dev_place);
+  framework::Scope &recv_scope = scope.NewScope();
+
+  PADDLE_ENFORCE(!rpc_service_);
+  std::string endpoint = Attr<std::string>("endpoint");
+  rpc_service_.reset(new detail::AsyncGRPCServer(endpoint));
+
+  auto *optimize_block = Attr<framework::BlockDesc *>(kOptimizeBlock);
+  auto *prefetch_block = Attr<framework::BlockDesc *>(kPrefetchBlock);
+  auto *program = optimize_block->Program();
+  framework::Executor executor(dev_place);
+
+  // prepare rpc_service
+  rpc_service_->SetScope(&recv_scope);
+  rpc_service_->SetDevCtx(&dev_ctx);
+  rpc_service_->SetProgram(program);
+  PreparePrefetchCtx(&executor, prefetch_block, program);
+  // start the server listening after all member initialized.
+  server_thread_.reset(new std::thread(RunServer, rpc_service_));
+  VLOG(3) << "wait server thread to become ready...";
+  sleep(5);
+  // Write to a file of server selected port for python use.
+  SavePort(rpc_service_);
+  RunSyncUpdate(&executor, program, &recv_scope, prefetch_block);
+}
+
 class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   ListenAndServOpMaker(OpProto *proto, OpAttrChecker *op_checker)