Destroy session first. (#30954)

Branch: revert-31562-mean
Author: gongweibao (committed by GitHub)
parent 500f28ec37
commit ebef6601d5

@@ -50,29 +50,35 @@ class AscendInstance {
   virtual ~AscendInstance() {}
   AscendInstance() {}
 
-  std::map<AscendString, AscendString> GetDefaultInitOptions() {
+  std::map<AscendString, AscendString> _GetDefaultInitOptions() {
     std::map<AscendString, AscendString> init_options;
     init_options["ge.exec.deviceId"] = "0";
     init_options["ge.graphRunMode"] = "1";
     return init_options;
   }
 
-  std::map<AscendString, AscendString> GetDefaultInitSessionOptions() {
+  std::map<AscendString, AscendString> _GetDefaultInitSessionOptions() {
     std::map<AscendString, AscendString> init_options;
-    init_options["a"] = "b";
-    init_options["ge.trainFlag"] = "1";
+    // init_options["a"] = "b";
+    // init_options["ge.trainFlag"] = "1";
     return init_options;
   }
 
-  ge::Status InitGEForUT() { return ge::GEInitialize(GetDefaultInitOptions()); }
+  ge::Status InitGEForUT() { return ge::GEInitialize(_GetDefaultInitOptions()); }
 
   void InitGlobalResouces() {
-    LOG(INFO) << "Begin InitGlobalResouces";
-    session_.reset(new ge::Session(GetDefaultInitSessionOptions()));
+    LOG(INFO) << "Begin ascend InitGlobalResouces";
+    session_.reset(new ge::Session(_GetDefaultInitSessionOptions()));
     if (session_ == nullptr) {
       LOG(FATAL) << "new session error:" << session_;
     }
-    LOG(INFO) << "End InitGlobalResouces";
+    LOG(INFO) << "End ascend InitGlobalResouces";
   }
 
+  void DestroyGlobalResouces() {
+    LOG(INFO) << "Begin ascend DestroyGlobalResouces";
+    session_ = nullptr;
+    LOG(INFO) << "End ascend DestroyGlobalResouces";
+  }
+
   static std::shared_ptr<AscendInstance> GetInstance() {

@@ -55,6 +55,9 @@ void BindAscendWrapper(py::module *m) {
       .def("init_global_resources",
           &framework::AscendInstance::InitGlobalResouces,
           py::call_guard<py::gil_scoped_release>())
+      .def("destroy_global_resources",
+          &framework::AscendInstance::DestroyGlobalResouces,
+          py::call_guard<py::gil_scoped_release>())
       .def("add_ascend_subgraph", &framework::AscendInstance::AddAscendSubgraph,
           py::call_guard<py::gil_scoped_release>());
 }
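For orientation, here is a rough sketch of how this pair of bindings is driven from the Python side. Only init_global_resources, destroy_global_resources, and core.ge_finalize are visible in this commit; the AscendInstance accessor spelled below is an assumption for illustration, not the commit's API.

# Hedged usage sketch. `core.AscendInstance()` is a hypothetical accessor;
# only init_global_resources(), destroy_global_resources(), and
# core.ge_finalize() actually appear in this commit.
import paddle.fluid.core as core

instance = core.AscendInstance()   # hypothetical accessor name
instance.init_global_resources()   # creates the ge::Session
try:
    pass  # ... add and run Ascend subgraphs ...
finally:
    instance.destroy_global_resources()  # drop the session first
    core.ge_finalize()                   # then finalize the GE runtime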

@@ -121,8 +121,7 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         "--run_mode",
         type=str,
         default="collective",
-        help="run mode of job, can be:collective/ps/ps-heter"
-    )
+        help="run mode of job, can be:collective/ps/ps-heter")
 
     base_group.add_argument(
         "--ascend_npus",
@@ -133,7 +132,6 @@ see: http://www.paddlepaddle.org/documentation/docs/zh/1.6/user_guides/howto/tra
         "--ascend_npus=\"0,1,2,3\" will launch four training processes each bound to one npu."
     )
-
     base_group.add_argument("--selected_gpus", dest="gpus")
 
     base_group.add_argument(
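As a usage note, combining the two flags above, a launch under this scheme would look something like `python -m paddle.distributed.launch --run_mode=collective --ascend_npus="0,1,2,3" train.py`; the module path and script name are assumptions, while the flags are the ones defined in this file.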
@@ -250,6 +248,9 @@ def launch_collective(args):
         log_dir=args.log_dir,
         envs=global_envs)
 
+    for idx, proc in enumerate(procs):
+        print("launch proc_id:{} idx:{}".format(proc.proc.pid, idx))
+
     while True:
         alive = watch_local_trainers(procs, cluster.trainers_nranks())

@@ -182,9 +182,14 @@ class AscendOptimizer(Optimizer):
     def __init__(self, optimizer, fetch_list=[]):
         self.inner_opt = optimizer
         self.fetch_list = fetch_list
+        self.ascend_instance = None
+
+    def __del__(self):
+        print("begin AscendOptimizer del")
+        if self.ascend_instance is not None:
+            self.ascend_instance.destroy_global_resources()
+            core.ge_finalize()
+        print("end AscendOptimizer del")
 
     def _can_apply(self):
         if not self.user_defined_strategy.ascend:
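The __del__ hook above encodes the ordering the commit title refers to: the ge::Session held by AscendInstance must be released before ge_finalize() shuts down the GE runtime. As a design note, the same ordering can be made explicit with a context manager rather than a finalizer, since CPython gives no timing guarantee for __del__ during interpreter shutdown; the sketch below is an illustration under that assumption, not code from the commit.

# Alternative sketch (not in the commit): destroy-before-finalize made
# explicit with a context manager instead of relying on __del__ timing.
import contextlib
import paddle.fluid.core as core

@contextlib.contextmanager
def ascend_session(instance):
    instance.init_global_resources()
    try:
        yield instance
    finally:
        instance.destroy_global_resources()  # session first...
        core.ge_finalize()                   # ...then the GE runtime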

@@ -16,6 +16,7 @@ from paddle.fluid.optimizer import Optimizer
 import paddle.fluid.core as core
 import numpy as np
 from paddle.distributed import fleet
+from functools import reduce
 
 registerd_op = {  ## forwards
     "elementwise_add": "AddParser",
