!12192 Terminate the TBE process pool in a separate thread; kill the forked child process in the 310 model converter

From: @zhoufeng54
Reviewed-by: @xu-yfei,@kisnwang
Signed-off-by: @xu-yfei
pull/12192/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 8aba5d8f57

@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
"""tbe process""" """tbe process"""
import threading
import traceback import traceback
import multiprocessing import multiprocessing
import subprocess import subprocess
@ -137,12 +138,17 @@ class TbeProcess:
res = "TBEException", "ERROR: [MS_BUILD_PROCESS_NUM] type should be a int num, but got :" + process_num res = "TBEException", "ERROR: [MS_BUILD_PROCESS_NUM] type should be a int num, but got :" + process_num
return res return res
def exit(self): def close_pool(self):
if self.__pool is not None:
self.__pool.terminate() self.__pool.terminate()
self.__pool.join() self.__pool.join()
del self.__pool del self.__pool
def exit(self):
if self.__pool is not None:
stop_thread = threading.Thread(target=self.close_pool)
stop_thread.daemon = True
stop_thread.start()
def start_compile_op(self, op_json): def start_compile_op(self, op_json):
""" """
start compile op async. start compile op async.

@ -65,13 +65,30 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall
if (pid == 0) { if (pid == 0) {
ChildProcess(child_process); ChildProcess(child_process);
shared_memory.Detach(); shared_memory.Detach();
MS_LOG_INFO << "Model converter: child process exit"; MS_LOG_INFO << "Model converter: child process sleep waiting for exit signal.";
exit(0); while (1) {
// waiting for signal
}
} else { // parent process } else { // parent process
ret = ParentProcess(parent_process); ret = ParentProcess(parent_process);
shared_memory.Detach(); shared_memory.Detach();
MS_LOG_INFO << "Model converter: parent process kills child of fork.";
(void)kill(pid, SIGKILL);
constexpr uint32_t kMaxLoopCount = 5;
bool child_exited = false;
for (uint32_t i = 0; i < kMaxLoopCount; ++i) {
int status; int status;
wait(&status); if (waitpid(pid, &status, WNOHANG) == pid) {
MS_LOG(INFO) << "Child process " << pid << " exits success.";
child_exited = true;
break;
}
sleep(1);
}
if (!child_exited) {
MS_LOG(WARNING) << "Child process " << pid << " has been killed but waitpid failed.";
}
shared_memory.Destroy(); shared_memory.Destroy();
} }
return ret; return ret;

Loading…
Cancel
Save