You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
378 lines
16 KiB
378 lines
16 KiB
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ============================================================================
|
|
"""version and config check"""
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
from pathlib import Path
|
|
from abc import abstractmethod, ABCMeta
|
|
import numpy as np
|
|
from packaging import version
|
|
from . import log as logger
|
|
from .version import __version__
|
|
from .default_config import __package_name__
|
|
|
|
|
|
class EnvChecker(metaclass=ABCMeta):
    """Abstract interface that every backend environment checker implements."""

    @abstractmethod
    def check_env(self, e):
        """Handle the exception ``e`` handed over by the caller."""

    @abstractmethod
    def set_env(self):
        """Prepare whatever process environment the backend needs."""

    @abstractmethod
    def check_version(self):
        """Check the installed backend software version."""
|
|
|
|
|
|
class GPUEnvChecker(EnvChecker):
    """GPU environment check.

    Uses ``ldd`` on ``_c_expression*.so*`` to find the directories of the
    ``libcu*`` libraries it links against, then warns when the CUDA version
    read from ``version.txt`` or reported by ``nvcc`` is not in ``self.version``.
    """

    def __init__(self):
        # CUDA "major.minor" versions accepted by this package.
        self.version = ["10.1"]
        # env
        self.path = os.getenv("PATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")

        # check
        # "0" means no CUDA version has been read from a version.txt yet.
        self.v = "0"
        # Order matters: the bin path lookup reads self.cuda_lib_path.
        self.cuda_lib_path = self._get_lib_path("libcu")
        self.cuda_bin_path = self._get_bin_path("cuda")

    def check_env(self, e):
        """Re-raise ``e`` unchanged."""
        raise e

    def set_env(self):
        """Do nothing; this checker sets no environment variables here."""
        return

    @staticmethod
    def _run_shell(command):
        """Run ``command`` through the shell with a 3 second limit.

        Returns:
            subprocess.CompletedProcess, or None when the command timed out.
            A slow ``ldd``/``nvcc`` must not abort the package import, so the
            timeout is reported to the caller instead of being raised.
        """
        try:
            return subprocess.run(command, timeout=3, text=True, capture_output=True, check=False, shell=True)
        except subprocess.TimeoutExpired:
            return None

    def _get_bin_path(self, bin_name):
        """Get bin path by bin name; only "cuda" is known, anything else gives []."""
        if bin_name == "cuda":
            return self._get_cuda_bin_path()
        return []

    def _get_cuda_bin_path(self):
        """Get cuda bin path by lib path.

        Returns:
            Unique existing ``<lib path>/bin`` directories (numpy array).
        """
        candidates = (os.path.abspath(lib_path.strip() + "/bin/") for lib_path in self.cuda_lib_path)
        return np.unique([bin_path for bin_path in candidates if Path(bin_path).is_dir()])

    def _get_nvcc_version(self, is_set_env):
        """Get cuda version by nvcc command.

        Args:
            is_set_env: True once PATH has already been extended with a
                discovered cuda bin directory, so no second retry is made.

        Returns:
            The release string printed by ``nvcc --version`` (e.g. "10.1"),
            or "" when nvcc is unavailable, timed out or printed nothing.
        """
        nvcc_result = self._run_shell("nvcc --version | grep release")
        if nvcc_result is None:
            logger.info("nvcc version check timed out, skip.")
            return ""
        if nvcc_result.returncode:
            if not is_set_env:
                for path in self.cuda_bin_path:
                    if Path(path + "/nvcc").is_file():
                        # PATH may be unset altogether; do not assume the key exists.
                        current = os.environ.get('PATH')
                        os.environ['PATH'] = (path + ":" + current) if current else str(path)
                        return self._get_nvcc_version(True)
            return ""
        for line in nvcc_result.stdout.split('\n'):
            if line:
                # e.g. "Cuda compilation tools, release 10.1, V10.1.243" -> "10.1"
                return line.partition("release")[2].split(",")[0].strip()
        return ""

    def check_version(self):
        """Check cuda version and log a warning on any mismatch."""
        version_match = False
        for path in self.cuda_lib_path:
            version_file = path + "/version.txt"
            if Path(version_file).is_file() and self._check_version(version_file):
                version_match = True
                break
        if not version_match:
            if self.v == "0":
                logger.warning("Cuda version file version.txt is not found, please confirm that the correct "
                               "cuda version has been installed, you can refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
            else:
                logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
                               "please refer to the installation guide for version matching "
                               "information: https://www.mindspore.cn/install")
        nvcc_version = self._get_nvcc_version(False)
        if nvcc_version and (nvcc_version not in self.version):
            logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
                           "does not match, please refer to the installation guide for version matching "
                           "information: https://www.mindspore.cn/install")

    def _check_version(self, version_file):
        """Check cuda version by version.txt.

        Returns:
            True when the file's "major.minor" version is in ``self.version``.
        """
        parsed = version.parse(self._read_version(version_file))
        return f"{parsed.major}.{parsed.minor}" in self.version

    def _get_lib_path(self, lib_name):
        """Get gpu lib path by ldd command.

        Args:
            lib_name: Library name prefix to look for in the ``ldd`` output.

        Returns:
            Unique parent directories of the directories holding the matched
            libraries (numpy array), or an empty list when ``ldd`` found
            nothing or timed out.
        """
        import shlex

        path_list = []
        current_path = os.path.split(os.path.realpath(__file__))[0]
        # Quote the directory so install paths containing spaces or shell
        # metacharacters still work; the glob stays unquoted so the shell expands it.
        ldd_result = self._run_shell(
            "ldd " + shlex.quote(current_path) + "/_c_expression*.so* | grep " + shlex.quote(lib_name))
        if ldd_result is None:
            logger.info(f"ldd lookup of {lib_name} timed out, skip.")
            return path_list
        if ldd_result.returncode:
            logger.warning(f"{lib_name} so(need by mindspore-gpu) is not found, please confirm that "
                           f"_c_expression.so depend on {lib_name}, "
                           f"and _c_expression.so in directory:{current_path}")
            return path_list
        for ldd_line in ldd_result.stdout.split('\n'):
            # ldd prints "name => /resolved/path (addr)" or "name => not found".
            path = ldd_line.partition("=>")[2]
            if "not found" in path.lower():
                logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
                               "that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
                               "installation guidelines: https://www.mindspore.cn/install")
                continue
            # Keep the directory part only, then step one level up from it.
            path = path.partition(lib_name)[0]
            if path:
                path_list.append(os.path.abspath(path.strip() + "../"))
        return np.unique(path_list)

    def _read_version(self, file_path):
        """Get gpu version info in version.txt.

        Stores the text following "CUDA Version" in ``self.v`` and returns it;
        when no such line exists, ``self.v`` is returned unchanged.
        """
        with open(file_path, 'r') as f:
            for line in f:
                if line.startswith("CUDA Version"):
                    # Strip so the stored value carries no leading blank.
                    self.v = line.strip().split("CUDA Version")[1].strip()
                    break
        return self.v
|
|
|
|
|
|
class AscendEnvChecker(EnvChecker):
    """ascend environment check

    Probes the default Ascend install locations, exports the environment
    variables the software package needs and warns when the package version is
    not in ``self.version``.
    """

    # Default install roots, probed in order: atlas nnae, atlas toolkit, hisi.
    # The first one containing fwkacllib/version.info wins.
    _INSTALL_ROOTS = (
        "/usr/local/Ascend/nnae/latest",
        "/usr/local/Ascend/ascend-toolkit/latest",
        "/usr/local/Ascend",
    )

    def __init__(self):
        self.version = ["1.77.22.1.220"]

        # custom or unknown environment unless one of the default roots matches
        self.fwk_path = ""
        self.op_impl_path = ""
        self.tbe_path = ""
        self.cce_path = ""
        self.fwk_version = ""
        self.op_path = ""
        for root in self._INSTALL_ROOTS:
            fwk_version = root + "/fwkacllib/version.info"
            if os.path.exists(fwk_version):
                self.fwk_path = root + "/fwkacllib"
                self.op_path = root + "/opp"
                self.op_impl_path = self.op_path + "/op_impl/built-in/ai_core/tbe"
                self.tbe_path = self.fwk_path + "/lib64"
                self.cce_path = self.fwk_path + "/ccec_compiler/bin"
                self.fwk_version = fwk_version
                break

        # env
        self.path = os.getenv("PATH")
        self.python_path = os.getenv("PYTHONPATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
        self.ascend_opp_path = os.getenv("ASCEND_OPP_PATH")

        # check content
        self.path_check = "/fwkacllib/ccec_compiler/bin"
        self.python_path_check = "opp/op_impl/built-in/ai_core/tbe"
        self.ld_lib_path_check_fwk = "/fwkacllib/lib64"
        self.ld_lib_path_check_addons = "/add-ons"
        self.ascend_opp_path_check = "/op"
        self.v = ""

    @staticmethod
    def _prepend_env(name, value):
        """Put ``value`` in front of the ':'-separated variable ``name``.

        An unset or empty variable is simply set to ``value``.
        """
        current = os.getenv(name)
        os.environ[name] = (value + ":" + current) if current else value

    @staticmethod
    def _missing_dir_error(path):
        """Build the error raised when an expected package directory is absent."""
        return EnvironmentError(
            f"No such directory: {path}, Please check if Ascend 910 AI software package is "
            "installed correctly.")

    def check_env(self, e):
        """Log hints about missing environment variables, then re-raise ``e``."""
        self._check_env()
        raise e

    def check_version(self):
        """Warn when the package version file is missing or holds an unlisted version."""
        if not Path(self.fwk_version).is_file():
            logger.warning("Using custom Ascend 910 AI software package path, package version checking is skipped, "
                           "please make sure Ascend 910 AI software package version is supported, you can reference to "
                           "the installation guidelines https://www.mindspore.cn/install")
            return

        v = self._read_version(self.fwk_version)
        if v not in self.version:
            v_list = str(self.version)
            logger.warning(f"MindSpore version {__version__} and Ascend 910 AI software package version {v} does not "
                           f"match, the version of software package expect one of {v_list}, "
                           "please reference to the match info on: https://www.mindspore.cn/install")

    def check_deps_version(self):
        """
        te, topi, hccl wheel package version check
        in order to update the change of 'LD_LIBRARY_PATH' env, run a sub process
        """
        input_args = ["--mindspore_version=" + __version__]
        input_args.extend("--supported_version=" + v for v in self.version)
        deps_version_checker = os.path.join(os.path.split(os.path.realpath(__file__))[0], "_check_deps_version.py")
        call_cmd = [sys.executable, deps_version_checker] + input_args
        try:
            process = subprocess.run(call_cmd, timeout=3, text=True, capture_output=True, check=False)
        except subprocess.TimeoutExpired:
            logger.info("Package te, topi, hccl version check timed out, skip.")
            return
        # Whatever the helper script printed is relayed as a warning.
        output = process.stdout.strip()
        if output != "":
            logger.warning(output)

    def set_env(self):
        """Export the variables needed by the detected software package.

        When no default install location was detected, only the environment
        hints are logged.

        Raises:
            EnvironmentError: an expected package directory does not exist.
        """
        if not self.tbe_path:
            self._check_env()
            return

        try:
            # pylint: disable=unused-import
            import te
        # pylint: disable=broad-except
        except Exception:
            # te could not be imported: expose the package lib64 directory.
            if not Path(self.tbe_path).is_dir():
                raise self._missing_dir_error(self.tbe_path)
            self._prepend_env('LD_LIBRARY_PATH', self.tbe_path)

        # check te version after set te env
        self.check_deps_version()

        if not Path(self.op_impl_path).is_dir():
            raise self._missing_dir_error(self.op_impl_path)
        # python path for sub process
        self._prepend_env('PYTHONPATH', self.op_impl_path)
        # sys path for this process
        sys.path.append(self.op_impl_path)
        os.environ['TBE_IMPL_PATH'] = self.op_impl_path

        if not Path(self.cce_path).is_dir():
            raise self._missing_dir_error(self.cce_path)
        # PATH may be unset; handle it like the other variables instead of
        # indexing os.environ directly.
        self._prepend_env('PATH', self.cce_path)

        if self.op_path is not None:
            if not Path(self.op_path).is_dir():
                raise self._missing_dir_error(self.op_path)
            os.environ['ASCEND_OPP_PATH'] = self.op_path

    def _check_env(self):
        """ascend dependence path check"""
        if self.path is None or self.path_check not in self.path:
            logger.warning("Can not find ccec_compiler(need by mindspore-ascend), please check if you have set env "
                           "PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")

        if self.python_path is None or self.python_path_check not in self.python_path:
            logger.warning(
                "Can not find tbe op implement(need by mindspore-ascend), please check if you have set env "
                "PYTHONPATH, you can reference to the installation guidelines "
                "https://www.mindspore.cn/install")

        # Both the fwkacllib lib64 and the add-ons fragments must be present.
        if self.ld_lib_path is None or not (self.ld_lib_path_check_fwk in self.ld_lib_path and
                                            self.ld_lib_path_check_addons in self.ld_lib_path):
            logger.warning("Can not find driver so(need by mindspore-ascend), please check if you have set env "
                           "LD_LIBRARY_PATH, you can reference to the installation guidelines "
                           "https://www.mindspore.cn/install")

        if self.ascend_opp_path is None or self.ascend_opp_path_check not in self.ascend_opp_path:
            logger.warning(
                "Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, "
                "you can reference to the installation guidelines https://www.mindspore.cn/install")

    def _read_version(self, file_path):
        """get ascend version info

        Stores the value of the first "Version=" line in ``self.v`` and
        returns it; without such a line ``self.v`` is returned unchanged.
        """
        with open(file_path, 'r') as f:
            for line in f:
                if line.startswith("Version="):
                    self.v = line.strip().split("=")[1]
                    break
        return self.v
|
|
|
|
def check_version_and_env_config():
    """Run the environment checker that matches the installed package.

    The package name (lower-cased) selects the Ascend or GPU checker; any
    other package only logs a message and returns. An ImportError raised while
    importing ``_c_expression`` or running the checks is passed to the
    checker's ``check_env``.
    """
    checkers = {
        "mindspore-ascend": AscendEnvChecker,
        "mindspore-gpu": GPUEnvChecker,
    }
    checker_cls = checkers.get(__package_name__.lower())
    if checker_cls is None:
        logger.info(f"Package version {__package_name__} does not need to check any environment variable, skipping.")
        return

    env_checker = checker_cls()
    try:
        # pylint: disable=unused-import
        from . import _c_expression
        # version of the ascend site or of cuda first, environment second
        env_checker.check_version()
        env_checker.set_env()
    except ImportError as import_error:
        env_checker.check_env(import_error)
|
|
|
|
|
|
def _set_pb_env():
    """Default `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION` to `python` when unset.

    An explicit `cpp` setting is kept and only logged with a hint; any other
    value is left untouched without logging.
    """
    env_key = "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"
    implementation = os.getenv(env_key)

    if implementation is None:
        logger.info("Setting the env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` to prevent memory overflow "
                    "during save or load checkpoint file.")
        os.environ[env_key] = "python"
        return

    if implementation == "cpp":
        logger.info("Current env variable `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp`. "
                    "When the checkpoint file is too large, "
                    "it may cause memory limit error during load checkpoint file. "
                    "This can be solved by set env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`.")
|
|
|
|
|
|
# Import-time side effect: run the backend version/environment checks
# as soon as this module is loaded.
check_version_and_env_config()
|
|
# Then settle the PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION setting.
_set_pb_env()
|