You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
323 lines
13 KiB
323 lines
13 KiB
# Copyright 2020 Huawei Technologies Co., Ltd
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ============================================================================
|
|
"""version and config check"""
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
from pathlib import Path
|
|
from abc import abstractmethod, ABCMeta
|
|
from packaging import version
|
|
from . import log as logger
|
|
from .version import __version__
|
|
from .default_config import __package_name__
|
|
|
|
|
|
class EnvChecker(metaclass=ABCMeta):
    """Abstract base for the backend-specific environment checkers.

    Concrete checkers must implement every method below; the class cannot be
    instantiated until all three are overridden.
    """

    @abstractmethod
    def check_env(self, e):
        """Diagnose the environment; ``e`` is the error handed in by the caller."""

    @abstractmethod
    def set_env(self):
        """Adjust the process environment for the backend."""

    @abstractmethod
    def check_version(self):
        """Verify the version of the backend software."""
|
|
|
|
|
|
class GPUEnvChecker(EnvChecker):
    """Environment checker for the GPU (cuda) package.

    On construction it looks for cuda under ``/usr/local/cuda`` and snapshots
    the ``PATH`` and ``LD_LIBRARY_PATH`` environment variables. When the
    default directory does not exist, all cuda paths are left empty and the
    checker only emits warnings instead of modifying the environment.
    """

    def __init__(self):
        # cuda "major.minor" versions accepted by check_version()
        self.version = ["10.1"]
        self.cuda_path = "/usr/local/cuda"
        if os.path.exists(self.cuda_path):
            # cuda default path
            self.cuda_bin = self.cuda_path + "/bin"
            self.cuda_lib = self.cuda_path + "/lib64"
            self.cuda_version = self.cuda_path + "/version.txt"
        else:
            # custom or unknown environment
            self.cuda_path = ""
            self.cuda_bin = ""
            self.cuda_lib = ""
            self.cuda_version = ""

        # env (either value may be None when the variable is unset)
        self.path = os.getenv("PATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")

        # substrings expected inside the variables above
        self.path_check = "/cuda"
        self.ld_lib_path_check = "/cuda"
        # last version string read by _read_version(); "0" until one is found
        self.v = "0"

    def check_env(self, e):
        """Log environment warnings, then re-raise ``e``.

        Args:
            e: the exception that triggered the check; it is always raised.
        """
        self._check_env()
        raise e

    def set_env(self):
        """Prepend the default cuda ``bin`` directory to ``PATH``.

        When no default cuda installation was detected, only the warning
        checks are run and the environment is left untouched.

        Raises:
            EnvironmentError: the default cuda root exists but has no ``bin``
                directory.
        """
        if not self.cuda_bin:
            self._check_env()
            return

        if not Path(self.cuda_bin).is_dir():
            raise EnvironmentError(
                f"No such directory: {self.cuda_bin}, please check if cuda is installed correctly.")

        # PATH may be unset; indexing os.environ directly would raise KeyError.
        current_path = os.environ.get('PATH')
        if current_path:
            os.environ['PATH'] = self.cuda_bin + ":" + current_path
        else:
            os.environ['PATH'] = self.cuda_bin

    def check_version(self):
        """Warn when the installed cuda version is not in ``self.version``.

        The check is skipped (with a warning) when the version file of the
        default cuda installation is not present.
        """
        if not Path(self.cuda_version).is_file():
            logger.warning("Using custom cuda path, cuda version checking is skiped, please make sure "
                           "cuda version is supported, you can reference to the installation guidelines "
                           "https://www.mindspore.cn/install")
            return

        parsed = version.parse(self._read_version(self.cuda_version))
        # only major.minor is compared against the supported list
        v_str = str(parsed.major) + "." + str(parsed.minor)
        if v_str not in self.version:
            logger.warning(f"MindSpore version {__version__} and cuda version {v_str} does not match, "
                           "reference to the match info on: https://www.mindspore.cn/install")

    def _check_env(self):
        """gpu cuda path check: warn when PATH / LD_LIBRARY_PATH lack a cuda entry."""
        if self.path is None or self.path_check not in self.path:
            logger.warning("Can not find nvcc compiler(need by mindspore-gpu), please check if you have set env "
                           "PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")

        if self.ld_lib_path is None or self.ld_lib_path_check not in self.ld_lib_path:
            logger.warning("Can not find cuda so(need by mindspore-gpu), please check if you have set env "
                           "LD_LIBRARY_PATH, you can reference to the installation guidelines "
                           "https://www.mindspore.cn/install")

    def _read_version(self, file_path):
        """Return the text following ``CUDA Version`` in ``file_path``.

        The first line starting with ``CUDA Version`` wins and is cached in
        ``self.v``. When no such line exists, the previous value of ``self.v``
        is returned unchanged.
        """
        with open(file_path, 'r') as f:
            # stream the file; stop at the first matching line
            for line in f:
                if line.startswith("CUDA Version"):
                    self.v = line.strip().split("CUDA Version")[1]
                    break
        return self.v
|
|
|
|
|
|
class AscendEnvChecker(EnvChecker):
    """Environment checker for the Ascend package.

    On construction it probes the known default install layouts for a
    ``fwkacllib/version.info`` file and snapshots ``PATH``, ``PYTHONPATH``,
    ``LD_LIBRARY_PATH`` and ``ASCEND_OPP_PATH``. When no default layout is
    found, all paths are left empty and the checker only emits warnings.
    """

    def __init__(self):
        # software package versions accepted by check_version()
        self.version = ["1.76.T21.0.B210"]

        # Install roots probed in priority order; every layout uses the same
        # sub-directories below its root.
        install_roots = (
            "/usr/local/Ascend/nnae/latest",            # atlas default path (nnae)
            "/usr/local/Ascend/ascend-toolkit/latest",  # atlas default path (toolkit)
            "/usr/local/Ascend",                        # hisi default path
        )
        for root in install_roots:
            version_file = root + "/fwkacllib/version.info"
            if os.path.exists(version_file):
                self.fwk_path = root + "/fwkacllib"
                self.op_impl_path = root + "/opp/op_impl/built-in/ai_core/tbe"
                self.tbe_path = self.fwk_path + "/lib64"
                self.cce_path = self.fwk_path + "/ccec_compiler/bin"
                self.fwk_version = version_file
                self.op_path = root + "/opp"
                break
        else:
            # custom or unknown environment
            self.fwk_path = ""
            self.op_impl_path = ""
            self.tbe_path = ""
            self.cce_path = ""
            self.fwk_version = ""
            self.op_path = ""

        # env (any of these may be None when the variable is unset)
        self.path = os.getenv("PATH")
        self.python_path = os.getenv("PYTHONPATH")
        self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
        self.ascend_opp_path = os.getenv("ASCEND_OPP_PATH")

        # check content: substrings expected inside the variables above
        self.path_check = "/fwkacllib/ccec_compiler/bin"
        self.python_path_check = "opp/op_impl/built-in/ai_core/tbe"
        self.ld_lib_path_check_fwk = "/fwkacllib/lib64"
        self.ld_lib_path_check_addons = "/add-ons"
        self.ascend_opp_path_check = "/op"
        # last version string read by _read_version(); empty until one is found
        self.v = ""

    def check_env(self, e):
        """Log environment warnings, then re-raise ``e``.

        Args:
            e: the exception that triggered the check; it is always raised.
        """
        self._check_env()
        raise e

    def check_version(self):
        """Warn when the installed package version is not in ``self.version``.

        The check is skipped (with a warning) when no version file of a
        default installation is present.
        """
        if not Path(self.fwk_version).is_file():
            logger.warning("Using custom Ascend 910 AI software package path, package version checking is skiped, "
                           "please make sure Ascend 910 AI software package version is supported, you can reference to "
                           "the installation guidelines https://www.mindspore.cn/install")
            return

        v = self._read_version(self.fwk_version)
        if v not in self.version:
            logger.warning(f"MindSpore version {__version__} and Ascend 910 AI software package version {v} does not "
                           "match, reference to the match info on: https://www.mindspore.cn/install")

    def check_deps_version(self):
        """
        te, topi, hccl wheel package version check.

        The check runs ``_check_deps_version.py`` (located next to this file)
        in a sub process so that it picks up the updated 'LD_LIBRARY_PATH'
        env. Anything the script prints to stdout is logged as a warning; a
        timeout is logged and otherwise ignored.
        """
        input_args = ["--mindspore_version=" + __version__]
        input_args.extend("--supported_version=" + v for v in self.version)
        deps_version_checker = os.path.join(os.path.split(os.path.realpath(__file__))[0], "_check_deps_version.py")
        call_cmd = [sys.executable, deps_version_checker] + input_args
        try:
            process = subprocess.run(call_cmd, timeout=3, text=True, capture_output=True, check=False)
            report = process.stdout.strip()
            if report != "":
                logger.warning(report)
        except subprocess.TimeoutExpired:
            logger.warning("Package te, topi, hccl version check timed out, skip.")

    def set_env(self):
        """Expose the default Ascend installation through the environment.

        Extends ``LD_LIBRARY_PATH`` (only when importing ``te`` raises
        ``RuntimeError``), appends the tbe op implementation directory to
        ``sys.path``, prepends the ccec compiler directory to ``PATH`` and sets
        ``ASCEND_OPP_PATH``. When no default installation was detected, only
        the warning checks are run.

        Raises:
            EnvironmentError: one of the expected installation directories is
                missing.
        """
        if not self.tbe_path:
            self._check_env()
            return

        try:
            # pylint: disable=unused-import
            import te
        except RuntimeError:
            if not Path(self.tbe_path).is_dir():
                raise EnvironmentError(
                    f"No such directory: {self.tbe_path}, Please check if Ascend 910 AI software package is "
                    "installed correctly.")
            if os.getenv('LD_LIBRARY_PATH'):
                os.environ['LD_LIBRARY_PATH'] = self.tbe_path + ":" + os.environ['LD_LIBRARY_PATH']
            else:
                os.environ['LD_LIBRARY_PATH'] = self.tbe_path

        # check te version after set te env
        self.check_deps_version()

        if not Path(self.op_impl_path).is_dir():
            raise EnvironmentError(
                f"No such directory: {self.op_impl_path}, Please check if Ascend 910 AI software package is "
                "installed correctly.")
        sys.path.append(self.op_impl_path)

        if not Path(self.cce_path).is_dir():
            raise EnvironmentError(
                f"No such directory: {self.cce_path}, Please check if Ascend 910 AI software package is "
                "installed correctly.")
        # PATH may be unset; indexing os.environ directly would raise KeyError.
        current_path = os.environ.get('PATH')
        if current_path:
            os.environ['PATH'] = self.cce_path + ":" + current_path
        else:
            os.environ['PATH'] = self.cce_path

        if self.op_path is None:
            # nothing to export
            pass
        elif Path(self.op_path).is_dir():
            os.environ['ASCEND_OPP_PATH'] = self.op_path
        else:
            raise EnvironmentError(
                f"No such directory: {self.op_path}, Please check if Ascend 910 AI software package is "
                "installed correctly.")

    def _check_env(self):
        """ascend dependence path check: warn about each variable lacking its expected entry."""
        if self.path is None or self.path_check not in self.path:
            logger.warning("Can not find ccec_compiler(need by mindspore-ascend), please check if you have set env "
                           "PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")

        if self.python_path is None or self.python_path_check not in self.python_path:
            logger.warning(
                "Can not find tbe op implement(need by mindspore-ascend), please check if you have set env "
                "PYTHONPATH, you can reference to the installation guidelines "
                "https://www.mindspore.cn/install")

        # both the fwkacllib lib64 entry and the add-ons entry must be present
        if self.ld_lib_path is None or not (self.ld_lib_path_check_fwk in self.ld_lib_path and
                                            self.ld_lib_path_check_addons in self.ld_lib_path):
            logger.warning("Can not find driver so(need by mindspore-ascend), please check if you have set env "
                           "LD_LIBRARY_PATH, you can reference to the installation guidelines "
                           "https://www.mindspore.cn/install")

        if self.ascend_opp_path is None or self.ascend_opp_path_check not in self.ascend_opp_path:
            logger.warning(
                "Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, "
                "you can reference to the installation guidelines https://www.mindspore.cn/install")

    def _read_version(self, file_path):
        """Return the value of the first ``Version=`` line in ``file_path``.

        The value is cached in ``self.v``. When no such line exists, the
        previous value of ``self.v`` is returned unchanged.
        """
        with open(file_path, 'r') as f:
            # stream the file; stop at the first matching line
            for line in f:
                if line.startswith("Version="):
                    self.v = line.strip().split("=")[1]
                    break
        return self.v
|
|
|
|
def check_version_and_env_config():
    """Run the version check and environment setup for the installed package.

    The checker is chosen from the lower-cased package name; packages other
    than the ascend and gpu ones are skipped with an info log. An
    ``ImportError`` raised while importing ``_c_expression`` or while running
    the checks is passed to the checker's ``check_env``.
    """
    checker_by_package = {
        "mindspore-ascend": AscendEnvChecker,
        "mindspore-gpu": GPUEnvChecker,
    }
    checker_type = checker_by_package.get(__package_name__.lower())
    if checker_type is None:
        logger.info(f"Package version {__package_name__} does not need to check any environment variable, skipping.")
        return

    env_checker = checker_type()
    try:
        # pylint: disable=unused-import
        from . import _c_expression
        # check version of ascend site or cuda
        env_checker.check_version()

        env_checker.set_env()
    except ImportError as import_error:
        env_checker.check_env(import_error)
|
|
|
|
|
|
def _set_pb_env():
    """Default `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION` to `python` when it is unset.

    An explicit `cpp` setting is left in place and only logged; any other
    existing value is left untouched without logging.
    """
    env_key = "PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"
    implementation = os.getenv(env_key)

    if implementation is None:
        logger.info("Setting the env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` to prevent memory overflow "
                    "during save or load checkpoint file.")
        os.environ[env_key] = "python"
        return

    if implementation == "cpp":
        logger.info("Current env variable `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp`. "
                    "When the checkpoint file is too large, "
                    "it may cause memory limit error durning load checkpoint file. "
                    "This can be solved by set env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`.")
|
|
|
|
|
|
# Import-time side effects: both calls run as soon as this module is imported.
# First check the backend version / environment for the installed package ...
check_version_and_env_config()
# ... then default the protobuf python implementation when it is unset.
_set_pb_env()
|