You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mindspore/mindspore/_check_version.py

378 lines
16 KiB

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""version and config check"""
import os
import sys
import subprocess
from pathlib import Path
from abc import abstractmethod, ABCMeta
import numpy as np
from packaging import version
from . import log as logger
from .version import __version__
from .default_config import __package_name__
class EnvChecker(metaclass=ABCMeta):
"""basic class for environment check"""
@abstractmethod
def check_env(self, e):
pass
@abstractmethod
def set_env(self):
pass
@abstractmethod
def check_version(self):
pass
class GPUEnvChecker(EnvChecker):
"""GPU environment check."""
def __init__(self):
self.version = ["10.1"]
# env
self.path = os.getenv("PATH")
self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
# check
self.v = "0"
self.cuda_lib_path = self._get_lib_path("libcu")
self.cuda_bin_path = self._get_bin_path("cuda")
def check_env(self, e):
raise e
def set_env(self):
return
def _get_bin_path(self, bin_name):
"""Get bin path by bin name."""
if bin_name == "cuda":
return self._get_cuda_bin_path()
return []
def _get_cuda_bin_path(self):
"""Get cuda bin path by lib path."""
path_list = []
for path in self.cuda_lib_path:
path = os.path.abspath(path.strip()+"/bin/")
if Path(path).is_dir():
path_list.append(path)
return np.unique(path_list)
def _get_nvcc_version(self, is_set_env):
"""Get cuda version by nvcc command."""
nvcc_result = subprocess.run(["nvcc --version | grep release"],
timeout=3, text=True, capture_output=True, check=False, shell=True)
if nvcc_result.returncode:
if not is_set_env:
for path in self.cuda_bin_path:
if Path(path + "/nvcc").is_file():
os.environ['PATH'] = path + ":" + os.environ['PATH']
return self._get_nvcc_version(True)
return ""
result = nvcc_result.stdout
for line in result.split('\n'):
if line:
return line.strip().split("release")[1].split(",")[0].strip()
return ""
def check_version(self):
"""Check cuda version."""
version_match = False
for path in self.cuda_lib_path:
version_file = path + "/version.txt"
if not Path(version_file).is_file():
continue
if self._check_version(version_file):
version_match = True
break
if not version_match:
if self.v == "0":
logger.warning("Cuda version file version.txt is not found, please confirm that the correct "
"cuda version has been installed, you can refer to the "
"installation guidelines: https://www.mindspore.cn/install")
else:
logger.warning(f"MindSpore version {__version__} and cuda version {self.v} does not match, "
"please refer to the installation guide for version matching "
"information: https://www.mindspore.cn/install")
nvcc_version = self._get_nvcc_version(False)
if nvcc_version and (nvcc_version not in self.version):
logger.warning(f"MindSpore version {__version__} and nvcc(cuda bin) version {nvcc_version} "
"does not match, please refer to the installation guide for version matching "
"information: https://www.mindspore.cn/install")
def _check_version(self, version_file):
"""Check cuda version by version.txt."""
v = self._read_version(version_file)
v = version.parse(v)
v_str = str(v.major) + "." + str(v.minor)
if v_str not in self.version:
return False
return True
def _get_lib_path(self, lib_name):
"""Get gpu lib path by ldd command."""
path_list = []
current_path = os.path.split(os.path.realpath(__file__))[0]
ldd_result = subprocess.run(["ldd " + current_path + "/_c_expression*.so* | grep " + lib_name],
timeout=3, text=True, capture_output=True, check=False, shell=True)
if ldd_result.returncode:
logger.warning(f"{lib_name} so(need by mndspore-gpu) is not found, please confirm that "
f"_c_experssion.so depend on {lib_name}, "
f"and _c_expression.so in directory:{current_path}")
return path_list
result = ldd_result.stdout
for i in result.split('\n'):
path = i.partition("=>")[2]
if path.lower().find("not found") > 0:
logger.warning(f"Cuda {self.version} version(need by mindspore-gpu) is not found, please confirm "
"that the path of cuda is set to the env LD_LIBRARY_PATH, please refer to the "
"installation guidelines: https://www.mindspore.cn/install")
continue
path = path.partition(lib_name)[0]
if path:
path_list.append(os.path.abspath(path.strip() + "../"))
return np.unique(path_list)
def _read_version(self, file_path):
"""Get gpu version info in version.txt."""
with open(file_path, 'r') as f:
all_info = f.readlines()
for line in all_info:
if line.startswith("CUDA Version"):
self.v = line.strip().split("CUDA Version")[1]
return self.v
return self.v
class AscendEnvChecker(EnvChecker):
"""ascend environment check"""
def __init__(self):
self.version = ["1.77.22.1.220"]
atlas_nnae_version = "/usr/local/Ascend/nnae/latest/fwkacllib/version.info"
atlas_toolkit_version = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/version.info"
hisi_fwk_version = "/usr/local/Ascend/fwkacllib/version.info"
if os.path.exists(atlas_nnae_version):
# atlas default path
self.fwk_path = "/usr/local/Ascend/nnae/latest/fwkacllib"
self.op_impl_path = "/usr/local/Ascend/nnae/latest/opp/op_impl/built-in/ai_core/tbe"
self.tbe_path = self.fwk_path + "/lib64"
self.cce_path = self.fwk_path + "/ccec_compiler/bin"
self.fwk_version = atlas_nnae_version
self.op_path = "/usr/local/Ascend/nnae/latest/opp"
elif os.path.exists(atlas_toolkit_version):
# atlas default path
self.fwk_path = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib"
self.op_impl_path = "/usr/local/Ascend/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe"
self.tbe_path = self.fwk_path + "/lib64"
self.cce_path = self.fwk_path + "/ccec_compiler/bin"
self.fwk_version = atlas_toolkit_version
self.op_path = "/usr/local/Ascend/ascend-toolkit/latest/opp"
elif os.path.exists(hisi_fwk_version):
# hisi default path
self.fwk_path = "/usr/local/Ascend/fwkacllib"
self.op_impl_path = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe"
self.tbe_path = self.fwk_path + "/lib64"
self.cce_path = self.fwk_path + "/ccec_compiler/bin"
self.fwk_version = hisi_fwk_version
self.op_path = "/usr/local/Ascend/opp"
else:
# custom or unknown environment
self.fwk_path = ""
self.op_impl_path = ""
self.tbe_path = ""
self.cce_path = ""
self.fwk_version = ""
self.op_path = ""
# env
self.path = os.getenv("PATH")
self.python_path = os.getenv("PYTHONPATH")
self.ld_lib_path = os.getenv("LD_LIBRARY_PATH")
self.ascend_opp_path = os.getenv("ASCEND_OPP_PATH")
# check content
self.path_check = "/fwkacllib/ccec_compiler/bin"
self.python_path_check = "opp/op_impl/built-in/ai_core/tbe"
self.ld_lib_path_check_fwk = "/fwkacllib/lib64"
self.ld_lib_path_check_addons = "/add-ons"
self.ascend_opp_path_check = "/op"
self.v = ""
def check_env(self, e):
self._check_env()
raise e
def check_version(self):
if not Path(self.fwk_version).is_file():
logger.warning("Using custom Ascend 910 AI software package path, package version checking is skipped, "
"please make sure Ascend 910 AI software package version is supported, you can reference to "
"the installation guidelines https://www.mindspore.cn/install")
return
v = self._read_version(self.fwk_version)
if v not in self.version:
v_list = str([x for x in self.version])
logger.warning(f"MindSpore version {__version__} and Ascend 910 AI software package version {v} does not "
f"match, the version of software package expect one of {v_list}, "
"please reference to the match info on: https://www.mindspore.cn/install")
def check_deps_version(self):
"""
te, topi, hccl wheel package version check
in order to update the change of 'LD_LIBRARY_PATH' env, run a sub process
"""
input_args = ["--mindspore_version=" + __version__]
for v in self.version:
input_args.append("--supported_version=" + v)
deps_version_checker = os.path.join(os.path.split(os.path.realpath(__file__))[0], "_check_deps_version.py")
call_cmd = [sys.executable, deps_version_checker] + input_args
try:
process = subprocess.run(call_cmd, timeout=3, text=True, capture_output=True, check=False)
if process.stdout.strip() != "":
logger.warning(process.stdout.strip())
except subprocess.TimeoutExpired:
logger.info("Package te, topi, hccl version check timed out, skip.")
def set_env(self):
if not self.tbe_path:
self._check_env()
return
try:
# pylint: disable=unused-import
import te
# pylint: disable=broad-except
except Exception:
if Path(self.tbe_path).is_dir():
if os.getenv('LD_LIBRARY_PATH'):
os.environ['LD_LIBRARY_PATH'] = self.tbe_path + ":" + os.environ['LD_LIBRARY_PATH']
else:
os.environ['LD_LIBRARY_PATH'] = self.tbe_path
else:
raise EnvironmentError(
f"No such directory: {self.tbe_path}, Please check if Ascend 910 AI software package is "
"installed correctly.")
# check te version after set te env
self.check_deps_version()
if Path(self.op_impl_path).is_dir():
# python path for sub process
if os.getenv('PYTHONPATH'):
os.environ['PYTHONPATH'] = self.op_impl_path + ":" + os.environ['PYTHONPATH']
else:
os.environ['PYTHONPATH'] = self.op_impl_path
# sys path for this process
sys.path.append(self.op_impl_path)
os.environ['TBE_IMPL_PATH'] = self.op_impl_path
else:
raise EnvironmentError(
f"No such directory: {self.op_impl_path}, Please check if Ascend 910 AI software package is "
"installed correctly.")
if Path(self.cce_path).is_dir():
os.environ['PATH'] = self.cce_path + ":" + os.environ['PATH']
else:
raise EnvironmentError(
f"No such directory: {self.cce_path}, Please check if Ascend 910 AI software package is "
"installed correctly.")
if self.op_path is None:
pass
elif Path(self.op_path).is_dir():
os.environ['ASCEND_OPP_PATH'] = self.op_path
else:
raise EnvironmentError(
f"No such directory: {self.op_path}, Please check if Ascend 910 AI software package is "
"installed correctly.")
def _check_env(self):
"""ascend dependence path check"""
if self.path is None or self.path_check not in self.path:
logger.warning("Can not find ccec_compiler(need by mindspore-ascend), please check if you have set env "
"PATH, you can reference to the installation guidelines https://www.mindspore.cn/install")
if self.python_path is None or self.python_path_check not in self.python_path:
logger.warning(
"Can not find tbe op implement(need by mindspore-ascend), please check if you have set env "
"PYTHONPATH, you can reference to the installation guidelines "
"https://www.mindspore.cn/install")
if self.ld_lib_path is None or not (self.ld_lib_path_check_fwk in self.ld_lib_path and
self.ld_lib_path_check_addons in self.ld_lib_path):
logger.warning("Can not find driver so(need by mindspore-ascend), please check if you have set env "
"LD_LIBRARY_PATH, you can reference to the installation guidelines "
"https://www.mindspore.cn/install")
if self.ascend_opp_path is None or self.ascend_opp_path_check not in self.ascend_opp_path:
logger.warning(
"Can not find opp path (need by mindspore-ascend), please check if you have set env ASCEND_OPP_PATH, "
"you can reference to the installation guidelines https://www.mindspore.cn/install")
def _read_version(self, file_path):
"""get ascend version info"""
with open(file_path, 'r') as f:
all_info = f.readlines()
for line in all_info:
if line.startswith("Version="):
self.v = line.strip().split("=")[1]
return self.v
return self.v
def check_version_and_env_config():
"""check version and env config"""
if __package_name__.lower() == "mindspore-ascend":
env_checker = AscendEnvChecker()
elif __package_name__.lower() == "mindspore-gpu":
env_checker = GPUEnvChecker()
else:
logger.info(f"Package version {__package_name__} does not need to check any environment variable, skipping.")
return
try:
# pylint: disable=unused-import
from . import _c_expression
# check version of ascend site or cuda
env_checker.check_version()
env_checker.set_env()
except ImportError as e:
env_checker.check_env(e)
def _set_pb_env():
"""Set env variable `PROTOCOL_BUFFERS` to prevent memory overflow."""
if os.getenv("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") == "cpp":
logger.info("Current env variable `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=cpp`. "
"When the checkpoint file is too large, "
"it may cause memory limit error during load checkpoint file. "
"This can be solved by set env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python`.")
elif os.getenv("PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION") is None:
logger.info("Setting the env `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python` to prevent memory overflow "
"during save or load checkpoint file.")
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
check_version_and_env_config()
_set_pb_env()