You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/python/paddle/fluid/core.py

353 lines
14 KiB

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import site
import sys
import os
import warnings
import platform
# Extension of the compiled core module: 'pyd' on Windows ('nt'), 'so' otherwise.
core_suffix = 'pyd' if os.name == 'nt' else 'so'

# Absolute directory of this file; the compiled cores are looked up next to it.
current_path = os.path.abspath(os.path.dirname(__file__))

# Record which compiled core variants are present on disk.
has_avx_core = os.path.exists(
    os.sep.join([current_path, 'core_avx.' + core_suffix]))
has_noavx_core = os.path.exists(
    os.sep.join([current_path, 'core_noavx.' + core_suffix]))
# On Windows, put the "libs" directory one level above this file on the
# PATH / module search path so that dependent DLLs can be resolved.
try:
    if os.name == 'nt':
        third_lib_path = os.sep.join([current_path, '..', 'libs'])
        os.environ['path'] = ';'.join([third_lib_path, os.environ['path']])
        sys.path.insert(0, third_lib_path)
        # Note: from python3.8, PATH will not take effect
        # https://github.com/python/cpython/pull/12302
        # Use add_dll_directory to specify dll resolution path
        if sys.version_info[:2] >= (3, 8):
            os.add_dll_directory(third_lib_path)
except ImportError as import_err:
    from .. import compat as cpt
    # Build a platform-specific hint, then raise it as a new ImportError.
    if os.name == 'nt':
        executable_path = os.path.abspath(os.path.dirname(sys.executable))
        hint = """NOTE: You may need to run \"set PATH=%s;%%PATH%%\"
if you encounters \"DLL load failed\" errors. If you have python
installed in other directory, replace \"%s\" with your own
directory. The original error is: \n %s""" % (
            executable_path, executable_path,
            cpt.get_exception_message(import_err))
    else:
        hint = """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
if you encounters \"libmkldnn.so not found\" errors. If you have python
installed in other directory, replace \"/usr/local/lib\" with your own
directory. The original error is: \n""" + cpt.get_exception_message(
            import_err)
    raise ImportError(hint)
except Exception as other_err:
    # Any other failure is propagated unchanged.
    raise other_err
def avx_supported():
    """
    Whether current system(Linux, MacOS, Windows) is supported with AVX.

    Detection is best-effort: any failure is reported on stderr and treated
    as "AVX not detected".

    Returns:
        bool: True if an AVX flag was found, False if it was not found, if
        detection failed, or if the platform is not one of the three above.
    """

    def _report(headline, err):
        # The compat helper is only needed on the failure path, so it is
        # imported lazily here.
        from .. import compat as cpt
        sys.stderr.write('%s\nThe original error is: %s\n' %
                         (headline, cpt.get_exception_message(err)))

    def _shell_output(cmd):
        # Run cmd through the shell and return its stdout, always closing
        # the pipe afterwards.
        pipe = os.popen(cmd)
        try:
            return pipe.read()
        finally:
            pipe.close()

    sysstr = platform.system().lower()
    has_avx = False
    if sysstr == 'linux':
        try:
            # Equivalent to `grep -i avx /proc/cpuinfo`, without spawning a
            # shell; bytes are used so no text decoding can fail.
            with open('/proc/cpuinfo', 'rb') as cpuinfo:
                has_avx = b'avx' in cpuinfo.read().lower()
        except Exception as e:
            _report('Can not get the AVX flag from /proc/cpuinfo.', e)
        return has_avx
    elif sysstr == 'darwin':
        # The second sysctl key is only queried when the first one did not
        # report AVX.
        for key in ('machdep.cpu.features', 'machdep.cpu.leaf7_features'):
            if has_avx:
                break
            try:
                has_avx = _shell_output(
                    'sysctl %s | grep -i avx' % key) != ''
            except Exception as e:
                _report('Can not get the AVX flag from %s.' % key, e)
        return has_avx
    elif sysstr == 'windows':
        import ctypes
        ONE_PAGE = ctypes.c_size_t(0x1000)
        # dwFreeType value that releases a whole VirtualAlloc'ed region; it
        # must be combined with a size of 0.
        MEM_RELEASE = ctypes.c_ulong(0x8000)

        def asm_func(code_str, restype=ctypes.c_uint32, argtypes=()):
            # Call the code_str as a function
            # Alloc 1 page to ensure the protection
            pfnVirtualAlloc = ctypes.windll.kernel32.VirtualAlloc
            pfnVirtualAlloc.restype = ctypes.c_void_p
            MEM_COMMIT = ctypes.c_ulong(0x1000)
            PAGE_READWRITE = ctypes.c_ulong(0x4)
            address = pfnVirtualAlloc(None, ONE_PAGE, MEM_COMMIT,
                                      PAGE_READWRITE)
            if not address:
                raise Exception("Failed to VirtualAlloc")

            # Copy the code into the memory segment
            memmove = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p,
                                       ctypes.c_void_p,
                                       ctypes.c_size_t)(ctypes._memmove_addr)
            if memmove(address, code_str, len(code_str)) < 0:
                raise Exception("Failed to memmove")

            # Enable execute permissions
            PAGE_EXECUTE = ctypes.c_ulong(0x10)
            pfnVirtualProtect = ctypes.windll.kernel32.VirtualProtect
            res = pfnVirtualProtect(
                ctypes.c_void_p(address), ONE_PAGE, PAGE_EXECUTE,
                ctypes.byref(ctypes.c_ulong(0)))
            if not res:
                raise Exception("Failed VirtualProtect")

            # Flush instruction cache
            pfnGetCurrentProcess = ctypes.windll.kernel32.GetCurrentProcess
            pfnGetCurrentProcess.restype = ctypes.c_void_p
            prochandle = ctypes.c_void_p(pfnGetCurrentProcess())
            res = ctypes.windll.kernel32.FlushInstructionCache(
                prochandle, ctypes.c_void_p(address), ONE_PAGE)
            if not res:
                raise Exception("Failed FlushInstructionCache")

            # Cast the memory to function
            functype = ctypes.CFUNCTYPE(restype, *argtypes)
            func = functype(address)
            return func, address

        # http://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits
        # mov eax,0x1; cpuid; mov eax,ecx; ret
        # NOTE(review): cpuid also overwrites ebx/edx and this stub does not
        # save them -- confirm that is safe for the calling convention in use.
        code_str = b"\xB8\x01\x00\x00\x00\x0f\xa2\x89\xC8\xC3"
        avx_bit = 28
        retval = 0
        try:
            # Convert the code_str into a function that returns uint
            func, address = asm_func(code_str)
            try:
                retval = func()
            finally:
                # Release the page even if the call itself failed.
                ctypes.windll.kernel32.VirtualFree(
                    ctypes.c_void_p(address), ctypes.c_size_t(0),
                    MEM_RELEASE)
        except Exception as e:
            _report('Failed getting the AVX flag on Windows.', e)
        return (retval & (1 << avx_bit)) > 0
    else:
        sys.stderr.write('Do not get AVX flag on %s\n' % sysstr)
        return False
def run_shell_command(cmd):
    """
    Run ``cmd`` through the shell and return its stripped standard output.

    Returns:
        The UTF-8 decoded stdout with surrounding whitespace removed, or
        None when the command wrote anything to stderr.
    """
    import subprocess

    process = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout_data, stderr_data = process.communicate()
    # Any stderr output at all is treated as failure.
    if stderr_data:
        return None
    return stdout_data.decode('utf-8').strip()
def get_dso_path(core_so, dso_name):
    """
    Look up, via ``ldd``, the path of the library matching ``dso_name``
    among the dependencies of ``core_so``.

    Returns:
        The output of the ldd/grep/awk pipeline as returned by
        run_shell_command, or None when either argument is empty/None.
    """
    if not (core_so and dso_name):
        return None
    pipeline = "ldd %s|grep %s|awk '{print $3}'" % (core_so, dso_name)
    return run_shell_command(pipeline)
def load_dso(dso_absolute_path):
    """
    Best-effort load of the shared library at ``dso_absolute_path``.

    Nothing happens when the path is empty or None. A failed load is
    reported through ``warnings.warn`` instead of being raised.
    """
    if not dso_absolute_path:
        return
    try:
        from ctypes import cdll
        cdll.LoadLibrary(dso_absolute_path)
    except Exception:
        # Only ordinary errors are downgraded to a warning; KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        warnings.warn("Load {} failed".format(dso_absolute_path))
def pre_load(dso_name):
    """
    Resolve ``dso_name`` among the dependencies of the available compiled
    core and load it.

    The AVX core is preferred over the non-AVX one; when neither file is
    present, no core path is passed on to the lookup.
    """
    core_so = None
    for present, stem in ((has_avx_core, 'core_avx.'),
                          (has_noavx_core, 'core_noavx.')):
        if present:
            core_so = current_path + os.sep + stem + core_suffix
            break
    load_dso(get_dso_path(core_so, dso_name))
def get_libc_ver():
    """
    Detect the C library flavour and version by querying ``ldd``.

    Returns:
        tuple: ("glibc", version) or ("musl", version) when the matching
        probe produced a version string, otherwise (None, None).
    """
    # Empty output is treated the same as a failed command: without this, a
    # probe that prints nothing would be reported as a detected libc with an
    # empty version string.
    ldd_glibc = run_shell_command("ldd --version | awk '/ldd/{print $NF}'")
    if ldd_glibc:
        return ("glibc", ldd_glibc)

    ldd_musl = run_shell_command("ldd 2>&1 | awk '/Version/{print $NF}'")
    if ldd_musl:
        return ("musl", ldd_musl)
    return (None, None)
def less_than_ver(a, b):
    """
    Return True when dotted version string ``a`` is strictly lower than ``b``.

    Trailing ".0" components are ignored, so "2.0.0" compares equal to "2".
    Components are compared numerically, so "2.3" is lower than "2.23".

    Returns:
        bool: the comparison result; False when either argument is None or
        cannot be parsed as dot-separated integers.
    """
    if a is None or b is None:
        return False

    import re

    def to_list(s):
        # Drop trailing zero components, then split into integers.
        s = re.sub(r'(\.0+)+$', '', s)
        return [int(x) for x in s.split('.')]

    try:
        return to_list(a) < to_list(b)
    except ValueError:
        # An unparsable version is treated like an unknown one.
        return False
# NOTE(zhiqiu): An error may occur when importing paddle on a Linux platform with glibc < 2.22;
# the error message is "dlopen: cannot load any more object with static TLS".
# This happens when:
# (1) the number of dynamic shared libraries (DSOs) loaded is > 14,
# (2) after that, a dynamic shared library (DSO) with static TLS is loaded.
# For paddle, the problem is that 'libgomp' is a DSO with static TLS, and it is loaded after 14 DSOs.
# So, here is a tricky way to solve the problem: preload 'libgomp' before 'core_avx.so'.
# The final solution is to upgrade glibc to > 2.22 on the target system.
# On Linux with glibc older than 2.23, try to load libgomp ahead of the
# compiled core.
if platform.system().lower() == 'linux':
    libc_type, libc_ver = get_libc_ver()
    if libc_type == 'glibc' and less_than_ver(libc_ver, '2.23'):
        try:
            pre_load('libgomp')
        except Exception:
            # NOTE(zhiqiu): do not abort if failed, since it may success when import core_avx.so
            # The message ends with a newline so later stderr output
            # starts on its own line.
            sys.stderr.write('Error: Can not preload libgomp.so\n')
# Becomes True when the non-AVX core has to be imported instead of the AVX one.
load_noavx = False

# Prefer the AVX build of the compiled core when avx_supported() reports AVX.
if avx_supported():
    try:
        from .core_avx import *
        # The names below are imported explicitly in addition to the star
        # import (by default a star import skips underscore-prefixed names).
        from .core_avx import __doc__, __file__, __name__, __package__
        from .core_avx import __unittest_throw_exception__
        from .core_avx import _append_python_callable_object_and_return_id
        from .core_avx import _cleanup, _Scope
        from .core_avx import _get_use_default_grad_op_desc_maker_ops
        from .core_avx import _get_all_register_op_kernels
        from .core_avx import _is_program_version_supported
        from .core_avx import _set_eager_deletion_mode
        from .core_avx import _set_fuse_parameter_group_size
        from .core_avx import _set_fuse_parameter_memory_size
        from .core_avx import _is_dygraph_debug_enabled
        from .core_avx import _dygraph_debug_level
        from .core_avx import _switch_tracer
        from .core_avx import _set_paddle_lib_path
        from .core_avx import _save_static_dict
        from .core_avx import _load_static_dict
        from .core_avx import _save_dygraph_dict
        from .core_avx import _load_dygraph_dict
        from .core_avx import _create_loaded_parameter
        from .core_avx import _cuda_synchronize
        # These names are only imported when not running on Windows.
        if sys.platform != 'win32':
            from .core_avx import _set_process_pids
            from .core_avx import _erase_process_pids
            from .core_avx import _set_process_signal_handler
            from .core_avx import _throw_error_if_process_failed
            from .core_avx import _convert_to_tensor_list
            from .core_avx import _cleanup_mmap_fds
            from .core_avx import _remove_tensor_list_mmap_fds
    except Exception as e:
        if has_avx_core:
            # The AVX core file exists but could not be imported: propagate.
            raise e
        else:
            # No AVX core file on disk: warn and fall back to the non-AVX core.
            from .. import compat as cpt
            sys.stderr.write(
                'WARNING: Do not have avx core. You may not build with AVX, '
                'but AVX is supported on local machine.\n You could build paddle '
                'WITH_AVX=ON to get better performance.\n'
                'The original error is: %s\n' % cpt.get_exception_message(e))
            load_noavx = True
else:
    # AVX was not detected: use the non-AVX core.
    load_noavx = True
# Import the non-AVX build of the compiled core when the AVX one was not used.
if load_noavx:
    try:
        from .core_noavx import *
        # Same explicit name list as for the AVX core (by default a star
        # import skips underscore-prefixed names).
        from .core_noavx import __doc__, __file__, __name__, __package__
        from .core_noavx import __unittest_throw_exception__
        from .core_noavx import _append_python_callable_object_and_return_id
        from .core_noavx import _cleanup, _Scope
        from .core_noavx import _get_use_default_grad_op_desc_maker_ops
        from .core_noavx import _get_all_register_op_kernels
        from .core_noavx import _is_program_version_supported
        from .core_noavx import _set_eager_deletion_mode
        from .core_noavx import _set_fuse_parameter_group_size
        from .core_noavx import _set_fuse_parameter_memory_size
        from .core_noavx import _is_dygraph_debug_enabled
        from .core_noavx import _dygraph_debug_level
        from .core_noavx import _switch_tracer
        from .core_noavx import _set_paddle_lib_path
        from .core_noavx import _save_static_dict
        from .core_noavx import _load_static_dict
        from .core_noavx import _save_dygraph_dict
        from .core_noavx import _load_dygraph_dict
        from .core_noavx import _create_loaded_parameter
        from .core_noavx import _cuda_synchronize
        # These names are only imported when not running on Windows.
        if sys.platform != 'win32':
            from .core_noavx import _set_process_pids
            from .core_noavx import _erase_process_pids
            from .core_noavx import _set_process_signal_handler
            from .core_noavx import _throw_error_if_process_failed
            from .core_noavx import _convert_to_tensor_list
            from .core_noavx import _cleanup_mmap_fds
            from .core_noavx import _remove_tensor_list_mmap_fds
    except Exception as e:
        # When the core file is present, say so before re-raising.
        if has_noavx_core:
            sys.stderr.write(
                'Error: Can not import noavx core while this file exists ' +
                current_path + os.sep + 'core_noavx.' + core_suffix + '\n')
        # There is no further fallback, so the import error always propagates.
        raise e
# set paddle lib path
def set_paddle_lib_path():
    """
    Pass the first existing ``<site dir>/paddle/libs`` directory to
    ``_set_paddle_lib_path``.

    The global site-packages directories are searched first, then the user
    site directory. Nothing is set when no such directory exists.
    """
    if hasattr(site, 'getsitepackages'):
        site_dirs = site.getsitepackages()
    else:
        # Fallback for site modules that do not provide getsitepackages().
        site_dirs = [x for x in sys.path if 'site-packages' in x]

    candidates = list(site_dirs)
    # site.USER_SITE can exist but still be None; skip it in that case
    # instead of failing while building the path.
    user_site = getattr(site, 'USER_SITE', None)
    if user_site:
        candidates.append(user_site)

    for base_dir in candidates:
        lib_dir = os.path.sep.join([base_dir, 'paddle', 'libs'])
        if os.path.exists(lib_dir):
            _set_paddle_lib_path(lib_dir)
            return


set_paddle_lib_path()