You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/python/paddle/utils/cpp_extension/cpp_extension.py

580 lines
24 KiB

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import sys
import textwrap
import copy
import re
import setuptools
from setuptools.command.easy_install import easy_install
from setuptools.command.build_ext import build_ext
from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags, add_std_without_repeat, get_build_directory
from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from
from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS, OS_NAME
from .extension_utils import use_new_custom_op_load_method, MSVC_COMPILE_FLAGS
# Note(zhouwei): On windows, it will export function 'PyInit_[name]' by default,
# The solution is: 1.User add function PyInit_[name] 2. set not to export
# refer to https://stackoverflow.com/questions/34689210/error-exporting-symbol-when-building-python-c-extension-in-windows
if IS_WINDOWS and six.PY3:
from distutils.command.build_ext import build_ext as _du_build_ext
from unittest.mock import Mock
_du_build_ext.get_export_symbols = Mock(return_value=None)
CUDA_HOME = find_cuda_home()
def setup(**attr):
"""
Wrapper setuptools.setup function to valid `build_ext` command and
implement paddle api code injection by switching `write_stub`
function in bdist_egg with `custom_write_stub`.
Its usage is almost same as `setuptools.setup` except for `ext_modules`
arguments. For compiling multi custom operators, all necessary source files
can be include into just one Extension (CppExtension/CUDAExtension).
Moreover, only one `name` argument is required in `setup` and no need to spcific
`name` in Extension.
Example:
>> from paddle.utils.cpp_extension import CUDAExtension, setup
>> setup(name='custom_module',
ext_modules=CUDAExtension(
sources=['relu_op.cc', 'relu_op.cu'],
include_dirs=[], # specific user-defined include dirs
extra_compile_args=[]) # specific user-defined compil arguments.
"""
cmdclass = attr.get('cmdclass', {})
assert isinstance(cmdclass, dict)
# if not specific cmdclass in setup, add it automaticaly.
if 'build_ext' not in cmdclass:
cmdclass['build_ext'] = BuildExtension.with_options(
no_python_abi_suffix=True)
attr['cmdclass'] = cmdclass
error_msg = """
Required to specific `name` argument in paddle.utils.cpp_extension.setup.
It's used as `import XXX` when you want install and import your custom operators.\n
For Example:
# setup.py file
from paddle.utils.cpp_extension import CUDAExtension, setup
setup(name='custom_module',
ext_modules=CUDAExtension(
sources=['relu_op.cc', 'relu_op.cu'])
# After running `python setup.py install`
from custom_module import relue
"""
# name argument is required
if 'name' not in attr:
raise ValueError(error_msg)
ext_modules = attr.get('ext_modules', [])
if not isinstance(ext_modules, list):
ext_modules = [ext_modules]
assert len(
ext_modules
) == 1, "Required only one Extension, but received {}. If you want to compile multi operators, you can include all necessary source files in one Extenion.".format(
len(ext_modules))
# replace Extension.name with attr['name] to keep consistant with Package name.
for ext_module in ext_modules:
ext_module.name = attr['name']
attr['ext_modules'] = ext_modules
# Add rename .so hook in easy_install
assert 'easy_install' not in cmdclass
cmdclass['easy_install'] = EasyInstallCommand
# Always set zip_safe=False to make compatible in PY2 and PY3
# See http://peak.telecommunity.com/DevCenter/setuptools#setting-the-zip-safe-flag
attr['zip_safe'] = False
# switch `write_stub` to inject paddle api in .egg
with bootstrap_context():
setuptools.setup(**attr)
def CppExtension(sources, *args, **kwargs):
"""
Returns setuptools.CppExtension instance for setup.py to make it easy
to specify compile flags while building C++ custommed op kernel.
Args:
sources(list[str]): The C++/CUDA source file names
args(list[options]): list of config options used to compile shared library
kwargs(dict[option]): dict of config options used to compile shared library
Returns:
Extension: An instance of setuptools.Extension
"""
kwargs = normalize_extension_kwargs(kwargs, use_cuda=False)
# Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will
# be replaced as `setup.name` to keep consistant with package. Because we allow
# users can not specific name in Extension.
# See `paddle.utils.cpp_extension.setup` for details.
name = kwargs.get('name', None)
if name is None:
name = _generate_extension_name(sources)
return setuptools.Extension(name, sources, *args, **kwargs)
def CUDAExtension(sources, *args, **kwargs):
"""
Returns setuptools.CppExtension instance for setup.py to make it easy
to specify compile flags while build CUDA custommed op kernel.
Args:
sources(list[str]): The C++/CUDA source file names
args(list[options]): list of config options used to compile shared library
kwargs(dict[option]): dict of config options used to compile shared library
Returns:
Extension: An instance of setuptools.Extension
"""
kwargs = normalize_extension_kwargs(kwargs, use_cuda=True)
# Note(Aurelius84): While using `setup` and `jit`, the Extension `name` will
# be replaced as `setup.name` to keep consistant with package. Because we allow
# users can not specific name in Extension.
# See `paddle.utils.cpp_extension.setup` for details.
name = kwargs.get('name', None)
if name is None:
name = _generate_extension_name(sources)
return setuptools.Extension(name, sources, *args, **kwargs)
def _generate_extension_name(sources):
"""
Generate extension name by source files.
"""
assert len(sources) > 0, "source files is empty"
file_prefix = []
for source in sources:
source = os.path.basename(source)
filename, _ = os.path.splitext(source)
# Use list to generate same order.
if filename not in file_prefix:
file_prefix.append(filename)
return '_'.join(file_prefix)
class BuildExtension(build_ext, object):
"""
Inherited from setuptools.command.build_ext to customize how to apply
compilation process with share library.
"""
@classmethod
def with_options(cls, **options):
"""
Returns a BuildExtension subclass containing use-defined options.
"""
class cls_with_options(cls):
def __init__(self, *args, **kwargs):
kwargs.update(options)
cls.__init__(self, *args, **kwargs)
return cls_with_options
def __init__(self, *args, **kwargs):
"""
Attributes is initialized with following oreder:
1. super(self).__init__()
2. initialize_options(self)
3. the reset of current __init__()
4. finalize_options(self)
So, it is recommended to set attribute value in `finalize_options`.
"""
super(BuildExtension, self).__init__(*args, **kwargs)
self.no_python_abi_suffix = kwargs.get("no_python_abi_suffix", True)
self.output_dir = kwargs.get("output_dir", None)
# for compatible two custom op define method
use_new_custom_op_load_method(
kwargs.get("use_new_method", use_new_custom_op_load_method()))
def initialize_options(self):
super(BuildExtension, self).initialize_options()
def finalize_options(self):
super(BuildExtension, self).finalize_options()
# NOTE(Aurelius84): Set location of compiled shared library.
# Carefully to modify this because `setup.py build/install`
# and `load` interface rely on this attribute.
if self.output_dir is not None:
self.build_lib = self.output_dir
def build_extensions(self):
self._check_abi()
for extension in self.extensions:
# check settings of compiler
if isinstance(extension.extra_compile_args, dict):
for compiler in ['cxx', 'nvcc']:
if compiler not in extension.extra_compile_args:
extension.extra_compile_args[compiler] = []
# Consider .cu, .cu.cc as valid source extensions.
self.compiler.src_extensions += ['.cu', '.cu.cc']
# Save the original _compile method for later.
if self.compiler.compiler_type == 'msvc':
self.compiler._cpp_extensions += ['.cu', '.cuh']
original_compile = self.compiler.compile
original_spawn = self.compiler.spawn
else:
# add determine compile flags
add_compile_flag(extension, '-std=c++11')
original_compile = self.compiler._compile
def unix_custom_single_compiler(obj, src, ext, cc_args, extra_postargs,
pp_opts):
"""
Monkey patch machanism to replace inner compiler to custom complie process on Unix platform.
"""
# use abspath to ensure no warning and don't remove deecopy because modify params
# with dict type is dangerous.
src = os.path.abspath(src)
cflags = copy.deepcopy(extra_postargs)
try:
original_compiler = self.compiler.compiler_so
# ncvv compile CUDA source
if is_cuda_file(src):
assert CUDA_HOME is not None
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
self.compiler.set_executable('compiler_so', nvcc_cmd)
# {'nvcc': {}, 'cxx: {}}
if isinstance(cflags, dict):
cflags = cflags['nvcc']
else:
cflags = prepare_unix_cudaflags(cflags)
# cxx compile Cpp source
elif isinstance(cflags, dict):
cflags = cflags['cxx']
add_std_without_repeat(
cflags, self.compiler.compiler_type, use_std14=False)
original_compile(obj, src, ext, cc_args, cflags, pp_opts)
finally:
# restore original_compiler
self.compiler.compiler_so = original_compiler
def win_custom_single_compiler(sources,
output_dir=None,
macros=None,
include_dirs=None,
debug=0,
extra_preargs=None,
extra_postargs=None,
depends=None):
self.cflags = copy.deepcopy(extra_postargs)
extra_postargs = None
def win_custom_spawn(cmd):
# Using regex to modify compile options
compile_options = self.compiler.compile_options
for i in range(len(cmd)):
if re.search('/MD', cmd[i]) is not None:
cmd[i] = '/MT'
if re.search('/W[1-4]', cmd[i]) is not None:
cmd[i] = '/W0'
# Using regex to match src, obj and include files
src_regex = re.compile('/T(p|c)(.*)')
src_list = [
m.group(2) for m in (src_regex.match(elem) for elem in cmd)
if m
]
obj_regex = re.compile('/Fo(.*)')
obj_list = [
m.group(1) for m in (obj_regex.match(elem) for elem in cmd)
if m
]
include_regex = re.compile(r'((\-|\/)I.*)')
include_list = [
m.group(1)
for m in (include_regex.match(elem) for elem in cmd) if m
]
assert len(src_list) == 1 and len(obj_list) == 1
src = src_list[0]
obj = obj_list[0]
if is_cuda_file(src):
assert CUDA_HOME is not None
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
if isinstance(self.cflags, dict):
cflags = self.cflags['nvcc']
elif isinstance(self.cflags, list):
cflags = self.cflags
else:
cflags = []
cflags = prepare_win_cudaflags(cflags) + ['--use-local-env']
for flag in MSVC_COMPILE_FLAGS:
cflags = ['-Xcompiler', flag] + cflags
cmd = [nvcc_cmd, '-c', src, '-o', obj
] + include_list + cflags
elif isinstance(self.cflags, dict):
cflags = MSVC_COMPILE_FLAGS + self.cflags['cxx']
cmd += cflags
elif isinstance(self.cflags, list):
cflags = MSVC_COMPILE_FLAGS + self.cflags
cmd += cflags
return original_spawn(cmd)
try:
self.compiler.spawn = win_custom_spawn
return original_compile(sources, output_dir, macros,
include_dirs, debug, extra_preargs,
extra_postargs, depends)
finally:
self.compiler.spawn = original_spawn
def object_filenames_with_cuda(origina_func, build_directory):
"""
Decorated the function to add customized naming machanism.
Originally, both .cc/.cu will have .o object output that will
bring file override problem. Use .cu.o as CUDA object suffix.
"""
def wrapper(source_filenames, strip_dir=0, output_dir=''):
try:
objects = origina_func(source_filenames, strip_dir,
output_dir)
for i, source in enumerate(source_filenames):
# modify xx.o -> xx.cu.o/xx.cu.obj
if is_cuda_file(source):
old_obj = objects[i]
if self.compiler.compiler_type == 'msvc':
objects[i] = old_obj[:-3] + 'cu.obj'
else:
objects[i] = old_obj[:-1] + 'cu.o'
# if user set build_directory, output objects there.
if build_directory is not None:
objects = [
os.path.join(build_directory, os.path.basename(obj))
for obj in objects
]
# ensure to use abspath
objects = [os.path.abspath(obj) for obj in objects]
finally:
self.compiler.object_filenames = origina_func
return objects
return wrapper
# customized compile process
if self.compiler.compiler_type == 'msvc':
self.compiler.compile = win_custom_single_compiler
else:
self.compiler._compile = unix_custom_single_compiler
self.compiler.object_filenames = object_filenames_with_cuda(
self.compiler.object_filenames, self.build_lib)
self._record_op_info()
print("Compiling user custom op, it will cost a few seconds.....")
build_ext.build_extensions(self)
def get_ext_filename(self, fullname):
# for example: custommed_extension.cpython-37m-x86_64-linux-gnu.so
ext_name = super(BuildExtension, self).get_ext_filename(fullname)
if self.no_python_abi_suffix and six.PY3:
split_str = '.'
name_items = ext_name.split(split_str)
assert len(
name_items
) > 2, "Expected len(name_items) > 2, but received {}".format(
len(name_items))
name_items.pop(-2)
# custommed_extension.so
ext_name = split_str.join(name_items)
return ext_name
def _check_abi(self):
"""
Check ABI Compatibility.
"""
if hasattr(self.compiler, 'compiler_cxx'):
compiler = self.compiler.compiler_cxx[0]
elif IS_WINDOWS:
compiler = os.environ.get('CXX', 'cl')
else:
compiler = os.environ.get('CXX', 'c++')
check_abi_compatibility(compiler)
# Warn user if VC env is activated but `DISTUILS_USE_SDK` is not set.
if IS_WINDOWS and 'VSCMD_ARG_TGT_ARCH' in os.environ and 'DISTUTILS_USE_SDK' not in os.environ:
msg = (
'It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.'
'This may lead to multiple activations of the VC env.'
'Please set `DISTUTILS_USE_SDK=1` and try again.')
raise UserWarning(msg)
def _record_op_info(self):
"""
Record custum op inforomation.
"""
# parse shared library abs path
outputs = self.get_outputs()
assert len(outputs) == 1
# multi operators built into same one .so file
so_path = os.path.abspath(outputs[0])
so_name = os.path.basename(so_path)
for i, extension in enumerate(self.extensions):
sources = [os.path.abspath(s) for s in extension.sources]
op_names = parse_op_name_from(sources)
for op_name in op_names:
CustomOpInfo.instance().add(op_name,
so_name=so_name,
so_path=so_path)
class EasyInstallCommand(easy_install, object):
"""
Extend easy_intall Command to control the behavior of naming shared library
file.
NOTE(Aurelius84): This is a hook subclass inherited Command used to rename shared
library file after extracting egg-info into site-packages.
"""
def __init__(self, *args, **kwargs):
super(EasyInstallCommand, self).__init__(*args, **kwargs)
# NOTE(Aurelius84): Add args and kwargs to make compatible with PY2/PY3
def run(self, *args, **kwargs):
super(EasyInstallCommand, self).run(*args, **kwargs)
# NOTE: To avoid failing import .so file instead of
# python file because they have same name, we rename
# .so shared library to another name.
for egg_file in self.outputs:
filename, ext = os.path.splitext(egg_file)
will_rename = False
if OS_NAME.startswith('linux') and ext == '.so':
will_rename = True
elif IS_WINDOWS and ext == '.pyd':
will_rename = True
if will_rename:
new_so_path = filename + "_pd_" + ext
if not os.path.exists(new_so_path):
os.rename(r'%s' % egg_file, r'%s' % new_so_path)
assert os.path.exists(new_so_path)
def load(name,
sources,
extra_cflags=None,
extra_cuda_cflags=None,
extra_ldflags=None,
extra_include_paths=None,
build_directory=None,
interpreter=None,
verbose=False):
"""
An Interface to automatically compile C++/CUDA source files Just-In-Time
and return callable python function as other Paddle layers API. It will
append user defined custom op in background.
This module will perform compiling, linking, api generation and module loading
processes for users. It does not require CMake or Ninja environment and only
g++/nvcc on Linux and clang++ on MacOS. Moreover, ABI compatibility will be
checked to ensure that compiler version on local machine is compatible with
pre-installed Paddle whl in python site-packages. For example if Paddle is built
with GCC5.4, the version of user's local machine should satisfy GCC >= 5.4.
Otherwise, a fatal error will occur because ABI compatibility.
Args:
name(str): generated shared library file name.
sources(list[str]): custom op source files name with .cc/.cu suffix.
extra_cflag(list[str]): additional flags used to compile CPP files. By default
all basic and framework related flags have been included.
If your pre-insall Paddle supported MKLDNN, please add
'-DPADDLE_WITH_MKLDNN'. Default None.
extra_cuda_cflags(list[str]): additonal flags used to compile CUDA files. See
https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
for details. Default None.
extra_ldflags(list[str]): additonal flags used to link shared library. See
https://gcc.gnu.org/onlinedocs/gcc/Link-Options.html for details.
Default None.
extra_include_paths(list[str]): additional include path used to search header files.
Default None.
build_directory(str): specific directory path to put shared library file. If set None,
it will use `PADDLE_EXTENSION_DIR` from os.environ. Use
`paddle.utils.cpp_extension.get_build_directory()` to see the location.
interpreter(str): alias or full interpreter path to specific which one to use if have installed multiple.
If set None, will use `python` as default interpreter.
verbose(bool): whether to verbose compiled log information
Returns:
custom api: A callable python function with same signature as CustomOp Kernel defination.
Example:
>> from paddle.utils.cpp_extension import load
>> relu2 = load(name='relu2',
sources=['relu_op.cc', 'relu_op.cu'])
>> x = paddle.rand([4, 10]], dtype='float32')
>> out = relu2(x)
"""
if build_directory is None:
build_directory = get_build_directory(verbose)
# ensure to use abs path
build_directory = os.path.abspath(build_directory)
# Will load shared library from 'path' on windows
if IS_WINDOWS:
os.environ['path'] = build_directory + ';' + os.environ['path']
log_v("build_directory: {}".format(build_directory), verbose)
file_path = os.path.join(build_directory, "{}_setup.py".format(name))
sources = [os.path.abspath(source) for source in sources]
# TODO(Aurelius84): split cflags and cuda_flags
if extra_cflags is None: extra_cflags = []
if extra_cuda_cflags is None: extra_cuda_cflags = []
compile_flags = extra_cflags + extra_cuda_cflags
log_v("additonal compile_flags: [{}]".format(' '.join(compile_flags)),
verbose)
# write setup.py file and compile it
_write_setup_file(name, sources, file_path, extra_include_paths,
compile_flags, extra_ldflags, verbose)
_jit_compile(file_path, interpreter, verbose)
# import as callable python api
custom_op_api = _import_module_from_library(name, build_directory, verbose)
return custom_op_api