You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/tools/sampcd_processor.py

651 lines
24 KiB

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import subprocess
import multiprocessing
import math
import platform
import inspect
import paddle
import paddle.fluid
import json
"""
please make sure to run in the tools path
usage: python sample_test.py {arg1}
arg1: the first arg defined running in gpu version or cpu version
for example, you can run cpu version python2 testing like this:
python sampcd_processor.py cpu
"""
def find_all(srcstr, substr):
"""
to find all desired substring in the source string
and return their starting indices as a list
Args:
srcstr(str): the parent string
substr(str): substr
Returns:
list: a list of the indices of the substrings
found
"""
indices = []
gotone = srcstr.find(substr)
while (gotone != -1):
indices.append(gotone)
gotone = srcstr.find(substr, gotone + 1)
return indices
def check_indent(cdline):
"""
to check the indent of a given code line
to get the number of starting blank chars,
e.t. blankspaces and \t
\t will be interpreted as 4 single blankspaces,
e.t. '\t'=' '
Args:
cdline(str) : a single line of code from the source file
Returns:
int : the indent of the number of interpreted
blankspaces
"""
indent = 0
for c in cdline:
if c == '\t':
indent += 4
elif c == ' ':
indent += 1
if c != ' ' and c != '\t':
break
return indent
# srccom: raw comments in the source,including ''' and original indent
def sampcd_extract_and_run(srccom, name, htype="def", hname=""):
"""
Extract and run sample codes from source comment and
the result will be returned.
Args:
srccom(str): the source comment of some API whose
example codes will be extracted and run.
name(str): the name of the API.
htype(str): the type of hint banners, def/class/method.
hname(str): the name of the hint banners , e.t. def hname.
Returns:
result: True or False
"""
result = True
def sampcd_header_print(name, sampcd, htype, hname):
"""
print hint banner headers.
Args:
name(str): the name of the API.
sampcd(str): sample code string
htype(str): the type of hint banners, def/class/method.
hname(str): the name of the hint banners , e.t. def hname.
flushed.
"""
print_header(htype, hname)
print("Sample code ", str(y), " extracted for ", name, " :")
print(sampcd)
print("----example code check----\n")
print("executing sample code .....")
print("execution result:")
sampcd_begins = find_all(srccom, " code-block:: python")
if len(sampcd_begins) == 0:
print_header(htype, hname)
'''
detect sample codes using >>> to format
and consider this situation as wrong
'''
if srccom.find("Examples:") != -1:
print("----example code check----\n")
if srccom.find(">>>") != -1:
print(
"Deprecated sample code style:\n\n Examples:\n\n >>>codeline\n >>>codeline\n\n\n ",
"Please use '.. code-block:: python' to ",
"format sample code.\n")
result = False
else:
print("Error: No sample code!\n")
result = False
for y in range(1, len(sampcd_begins) + 1):
sampcd_begin = sampcd_begins[y - 1]
sampcd = srccom[sampcd_begin + len(" code-block:: python") + 1:]
sampcd = sampcd.split("\n")
# remove starting empty lines
while sampcd[0].replace(' ', '').replace('\t', '') == '':
sampcd.pop(0)
# the minimum indent, which is the indent of the first
# non-empty line
min_indent = check_indent(sampcd[0])
sampcd_to_write = []
for i in range(0, len(sampcd)):
cdline = sampcd[i]
# handle empty lines or those only with spaces/tabs
if cdline.strip() == '':
continue
this_indent = check_indent(cdline)
if this_indent < min_indent:
break
else:
cdline = cdline.replace('\t', ' ')
sampcd_to_write.append(cdline[min_indent:])
sampcd = '\n'.join(sampcd_to_write)
if sys.argv[1] == "cpu":
sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = ""\n' + sampcd
if sys.argv[1] == "gpu":
sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = "0"\n' + sampcd
sampcd += '\nprint(' + '\"' + name + ' sample code is executed successfully!\")'
if len(sampcd_begins) > 1:
tfname = name + "_example_" + str(y) + ".py"
else:
tfname = name + "_example" + ".py"
tempf = open("samplecode_temp/" + tfname, 'w')
tempf.write(sampcd)
tempf.close()
if platform.python_version()[0] == "2":
cmd = ["python", "samplecode_temp/" + tfname]
elif platform.python_version()[0] == "3":
cmd = ["python3", "samplecode_temp/" + tfname]
else:
print("Error: fail to parse python version!")
result = False
exit(1)
subprc = subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = subprc.communicate()
msg = "".join(output.decode(encoding='utf-8'))
err = "".join(error.decode(encoding='utf-8'))
if subprc.returncode != 0:
print("\nSample code error found in ", name, ":\n")
sampcd_header_print(name, sampcd, htype, hname)
print("subprocess return code: ", str(subprc.returncode))
print("Error Raised from Sample Code ", name, " :\n")
print(err)
print(msg)
result = False
# msg is the returned code execution report
#os.remove("samplecode_temp/" + tfname)
return result
def single_defcom_extract(start_from, srcls, is_class_begin=False):
"""
to extract a def function/class/method comments body
Args:
start_from(int): the line num of "def" header
srcls(list): the source file in lines
is_class_begin(bool): whether the start_from is a beginning a class. \
For a sole class body itself may end up with its method if it has no
docstring. But the body of \
a common def function can only be ended up by a none-indented def/class
Returns:
string : the extracted comment body, inclusive of its quote marks.
"""
i = start_from
fcombody = "" # def comment body
comstart = -1 # the starting line index of comment mark "'''" or """"""
# if it is not -1, it indicates the loop is in the comment body
comstyle = 0 # comment mark style ,comments quoted with ''' is coded as 1
# comments quoted with """ is coded as 2
for x in range(i + 1, len(srcls)):
if is_class_begin:
if srcls[x].replace('\t', ' ').startswith(' def '):
break
if srcls[x].startswith('def ') or srcls[x].startswith('class '):
break
else:
if comstart == -1:
s = srcls[x].replace(" ", '').replace("\t",
'').replace("\n", '')
if s.startswith("\"\"\"") or s.startswith("r\"\"\""):
comstart = x
comstyle = 2
continue
if (comstyle == 2 and comstart != -1 and
srcls[x].replace(" ", '').replace("\t", '').replace(
"\n", '').startswith("\"\"\"")):
break
if comstart == -1:
s = srcls[x].replace(" ", '').replace("\t",
'').replace("\n", '')
if s.startswith("\'\'\'") or s.startswith("r\'\'\'"):
comstart = x
comstyle = 1
continue
if (comstyle == 1 and comstart != -1 and
srcls[x].replace(" ", '').replace("\t", '').replace(
"\n", '').startswith("\'\'\'")):
break
if (comstart !=
-1): # when the comments start, begin to add line to fcombody
fcombody += srcls[x]
return fcombody
def print_header(htype, name):
print(htype, " name:", name)
print("-----------------------")
def srccoms_extract(srcfile, wlist):
"""
Given a source file ``srcfile``, this function will
extract its API(doc comments) and run sample codes in the
API.
Args:
srcfile(file): the source file
wlist(list): white list
Returns:
result: True or False
"""
process_result = True
srcc = srcfile.read()
# 2. get defs and classes header line number
# set file pointer to its beginning
srcfile.seek(0, 0)
srcls = srcfile.readlines() # source lines
# 1. fetch__all__ list
allidx = srcc.find("__all__")
srcfile_new = srcfile.name
srcfile_new = srcfile_new.replace('.py', '')
srcfile_list = srcfile_new.split('/')
srcfile_str = ''
for i in range(4, len(srcfile_list)):
srcfile_str = srcfile_str + srcfile_list[i] + '.'
if allidx != -1:
alllist = []
# get all list for layers/ops.py
if srcfile.name.find("ops.py") != -1:
for ai in range(0, len(srcls)):
if srcls[ai].startswith("__all__"):
lb = srcls[ai].find('[')
rb = srcls[ai].find(']')
if lb == -1:
continue
allele = srcls[ai][lb + 1:rb].replace("'", '').replace(
" ", '').replace("\"", '')
alllist.append(allele)
if '' in alllist:
alllist.remove('')
else:
alllist_b = allidx + len("__all__")
allstr = srcc[alllist_b + srcc[alllist_b:].find("[") + 1:alllist_b +
srcc[alllist_b:].find("]")]
allstr = allstr.replace("\n", '').replace(" ", '').replace(
"'", '').replace("\"", '')
alllist = allstr.split(',')
if '' in alllist:
alllist.remove('')
api_alllist_count = len(alllist)
api_count = 0
handled = []
# get src contents in layers/ops.py
if srcfile.name.find("ops.py") != -1:
for i in range(0, len(srcls)):
if srcls[i].find("__doc__") != -1:
opname = srcls[i][:srcls[i].find("__doc__") - 1]
if opname in wlist:
continue
comstart = i
for j in range(i, len(srcls)):
if srcls[j].find("\"\"\"") != -1:
comstart = i
opcom = ""
for j in range(comstart + 1, len(srcls)):
opcom += srcls[j]
if srcls[j].find("\"\"\"") != -1:
break
api_count += 1
handled.append(
opname) # ops.py also has normal formatted functions
# use list 'handled' to mark the functions have been handled here
# which will be ignored in the following step
for i in range(0, len(srcls)):
if srcls[i].startswith(
'def '): # a function header is detected in line i
f_header = srcls[i].replace(" ", '')
fn = f_header[len('def'):f_header.find('(')] # function name
if "%s%s" % (srcfile_str, fn) not in methods:
continue
if fn in handled:
continue
if fn in alllist:
api_count += 1
if fn in wlist or fn + "@" + srcfile.name in wlist:
continue
fcombody = single_defcom_extract(i, srcls)
if fcombody == "": # if no comment
print_header("def", fn)
print("WARNING: no comments in function ", fn,
", but it deserves.")
continue
else:
if not sampcd_extract_and_run(fcombody, fn, "def", fn):
process_result = False
if srcls[i].startswith('class '):
c_header = srcls[i].replace(" ", '')
cn = c_header[len('class'):c_header.find('(')] # class name
if '%s%s' % (srcfile_str, cn) not in methods:
continue
if cn in handled:
continue
if cn in alllist:
api_count += 1
if cn in wlist or cn + "@" + srcfile.name in wlist:
continue
# class comment
classcom = single_defcom_extract(i, srcls, True)
if classcom != "":
if not sampcd_extract_and_run(classcom, cn, "class",
cn):
process_result = False
else:
print("WARNING: no comments in class itself ", cn,
", but it deserves.\n")
# handling methods in class bodies
for x in range(
i + 1,
len(srcls)): # from the next line of class header
if (srcls[x].startswith('def ') or
srcls[x].startswith('class ')):
break
else:
# member method def header
srcls[x] = srcls[x].replace('\t', ' ')
if (srcls[x].startswith(
' def ')): # detect a mehtod header..
thisl = srcls[x]
indent = len(thisl) - len(thisl.lstrip())
mn = thisl[indent + len('def '):thisl.find(
'(')] # method name
name = cn + "." + mn # full name
if '%s%s' % (
srcfile_str, name
) not in methods: # class method not in api.spec
continue
if mn.startswith('_'):
continue
if name in wlist or name + "@" + srcfile.name in wlist:
continue
thismethod = [thisl[indent:]
] # method body lines
# get all the lines of a single method body
# into thismethod(list)
# and send it to single_defcom_extract
for y in range(x + 1, len(srcls)):
srcls[y] = srcls[y].replace('\t', ' ')
if (srcls[y].startswith('def ') or
srcls[y].startswith('class ')):
# end of method
break
elif srcls[y].startswith(' def '):
# end of method
break
else:
thismethod.append(srcls[y][indent:])
thismtdcom = single_defcom_extract(0,
thismethod)
if thismtdcom != "":
if not sampcd_extract_and_run(
thismtdcom, name, "method", name):
process_result = False
return process_result
def test(file_list):
process_result = True
for file in file_list:
with open(file, 'r') as src:
if not srccoms_extract(src, wlist):
process_result = False
return process_result
def get_filenames():
'''
this function will get the modules that pending for check.
Returns:
list: the modules pending for check .
'''
filenames = []
global methods
global whl_error
methods = []
whl_error = []
get_incrementapi()
API_spec = 'dev_pr_diff_api.spec'
with open(API_spec) as f:
for line in f.readlines():
api = line.replace('\n', '')
try:
module = eval(api).__module__
except AttributeError:
whl_error.append(api)
continue
if len(module.split('.')) > 1:
filename = '../python/'
module_py = '%s.py' % module.split('.')[-1]
for i in range(0, len(module.split('.')) - 1):
filename = filename + '%s/' % module.split('.')[i]
filename = filename + module_py
else:
filename = ''
print("\nWARNING:----Exception in get api filename----\n")
print("\n" + api + ' module is ' + module + "\n")
if filename != '' and filename not in filenames:
filenames.append(filename)
# get all methods
method = ''
if inspect.isclass(eval(api)):
name = api.split('.')[-1]
elif inspect.isfunction(eval(api)):
name = api.split('.')[-1]
elif inspect.ismethod(eval(api)):
name = '%s.%s' % (api.split('.')[-2], api.split('.')[-1])
else:
name = ''
print("\nWARNING:----Exception in get api methods----\n")
print("\n" + line + "\n")
print("\n" + api + ' method is None!!!' + "\n")
for j in range(2, len(module.split('.'))):
method = method + '%s.' % module.split('.')[j]
method = method + name
if method not in methods:
methods.append(method)
os.remove(API_spec)
return filenames
def get_incrementapi():
'''
this function will get the apis that difference between API_DEV.spec and API_PR.spec.
'''
def get_api_md5(path):
api_md5 = {}
API_spec = '%s/%s' % (os.path.abspath(os.path.join(os.getcwd(), "..")),
path)
with open(API_spec) as f:
for line in f.readlines():
api = line.split(' ', 1)[0]
md5 = line.split("'document', ")[1].replace(')', '').replace(
'\n', '')
api_md5[api] = md5
return api_md5
dev_api = get_api_md5('paddle/fluid/API_DEV.spec')
pr_api = get_api_md5('paddle/fluid/API_PR.spec')
with open('dev_pr_diff_api.spec', 'w') as f:
for key in pr_api:
if key in dev_api:
if dev_api[key] != pr_api[key]:
f.write(key)
f.write('\n')
else:
f.write(key)
f.write('\n')
def get_wlist():
'''
this function will get the white list of API.
Returns:
wlist: a list of API that should not trigger the example check .
'''
wlist = []
wlist_file = []
# only white on CPU
gpu_not_white = []
with open("wlist.json", 'r') as load_f:
load_dict = json.load(load_f)
for key in load_dict:
if key == 'wlist_dir':
for item in load_dict[key]:
wlist_file.append(item["name"])
elif key == "gpu_not_white":
gpu_not_white = load_dict[key]
elif key == "wlist_api":
for item in load_dict[key]:
wlist.append(item["name"])
else:
wlist = wlist + load_dict[key]
return wlist, wlist_file, gpu_not_white
wlist, wlist_file, gpu_not_white = get_wlist()
if len(sys.argv) < 2:
print("Error: inadequate number of arguments")
print('''If you are going to run it on
"CPU: >>> python sampcd_processor.py cpu
"GPU: >>> python sampcd_processor.py gpu
''')
sys.exit("lack arguments")
else:
if sys.argv[1] == "gpu":
for _gnw in gpu_not_white:
wlist.remove(_gnw)
elif sys.argv[1] != "cpu":
print("Unrecognized argument:'", sys.argv[1], "' , 'cpu' or 'gpu' is ",
"desired\n")
sys.exit("Invalid arguments")
print("API check -- Example Code")
print("sample_test running under python", platform.python_version())
if not os.path.isdir("./samplecode_temp"):
os.mkdir("./samplecode_temp")
cpus = multiprocessing.cpu_count()
filenames = get_filenames()
if len(filenames) == 0 and len(whl_error) == 0:
print("-----API_PR.spec is the same as API_DEV.spec-----")
exit(0)
rm_file = []
for f in filenames:
for w_file in wlist_file:
if f.startswith(w_file):
rm_file.append(f)
filenames.remove(f)
if len(rm_file) != 0:
print("REMOVE white files: %s" % rm_file)
print("API_PR is diff from API_DEV: %s" % filenames)
one_part_filenum = int(math.ceil(len(filenames) / cpus))
if one_part_filenum == 0:
one_part_filenum = 1
divided_file_list = [
filenames[i:i + one_part_filenum]
for i in range(0, len(filenames), one_part_filenum)
]
po = multiprocessing.Pool()
results = po.map_async(test, divided_file_list)
po.close()
po.join()
result = results.get()
# delete temp files
for root, dirs, files in os.walk("./samplecode_temp"):
for fntemp in files:
os.remove("./samplecode_temp/" + fntemp)
os.rmdir("./samplecode_temp")
print("----------------End of the Check--------------------")
if len(whl_error) != 0:
print("%s is not in whl." % whl_error)
print("")
print("Please check the whl package and API_PR.spec!")
print("You can follow these steps in order to generate API.spec:")
print("1. cd ${paddle_path}, compile paddle;")
print("2. pip install build/python/dist/(build whl package);")
print(
"3. run 'python tools/print_signatures.py paddle > paddle/fluid/API.spec'."
)
for temp in result:
if not temp:
print("")
print("In addition, mistakes found in sample codes.")
print("Please check sample codes.")
print("----------------------------------------------------")
exit(1)
else:
for temp in result:
if not temp:
print("Mistakes found in sample codes.")
print("Please check sample codes.")
exit(1)
print("Sample code check is successful!")