#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 14 14:10:36 2019

@author: haowang101779990

This script scrapes the sample codes embedded in the comments (docstrings)
of paddle .py source files and executes them in order to validate them.

Put this script at directory fluid/

log July 4 : CPU is implemented, wlist is added,
transpiler module need to be finished

Every ``print`` call below takes exactly one argument so that the file
behaves the same under Python 2 (print statement) and Python 3.
"""

import os
import shutil
import subprocess
import sys

# Directive that introduces a sample code block inside a comment.
CODE_BLOCK_MARK = ".. code-block:: python"
# Sentence printed by the generated trailer; its presence in the captured
# output is what marks a sample as successfully executed.
SUCCESS_MARK = "sample code is executed successfully!"
# Directory (relative to the working directory) holding the temporary
# sample files while they are executed.
TEMP_DIR = "samplecode_temp"


def find_all(srcstr, substr):
    """Return the start indices of all occurrences of ``substr``.

    Occurrences may overlap (the search resumes one character after each
    hit). The indices are returned in ascending order; the list is empty
    when ``substr`` does not occur in ``srcstr``.
    """
    indices = []
    pos = srcstr.find(substr)
    while pos != -1:
        indices.append(pos)
        pos = srcstr.find(substr, pos + 1)
    return indices


def check_indent(cdline):
    """Return the indentation width of ``cdline``.

    A leading space counts as 1 column and a leading tab as 4 columns;
    counting stops at the first character that is neither.
    """
    indent = 0
    for c in cdline:
        if c == '\t':
            indent += 4
        elif c == ' ':
            indent += 1
        else:
            break
    return indent


def _expand_leading_tabs(line):
    """Replace every tab in the leading whitespace of ``line`` by 4 spaces.

    This keeps the character count of the indentation equal to what
    ``check_indent`` reports, so slicing at ``min_indent`` is exact.
    """
    body = line.lstrip(' \t')
    lead = line[:len(line) - len(body)]
    return lead.replace('\t', '    ') + body


def _extract_sample_lines(text):
    """Return the dedented code lines of the sample located at the start of
    ``text`` (the comment text right after a code-block directive).

    Blank lines are dropped. The sample ends at the first non-blank line
    that is indented less than its first code line. An empty list is
    returned when ``text`` holds no code at all.
    """
    lines = text.split("\n")
    first = 0
    # skip the empty lines that separate the directive from the code
    while first < len(lines) and lines[first].strip(' \t') == '':
        first += 1
    if first == len(lines):
        return []
    min_indent = check_indent(lines[first])
    code = []
    for line in lines[first:]:
        # ignore empty lines or those only made of whitespace
        if line.strip() == '':
            continue
        if check_indent(line) < min_indent:
            break
        code.append(_expand_leading_tabs(line)[min_indent:])
    return code


def _run_sample(sampcd, tfname):
    """Write ``sampcd`` to ``samplecode_temp/<tfname>``, execute it and
    return its stdout and stderr joined by a newline.

    The sample is run with the interpreter that runs this script, and the
    temporary file is removed even when launching the process fails.
    """
    path = TEMP_DIR + "/" + tfname
    with open(path, 'w') as tempf:
        tempf.write(sampcd)
    try:
        subprc = subprocess.Popen(
            [sys.executable or "python", path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # text mode, so the output can be joined with "\n" on Python 3
            universal_newlines=True)
        output = subprc.communicate()
    finally:
        os.remove(path)
    return "\n".join(output)


def sampcd_extract_and_run(srccom, name, logf):
    """Extract every sample code of a comment, run it and report the result.

    Args:
        srccom: raw comment text taken from the source, including the
            original indentation.
        name: name of the API the comment belongs to (used in messages and
            in the temporary file name).
        logf: writable file-like object receiving the log.

    Returns:
        A list with one status per sample code found (a comment may hold
        several examples):
            3: error sample code
            2: have sample code but format is wrong
            1: no sample code
            0: successful
        The statuses -1 (no comments found) and -2 (in white list) are
        produced by the callers, never by this function.
    """
    sampcd_begins = find_all(srccom, CODE_BLOCK_MARK)
    status = []

    if not sampcd_begins:
        if srccom.find("Examples:") != -1:
            print("----example code check----\n")
            logf.write("\n----example code check----\n")
            if srccom.find(">>>") != -1:
                deprecated_msg = (
                    "Deprecated sample code style:\n\n Examples:\n\n >>>codeline\n >>>codeline\n\n\n "
                    + "Please use '.. code-block:: python' to " +
                    "format sample code.\n")
                logf.write(deprecated_msg)
                print(deprecated_msg)
                status.append(2)
            # NOTE(review): an "Examples:" section with neither ">>>" nor a
            # code-block directive yields an empty status list, which the
            # driver neither groups nor counts as a failure - confirm that
            # this is intended.
        else:
            print("No sample code!\n")
            logf.write("\nNo sample code!\n")
            status.append(1)

    several_samples = len(sampcd_begins) > 1
    for y, sampcd_begin in enumerate(sampcd_begins, 1):
        # the "+ 1" skips the character that follows the directive
        code_lines = _extract_sample_lines(
            srccom[sampcd_begin + len(CODE_BLOCK_MARK) + 1:])
        if not code_lines:
            # a directive without any code after it
            print("No sample code!\n")
            logf.write("\nNo sample code!\n")
            status.append(1)
            continue

        # force CPU execution, then append a trailer whose output proves
        # that the sample ran to its end
        sampcd = ('\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = ""\n'
                  + '\n'.join(code_lines))
        sampcd += '\nprint(' + '"' + name + ' ' + SUCCESS_MARK + '")\n'

        print("\n")
        print("Sample code " + str(y) + " extracted for " + name + " :")
        print("^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
        print(sampcd)
        logf.write("\nSample code extracted for " + name + " :\n")
        logf.write("\n" + sampcd + "\n")
        print("----example code check----\n")
        print("executing sample code .....")
        logf.write("\n----example code check----\n")
        logf.write("\nexecuting sample code .....\n")

        if several_samples:
            tfname = name + "_example_" + str(y) + ".py"
        else:
            tfname = name + "_example" + ".py"
        # msg is the returned code execution report
        msg = _run_sample(sampcd, tfname)

        print("execution result:")
        logf.write("\nexecution result:\n")
        if msg.find(SUCCESS_MARK) == -1:
            print("Error Raised from Sample Code " + name + " :\n")
            logf.write("\nError Raised from Sample Code " + name + " :\n")
            status.append(3)
        else:
            status.append(0)
        print(msg)
        logf.write("\n" + msg + "\n")

    print(status)
    logf.write("\n" + "execution status" + str(status) + "\n")
    return status


def _is_toplevel_header(line):
    """Tell whether ``line`` starts a module-level function or class."""
    return line.startswith(('def ', 'class '))


def _is_indented_def(line):
    """Tell whether ``line`` is a ``def`` header preceded by indentation."""
    return line[:1] in (' ', '\t') and line.lstrip(' \t').startswith('def ')


def single_defcom_extract(start_from, srcls, is_class_begin=False):
    """Extract the comment body of a single function or class.

    Args:
        start_from: index in ``srcls`` of the "def"/"class" header line.
        srcls: the source lines, each ending with its newline.
        is_class_begin: True when the header is a class header; the search
            then stops at the first method so that a method comment is
            never mistaken for the comment of the class itself.

    Returns:
        The lines found between the opening and the closing triple-quote
        delimiter, concatenated (delimiter lines excluded), or "" when no
        comment is found before the next definition. For a one-line
        comment the text between the delimiters plus a newline is returned.
    """
    fcombody = ""  # def comment body
    delim = None  # the delimiter that opened the comment, once it is found
    for x in range(start_from + 1, len(srcls)):
        line = srcls[x]
        if is_class_begin and _is_indented_def(line):
            break
        if _is_toplevel_header(line):
            break

        if delim is None:
            squeezed = line.replace(" ", '').replace("\t", '').replace(
                "\n", '')
            for candidate in ("\"\"\"", "\'\'\'"):
                if squeezed.startswith(candidate):
                    delim = candidate
                    break
            if delim is not None:
                # a comment opened and closed on the same line ends here
                text = line.strip()
                closing = text.find(delim, len(delim))
                if closing != -1:
                    inner = text[len(delim):closing].strip()
                    return inner + "\n" if inner else ""
            # whatever shares the line with the opening delimiter is skipped
            continue

        closing = line.find(delim)
        if closing != -1:
            # keep the text placed before a closing delimiter, if any
            if line[:closing].strip() != '':
                fcombody += line[:closing] + "\n"
            break
        # when the comments start, begin to add line to fcombody
        fcombody += line
    return fcombody


def print_header(logf, htype, name):
    """Print and log the header announcing the API ``name`` of kind
    ``htype`` ("def", "class" or "method")."""
    print("\n")
    print(htype + " name:" + name)
    print("-----------------------")
    logf.write("\n\n" + htype + " name:" + name + "\n")
    logf.write("-----------------------\n")


def _header_name(header, keyword):
    """Return the identifier declared by a ``def``/``class`` header line.

    The name ends at the first "(" or, for a header without parentheses
    such as "class Foo:", at the ":".
    """
    rest = header.strip()[len(keyword):]
    for stop in ('(', ':'):
        pos = rest.find(stop)
        if pos != -1:
            rest = rest[:pos]
    return rest.replace(' ', '').replace('\t', '')


def _split_names(text):
    """Split the inside of a list literal into bare names, dropping quotes,
    whitespace and empty items."""
    for junk in ("\n", "\r", "\t", " ", "'", "\""):
        text = text.replace(junk, '')
    return [item for item in text.split(',') if item]


def _parse_all_list(srcc, srcls, allidx, is_ops_file):
    """Return the names listed in ``__all__``.

    For ops.py every line starting with ``__all__`` that holds a list
    literal contributes its names; for any other file the first list
    literal following the first ``__all__`` occurrence (at ``allidx`` in
    the whole text ``srcc``) is used.
    """
    if is_ops_file:
        names = []
        for line in srcls:
            if not line.startswith("__all__"):
                continue
            lb = line.find('[')
            if lb == -1:
                continue
            rb = line.find(']')
            if rb == -1:
                rb = len(line)
            names.extend(_split_names(line[lb + 1:rb]))
        return names

    search_from = allidx + len("__all__")
    lb = srcc.find("[", search_from)
    rb = srcc.find("]", search_from)
    if lb == -1 or rb == -1:
        return []
    return _split_names(srcc[lb + 1:rb])


def _record_status(logf, status_all, name, status):
    """Store ``status`` for ``name`` in ``status_all``, print and log it."""
    status_all[name] = status
    print(status)
    logf.write("\n" + "execution status" + str(status) + "\n")


def _skip_whitelisted(logf, status_all, name):
    """Report ``name`` as skipped because of the white list (status -2)."""
    print(name + " is in white list, thus skipped")
    logf.write("\n" + name + " is in white list, thus skipped\n")
    _record_status(logf, status_all, name, [-2])


def _check_comment(logf, status_all, name, comment, kind, tail):
    """Run the samples of ``comment``, or record status -1 when it is empty.

    ``kind`` and ``tail`` only shape the "no comments in ..." message.
    """
    if comment != "":
        status_all[name] = sampcd_extract_and_run(comment, name, logf)
        return
    print("no comments in " + kind + " " + name + tail)
    logf.write("no comments in " + kind + " " + name + tail + "\n\n")
    _record_status(logf, status_all, name, [-1])


def _check_ops_docs(srcls, logf, status_all, wlist, handled):
    """Check the ``__doc__`` assignments of ops.py.

    The comment of an op is taken from the line after the ``__doc__``
    assignment up to (and including) the next line holding a triple
    double-quote. Checked op names are appended to ``handled``.

    Returns the number of ops whose samples were checked.
    """
    count = 0
    for i, line in enumerate(srcls):
        doc_pos = line.find("__doc__")
        if doc_pos == -1:
            continue
        # the op name is what precedes ".__doc__"
        opname = line[:doc_pos - 1]
        print_header(logf, "def", opname)
        if opname in wlist:
            _skip_whitelisted(logf, status_all, opname)
            continue
        opcom = ""
        for j in range(i + 1, len(srcls)):
            opcom += srcls[j]
            if srcls[j].find("\"\"\"") != -1:
                break
        status_all[opname] = sampcd_extract_and_run(opcom, opname, logf)
        count += 1
        handled.append(opname)
    return count


def _check_class_methods(class_line, srcls, cn, logf, status_all, wlist):
    """Check the comment of every public method of class ``cn``, whose
    header is at index ``class_line`` of ``srcls``."""
    method_indent = None
    # from the next line of class header
    for x in range(class_line + 1, len(srcls)):
        thisl = srcls[x]
        if _is_toplevel_header(thisl):
            break
        if not _is_indented_def(thisl):
            continue
        # The first indented def fixes the indentation of the methods;
        # deeper defs are functions nested inside a method.
        if method_indent is None:
            method_indent = check_indent(thisl)
        elif check_indent(thisl) != method_indent:
            continue

        indent = len(thisl) - len(thisl.lstrip())
        mn = _header_name(thisl, 'def')  # method name
        name = cn + "." + mn
        print_header(logf, "method", name)
        if mn.startswith('_'):
            print(mn + "is hidden, not visible to users")
            logf.write("\n" + mn + "is hidden, not visible to users\n")
            continue
        if name in wlist:
            _skip_whitelisted(logf, status_all, name)
            continue

        # collect the dedented lines of the method up to the next def/class
        thismethod = [thisl[indent:]]
        for y in range(x + 1, len(srcls)):
            following = srcls[y]
            if (_is_toplevel_header(following) or
                    following.lstrip().startswith('def ')):
                break
            thismethod.append(following[indent:])
        thismtdcom = single_defcom_extract(0, thismethod)
        _check_comment(logf, status_all, name, thismtdcom, "method", "\n")


def srccoms_extract(srcfile, logf, status_all, wlist):
    """Check the sample codes of every API that a source file exports.

    Args:
        srcfile: readable and seekable file object with a ``name``
            attribute.
        logf: writable file-like object receiving the log.
        status_all: dict updated in place, mapping each API name to its
            list of statuses.
        wlist: names of the APIs to skip (recorded with status -2).

    Returns:
        A list of two strings: the length of the ``__all__`` list and the
        number of analysed APIs. Both are 0 when the file does not mention
        ``__all__`` at all, in which case nothing is checked.
    """
    print("source file name:" + srcfile.name)
    print("---------------------------------------------------")
    logf.write("source file name:" + srcfile.name + "\n")
    logf.write("---------------------------------------------------\n\n")

    srcc = srcfile.read()
    # set file pointer to its beginning to read the same text line by line
    srcfile.seek(0, 0)
    srcls = srcfile.readlines()  # source lines
    wlist = frozenset(wlist)

    api_alllist_count = 0
    api_count = 0

    # 1. fetch __all__ list
    allidx = srcc.find("__all__")
    if allidx != -1:
        is_ops_file = srcfile.name.find("ops.py") != -1
        alllist = _parse_all_list(srcc, srcls, allidx, is_ops_file)
        print("__all__:" + str(alllist) + "\n")
        logf.write("__all__:" + str(alllist) + "\n\n")
        api_alllist_count = len(alllist)

        handled = []
        if is_ops_file:
            api_count += _check_ops_docs(srcls, logf, status_all, wlist,
                                         handled)

        # 2. go through the module-level defs and classes
        for i, line in enumerate(srcls):
            if line.startswith('def '):
                fn = _header_name(line, 'def')  # function name
                if fn in handled:
                    continue
                print_header(logf, "def", fn)
                if fn not in alllist:
                    print(fn + " not in __all__ list")
                    logf.write(fn + " not in __all__ list\n\n")
                    continue
                api_count += 1
                if fn in wlist:
                    _skip_whitelisted(logf, status_all, fn)
                    continue
                fcombody = single_defcom_extract(i, srcls)
                _check_comment(logf, status_all, fn, fcombody, "function",
                               "")
            elif line.startswith('class '):
                print(line)
                cn = _header_name(line, 'class')  # class name
                if cn in handled:
                    continue
                print_header(logf, "class", cn)
                if cn not in alllist:
                    print(cn + " is not in __all__ list")
                    logf.write(cn + " is not in __all__ list\n\n")
                    continue
                api_count += 1
                if cn in wlist:
                    _skip_whitelisted(logf, status_all, cn)
                    continue
                classcom = single_defcom_extract(i, srcls, True)
                _check_comment(logf, status_all, cn, classcom,
                               "class itself", "\n")
                _check_class_methods(i, srcls, cn, logf, status_all, wlist)

    return [
        srcfile.name + " all list length: " + str(api_alllist_count),
        "analysed api count: " + str(api_count)
    ]


# Source files to check, relative to the fluid/ directory.
filenames = [
    "layers/control_flow.py", "layers/io.py", "layers/nn.py", "layers/ops.py",
    "layers/tensor.py", "layers/learning_rate_scheduler.py",
    "layers/detection.py", "layers/metric_op.py"
]
filenames += [
    "dygraph/layers.py", "dygraph/base.py", "dygraph/nn.py",
    "dygraph/tracer.py", "dygraph/profiler.py", "dygraph/parallel.py",
    "dygraph/checkpoint.py", "dygraph/learning_rate_scheduler.py",
    "dygraph/backward_strategy.py"
]
filenames += [
    "data_feeder.py", "dataset.py", "clip.py", "metrics.py", "executor.py",
    "initializer.py", "io.py", "nets.py", "optimizer.py", "profiler.py",
    "regularizer.py", "backward.py", "average.py", "unique_name.py"
]

wlist_inneed = [
    "append_LARS", "BuildStrategy.debug_graphviz_path",
    "BuildStrategy.enable_sequential_execution",
    "BuildStrategy.fuse_elewise_add_act_ops",
    "BuildStrategy.fuse_relu_depthwise_conv",
    "BuildStrategy.gradient_scale_strategy", "BuildStrategy.reduce_strategy",
    "BuildStrategy.remove_unnecessary_lock", "BuildStrategy.sync_batch_norm",
    "DynamicRNN.step_input", "DynamicRNN.static_input", "DynamicRNN.block",
    "DynamicRNN.update_memory", "DynamicRNN.output",
    "transpiler.DistributeTranspilerConfig",
    "transpiler.DistributeTranspilerConfig.slice_var_up",
    "transpiler.DistributeTranspilerConfig.split_method",
    "transpiler.DistributeTranspilerConfig.min_block_size",
    "DistributeTranspilerConfig.slice_var_up",
    "DistributeTranspilerConfig.split_method", "ModelAverage.apply",
    "ModelAverage.restore", "DistributeTranspilerConfig",
    "DistributeTranspilerConfig.min_block_size",
    "ExecutionStrategy.allow_op_delay", "load", "Accuracy.update",
    "ChunkEvaluator.update", "ExecutionStrategy.num_iteration_per_drop_scope",
    "ExecutionStrategy.num_threads", "CompiledProgram.with_inference_optimize",
    "CompositeMetric.add_metric", "CompositeMetric.update",
    "CompositeMetric.eval", "DetectionMAP.get_map_var", "MetricBase",
    "MetricBase.reset", "MetricBase.get_config", "MetricBase.update",
    "MetricBase.eval", "Accuracy.eval", "Auc.update", "Auc.eval",
    "EditDistance.update", "EditDistance.eval",
    "ExponentialMovingAverage.apply", "ExponentialMovingAverage.restore",
    "ExponentialMovingAverage.update", "StaticRNN.step", "StaticRNN.step_input",
    "StaticRNN.step_output", "StaticRNN.update_memory", "DetectionMAP.reset",
    'StaticRNN.output'
]

wlist_temp = [
    'elementwise_floordiv', 'Layer', 'Layer.create_parameter',
    'Layer.create_variable', 'Layer.sublayers', 'Layer.add_parameter',
    'Layer.add_sublayer', 'Layer.parameters', 'Tracer', 'Layer.full_name',
    'InMemoryDataset', 'layer_norm', 'bipartite_match', 'double_buffer',
    'cumsum', 'thresholded_relu', 'group_norm', 'random_crop', 'py_func',
    'row_conv', 'hard_shrink', 'ssd_loss', 'retinanet_target_assign',
    'InMemoryDataset.global_shuffle', 'InMemoryDataset.get_memory_data_size',
    'DetectionMAP', 'hash', 'InMemoryDataset.set_queue_num', 'LayerNorm',
    'Preprocessor', 'chunk_eval', 'GRUUnit', 'ExponentialMovingAverage',
    'QueueDataset.global_shuffle', 'NumpyArrayInitializer',
    'create_py_reader_by_data', 'InMemoryDataset.local_shuffle',
    'InMemoryDataset.get_shuffle_data_size', 'size', 'edit_distance', 'nce',
    'BilinearInitializer', 'NaturalExpDecay', 'noam_decay',
    'retinanet_detection_output', 'Pool2D', 'PipelineOptimizer',
    'generate_mask_labels', 'isfinite',
    'InMemoryDataset.set_fleet_send_batch_size', 'cuda_profiler', 'unfold',
    'Executor', 'InMemoryDataset.load_into_memory', 'ExponentialDecay',
    'BatchNorm', 'deformable_conv', 'InMemoryDataset.preload_into_memory',
    'py_reader', 'linear_lr_warmup', 'InMemoryDataset.wait_preload_done',
    'CosineDecay', 'roi_perspective_transform', 'unique', 'ones_like',
    'LambOptimizer', 'InMemoryDataset.release_memory', 'Conv2DTranspose',
    'QueueDataset.local_shuffle'
]

# white list of private API/ redundant API
wlist_ignore = [
    'elementwise_pow', 'WeightedAverage.reset', 'ChunkEvaluator.eval',
    'NCE.forward', 'elementwise_div', 'BilinearTensorProduct.forward',
    'NoamDecay.step', 'elementwise_min', 'PiecewiseDecay.step',
    'Conv3DTranspose.forward', 'elementwise_add', 'IfElse.output',
    'IfElse.true_block', 'InverseTimeDecay.step', 'PolynomialDecay.step',
    'Precision.eval', 'enabled', 'elementwise_max', 'stop_gperf_profiler',
    'IfElse.false_block', 'WeightedAverage.add', 'Auc.trapezoid_area',
    'elementwise_mul', 'GroupNorm.forward', 'SpectralNorm.forward',
    'elementwise_sub', 'Switch.case', 'IfElse.input', 'prepare_context',
    'PRelu.forward', 'Recall.update', 'start_gperf_profiler',
    'TreeConv.forward', 'Conv2D.forward', 'Switch.default', 'elementwise_mod',
    'Precision.update', 'WeightedAverage.eval', 'Conv3D.forward',
    'Embedding.forward', 'Recall.eval', 'FC.forward', 'While.block'
]

wlist = wlist_temp + wlist_inneed + wlist_ignore


def main():
    """Check every file of ``filenames`` and write the reports.

    Writes log.txt (full log), status.txt (raw and grouped statuses) and
    tempwlist.txt (the APIs whose status is 1, 2, 3 or -1) in the working
    directory.

    Returns:
        The process exit code: 0 when every status is 0 or -2, 1 otherwise.
    """
    status_all = {}
    status_groups = {-2: [], -1: [], 0: [], 1: [], 2: [], 3: []}
    ci_pass = True

    with open("log.txt", 'w') as logf, open("status.txt", 'w') as statusf:
        if not os.path.isdir("./samplecode_temp"):
            os.mkdir("./samplecode_temp")
        try:
            for filename in filenames:
                with open(filename, 'r') as srcfile:
                    counts = srccoms_extract(srcfile, logf, status_all, wlist)
                logf.write("\n\n" + str(counts) + "\n\n")
        finally:
            # drop the temporary directory even when a check crashed
            shutil.rmtree("./samplecode_temp", ignore_errors=True)

        statusf.write("status_all:\n" + str(status_all))
        for key, statusl in status_all.items():
            if any(ele != 0 and ele != -2 for ele in statusl):
                ci_pass = False
            if len(statusl) == 1:
                status_groups[statusl[0]].append(key)
            else:
                # several samples: one numbered entry per sample
                for u, ele in enumerate(statusl, 1):
                    status_groups[ele].append(key + '_' + str(u))
        statusf.write('\n\ngrouped apis:\n' + str(status_groups) + '\n')

    failing = (status_groups[1] + status_groups[2] + status_groups[3] +
               status_groups[-1])
    with open("tempwlist.txt", 'w') as temp_wlistf:
        temp_wlistf.write(str(failing))
    print(str(failing))

    if not ci_pass:
        print("Mistakes found in sample codes, refer to the log for details")
        return 1
    print("Sample code check is successful!")
    return 0


if __name__ == '__main__':
    sys.exit(main())