diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 833a28a757..834cdb422a 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -47,9 +47,6 @@ DECLARE_bool(benchmark); DECLARE_bool(check_nan_inf); DECLARE_bool(enable_unused_var_check); DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op"); -DEFINE_bool(fast_check_nan_inf, false, - "Fast checking NAN/INF after each operation. It will be a little" - "bit slow, much faster than check_nan_inf"); namespace paddle { namespace framework { @@ -1173,25 +1170,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, #endif } - if (FLAGS_fast_check_nan_inf) { - for (auto& vname : OutputVars(true)) { - // only check inserted vars, - // please see executor.py for details of fast_check_nan_inf - if (vname.rfind("debug_var") == 0) { - VLOG(3) << "debugging nan/inf in var " << vname; - - auto* var = exec_scope.FindVar(vname); - if (var == nullptr) continue; - if (var->IsType()) { - CheckTensorNANOrInf(type_, vname, var->Get()); - } else if (var->IsType()) { - CheckTensorNANOrInf(type_, vname, - var->Get().value()); - } - } - } - } - if (FLAGS_check_nan_inf) { framework::details::CheckOpHasNanOrInf(*this, exec_scope, place); } diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 1a88d3512e..b24da29d0f 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -175,7 +175,6 @@ def __bootstrap__(): sysstr = platform.system() read_env_flags = [ 'check_nan_inf', - 'fast_check_nan_inf', 'benchmark', 'eager_delete_scope', 'fraction_of_cpu_memory_to_use', diff --git a/python/paddle/fluid/debugger.py b/python/paddle/fluid/debugger.py index 9110b8daf3..75dc14a1d7 100644 --- a/python/paddle/fluid/debugger.py +++ b/python/paddle/fluid/debugger.py @@ -280,88 +280,3 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"): add_op_link_var(opn, var, True) graph(path, show=False) - - -def prepare_fast_nan_inf_debug(_program): - """ - Given a program to run, insert a (reduce) sum op for every var in that program. - Instead of checking all vars originally defined in the program, - only those inserted ops will be checked in the c++ end, to detect if it contains NAN or INF. - Thereforce, the speed of nan/inf checking could be improved. - Please set ``FLAGS_fast_check_nan_inf" to open the fast nan/inf feature. - """ - - helper = LayerHelper('reduce_sum', **locals()) - - if _program is None: - _program = default_main_program() - - for _block in _program.blocks: - # fetch vars in the current block - _vars_in_prog = [] - for _var_name in _block.vars: - _vars_in_prog.append((_var_name, _block.vars[_var_name])) - - # append sum_op in the current block - for _var_name, _var in _vars_in_prog: - - try: - - if _var.dtype == -1: - continue - - ## create a var for holding sum output - _output_var = _block.create_var( - name=unique_name.generate("debug_var_" + _var_name), - dtype=_var.dtype, - type=core.VarDesc.VarType.LOD_TENSOR, - persistable=False, - stop_gradient=True) - - ## create a sum op, input each existing var in the block - _block.append_op( - type='sum', - outputs={'Out': _output_var}, - inputs={'X': [_var]}) - except Exception as e: - pass - - -def run_fast_nan_inf_debug(executor, - program=None, - feed=None, - fetch_list=None, - feed_var_name='feed', - fetch_var_name='fetch', - scope=None, - return_numpy=True, - use_program_cache=False, - dump_core=True): - """ - Run a program by the given executor. Catch the exception of NAN and INF, and save persistables into the dumped core. - """ - - assert (executor is not None) - - try: - output = executor.run(program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - scope=scope, - return_numpy=return_numpy, - use_program_cache=use_program_cache) - - return output - - except Exception as e: - - print("catch an exception:") - print(e) - - core_filename = "core" + str(int(random.random() * 10000)) + ".pdckpt" - io.save_persistables( - executor, "./", main_program=program, filename=core_filename) - - print("dumping a core into ./%s" % core_filename)