diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 4790e0f611..bd9f8b3c35 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -128,6 +128,13 @@ class ParallelExecutor(object): os.environ.get('CPU_NUM', multiprocessing.cpu_count())) exec_strategy.num_threads = cpu_num * 2 + # Set 1 thread num under nccl2 distribute + # env to make sure all gpus run ops in same order. + if num_trainers > 1: + assert (use_cuda) + # FIXME(gongwb): avoid this set. + exec_strategy.num_threads = 1 + if build_strategy is None: build_strategy = BuildStrategy()