From b5a1c1463d7e1f6f1678cd55a453133eb4b837e4 Mon Sep 17 00:00:00 2001
From: chengduo <zhaochengduo@baidu.com>
Date: Thu, 13 Jun 2019 20:02:48 +0800
Subject: [PATCH] Update CPU_NUM config (#18059)

* update CPU_NUM config
test=develop
---
 paddle/fluid/framework/parallel_executor.cc   |  6 +++++
 python/paddle/dataset/flowers.py              |  3 +--
 .../contrib/slim/tests/test_graph_wrapper.py  |  2 ++
 python/paddle/fluid/data_feeder.py            |  8 +++---
 python/paddle/fluid/framework.py              | 25 ++++++++++++++-----
 .../test_parallel_executor_dry_run.py         |  2 ++
 .../test_py_reader_using_executor.py          |  1 +
 7 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index f5ab5d6ee5..ce37aa4855 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -328,6 +328,12 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
                    "the number of places must be greater than 1.");
   }
 
+  LOG(WARNING) << string::Sprintf(
+      "The number of %s, which is used in ParallelExecutor, is %lu. And "
+      "the Program will be copied %lu copies",
+      (member_->use_cuda_ ? "CUDAPlace" : "CPUPlace"), places.size(),
+      places.size());
+
   // Step 1. Bcast the bcast_vars to devs.
   // Create local scopes
   if (local_scopes.empty()) {
diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py
index e048639ae1..969ad3c922 100644
--- a/python/paddle/dataset/flowers.py
+++ b/python/paddle/dataset/flowers.py
@@ -138,8 +138,7 @@ def reader_creator(data_file,
                 break
 
     if use_xmap:
-        cpu_num = int(os.environ.get('CPU_NUM', cpu_count()))
-        return xmap_readers(mapper, reader, cpu_num, buffered_size)
+        return xmap_readers(mapper, reader, min(4, cpu_count()), buffered_size)
     else:
         return map_readers(mapper, reader)
 
diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py b/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py
index 0ab8052d7a..69080cf50e 100644
--- a/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py
@@ -19,6 +19,8 @@ import six
 import numpy as np
 from paddle.fluid.contrib.slim.graph import GraphWrapper
 from paddle.fluid import core
+import os
+os.environ['CPU_NUM'] = str(4)
 
 
 def residual_block(num):
diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py
index 32b2c8014c..1090c78142 100644
--- a/python/paddle/fluid/data_feeder.py
+++ b/python/paddle/fluid/data_feeder.py
@@ -22,7 +22,7 @@ from six.moves import zip, range, xrange
 import multiprocessing
 
 from .framework import Variable, default_main_program, _current_expected_place
-
+from .framework import _cpu_num, _cuda_ids
 __all__ = ['DataFeeder']
 
 
@@ -359,11 +359,9 @@ class DataFeeder(object):
         if num_places is not None:
             return int(num_places)
         elif isinstance(self.place, core.CUDAPlace):
-            return core.get_cuda_device_count()
+            return len(_cuda_ids())
         else:
-            cpu_num = int(
-                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-            return cpu_num
+            return _cpu_num()
 
     def decorate_reader(self,
                         reader,
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 012d15f45a..f7d487b640 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -82,7 +82,24 @@ def _current_expected_place():
 
 
 def _cpu_num():
-    return int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+    cpu_num = os.environ.get('CPU_NUM', None)
+    if cpu_num is None:
+        sys.stderr.write(
+            'The CPU_NUM is not specified, you should set CPU_NUM in '
+            'the environment variable list, i.e export CPU_NUM=1. CPU_NUM '
+            'indicates that how many CPUPlace are used in the current task.\n'
+            '!!! The default number of CPUPlaces is 1.\n')
+        os.environ['CPU_NUM'] = str(1)
+    return int(os.environ['CPU_NUM'])
+
+
+def _cuda_ids():
+    gpus_env = os.getenv("FLAGS_selected_gpus")
+    if gpus_env:
+        device_ids = [int(s) for s in gpus_env.split(",")]
+    else:
+        device_ids = six.moves.range(core.get_cuda_device_count())
+    return device_ids
 
 
 def cuda_places(device_ids=None):
@@ -116,11 +133,7 @@ def cuda_places(device_ids=None):
     assert core.is_compiled_with_cuda(), \
         "Not compiled with CUDA"
     if device_ids is None:
-        gpus_env = os.getenv("FLAGS_selected_gpus")
-        if gpus_env:
-            device_ids = [int(s) for s in gpus_env.split(",")]
-        else:
-            device_ids = six.moves.range(core.get_cuda_device_count())
+        device_ids = _cuda_ids()
     elif not isinstance(device_ids, (list, tuple)):
         device_ids = [device_ids]
     return [core.CUDAPlace(dev_id) for dev_id in device_ids]
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
index d0eca7d6df..328b3a4813 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_dry_run.py
@@ -17,6 +17,8 @@ from paddle.fluid import compiler
 import unittest
 import logging
 import six
+import os
+os.environ['CPU_NUM'] = str(4)
 
 
 class TestBase(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py
index a3701f0808..e4fb9b1970 100644
--- a/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py
+++ b/python/paddle/fluid/tests/unittests/test_py_reader_using_executor.py
@@ -22,6 +22,7 @@ import numpy as np
 import threading
 import multiprocessing
 import os
+os.environ['CPU_NUM'] = str(4)
 
 
 def as_tensor(np_array_or_tensor, place=None):