From 4fbba65626fec5eea2cf4eef8c7a81bd29690fe5 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 15 Nov 2017 15:31:51 +0800
Subject: [PATCH 1/5] auto set cpu env when mkldnn or mklml enabled for V1 API

---
 paddle/scripts/submit_local.sh.in | 47 +++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 5c4b5a2495..4bf25c69e3 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -43,6 +43,51 @@ function ver2num() {
   set +e
 }
 
+function cpu_config() {
+  # auto set KMP_AFFINITY and OMP_DYNAMIC from Hyper Threading Status
+  # only when MKLDNN or MKLML enabled
+  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
+    return 0
+  fi
+  ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+  if [ $ht -eq 1 ]; then # HT is OFF
+    if [ -z "$KMP_AFFINITY" ]; then
+      export KMP_AFFINITY="granularity=fine,compact,0,0"
+    fi
+    if [ -z "$OMP_DYNAMIC" ]; then
+      export OMP_DYNAMIC="FALSE"
+    fi
+  else # HT is ON
+    if [ -z "$KMP_AFFINITY" ]; then
+      export KMP_AFFINITY="granularity=fine,compact,1,0"
+    fi
+    if [ -z "$OMP_DYNAMIC" ]; then
+      export OMP_DYNAMIC="True"
+    fi
+  fi
+}
+
+function threads_config() {
+  # auto set OMP_NUM_THREADS and MKL_NUM_THREADS
+  # according to trainer_count and total processors
+  # only when MKLDNN or MKLML enabled
+  if [ "@WITH_MKLDNN@" == "OFF" ] && [ "@WITH_MKLML@" == "OFF" ]; then
+    return 0
+  fi
+  processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l`
+  trainers=`grep -Eo 'trainer_count.[0-9]+' <<< "$@" |grep -Eo '[0-9]+'|xargs`
+  if [ -z $trainers ]; then
+    trainers=1
+  fi
+  threads=$((processors / trainers))
+  if [ -z "$OMP_NUM_THREADS" ]; then
+    export OMP_NUM_THREADS=$threads
+  fi
+  if [ -z "$MKL_NUM_THREADS" ]; then
+    export MKL_NUM_THREADS=$threads
+  fi
+}
+
 PADDLE_CONF_HOME="$HOME/.config/paddle"
 mkdir -p ${PADDLE_CONF_HOME}
 
@@ -92,9 +137,11 @@
 else:
   sys.exit(0)
 EOF
 
+cpu_config
 case "$1" in
     "train")
+        threads_config $@
         ${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2}
         ;;
     "merge_model")

From a6f5f6efb69a14c7c8c654f36a08c467ceb7b258 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 15 Nov 2017 17:14:11 +0800
Subject: [PATCH 2/5] set the thread count to at least 1, in case the trainer
 count is larger than the number of processors

---
 paddle/scripts/submit_local.sh.in | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 4bf25c69e3..1cc5078494 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -79,7 +79,10 @@ function threads_config() {
   if [ -z $trainers ]; then
     trainers=1
   fi
-  threads=$((processors / trainers))
+  threads=$((processors / trainers))
+  if [ $threads -eq 0 ]; then
+    threads=1
+  fi
   if [ -z "$OMP_NUM_THREADS" ]; then
     export OMP_NUM_THREADS=$threads
   fi

From d66d6c6ea355832243667ea5a01add40fb3e8f73 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 15 Nov 2017 17:21:27 +0800
Subject: [PATCH 3/5] auto set cpu environment in V2 API

---
 python/paddle/v2/__init__.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index 3d70513843..a55b9d7a21 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -78,6 +78,31 @@ def init(**kwargs):
     for key in args_dict.keys():
         args.append('--%s=%s' % (key, str(args_dict[key])))
 
+    # auto set cpu environment
+    def set_env(key, value):
+        '''If the key has not been set in the environment, set it with value.'''
+        assert isinstance(key, str)
+        assert isinstance(value, str)
+        envset = os.environ.get(key)
+        if envset is None:
+            os.environ[key] = value
+
+    ht = os.popen("lscpu |grep \"per core\"|awk -F':' '{print $2}'|xargs")
+    ht = int(ht.read())
+    if ht == 1:  # ht is off
+        set_env("OMP_DYNAMIC", "false")
+        set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
+    else:
+        set_env("OMP_DYNAMIC", "true")
+        set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
+    processors = os.popen("grep \"processor\" /proc/cpuinfo|sort -u|wc -l")
+    processors = int(processors.read())
+    trainers = kwargs.get('trainer_count', 1)
+    threads = processors / trainers
+    threads = '1' if threads < 1 else str(threads)
+    set_env("OMP_NUM_THREADS", threads)
+    set_env("MKL_NUM_THREADS", threads)
+
     if 'use_gpu' in kwargs:
         cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
     if 'use_mkldnn' in kwargs:

From a3b2b7b1c754f944db0fae8a015d84a5b1238652 Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Wed, 15 Nov 2017 17:23:41 +0800
Subject: [PATCH 4/5] remove the hard-coded settings from the benchmark scripts

---
 benchmark/paddle/image/run_mkldnn.sh | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/benchmark/paddle/image/run_mkldnn.sh b/benchmark/paddle/image/run_mkldnn.sh
index a4527e0496..3cc779b48d 100755
--- a/benchmark/paddle/image/run_mkldnn.sh
+++ b/benchmark/paddle/image/run_mkldnn.sh
@@ -1,9 +1,7 @@
 set -e
 
 function train() {
-  unset OMP_NUM_THREADS MKL_NUM_THREADS
-  export OMP_DYNAMIC="FALSE"
-  export KMP_AFFINITY="granularity=fine,compact,0,0"
+  unset OMP_NUM_THREADS MKL_NUM_THREADS OMP_DYNAMIC KMP_AFFINITY
   topology=$1
   layer_num=$2
   bs=$3
@@ -14,8 +12,6 @@ function train() {
   elif [ $4 == "False" ]; then
     thread=`nproc`
     # each trainer_count use only 1 core to avoid conflict
-    export OMP_NUM_THREADS=1
-    export MKL_NUM_THREADS=1
     log="logs/${topology}-${layer_num}-${thread}mklml-${bs}.log"
   else
     echo "Wrong input $3, use True or False."

From 6337007ef5745977fdfdc9b6d051eefbd1e6260e Mon Sep 17 00:00:00 2001
From: tensor-tang
Date: Thu, 16 Nov 2017 11:35:36 +0800
Subject: [PATCH 5/5] add commented-out echoes of the resulting settings

---
 paddle/scripts/submit_local.sh.in | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 1cc5078494..b9a49526a7 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -141,10 +141,12 @@ else:
   sys.exit(0)
 EOF
 
 cpu_config
+# echo $KMP_AFFINITY $OMP_DYNAMIC
 case "$1" in
     "train")
         threads_config $@
+        # echo $OMP_NUM_THREADS $MKL_NUM_THREADS
         ${DEBUGGER} $PADDLE_BIN_PATH/paddle_trainer ${@:2}
         ;;
     "merge_model")
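
Note (not part of the patches above): a minimal, standalone sketch of the thread-partitioning rule these commits implement, assuming a hypothetical helper name demo_thread_config and example values (8 logical processors, trainer_count=4). It only mirrors the logic of threads_config and the V2 init() change: divide the processor count by the trainer count, never go below one thread, and leave any value the user already exported untouched.

# demo_thread_config is a hypothetical name used only for illustration.
import os

def demo_thread_config(trainer_count, processors):
    # one slice of the OpenMP/MKL thread pool per trainer, but at least 1 thread
    threads = max(1, processors // trainer_count)
    for key in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
        if os.environ.get(key) is None:  # respect a user-provided setting
            os.environ[key] = str(threads)

if __name__ == "__main__":
    demo_thread_config(trainer_count=4, processors=8)
    print(os.environ["OMP_NUM_THREADS"])  # prints "2" unless it was already set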