Paddle/benchmark/fluid/run.sh

#!/bin/bash
# This script benchmarks PaddlePaddle Fluid on a
# single thread and a single GPU.

# Directory that receives the per-model log files written by `tee -a` below.
mkdir -p logs
# Optional GPU memory flag, left disabled:
#export FLAGS_fraction_of_gpu_memory_to_use=0.0
# cuDNN directory; it is prepended to LD_LIBRARY_PATH further down.
CUDNN_PATH=/paddle/cudnn_v5
export CUDNN_PATH

# Disable OpenMP and MKL parallelism by pinning both to a single thread.
# See https://github.com/PaddlePaddle/Paddle/issues/7199
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1

# Threads per core as reported by lscpu ("Thread(s) per core: N").
# Only the first matching line is used and all whitespace is stripped, so the
# value is either a bare number or empty (lscpu missing / no such line).
ht=$(lscpu | awk -F':' '/per core/ { gsub(/[[:space:]]/, "", $2); print $2; exit }')

# Quoted string comparison: an empty or unexpected value simply selects the
# "HT is ON" branch instead of making `[` fail with a syntax error.
if [ "$ht" = "1" ]; then # HT is OFF
    # Keep any value the caller already provided; otherwise use the default.
    export KMP_AFFINITY="${KMP_AFFINITY:-granularity=fine,compact,0,0}"
    export OMP_DYNAMIC="${OMP_DYNAMIC:-FALSE}"
else # HT is ON
    export KMP_AFFINITY="${KMP_AFFINITY:-granularity=fine,compact,1,0}"
fi
# Disable multi-GPU: expose only device 0 even if more GPUs are present.
export CUDA_VISIBLE_DEVICES=0
# Prepend /usr/local/lib and the cuDNN directory to the library search path.
# The ${VAR:+...} forms add the ':' separator only when the other side is
# non-empty; an empty entry (leading/trailing ':') would make the dynamic
# loader search the current working directory.
export LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export LD_LIBRARY_PATH="${CUDNN_PATH:+${CUDNN_PATH}:}${LD_LIBRARY_PATH}"

# Start nvidia-smi in the background, looping every second (-l 1) and writing
# CSV output to mem.log. Only the GPU selected via CUDA_VISIBLE_DEVICES is
# queried.
nohup stdbuf -oL nvidia-smi \
      --id="${CUDA_VISIBLE_DEVICES}" \
      --query-gpu=timestamp \
      --query-compute-apps=pid,process_name,used_memory \
      --format=csv \
      --filename=mem.log  \
      -l 1 &
gpu_monitor_pid=$!

# The monitor loops until it is killed; stop it when this script exits so it
# does not keep running after the benchmarks have finished.
stop_gpu_monitor() {
    # The monitor may already be gone (e.g. nvidia-smi failed to start);
    # a failed kill is not an error here.
    kill "${gpu_monitor_pid}" 2>/dev/null
}
trap stop_gpu_monitor EXIT

# mnist
# MNIST model on GPU, batch size 128, 500 iterations (first 5 batches skipped).
# Combined stdout/stderr is appended to logs/mnist_gpu_128.log.
mnist_args=(
    --model=mnist
    --device=GPU
    --batch_size=128
    --skip_batch_num=5
    --iterations=500
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${mnist_args[@]}" 2>&1 |
    tee -a logs/mnist_gpu_128.log

# vgg16
# VGG16 on GPU, batch size 128, 30 iterations (first 5 batches skipped).
# No --data_set is passed; the original note names cifar10 for this run.
# Combined stdout/stderr is appended to logs/vgg16_gpu_128.log.
vgg16_args=(
    --model=vgg16
    --device=GPU
    --batch_size=128
    --skip_batch_num=5
    --iterations=30
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${vgg16_args[@]}" 2>&1 |
    tee -a logs/vgg16_gpu_128.log

# VGG16 on the flowers data set, GPU, batch size 32, 30 iterations
# (first 5 batches skipped).
# Combined stdout/stderr is appended to logs/vgg16_gpu_flowers_32.log.
vgg16_flowers_args=(
    --model=vgg16
    --device=GPU
    --batch_size=32
    --data_set=flowers
    --skip_batch_num=5
    --iterations=30
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${vgg16_flowers_args[@]}" 2>&1 |
    tee -a logs/vgg16_gpu_flowers_32.log

# resnet50
# ResNet on the cifar10 data set, GPU, batch size 128, 30 iterations
# (first 5 batches skipped).
# Combined stdout/stderr is appended to logs/resnet50_gpu_128.log.
resnet_cifar10_args=(
    --model=resnet
    --device=GPU
    --batch_size=128
    --data_set=cifar10
    --skip_batch_num=5
    --iterations=30
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${resnet_cifar10_args[@]}" 2>&1 |
    tee -a logs/resnet50_gpu_128.log

# ResNet on the flowers data set, GPU, batch size 64, 30 iterations
# (first 5 batches skipped).
# Combined stdout/stderr is appended to logs/resnet50_gpu_flowers_64.log.
resnet_flowers_args=(
    --model=resnet
    --device=GPU
    --batch_size=64
    --data_set=flowers
    --skip_batch_num=5
    --iterations=30
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${resnet_flowers_args[@]}" 2>&1 |
    tee -a logs/resnet50_gpu_flowers_64.log

# lstm
# Stacked dynamic LSTM on GPU, batch size 32, 30 iterations (first 5 batches
# skipped). The original note names imdb as the data set and says batch=32 is
# used because TensorFlow only supports that batch size.
# Combined stdout/stderr is appended to logs/lstm_gpu_32.log.
lstm_args=(
    --model=stacked_dynamic_lstm
    --device=GPU
    --batch_size=32
    --skip_batch_num=5
    --iterations=30
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${lstm_args[@]}" 2>&1 |
    tee -a logs/lstm_gpu_32.log

# seq2seq
# Machine translation (seq2seq) model on GPU, batch size 128, 30 iterations
# (first 5 batches skipped). The original note gives the data set as "wmb" --
# presumably WMT; TODO confirm.
# Combined stdout/stderr is appended to logs/lstm_gpu_128.log.
# NOTE(review): the log name says "lstm" although the model is
# machine_translation -- looks copy-pasted from the lstm run; confirm before
# renaming, since other tooling may read this path.
seq2seq_args=(
    --model=machine_translation
    --device=GPU
    --batch_size=128
    --skip_batch_num=5
    --iterations=30
)
FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py "${seq2seq_args[@]}" 2>&1 |
    tee -a logs/lstm_gpu_128.log
init (#9462) 7 years ago			`#!/bin/bash`
			`# This script benchmarking the PaddlePaddle Fluid on`
			`# single thread single GPU.`
"add auto feature" (#9760) 7 years ago
refine benchmark 7 years ago			`mkdir -p logs`
"add auto feature" (#9760) 7 years ago			`#export FLAGS_fraction_of_gpu_memory_to_use=0.0`
			`export CUDNN_PATH=/paddle/cudnn_v5`
init (#9462) 7 years ago
			`# disable openmp and mkl parallel`
			`#https://github.com/PaddlePaddle/Paddle/issues/7199`
			`export MKL_NUM_THREADS=1`
			`export OMP_NUM_THREADS=1`
			ht=`lscpu \|grep "per core"\|awk -F':' '{print $2}'\|xargs`
			`if [ $ht -eq 1 ]; then # HT is OFF`
			`if [ -z "$KMP_AFFINITY" ]; then`
			`export KMP_AFFINITY="granularity=fine,compact,0,0"`
			`fi`
			`if [ -z "$OMP_DYNAMIC" ]; then`
			`export OMP_DYNAMIC="FALSE"`
			`fi`
			`else # HT is ON`
			`if [ -z "$KMP_AFFINITY" ]; then`
			`export KMP_AFFINITY="granularity=fine,compact,1,0"`
			`fi`
			`fi`
			`# disable multi-gpu if have more than one`
			`export CUDA_VISIBLE_DEVICES=0`
			`export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH`
			`export LD_LIBRARY_PATH=$CUDNN_PATH:$LD_LIBRARY_PATH`

"add auto feature" (#9760) 7 years ago			`# only query the gpu used`
			`nohup stdbuf -oL nvidia-smi \`
			`--id=${CUDA_VISIBLE_DEVICES} \`
			`--query-gpu=timestamp \`
			`--query-compute-apps=pid,process_name,used_memory \`
			`--format=csv \`
			`--filename=mem.log \`
			`-l 1 &`
refine benchmark 7 years ago
"add auto feature" (#9760) 7 years ago			`# mnist`
			`# mnist gpu mnist 128`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
			`--model=mnist \`
"add auto feature" (#9760) 7 years ago			`--device=GPU \`
			`--batch_size=128 \`
			`--skip_batch_num=5 \`
			`--iterations=500 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/mnist_gpu_128.log`
init (#9462) 7 years ago
			`# vgg16`
"add auto feature" (#9760) 7 years ago			`# gpu cifar10 128`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
			`--model=vgg16 \`
init (#9462) 7 years ago			`--device=GPU \`
			`--batch_size=128 \`
			`--skip_batch_num=5 \`
"add auto feature" (#9760) 7 years ago			`--iterations=30 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/vgg16_gpu_128.log`
"add auto feature" (#9760) 7 years ago
			`# flowers gpu 128`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
			`--model=vgg16 \`
"add auto feature" (#9760) 7 years ago			`--device=GPU \`
			`--batch_size=32 \`
			`--data_set=flowers \`
			`--skip_batch_num=5 \`
			`--iterations=30 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/vgg16_gpu_flowers_32.log`
init (#9462) 7 years ago
			`# resnet50`
			`# resnet50 gpu cifar10 128`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
refine benchmark 7 years ago			`--model=resnet \`
init (#9462) 7 years ago			`--device=GPU \`
			`--batch_size=128 \`
			`--data_set=cifar10 \`
			`--skip_batch_num=5 \`
			`--iterations=30 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/resnet50_gpu_128.log`
"add auto feature" (#9760) 7 years ago
			`# resnet50 gpu flowers 64`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
refine benchmark 7 years ago			`--model=resnet \`
"add auto feature" (#9760) 7 years ago			`--device=GPU \`
			`--batch_size=64 \`
			`--data_set=flowers \`
			`--skip_batch_num=5 \`
			`--iterations=30 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/resnet50_gpu_flowers_64.log`
init (#9462) 7 years ago
			`# lstm`
"add auto feature" (#9760) 7 years ago			`# lstm gpu imdb 32 # tensorflow only support batch=32`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
			`--model=stacked_dynamic_lstm \`
"add auto feature" (#9760) 7 years ago			`--device=GPU \`
			`--batch_size=32 \`
			`--skip_batch_num=5 \`
			`--iterations=30 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/lstm_gpu_32.log`
"add auto feature" (#9760) 7 years ago
			`# seq2seq`
			`# seq2seq gpu wmb 128`
Add fluid benchmark Dockerfile (#11095) * add fluid benchmark Dockerfile * add_fluid_benchmark_dockerfile 7 years ago			`FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \`
			`--model=machine_translation \`
"add auto feature" (#9760) 7 years ago			`--device=GPU \`
			`--batch_size=128 \`
			`--skip_batch_num=5 \`
			`--iterations=30 \`
refine benchmark 7 years ago			`2>&1 \| tee -a logs/lstm_gpu_128.log`