From 121943bdb5971ee2924aec86c0b708c95b4ad2f2 Mon Sep 17 00:00:00 2001 From: panfengfeng Date: Mon, 31 Aug 2020 16:41:05 +0800 Subject: [PATCH] update shufflenetv2 scripts --- model_zoo/official/cv/shufflenetv2/Readme.md | 20 ++++----- model_zoo/official/cv/shufflenetv2/eval.py | 3 +- .../scripts/run_distribute_train_for_gpu.sh | 44 ++++++++++++++++++- ...l_for_multi_gpu.sh => run_eval_for_gpu.sh} | 35 +++++++++++++-- .../scripts/run_standalone_train_for_gpu.sh | 28 ++++++++++-- .../official/cv/shufflenetv2/src/dataset.py | 2 - model_zoo/official/cv/shufflenetv2/train.py | 8 ++-- 7 files changed, 114 insertions(+), 26 deletions(-) rename model_zoo/official/cv/shufflenetv2/scripts/{run_eval_for_multi_gpu.sh => run_eval_for_gpu.sh} (53%) diff --git a/model_zoo/official/cv/shufflenetv2/Readme.md b/model_zoo/official/cv/shufflenetv2/Readme.md index 23291073d9..ff83b656d4 100644 --- a/model_zoo/official/cv/shufflenetv2/Readme.md +++ b/model_zoo/official/cv/shufflenetv2/Readme.md @@ -55,7 +55,7 @@ Dataset used: [imagenet](http://www.image-net.org/) +-- Readme.md # descriptions about ShuffleNetV2 +-- scripts ¦ +--run_distribute_train_for_gpu.sh # shell script for distributed training - ¦ +--run_eval_for_multi_gpu.sh # shell script for evaluation + ¦ +--run_eval_for_gpu.sh # shell script for evaluation ¦ +--run_standalone_train_for_gpu.sh # shell script for standalone training +-- src ¦ +--config.py # parameter configuration @@ -75,23 +75,23 @@ Dataset used: [imagenet](http://www.image-net.org/) You can start training using python or shell scripts. 
The usage of shell scripts as follows: -- Ditributed training on GPU: sh run_distribute_train_for_gpu.sh [DATA_DIR] -- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DEVICE_ID] [DATA_DIR] +- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] +- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DATASET_PATH] ### Launch ``` # training example python: - GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed --platform 'GPU' --dataset_path '~/imagenet/train/' > train.log 2>&1 & + GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed=True --platform='GPU' --dataset_path='~/imagenet/train/' > train.log 2>&1 & shell: - GPU: sh run_distribute_train_for_gpu.sh ~/imagenet/train/ + GPU: cd scripts && sh run_distribute_train_for_gpu.sh 8 0,1,2,3,4,5,6,7 ~/imagenet/train/ ``` ### Result -Training result will be stored in the example path. Checkpoints will be stored at `. /checkpoint` by default, and training log will be redirected to `./train/train.log`. +Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and training log will be redirected to `./train/train.log`. ## [Eval process](#contents) @@ -99,21 +99,21 @@ Training result will be stored in the example path. Checkpoints will be stored a You can start evaluation using python or shell scripts. 
The usage of shell scripts as follows: -- GPU: sh run_eval_for_multi_gpu.sh [DEVICE_ID] [EPOCH] +- GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] ### Launch ``` # infer example python: - GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform 'GPU' --dataset_path '~/imagenet/val/' --epoch 250 > eval.log 2>&1 & + GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform='GPU' --dataset_path='~/imagenet/val/' > eval.log 2>&1 & shell: - GPU: sh run_eval_for_multi_gpu.sh 0 250 + GPU: cd scripts && sh run_eval_for_gpu.sh '~/imagenet/val/' 'checkpoint_file' ``` > checkpoint can be produced in training process. ### Result -Inference result will be stored in the example path, you can find result in `val.log`. +Inference result will be stored in the example path, you can find result in `eval.log`. diff --git a/model_zoo/official/cv/shufflenetv2/eval.py b/model_zoo/official/cv/shufflenetv2/eval.py index 51a4ceea8a..fdbddcf376 100644 --- a/model_zoo/official/cv/shufflenetv2/eval.py +++ b/model_zoo/official/cv/shufflenetv2/eval.py @@ -31,7 +31,6 @@ if __name__ == '__main__': parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') - parser.add_argument('--epoch', type=str, default='') args_opt = parser.parse_args() if args_opt.platform == 'Ascend': @@ -43,7 +42,7 @@ if __name__ == '__main__': ckpt = load_checkpoint(args_opt.checkpoint) load_param_into_net(net, ckpt) net.set_train(False) - dataset = create_dataset(args_opt.dataset_path, cfg, False) + dataset = create_dataset(args_opt.dataset_path, False, 0, 1) loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False, smooth_factor=0.1, num_classes=cfg.num_classes) eval_metrics = {'Loss': nn.Loss(), diff --git 
a/model_zoo/official/cv/shufflenetv2/scripts/run_distribute_train_for_gpu.sh b/model_zoo/official/cv/shufflenetv2/scripts/run_distribute_train_for_gpu.sh index 305f1dcfff..c3bfedeaf8 100644 --- a/model_zoo/official/cv/shufflenetv2/scripts/run_distribute_train_for_gpu.sh +++ b/model_zoo/official/cv/shufflenetv2/scripts/run_distribute_train_for_gpu.sh @@ -13,5 +13,45 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -DATA_DIR=$1 -mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 & +if [ $# -lt 3 ] then + echo "Usage: \ + sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] \ + " +exit 1 +fi + +if [ $1 -lt 1 ] || [ $1 -gt 8 ] +then + echo "error: DEVICE_NUM=$1 is not in (1-8)" +exit 1 +fi + +# check dataset file +if [ ! -d $3 ] +then + echo "error: DATASET_PATH=$3 is not a directory" +exit 1 +fi + +export DEVICE_NUM=$1 +export RANK_SIZE=$1 + +BASEPATH=$(cd "`dirname $0`" || exit; pwd) +export PYTHONPATH=${BASEPATH}:$PYTHONPATH +if [ -d "../train" ]; +then + rm -rf ../train +fi +mkdir ../train +cd ../train || exit + +export CUDA_VISIBLE_DEVICES="$2" + +if [ $1 -gt 1 ] +then + mpirun -n $1 --allow-run-as-root \ + python ${BASEPATH}/../train.py --platform='GPU' --is_distributed=True --dataset_path=$3 > train.log 2>&1 & +else + python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$3 > train.log 2>&1 & +fi diff --git a/model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_multi_gpu.sh b/model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_gpu.sh similarity index 53% rename from model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_multi_gpu.sh rename to model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_gpu.sh index 3d5c42a72a..af6492886a 100644 --- 
a/model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_multi_gpu.sh +++ b/model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_gpu.sh @@ -13,6 +13,35 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -DEVICE_ID=$1 -EPOCH=$2 -CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path '/home/data/ImageNet_Original/val/' --epoch $EPOCH > eval.log 2>&1 & +if [ $# != 2 ] +then + echo "GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]" +exit 1 +fi + +# check dataset file +if [ ! -d $1 ] +then + echo "error: DATASET_PATH=$1 is not a directory" +exit 1 +fi + +# check checkpoint file +if [ ! -f $2 ] +then + echo "error: CHECKPOINT_PATH=$2 is not a file" +exit 1 +fi + +BASEPATH=$(cd "`dirname $0`" || exit; pwd) +export PYTHONPATH=${BASEPATH}:$PYTHONPATH +export DEVICE_ID=0 + +if [ -d "../eval" ]; +then + rm -rf ../eval +fi +mkdir ../eval +cd ../eval || exit + +python ${BASEPATH}/../eval.py --dataset_path=$1 --checkpoint=$2 > ./eval.log 2>&1 & diff --git a/model_zoo/official/cv/shufflenetv2/scripts/run_standalone_train_for_gpu.sh b/model_zoo/official/cv/shufflenetv2/scripts/run_standalone_train_for_gpu.sh index a007a96cb0..02da407d1c 100644 --- a/model_zoo/official/cv/shufflenetv2/scripts/run_standalone_train_for_gpu.sh +++ b/model_zoo/official/cv/shufflenetv2/scripts/run_standalone_train_for_gpu.sh @@ -13,6 +13,28 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -DEVICE_ID=$1 -DATA_DIR=$2 -CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 & +if [ $# -lt 1 ] +then + echo "Usage: \ + sh run_standalone_train_for_gpu.sh [DATASET_PATH] \ + " +exit 1 +fi + +# check dataset file +if [ ! 
-d $1 ] +then + echo "error: DATASET_PATH=$1 is not a directory" +exit 1 +fi + +BASEPATH=$(cd "`dirname $0`" || exit; pwd) +export PYTHONPATH=${BASEPATH}:$PYTHONPATH +if [ -d "../train" ]; +then + rm -rf ../train +fi +mkdir ../train +cd ../train || exit + +python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$1 > train.log 2>&1 & diff --git a/model_zoo/official/cv/shufflenetv2/src/dataset.py b/model_zoo/official/cv/shufflenetv2/src/dataset.py index 26b37d78d5..f67c37f0cc 100644 --- a/model_zoo/official/cv/shufflenetv2/src/dataset.py +++ b/model_zoo/official/cv/shufflenetv2/src/dataset.py @@ -75,7 +75,5 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1): ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums) # apply batch operations ds = ds.batch(cfg.batch_size, drop_remainder=True) - # apply dataset repeat operation - ds = ds.repeat(repeat_num) return ds diff --git a/model_zoo/official/cv/shufflenetv2/train.py b/model_zoo/official/cv/shufflenetv2/train.py index ac97fe5a3d..0d6560bcb0 100644 --- a/model_zoo/official/cv/shufflenetv2/train.py +++ b/model_zoo/official/cv/shufflenetv2/train.py @@ -14,6 +14,7 @@ # ============================================================================ """train_imagenet.""" import argparse +import ast import os import random import numpy as np @@ -23,7 +24,7 @@ from network import ShuffleNetV2 import mindspore.nn as nn from mindspore import context from mindspore import dataset as de -from mindspore import ParallelMode +from mindspore.context import ParallelMode from mindspore import Tensor from mindspore.communication.management import init, get_rank, get_group_size from mindspore.nn.optim.momentum import Momentum @@ -42,10 +43,9 @@ de.config.set_seed(cfg.random_seed) if __name__ == '__main__': parser = argparse.ArgumentParser(description='image classification training') - parser.add_argument('--dataset_path', type=str, 
default='/home/data/imagenet_jpeg/train/', help='Dataset path') + parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint') - parser.add_argument('--is_distributed', action='store_true', default=False, - help='distributed training') + parser.add_argument('--is_distributed', type=ast.literal_eval, default=False, help='distributed training') parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter') args_opt = parser.parse_args()