update shufflenetv2 scripts

5 years ago · 121943bdb5
parent 8d41931456
commit 121943bdb5
7 changed files with 114 additions and 26 deletions
--- a/model_zoo/official/cv/shufflenetv2/Readme.md
+++ b/model_zoo/official/cv/shufflenetv2/Readme.md
@ -55,7 +55,7 @@ Dataset used: [imagenet](http://www.image-net.org/)
  +-- Readme.md     # descriptions about ShuffleNetV2
  +-- scripts
  ¦   +--run_distribute_train_for_gpu.sh   # shell script for distributed training
-  ¦   +--run_eval_for_multi_gpu.sh         # shell script for evaluation
+  ¦   +--run_eval_for_gpu.sh         # shell script for evaluation
  ¦   +--run_standalone_train_for_gpu.sh   # shell script for standalone training
  +-- src
  ¦   +--config.py      # parameter configuration
@ -75,23 +75,23 @@ Dataset used: [imagenet](http://www.image-net.org/)
 You can start training using python or shell scripts. The usage of shell scripts as follows:
- Ditributed training on GPU: sh run_distribute_train_for_gpu.sh [DATA_DIR]
+- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DEVICE_ID] [DATA_DIR]
+- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DATASET_PATH]
 ### Launch
 ```
 # training example
  python:
-      GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed --platform 'GPU' --dataset_path '~/imagenet/train/' > train.log 2>&1 &
+      GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed=True --platform='GPU' --dataset_path='~/imagenet/train/' > train.log 2>&1 &
  shell:
-      GPU: sh run_distribute_train_for_gpu.sh ~/imagenet/train/
+      GPU: cd scripts && sh run_distribute_train_for_gpu.sh 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
 ```
 ### Result
-Training result will be stored in the example path. Checkpoints will be stored at `. /checkpoint` by default, and training log  will be redirected to `./train/train.log`.
+Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and training log will be redirected to `./train/train.log`.
 ## [Eval process](#contents)
@ -99,21 +99,21 @@ Training result will be stored in the example path. Checkpoints will be stored a
 You can start evaluation using python or shell scripts. The usage of shell scripts as follows:
- GPU: sh run_eval_for_multi_gpu.sh [DEVICE_ID] [EPOCH]
+- GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
 ### Launch
 ``` 
 # infer example
  python:
-      GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform 'GPU' --dataset_path '~/imagenet/val/' --epoch 250 > eval.log 2>&1 &
+      GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform='GPU' --dataset_path='~/imagenet/val/' --checkpoint='checkpoint_file' > eval.log 2>&1 &
  shell:
-      GPU: sh run_eval_for_multi_gpu.sh 0 250
+      GPU: cd scripts && sh run_eval_for_gpu.sh ~/imagenet/val/ checkpoint_file
 ```
 > The checkpoint file is produced during the training process.
 ### Result
-Inference result will be stored in the example path, you can find result in `val.log`.
+Inference result will be stored in the example path, you can find result in `eval.log`.
--- a/model_zoo/official/cv/shufflenetv2/eval.py
+++ b/model_zoo/official/cv/shufflenetv2/eval.py
@ -31,7 +31,6 @@ if __name__ == '__main__':
    parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)')
    parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
    parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
    parser.add_argument('--epoch', type=str, default='')
    args_opt = parser.parse_args()
    if args_opt.platform == 'Ascend':
@ -43,7 +42,7 @@ if __name__ == '__main__':
    ckpt = load_checkpoint(args_opt.checkpoint)
    load_param_into_net(net, ckpt)
    net.set_train(False)
-    dataset = create_dataset(args_opt.dataset_path, cfg, False)
+    dataset = create_dataset(args_opt.dataset_path, False, 0, 1)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False,
                                            smooth_factor=0.1, num_classes=cfg.num_classes)
    eval_metrics = {'Loss': nn.Loss(),
--- a/model_zoo/official/cv/shufflenetv2/scripts/run_distribute_train_for_gpu.sh
+++ b/model_zoo/official/cv/shufflenetv2/scripts/run_distribute_train_for_gpu.sh
@ -13,5 +13,45 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DATA_DIR=$1
+if [ $# -lt 3 ]
-mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
+then
    echo "Usage: \
          sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] \
          "
 exit 1
 fi
 # Reject a DEVICE_NUM outside 1..8. The two bounds must be joined with "||":
 # a value can never be both < 1 and > 8, so "&&" made this check a no-op.
 if [ $1 -lt 1 ] || [ $1 -gt 8 ]
 then
    echo "error: DEVICE_NUM=$1 is not in (1-8)"
 exit 1
 fi
 # check dataset file
 if [ ! -d $3 ]
 then
    echo "error: DATASET_PATH=$3 is not a directory"    
 exit 1
 fi
 export DEVICE_NUM=$1
 export RANK_SIZE=$1
 BASEPATH=$(cd "`dirname $0`" || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 if [ -d "../train" ];
 then
    rm -rf ../train
 fi
 mkdir ../train
 cd ../train || exit
 export CUDA_VISIBLE_DEVICES="$2"
 if [ $1 -gt 1 ]
 then
    mpirun -n $1 --allow-run-as-root \
    python ${BASEPATH}/../train.py --platform='GPU' --is_distributed=True --dataset_path=$3 > train.log 2>&1 &
 else
    python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$3 > train.log 2>&1 &
 fi
--- a/model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_multi_gpu.sh
+++ b/model_zoo/official/cv/shufflenetv2/scripts/run_eval_for_multi_gpu.sh
@ -13,6 +13,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DEVICE_ID=$1
+if [ $# != 2 ]
-EPOCH=$2
+then
-CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path '/home/data/ImageNet_Original/val/' --epoch $EPOCH > eval.log 2>&1 &
+    echo "GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]"
 exit 1
 fi
 # check dataset file
 if [ ! -d $1 ]
 then
    echo "error: DATASET_PATH=$1 is not a directory"    
 exit 1
 fi
 # check checkpoint file
 if [ ! -f $2 ]
 then
    echo "error: CHECKPOINT_PATH=$2 is not a file"    
 exit 1
 fi
 BASEPATH=$(cd "`dirname $0`" || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 export DEVICE_ID=0
 if [ -d "../eval" ];
 then
    rm -rf ../eval
 fi
 mkdir ../eval
 cd ../eval || exit
 python ${BASEPATH}/../eval.py --dataset_path=$1 --checkpoint=$2 > ./eval.log 2>&1 &
--- a/model_zoo/official/cv/shufflenetv2/scripts/run_standalone_train_for_gpu.sh
+++ b/model_zoo/official/cv/shufflenetv2/scripts/run_standalone_train_for_gpu.sh
@ -13,6 +13,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DEVICE_ID=$1
+if [ $# -lt 1 ]
-DATA_DIR=$2
+then
-CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
+    echo "Usage: \
          sh run_standalone_train_for_gpu.sh [DATASET_PATH] \
          "
 exit 1
 fi
 # check dataset file
 if [ ! -d $1 ]
 then
    echo "error: DATASET_PATH=$1 is not a directory"    
 exit 1
 fi
 BASEPATH=$(cd "`dirname $0`" || exit; pwd)
 export PYTHONPATH=${BASEPATH}:$PYTHONPATH
 if [ -d "../train" ];
 then
    rm -rf ../train
 fi
 mkdir ../train
 cd ../train || exit
 python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$1 > train.log 2>&1 &
--- a/model_zoo/official/cv/shufflenetv2/src/dataset.py
+++ b/model_zoo/official/cv/shufflenetv2/src/dataset.py
@ -75,7 +75,5 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
    # apply batch operations
    ds = ds.batch(cfg.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
--- a/model_zoo/official/cv/shufflenetv2/train.py
+++ b/model_zoo/official/cv/shufflenetv2/train.py
@ -14,6 +14,7 @@
 # ============================================================================
 """train_imagenet."""
 import argparse
 import ast
 import os
 import random
 import numpy as np
@ -23,7 +24,7 @@ from network import ShuffleNetV2
 import mindspore.nn as nn
 from mindspore import context
 from mindspore import dataset as de
-from mindspore import ParallelMode
+from mindspore.context import ParallelMode
 from mindspore import Tensor
 from mindspore.communication.management import init, get_rank, get_group_size
 from mindspore.nn.optim.momentum import Momentum
@ -42,10 +43,9 @@ de.config.set_seed(cfg.random_seed)
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='image classification training')
-    parser.add_argument('--dataset_path', type=str, default='/home/data/imagenet_jpeg/train/', help='Dataset path')
+    parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
    parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint')
-    parser.add_argument('--is_distributed', action='store_true', default=False,
+    parser.add_argument('--is_distributed', type=ast.literal_eval, default=False, help='distributed training')
                        help='distributed training')
    parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
    parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter')
    args_opt = parser.parse_args()