update shufflenetv2 scripts

pull/5601/head
panfengfeng
parent 8d41931456
commit 121943bdb5

@@ -55,7 +55,7 @@ Dataset used: [imagenet](http://www.image-net.org/)
 +-- Readme.md # descriptions about ShuffleNetV2
 +-- scripts
 ¦ +--run_distribute_train_for_gpu.sh # shell script for distributed training
-¦ +--run_eval_for_multi_gpu.sh # shell script for evaluation
+¦ +--run_eval_for_gpu.sh # shell script for evaluation
 ¦ +--run_standalone_train_for_gpu.sh # shell script for standalone training
 +-- src
 ¦ +--config.py # parameter configuration
@@ -75,23 +75,23 @@ Dataset used: [imagenet](http://www.image-net.org/)
 You can start training using python or shell scripts. The usage of shell scripts is as follows:
-- Ditributed training on GPU: sh run_distribute_train_for_gpu.sh [DATA_DIR]
-- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DEVICE_ID] [DATA_DIR]
+- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
+- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DATASET_PATH]
 ### Launch
 ```
 # training example
 python:
-GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed --platform 'GPU' --dataset_path '~/imagenet/train/' > train.log 2>&1 &
+GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed=True --platform='GPU' --dataset_path='~/imagenet/train/' > train.log 2>&1 &
 shell:
-GPU: sh run_distribute_train_for_gpu.sh ~/imagenet/train/
+GPU: cd scripts && sh run_distribute_train_for_gpu.sh 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
 ```
 ### Result
-Training result will be stored in the example path. Checkpoints will be stored at `. /checkpoint` by default, and training log will be redirected to `./train/train.log`.
+Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`.
## [Eval process](#contents)
@@ -99,21 +99,21 @@ Training result will be stored in the example path. Checkpoints will be stored a
 You can start evaluation using python or shell scripts. The usage of shell scripts is as follows:
-- GPU: sh run_eval_for_multi_gpu.sh [DEVICE_ID] [EPOCH]
+- GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
 ### Launch
 ```
 # infer example
 python:
-GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform 'GPU' --dataset_path '~/imagenet/val/' --epoch 250 > eval.log 2>&1 &
+GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform='GPU' --dataset_path='~/imagenet/val/' --checkpoint='checkpoint_file' > eval.log 2>&1 &
 shell:
-GPU: sh run_eval_for_multi_gpu.sh 0 250
+GPU: cd scripts && sh run_eval_for_gpu.sh '~/imagenet/val/' 'checkpoint_file'
 ```
 > checkpoint can be produced during the training process.
 ### Result
-Inference result will be stored in the example path, you can find result in `val.log`.
+Inference results will be stored in the example path; you can find the result in `eval.log`.

@@ -31,7 +31,6 @@ if __name__ == '__main__':
     parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)')
     parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
     parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
-    parser.add_argument('--epoch', type=str, default='')
     args_opt = parser.parse_args()
     if args_opt.platform == 'Ascend':
@@ -43,7 +42,7 @@ if __name__ == '__main__':
     ckpt = load_checkpoint(args_opt.checkpoint)
     load_param_into_net(net, ckpt)
     net.set_train(False)
-    dataset = create_dataset(args_opt.dataset_path, cfg, False)
+    dataset = create_dataset(args_opt.dataset_path, False, 0, 1)
     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False,
                                             smooth_factor=0.1, num_classes=cfg.num_classes)
     eval_metrics = {'Loss': nn.Loss(),
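
A note on the signature change: the positional arguments in the new call line up with the updated `create_dataset` definition shown in the dataset.py hunk further down. A keyword spelling of the same call, for clarity only (the commit itself uses the positional form):

```
# Same call as above, spelled with keywords for readability.
# Signature (from src/dataset.py below):
#   create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1)
dataset = create_dataset(
    args_opt.dataset_path,  # e.g. ~/imagenet/val/
    False,                  # do_train: False for evaluation
    0,                      # rank: single-device evaluation
    1,                      # group_size: one device, no data sharding
)
```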

@@ -13,5 +13,45 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DATA_DIR=$1
-mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
+if [ $# -lt 3 ]
+then
+    echo "Usage: \
+          sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] \
+          "
+    exit 1
+fi
+
+if [ $1 -lt 1 ] || [ $1 -gt 8 ]
+then
+    echo "error: DEVICE_NUM=$1 is not in (1-8)"
+    exit 1
+fi
+
+# check dataset file
+if [ ! -d $3 ]
+then
+    echo "error: DATASET_PATH=$3 is not a directory"
+    exit 1
+fi
+
+export DEVICE_NUM=$1
+export RANK_SIZE=$1
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+
+if [ -d "../train" ];
+then
+    rm -rf ../train
+fi
+mkdir ../train
+cd ../train || exit
+
+export CUDA_VISIBLE_DEVICES="$2"
+if [ $1 -gt 1 ]
+then
+    mpirun -n $1 --allow-run-as-root \
+        python ${BASEPATH}/../train.py --platform='GPU' --is_distributed=True --dataset_path=$3 > train.log 2>&1 &
+else
+    python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$3 > train.log 2>&1 &
+fi

@@ -13,6 +13,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DEVICE_ID=$1
-EPOCH=$2
-CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path '/home/data/ImageNet_Original/val/' --epoch $EPOCH > eval.log 2>&1 &
+if [ $# != 2 ]
+then
+    echo "GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+# check dataset file
+if [ ! -d $1 ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+# check checkpoint file
+if [ ! -f $2 ]
+then
+    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    exit 1
+fi
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+export DEVICE_ID=0
+
+if [ -d "../eval" ];
+then
+    rm -rf ../eval
+fi
+mkdir ../eval
+cd ../eval || exit
+
+python ${BASEPATH}/../eval.py --dataset_path=$1 --checkpoint=$2 > ./eval.log 2>&1 &

@@ -13,6 +13,28 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
-DEVICE_ID=$1
-DATA_DIR=$2
-CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 &
+if [ $# -lt 1 ]
+then
+    echo "Usage: \
+          sh run_standalone_train_for_gpu.sh [DATASET_PATH] \
+          "
+    exit 1
+fi
+
+# check dataset file
+if [ ! -d $1 ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export PYTHONPATH=${BASEPATH}:$PYTHONPATH
+
+if [ -d "../train" ];
+then
+    rm -rf ../train
+fi
+mkdir ../train
+cd ../train || exit
+python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$1 > train.log 2>&1 &

@@ -75,7 +75,5 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
     ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
     # apply batch operations
     ds = ds.batch(cfg.batch_size, drop_remainder=True)
-    # apply dataset repeat operation
-    ds = ds.repeat(repeat_num)
     return ds
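
With `ds.repeat(repeat_num)` gone, the number of passes over the data is controlled by the epoch argument to `Model.train` rather than by the dataset pipeline. A minimal sketch of that pattern, assuming a `model` object and a `cfg.epoch_size` setting as in typical MindSpore training code (neither appears in this hunk):

```
# Hedged sketch: epochs come from Model.train, not from ds.repeat.
# 'model' and 'cfg.epoch_size' are assumed names, not from this diff.
ds = create_dataset(args_opt.dataset_path, True, rank, group_size)
model.train(cfg.epoch_size,          # number of passes over the data
            ds,                      # un-repeated dataset pipeline
            dataset_sink_mode=True)  # standard MindSpore data sinking
```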

@@ -14,6 +14,7 @@
 # ============================================================================
 """train_imagenet."""
 import argparse
+import ast
 import os
 import random
 import numpy as np
@@ -23,7 +24,7 @@ from network import ShuffleNetV2
 import mindspore.nn as nn
 from mindspore import context
 from mindspore import dataset as de
-from mindspore import ParallelMode
+from mindspore.context import ParallelMode
 from mindspore import Tensor
 from mindspore.communication.management import init, get_rank, get_group_size
 from mindspore.nn.optim.momentum import Momentum
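
The `init`/`get_rank`/`get_group_size` imports are the GPU distributed entry points that pair with the mpirun launch in the new scripts. A hedged sketch of how they are typically wired together (the relevant body of train.py is not shown in this diff, so treat the details as assumptions):

```
# Hedged sketch of the usual MindSpore GPU data-parallel setup;
# the actual train.py body is outside this hunk.
if args_opt.is_distributed:
    init("nccl")                   # NCCL collectives, matching the mpirun launch
    rank = get_rank()              # index of this process in the job
    group_size = get_group_size()  # total number of launched processes
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      device_num=group_size)
else:
    rank, group_size = 0, 1        # standalone training
```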
@@ -42,10 +43,9 @@ de.config.set_seed(cfg.random_seed)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='image classification training')
-    parser.add_argument('--dataset_path', type=str, default='/home/data/imagenet_jpeg/train/', help='Dataset path')
+    parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
     parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint')
-    parser.add_argument('--is_distributed', action='store_true', default=False,
-                        help='distributed training')
+    parser.add_argument('--is_distributed', type=ast.literal_eval, default=False, help='distributed training')
     parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
     parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter')
     args_opt = parser.parse_args()
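
The switch from `action='store_true'` to `type=ast.literal_eval` is what makes the `--is_distributed=True` form in the updated scripts work: a `store_true` flag rejects an explicit value, while `literal_eval` turns the strings `'True'`/`'False'` into real bools. A self-contained demo of the pattern (not code from this commit):

```
import argparse
import ast

parser = argparse.ArgumentParser()
# action='store_true' would error on "--is_distributed=True";
# type=ast.literal_eval parses the value into a Python bool.
parser.add_argument('--is_distributed', type=ast.literal_eval, default=False)

args = parser.parse_args(['--is_distributed=True'])
assert args.is_distributed is True  # a bool, not the string 'True'
```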
