update shufflenetv2 scripts

pull/5601/head
panfengfeng 5 years ago
parent 8d41931456
commit 121943bdb5

@ -55,7 +55,7 @@ Dataset used: [imagenet](http://www.image-net.org/)
+-- Readme.md # descriptions about ShuffleNetV2 +-- Readme.md # descriptions about ShuffleNetV2
+-- scripts +-- scripts
¦ +--run_distribute_train_for_gpu.sh # shell script for distributed training ¦ +--run_distribute_train_for_gpu.sh # shell script for distributed training
¦ +--run_eval_for_multi_gpu.sh # shell script for evaluation ¦ +--run_eval_for_gpu.sh # shell script for evaluation
¦ +--run_standalone_train_for_gpu.sh # shell script for standalone training ¦ +--run_standalone_train_for_gpu.sh # shell script for standalone training
+-- src +-- src
¦ +--config.py # parameter configuration ¦ +--config.py # parameter configuration
@ -75,23 +75,23 @@ Dataset used: [imagenet](http://www.image-net.org/)
You can start training using python or shell scripts. The usage of shell scripts is as follows: You can start training using python or shell scripts. The usage of shell scripts is as follows:
- Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DATA_DIR] - Distributed training on GPU: sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
- Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DEVICE_ID] [DATA_DIR] - Standalone training on GPU: sh run_standalone_train_for_gpu.sh [DATASET_PATH]
### Launch ### Launch
``` ```
# training example # training example
python: python:
GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed --platform 'GPU' --dataset_path '~/imagenet/train/' > train.log 2>&1 & GPU: mpirun --allow-run-as-root -n 8 python train.py --is_distributed=True --platform='GPU' --dataset_path='~/imagenet/train/' > train.log 2>&1 &
shell: shell:
GPU: sh run_distribute_train_for_gpu.sh ~/imagenet/train/ GPU: cd scripts && sh run_distribute_train_for_gpu.sh 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
``` ```
### Result ### Result
Training result will be stored in the example path. Checkpoints will be stored at `. /checkpoint` by default, and training log will be redirected to `./train/train.log`. Training result will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and training log will be redirected to `./train/train.log`.
## [Eval process](#contents) ## [Eval process](#contents)
@ -99,21 +99,21 @@ Training result will be stored in the example path. Checkpoints will be stored a
You can start evaluation using python or shell scripts. The usage of shell scripts is as follows: You can start evaluation using python or shell scripts. The usage of shell scripts is as follows:
- GPU: sh run_eval_for_multi_gpu.sh [DEVICE_ID] [EPOCH] - GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]
### Launch ### Launch
``` ```
# infer example # infer example
python: python:
GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform 'GPU' --dataset_path '~/imagenet/val/' --epoch 250 > eval.log 2>&1 & GPU: CUDA_VISIBLE_DEVICES=0 python eval.py --platform='GPU' --dataset_path='~/imagenet/val/' > eval.log 2>&1 &
shell: shell:
GPU: sh run_eval_for_multi_gpu.sh 0 250 GPU: cd scripts && sh run_eval_for_gpu.sh ~/imagenet/val/ 'checkpoint_file'
``` ```
> checkpoint can be produced in training process. > checkpoint can be produced in training process.
### Result ### Result
Inference result will be stored in the example path, you can find result in `val.log`. Inference result will be stored in the example path, you can find result in `eval.log`.

@ -31,7 +31,6 @@ if __name__ == '__main__':
parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)') parser.add_argument('--checkpoint', type=str, default='', help='checkpoint of ShuffleNetV2 (Default: None)')
parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
parser.add_argument('--epoch', type=str, default='')
args_opt = parser.parse_args() args_opt = parser.parse_args()
if args_opt.platform == 'Ascend': if args_opt.platform == 'Ascend':
@ -43,7 +42,7 @@ if __name__ == '__main__':
ckpt = load_checkpoint(args_opt.checkpoint) ckpt = load_checkpoint(args_opt.checkpoint)
load_param_into_net(net, ckpt) load_param_into_net(net, ckpt)
net.set_train(False) net.set_train(False)
dataset = create_dataset(args_opt.dataset_path, cfg, False) dataset = create_dataset(args_opt.dataset_path, False, 0, 1)
loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False, loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False,
smooth_factor=0.1, num_classes=cfg.num_classes) smooth_factor=0.1, num_classes=cfg.num_classes)
eval_metrics = {'Loss': nn.Loss(), eval_metrics = {'Loss': nn.Loss(),

@ -13,5 +13,45 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
DATA_DIR=$1 if [ $# -lt 3 ]
mpirun --allow-run-as-root -n 8 python ./train.py --is_distributed --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 & then
echo "Usage: \
sh run_distribute_train_for_gpu.sh [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] \
"
exit 1
fi
# Reject a DEVICE_NUM outside the supported 1-8 range.
# The bounds are joined with || because no value can be both below 1 and
# above 8; with && the check could never fire. "$1" is quoted so an empty
# argument produces a test error instead of a malformed expression.
if [ "$1" -lt 1 ] || [ "$1" -gt 8 ]
then
echo "error: DEVICE_NUM=$1 is not in (1-8)"
exit 1
fi
# check dataset file
if [ ! -d $3 ]
then
echo "error: DATASET_PATH=$3 is not a directory"
exit 1
fi
export DEVICE_NUM=$1
export RANK_SIZE=$1
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "../train" ];
then
rm -rf ../train
fi
mkdir ../train
cd ../train || exit
export CUDA_VISIBLE_DEVICES="$2"
if [ $1 -gt 1 ]
then
mpirun -n $1 --allow-run-as-root \
python ${BASEPATH}/../train.py --platform='GPU' --is_distributed=True --dataset_path=$3 > train.log 2>&1 &
else
python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$3 > train.log 2>&1 &
fi

@ -13,6 +13,35 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
DEVICE_ID=$1 if [ $# != 2 ]
EPOCH=$2 then
CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./eval.py --platform 'GPU' --dataset_path '/home/data/ImageNet_Original/val/' --epoch $EPOCH > eval.log 2>&1 & echo "GPU: sh run_eval_for_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]"
exit 1
fi
# check dataset file
if [ ! -d $1 ]
then
echo "error: DATASET_PATH=$1 is not a directory"
exit 1
fi
# check checkpoint file
if [ ! -f $2 ]
then
echo "error: CHECKPOINT_PATH=$2 is not a file"
exit 1
fi
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
export DEVICE_ID=0
if [ -d "../eval" ];
then
rm -rf ../eval
fi
mkdir ../eval
cd ../eval || exit
python ${BASEPATH}/../eval.py --dataset_path=$1 --checkpoint=$2 > ./eval.log 2>&1 &

@ -13,6 +13,28 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
DEVICE_ID=$1 if [ $# -lt 1 ]
DATA_DIR=$2 then
CUDA_VISIBLE_DEVICES=$DEVICE_ID python ./train.py --platform 'GPU' --dataset_path $DATA_DIR > train.log 2>&1 & echo "Usage: \
sh run_standalone_train_for_gpu.sh [DATASET_PATH] \
"
exit 1
fi
# check dataset file
if [ ! -d $1 ]
then
echo "error: DATASET_PATH=$1 is not a directory"
exit 1
fi
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "../train" ];
then
rm -rf ../train
fi
mkdir ../train
cd ../train || exit
python ${BASEPATH}/../train.py --platform='GPU' --dataset_path=$1 > train.log 2>&1 &

@ -75,7 +75,5 @@ def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums) ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=cfg.work_nums)
# apply batch operations # apply batch operations
ds = ds.batch(cfg.batch_size, drop_remainder=True) ds = ds.batch(cfg.batch_size, drop_remainder=True)
# apply dataset repeat operation
ds = ds.repeat(repeat_num)
return ds return ds

@ -14,6 +14,7 @@
# ============================================================================ # ============================================================================
"""train_imagenet.""" """train_imagenet."""
import argparse import argparse
import ast
import os import os
import random import random
import numpy as np import numpy as np
@ -23,7 +24,7 @@ from network import ShuffleNetV2
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context from mindspore import context
from mindspore import dataset as de from mindspore import dataset as de
from mindspore import ParallelMode from mindspore.context import ParallelMode
from mindspore import Tensor from mindspore import Tensor
from mindspore.communication.management import init, get_rank, get_group_size from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.nn.optim.momentum import Momentum from mindspore.nn.optim.momentum import Momentum
@ -42,10 +43,9 @@ de.config.set_seed(cfg.random_seed)
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='image classification training') parser = argparse.ArgumentParser(description='image classification training')
parser.add_argument('--dataset_path', type=str, default='/home/data/imagenet_jpeg/train/', help='Dataset path') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint') parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint')
parser.add_argument('--is_distributed', action='store_true', default=False, parser.add_argument('--is_distributed', type=ast.literal_eval, default=False, help='distributed training')
help='distributed training')
parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform')
parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter') parser.add_argument('--model_size', type=str, default='1.0x', help='ShuffleNetV2 model size parameter')
args_opt = parser.parse_args() args_opt = parser.parse_args()

Loading…
Cancel
Save