diff --git a/model_zoo/official/cv/inceptionv3/README.md b/model_zoo/official/cv/inceptionv3/README.md index e970255436..ddb43fe20d 100644 --- a/model_zoo/official/cv/inceptionv3/README.md +++ b/model_zoo/official/cv/inceptionv3/README.md @@ -90,8 +90,6 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil ```python Major parameters in train.py and config.py are: 'random_seed' # fix random seed -'rank' # local rank of distributed -'group_size' # world size of distributed 'work_nums' # number of workers to read the data 'decay_method' # learning rate scheduler mode "loss_scale" # loss scale diff --git a/model_zoo/official/cv/inceptionv3/src/config.py b/model_zoo/official/cv/inceptionv3/src/config.py index 85462f4628..13ff78a79d 100644 --- a/model_zoo/official/cv/inceptionv3/src/config.py +++ b/model_zoo/official/cv/inceptionv3/src/config.py @@ -20,8 +20,6 @@ from easydict import EasyDict as edict config_gpu = edict({ 'random_seed': 1, - 'rank': 0, - 'group_size': 1, 'work_nums': 8, 'decay_method': 'cosine', "loss_scale": 1, @@ -47,8 +45,6 @@ config_gpu = edict({ config_ascend = edict({ 'random_seed': 1, - 'rank': 0, - 'group_size': 1, 'work_nums': 8, 'decay_method': 'cosine', "loss_scale": 1024, diff --git a/model_zoo/official/cv/resnext50/README.md b/model_zoo/official/cv/resnext50/README.md index c2b7a1c030..16a28f983a 100644 --- a/model_zoo/official/cv/resnext50/README.md +++ b/model_zoo/official/cv/resnext50/README.md @@ -4,9 +4,9 @@ - [Model Architecture](#model-architecture) - [Dataset](#dataset) - [Features](#features) - - [Mixed Precision](#mixed-precision) +- [Mixed Precision](#mixed-precision) - [Environment Requirements](#environment-requirements) -- [Quick Start](#quick-start) +- [Quick Start](#quick-start) - [Script Description](#script-description) - [Script and Sample Code](#script-and-sample-code) - [Script Parameters](#script-parameters) @@ -32,35 +32,33 @@ The overall network architecture of ResNeXt is show below: [Link](https://arxiv.org/abs/1611.05431) - # [Dataset](#contents) Dataset used: [imagenet](http://www.image-net.org/) - Dataset size: ~125G, 1.2W colorful images in 1000 classes - - Train: 120G, 1.2W images - - Test: 5G, 50000 images -- Data format: RGB images. - - Note: Data will be processed in src/dataset.py - +- Train: 120G, 1.2W images +- Test: 5G, 50000 images +- Data format: RGB images +- Note: Data will be processed in src/dataset.py # [Features](#contents) ## [Mixed Precision](#contents) The [mixed precision](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware. - + For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users could check the reduced-precision operators by enabling INFO log and then searching ‘reduce precision’. # [Environment Requirements](#contents) - Hardware(Ascend/GPU) - - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. +- Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. - Framework - - [MindSpore](https://www.mindspore.cn/install/en) +- [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) +- [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) +- [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) # [Script description](#contents) @@ -68,9 +66,9 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil ```python . -└─resnext50 +└─resnext50 ├─README.md - ├─scripts + ├─scripts ├─run_standalone_train.sh # launch standalone training for ascend(1p) ├─run_distribute_train.sh # launch distributed training for ascend(8p) ├─run_standalone_train_for_gpu.sh # launch standalone training for gpu(1p) @@ -100,14 +98,14 @@ For FP16 operators, if the input data type is FP32, the backend of MindSpore wil ├──train.py # train net ├──export.py # export mindir script ├──mindspore_hub_conf.py # mindspore hub interface - + ``` ## [Script Parameters](#contents) Parameters for both training and evaluating can be set in config.py. -``` +```config "image_height": '224,224' # image size "num_classes": 1000, # dataset class number "per_batch_size": 128, # batch size of input tensor @@ -118,7 +116,6 @@ Parameters for both training and evaluating can be set in config.py. "eta_min": 0, # eta_min in cosine_annealing scheduler "T_max": 150, # T-max in cosine_annealing scheduler "max_epoch": 150, # max epoch num to train the model -"backbone": 'resnext50', # backbone metwork "warmup_epochs" : 1, # warmup epoch "weight_decay": 0.0001, # weight decay "momentum": 0.9, # momentum @@ -135,18 +132,18 @@ Parameters for both training and evaluating can be set in config.py. ## [Training Process](#contents) -#### Usage +### Usage You can start training by python script: -``` +```script python train.py --data_dir ~/imagenet/train/ --platform Ascend --is_distributed 0 ``` or shell stript: -``` -Ascend: +```script +Ascend: # distribute training example(8p) sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH # standalone training @@ -180,16 +177,17 @@ You can find checkpoint file together with result in log. You can start training by python script: -``` +```script python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained resnext.ckpt ``` or shell stript: -``` +```script # Evaluation sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM ``` + PLATFORM is Ascend or GPU, default is Ascend. #### Launch @@ -202,8 +200,8 @@ sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext50_100.ckp #### Result Evaluation result will be stored in the scripts path. Under this, you can find result like the followings in log. - -``` + +```log acc=78.16%(TOP1) acc=93.88%(TOP5) ``` @@ -212,7 +210,7 @@ acc=93.88%(TOP5) Change the export mode and export file in `src/config.py`, and run `export.py`. -``` +```script python export.py --platform PLATFORM --pretrained CKPT_PATH ``` @@ -242,17 +240,16 @@ python export.py --platform PLATFORM --pretrained CKPT_PATH | -------------------------- | ----------------------------- | ------------------------- | -------------------- | | Resource | Ascend 910 | NV SMX2 V100-32G | Ascend 310 | | uploaded Date | 06/30/2020 | 07/23/2020 | 07/23/2020 | -| MindSpore Version | 0.5.0 | 0.6.0 | 0.6.0 | +| MindSpore Version | 0.5.0 | 0.6.0 | 0.6.0 | | Dataset | ImageNet, 1.2W | ImageNet, 1.2W | ImageNet, 1.2W | | batch_size | 1 | 1 | 1 | | outputs | probability | probability | probability | | Accuracy | acc=78.16%(TOP1) | acc=78.05%(TOP1) | | - # [Description of Random Situation](#contents) In dataset.py, we set the seed inside “create_dataset" function. We also use random seed in train.py. # [ModelZoo Homepage](#contents) - -Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). + +Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). diff --git a/model_zoo/official/cv/resnext50/eval.py b/model_zoo/official/cv/resnext50/eval.py index 93c4dc2a93..7972993feb 100644 --- a/model_zoo/official/cv/resnext50/eval.py +++ b/model_zoo/official/cv/resnext50/eval.py @@ -74,7 +74,6 @@ def parse_args(cloud_args=None): args = merge_args(args, cloud_args) args.image_size = config.image_size args.num_classes = config.num_classes - args.backbone = config.backbone args.rank = config.rank args.group_size = config.group_size @@ -201,9 +200,7 @@ def test(cloud_args=None): max_epoch=1, rank=args.rank, group_size=args.group_size, mode='eval') eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True, num_epochs=1) - network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform) - if network is None: - raise NotImplementedError('not implement {}'.format(args.backbone)) + network = get_network(num_classes=args.num_classes, platform=args.platform) load_pretrain_model(model, network, args) diff --git a/model_zoo/official/cv/resnext50/export.py b/model_zoo/official/cv/resnext50/export.py index 09fcd59d79..b2dc045698 100644 --- a/model_zoo/official/cv/resnext50/export.py +++ b/model_zoo/official/cv/resnext50/export.py @@ -33,7 +33,6 @@ def parse_args(): args, _ = parser.parse_known_args() args.image_size = config.image_size args.num_classes = config.num_classes - args.backbone = config.backbone args.image_size = list(map(int, config.image_size.split(','))) args.image_height = args.image_size[0] @@ -46,7 +45,7 @@ if __name__ == '__main__': args_export = parse_args() context.set_context(mode=context.GRAPH_MODE, device_target=args_export.platform) - net = get_network(args_export.backbone, num_classes=args_export.num_classes, platform=args_export.platform) + net = get_network(num_classes=args_export.num_classes, platform=args_export.platform) param_dict = load_checkpoint(args_export.pretrained) load_param_into_net(net, param_dict) diff --git a/model_zoo/official/cv/resnext50/src/config.py b/model_zoo/official/cv/resnext50/src/config.py index 75a39966f2..4147aae0df 100644 --- a/model_zoo/official/cv/resnext50/src/config.py +++ b/model_zoo/official/cv/resnext50/src/config.py @@ -26,7 +26,6 @@ config = ed({ "eta_min": 0, "T_max": 150, "max_epoch": 150, - "backbone": 'resnext50', "warmup_epochs": 1, "weight_decay": 0.0001, diff --git a/model_zoo/official/cv/resnext50/src/image_classification.py b/model_zoo/official/cv/resnext50/src/image_classification.py index 37e17caad8..8245b90628 100644 --- a/model_zoo/official/cv/resnext50/src/image_classification.py +++ b/model_zoo/official/cv/resnext50/src/image_classification.py @@ -94,7 +94,5 @@ class Resnet(ImageClassificationNetwork): -def get_network(backbone_name, **kwargs): - if backbone_name in ['resnext50']: - return Resnet(backbone_name, **kwargs) - return None +def get_network(**kwargs): + return Resnet('resnext50', **kwargs) diff --git a/model_zoo/official/cv/resnext50/train.py b/model_zoo/official/cv/resnext50/train.py index fad8be1045..27e782da9c 100644 --- a/model_zoo/official/cv/resnext50/train.py +++ b/model_zoo/official/cv/resnext50/train.py @@ -131,7 +131,6 @@ def parse_args(cloud_args=None): args.eta_min = config.eta_min args.T_max = config.T_max args.max_epoch = config.max_epoch - args.backbone = config.backbone args.warmup_epochs = config.warmup_epochs args.weight_decay = config.weight_decay args.momentum = config.momentum @@ -213,9 +212,7 @@ def train(cloud_args=None): # network args.logger.important_info('start create network') # get network and init - network = get_network(args.backbone, num_classes=args.num_classes, platform=args.platform) - if network is None: - raise NotImplementedError('not implement {}'.format(args.backbone)) + network = get_network(num_classes=args.num_classes, platform=args.platform) load_pretrain_model(args.pretrained, network, args)