From 5825ff1418ad62c324427939b33bf6b09422d790 Mon Sep 17 00:00:00 2001 From: zhaoting Date: Mon, 21 Dec 2020 17:33:49 +0800 Subject: [PATCH] fix some errors and add resnext50 Chinese README --- model_zoo/official/cv/mobilenetv2/README.md | 10 +- .../official/cv/mobilenetv2/README_CN.md | 10 +- .../official/cv/mobilenetv2/src/config.py | 12 +- model_zoo/official/cv/resnet/eval.py | 2 +- model_zoo/official/cv/resnext50/README.md | 12 +- model_zoo/official/cv/resnext50/README_CN.md | 259 ++++++++++++++++++ model_zoo/official/cv/resnext50/src/config.py | 4 +- model_zoo/official/cv/ssd/src/dataset.py | 15 +- .../cv/cycle_gan/src/dataset/datasets.py | 1 + .../cycle_gan/src/utils/cityscapes_utils.py | 2 +- 10 files changed, 290 insertions(+), 37 deletions(-) create mode 100644 model_zoo/official/cv/resnext50/README_CN.md diff --git a/model_zoo/official/cv/mobilenetv2/README.md b/model_zoo/official/cv/mobilenetv2/README.md index 1dd5a1d841..2c5d4b8fa4 100644 --- a/model_zoo/official/cv/mobilenetv2/README.md +++ b/model_zoo/official/cv/mobilenetv2/README.md @@ -10,7 +10,7 @@ - [Script and Sample Code](#script-and-sample-code) - [Training Process](#training-process) - [Evaluation Process](#eval-process) - - [Export MindIR](#export-mindir) + - [Model Export](#model-export) - [Model Description](#model-description) - [Performance](#performance) - [Training Performance](#training-performance) @@ -182,14 +182,14 @@ Inference result will be stored in the example path, you can find result like th result: {'acc': 0.71976314102564111} ckpt=./ckpt_0/mobilenet-200_625.ckpt ``` -## [Export MindIR](#contents) - -Change the export mode and export file in `src/config.py`, and run `export.py`. 
+## [Model Export](#contents) ```shell -python export.py --platform [PLATFORM] --pretrain_ckpt [CKPT_PATH] +python export.py --platform [PLATFORM] --ckpt_file [CKPT_PATH] --file_format [EXPORT_FORMAT] ``` +`EXPORT_FORMAT` should be in ["AIR", "ONNX", "MINDIR"] + # [Model description](#contents) ## [Performance](#contents) diff --git a/model_zoo/official/cv/mobilenetv2/README_CN.md b/model_zoo/official/cv/mobilenetv2/README_CN.md index ebf0925daa..a1b39c382b 100644 --- a/model_zoo/official/cv/mobilenetv2/README_CN.md +++ b/model_zoo/official/cv/mobilenetv2/README_CN.md @@ -17,7 +17,7 @@ - [用法](#用法-1) - [启动](#启动-1) - [结果](#结果-1) - - [导出MINDIR](#导出MINDIR) + - [模型导出](#模型导出) - [模型描述](#模型描述) - [性能](#性能) - [训练性能](#训练性能) @@ -188,14 +188,14 @@ epoch time:138331.250, per step time:221.330, avg loss:3.917 result:{'acc':0.71976314102564111} ckpt=./ckpt_0/mobilenet-200_625.ckpt ``` -## 导出MINDIR - -修改`src/config.py`文件中的`export_mode`和`export_file`, 运行`export.py`。 +## 模型导出 ```shell -python export.py --platform [PLATFORM] --pretrain_ckpt [CKPT_PATH] +python export.py --platform [PLATFORM] --ckpt_file [CKPT_PATH] --file_format [EXPORT_FORMAT] ``` +`EXPORT_FORMAT` 可选 ["AIR", "ONNX", "MINDIR"]. 
+ # 模型描述 ## 性能 diff --git a/model_zoo/official/cv/mobilenetv2/src/config.py b/model_zoo/official/cv/mobilenetv2/src/config.py index ecea8e8ad3..446cc454f7 100644 --- a/model_zoo/official/cv/mobilenetv2/src/config.py +++ b/model_zoo/official/cv/mobilenetv2/src/config.py @@ -41,9 +41,7 @@ def set_config(args): "save_checkpoint_path": "./", "platform": args.platform, "run_distribute": args.run_distribute, - "activation": "Softmax", - "export_format": "MINDIR", - "export_file": "mobilenetv2" + "activation": "Softmax" }) config_gpu = ed({ "num_classes": 1000, @@ -65,9 +63,7 @@ def set_config(args): "save_checkpoint_path": "./", "platform": args.platform, "run_distribute": args.run_distribute, - "activation": "Softmax", - "export_format": "MINDIR", - "export_file": "mobilenetv2" + "activation": "Softmax" }) config_ascend = ed({ "num_classes": 1000, @@ -92,9 +88,7 @@ def set_config(args): "rank_id": int(os.getenv('RANK_ID', '0')), "rank_size": int(os.getenv('RANK_SIZE', '1')), "run_distribute": int(os.getenv('RANK_SIZE', '1')) > 1., - "activation": "Softmax", - "export_format": "MINDIR", - "export_file": "mobilenetv2" + "activation": "Softmax" }) config = ed({"CPU": config_cpu, "GPU": config_gpu, diff --git a/model_zoo/official/cv/resnet/eval.py b/model_zoo/official/cv/resnet/eval.py index c85da20311..29bd56652e 100755 --- a/model_zoo/official/cv/resnet/eval.py +++ b/model_zoo/official/cv/resnet/eval.py @@ -56,7 +56,7 @@ if __name__ == '__main__': # init context context.set_context(mode=context.GRAPH_MODE, device_target=target, save_graphs=False) - if target != "GPU": + if target == "Ascend": device_id = int(os.getenv('DEVICE_ID')) context.set_context(device_id=device_id) diff --git a/model_zoo/official/cv/resnext50/README.md b/model_zoo/official/cv/resnext50/README.md index 16a28f983a..d3bc18e9af 100644 --- a/model_zoo/official/cv/resnext50/README.md +++ b/model_zoo/official/cv/resnext50/README.md @@ -12,7 +12,7 @@ - [Script Parameters](#script-parameters) - [Training 
Process](#training-process) - [Evaluation Process](#evaluation-process) - - [Export MindIR](#export-mindir) + - [Model Export](#model-export) - [Model Description](#model-description) - [Performance](#performance) - [Training Performance](#evaluation-performance) @@ -206,14 +206,14 @@ acc=78.16%(TOP1) acc=93.88%(TOP5) ``` -## [Export MindIR](#contents) +## [Model Export](#contents) -Change the export mode and export file in `src/config.py`, and run `export.py`. - -```script -python export.py --platform PLATFORM --pretrained CKPT_PATH +```shell +python export.py --device_target [PLATFORM] --ckpt_file [CKPT_PATH] --file_format [EXPORT_FORMAT] ``` +`EXPORT_FORMAT` should be in ["AIR", "ONNX", "MINDIR"] + # [Model description](#contents) ## [Performance](#contents) diff --git a/model_zoo/official/cv/resnext50/README_CN.md b/model_zoo/official/cv/resnext50/README_CN.md new file mode 100644 index 0000000000..41da9b0836 --- /dev/null +++ b/model_zoo/official/cv/resnext50/README_CN.md @@ -0,0 +1,259 @@ +# 目录 + +- [目录](#目录) +- [ResNeXt50说明](#resnext50说明) +- [模型架构](#模型架构) +- [数据集](#数据集) +- [特性](#特性) + - [混合精度](#混合精度) +- [环境要求](#环境要求) +- [脚本说明](#脚本说明) + - [脚本及样例代码](#脚本及样例代码) + - [脚本参数](#脚本参数) + - [训练过程](#训练过程) + - [用法](#用法) + - [样例](#样例) + - [评估过程](#评估过程) + - [用法](#用法-1) + - [样例](#样例-1) + - [结果](#结果) + - [模型导出](#模型导出) +- [模型描述](#模型描述) + - [性能](#性能) + - [训练性能](#训练性能) + - [推理性能](#推理性能) +- [随机情况说明](#随机情况说明) +- [ModelZoo主页](#modelzoo主页) + +# ResNeXt50说明 + +ResNeXt是一个简单、高度模块化的图像分类网络架构。ResNeXt的设计为统一的、多分支的架构,该架构仅需设置几个超参数。此策略提供了一个新维度,我们将其称为“基数”(转换集的大小),它是深度和宽度维度之外的一个重要因素。 + +[论文](https://arxiv.org/abs/1611.05431): Xie S, Girshick R, Dollár, Piotr, et al. Aggregated Residual Transformations for Deep Neural Networks. 2016. 
+ +# 模型架构 + +ResNeXt整体网络架构如下: + +[链接](https://arxiv.org/abs/1611.05431) + +# 数据集 + +使用的数据集:[ImageNet](http://www.image-net.org/) + +- 数据集大小:约125G, 共1000个类,包含约120万张彩色图像 + - 训练集:120G,约120万张图像 + - 测试集:5G,5万张图像 +- 数据格式:RGB图像。 + - 注:数据在src/dataset.py中处理。 + +# 特性 + +## 混合精度 + +采用[混合精度](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/enable_mixed_precision.html)的训练方法同时使用单精度和半精度数据来提高深度学习神经网络的训练速度,同时保持单精度训练所能达到的网络精度。混合精度训练提高计算速度、减少内存使用的同时,支持在特定硬件上训练更大的模型或实现更大批次的训练。 + +以FP16算子为例,如果输入数据类型为FP32,MindSpore后台会自动降低精度来处理数据。用户可打开INFO日志,搜索“reduce precision”查看精度降低的算子。 + +# 环境要求 + +- 硬件(Ascend或GPU) + - 准备Ascend或GPU处理器搭建硬件环境。如需试用昇腾处理器,请发送[申请表](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx)至ascend@huawei.com,审核通过即可获得资源。 +- 框架 + - [MindSpore](https://www.mindspore.cn/install) +- 如需查看详情,请参见如下资源: + - [MindSpore教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html) + +# 脚本说明 + +## 脚本及样例代码 + +```path +. 
+└─resnext50 + ├─README.md + ├─scripts + ├─run_standalone_train.sh # 启动Ascend单机训练(单卡) + ├─run_distribute_train.sh # 启动Ascend分布式训练(8卡) + ├─run_standalone_train_for_gpu.sh # 启动GPU单机训练(单卡) + ├─run_distribute_train_for_gpu.sh # 启动GPU分布式训练(8卡) + └─run_eval.sh # 启动评估 + ├─src + ├─backbone + ├─_init_.py # 初始化 + ├─resnet.py # ResNeXt50骨干 + ├─utils + ├─_init_.py # 初始化 + ├─cunstom_op.py # 网络操作 + ├─logging.py # 打印日志 + ├─optimizers_init_.py # 获取参数 + ├─sampler.py # 分布式采样器 + ├─var_init_.py # 计算增益值 + ├─_init_.py # 初始化 + ├─config.py # 参数配置 + ├─crossentropy.py # 交叉熵损失函数 + ├─dataset.py # 数据预处理 + ├─head.py # 常见头 + ├─image_classification.py # 获取ResNet + ├─linear_warmup.py # 线性热身学习率 + ├─warmup_cosine_annealing.py # 每次迭代的学习率 + ├─warmup_step_lr.py # 热身迭代学习率 + ├─eval.py # 评估网络 + ├──train.py # 训练网络 + ├──mindspore_hub_conf.py # MindSpore Hub接口 +``` + +## 脚本参数 + +在config.py中可以同时配置训练和评估参数。 + +```python +"image_height": '224,224' # 图像大小 +"num_classes": 1000, # 数据集类数 +"per_batch_size": 128, # 输入张量的批次大小 +"lr": 0.05, # 基础学习率 +"lr_scheduler": 'cosine_annealing', # 学习率模式 +"lr_epochs": '30,60,90,120', # LR变化轮次 +"lr_gamma": 0.1, # 减少LR的exponential lr_scheduler因子 +"eta_min": 0, # cosine_annealing调度器中的eta_min +"T_max": 150, # cosine_annealing调度器中的T-max +"max_epoch": 150, # 训练模型的最大轮次数量 +"backbone": 'resnext50', # 骨干网络 +"warmup_epochs" : 1, # 热身轮次 +"weight_decay": 0.0001, # 权重衰减 +"momentum": 0.9, # 动量 +"is_dynamic_loss_scale": 0, # 动态损失放大 +"loss_scale": 1024, # 损失放大 +"label_smooth": 1, # 标签平滑 +"label_smooth_factor": 0.1, # 标签平滑因子 +"ckpt_interval": 2000, # 检查点间隔 +"ckpt_path": 'outputs/', # 检查点保存位置 +"is_save_on_master": 1, +"rank": 0, # 分布式本地进程序号 +"group_size": 1 # 分布式进程总数 +``` + +## 训练过程 + +### 用法 + +您可以通过python脚本开始训练: + +```shell +python train.py --data_dir ~/imagenet/train/ --platform Ascend --is_distributed 0 +``` + +或通过shell脚本开始训练: + +```shell +Ascend: + # 分布式训练示例(8卡) + sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH + # 单机训练 + sh run_standalone_train.sh DEVICE_ID DATA_PATH +GPU: + # 分布式训练示例(8卡) + 
sh run_distribute_train_for_gpu.sh DATA_PATH + # 单机训练 + sh run_standalone_train_for_gpu.sh DEVICE_ID DATA_PATH +``` + +### 样例 + +```shell +# Ascend分布式训练示例(8卡) +sh scripts/run_distribute_train.sh RANK_TABLE_FILE /dataset/train +# Ascend单机训练示例 +sh scripts/run_standalone_train.sh 0 /dataset/train + +# GPU分布式训练示例(8卡) +sh scripts/run_distribute_train_for_gpu.sh /dataset/train +# GPU单机训练示例 +sh scripts/run_standalone_train_for_gpu.sh 0 /dataset/train +``` + +您可以在日志中找到检查点文件和结果。 + +## 评估过程 + +### 用法 + +您可以通过python脚本开始评估: + +```shell +python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained resnext.ckpt +``` + +或通过shell脚本开始评估: + +```shell +# 评估 +sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM +``` + +PLATFORM可选Ascend或GPU,默认为Ascend。 + +#### 样例 + +```shell +# 检查点评估 +sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext50_100.ckpt Ascend +``` + +#### 结果 + +评估结果保存在脚本路径下。您可以在日志中找到类似以下的结果。 + +```log +acc=78.16%(TOP1) +acc=93.88%(TOP5) +``` + +## 模型导出 + +```shell +python export.py --device_target [PLATFORM] --ckpt_file [CKPT_PATH] --file_format [EXPORT_FORMAT] +``` + +`EXPORT_FORMAT` 可选 ["AIR", "ONNX", "MINDIR"]. 
+ +# 模型描述 + +## 性能 + +### 训练性能 + +| 参数 | ResNeXt50 | | +| -------------------------- | ---------------------------------------------------------- | ------------------------- | +| 资源 | Ascend 910;CPU:2.60GHz,192核;内存:755GB | NV SMX2 V100-32G | +| 上传日期 | 2020-6-30 | 2020-7-23 | +| MindSpore版本 | 0.5.0 | 0.6.0 | +| 数据集 | ImageNet | ImageNet | +| 训练参数 | src/config.py | src/config.py | +| 优化器 | Momentum | Momentum | +| 损失函数 | Softmax交叉熵 | Softmax交叉熵 | +| 损失 | 1.76592 | 1.8965 | +| 准确率 | 78%(TOP1) | 77.8%(TOP1) | +| 总时长 | 7.8小时 (8卡) | 21.5小时 (8卡) | +| 调优检查点 | 192 M(.ckpt文件) | 192 M(.ckpt文件) | + +#### 推理性能 + +| 参数 | | | | +| -------------------------- | ----------------------------- | ------------------------- | -------------------- | +| 资源 | Ascend 910 | NV SMX2 V100-32G | Ascend 310 | +| 上传日期 | 2020-6-30 | 2020-7-23 | 2020-7-23 | +| MindSpore版本 | 0.5.0 | 0.6.0 | 0.6.0 | +| 数据集 | ImageNet, 1.2万 | ImageNet, 1.2万 | ImageNet, 1.2万 | +| batch_size | 1 | 1 | 1 | +| 输出 | 概率 | 概率 | 概率 | +| 准确率 | acc=78.16%(TOP1) | acc=78.05%(TOP1) | | + +# 随机情况说明 + +dataset.py中设置了“create_dataset”函数内的种子,同时还使用了train.py中的随机种子。 + +# ModelZoo主页 + +请浏览官网[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 diff --git a/model_zoo/official/cv/resnext50/src/config.py b/model_zoo/official/cv/resnext50/src/config.py index 5c35fc5781..21a28f6ad5 100644 --- a/model_zoo/official/cv/resnext50/src/config.py +++ b/model_zoo/official/cv/resnext50/src/config.py @@ -41,7 +41,5 @@ config = ed({ "is_save_on_master": 1, "rank": 0, - "group_size": 1, - "export_format": "MINDIR", - "export_file": "resnext50" + "group_size": 1 }) diff --git a/model_zoo/official/cv/ssd/src/dataset.py b/model_zoo/official/cv/ssd/src/dataset.py index 73c4cbbb3f..f70024e5f1 100644 --- a/model_zoo/official/cv/ssd/src/dataset.py +++ b/model_zoo/official/cv/ssd/src/dataset.py @@ -35,13 +35,12 @@ def _rand(a=0., b=1.): return np.random.rand() * (b - a) + a -def get_imageId_from_fileName(filename): - """Get imageID from fileName""" - 
try: - filename = os.path.splitext(filename)[0] +def get_imageId_from_fileName(filename, id_iter): + """Get imageID from fileName if fileName is int, else return id_iter.""" + filename = os.path.splitext(filename)[0] + if filename.isdigit(): return int(filename) - except: - raise NotImplementedError('Filename %s is supposed to be an integer.' % (filename)) + return id_iter def random_sample_crop(image, boxes): @@ -185,6 +184,7 @@ def create_voc_label(is_training): image_files_dict = {} image_anno_dict = {} images = [] + id_iter = 0 for anno_file in os.listdir(anno_dir): print(anno_file) if not anno_file.endswith('xml'): @@ -192,7 +192,8 @@ def create_voc_label(is_training): tree = et.parse(os.path.join(anno_dir, anno_file)) root_node = tree.getroot() file_name = root_node.find('filename').text - img_id = get_imageId_from_fileName(file_name) + img_id = get_imageId_from_fileName(file_name, id_iter) + id_iter += 1 image_path = os.path.join(image_dir, file_name) print(image_path) if not os.path.isfile(image_path): diff --git a/model_zoo/research/cv/cycle_gan/src/dataset/datasets.py b/model_zoo/research/cv/cycle_gan/src/dataset/datasets.py index d12b5621a3..a0dfe5064c 100644 --- a/model_zoo/research/cv/cycle_gan/src/dataset/datasets.py +++ b/model_zoo/research/cv/cycle_gan/src/dataset/datasets.py @@ -18,6 +18,7 @@ import random import numpy as np from PIL import Image +random.seed(1) IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.tif', '.tiff'] def is_image_file(filename): diff --git a/model_zoo/research/cv/cycle_gan/src/utils/cityscapes_utils.py b/model_zoo/research/cv/cycle_gan/src/utils/cityscapes_utils.py index 7954ce05ed..ab75377908 100644 --- a/model_zoo/research/cv/cycle_gan/src/utils/cityscapes_utils.py +++ b/model_zoo/research/cv/cycle_gan/src/utils/cityscapes_utils.py @@ -82,7 +82,7 @@ class CityScapes: 'bus', 'train', 'motorcycle', 'bicycle', 'unlabeled'] self.color_list = [] for name in self.classes: - 
self.color_list.append(label2color[name].color) + self.color_list.append(label2color[name]) self.class_num = len(self.classes) def get_id(self, img_path):