parent
2441059642
commit
b0deae74ee
@ -0,0 +1,71 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""test ShuffleNetV1"""
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
from mindspore import context, nn
|
||||||
|
from mindspore.train.model import Model
|
||||||
|
from mindspore.common import set_seed
|
||||||
|
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||||
|
from src.shufflenetv1 import ShuffleNetV1 as shufflenetv1
|
||||||
|
from src.config import config
|
||||||
|
from src.dataset import create_dataset
|
||||||
|
from src.crossentropysmooth import CrossEntropySmooth
|
||||||
|
|
||||||
|
set_seed(1)

if __name__ == '__main__':
    # Command-line options for a single-device evaluation run.
    parser = argparse.ArgumentParser(description='Image classification')
    parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
    parser.add_argument('--device_id', type=int, default=0, help='Device id')
    parser.add_argument('--checkpoint_path', type=str, default='', help='Checkpoint file path')
    parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
    parser.add_argument('--model_size', type=str, default='2.0x', help='ShuffleNetV1 model size',
                        choices=['2.0x', '1.5x', '1.0x', '0.5x'])
    args_opt = parser.parse_args()

    # Graph mode on the requested device; graph dumps are disabled.
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.device_target,
                        device_id=args_opt.device_id,
                        save_graphs=False)

    # Evaluation pipeline (do_train=False), unsharded: one device, rank 0.
    dataset = create_dataset(args_opt.dataset_path, do_train=False, device_num=1, rank=0)
    step_size = dataset.get_dataset_size()

    # Build the network, restore the checkpoint weights and switch to inference mode.
    net = shufflenetv1(model_size=args_opt.model_size)
    load_param_into_net(net, load_checkpoint(args_opt.checkpoint_path))
    net.set_train(False)

    # Loss is built with the same smoothing settings that are stored in the shared config.
    loss = CrossEntropySmooth(sparse=True, reduction="mean",
                              smooth_factor=config.label_smooth_factor,
                              num_classes=config.num_classes)

    # Report loss together with top-1 and top-5 accuracy.
    eval_metrics = {
        'Loss': nn.Loss(),
        'Top_1_Acc': nn.Top1CategoricalAccuracy(),
        'Top_5_Acc': nn.Top5CategoricalAccuracy(),
    }
    model = Model(net, loss_fn=loss, metrics=eval_metrics)

    # Run the evaluation and measure the wall-clock duration in milliseconds.
    start_time = time.time()
    res = model.eval(dataset, dataset_sink_mode=True)
    elapsed_ms = (time.time() - start_time) * 1000
    log = "result:" + str(res) + ", ckpt:'" + args_opt.checkpoint_path + "', time: " + str(elapsed_ms)
    print(log)

    # Append (never overwrite) the result line to a log file in the working directory.
    filename = './eval_log.txt'
    with open(filename, 'a') as file_object:
        file_object.write(log + '\n')
|
@ -0,0 +1,50 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Launch one background training process per device for an 8-rank run.
# Positional arguments as read below: $1 = rank table file, $2 = dataset directory.
# NOTE: both values are used unchanged after `cd` into the per-rank directory,
# so a relative path is resolved from inside train_parallel<i>.
DATA_DIR="$2"
export RANK_TABLE_FILE="$1"
export RANK_SIZE=8

# Split the logical cores evenly between the ranks so each process can be
# pinned to its own contiguous CPU range with taskset.
cores=$(grep -c "processor" /proc/cpuinfo)
echo "the number of logical core" $cores
avg_core_per_rank=$((cores / RANK_SIZE))
core_gap=$((avg_core_per_rank - 1))
echo "avg_core_per_rank" $avg_core_per_rank
echo "core_gap" $core_gap
for((i=0;i<RANK_SIZE;i++))
do
    # Rank i owns cores [start, end].
    start=$((i * avg_core_per_rank))
    export DEVICE_ID=$i
    export RANK_ID=$i
    export DEPLOY_MODE=0
    export GE_USE_STATIC_MEMORY=1
    end=$((start + core_gap))
    cmdopt="${start}-${end}"

    # Fresh working directory per rank; logs and env dump are written there.
    rm -rf "train_parallel$i"
    mkdir "./train_parallel$i"
    cp *.py "./train_parallel$i"
    cd "./train_parallel$i" || exit
    echo "start training for rank $i, device $DEVICE_ID"

    env > env.log
    # Quoted expansions keep paths containing spaces or glob characters intact.
    taskset -c "$cmdopt" python ../train.py \
        --is_distributed \
        --device_target=Ascend \
        --dataset_path="$DATA_DIR" > log.txt 2>&1 &
    cd ../
done
|
@ -0,0 +1,25 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Run evaluation in the background on a single device.
# Positional arguments as read below: $1 = device id, $2 = dataset directory,
# $3 = checkpoint file.
export DEVICE_ID="$1"
DATA_DIR="$2"
PATH_CHECKPOINT="$3"

# Quoted expansions keep paths containing spaces or glob characters intact.
# stdout and stderr are both captured in eval.log.
python ./eval.py \
    --device_target=Ascend \
    --device_id="$DEVICE_ID" \
    --checkpoint_path="$PATH_CHECKPOINT" \
    --dataset_path="$DATA_DIR" > eval.log 2>&1 &
|
@ -0,0 +1,23 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Run single-device training in the background.
# Positional arguments as read below: $1 = device id, $2 = dataset directory.
export DEVICE_ID="$1"
DATA_DIR="$2"

# Quoted expansions keep paths containing spaces or glob characters intact.
# stdout and stderr are both captured in log.txt.
python ./train.py \
    --device_target=Ascend \
    --device_id="$DEVICE_ID" \
    --dataset_path="$DATA_DIR" > log.txt 2>&1 &
|
||||||
|
|
@ -0,0 +1,45 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""network config setting"""
|
||||||
|
from easydict import EasyDict as edict
|
||||||
|
|
||||||
|
# Shared hyper-parameters; entries are reachable both as attributes and as keys.
config = edict(
    # Training length and checkpointing
    epoch_size=250,
    keep_checkpoint_max=5,
    ckpt_path='./checkpoint/',
    save_checkpoint_epochs=1,
    save_checkpoint=True,
    amp_level='O3',

    # Dataset
    batch_size=128,
    num_classes=1000,

    # Loss
    label_smooth_factor=0.1,

    # Learning rate schedule
    decay_method='cosine',
    lr_init=0.00,
    lr_max=0.50,
    lr_end=0.00,
    warmup_epochs=4,
    loss_scale=1024,

    # Optimizer
    weight_decay=0.00004,
    momentum=0.9,
)
|
@ -0,0 +1,38 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""define loss function for network"""
|
||||||
|
import mindspore.nn as nn
|
||||||
|
from mindspore import Tensor
|
||||||
|
from mindspore.common import dtype as mstype
|
||||||
|
from mindspore.nn.loss.loss import _Loss
|
||||||
|
from mindspore.ops import functional as F
|
||||||
|
from mindspore.ops import operations as P
|
||||||
|
|
||||||
|
|
||||||
|
class CrossEntropySmooth(_Loss):
    """
    Softmax cross entropy with label smoothing.

    Args:
        sparse (bool): when True, `label` holds class indices and is expanded to
            smoothed one-hot vectors before the loss is computed. Default: True.
        reduction (str): reduction passed to SoftmaxCrossEntropyWithLogits. Default: 'mean'.
        smooth_factor (float): smoothing amount; the target class gets 1 - smooth_factor and
            every other class gets smooth_factor / (num_classes - 1). Default: 0.
        num_classes (int): number of classes used to spread the smoothing mass. Default: 1000.

    Returns:
        tensor, the output of SoftmaxCrossEntropyWithLogits.
    """

    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super().__init__()
        self.onehot = P.OneHot()
        self.sparse = sparse
        # Values written into the one-hot target for the true class / all other classes.
        target_weight = 1.0 - smooth_factor
        other_weight = 1.0 * smooth_factor / (num_classes - 1)
        self.on_value = Tensor(target_weight, mstype.float32)
        self.off_value = Tensor(other_weight, mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        if self.sparse:
            # One-hot depth is taken from dimension 1 of the logits.
            depth = F.shape(logit)[1]
            label = self.onehot(label, depth, self.on_value, self.off_value)
        return self.ce(logit, label)
|
@ -0,0 +1,66 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""Data operations, will be used in train.py and eval.py"""
|
||||||
|
from src.config import config
|
||||||
|
import mindspore.common.dtype as mstype
|
||||||
|
import mindspore.dataset.engine as de
|
||||||
|
import mindspore.dataset.transforms.c_transforms as C2
|
||||||
|
import mindspore.dataset.vision.c_transforms as C
|
||||||
|
|
||||||
|
|
||||||
|
def create_dataset(dataset_path, do_train, device_num=1, rank=0):
    """
    Build the batched image-folder pipeline used for training or evaluation.

    Args:
        dataset_path (string): root directory handed to ImageFolderDataset.
        do_train (bool): True selects the random-augmentation transforms,
            False selects decode / resize / center-crop.
        device_num (int): number of shards; sharding is only configured when this
            is not 1. Default: 1.
        rank (int): shard id of this process, used only when device_num is not 1. Default: 0.

    Returns:
        dataset, batched with config.batch_size (incomplete final batch dropped).
    """
    reader_options = {'num_parallel_workers': 8, 'shuffle': True}
    if device_num != 1:
        # Each process only reads its own shard of the data.
        reader_options['num_shards'] = device_num
        reader_options['shard_id'] = rank
    ds = de.ImageFolderDataset(dataset_path, **reader_options)

    # Image transforms: mode-specific front end followed by a shared tail.
    if do_train:
        image_ops = [
            C.RandomCropDecodeResize(224),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4),
        ]
    else:
        image_ops = [
            C.Decode(),
            C.Resize(239),
            C.CenterCrop(224),
        ]
    image_ops.append(C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                                 std=[0.229 * 255, 0.224 * 255, 0.225 * 255]))
    image_ops.append(C.HWC2CHW())

    # Labels are cast to int32.
    label_cast = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=image_ops, num_parallel_workers=8)
    ds = ds.map(input_columns="label", operations=label_cast, num_parallel_workers=8)

    return ds.batch(config.batch_size, drop_remainder=True)
|
@ -0,0 +1,161 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""learning rate generator"""
|
||||||
|
import math
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_steps_lr(lr_init, lr_max, total_steps, warmup_steps):
    """
    Build a linear-warmup schedule followed by three step decays.

    After warmup the rate is lr_max until 30% of total_steps, then lr_max * 0.1
    until 60%, lr_max * 0.01 until 80%, and lr_max * 0.001 afterwards.

    Args:
        lr_init(float): learning rate at step 0 of the warmup.
        lr_max(float): learning rate reached after warmup.
        total_steps(int): number of entries to generate.
        warmup_steps(int): number of leading steps that ramp linearly.

    Returns:
        list, one learning rate per step.
    """
    first_drop = 0.3 * total_steps
    second_drop = 0.6 * total_steps
    third_drop = 0.8 * total_steps

    def rate_at(step):
        # Warmup takes priority over the decay boundaries.
        if step < warmup_steps:
            return lr_init + (lr_max - lr_init) * step / warmup_steps
        if step < first_drop:
            return lr_max
        if step < second_drop:
            return lr_max * 0.1
        if step < third_drop:
            return lr_max * 0.01
        return lr_max * 0.001

    return [rate_at(step) for step in range(total_steps)]
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_exponential_lr(lr_init, lr_max, total_steps, warmup_steps, steps_per_epoch):
    """
    Build a linear-warmup schedule followed by a staircase exponential decay.

    After warmup the rate is lr_max * 0.94 ** k, where k grows by one for every
    two epochs' worth of steps elapsed since the end of warmup.

    Args:
        lr_init(float): learning rate at step 0 of the warmup.
        lr_max(float): learning rate reached after warmup.
        total_steps(int): number of entries to generate.
        warmup_steps(int): number of leading steps that ramp linearly.
        steps_per_epoch(int): steps of one epoch.

    Returns:
        list, one learning rate per step (negative values are clamped to 0.0).
    """
    # A zero-length warmup has no slope; avoids dividing by zero.
    if warmup_steps == 0:
        warmup_slope = 0
    else:
        warmup_slope = (float(lr_max) - float(lr_init)) / float(warmup_steps)

    schedule = []
    for step in range(total_steps):
        if step >= warmup_steps:
            epochs_done = float(step - warmup_steps) / steps_per_epoch
            rate = float(lr_max) * pow(0.94, math.floor(epochs_done / 2))
        else:
            rate = float(lr_init) + warmup_slope * float(step)
        if rate < 0.0:
            rate = 0.0
        schedule.append(rate)
    return schedule
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_cosine_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps):
    """
    Applies linear warmup followed by cosine decay to generate the learning rates.

    During warmup, step i gets lr_init + (lr_max - lr_init) * (i + 1) / warmup_steps,
    so the last warmup step already equals lr_max. Afterwards the rate follows half
    a cosine period from lr_max towards lr_end over the remaining steps.

    Args:
        lr_init(float): init learning rate.
        lr_end(float): end learning rate
        lr_max(float): max learning rate.
        total_steps(int): all steps in training.
        warmup_steps(int): all steps in warmup epochs.

    Returns:
        list, learning rate of every step.
    """
    decay_steps = total_steps - warmup_steps
    # The per-step warmup increment does not depend on the step index, so it is
    # computed once. With no warmup the branch using it is never taken; the guard
    # only avoids a division by zero here.
    if warmup_steps > 0:
        lr_inc = (float(lr_max) - float(lr_init)) / float(warmup_steps)
    else:
        lr_inc = 0.0

    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            lr = float(lr_init) + lr_inc * (i + 1)
        else:
            cosine_decay = 0.5 * (1 + math.cos(math.pi * (i - warmup_steps) / decay_steps))
            lr = (lr_max - lr_end) * cosine_decay + lr_end
        lr_each_step.append(lr)
    return lr_each_step
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_liner_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps):
    """
    Build a linear-warmup schedule followed by a linear decay.

    The rate ramps from lr_init towards lr_max during warmup and then falls
    linearly from lr_max towards lr_end over the remaining steps.

    Args:
        lr_init(float): learning rate at step 0 of the warmup.
        lr_end(float): learning rate the decay heads towards.
        lr_max(float): learning rate reached after warmup.
        total_steps(int): number of entries to generate.
        warmup_steps(int): number of leading steps that ramp linearly.

    Returns:
        list, one learning rate per step.
    """
    def rate_at(step):
        if step < warmup_steps:
            return lr_init + (lr_max - lr_init) * step / warmup_steps
        return lr_max - (lr_max - lr_end) * (step - warmup_steps) / (total_steps - warmup_steps)

    return [rate_at(step) for step in range(total_steps)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """
    Generate the per-step learning rate array for a whole training run.

    Args:
        lr_init(float): init learning rate
        lr_end(float): end learning rate (used by the cosine and liner modes)
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch
        lr_decay_mode(string): 'steps', 'steps_decay' or 'cosine'; any other
            value selects the liner schedule

    Returns:
        np.ndarray of float32 with steps_per_epoch * total_epochs entries
    """
    # Epoch counts are converted to step counts once, up front.
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    if lr_decay_mode == 'steps':
        schedule = _generate_steps_lr(lr_init, lr_max, total_steps, warmup_steps)
    elif lr_decay_mode == 'steps_decay':
        schedule = _generate_exponential_lr(lr_init, lr_max, total_steps, warmup_steps, steps_per_epoch)
    elif lr_decay_mode == 'cosine':
        schedule = _generate_cosine_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps)
    else:
        # Fallback for every unrecognised mode.
        schedule = _generate_liner_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps)

    return np.array(schedule).astype(np.float32)
|
@ -0,0 +1,189 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""ShuffleNetV1"""
|
||||||
|
import mindspore.nn as nn
|
||||||
|
import mindspore.ops.operations as P
|
||||||
|
from mindspore import dtype as mstype
|
||||||
|
|
||||||
|
|
||||||
|
class GroupConv(nn.Cell):
    """
    Group convolution built from one independent Conv2d per group.

    The input is split along axis 1 into `groups` slices, each slice is cast to
    float32 and passed through its own convolution, and the results are
    concatenated back along axis 1.

    Args:
        in_channels (int): Input channels of feature map; must be divisible by groups.
        out_channels (int): Output channels of feature map; must be divisible by groups.
        kernel_size (int): Size of convolution kernel.
        stride (int): Stride size for the group convolution layer.
        pad_mode (str): pad mode forwarded to every Conv2d. Default: "pad".
        pad (int): padding forwarded to every Conv2d. Default: 0.
        groups (int): number of groups. Default: 1.
        has_bias (bool): whether each Conv2d has a bias. Default: False.

    Returns:
        tensor, output tensor.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, pad_mode="pad", pad=0, groups=1, has_bias=False):
        super().__init__()
        assert in_channels % groups == 0 and out_channels % groups == 0
        self.groups = groups
        self.convs = nn.CellList()
        self.op_split = P.Split(axis=1, output_num=self.groups)
        self.op_concat = P.Concat(axis=1)
        self.cast = P.Cast()
        # Every group gets its own plain (group=1) convolution over its channel slice.
        channels_in = in_channels // groups
        channels_out = out_channels // groups
        for _ in range(groups):
            group_conv = nn.Conv2d(channels_in, channels_out,
                                   kernel_size=kernel_size, stride=stride, has_bias=has_bias,
                                   padding=pad, pad_mode=pad_mode, group=1, weight_init='xavier_uniform')
            self.convs.append(group_conv)

    def construct(self, x):
        slices = self.op_split(x)
        outputs = ()
        for i in range(self.groups):
            group_input = self.cast(slices[i], mstype.float32)
            outputs = outputs + (self.convs[i](group_input),)
        return self.op_concat(outputs)
|
||||||
|
|
||||||
|
|
||||||
|
class ShuffleV1Block(nn.Cell):
    """
    ShuffleNetV1 unit: grouped 1x1 conv, channel shuffle, depthwise conv, grouped 1x1 conv.

    With stride 1 the main branch is added to the input; with stride 2 the input is
    average-pooled and concatenated with the main branch along axis 1. Both paths end
    in a ReLU.

    Args:
        inp (int): input channels.
        oup (int): output channels of the whole block.
        group (int): number of groups for the pointwise convolutions and the shuffle.
        first_group (bool): when True the first pointwise convolution uses a single group.
        mid_channels (int): channels between the two pointwise convolutions.
        ksize (int): kernel size of the depthwise convolution.
        stride (int): stride of the depthwise convolution (1 or 2).

    Returns:
        tensor, output tensor.
    """

    def __init__(self, inp, oup, group, first_group, mid_channels, ksize, stride):
        super().__init__()
        self.stride = stride
        self.group = group

        pad = ksize // 2
        # In a stride-2 block the pooled input supplies `inp` of the `oup` output
        # channels, so the main branch only produces the remainder.
        outputs = oup - inp if stride == 2 else oup

        self.relu = nn.ReLU()
        self.add = P.TensorAdd()
        self.concat = P.Concat(1)
        self.shape = P.Shape()
        self.transpose = P.Transpose()
        self.reshape = P.Reshape()

        # Pointwise (1x1) grouped convolution that runs before the channel shuffle.
        self.branch_main_1 = nn.SequentialCell([
            GroupConv(in_channels=inp, out_channels=mid_channels, kernel_size=1, stride=1, pad_mode="pad", pad=0,
                      groups=1 if first_group else group),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(),
        ])

        # Depthwise convolution followed by the second pointwise grouped convolution.
        self.branch_main_2 = nn.SequentialCell([
            nn.Conv2d(mid_channels, mid_channels, kernel_size=ksize, stride=stride, pad_mode='pad', padding=pad,
                      group=mid_channels, weight_init='xavier_uniform', has_bias=False),
            nn.BatchNorm2d(mid_channels),
            GroupConv(in_channels=mid_channels, out_channels=outputs, kernel_size=1, stride=1, pad_mode="pad", pad=0,
                      groups=group),
            nn.BatchNorm2d(outputs),
        ])

        if stride == 2:
            self.branch_proj = nn.AvgPool2d(kernel_size=3, stride=2, pad_mode='same')

    def construct(self, old_x):
        left = old_x
        out = old_x
        right = self.branch_main_1(old_x)
        if self.group > 1:
            right = self.channel_shuffle(right)
        right = self.branch_main_2(right)
        if self.stride == 1:
            # Residual connection.
            out = self.relu(self.add(left, right))
        elif self.stride == 2:
            # Downsampling: pooled shortcut concatenated with the main branch.
            left = self.branch_proj(left)
            out = self.relu(self.concat((left, right)))
        return out

    def channel_shuffle(self, x):
        """Reshape to (N, C // group, group, H, W), swap axes 1 and 2, and flatten back to (N, C, H, W)."""
        batchsize, num_channels, height, width = self.shape(x)
        per_group = num_channels // self.group
        x = self.reshape(x, (batchsize, per_group, self.group, height, width))
        x = self.transpose(x, (0, 2, 1, 3, 4))
        return self.reshape(x, (batchsize, num_channels, height, width))
|
||||||
|
|
||||||
|
|
||||||
|
class ShuffleNetV1(nn.Cell):
    """
    ShuffleNetV1 classification network.

    Structure: 3x3 stride-2 stem conv + BN + ReLU, 3x3 stride-2 max pool, three
    stages of ShuffleV1Block (4, 8 and 4 blocks; the first block of each stage has
    stride 2), 7x7 average pool and a dense classifier.

    Args:
        n_class (int): number of output classes. Default: 1000.
        model_size (str): width setting, one of '0.5x', '1.0x', '1.5x', '2.0x'. Default: '2.0x'.
        group (int): number of groups in the grouped convolutions, 3 or 8. Default: 3.

    Raises:
        NotImplementedError: if `group` or `model_size` is not one of the supported values.

    Returns:
        tensor, output of the classifier.
    """

    def __init__(self, n_class=1000, model_size='2.0x', group=3):
        super(ShuffleNetV1, self).__init__()
        print('model size is ', model_size)

        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size

        # Channel plan per (group, model_size): [placeholder, stem, stage1, stage2, stage3].
        # Index 0 is never read by this class.
        channel_plans = {
            3: {
                '0.5x': [-1, 12, 120, 240, 480],
                '1.0x': [-1, 24, 240, 480, 960],
                '1.5x': [-1, 24, 360, 720, 1440],
                '2.0x': [-1, 48, 480, 960, 1920],
            },
            8: {
                '0.5x': [-1, 16, 192, 384, 768],
                '1.0x': [-1, 24, 384, 768, 1536],
                '1.5x': [-1, 24, 576, 1152, 2304],
                '2.0x': [-1, 48, 768, 1536, 3072],
            },
        }
        # An unsupported group used to leave stage_out_channels unset and fail
        # further down with AttributeError; reject it the same way as an
        # unsupported model_size.
        if group not in channel_plans:
            raise NotImplementedError("unsupported group: {}".format(group))
        if model_size not in channel_plans[group]:
            raise NotImplementedError("unsupported model_size: {}".format(model_size))
        self.stage_out_channels = channel_plans[group][model_size]

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.SequentialCell(
            nn.Conv2d(3, input_channel, 3, 2, 'pad', 1, weight_init='xavier_uniform', has_bias=False),
            nn.BatchNorm2d(input_channel),
            nn.ReLU(),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same')

        features = []
        for idxstage, numrepeat in enumerate(self.stage_repeats):
            output_channel = self.stage_out_channels[idxstage + 2]

            for i in range(numrepeat):
                # Only the first block of a stage downsamples.
                stride = 2 if i == 0 else 1
                # The very first block of the network uses an ungrouped first 1x1 conv.
                first_group = idxstage == 0 and i == 0
                features.append(ShuffleV1Block(input_channel, output_channel,
                                               group=group, first_group=first_group,
                                               mid_channels=output_channel // 4, ksize=3, stride=stride))
                input_channel = output_channel

        self.features = nn.SequentialCell(features)
        # NOTE(review): the fixed 7x7 pooling window presumably matches the final
        # feature map of a 224x224 input - confirm before feeding other sizes.
        self.globalpool = nn.AvgPool2d(7)
        self.classifier = nn.Dense(self.stage_out_channels[-1], n_class)
        self.reshape = P.Reshape()

    def construct(self, x):
        x = self.first_conv(x)
        x = self.maxpool(x)
        x = self.features(x)
        x = self.globalpool(x)
        # Flatten to (batch, channels) for the dense classifier.
        x = self.reshape(x, (-1, self.stage_out_channels[-1]))
        x = self.classifier(x)
        return x
|
@ -0,0 +1,158 @@
|
|||||||
|
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
"""train ShuffleNetV1"""
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import argparse
|
||||||
|
import numpy as np
|
||||||
|
from mindspore import context
|
||||||
|
from mindspore import Tensor
|
||||||
|
from mindspore.common import set_seed
|
||||||
|
from mindspore.nn.optim.momentum import Momentum
|
||||||
|
from mindspore.train.model import Model, ParallelMode
|
||||||
|
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
|
||||||
|
from mindspore.train.serialization import load_checkpoint, load_param_into_net
|
||||||
|
from mindspore.communication.management import init, get_rank, get_group_size
|
||||||
|
from mindspore.train.loss_scale_manager import FixedLossScaleManager
|
||||||
|
from src.lr_generator import get_lr
|
||||||
|
from src.shufflenetv1 import ShuffleNetV1
|
||||||
|
from src.config import config
|
||||||
|
from src.dataset import create_dataset
|
||||||
|
from src.crossentropysmooth import CrossEntropySmooth
|
||||||
|
|
||||||
|
set_seed(1)
|
||||||
|
|
||||||
|
|
||||||
|
class Monitor(Callback):
    """
    Monitor loss and time.

    Collects the loss of every step and, at the end of each epoch, prints the
    epoch time, the per-step time (both in milliseconds) and the mean loss.

    Args:
        lr_init (numpy array): train lr. Default: None.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        # The default lr_init=None has no length; record 0 instead of raising TypeError.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        # Defined up front so step_end works even if epoch_begin has not run yet.
        self.losses = []

    def epoch_begin(self, run_context):
        # Reset the per-epoch accumulators.
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds, per_step_mseconds,
                                                                                      np.mean(self.losses)))

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_loss = cb_params.net_outputs

        # Network outputs may be a tuple/list whose first element is the loss tensor.
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        # Reduce a loss tensor to a single scalar.
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line options for the training run.
    parser = argparse.ArgumentParser(description='image classification training')
    parser.add_argument('--is_distributed', action='store_true', default=False, help='distributed training')
    parser.add_argument('--device_target', type=str, default='Ascend', choices=('Ascend', 'GPU'), help='run platform')
    parser.add_argument('--dataset_path', type=str, default='', help='dataset path')
    parser.add_argument('--device_id', type=int, default=0, help='device id')
    parser.add_argument('--resume', type=str, default='', help='resume training with existed checkpoint')
    parser.add_argument('--model_size', type=str, default='2.0x', help='ShuffleNetV1 model size',
                        choices=['2.0x', '1.5x', '1.0x', '0.5x'])
    args_opt = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=False)

    # init distributed
    if args_opt.is_distributed:
        # In distributed mode the device id comes from the DEVICE_ID environment
        # variable (when it is set to a number) instead of --device_id.
        if os.getenv('DEVICE_ID', "not_set").isdigit():
            context.set_context(device_id=int(os.getenv('DEVICE_ID')))
        init()
        rank = get_rank()
        group_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=group_size, gradients_mean=True)
    else:
        rank = 0
        group_size = 1
        context.set_context(device_id=args_opt.device_id)

    # define network
    net = ShuffleNetV1(model_size=args_opt.model_size)

    # define loss
    loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
                              num_classes=config.num_classes)

    # define dataset
    dataset = create_dataset(args_opt.dataset_path, do_train=True, device_num=group_size, rank=rank)
    batches_per_epoch = dataset.get_dataset_size()

    # resume
    if args_opt.resume:
        ckpt = load_checkpoint(args_opt.resume)
        load_param_into_net(net, ckpt)

    # get learning rate
    lr = get_lr(lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max, warmup_epochs=config.warmup_epochs,
                total_epochs=config.epoch_size, steps_per_epoch=batches_per_epoch, lr_decay_mode=config.decay_method)
    lr = Tensor(lr)

    # define optimization
    optimizer = Momentum(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum,
                         weight_decay=config.weight_decay, loss_scale=config.loss_scale)

    # model
    loss_scale_manager = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    model = Model(net, loss_fn=loss, optimizer=optimizer, amp_level=config.amp_level,
                  loss_scale_manager=loss_scale_manager)

    # define callbacks
    cb = [Monitor(lr_init=lr.asnumpy())]
    # ckpt_cb stays None when checkpointing is disabled, so the registration
    # below cannot hit an unbound name.
    ckpt_cb = None
    if config.save_checkpoint:
        save_ckpt_path = config.ckpt_path
        config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * batches_per_epoch,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint("shufflenetv1", directory=save_ckpt_path, config=config_ck)

    print("============== Starting Training ==============")
    start_time = time.time()
    # begin train
    # rank is 0 for non-distributed runs, so this single check registers the
    # checkpoint callback for single-device training and, in distributed
    # training, only on rank 0.
    if ckpt_cb is not None and rank == 0:
        cb.append(ckpt_cb)
    model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=True)
    print("time: ", (time.time() - start_time) * 1000)
    print("============== Train Success ==============")
|
Loading…
Reference in new issue