pull/6156/head
parent 2f14c40934
commit d10be908b8
@@ -0,0 +1,84 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
from mindspore import context
from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import dtype as mstype
from src.dataset import create_dataset
from src.config import config_ascend, config_gpu
from src.ghostnet import ghostnet_1x, ghostnet_nose_1x
from src.ghostnet600 import ghostnet_600m


parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--platform', type=str, default=None, help='run platform')
parser.add_argument('--model', type=str, default=None,
                    help='model name: ghostnet | ghostnet_nose | ghostnet-600')
args_opt = parser.parse_args()


if __name__ == '__main__':
    config_platform = None
    if args_opt.platform == "Ascend":
        config_platform = config_ascend
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                            device_id=device_id, save_graphs=False)
    elif args_opt.platform == "GPU":
        config_platform = config_gpu
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="GPU", save_graphs=False)
    else:
        raise ValueError("Unsupported platform.")

    loss = nn.SoftmaxCrossEntropyWithLogits(
        is_grad=False, sparse=True, reduction='mean')

    if args_opt.model == 'ghostnet':
        net = ghostnet_1x(num_classes=config_platform.num_classes)
    elif args_opt.model == 'ghostnet_nose':
        net = ghostnet_nose_1x(num_classes=config_platform.num_classes)
    elif args_opt.model == 'ghostnet-600':
        net = ghostnet_600m(num_classes=config_platform.num_classes)
    else:
        raise ValueError("Unsupported model.")

    if args_opt.platform == "Ascend":
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config_platform,
                             platform=args_opt.platform,
                             batch_size=config_platform.batch_size,
                             model=args_opt.model)
    step_size = dataset.get_dataset_size()

    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
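A hypothetical invocation of the eval script above, shown for orientation; the paths are placeholders, and only the flags come from the argparse definitions in this file:

    python eval.py --platform=Ascend --model=ghostnet \
        --dataset_path=/path/to/val.mindrecord \
        --checkpoint_path=/path/to/ghostnet_1x.ckpt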
@@ -0,0 +1,27 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""hub config."""
from src.ghostnet import ghostnet_1x, ghostnet_nose_1x
from src.ghostnet600 import ghostnet_600m


def create_network(name, *args, **kwargs):
    if name == 'ghostnet':
        return ghostnet_1x(*args, **kwargs)
    if name == 'ghostnet_nose':
        return ghostnet_nose_1x(*args, **kwargs)
    if name == 'ghostnet-600':
        return ghostnet_600m(*args, **kwargs)
    raise NotImplementedError(f"{name} is not implemented in the repo")
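A minimal sketch of how this hub entry point would be exercised, assuming the file is saved under the usual hub-config name mindspore_hub_conf.py and that num_classes matches the config (both assumptions, not stated in the diff):

    from mindspore_hub_conf import create_network

    net = create_network('ghostnet', num_classes=37)        # ghostnet_1x Cell
    net_600 = create_network('ghostnet-600', num_classes=37)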
@@ -0,0 +1,54 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed

config_ascend = ed({
    "num_classes": 37,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 256,
    "epoch_size": 200,
    "warmup_epochs": 4,
    "lr": 0.4,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 200,
    "save_checkpoint_path": "./checkpoint",
})

config_gpu = ed({
    "num_classes": 37,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 3,
    "epoch_size": 370,
    "warmup_epochs": 4,
    "lr": 0.4,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 500,
    "save_checkpoint_path": "./checkpoint",
})
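The configs above are EasyDicts so that train.py and eval.py can use attribute access instead of key lookups; a minimal, self-contained illustration:

    from easydict import EasyDict as ed

    cfg = ed({"batch_size": 256, "lr": 0.4})
    assert cfg.batch_size == cfg["batch_size"] == 256
    cfg.lr = 0.1        # attribute writes update the underlying dict too
    print(cfg["lr"])    # 0.1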
@@ -0,0 +1,110 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.vision.py_transforms as P
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.dataset.transforms.vision import Inter


def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=100, model='ghostnet'):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config(EasyDict): platform config holding the image sizes.
        platform(string): run platform, "Ascend" or "GPU".
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 100
        model(string): model name; selects the eval resize/crop sizes. Default: 'ghostnet'

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if rank_size == 1:
            ds = de.MindDataset(
                dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            ds = de.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                num_shards=rank_size, shard_id=rank_id)
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.MindDataset(
                dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        raise ValueError("Unsupported platform.")

    resize_height = config.image_height
    buffer_size = 1000

    # define map operations
    resize_crop_op = C.RandomCropDecodeResize(
        resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    color_op = C.RandomColorAdjust(
        brightness=0.4, contrast=0.4, saturation=0.4)
    rescale_op = C.Rescale(1/255.0, 0)
    normalize_op = C.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    # define python operations
    decode_p = P.Decode()
    if model == 'ghostnet-600':
        s = 274
        c = 240
    else:
        s = 256
        c = 224
    resize_p = P.Resize(s, interpolation=Inter.BICUBIC)
    center_crop_p = P.CenterCrop(c)
    totensor = P.ToTensor()
    normalize_p = P.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    composeop = P.ComposeOp(
        [decode_p, resize_p, center_crop_p, totensor, normalize_p])
    if do_train:
        trans = [resize_crop_op, horizontal_flip_op, color_op,
                 rescale_op, normalize_op, change_swap_op]
    else:
        trans = composeop()
    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans,
                num_parallel_workers=8)
    ds = ds.map(input_columns="label_list",
                operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
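A hypothetical eval-side call of create_dataset (the MindRecord path is a placeholder); with platform='GPU' and do_train=False the data is read unsharded and mapped through the Python-ops pipeline above:

    from src.config import config_gpu
    from src.dataset import create_dataset

    ds = create_dataset(dataset_path='/path/to/val.mindrecord',
                        do_train=False,
                        config=config_gpu,
                        platform='GPU',
                        batch_size=config_gpu.batch_size,
                        model='ghostnet')
    print(ds.get_dataset_size())  # number of batches per epoch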
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,165 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""launch train script"""
import os
import sys
import json
import subprocess
import shutil
from argparse import ArgumentParser


def parse_args():
    """
    parse args.

    Args:

    Returns:
        args.

    Examples:
        >>> parse_args()
    """
    parser = ArgumentParser(description="mindspore distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node; "
                             "for distributed training, set this to the number "
                             "of devices in your system so that each process "
                             "can be bound to a single device.")
    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                        help="will use the visible devices sequentially")
    parser.add_argument("--server_id", type=str, default="",
                        help="server ip")
    parser.add_argument("--training_script", type=str,
                        help="The full path to the single-device training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")
    # rest from the training program
    args, unknown = parser.parse_known_args()
    args.training_script_args = unknown
    return args


def main():
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        print('please input server ip!')
        sys.exit(1)
    print('server_id:{}'.format(args.server_id))

    # construct hccn_table
    hccn_configs = open('/etc/hccn.conf', 'r').readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
    hccn_table = {}
    hccn_table['board_id'] = '0x0000'
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # save hccn_table to file
    table_path = os.getcwd()
    if not os.path.exists(table_path):
        os.mkdir(table_path)
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes
    processes = []
    cmds = []
    log_files = []
    env = os.environ.copy()
    env['RANK_SIZE'] = str(args.nproc_per_node)
    cur_path = os.getcwd()
    for rank_id in range(0, args.nproc_per_node):
        os.chdir(cur_path)
        device_id = visible_devices[rank_id]
        device_dir = os.path.join(cur_path, 'device{}'.format(rank_id))
        env['RANK_ID'] = str(rank_id)
        env['DEVICE_ID'] = str(device_id)
        if args.nproc_per_node > 1:
            env['RANK_TABLE_FILE'] = table_fn
        if os.path.exists(device_dir):
            shutil.rmtree(device_dir)
        os.mkdir(device_dir)
        os.chdir(device_dir)
        cmd = [sys.executable, '-u']
        cmd.append(args.training_script)
        cmd.extend(args.training_script_args)
        log_file = open(
            '{dir}/log{id}.log'.format(dir=device_dir, id=rank_id), 'w')
        process = subprocess.Popen(
            cmd, stdout=log_file, stderr=log_file, env=env)
        processes.append(process)
        cmds.append(cmd)
        log_files.append(log_file)
    for process, cmd, log_file in zip(processes, cmds, log_files):
        process.wait()
        if process.returncode != 0:
            raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
        log_file.close()


if __name__ == "__main__":
    main()
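A hypothetical launch on a single 8-device server (the server ip and the trailing training-script flags are placeholders); flags the parser does not recognize are forwarded verbatim to the training script, and each rank writes to ./device{rank}/log{rank}.log:

    python launch.py --nproc_per_node=8 --visible_devices=0,1,2,3,4,5,6,7 \
        --server_id=10.0.0.1 --training_script=train.py \
        --dataset_path=/path/to/train.mindrecord --platform=Ascend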
@@ -0,0 +1,55 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import math
import numpy as np


def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    generate learning rate array

    Args:
        global_step(int): step to resume from; the returned array starts at this step
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, learning rate array
    """
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
        else:
            lr = lr_end + \
                (lr_max - lr_end) * \
                (1. + math.cos(math.pi * (i - warmup_steps) /
                               (total_steps - warmup_steps))) / 2.
        if lr < 0.0:
            lr = 0.0
        lr_each_step.append(lr)

    current_step = global_step
    lr_each_step = np.array(lr_each_step).astype(np.float32)
    learning_rate = lr_each_step[current_step:]

    return learning_rate
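A quick sanity check of the schedule above, run in the same module, with hyper-parameters chosen only for illustration: warmup ramps linearly to lr_max, then the cosine term decays toward lr_end:

    lr = get_lr(global_step=0, lr_init=0.0, lr_end=0.0, lr_max=0.4,
                warmup_epochs=4, total_epochs=200, steps_per_epoch=100)
    print(lr[0], lr[399])  # 0.0 -> ~0.399: linear ramp over the 400 warmup steps
    print(lr[-1])          # ~0.0: fully decayed to lr_end by the last step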
@@ -0,0 +1,77 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
from mindspore import context
from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import dtype as mstype
from src.dataset import create_dataset
from src.config import config_ascend, config_gpu
from src.ghostnet import ghostnet_1x

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str,
                    default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str,
                    default=None, help='Dataset path')
parser.add_argument('--platform', type=str, default=None, help='run platform')
args_opt = parser.parse_args()


if __name__ == '__main__':
    config_platform = None
    if args_opt.platform == "Ascend":
        config_platform = config_ascend
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                            device_id=device_id, save_graphs=False)
    elif args_opt.platform == "GPU":
        config_platform = config_gpu
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="GPU", save_graphs=False)
    else:
        raise ValueError("Unsupported platform.")

    loss = nn.SoftmaxCrossEntropyWithLogits(
        is_grad=False, sparse=True, reduction='mean')

    net = ghostnet_1x(num_classes=config_platform.num_classes)

    if args_opt.platform == "Ascend":
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config_platform,
                             platform=args_opt.platform,
                             batch_size=config_platform.batch_size)
    step_size = dataset.get_dataset_size()

    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
@@ -0,0 +1,22 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""hub config."""
from src.ghostnet import ghostnet_1x


def create_network(name, *args, **kwargs):
    if name == 'ghostnet_int8':
        return ghostnet_1x(*args, **kwargs)
    raise NotImplementedError(f"{name} is not implemented in the repo")
@@ -0,0 +1,54 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed

config_ascend = ed({
    "num_classes": 37,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 256,
    "epoch_size": 200,
    "warmup_epochs": 4,
    "lr": 0.4,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 200,
    "save_checkpoint_path": "./checkpoint",
})

config_gpu = ed({
    "num_classes": 37,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 3,
    "epoch_size": 370,
    "warmup_epochs": 4,
    "lr": 0.4,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 500,
    "save_checkpoint_path": "./checkpoint",
})
@@ -0,0 +1,110 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.vision.py_transforms as P
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.dataset.transforms.vision import Inter


def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=100, model='ghostnet'):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config(EasyDict): platform config holding the image sizes.
        platform(string): run platform, "Ascend" or "GPU".
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 100
        model(string): model name; selects the eval resize/crop sizes. Default: 'ghostnet'

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if rank_size == 1:
            ds = de.MindDataset(
                dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            ds = de.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                num_shards=rank_size, shard_id=rank_id)
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.MindDataset(
                dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        raise ValueError("Unsupported platform.")

    resize_height = config.image_height
    buffer_size = 1000

    # define map operations
    resize_crop_op = C.RandomCropDecodeResize(
        resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    color_op = C.RandomColorAdjust(
        brightness=0.4, contrast=0.4, saturation=0.4)
    rescale_op = C.Rescale(1/255.0, 0)
    normalize_op = C.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    # define python operations
    decode_p = P.Decode()
    if model == 'ghostnet-600':
        s = 274
        c = 240
    else:
        s = 256
        c = 224
    resize_p = P.Resize(s, interpolation=Inter.BICUBIC)
    center_crop_p = P.CenterCrop(c)
    totensor = P.ToTensor()
    normalize_p = P.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    composeop = P.ComposeOp(
        [decode_p, resize_p, center_crop_p, totensor, normalize_p])
    if do_train:
        trans = [resize_crop_op, horizontal_flip_op, color_op,
                 rescale_op, normalize_op, change_swap_op]
    else:
        trans = composeop()
    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans,
                num_parallel_workers=8)
    ds = ds.map(input_columns="label_list",
                operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
File diff suppressed because it is too large
@@ -0,0 +1,165 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""launch train script"""
import os
import sys
import json
import subprocess
import shutil
from argparse import ArgumentParser


def parse_args():
    """
    parse args.

    Args:

    Returns:
        args.

    Examples:
        >>> parse_args()
    """
    parser = ArgumentParser(description="mindspore distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node; "
                             "for distributed training, set this to the number "
                             "of devices in your system so that each process "
                             "can be bound to a single device.")
    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                        help="will use the visible devices sequentially")
    parser.add_argument("--server_id", type=str, default="",
                        help="server ip")
    parser.add_argument("--training_script", type=str,
                        help="The full path to the single-device training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")
    # rest from the training program
    args, unknown = parser.parse_known_args()
    args.training_script_args = unknown
    return args


def main():
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        print('please input server ip!')
        sys.exit(1)
    print('server_id:{}'.format(args.server_id))

    # construct hccn_table
    hccn_configs = open('/etc/hccn.conf', 'r').readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
    hccn_table = {}
    hccn_table['board_id'] = '0x0000'
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # save hccn_table to file
    table_path = os.getcwd()
    if not os.path.exists(table_path):
        os.mkdir(table_path)
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes
    processes = []
    cmds = []
    log_files = []
    env = os.environ.copy()
    env['RANK_SIZE'] = str(args.nproc_per_node)
    cur_path = os.getcwd()
    for rank_id in range(0, args.nproc_per_node):
        os.chdir(cur_path)
        device_id = visible_devices[rank_id]
        device_dir = os.path.join(cur_path, 'device{}'.format(rank_id))
        env['RANK_ID'] = str(rank_id)
        env['DEVICE_ID'] = str(device_id)
        if args.nproc_per_node > 1:
            env['RANK_TABLE_FILE'] = table_fn
        if os.path.exists(device_dir):
            shutil.rmtree(device_dir)
        os.mkdir(device_dir)
        os.chdir(device_dir)
        cmd = [sys.executable, '-u']
        cmd.append(args.training_script)
        cmd.extend(args.training_script_args)
        log_file = open(
            '{dir}/log{id}.log'.format(dir=device_dir, id=rank_id), 'w')
        process = subprocess.Popen(
            cmd, stdout=log_file, stderr=log_file, env=env)
        processes.append(process)
        cmds.append(cmd)
        log_files.append(log_file)
    for process, cmd, log_file in zip(processes, cmds, log_files):
        process.wait()
        if process.returncode != 0:
            raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
        log_file.close()


if __name__ == "__main__":
    main()
@@ -0,0 +1,55 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""learning rate generator"""
import math
import numpy as np


def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    generate learning rate array

    Args:
        global_step(int): step to resume from; the returned array starts at this step
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, learning rate array
    """
    lr_each_step = []
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
        else:
            lr = lr_end + \
                (lr_max - lr_end) * \
                (1. + math.cos(math.pi * (i - warmup_steps) /
                               (total_steps - warmup_steps))) / 2.
        if lr < 0.0:
            lr = 0.0
        lr_each_step.append(lr)

    current_step = global_step
    lr_each_step = np.array(lr_each_step).astype(np.float32)
    learning_rate = lr_each_step[current_step:]

    return learning_rate
@@ -0,0 +1,61 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Quantization define"""
import mindspore as ms
import mindspore.nn as nn
from mindspore import Parameter, Tensor
from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore.common.initializer import initializer

# ------weight symmetric, activation asymmetric------#


class QuanConv(nn.Conv2d):
    r"""Conv for quantization"""
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, pad_mode='same',
                 padding=0, dilation=1, group=1, has_bias=True):
        super(QuanConv, self).__init__(in_channels, out_channels,
                                       kernel_size, stride, pad_mode, padding, dilation, group, has_bias)
        self.floor = P.Floor()
        self.expand_dims = P.ExpandDims()
        self.x_lower_bound = Tensor(0, ms.float32)
        self.x_upper_bound = Tensor(2 ** 8 - 1, ms.float32)
        self.w_lower_bound = Tensor(-2 ** 7 - 1, ms.float32)
        self.w_upper_bound = Tensor(2 ** 7, ms.float32)
        self.scale_a = Parameter(initializer('ones', [1]), name='scale_a')
        self.scale_w = Parameter(initializer(
            'ones', [out_channels]), name='scale_w')
        self.zp_a = Parameter(initializer('ones', [1]), name='zp_a')

    def construct(self, in_data):
        r"""construct of QuanConv"""
        # fake-quantize the activation: scale and shift by the zero-point,
        # round, clip to the 8-bit range, then dequantize
        x = self.floor(in_data / self.scale_a - self.zp_a + 0.5)
        x = C.clip_by_value(x, self.x_lower_bound, self.x_upper_bound)
        x = (x + self.zp_a) * self.scale_a

        # fake-quantize the weight per output channel (symmetric, no zero-point)
        exp_dim_scale_w = self.scale_w
        exp_dim_scale_w = self.expand_dims(exp_dim_scale_w, 1)
        exp_dim_scale_w = self.expand_dims(exp_dim_scale_w, 2)
        exp_dim_scale_w = self.expand_dims(exp_dim_scale_w, 3)
        w = self.floor(self.weight / exp_dim_scale_w + 0.5)
        w = C.clip_by_value(w, self.w_lower_bound, self.w_upper_bound)
        w = w * exp_dim_scale_w

        # forward
        output = self.conv2d(x, w)
        if self.has_bias:
            output = self.bias_add(output, self.bias)
        return output
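A minimal smoke test of QuanConv, assuming a PyNative (interactive) context and the MindSpore version this PR targets; with the default 'ones' parameters the layer acts as a fake-quantize-then-convolve step:

    import numpy as np
    import mindspore as ms

    conv = QuanConv(in_channels=3, out_channels=8, kernel_size=3)
    x = ms.Tensor(np.random.rand(1, 3, 32, 32).astype(np.float32))
    y = conv(x)
    print(y.shape)  # (1, 8, 32, 32): pad_mode='same' with stride 1 keeps H and W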
@@ -0,0 +1,88 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
import numpy as np

from mindspore import context, Tensor
from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import dtype as mstype

from src.pet_dataset import create_dataset
from src.config import config_ascend, config_gpu
from src.resnet_imgnet import resnet50


parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str,
                    default='resnet50-imgnet-0.65x-80.24.ckpt', help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str,
                    default='/home/hankai/xiaoan/data/test.mindrecord', help='Dataset path')
parser.add_argument('--platform', type=str, default='GPU', help='run platform')
args_opt = parser.parse_args()


if __name__ == '__main__':
    config_platform = None
    if args_opt.platform == "Ascend":
        config_platform = config_ascend
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                            device_id=device_id, save_graphs=False)
    elif args_opt.platform == "GPU":
        config_platform = config_gpu
        context.set_context(mode=context.GRAPH_MODE,
                            device_target="GPU", save_graphs=False)
    else:
        raise ValueError("Unsupported platform.")

    loss = nn.SoftmaxCrossEntropyWithLogits(
        is_grad=False, sparse=True, reduction='mean')

    index = []
    with open('index.txt', 'r') as f:
        for line in f:
            ind = Tensor((np.array(line.strip('\n').split(' ')[:-1])).astype(np.int32).reshape(-1, 1))
            index.append(ind)

    # create the pruned resnet50 before any platform-specific dtype casts
    net = resnet50(
        rate=0.65, class_num=config_platform.num_classes, index=index)

    if args_opt.platform == "Ascend":
        net.to_float(mstype.float16)
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Dense):
                cell.to_float(mstype.float32)

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config_platform,
                             platform=args_opt.platform,
                             batch_size=config_platform.batch_size)
    step_size = dataset.get_dataset_size()

    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)

    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)
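The index.txt format assumed by the loop above is one space-separated run of integers per line with a trailing separator, parsed into a column vector; a toy reproduction of that parsing step:

    import numpy as np

    line = "3 17 42 \n"  # hypothetical row, for illustration only
    ind = np.array(line.strip('\n').split(' ')[:-1]).astype(np.int32).reshape(-1, 1)
    print(ind.ravel())   # [ 3 17 42]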
File diff suppressed because one or more lines are too long
@@ -0,0 +1,22 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""hub config."""
from src.resnet_imgnet import resnet50


def create_network(name, *args, **kwargs):
    if name == 'resnet-0.65x':
        return resnet50(*args, **kwargs)
    raise NotImplementedError(f"{name} is not implemented in the repo")
@@ -0,0 +1,54 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed

config_ascend = ed({
    "num_classes": 438,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 256,
    "epoch_size": 200,
    "warmup_epochs": 1,
    "lr": 0.02,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 5,
    "keep_checkpoint_max": 200,
    "save_checkpoint_path": "./checkpoint",
})

config_gpu = ed({
    "num_classes": 37,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 1,
    "epoch_size": 200,
    "warmup_epochs": 0,
    "lr": 0.8,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    # "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 200,
    "save_checkpoint_path": "./checkpoint",
})
@@ -0,0 +1,106 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.vision.py_transforms as P
import mindspore.dataset.transforms.c_transforms as C2
from mindspore.dataset.transforms.vision import Inter


def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=100):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        config(EasyDict): platform config holding the image sizes.
        platform(string): run platform, "Ascend" or "GPU".
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 100

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if rank_size == 1:
            ds = de.MindDataset(
                dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            ds = de.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                num_shards=rank_size, shard_id=rank_id)
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.MindDataset(
                dataset_path, num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupported platform.")

    resize_height = config.image_height
    buffer_size = 1000

    # define map operations
    resize_crop_op = C.RandomCropDecodeResize(
        resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    color_op = C.RandomColorAdjust(
        brightness=0.4, contrast=0.4, saturation=0.4)
    rescale_op = C.Rescale(1/255.0, 0)
    normalize_op = C.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    # define python operations
    decode_p = P.Decode()
    resize_p = P.Resize(256, interpolation=Inter.BILINEAR)
    center_crop_p = P.CenterCrop(224)
    totensor = P.ToTensor()
    normalize_p = P.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    # change_op_p = P.HWC2CHW()
    composeop = P.ComposeOp(
        [decode_p, resize_p, center_crop_p, totensor, normalize_p])
    if do_train:
        trans = [resize_crop_op, horizontal_flip_op, color_op,
                 rescale_op, normalize_op, change_swap_op]
    else:
        # trans = [decode_op, resize_op, center_crop, rescale_op, normalize_op, change_swap_op]
        trans = composeop()
    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans,
                num_parallel_workers=8)
    ds = ds.map(input_columns="label_list",
                operations=type_cast_op, num_parallel_workers=8)

    # apply shuffle operations
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Some files were not shown because too many files have changed in this diff