for comments

for debug

for DEBUG

for DEBUG

for DEBUG

for DEBUG

for well performance

for pylint

for te chip

for pylint

for pylint nth
pull/1481/head
z00478463 6 years ago
parent bdb00c5624
commit 3f38b1a00e

@ -23,7 +23,7 @@ config = ed({
"loss_scale": 128,
"momentum": 0.9,
"weight_decay": 5e-4,
"epoch_size": 50,
"epoch_size": 45,
"buffer_size": 1000,
"image_height": 224,
"image_width": 224,

@ -0,0 +1,60 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
from dataset_imagenet import create_dataset
from config import config
from mindspore import context
from mindspore.model_zoo.resnet import resnet50
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from crossentropy import CrossEntropy
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
parser.add_argument('--device_num', type=int, default=1, help='Device num.')
parser.add_argument('--do_train', type=bool, default=False, help='Do train or not.')
parser.add_argument('--do_eval', type=bool, default=True, help='Do eval or not.')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
args_opt = parser.parse_args()
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
context.set_context(device_id=device_id)
if __name__ == '__main__':
net = resnet50(class_num=config.class_num)
if not config.label_smooth:
config.label_smooth_factor = 0.0
loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
if args_opt.do_eval:
dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
step_size = dataset.get_dataset_size()
if args_opt.checkpoint_path:
param_dict = load_checkpoint(args_opt.checkpoint_path)
load_param_into_net(net, param_dict)
net.set_train(False)
model = Model(net, loss_fn=loss, metrics={'acc'})
res = model.eval(dataset)
print("result:", res, "ckpt=", args_opt.checkpoint_path)

@ -21,6 +21,7 @@ from mindspore.common.tensor import Tensor
from mindspore.nn.optim.optimizer import Optimizer
from mindspore.ops import functional as F, composite as C, operations as P
from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
from model.grad_reducer_thor import DistributedGradReducerThor
momentum_opt = C.MultitypeFuncGraph("momentum_opt")

@ -0,0 +1,64 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 2 ]
then
echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
exit 1
fi
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
if [ ! -d $PATH1 ]
then
echo "error: DATASET_PATH=$1 is not a directory"
exit 1
fi
if [ ! -f $PATH2 ]
then
echo "error: CHECKPOINT_PATH=$2 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_SIZE=$DEVICE_NUM
export RANK_ID=0
if [ -d "infer" ];
then
rm -rf ./infer
fi
mkdir ./infer
cp *.py ./infer
cp *.sh ./infer
cd ./infer || exit
env > env.log
echo "start infering for device $DEVICE_ID"
python eval.py --do_eval=True --dataset_path=$PATH1 --checkpoint_path=$PATH2 &> log &
cd ..

@ -109,7 +109,7 @@ if __name__ == '__main__':
step_size = dataset.get_dataset_size()
loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
lr = Tensor(get_model_lr(0, 0.05, 6, 70, 5004))
lr = Tensor(get_model_lr(0, 0.045, 6, 70, 5004))
opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
filter(lambda x: 'matrix_G' in x.name, net.get_parameters()),

@ -486,41 +486,41 @@ def cus_cube_matmul_cast(tik_instance, input_x1, trans_a, input_x2, trans_b,
input_x2_cast_ub[count * repeate_times_max * vectorfp32_size],
input_x2_ub[count * repeate_times_max * vectorfp32_size], repeate_num,
1, 1, 4, 8)
input_x2_L1 = tik_instance.Tensor("float16", [no_tile, ko_tile_inner, c0, c0],
name="input_x2_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x2_L1, input_x2_cast_ub, 0, 1,
no_tile * ko_tile_inner * c0 * c0 * fp16_size // blocksize, 0, 0)
# input_x1 -> input_x1_L1
input_x1_L1 = tik_instance.Tensor(input_x1.dtype, [ko_tile_inner, mo_tile, c0, c0],
name="input_x1_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x1_L1,
input_x1[k_idx,
core_m * mo_tile, 0, 0],
0, ko_tile_inner, mo_tile * c0 * c0 * fp16_size // blocksize,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize, 0)
# input_x2_L1 -> input_x2_L0B
input_x2_L0B = tik_instance.Tensor("float16", [ko_tile_inner, no_tile, c0, c0],
name="input_x2_L0B", scope=tik.scope_cb)
with tik_instance.for_range(0, ko_tile_inner) as cc2:
tik_instance.load2dv1(input_x2_L0B[cc2, 0, 0, 0], input_x2_L1[0, cc2, 0, 0], 0, no_tile,
ko_tile_inner,
0, True)
# input_x1_L1 -> input_x1_L0A
input_x1_L0A = tik_instance.Tensor(input_x1.dtype, [mo_tile, ko_tile_inner, c0, c0],
name="input_x1_L0A", scope=tik.scope_ca)
with tik_instance.for_range(0, mo_tile) as cc1:
tik_instance.load2dv1(input_x1_L0A[cc1, 0, 0, 0], input_x1_L1[0, cc1, 0, 0], 0, ko_tile_inner,
mo_tile, 0, False)
with tik_instance.if_scope(thread_idx_k == 0):
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 0)
with tik_instance.else_scope():
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 1)
res_ub = tik_instance.Tensor(input_x1.dtype, [no_tile, mo_tile, c0, c0],
name="resMatmul_ub", scope=tik.scope_ubuf)
tik_instance.data_move(res_ub, res_L0C, 0, 1, no_tile * mo_tile, 0, 0, 1)
tik_instance.data_move(res[(core_n * loop_n_num + cc_n) * no_tile, core_m * mo_tile, 0, 0],
res_ub, 0, no_tile,
mo_tile * c0 * c0 * fp16_size // blocksize, 0,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize)
input_x2_L1 = tik_instance.Tensor("float16", [no_tile, ko_tile_inner, c0, c0],
name="input_x2_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x2_L1, input_x2_cast_ub, 0, 1,
no_tile * ko_tile_inner * c0 * c0 * fp16_size // blocksize, 0, 0)
# input_x1 -> input_x1_L1
input_x1_L1 = tik_instance.Tensor(input_x1.dtype, [ko_tile_inner, mo_tile, c0, c0],
name="input_x1_L1", scope=tik.scope_cbuf)
tik_instance.data_move(input_x1_L1,
input_x1[k_idx,
core_m * mo_tile, 0, 0],
0, ko_tile_inner, mo_tile * c0 * c0 * fp16_size // blocksize,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize, 0)
# input_x2_L1 -> input_x2_L0B
input_x2_L0B = tik_instance.Tensor("float16", [ko_tile_inner, no_tile, c0, c0],
name="input_x2_L0B", scope=tik.scope_cb)
with tik_instance.for_range(0, ko_tile_inner) as cc2:
tik_instance.load2dv1(input_x2_L0B[cc2, 0, 0, 0], input_x2_L1[0, cc2, 0, 0], 0, no_tile,
ko_tile_inner,
0, True)
# input_x1_L1 -> input_x1_L0A
input_x1_L0A = tik_instance.Tensor(input_x1.dtype, [mo_tile, ko_tile_inner, c0, c0],
name="input_x1_L0A", scope=tik.scope_ca)
with tik_instance.for_range(0, mo_tile) as cc1:
tik_instance.load2dv1(input_x1_L0A[cc1, 0, 0, 0], input_x1_L1[0, cc1, 0, 0], 0, ko_tile_inner,
mo_tile, 0, False)
with tik_instance.if_scope(thread_idx_k == 0):
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 0)
with tik_instance.else_scope():
tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
ko_tile_inner * c0, no_tile * c0, 1)
res_ub = tik_instance.Tensor(input_x1.dtype, [no_tile, mo_tile, c0, c0],
name="resMatmul_ub", scope=tik.scope_ubuf)
tik_instance.data_move(res_ub, res_L0C, 0, 1, no_tile * mo_tile, 0, 0, 1)
tik_instance.data_move(res[(core_n * loop_n_num + cc_n) * no_tile, core_m * mo_tile, 0, 0],
res_ub, 0, no_tile,
mo_tile * c0 * c0 * fp16_size // blocksize, 0,
(mo - mo_tile) * c0 * c0 * fp16_size // blocksize)

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save