for comments

for debug for DEBUG for DEBUG for DEBUG for DEBUG for well performance for pylint for te chip for pylint for pylint nth
6 years ago · 3f38b1a00e
parent bdb00c5624
commit 3f38b1a00e
7 changed files with 378 additions and 52 deletions
--- a/example/resnet50_imagenet2012_THOR/config.py
+++ b/example/resnet50_imagenet2012_THOR/config.py
@ -23,7 +23,7 @@ config = ed({
    "loss_scale": 128,
    "momentum": 0.9,
    "weight_decay": 5e-4,
-    "epoch_size": 50,
+    "epoch_size": 45,
    "buffer_size": 1000,
    "image_height": 224,
    "image_width": 224,
--- a/example/resnet50_imagenet2012_THOR/eval.py
+++ b/example/resnet50_imagenet2012_THOR/eval.py
@ -0,0 +1,80 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+Evaluate a ResNet-50 checkpoint on a dataset and print the 'acc' metric.
+"""
+import os
+import argparse
+from dataset_imagenet import create_dataset
+from config import config
+from mindspore import context
+from mindspore.model_zoo.resnet import resnet50
+from mindspore.train.model import Model
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from crossentropy import CrossEntropy
+
+
+def _str2bool(value):
+    """Convert a command-line string into a real boolean.
+
+    ``type=bool`` makes argparse call ``bool("False")``, which is True, so any
+    non-empty value (including "False" or "0") used to switch the flag on.
+    The empty string still maps to False, exactly as ``bool("")`` did.
+    """
+    text = value.strip().lower()
+    if text in ('true', 't', 'yes', 'y', '1'):
+        return True
+    if text in ('false', 'f', 'no', 'n', '0', ''):
+        return False
+    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)
+
+
+parser = argparse.ArgumentParser(description='Image classification')
+parser.add_argument('--run_distribute', type=_str2bool, default=False, help='Run distribute')
+parser.add_argument('--device_num', type=int, default=1, help='Device num.')
+parser.add_argument('--do_train', type=_str2bool, default=False, help='Do train or not.')
+parser.add_argument('--do_eval', type=_str2bool, default=True, help='Do eval or not.')
+parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
+parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
+args_opt = parser.parse_args()
+
+# Fall back to device 0 when DEVICE_ID is not exported; int(None) would
+# otherwise abort the script with a TypeError before any argument is used.
+device_id = int(os.getenv('DEVICE_ID', '0'))
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
+context.set_context(device_id=device_id)
+
+if __name__ == '__main__':
+
+    net = resnet50(class_num=config.class_num)
+    # With label smoothing switched off, the loss below is built with a zero factor.
+    if not config.label_smooth:
+        config.label_smooth_factor = 0.0
+    loss = CrossEntropy(smooth_factor=config.label_smooth_factor, num_classes=config.class_num)
+
+    if args_opt.do_eval:
+        dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, batch_size=config.batch_size)
+        step_size = dataset.get_dataset_size()
+
+        # When --checkpoint_path is omitted, no parameters are loaded into the network.
+        if args_opt.checkpoint_path:
+            param_dict = load_checkpoint(args_opt.checkpoint_path)
+            load_param_into_net(net, param_dict)
+        net.set_train(False)
+
+        model = Model(net, loss_fn=loss, metrics={'acc'})
+        res = model.eval(dataset)
+        print("result:", res, "ckpt=", args_opt.checkpoint_path)
--- a/example/resnet50_imagenet2012_THOR/model/thor.py
+++ b/example/resnet50_imagenet2012_THOR/model/thor.py
@ -21,6 +21,7 @@ from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from model.grad_reducer_thor import DistributedGradReducerThor

 momentum_opt = C.MultitypeFuncGraph("momentum_opt")

--- a/example/resnet50_imagenet2012_THOR/run_infer.sh
+++ b/example/resnet50_imagenet2012_THOR/run_infer.sh
@ -0,0 +1,69 @@
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Evaluate a checkpoint on one device: validates both paths, then runs eval.py
+# in the background from a fresh ./infer directory with output captured in ./infer/log.
+
+if [ "$#" -ne 2 ]
+then
+    echo "Usage: sh run_infer.sh [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+# Print $1 unchanged when it is absolute, otherwise resolve it against $PWD.
+# `case` is used instead of the bash-only ${1:0:1} substring expansion.
+get_real_path(){
+  case "$1" in
+    /*) echo "$1" ;;
+    *) realpath -m "$PWD/$1" ;;
+  esac
+}
+
+PATH1=$(get_real_path "$1")
+PATH2=$(get_real_path "$2")
+
+if [ ! -d "$PATH1" ]
+then
+    echo "error: DATASET_PATH=$1 is not a directory"
+    exit 1
+fi
+
+if [ ! -f "$PATH2" ]
+then
+    echo "error: CHECKPOINT_PATH=$2 is not a file"
+    exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export DEVICE_ID=0
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+
+# Recreate ./infer so every run starts from a clean copy of the scripts.
+if [ -d "infer" ];
+then
+    rm -rf ./infer
+fi
+mkdir ./infer
+cp *.py ./infer
+cp *.sh ./infer
+cd ./infer || exit
+env > env.log
+echo "start infering for device $DEVICE_ID"
+# Paths are quoted so directories containing spaces survive; stdout and stderr both go to ./log.
+python eval.py --do_eval=True --dataset_path="$PATH1" --checkpoint_path="$PATH2" > log 2>&1 &
+cd ..
--- a/example/resnet50_imagenet2012_THOR/train.py
+++ b/example/resnet50_imagenet2012_THOR/train.py
@ -109,7 +109,7 @@ if __name__ == '__main__':
        step_size = dataset.get_dataset_size()

        loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
-        lr = Tensor(get_model_lr(0, 0.05, 6, 70, 5004))
+        lr = Tensor(get_model_lr(0, 0.045, 6, 70, 5004))
        opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
                   filter(lambda x: 'matrix_G' in x.name, net.get_parameters()),
--- a/mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py
+++ b/mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py
@ -486,41 +486,41 @@ def cus_cube_matmul_cast(tik_instance, input_x1, trans_a, input_x2, trans_b,
                                   input_x2_cast_ub[count * repeate_times_max * vectorfp32_size],
                                   input_x2_ub[count * repeate_times_max * vectorfp32_size], repeate_num,
                                   1, 1, 4, 8)
-            input_x2_L1 = tik_instance.Tensor("float16", [no_tile, ko_tile_inner, c0, c0],
-                                              name="input_x2_L1", scope=tik.scope_cbuf)
-            tik_instance.data_move(input_x2_L1, input_x2_cast_ub, 0, 1,
-                                   no_tile * ko_tile_inner * c0 * c0 * fp16_size // blocksize, 0, 0)
-            # input_x1 -> input_x1_L1
-            input_x1_L1 = tik_instance.Tensor(input_x1.dtype, [ko_tile_inner, mo_tile, c0, c0],
-                                              name="input_x1_L1", scope=tik.scope_cbuf)
-            tik_instance.data_move(input_x1_L1,
-                                   input_x1[k_idx,
-                                            core_m * mo_tile, 0, 0],
-                                   0, ko_tile_inner, mo_tile * c0 * c0 * fp16_size // blocksize,
-                                   (mo - mo_tile) * c0 * c0 * fp16_size // blocksize, 0)
-            # input_x2_L1 -> input_x2_L0B
-            input_x2_L0B = tik_instance.Tensor("float16", [ko_tile_inner, no_tile, c0, c0],
-                                               name="input_x2_L0B", scope=tik.scope_cb)
-            with tik_instance.for_range(0, ko_tile_inner) as cc2:
-                tik_instance.load2dv1(input_x2_L0B[cc2, 0, 0, 0], input_x2_L1[0, cc2, 0, 0], 0, no_tile,
-                                      ko_tile_inner,
-                                      0, True)
-            # input_x1_L1 -> input_x1_L0A
-            input_x1_L0A = tik_instance.Tensor(input_x1.dtype, [mo_tile, ko_tile_inner, c0, c0],
-                                               name="input_x1_L0A", scope=tik.scope_ca)
-            with tik_instance.for_range(0, mo_tile) as cc1:
-                tik_instance.load2dv1(input_x1_L0A[cc1, 0, 0, 0], input_x1_L1[0, cc1, 0, 0], 0, ko_tile_inner,
-                                      mo_tile, 0, False)
-            with tik_instance.if_scope(thread_idx_k == 0):
-                tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
-                                  ko_tile_inner * c0, no_tile * c0, 0)
-            with tik_instance.else_scope():
-                tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
-                                  ko_tile_inner * c0, no_tile * c0, 1)
-        res_ub = tik_instance.Tensor(input_x1.dtype, [no_tile, mo_tile, c0, c0],
-                                     name="resMatmul_ub", scope=tik.scope_ubuf)
-        tik_instance.data_move(res_ub, res_L0C, 0, 1, no_tile * mo_tile, 0, 0, 1)
-        tik_instance.data_move(res[(core_n * loop_n_num + cc_n) * no_tile, core_m * mo_tile, 0, 0],
-                               res_ub, 0, no_tile,
-                               mo_tile * c0 * c0 * fp16_size // blocksize, 0,
-                               (mo - mo_tile) * c0 * c0 * fp16_size // blocksize)
+                input_x2_L1 = tik_instance.Tensor("float16", [no_tile, ko_tile_inner, c0, c0],
+                                                  name="input_x2_L1", scope=tik.scope_cbuf)
+                tik_instance.data_move(input_x2_L1, input_x2_cast_ub, 0, 1,
+                                       no_tile * ko_tile_inner * c0 * c0 * fp16_size // blocksize, 0, 0)
+                # input_x1 -> input_x1_L1
+                input_x1_L1 = tik_instance.Tensor(input_x1.dtype, [ko_tile_inner, mo_tile, c0, c0],
+                                                  name="input_x1_L1", scope=tik.scope_cbuf)
+                tik_instance.data_move(input_x1_L1,
+                                       input_x1[k_idx,
+                                                core_m * mo_tile, 0, 0],
+                                       0, ko_tile_inner, mo_tile * c0 * c0 * fp16_size // blocksize,
+                                       (mo - mo_tile) * c0 * c0 * fp16_size // blocksize, 0)
+                # input_x2_L1 -> input_x2_L0B
+                input_x2_L0B = tik_instance.Tensor("float16", [ko_tile_inner, no_tile, c0, c0],
+                                                   name="input_x2_L0B", scope=tik.scope_cb)
+                with tik_instance.for_range(0, ko_tile_inner) as cc2:
+                    tik_instance.load2dv1(input_x2_L0B[cc2, 0, 0, 0], input_x2_L1[0, cc2, 0, 0], 0, no_tile,
+                                          ko_tile_inner,
+                                          0, True)
+                # input_x1_L1 -> input_x1_L0A
+                input_x1_L0A = tik_instance.Tensor(input_x1.dtype, [mo_tile, ko_tile_inner, c0, c0],
+                                                   name="input_x1_L0A", scope=tik.scope_ca)
+                with tik_instance.for_range(0, mo_tile) as cc1:
+                    tik_instance.load2dv1(input_x1_L0A[cc1, 0, 0, 0], input_x1_L1[0, cc1, 0, 0], 0, ko_tile_inner,
+                                          mo_tile, 0, False)
+                with tik_instance.if_scope(thread_idx_k == 0):
+                    tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
+                                      ko_tile_inner * c0, no_tile * c0, 0)
+                with tik_instance.else_scope():
+                    tik_instance.mmad(res_L0C, input_x1_L0A, input_x2_L0B, mo_tile * c0,
+                                      ko_tile_inner * c0, no_tile * c0, 1)
+            res_ub = tik_instance.Tensor(input_x1.dtype, [no_tile, mo_tile, c0, c0],
+                                         name="resMatmul_ub", scope=tik.scope_ubuf)
+            tik_instance.data_move(res_ub, res_L0C, 0, 1, no_tile * mo_tile, 0, 0, 1)
+            tik_instance.data_move(res[(core_n * loop_n_num + cc_n) * no_tile, core_m * mo_tile, 0, 0],
+                                   res_ub, 0, no_tile,
+                                   mo_tile * c0 * c0 * fp16_size // blocksize, 0,
+                                   (mo - mo_tile) * c0 * c0 * fp16_size // blocksize)
--- a/mindspore/ops/operations/thor_ops.py
+++ b/mindspore/ops/operations/thor_ops.py