From 263a591473f87545c5a458fa60778b2db75e90b1 Mon Sep 17 00:00:00 2001
From: yoonlee666
Date: Sat, 10 Oct 2020 09:36:42 +0800
Subject: [PATCH] edit readme

---
 model_zoo/official/nlp/bert/README.md | 9 ++++++---
 .../bert/scripts/ascend_distributed_launcher/README.md | 4 ++--
 .../get_distribute_pretrain_cmd.py | 2 +-
 model_zoo/utils/ascend_distributed_launcher/README.md | 4 ++--
 .../get_distribute_pretrain_cmd.py | 2 +-
 model_zoo/utils/hccl_tools/README.md | 4 ++--
 model_zoo/utils/hccl_tools/hccl_tools.py | 2 +-
 7 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md
index 85d5f61a70..f516e4129b 100644
--- a/model_zoo/official/nlp/bert/README.md
+++ b/model_zoo/official/nlp/bert/README.md
@@ -411,20 +411,22 @@ epoch: 0.0, current epoch percent: 0.002, step: 200, outpus are (Tensor(shape=[1
 Before running the command below, please check the load pretrain checkpoint path has been set. Please set the checkpoint path to be the absolute full path, e.g:"/username/pretrain/checkpoint_100_300.ckpt".
 ```
 bash scripts/run_classifier.sh
-
+```
 The command above will run in the background, you can view training logs in classfier_log.txt.
 If you choose accuracy as assessment method, the result will be as follows:
+```
 acc_num XXX, total_num XXX, accuracy 0.588986
 ```

 #### evaluation on cluener dataset when running on Ascend
 ```
 bash scripts/ner.sh
-
+```
 The command above will run in the background, you can view training logs in ner_log.txt.
 If you choose F1 as assessment method, the result will be as follows:
+```
 Precision 0.920507
 Recall 0.948683
 F1 0.920507
@@ -433,9 +435,10 @@ F1 0.920507

 #### evaluation on squad v1.1 dataset when running on Ascend
 ```
 bash scripts/squad.sh
-
+```
 The command above will run in the background, you can view training logs in squad_log.txt.
 The result will be as follows:
+```
 {"exact_match": 80.3878923040233284, "f1": 87.6902384023850329}
 ```

diff --git a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/README.md b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/README.md
index 18a6532fbf..c821a2b513 100644
--- a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/README.md
+++ b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/README.md
@@ -1,11 +1,11 @@
 # Run distribute pretrain

 ## description
-The number of D chips can be automatically allocated based on the device_num set in hccl config file, You don not need to specify that.
+The number of Ascend accelerators can be automatically allocated based on the device_num set in the hccl config file; you do not need to specify it.

 ## how to use

-For example, if we want to generate the launch command of the distributed training of Bert model on D chip, we can run the following command in `/bert/` dir:
+For example, if we want to generate the launch command for the distributed training of the Bert model on Ascend accelerators, we can run the following command in the `/bert/` directory:
 ```
 python ./scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py --run_script_dir ./run_pretrain.py --hyper_parameter_config_dir ./scripts/ascend_distributed_launcher/hyper_parameter_config.ini --data_dir /path/dataset/ --hccl_config_dir model_zoo/utils/hccl_tools/hccl_2p_56_x.x.x.x.json
 ```
diff --git a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py
index e586bcb06a..9a0338af6f 100644
--- a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py
+++ b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py
@@ -59,7 +59,7 @@ def append_cmd_env(cmd, key, value):

 def distribute_pretrain():
     """
-    distribute pretrain scripts. The number of D chips can be automatically allocated
+    distribute pretrain scripts. The number of Ascend accelerators can be automatically allocated
     based on the device_num set in hccl config file, You don not need to specify that.
     """
     cmd = ""
diff --git a/model_zoo/utils/ascend_distributed_launcher/README.md b/model_zoo/utils/ascend_distributed_launcher/README.md
index 18a6532fbf..c821a2b513 100644
--- a/model_zoo/utils/ascend_distributed_launcher/README.md
+++ b/model_zoo/utils/ascend_distributed_launcher/README.md
@@ -1,11 +1,11 @@
 # Run distribute pretrain

 ## description
-The number of D chips can be automatically allocated based on the device_num set in hccl config file, You don not need to specify that.
+The number of Ascend accelerators can be automatically allocated based on the device_num set in the hccl config file; you do not need to specify it.

 ## how to use

-For example, if we want to generate the launch command of the distributed training of Bert model on D chip, we can run the following command in `/bert/` dir:
+For example, if we want to generate the launch command for the distributed training of the Bert model on Ascend accelerators, we can run the following command in the `/bert/` directory:
 ```
 python ./scripts/ascend_distributed_launcher/get_distribute_pretrain_cmd.py --run_script_dir ./run_pretrain.py --hyper_parameter_config_dir ./scripts/ascend_distributed_launcher/hyper_parameter_config.ini --data_dir /path/dataset/ --hccl_config_dir model_zoo/utils/hccl_tools/hccl_2p_56_x.x.x.x.json
 ```
diff --git a/model_zoo/utils/ascend_distributed_launcher/get_distribute_pretrain_cmd.py b/model_zoo/utils/ascend_distributed_launcher/get_distribute_pretrain_cmd.py
index e586bcb06a..9a0338af6f 100644
--- a/model_zoo/utils/ascend_distributed_launcher/get_distribute_pretrain_cmd.py
+++ b/model_zoo/utils/ascend_distributed_launcher/get_distribute_pretrain_cmd.py
@@ -59,7 +59,7 @@ def append_cmd_env(cmd, key, value):

 def distribute_pretrain():
     """
-    distribute pretrain scripts. The number of D chips can be automatically allocated
+    distribute pretrain scripts. The number of Ascend accelerators can be automatically allocated
     based on the device_num set in hccl config file, You don not need to specify that.
""" cmd = "" diff --git a/model_zoo/utils/hccl_tools/README.md b/model_zoo/utils/hccl_tools/README.md index db97a60312..c237827c7e 100644 --- a/model_zoo/utils/hccl_tools/README.md +++ b/model_zoo/utils/hccl_tools/README.md @@ -1,6 +1,6 @@ # description -mindspore distributed training launch helper utilty that will generate hccl config file. +MindSpore distributed training launch helper utilty that will generate hccl config file. # use @@ -14,4 +14,4 @@ hccl_[device_num]p_[which device]_[server_ip].json ``` # Note -Please note that the D chips used must be continuous, such [0,4) means to use four chips 0,1,2,3; [0,1) means to use chip 0; The first four chips are a group, and the last four chips are a group. In addition to the [0,8) chips are allowed, other cross-group such as [3,6) are prohibited. \ No newline at end of file +Please note that the Ascend accelerators used must be continuous, such [0,4) means to use four chips 0,1,2,3; [0,1) means to use chip 0; The first four chips are a group, and the last four chips are a group. In addition to the [0,8) chips are allowed, other cross-group such as [3,6) are prohibited. diff --git a/model_zoo/utils/hccl_tools/hccl_tools.py b/model_zoo/utils/hccl_tools/hccl_tools.py index b9a039489e..1bd0ea2e94 100644 --- a/model_zoo/utils/hccl_tools/hccl_tools.py +++ b/model_zoo/utils/hccl_tools/hccl_tools.py @@ -37,7 +37,7 @@ def parse_args(): "helper utilty that will generate hccl" " config file") parser.add_argument("--device_num", type=str, default="[0,8)", - help="The number of the D chip used. please note that the D chips" + help="The number of the Ascend accelerators used. please note that the Ascend accelerators" "used must be continuous, such [0,4) means to use four chips " "0,1,2,3; [0,1) means to use chip 0; The first four chips are" "a group, and the last four chips are a group. In addition to"