diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md
index 95aed5e96a..160a872194 100644
--- a/model_zoo/official/nlp/bert/README.md
+++ b/model_zoo/official/nlp/bert/README.md
@@ -14,17 +14,30 @@ This example implements pre-training, fine-tuning and evaluation of [BERT-base](
 ### Pre-Training
 - Set options in `config.py`, including lossscale, optimizer and network. Click [here](https://www.mindspore.cn/tutorial/zh-CN/master/use/data_preparation/loading_the_datasets.html#tfrecord) for more information about dataset and the json schema file.
-- Run `run_standalone_pretrain.sh` for non-distributed pre-training of BERT-base and BERT-NEZHA model.
+- Run `run_standalone_pretrain.sh` for non-distributed pre-training of the BERT-base and BERT-NEZHA models on `Ascend`.
 
     ``` bash
-    sh scripts/run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR
+    bash scripts/run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR
     ```
-- Run `run_distribute_pretrain.sh` for distributed pre-training of BERT-base and BERT-NEZHA model.
+
+- Run `run_standalone_pretrain_for_gpu.sh` for non-distributed pre-training of the BERT-base and BERT-NEZHA models on `GPU`.
+
+    ``` bash
+    bash scripts/run_standalone_pretrain_for_gpu.sh DEVICE_ID EPOCH_SIZE DATA_DIR SCHEMA_DIR
+    ```
+
+- Run `run_distribute_pretrain.sh` for distributed pre-training of the BERT-base and BERT-NEZHA models on `Ascend`.
 
     ``` bash
-    sh scripts/run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE
+    bash scripts/run_distribute_pretrain.sh DATA_DIR RANK_TABLE_FILE
     ```
+
+- Run `run_distribute_pretrain_for_gpu.sh` for distributed pre-training of the BERT-base and BERT-NEZHA models on `GPU`.
+
+    ```bash
+    bash scripts/run_distribute_pretrain_for_gpu.sh RANK_SIZE EPOCH_SIZE DATA_DIR SCHEMA_DIR
+    ```
+
 ### Fine-Tuning and Evaluation
 - Including three kinds of task: Classification, NER(Named Entity Recognition) and SQuAD(Stanford Question Answering Dataset)
diff --git a/model_zoo/official/nlp/bert/run_pretrain.py b/model_zoo/official/nlp/bert/run_pretrain.py
index 291a784441..06cf905fcf 100644
--- a/model_zoo/official/nlp/bert/run_pretrain.py
+++ b/model_zoo/official/nlp/bert/run_pretrain.py
@@ -141,7 +141,7 @@ def run_pretrain():
     else:
         raise ValueError("Don't support optimizer {}, only support [Lamb, Momentum, AdamWeightDecay]".
                          format(cfg.optimizer))
-    callback = [TimeMonitor(args_opt.data_sink_steps), LossCallBack()]
+    callback = [TimeMonitor(args_opt.data_sink_steps), LossCallBack(ds.get_dataset_size())]
     if args_opt.enable_save_ckpt == "true" and args_opt.device_id % min(8, device_num) == 0:
         config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps,
                                      keep_checkpoint_max=args_opt.save_checkpoint_num)
diff --git a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py
index 32c3bb8038..41f656d031 100644
--- a/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py
+++ b/model_zoo/official/nlp/bert/scripts/ascend_distributed_launcher/run_distribute_pretrain.py
@@ -125,7 +125,7 @@ def distribute_pretrain():
         print("log_file_dir: " + cur_dir + "/LOG" + str(device_id) + "/log.txt")
 
         os.chdir(cur_dir + "/LOG" + str(device_id))
-        cmd = 'taskset -c ' + cmdopt + ' python ' + run_script + " "
+        cmd = 'taskset -c ' + cmdopt + ' nohup python ' + run_script + " "
         opt = " ".join(["--" + key + "=" + str(cfg[key]) for key in cfg.keys()])
         if ('device_id' in opt) or ('device_num' in opt) or ('data_dir' in opt):
             raise ValueError("hyper_parameter_config.ini can not setting 'device_id',"
diff --git a/model_zoo/official/nlp/bert/src/utils.py b/model_zoo/official/nlp/bert/src/utils.py
index 6463464734..46d8591e29 100644
--- a/model_zoo/official/nlp/bert/src/utils.py
+++ b/model_zoo/official/nlp/bert/src/utils.py
@@ -18,6 +18,7 @@ Functional Cells used in Bert finetune and evaluation.
 """
 import os
+import math
 import numpy as np
 import mindspore.nn as nn
 from mindspore import log as logger
@@ -90,15 +91,14 @@ class LossCallBack(Callback):
     Args:
         per_print_times (int): Print loss every times. Default: 1.
""" - def __init__(self, per_print_times=1): + def __init__(self, dataset_size=1): super(LossCallBack, self).__init__() - if not isinstance(per_print_times, int) or per_print_times < 0: - raise ValueError("print_step must be int and >= 0") - self._per_print_times = per_print_times + self._dataset_size = dataset_size def step_end(self, run_context): cb_params = run_context.original_args() - print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, - str(cb_params.net_outputs))) + percent, epoch_num = math.modf(cb_params.cur_step_num / self._dataset_size) + print("epoch: {}, current epoch percent: {}, step: {}, outputs are {}" + .format(epoch_num, "%.3f" % percent, cb_params.cur_step_num, str(cb_params.net_outputs))) def LoadNewestCkpt(load_finetune_checkpoint_dir, steps_per_epoch, epoch_num, prefix): """ diff --git a/model_zoo/utils/ascend_distributed_launcher/README.md b/model_zoo/utils/ascend_distributed_launcher/README.md index cefdaee3e8..c8692774f5 100644 --- a/model_zoo/utils/ascend_distributed_launcher/README.md +++ b/model_zoo/utils/ascend_distributed_launcher/README.md @@ -7,7 +7,7 @@ The number of D chips can be automatically allocated based on the device_num set ## how to use For example, if we want to run the distributed training of Bert model on D chip, we can in `/bert/` dir: ``` -python model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py --run_script_dir ./run_pretrain.py --hyper_parameter_config_dir model_zoo/utils/ascend_distributed_launcher/hyper_parameter_config.ini --data_dir /path/dataset/ --hccl_config_dir model_zoo/utils/hccl_tools/hccl_2p_56_x.x.x.x.json +python model_zoo/utils/ascend_distributed_launcher/run_distributed.py --run_script_dir ./run_pretrain.py --hyper_parameter_config_dir model_zoo/utils/ascend_distributed_launcher/hyper_parameter_config.ini --data_dir /path/dataset/ --hccl_config_dir model_zoo/utils/hccl_tools/hccl_2p_56_x.x.x.x.json ``` output: diff --git a/model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py b/model_zoo/utils/ascend_distributed_launcher/run_distributed.py similarity index 98% rename from model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py rename to model_zoo/utils/ascend_distributed_launcher/run_distributed.py index efc97e0fbe..b21accf7c2 100644 --- a/model_zoo/utils/ascend_distributed_launcher/run_distribute_pretrain.py +++ b/model_zoo/utils/ascend_distributed_launcher/run_distributed.py @@ -124,7 +124,7 @@ def distribute_pretrain(): print("data_dir:", data_dir) print("log_file_dir: ./LOG" + str(device_id) + "/log.txt") - cmd = 'taskset -c ' + cmdopt + ' python ' + run_script + " " + cmd = 'taskset -c ' + cmdopt + ' nohup python ' + run_script + " " opt = " ".join(["--" + key + "=" + str(cfg[key]) for key in cfg.keys()]) if ('device_id' in opt) or ('device_num' in opt) or ('data_dir' in opt): raise ValueError("hyper_parameter_config.ini can not setting 'device_id',"