From 4ae8285809202039ffdbe9fff5ac95828f1ebec8 Mon Sep 17 00:00:00 2001 From: panfengfeng Date: Fri, 6 Nov 2020 10:13:09 +0800 Subject: [PATCH] update transformer scripts --- model_zoo/official/nlp/transformer/README.md | 4 ++-- model_zoo/official/nlp/transformer/src/config.py | 4 ++-- model_zoo/official/nlp/transformer/train.py | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/model_zoo/official/nlp/transformer/README.md b/model_zoo/official/nlp/transformer/README.md index 607b6474f9..b42d8cb2b0 100644 --- a/model_zoo/official/nlp/transformer/README.md +++ b/model_zoo/official/nlp/transformer/README.md @@ -41,8 +41,8 @@ Note that you can run the scripts based on the dataset mentioned in original pap # [Environment Requirements](#contents) -- Hardware(Ascend) - - Prepare hardware environment with Ascend processor. If you want to try Ascend , please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. +- Hardware(Ascend/GPU) + - Prepare hardware environment with Ascend or GPU processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
- Framework - [MindSpore](https://gitee.com/mindspore/mindspore) - For more information, please check the resources below: diff --git a/model_zoo/official/nlp/transformer/src/config.py b/model_zoo/official/nlp/transformer/src/config.py index 9e143eca46..b31878ebad 100644 --- a/model_zoo/official/nlp/transformer/src/config.py +++ b/model_zoo/official/nlp/transformer/src/config.py @@ -61,8 +61,8 @@ if cfg.transformer_network == 'large': num_attention_heads=16, intermediate_size=4096, hidden_act="relu", - hidden_dropout_prob=0.1, - attention_probs_dropout_prob=0.1, + hidden_dropout_prob=0.2, + attention_probs_dropout_prob=0.2, max_position_embeddings=128, initializer_range=0.02, label_smoothing=0.1, diff --git a/model_zoo/official/nlp/transformer/train.py b/model_zoo/official/nlp/transformer/train.py index 5ac2fa316e..af8557d57e 100644 --- a/model_zoo/official/nlp/transformer/train.py +++ b/model_zoo/official/nlp/transformer/train.py @@ -159,9 +159,11 @@ def run_transformer_train(): hidden_size = transformer_net_cfg.hidden_size if args.device_target == "Ascend" \ else transformer_net_cfg_gpu.hidden_size + learning_rate = cfg.lr_schedule.learning_rate if args.device_target == "Ascend" \ + else 1.0 lr = Tensor(create_dynamic_lr(schedule="constant*rsqrt_hidden*linear_warmup*rsqrt_decay", training_steps=dataset.get_dataset_size()*args.epoch_size, - learning_rate=cfg.lr_schedule.learning_rate, + learning_rate=learning_rate, warmup_steps=cfg.lr_schedule.warmup_steps, hidden_size=hidden_size, start_decay_step=cfg.lr_schedule.start_decay_step,