From c4eb32b43ad0e86a3c75e64fbe8f7272d2bf7432 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=8A=A2?= Date: Wed, 23 Dec 2020 20:57:24 +0800 Subject: [PATCH] move textrcnn from official to research, and raise acc when using lstm as RNN arch --- model_zoo/official/nlp/textrcnn/src/utils.py | 29 -------- .../nlp/textrcnn/data_helpers.py | 2 +- .../nlp/textrcnn/eval.py | 0 .../nlp/textrcnn/readme.md | 10 +-- .../nlp/textrcnn/sample.txt | 0 .../nlp/textrcnn/scripts/run_eval.sh | 0 .../nlp/textrcnn/scripts/run_train.sh | 0 .../nlp/textrcnn/src/config.py | 10 +-- .../nlp/textrcnn/src/dataset.py | 0 .../nlp/textrcnn/src/textrcnn.py | 0 model_zoo/research/nlp/textrcnn/src/utils.py | 70 +++++++++++++++++++ .../nlp/textrcnn/train.py | 12 ++-- 12 files changed, 89 insertions(+), 44 deletions(-) delete mode 100644 model_zoo/official/nlp/textrcnn/src/utils.py rename model_zoo/{official => research}/nlp/textrcnn/data_helpers.py (99%) rename model_zoo/{official => research}/nlp/textrcnn/eval.py (100%) rename model_zoo/{official => research}/nlp/textrcnn/readme.md (91%) rename model_zoo/{official => research}/nlp/textrcnn/sample.txt (100%) rename model_zoo/{official => research}/nlp/textrcnn/scripts/run_eval.sh (100%) rename model_zoo/{official => research}/nlp/textrcnn/scripts/run_train.sh (100%) rename model_zoo/{official => research}/nlp/textrcnn/src/config.py (87%) rename model_zoo/{official => research}/nlp/textrcnn/src/dataset.py (100%) rename model_zoo/{official => research}/nlp/textrcnn/src/textrcnn.py (100%) create mode 100644 model_zoo/research/nlp/textrcnn/src/utils.py rename model_zoo/{official => research}/nlp/textrcnn/train.py (88%) diff --git a/model_zoo/official/nlp/textrcnn/src/utils.py b/model_zoo/official/nlp/textrcnn/src/utils.py deleted file mode 100644 index f68401fa63..0000000000 --- a/model_zoo/official/nlp/textrcnn/src/utils.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, 
Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ -"""training utils""" -from mindspore import dtype as mstype -from mindspore.nn.dynamic_lr import exponential_decay_lr -from mindspore import Tensor - - -def get_lr(cfg, dataset_size): - if cfg.cell == "lstm": - lr = exponential_decay_lr(cfg.lstm_base_lr, cfg.lstm_decay_rate, dataset_size * cfg.num_epochs, - dataset_size, - cfg.lstm_decay_epoch) - lr_ret = Tensor(lr, mstype.float32) - else: - lr_ret = cfg.lr - return lr_ret diff --git a/model_zoo/official/nlp/textrcnn/data_helpers.py b/model_zoo/research/nlp/textrcnn/data_helpers.py similarity index 99% rename from model_zoo/official/nlp/textrcnn/data_helpers.py rename to model_zoo/research/nlp/textrcnn/data_helpers.py index d0ac1599bf..acc242c6e6 100644 --- a/model_zoo/official/nlp/textrcnn/data_helpers.py +++ b/model_zoo/research/nlp/textrcnn/data_helpers.py @@ -22,7 +22,7 @@ parser.add_argument('--data_dir', type=str, help='the source dataset directory.' 
parser.add_argument('--out_dir', type=str, help='the target dataset directory.', default='./data') args = parser.parse_args() - +np.random.seed(2) def dataset_split(label): """dataset_split api""" diff --git a/model_zoo/official/nlp/textrcnn/eval.py b/model_zoo/research/nlp/textrcnn/eval.py similarity index 100% rename from model_zoo/official/nlp/textrcnn/eval.py rename to model_zoo/research/nlp/textrcnn/eval.py diff --git a/model_zoo/official/nlp/textrcnn/readme.md b/model_zoo/research/nlp/textrcnn/readme.md similarity index 91% rename from model_zoo/official/nlp/textrcnn/readme.md rename to model_zoo/research/nlp/textrcnn/readme.md index bda3d60c38..2330910004 100644 --- a/model_zoo/official/nlp/textrcnn/readme.md +++ b/model_zoo/research/nlp/textrcnn/readme.md @@ -113,17 +113,19 @@ Parameters for both training and evaluation can be set in config.py ```python 'num_epochs': 10, # total training epochs + 'lstm_num_epochs': 15, # total training epochs when using lstm 'batch_size': 64, # training batch size 'cell': 'gru', # the RNN architecture, can be 'vanilla', 'gru' and 'lstm'. 
- 'opt': 'adam', # the optimizer strategy, can be 'adam' or 'momentum' 'ckpt_folder_path': './ckpt', # the path to save the checkpoints 'preprocess_path': './preprocess', # the directory to save the processed data 'preprocess' : 'false', # whethere to preprocess the data 'data_path': './data/', # the path to store the splited data 'lr': 1e-3, # the training learning rate - 'lstm_base_lr': 3e-3, # the training learning rate when using lstm as RNN cell - 'lstm_decay_rate': 0.9, # lr decay rate when using lstm as RNN cell - 'lstm_decay_epoch': 1, # lr decay epoch when using lstm as RNN cell + 'lstm_lr_init': 2e-3, # learning rate initial value when using lstm + 'lstm_lr_end': 5e-4, # learning rate end value when using lstm + 'lstm_lr_max': 3e-3, # learning rate max value when using lstm + 'lstm_lr_warm_up_epochs': 2, # warm up epoch num when using lstm + 'lstm_lr_adjust_epochs': 9, # lr adjust in lr_adjust_epoch, after that, the lr is lr_end when using lstm 'emb_path': './word2vec', # the directory to save the embedding file 'embed_size': 300, # the dimension of the word embedding 'save_checkpoint_steps': 149, # per step to save the checkpoint diff --git a/model_zoo/official/nlp/textrcnn/sample.txt b/model_zoo/research/nlp/textrcnn/sample.txt similarity index 100% rename from model_zoo/official/nlp/textrcnn/sample.txt rename to model_zoo/research/nlp/textrcnn/sample.txt diff --git a/model_zoo/official/nlp/textrcnn/scripts/run_eval.sh b/model_zoo/research/nlp/textrcnn/scripts/run_eval.sh similarity index 100% rename from model_zoo/official/nlp/textrcnn/scripts/run_eval.sh rename to model_zoo/research/nlp/textrcnn/scripts/run_eval.sh diff --git a/model_zoo/official/nlp/textrcnn/scripts/run_train.sh b/model_zoo/research/nlp/textrcnn/scripts/run_train.sh similarity index 100% rename from model_zoo/official/nlp/textrcnn/scripts/run_train.sh rename to model_zoo/research/nlp/textrcnn/scripts/run_train.sh diff --git a/model_zoo/official/nlp/textrcnn/src/config.py 
b/model_zoo/research/nlp/textrcnn/src/config.py similarity index 87% rename from model_zoo/official/nlp/textrcnn/src/config.py rename to model_zoo/research/nlp/textrcnn/src/config.py index 0782b709a2..f2be8493c0 100644 --- a/model_zoo/official/nlp/textrcnn/src/config.py +++ b/model_zoo/research/nlp/textrcnn/src/config.py @@ -22,17 +22,19 @@ textrcnn_cfg = edict({ 'pos_dir': 'data/rt-polaritydata/rt-polarity.pos', 'neg_dir': 'data/rt-polaritydata/rt-polarity.neg', 'num_epochs': 10, + 'lstm_num_epochs': 15, 'batch_size': 64, 'cell': 'gru', - 'opt': 'adam', 'ckpt_folder_path': './ckpt', 'preprocess_path': './preprocess', 'preprocess': 'false', 'data_path': './data/', 'lr': 1e-3, - 'lstm_base_lr': 3e-3, - 'lstm_decay_rate': 0.9, - 'lstm_decay_epoch': 1, + 'lstm_lr_init': 2e-3, + 'lstm_lr_end': 5e-4, + 'lstm_lr_max': 3e-3, + 'lstm_lr_warm_up_epochs': 2, + 'lstm_lr_adjust_epochs': 9, 'emb_path': './word2vec', 'embed_size': 300, 'save_checkpoint_steps': 149, diff --git a/model_zoo/official/nlp/textrcnn/src/dataset.py b/model_zoo/research/nlp/textrcnn/src/dataset.py similarity index 100% rename from model_zoo/official/nlp/textrcnn/src/dataset.py rename to model_zoo/research/nlp/textrcnn/src/dataset.py diff --git a/model_zoo/official/nlp/textrcnn/src/textrcnn.py b/model_zoo/research/nlp/textrcnn/src/textrcnn.py similarity index 100% rename from model_zoo/official/nlp/textrcnn/src/textrcnn.py rename to model_zoo/research/nlp/textrcnn/src/textrcnn.py diff --git a/model_zoo/research/nlp/textrcnn/src/utils.py b/model_zoo/research/nlp/textrcnn/src/utils.py new file mode 100644 index 0000000000..1e59552deb --- /dev/null +++ b/model_zoo/research/nlp/textrcnn/src/utils.py @@ -0,0 +1,70 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""training utils""" +import math +import numpy as np +from mindspore import dtype as mstype +from mindspore import Tensor + + +def get_lr(cfg, dataset_size): + if cfg.cell == "lstm": + lr = get_lr_lstm(0, cfg.lstm_lr_init, cfg.lstm_lr_end, cfg.lstm_lr_max, cfg.lstm_lr_warm_up_epochs, + cfg.lstm_num_epochs, dataset_size, cfg.lstm_lr_adjust_epochs) + lr_ret = Tensor(lr, mstype.float32) + else: + lr_ret = cfg.lr + return lr_ret + + +def get_lr_lstm(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_adjust_epoch): + """ + generate learning rate array + + Args: + global_step(int): index of the first step to return; earlier steps are dropped from the array + lr_init(float): init learning rate + lr_end(float): end learning rate + lr_max(float): max learning rate + warmup_epochs(float): number of warmup epochs + total_epochs(int): total number of training epochs + steps_per_epoch(int): steps of one epoch + lr_adjust_epoch(int): epoch at which the cosine decay from lr_max to lr_end finishes; after that, the lr is lr_end + + Returns: + np.array, learning rate array + """ + lr_each_step = [] + total_steps = steps_per_epoch * total_epochs + warmup_steps = steps_per_epoch * warmup_epochs + adjust_steps = lr_adjust_epoch * steps_per_epoch + for i in range(total_steps): + if i < warmup_steps: + lr = lr_init + (lr_max - lr_init) * i / warmup_steps + elif i < adjust_steps: + lr = lr_end + \ + (lr_max - lr_end) * \ + (1. + math.cos(math.pi * (i - warmup_steps) / (adjust_steps - warmup_steps))) / 2. 
+ else: + lr = lr_end + if lr < 0.0: + lr = 0.0 + lr_each_step.append(lr) + + current_step = global_step + lr_each_step = np.array(lr_each_step).astype(np.float32) + learning_rate = lr_each_step[current_step:] + + return learning_rate diff --git a/model_zoo/official/nlp/textrcnn/train.py b/model_zoo/research/nlp/textrcnn/train.py similarity index 88% rename from model_zoo/official/nlp/textrcnn/train.py rename to model_zoo/research/nlp/textrcnn/train.py index 0b67cf9a31..3e46f89a10 100644 --- a/model_zoo/official/nlp/textrcnn/train.py +++ b/model_zoo/research/nlp/textrcnn/train.py @@ -32,7 +32,7 @@ from src.textrcnn import textrcnn from src.utils import get_lr -set_seed(1) +set_seed(2) if __name__ == '__main__': @@ -64,11 +64,11 @@ if __name__ == '__main__': loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True) lr = get_lr(cfg, step_size) + num_epochs = cfg.num_epochs + if cfg.cell == "lstm": + num_epochs = cfg.lstm_num_epochs - if cfg.opt == "adam": - opt = nn.Adam(params=network.trainable_params(), learning_rate=lr) - elif cfg.opt == "momentum": - opt = nn.Momentum(network.trainable_params(), lr, cfg.momentum) + opt = nn.Adam(params=network.trainable_params(), learning_rate=lr) loss_cb = LossMonitor() model = Model(network, loss, opt, {'acc': Accuracy()}, amp_level="O3") @@ -77,5 +77,5 @@ if __name__ == '__main__': config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, \ keep_checkpoint_max=cfg.keep_checkpoint_max) ckpoint_cb = ModelCheckpoint(prefix=cfg.cell, directory=cfg.ckpt_folder_path, config=config_ck) - model.train(cfg.num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb]) + model.train(num_epochs, ds_train, callbacks=[ckpoint_cb, loss_cb]) print("train success")