mindspore/model_zoo/official/nlp/gnmt_v2/config/config.py

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Configuration class for GNMT."""
import os
import json
import copy
from typing import List
import mindspore.common.dtype as mstype


def _is_dataset_file(file: str):
    return "tfrecord" in file.lower() or "mindrecord" in file.lower()


def _get_files_from_dir(folder: str):
    _files = []
    for file in os.listdir(folder):
        if _is_dataset_file(file):
            _files.append(os.path.join(folder, file))
    return _files


def get_source_list(folder: str) -> List:
    """
    Get file list from a folder.

    Args:
        folder (str): Directory or path of a single dataset file.

    Returns:
        list, file list.
    """
    _list = []
    if not folder:
        return _list

    if os.path.isdir(folder):
        _list = _get_files_from_dir(folder)
    else:
        if _is_dataset_file(folder):
            _list.append(folder)
    return _list
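
# Usage sketch (illustrative; the paths below are hypothetical): get_source_list
# collects every dataset file ("tfrecord"/"mindrecord") inside a directory,
# wraps a single dataset file path in a one-element list, and returns an empty
# list for an empty value.
#
#   get_source_list("/data/gnmt/train")            # -> ["/data/gnmt/train/part0.mindrecord", ...]
#   get_source_list("/data/gnmt/test.mindrecord")  # -> ["/data/gnmt/test.mindrecord"]
#   get_source_list("")                            # -> []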
PARAM_NODES = {"dataset_config",
"model_config",
"loss_scale_config",
"learn_rate_config",
"checkpoint_options"}


class GNMTConfig:
    """
    Configuration for `GNMT`.

    Args:
        random_seed (int): Random seed.
        epochs (int): Number of training epochs.
        batch_size (int): Batch size of the input dataset.
        pre_train_dataset (str): Path of the pre-training dataset file or folder.
        fine_tune_dataset (str): Path of the fine-tuning dataset file or folder.
        test_dataset (str): Path of the test dataset file or folder.
        valid_dataset (str): Path of the validation dataset file or folder.
        dataset_sink_mode (bool): Whether to enable dataset sink mode.
        seq_length (int): Length of the input sequence.
        vocab_size (int): Size of the vocabulary.
        hidden_size (int): Size of the embedding, attention and hidden dimensions.
        num_hidden_layers (int): Number of encoder and decoder layers.
        intermediate_size (int): Size of the intermediate layer in the encoder/decoder cell.
        hidden_act (str): Activation function used in the encoder/decoder cell.
        hidden_dropout_prob (float): Dropout probability for hidden outputs.
        attention_dropout_prob (float): Dropout probability for the attention module.
        initializer_range (float): Initialization value for TruncatedNormal.
        label_smoothing (float): Label smoothing factor.
        beam_width (int): Beam width for beam search during inference.
        length_penalty_weight (float): Penalty weight for sentence length.
        max_decode_length (int): Maximum decode length during inference.
        input_mask_from_dataset (bool): Whether to use the input mask loaded from the dataset.
        init_loss_scale (int): Initial loss scale value.
        loss_scale_factor (int): Loss scale factor.
        scale_window (int): Window size of the loss scale.
        lr_scheduler (str): Learning rate scheduler. See the Note below.
        optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam.
        lr (float): Initial learning rate.
        min_lr (float): Minimum learning rate.
        decay_steps (int): Number of decay steps.
        lr_scheduler_power (float): Power value used to calculate the decayed learning rate.
        warmup_lr_remain_steps (int or float): Iteration at which learning rate decay starts.
        warmup_lr_decay_interval (int): Interval between learning rate decay steps.
        decay_start_step (int): Step at which decay starts.
        warmup_steps (int): Number of warm-up steps.
        existed_ckpt (str): Path of an existing checkpoint used to resume training, if any.
        save_ckpt_steps (int): Interval (in steps) between checkpoint saves.
        keep_ckpt_max (int): Maximum number of checkpoint files to keep.
        ckpt_prefix (str): Prefix of checkpoint files.
        ckpt_path (str): Path where checkpoints are saved.
        save_graphs (bool): Whether to save graphs; set to True if MindInsight is needed.
        dtype (mstype): Data type of the input.

    Note:
        There are three types of learning rate scheduler: the square root scheduler, the
        polynomial decay scheduler and the warmup multistep learning rate scheduler.

        The square root scheduler uses the following parameters: lr, decay_start_step,
        warmup_steps and min_lr.

        The polynomial decay scheduler uses the following parameters: lr, min_lr,
        decay_steps, warmup_steps and lr_scheduler_power.

        The warmup multistep learning rate scheduler uses the following parameters: lr,
        warmup_steps, warmup_lr_remain_steps, warmup_lr_decay_interval, decay_steps and
        lr_scheduler_power.
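
    Examples:
        A minimal construction sketch; the dataset path is hypothetical and the
        remaining values are the defaults, not tuned settings:

        >>> config = GNMTConfig(epochs=6, batch_size=128,
        ...                     pre_train_dataset="/data/gnmt/train",
        ...                     lr_scheduler="WarmupMultiStepLR", lr=2e-3)
        >>> config.hidden_size
        1024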
"""

    def __init__(self,
                 random_seed=50,
                 epochs=6, batch_size=128,
                 pre_train_dataset: str = None,
                 fine_tune_dataset: str = None,
                 test_dataset: str = None,
                 valid_dataset: str = None,
                 dataset_sink_mode=True,
                 seq_length=51, vocab_size=32320, hidden_size=1024,
                 num_hidden_layers=4, intermediate_size=4096,
                 hidden_act="tanh",
                 hidden_dropout_prob=0.2, attention_dropout_prob=0.2,
                 initializer_range=0.1,
                 label_smoothing=0.1,
                 beam_width=2,
                 length_penalty_weight=0.6,
                 max_decode_length=50,
                 input_mask_from_dataset=False,
                 init_loss_scale=65536,
                 loss_scale_factor=2, scale_window=1000,
                 lr_scheduler="WarmupMultiStepLR",
                 optimizer="adam",
                 lr=2e-3, min_lr=1e-6,
                 decay_steps=4, lr_scheduler_power=0.5,
                 warmup_lr_remain_steps=0.666, warmup_lr_decay_interval=-1,
                 decay_start_step=-1, warmup_steps=200,
                 existed_ckpt="", save_ckpt_steps=3452, keep_ckpt_max=6,
                 ckpt_prefix="gnmt", ckpt_path: str = None,
                 save_graphs=False,
                 dtype=mstype.float32):
        self.save_graphs = save_graphs
        self.random_seed = random_seed
        self.pre_train_dataset = get_source_list(pre_train_dataset)  # type: List[str]
        self.fine_tune_dataset = get_source_list(fine_tune_dataset)  # type: List[str]
        self.valid_dataset = get_source_list(valid_dataset)  # type: List[str]
        self.test_dataset = get_source_list(test_dataset)  # type: List[str]
        if not isinstance(epochs, int) or epochs < 0:
            raise ValueError("`epochs` must be a non-negative integer.")
        self.epochs = epochs
        self.dataset_sink_mode = dataset_sink_mode
        self.ckpt_path = ckpt_path
        self.keep_ckpt_max = keep_ckpt_max
        self.save_ckpt_steps = save_ckpt_steps
        self.ckpt_prefix = ckpt_prefix
        self.existed_ckpt = existed_ckpt
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_dropout_prob = attention_dropout_prob
        self.initializer_range = initializer_range
        self.label_smoothing = label_smoothing
        self.beam_width = beam_width
        self.length_penalty_weight = length_penalty_weight
        self.max_decode_length = max_decode_length
        self.input_mask_from_dataset = input_mask_from_dataset
        self.compute_type = mstype.float16
        self.dtype = dtype
        self.scale_window = scale_window
        self.loss_scale_factor = loss_scale_factor
        self.init_loss_scale = init_loss_scale
        self.optimizer = optimizer
        self.lr = lr
        self.lr_scheduler = lr_scheduler
        self.min_lr = min_lr
        self.lr_scheduler_power = lr_scheduler_power
        self.warmup_lr_remain_steps = warmup_lr_remain_steps
        self.warmup_lr_decay_interval = warmup_lr_decay_interval
        self.decay_steps = decay_steps
        self.decay_start_step = decay_start_step
        self.warmup_steps = warmup_steps

    @classmethod
    def from_dict(cls, json_object: dict):
        """Constructs a `GNMTConfig` from a Python dictionary of parameters."""
        _params = {}
        for node in PARAM_NODES:
            for key in json_object[node]:
                _params[key] = json_object[node][key]
        return cls(**_params)

    @classmethod
    def from_json_file(cls, json_file):
        """Constructs a `GNMTConfig` from a json file of parameters."""
        with open(json_file, "r") as reader:
            return cls.from_dict(json.load(reader))

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        output = copy.deepcopy(self.__dict__)
        return output

    def to_json_string(self):
        """Serializes this instance to a JSON string."""
        return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
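

# Usage sketch (illustrative; the file name is hypothetical). `from_json_file`
# expects the parameters to be grouped under the node names in PARAM_NODES;
# `from_dict` flattens all nodes before calling the constructor, so the grouping
# below only shows one plausible layout.
#
#   {
#     "dataset_config":     {"random_seed": 50, "epochs": 6, "batch_size": 128},
#     "model_config":       {"hidden_size": 1024, "num_hidden_layers": 4},
#     "loss_scale_config":  {"init_loss_scale": 65536, "loss_scale_factor": 2, "scale_window": 1000},
#     "learn_rate_config":  {"optimizer": "adam", "lr": 2e-3, "lr_scheduler": "WarmupMultiStepLR"},
#     "checkpoint_options": {"ckpt_prefix": "gnmt", "keep_ckpt_max": 6}
#   }
#
#   config = GNMTConfig.from_json_file("config/config.json")
#   assert config.batch_size == 128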