{
  "dataset_config": {
    "epochs": 20,
    "batch_size": 192,
    "pre_train_dataset": "",
    "fine_tune_dataset": "",
    "test_dataset": "",
    "valid_dataset": "",
    "dataset_sink_mode": false,
    "dataset_sink_step": 100
  },
  "model_config": {
    "random_seed": 100,
    "save_graphs": false,
    "seq_length": 64,
    "vocab_size": 45744,
    "hidden_size": 1024,
    "num_hidden_layers": 6,
    "num_attention_heads": 8,
    "intermediate_size": 4096,
    "hidden_act": "relu",
    "hidden_dropout_prob": 0.2,
    "attention_dropout_prob": 0.2,
    "max_position_embeddings": 64,
    "initializer_range": 0.02,
    "label_smoothing": 0.1,
    "beam_width": 4,
    "length_penalty_weight": 1.0,
    "max_decode_length": 64,
    "input_mask_from_dataset": true
  },
  "loss_scale_config": {
    "loss_scale_mode": "dynamic",
    "init_loss_scale": 65536,
    "loss_scale_factor": 2,
    "scale_window": 200
  },
  "learn_rate_config": {
    "optimizer": "adam",
    "lr": 1e-4,
    "lr_scheduler": "poly",
    "poly_lr_scheduler_power": 0.5,
    "decay_steps": 10000,
    "decay_start_step": 12000,
    "warmup_steps": 4000,
    "min_lr": 1e-6
  },
  "checkpoint_options": {
    "existed_ckpt": "",
    "save_ckpt_steps": 2500,
    "keep_ckpt_max": 50,
    "ckpt_prefix": "ckpt",
    "ckpt_path": "checkpoints"
  }
}