!9601 fix gnmtv2 doc issues

From: @zhaojichen
mindspore-ci-bot 4 years ago committed by Gitee
commit 30ea5c3b28

@ -39,7 +39,7 @@ Attention mechanism: uses the standardized Bahdanau attention mechanism. First,
Note that you can run the scripts with the dataset mentioned in the original paper or with datasets widely used in the relevant domain/network architecture. The following sections describe how to run the scripts using the datasets listed below.
- WMT Englis-German for training.
- WMT English-German for training.
- WMT newstest2014 for evaluation.
# [Environment Requirements](#contents)
@ -178,7 +178,7 @@ Almost all required options and parameters can be easily assigned, including the
'hidden_size': 1024 # the output's last dimension of dynamicRNN
'initializer_range': 0.1 # initializer range
'max_decode_length': 50 # max length of decoder
'lr': 2e-1 # initial learning rate
'lr': 2e-3 # initial learning rate
'lr_scheduler': 'WarmupMultiStepLR' # learning rate scheduler
'existed_ckpt': "" # the absolute full path to save the checkpoint file
@ -242,7 +242,7 @@ The `VOCAB_ADDR` is the vocabulary address, `BPE_CODE_ADDR` is the bpe code addr
| Resource | Ascend 910 |
| Uploaded Date | 11/06/2020 (month/day/year) |
| MindSpore Version | 1.0.0 |
| Dataset | WMT Englis-German for training |
| Dataset | WMT English-German for training |
| Training Parameters | epoch=6, batch_size=128 |
| Optimizer | Adam |
| Loss Function | Softmax Cross Entropy |

@ -4,7 +4,7 @@
"epochs": 6,
"batch_size": 128,
"dataset_schema": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json",
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.tfrecord-001-of-001",
"pre_train_dataset": "/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.mindrecord",
"fine_tune_dataset": null,
"valid_dataset": null,
"dataset_sink_mode": true

@ -93,7 +93,7 @@ class GNMTConfig:
init_loss_scale (int): Initialized loss scale.
loss_scale_factor (int): Loss scale factor.
scale_window (int): Window size of loss scale.
lr_scheduler (str): Whether use lr_scheduler, only support "ISR" now.
lr_scheduler (str): Learning rate scheduler. Please see the Note below.
optimizer (str): Optimizer for training, e.g. Adam, Lamb, momentum. Default: Adam.
lr (float): Initial learning rate.
min_lr (float): Minimum learning rate.

@ -6,7 +6,7 @@
"dataset_schema": "/home/workspace/dataset_menu/newstest2014.en.json",
"pre_train_dataset": null,
"fine_tune_dataset": null,
"test_dataset": "/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001",
"test_dataset": "/home/workspace/dataset_menu/newstest2014.en.mindrecord",
"valid_dataset": null,
"dataset_sink_mode": true

@ -15,13 +15,13 @@
# ============================================================================
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "Please run the script as: "
echo "sh run_distributed_train_ascend.sh RANK_TABLE_ADDR DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
echo "for example:"
echo "sh run_distributed_train_ascend.sh \
/home/workspace/rank_table_8p.json \
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
echo "It is better to use absolute path."
echo "=============================================================================================================="

@ -15,13 +15,13 @@
# ============================================================================
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "Please run the script as: "
echo "sh run_standalone_eval_ascend.sh DATASET_SCHEMA_TEST TEST_DATASET EXISTED_CKPT_PATH \
echo "for example:"
echo "sh run_standalone_eval_ascend.sh \
/home/workspace/dataset_menu/newstest2014.en.json \
/home/workspace/dataset_menu/newstest2014.en.tfrecord-001-of-001 \
/home/workspace/dataset_menu/newstest2014.en.mindrecord \
/home/workspace/gnmt_v2/gnmt-6_3452.ckpt \
/home/workspace/wmt16_de_en/vocab.bpe.32000 \
/home/workspace/wmt16_de_en/bpe.32000 \
@ -53,7 +53,7 @@ cp ../*.py ./eval
cp -r ../src ./eval
cp -r ../config ./eval
cd ./eval || exit
echo "start eval for device $DEVICE_ID"
echo "start for evaluation"
env > env.log
python eval.py \
--config=${current_exec_path}/eval/config/config_test.json \

@ -15,12 +15,12 @@
# ============================================================================
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "Please run the script as: "
echo "sh run_standalone_train_ascend.sh DATASET_SCHEMA_TRAIN PRE_TRAIN_DATASET"
echo "for example:"
echo "sh run_standalone_train_ascend.sh \
/home/workspace/dataset_menu/train.tok.clean.bpe.32000.en.json \
echo "It is better to use absolute path."
echo "=============================================================================================================="
@ -42,10 +42,10 @@ cp ../*.py ./train
cp -r ../src ./train
cp -r ../config ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
echo "start for training"
env > env.log
python train.py \
--config=${current_exec_path}/train/config/config.json \
--dataset_schema_train=$DATASET_SCHEMA_TRAIN \
--pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network${i}.log 2>&1 &
--pre_train_dataset=$PRE_TRAIN_DATASET > log_gnmt_network.log 2>&1 &
cd ..

@ -136,7 +136,7 @@ class BiLingualDataLoader(DataLoader):
columns = ["src", "src_padding", "prev_opt", "target", "tgt_padding"]
with open(self.schema_address, "w", encoding="utf-8") as f:
f.write(' "datasetType":"TF",\n')
f.write(' "datasetType":"MS",\n')
f.write(' "numRows":%s,\n' % provlist[0])
f.write(' "columns":{\n')
t = 1
@ -211,7 +211,7 @@ class TextDataLoader(DataLoader):
columns = ["src", "src_padding"]
with open(self.schema_address, "w", encoding="utf-8") as f:
f.write(' "datasetType":"TF",\n')
f.write(' "datasetType":"MS",\n')
f.write(' "numRows":%s,\n' % provlist[0])
f.write(' "columns":{\n')
t = 1

@ -23,7 +23,7 @@ def _compute_fans(shape):
Computes the number of input and output units for a weight shape.
shape (tuple): Integer shape tuple or TF tensor shape.
shape (tuple): Integer shape tuple or MS tensor shape.
tuple, integer scalars (fan_in, fan_out).
