!9258 add squad for bert

From: @yoonlee666 Reviewed-by: @c_34,@guoqi1024 Signed-off-by: @c_34
4 years ago · 989744c61a
parent ca66aef549 a744ef9113
commit 989744c61a
12 changed files with 740 additions and 59 deletions
--- a/model_zoo/official/nlp/bert/run_classifier.py
+++ b/model_zoo/official/nlp/bert/run_classifier.py
@ -144,12 +144,14 @@ def run_classifier():
    parser.add_argument("--do_eval", type=str, default="false", choices=["true", "false"],
                        help="Enable eval, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
-    parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.")
-    parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.")
+    parser.add_argument("--epoch_num", type=int, default=3, help="Epoch number, default is 3.")
+    parser.add_argument("--num_class", type=int, default=2, help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=["true", "false"],
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=["true", "false"],
                        help="Enable eval data shuffle, default is false")
+    parser.add_argument("--train_batch_size", type=int, default=32, help="Train batch size, default is 32")
+    parser.add_argument("--eval_batch_size", type=int, default=1, help="Eval batch size, default is 1")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path")
    parser.add_argument("--load_pretrain_checkpoint_path", type=str, default="", help="Load checkpoint file path")
    parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path")
@ -188,7 +190,7 @@ def run_classifier():
                          assessment_method=assessment_method)

    if args_opt.do_train.lower() == "true":
-        ds = create_classification_dataset(batch_size=optimizer_cfg.batch_size, repeat_count=1,
+        ds = create_classification_dataset(batch_size=args_opt.train_batch_size, repeat_count=1,
                                           assessment_method=assessment_method,
                                           data_file_path=args_opt.train_data_file_path,
                                           schema_file_path=args_opt.schema_file_path,
@ -204,7 +206,7 @@ def run_classifier():
                                                           ds.get_dataset_size(), epoch_num, "classifier")

    if args_opt.do_eval.lower() == "true":
-        ds = create_classification_dataset(batch_size=optimizer_cfg.batch_size, repeat_count=1,
+        ds = create_classification_dataset(batch_size=args_opt.eval_batch_size, repeat_count=1,
                                           assessment_method=assessment_method,
                                           data_file_path=args_opt.eval_data_file_path,
                                           schema_file_path=args_opt.schema_file_path,
--- a/model_zoo/official/nlp/bert/run_ner.py
+++ b/model_zoo/official/nlp/bert/run_ner.py
@ -97,14 +97,12 @@ def eval_result_print(assessment_method="accuracy", callback=None):
    else:
        raise ValueError("Assessment method not supported, support: [accuracy, f1, mcc, spearman_correlation]")

-def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_method="accuracy", data_file="",
-            load_checkpoint_path="", vocab_file="", label_file="", tag_to_index=None):
+def do_eval(dataset=None, network=None, use_crf="", num_class=41, assessment_method="accuracy", data_file="",
+            load_checkpoint_path="", vocab_file="", label_file="", tag_to_index=None, batch_size=1):
    """ do eval """
    if load_checkpoint_path == "":
        raise ValueError("Finetune model missed, evaluation task must load finetune model!")
-    if assessment_method == "clue_benchmark":
-        optimizer_cfg.batch_size = 1
-    net_for_pretraining = network(bert_net_cfg, optimizer_cfg.batch_size, False, num_class,
+    net_for_pretraining = network(bert_net_cfg, batch_size, False, num_class,
                                  use_crf=(use_crf.lower() == "true"), tag_to_index=tag_to_index)
    net_for_pretraining.set_train(False)
    param_dict = load_checkpoint(load_checkpoint_path)
@ -142,7 +140,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth

 def parse_args():
    """set and check parameters."""
-    parser = argparse.ArgumentParser(description="run classifier")
+    parser = argparse.ArgumentParser(description="run ner")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"],
                        help="Device type, default is Ascend")
    parser.add_argument("--assessment_method", type=str, default="F1", choices=["F1", "clue_benchmark"],
@ -154,12 +152,14 @@ def parse_args():
    parser.add_argument("--use_crf", type=str, default="false", choices=["true", "false"],
                        help="Use crf, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
-    parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.")
-    parser.add_argument("--num_class", type=int, default="41", help="The number of class, default is 41.")
+    parser.add_argument("--epoch_num", type=int, default=5, help="Epoch number, default is 5.")
+    parser.add_argument("--num_class", type=int, default=41, help="The number of class, default is 41.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=["true", "false"],
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=["true", "false"],
                        help="Enable eval data shuffle, default is false")
+    parser.add_argument("--train_batch_size", type=int, default=32, help="Train batch size, default is 32")
+    parser.add_argument("--eval_batch_size", type=int, default=1, help="Eval batch size, default is 1")
    parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path, used in clue benchmark")
    parser.add_argument("--label_file_path", type=str, default="", help="label file path, used in clue benchmark")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path")
@ -184,6 +184,8 @@ def parse_args():
        raise ValueError("'label_file_path' must be set to use crf")
    if args_opt.assessment_method.lower() == "clue_benchmark" and args_opt.label_file_path == "":
        raise ValueError("'label_file_path' must be set to do clue benchmark")
+    if args_opt.assessment_method.lower() == "clue_benchmark":
+        args_opt.eval_batch_size = 1
    return args_opt


@ -217,11 +219,11 @@ def run_ner():
        number_labels = len(tag_to_index)
    else:
        number_labels = args_opt.num_class
-    netwithloss = BertNER(bert_net_cfg, optimizer_cfg.batch_size, True, num_labels=number_labels,
-                          use_crf=(args_opt.use_crf.lower() == "true"),
-                          tag_to_index=tag_to_index, dropout_prob=0.1)
    if args_opt.do_train.lower() == "true":
-        ds = create_ner_dataset(batch_size=optimizer_cfg.batch_size, repeat_count=1,
+        netwithloss = BertNER(bert_net_cfg, args_opt.train_batch_size, True, num_labels=number_labels,
+                              use_crf=(args_opt.use_crf.lower() == "true"),
+                              tag_to_index=tag_to_index, dropout_prob=0.1)
+        ds = create_ner_dataset(batch_size=args_opt.train_batch_size, repeat_count=1,
                                assessment_method=assessment_method, data_file_path=args_opt.train_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
@ -236,12 +238,13 @@ def run_ner():
                                                           ds.get_dataset_size(), epoch_num, "ner")

    if args_opt.do_eval.lower() == "true":
-        ds = create_ner_dataset(batch_size=optimizer_cfg.batch_size, repeat_count=1,
+        ds = create_ner_dataset(batch_size=args_opt.eval_batch_size, repeat_count=1,
                                assessment_method=assessment_method, data_file_path=args_opt.eval_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
-        do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method, args_opt.eval_data_file_path,
-                load_finetune_checkpoint_path, args_opt.vocab_file_path, args_opt.label_file_path, tag_to_index)
+        do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method,
+                args_opt.eval_data_file_path, load_finetune_checkpoint_path, args_opt.vocab_file_path,
+                args_opt.label_file_path, tag_to_index, args_opt.eval_batch_size)

 if __name__ == "__main__":
    run_ner()
--- a/model_zoo/official/nlp/bert/run_squad.py
+++ b/model_zoo/official/nlp/bert/run_squad.py
@ -22,9 +22,6 @@ import collections
 from src.bert_for_finetune import BertSquadCell, BertSquad
 from src.finetune_eval_config import optimizer_cfg, bert_net_cfg
 from src.dataset import create_squad_dataset
-from src import tokenization
-from src.create_squad_data import read_squad_examples, convert_examples_to_features
-from src.run_squad import write_predictions
 from src.utils import make_directory, LossCallBack, LoadNewestCkpt, BertLearningRate
 import mindspore.common.dtype as mstype
 from mindspore import context
@ -85,22 +82,10 @@ def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoin
    model.train(epoch_num, dataset, callbacks=callbacks)


-def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", seq_length=384):
+def do_eval(dataset=None, load_checkpoint_path="", eval_batch_size=1):
    """ do eval """
    if load_checkpoint_path == "":
        raise ValueError("Finetune model missed, evaluation task must load finetune model!")
-    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
-    eval_examples = read_squad_examples(eval_json, False)
-    eval_features = convert_examples_to_features(
-        examples=eval_examples,
-        tokenizer=tokenizer,
-        max_seq_length=seq_length,
-        doc_stride=128,
-        max_query_length=64,
-        is_training=False,
-        output_fn=None,
-        verbose_logging=False)
-
    net = BertSquad(bert_net_cfg, False, 2)
    net.set_train(False)
    param_dict = load_checkpoint(load_checkpoint_path)
@ -123,7 +108,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
        start = logits[1].asnumpy()
        end = logits[2].asnumpy()

-        for i in range(optimizer_cfg.batch_size):
+        for i in range(eval_batch_size):
            unique_id = int(ids[i])
            start_logits = [float(x) for x in start[i].flat]
            end_logits = [float(x) for x in end[i].flat]
@ -131,11 +116,11 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="",
                unique_id=unique_id,
                start_logits=start_logits,
                end_logits=end_logits))
-    write_predictions(eval_examples, eval_features, output, 20, 30, True, "./predictions.json", None, None)
+    return output

 def run_squad():
    """run squad task"""
-    parser = argparse.ArgumentParser(description="run classifier")
+    parser = argparse.ArgumentParser(description="run squad")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"],
                        help="Device type, default is Ascend")
    parser.add_argument("--do_train", type=str, default="false", choices=["true", "false"],
@ -143,12 +128,14 @@ def run_squad():
    parser.add_argument("--do_eval", type=str, default="false", choices=["true", "false"],
                        help="Eable eval, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
-    parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.")
-    parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.")
+    parser.add_argument("--epoch_num", type=int, default=3, help="Epoch number, default is 3.")
+    parser.add_argument("--num_class", type=int, default=2, help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=["true", "false"],
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=["true", "false"],
                        help="Enable eval data shuffle, default is false")
+    parser.add_argument("--train_batch_size", type=int, default=32, help="Train batch size, default is 32")
+    parser.add_argument("--eval_batch_size", type=int, default=1, help="Eval batch size, default is 1")
    parser.add_argument("--vocab_file_path", type=str, default="", help="Vocab file path")
    parser.add_argument("--eval_json_path", type=str, default="", help="Evaluation json file path, can be eval.json")
    parser.add_argument("--save_finetune_checkpoint_path", type=str, default="", help="Save checkpoint path")
@ -156,8 +143,6 @@ def run_squad():
    parser.add_argument("--load_finetune_checkpoint_path", type=str, default="", help="Load checkpoint file path")
    parser.add_argument("--train_data_file_path", type=str, default="",
                        help="Data path, it is better to use absolute path")
-    parser.add_argument("--eval_data_file_path", type=str, default="",
-                        help="Data path, it is better to use absolute path")
    parser.add_argument("--schema_file_path", type=str, default="",
                        help="Schema path, it is better to use absolute path")
    args_opt = parser.parse_args()
@ -171,8 +156,6 @@ def run_squad():
    if args_opt.do_train.lower() == "true" and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when do finetune task")
    if args_opt.do_eval.lower() == "true":
-        if args_opt.eval_data_file_path == "":
-            raise ValueError("'eval_data_file_path' must be set when do evaluation task")
        if args_opt.vocab_file_path == "":
            raise ValueError("'vocab_file_path' must be set when do evaluation task")
        if args_opt.eval_json_path == "":
@ -193,7 +176,7 @@ def run_squad():
    netwithloss = BertSquad(bert_net_cfg, True, 2, dropout_prob=0.1)

    if args_opt.do_train.lower() == "true":
-        ds = create_squad_dataset(batch_size=optimizer_cfg.batch_size, repeat_count=1,
+        ds = create_squad_dataset(batch_size=args_opt.train_batch_size, repeat_count=1,
                                  data_file_path=args_opt.train_data_file_path,
                                  schema_file_path=args_opt.schema_file_path,
                                  do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
@ -207,12 +190,29 @@ def run_squad():
                                                           ds.get_dataset_size(), epoch_num, "squad")

    if args_opt.do_eval.lower() == "true":
-        ds = create_squad_dataset(batch_size=optimizer_cfg.batch_size, repeat_count=1,
-                                  data_file_path=args_opt.eval_data_file_path,
+        from src import tokenization
+        from src.create_squad_data import read_squad_examples, convert_examples_to_features
+        from src.squad_get_predictions import write_predictions
+        from src.squad_postprocess import SQuad_postprocess
+        tokenizer = tokenization.FullTokenizer(vocab_file=args_opt.vocab_file_path, do_lower_case=True)
+        eval_examples = read_squad_examples(args_opt.eval_json_path, False)
+        eval_features = convert_examples_to_features(
+            examples=eval_examples,
+            tokenizer=tokenizer,
+            max_seq_length=bert_net_cfg.seq_length,
+            doc_stride=128,
+            max_query_length=64,
+            is_training=False,
+            output_fn=None,
+            vocab_file=args_opt.vocab_file_path)
+        ds = create_squad_dataset(batch_size=args_opt.eval_batch_size, repeat_count=1,
+                                  data_file_path=eval_features,
                                  schema_file_path=args_opt.schema_file_path, is_training=False,
                                  do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
-        do_eval(ds, args_opt.vocab_file_path, args_opt.eval_json_path,
-                load_finetune_checkpoint_path, bert_net_cfg.seq_length)
+        outputs = do_eval(ds, load_finetune_checkpoint_path, args_opt.eval_batch_size)
+        all_predictions = write_predictions(eval_examples, eval_features, outputs, 20, 30, True)
+        SQuad_postprocess(args_opt.eval_json_path, all_predictions, output_metrics="output.json")
+

 if __name__ == "__main__":
    run_squad()
--- a/model_zoo/official/nlp/bert/scripts/run_classifier.sh
+++ b/model_zoo/official/nlp/bert/scripts/run_classifier.sh
@ -32,13 +32,15 @@ python ${PROJECT_DIR}/../run_classifier.py  \
    --do_eval="false" \
    --assessment_method="Accuracy" \
    --device_id=0 \
-    --epoch_num=1 \
+    --epoch_num=3 \
    --num_class=2 \
    --train_data_shuffle="true" \
    --eval_data_shuffle="false" \
+    --train_batch_size=32 \
+    --eval_batch_size=1 \
    --save_finetune_checkpoint_path="" \
    --load_pretrain_checkpoint_path="" \
    --load_finetune_checkpoint_path="" \
    --train_data_file_path="" \
    --eval_data_file_path="" \
-    --schema_file_path="" > classfifier_log.txt 2>&1 &
+    --schema_file_path="" > classifier_log.txt 2>&1 &
--- a/model_zoo/official/nlp/bert/scripts/run_ner.sh
+++ b/model_zoo/official/nlp/bert/scripts/run_ner.sh
@ -33,10 +33,12 @@ python ${PROJECT_DIR}/../run_ner.py  \
    --assessment_method="F1" \
    --use_crf="false" \
    --device_id=0 \
-    --epoch_num=1 \
-    --num_class=2 \
+    --epoch_num=5 \
+    --num_class=41 \
    --train_data_shuffle="true" \
    --eval_data_shuffle="false" \
+    --train_batch_size=32 \
+    --eval_batch_size=1 \
    --vocab_file_path="" \
    --label_file_path="" \
    --save_finetune_checkpoint_path="" \
--- a/model_zoo/official/nlp/bert/scripts/run_squad.sh
+++ b/model_zoo/official/nlp/bert/scripts/run_squad.sh
@ -31,15 +31,16 @@ python ${PROJECT_DIR}/../run_squad.py  \
    --do_train="true" \
    --do_eval="false" \
    --device_id=0 \
-    --epoch_num=1 \
+    --epoch_num=3 \
    --num_class=2 \
    --train_data_shuffle="true" \
    --eval_data_shuffle="false" \
+    --train_batch_size=32 \
+    --eval_batch_size=1 \
    --vocab_file_path="" \
-    --eval_json_path="" \
    --save_finetune_checkpoint_path="" \
    --load_pretrain_checkpoint_path="" \
    --load_finetune_checkpoint_path="" \
    --train_data_file_path="" \
-    --eval_data_file_path="" \
+    --eval_json_path="" \
    --schema_file_path="" > squad_log.txt 2>&1 &
--- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
@ -325,6 +325,8 @@ class BertSquad(nn.Cell):
            total_loss = (start_loss + end_loss) / 2.0
        else:
            start_logits = self.squeeze(logits[:, :, 0:1])
+            start_logits = start_logits + 100 * input_mask
            end_logits = self.squeeze(logits[:, :, 1:2])
+            end_logits = end_logits + 100 * input_mask
            total_loss = (unique_id, start_logits, end_logits)
        return total_loss
--- a/model_zoo/official/nlp/bert/src/create_squad_data.py
+++ b/model_zoo/official/nlp/bert/src/create_squad_data.py
--- a/model_zoo/official/nlp/bert/src/dataset.py
+++ b/model_zoo/official/nlp/bert/src/dataset.py
@ -92,6 +92,11 @@ def create_classification_dataset(batch_size=1, repeat_count=1, assessment_metho
    return ds


+def generator_squad(data_features):
+    """Yield one sample tuple per entry of `data_features`.
+
+    Each yielded tuple is (input_ids, input_mask, segment_ids, unique_id),
+    read from the attributes of the same names on the feature object.
+    """
+    for item in data_features:
+        sample = (item.input_ids, item.input_mask, item.segment_ids, item.unique_id)
+        yield sample
+
+
 def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, schema_file_path=None,
                         is_training=True, do_shuffle=True):
    """create finetune or evaluation dataset"""
@ -104,11 +109,12 @@ def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None, sche
        ds = ds.map(operations=type_cast_op, input_columns="start_positions")
        ds = ds.map(operations=type_cast_op, input_columns="end_positions")
    else:
-        ds = de.TFRecordDataset([data_file_path], schema_file_path if schema_file_path != "" else None,
-                                columns_list=["input_ids", "input_mask", "segment_ids", "unique_ids"])
+        ds = de.GeneratorDataset(generator_squad(data_file_path), shuffle=do_shuffle,
+                                 column_names=["input_ids", "input_mask", "segment_ids", "unique_ids"])
    ds = ds.map(operations=type_cast_op, input_columns="segment_ids")
    ds = ds.map(operations=type_cast_op, input_columns="input_mask")
    ds = ds.map(operations=type_cast_op, input_columns="input_ids")
+    ds = ds.map(operations=type_cast_op, input_columns="unique_ids")
    ds = ds.repeat(repeat_count)
    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
--- a/model_zoo/official/nlp/bert/src/finetune_eval_config.py
+++ b/model_zoo/official/nlp/bert/src/finetune_eval_config.py
@ -22,7 +22,6 @@ import mindspore.common.dtype as mstype
 from .bert_model import BertConfig

 optimizer_cfg = edict({
-    'batch_size': 16,
    'optimizer': 'Lamb',
    'AdamWeightDecay': edict({
        'learning_rate': 2e-5,
--- a/model_zoo/official/nlp/bert/src/squad_get_predictions.py
+++ b/model_zoo/official/nlp/bert/src/squad_get_predictions.py
--- a/model_zoo/official/nlp/bert/src/squad_postprocess.py
+++ b/model_zoo/official/nlp/bert/src/squad_postprocess.py
@ -0,0 +1,97 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""evaluation script for SQuAD v1.1"""
+
+from collections import Counter
+import string
+import re
+import json
+import sys
+
+def normalize_answer(s):
+    """Canonicalize an answer string before it is compared or tokenized.
+
+    Steps, applied in this order: lower-case the text, delete every character
+    in `string.punctuation`, replace the stand-alone words "a", "an" and "the"
+    with a space, then collapse whitespace runs to single spaces (which also
+    trims both ends).
+    """
+    lowered = s.lower()
+    without_punctuation = lowered.translate(str.maketrans('', '', string.punctuation))
+    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punctuation)
+    return ' '.join(without_articles.split())
+
+def f1_score(prediction, ground_truth):
+    """Compute the token-level F1 between a prediction and one reference.
+
+    Both strings are normalized with `normalize_answer` and split on
+    whitespace. The overlap is the multiset intersection of the two token
+    lists. Returns the integer 0 when no token is shared, otherwise the
+    harmonic mean of precision and recall as a float.
+    """
+    pred_tokens = normalize_answer(prediction).split()
+    truth_tokens = normalize_answer(ground_truth).split()
+    shared = Counter(pred_tokens) & Counter(truth_tokens)
+    overlap = sum(shared.values())
+    if overlap == 0:
+        return 0
+    precision = 1.0 * overlap / len(pred_tokens)
+    recall = 1.0 * overlap / len(truth_tokens)
+    return (2 * precision * recall) / (precision + recall)
+
+def exact_match_score(prediction, ground_truth):
+    """Return True when both answers are equal after `normalize_answer`."""
+    normalized_prediction = normalize_answer(prediction)
+    normalized_truth = normalize_answer(ground_truth)
+    return normalized_prediction == normalized_truth
+
+def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
+    """Return the best score of `prediction` against any reference answer.
+
+    `metric_fn(prediction, ground_truth)` is evaluated once per entry of
+    `ground_truths` and the maximum is returned. An empty `ground_truths`
+    makes `max` raise ValueError, so callers must filter that case out.
+    """
+    scores = [metric_fn(prediction, reference) for reference in ground_truths]
+    return max(scores)
+
+def evaluate(dataset, predictions):
+    """Score predictions against a SQuAD-style dataset.
+
+    Args:
+        dataset: iterable of articles. Each article has a 'paragraphs' list,
+            each paragraph a 'qas' list, and each qa an 'id' plus an
+            'answers' list of dicts carrying a 'text' entry.
+        predictions: mapping from question id to the predicted answer text.
+
+    Returns:
+        dict with 'exact_match' and 'f1', both as percentages over every
+        question seen. Questions without a prediction (reported on stderr)
+        or without reference answers still count toward the total and
+        contribute 0. A dataset with no questions yields 0.0 for both
+        metrics instead of raising ZeroDivisionError.
+    """
+    f1 = exact_match = total = 0
+    for article in dataset:
+        for paragraph in article['paragraphs']:
+            for qa in paragraph['qas']:
+                total += 1
+                if qa['id'] not in predictions:
+                    message = 'Unanswered question ' + qa['id'] + \
+                              ' will receive score 0.'
+                    print(message, file=sys.stderr)
+                    continue
+                ground_truths = [answer['text'] for answer in qa['answers']]
+                # max() over an empty list would raise, so skip such questions.
+                if not ground_truths:
+                    continue
+                prediction = predictions[qa['id']]
+                exact_match += metric_max_over_ground_truths(
+                    exact_match_score, prediction, ground_truths)
+                f1 += metric_max_over_ground_truths(
+                    f1_score, prediction, ground_truths)
+
+    # Nothing to average over: report zero scores rather than divide by zero.
+    if total == 0:
+        return {'exact_match': 0.0, 'f1': 0.0}
+
+    exact_match = 100.0 * exact_match / total
+    f1 = 100.0 * f1 / total
+    return {'exact_match': exact_match, 'f1': f1}
+
+
+def SQuad_postprocess(dataset_file, all_predictions, output_metrics="output.json"):
+    """Evaluate predictions against a SQuAD json file and save the metrics.
+
+    Args:
+        dataset_file: path to a json file whose top-level 'data' entry holds
+            the articles passed to `evaluate`.
+        all_predictions: mapping from question id to predicted answer text.
+        output_metrics: path the metrics json is written to.
+
+    The metrics returned by `evaluate` are printed to stdout and written to
+    `output_metrics` as json.
+    """
+    # Read as UTF-8 explicitly so decoding does not depend on the platform's
+    # default locale encoding.
+    with open(dataset_file, encoding="utf-8") as ds:
+        dataset_json = json.load(ds)
+        dataset = dataset_json['data']
+    re_json = evaluate(dataset, all_predictions)
+    metrics_text = json.dumps(re_json)
+    print(metrics_text)
+    with open(output_metrics, 'w', encoding="utf-8") as wr:
+        wr.write(metrics_text)