!2763 modify tokenization for transformer model

Merge pull request !2763 from yuchaojie/transformer
pull/2763/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 5e250e26c8

@@ -37,13 +37,13 @@ class SampleInstance():
     def __str__(self):
         s = ""
         s += "source sos tokens: %s\n" % (" ".join(
-            [tokenization.printable_text(x) for x in self.source_sos_tokens]))
+            [tokenization.convert_to_printable(x) for x in self.source_sos_tokens]))
         s += "source eos tokens: %s\n" % (" ".join(
-            [tokenization.printable_text(x) for x in self.source_eos_tokens]))
+            [tokenization.convert_to_printable(x) for x in self.source_eos_tokens]))
         s += "target sos tokens: %s\n" % (" ".join(
-            [tokenization.printable_text(x) for x in self.target_sos_tokens]))
+            [tokenization.convert_to_printable(x) for x in self.target_sos_tokens]))
         s += "target eos tokens: %s\n" % (" ".join(
-            [tokenization.printable_text(x) for x in self.target_eos_tokens]))
+            [tokenization.convert_to_printable(x) for x in self.target_eos_tokens]))
         s += "\n"
         return s
@@ -185,9 +185,9 @@ def main():
         if total_written <= 20:
             logging.info("*** Example ***")
             logging.info("source tokens: %s", " ".join(
-                [tokenization.printable_text(x) for x in instance.source_eos_tokens]))
+                [tokenization.convert_to_printable(x) for x in instance.source_eos_tokens]))
             logging.info("target tokens: %s", " ".join(
-                [tokenization.printable_text(x) for x in instance.target_sos_tokens]))
+                [tokenization.convert_to_printable(x) for x in instance.target_sos_tokens]))
             for feature_name in features.keys():
                 feature = features[feature_name]

@@ -1,4 +1,19 @@
 #!/usr/bin/env perl
+#!/bin/bash
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
 use warnings;
 use strict;

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save