From db2a8b5e1d027574d4846c15bfcd5193c11fbf62 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 1 Dec 2020 16:57:45 -0500 Subject: [PATCH] Disable getter pass --- .../dataset/engine/datasetops/dataset_op.cc | 1 + .../minddata/dataset/engine/opt/pre/getter_pass.cc | 10 +--------- .../dataset/text/kernels/basic_tokenizer_op.cc | 2 +- .../minddata/dataset/text/kernels/case_fold_op.cc | 2 +- .../dataset/text/kernels/jieba_tokenizer_op.cc | 4 ++-- .../minddata/dataset/text/kernels/ngram_op.cc | 4 ++-- .../dataset/text/kernels/normalize_utf8_op.cc | 14 +++++++------- .../dataset/text/kernels/regex_replace_op.cc | 9 +++++---- .../text/kernels/sentence_piece_tokenizer_op.cc | 12 ++++++------ .../text/kernels/truncate_sequence_pair_op.cc | 2 +- .../text/kernels/unicode_char_tokenizer_op.cc | 4 ++-- .../text/kernels/whitespace_tokenizer_op.cc | 4 ++-- .../dataset/text/kernels/wordpiece_tokenizer_op.cc | 2 +- tests/ut/cpp/dataset/optimization_pass_test.cc | 10 +++++----- 14 files changed, 37 insertions(+), 43 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc index 99ff0f3fd3..f4f468900b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/dataset_op.cc @@ -323,6 +323,7 @@ Status DatasetOp::GetNumClasses(int64_t *num_classes) { return child_[child_.size() - 1]->GetNumClasses(num_classes); } else { // when num classes isn't found, the default behavior is to return -1 + MS_LOG(WARNING) << "Num classes not defined for : " << Name(); *num_classes = -1; return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/getter_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/getter_pass.cc index 994541cf2e..801e9ea602 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/getter_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/getter_pass.cc @@ -54,15 
+54,7 @@ Status GetterPass::GetterNodes::RunOnNode(std::shared_ptr node, bool * Status GetterPass::RunOnTree(ExecutionTree *tree, bool *modified) { RETURN_IF_NOT_OK(pass_.Run(tree, modified)); - // nested private class variables can be directly accessed by its outer class - for (auto node : pass_.nodes_to_remove_) { - DatasetOp *parent; - node->Parent(&parent, 0); - // only remove node whose is a single child of its parent - if (parent != nullptr && parent->Children().size() == 1) { - RETURN_IF_NOT_OK(node->Remove()); - } - } + // currently the getter pass only disables call_back from the execution tree // clear the callback for selected ops (map when its GetOutputType/Shape) for (auto node : pass_.nodes_to_clear_callback_) node->ClearCallbacks(); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc index 69ee1b388c..29be9e4bcc 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc @@ -131,7 +131,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); std::vector strs(input->Size()); int i = 0; for (auto iter = input->begin(); iter != input->end(); iter++) { diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc index a2458a04cd..5d6db7df84 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc @@ -29,7 +29,7 @@ namespace dataset { 
Status CaseFoldOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); icu::ErrorCode error; const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed."); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc index abcf72c9da..d19a84ac11 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/jieba_tokenizer_op.cc @@ -33,11 +33,11 @@ JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::strin Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor."); RETURN_UNEXPECTED_IF_NULL(jieba_parser_); if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor"); + RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor."); } std::string_view sentence_v; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc index 27b8cb6065..f7e93bb5f6 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc @@ -35,7 +35,7 @@ NgramOp::NgramOp(const std::vector &ngrams, int32_t l_len, int32_t r_le Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { 
IO_CHECK(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor."); std::vector offsets; // offsets for each str std::vector res; // holds the result of ngrams std::string str_buffer; // concat all pad tokens with string interleaved with separators @@ -60,7 +60,7 @@ Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptr= 0, "Incorrect loop condition"); + CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition."); for (int i = start_ind; i < end_ind - n; i++) { res.emplace_back(str_buffer.substr(offsets[i], offsets[i + n] - offsets[i] - separator_.size())); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc index 3d3cbf1d5b..9c10c7dbf5 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/normalize_utf8_op.cc @@ -29,7 +29,7 @@ namespace dataset { const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc; Status NormalizeUTF8Op::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); icu::ErrorCode error; const icu::Normalizer2 *normalize = nullptr; @@ -40,26 +40,26 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr &input, std::share } case NormalizeForm::kNfc: { normalize = icu::Normalizer2::getNFCInstance(error); - CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed"); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed."); break; } case NormalizeForm::kNfkc: { 
normalize = icu::Normalizer2::getNFKCInstance(error); - CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed"); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed."); break; } case NormalizeForm::kNfd: { normalize = icu::Normalizer2::getNFDInstance(error); - CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed"); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed."); break; } case NormalizeForm::kNfkd: { normalize = icu::Normalizer2::getNFKDInstance(error); - CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed"); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed."); break; } default: { - RETURN_STATUS_UNEXPECTED("unexpected normalize form"); + RETURN_STATUS_UNEXPECTED("Unexpected normalize form."); break; } } @@ -68,7 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr &input, std::share for (auto iter = input->begin(); iter != input->end(); iter++) { icu::StringByteSink sink(&strs[i++]); normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); - CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed."); + CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8 failed."); } return Tensor::CreateFromVector(strs, input->shape(), output); } diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc index 485413cd52..430888c715 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/regex_replace_op.cc @@ -25,7 +25,7 @@ namespace dataset { Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text, std::string *out) const { - CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null"); + CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != 
nullptr), "Input is null."); UErrorCode icu_error = U_ZERO_ERROR; icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8(text); matcher->reset(unicode_text); @@ -35,17 +35,18 @@ Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std: } else { unicode_out = matcher->replaceFirst(replace_, icu_error); } - CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed"); + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed."); unicode_out.toUTF8String(*out); return Status::OK(); } Status RegexReplaceOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string."); UErrorCode icu_error = U_ZERO_ERROR; icu::RegexMatcher matcher(pattern_, 0, icu_error); - CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "Create icu RegexMatcher failed, you may input one error pattern"); + CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), + "Create icu RegexMatcher failed, you may input one error pattern."); std::vector strs(input->Size()); int i = 0; for (auto iter = input->begin(); iter != input->end(); iter++) { diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/sentence_piece_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/sentence_piece_tokenizer_op.cc index 919f108237..c57658438d 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/sentence_piece_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/sentence_piece_tokenizer_op.cc @@ -56,7 +56,7 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr &input, s } if (input->Rank() != 0 || input->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor"); + RETURN_STATUS_UNEXPECTED("Input tensor should be scalar string tensor."); } 
std::string_view sentence_v; @@ -67,14 +67,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr &input, s std::vector pieces; auto status = processor_.Encode(sentence, &pieces); if (!status.ok()) { - RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error"); + RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error."); } RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output)); } else { std::vector ids; auto status = processor_.Encode(sentence, &ids); if (!status.ok()) { - RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error"); + RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error."); } RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output)); } @@ -84,15 +84,15 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr &input, s Status SentencePieceTokenizerOp::GetModelRealPath(const std::string &model_path, const std::string &filename) { char real_path[PATH_MAX] = {0}; if (file_path_.size() >= PATH_MAX) { - RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid."); + RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid."); } #if defined(_WIN32) || defined(_WIN64) if (_fullpath(real_path, common::SafeCStr(model_path), PATH_MAX) == nullptr) { - RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid."); + RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid."); } #else if (realpath(common::SafeCStr(model_path), real_path) == nullptr) { - RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid."); + RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid."); } #endif std::string abs_path = real_path; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc index 53a803c542..76c054bd6e 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/truncate_sequence_pair_op.cc @@ -29,7 +29,7 @@ Status 
TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output std::shared_ptr seq1 = input[0]; std::shared_ptr seq2 = input[1]; CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1, - "Both sequences should be of rank 1"); + "Both sequences should be of rank 1."); dsize_t length1 = seq1->shape()[0]; dsize_t length2 = seq2->shape()[0]; dsize_t outLength1 = length1; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc index c8b33d0ce4..dcc8f1a639 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/unicode_char_tokenizer_op.cc @@ -31,9 +31,9 @@ const bool UnicodeCharTokenizerOp::kDefWithOffsets = false; Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); - CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor."); if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor."); } std::string_view str; RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc index c872777813..7e5d25aa1e 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/whitespace_tokenizer_op.cc @@ -35,9 +35,9 @@ const bool WhitespaceTokenizerOp::kDefWithOffsets = false; Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); - 
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); + CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor."); if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor"); + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor."); } std::string_view str; RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {})); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc index 04a1274b03..704cd10a7e 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/wordpiece_tokenizer_op.cc @@ -117,7 +117,7 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uin Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) { IO_CHECK_VECTOR(input, output); if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) { - RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor"); + RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor."); } dsize_t count = 0; std::vector out_tokens; diff --git a/tests/ut/cpp/dataset/optimization_pass_test.cc b/tests/ut/cpp/dataset/optimization_pass_test.cc index ded77d42a8..b522875945 100644 --- a/tests/ut/cpp/dataset/optimization_pass_test.cc +++ b/tests/ut/cpp/dataset/optimization_pass_test.cc @@ -95,9 +95,9 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestOutputShapeAndTypePass) { // +- ( 4) : [workers: 4] [total rows: 44] // - // verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not - EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos); - EXPECT_EQ(ss_str.find("RepeatOp"), ss_str.npos); + // verify that no ops are removed: Shuffle, Repeat, Project and Batch ops are all still present +
EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos); + EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos); EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos); EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos); } @@ -129,8 +129,8 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestDatasetSizePass) { exe_tree->Print(ss); std::string ss_str = ss.str(); - // verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not - EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos); + // verify that no ops are removed: Shuffle, Repeat, Project and Batch ops are all still present + EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos); EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos); EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos); EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);