!9339 Disable getter optimization pass

From: @ezphlow
Reviewed-by: @robingrosman,@nsyca
Signed-off-by: @nsyca
pull/9339/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 9ed31ea2cb

@ -323,6 +323,7 @@ Status DatasetOp::GetNumClasses(int64_t *num_classes) {
return child_[child_.size() - 1]->GetNumClasses(num_classes);
} else {
// when num classes isn't found, the default behavior is to return -1
MS_LOG(WARNING) << "Num classes not defined for : " << Name();
*num_classes = -1;
return Status::OK();
}

@ -54,15 +54,7 @@ Status GetterPass::GetterNodes::RunOnNode(std::shared_ptr<FilterOp> node, bool *
Status GetterPass::RunOnTree(ExecutionTree *tree, bool *modified) {
RETURN_IF_NOT_OK(pass_.Run(tree, modified));
// nested private class variables can be directly accessed by its outer class
for (auto node : pass_.nodes_to_remove_) {
DatasetOp *parent;
node->Parent(&parent, 0);
// only remove a node that is the single child of its parent
if (parent != nullptr && parent->Children().size() == 1) {
RETURN_IF_NOT_OK(node->Remove());
}
}
// currently the getter pass only disables call_back from the execution tree
// clear the callback for selected ops (map when its GetOutputType/Shape)
for (auto node : pass_.nodes_to_clear_callback_) node->ClearCallbacks();

@ -131,7 +131,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text
Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor> &input,
std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
std::vector<std::string> strs(input->Size());
int i = 0;
for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {

@ -29,7 +29,7 @@ namespace dataset {
Status CaseFoldOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
icu::ErrorCode error;
const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed.");

@ -33,11 +33,11 @@ JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::strin
Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
RETURN_UNEXPECTED_IF_NULL(jieba_parser_);
if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor.");
}
std::string_view sentence_v;

@ -35,7 +35,7 @@ NgramOp::NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, int32_t r_le
Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor.");
std::vector<int32_t> offsets; // offsets for each str
std::vector<std::string> res; // holds the result of ngrams
std::string str_buffer; // concat all pad tokens with string interleaved with separators
@ -60,7 +60,7 @@ Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
if (end_ind - start_ind <= n) {
res.emplace_back(std::string()); // push back empty string
} else {
CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition");
CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition.");
for (int i = start_ind; i < end_ind - n; i++) {
res.emplace_back(str_buffer.substr(offsets[i], offsets[i + n] - offsets[i] - separator_.size()));

@ -29,7 +29,7 @@ namespace dataset {
const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc;
Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
icu::ErrorCode error;
const icu::Normalizer2 *normalize = nullptr;
@ -40,26 +40,26 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
}
case NormalizeForm::kNfc: {
normalize = icu::Normalizer2::getNFCInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed.");
break;
}
case NormalizeForm::kNfkc: {
normalize = icu::Normalizer2::getNFKCInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed.");
break;
}
case NormalizeForm::kNfd: {
normalize = icu::Normalizer2::getNFDInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed.");
break;
}
case NormalizeForm::kNfkd: {
normalize = icu::Normalizer2::getNFKDInstance(error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed.");
break;
}
default: {
RETURN_STATUS_UNEXPECTED("unexpected normalize form");
RETURN_STATUS_UNEXPECTED("Unexpected normalize form.");
break;
}
}
@ -68,7 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
icu::StringByteSink<std::string> sink(&strs[i++]);
normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8 failed.");
}
return Tensor::CreateFromVector(strs, input->shape(), output);
}

@ -25,7 +25,7 @@ namespace dataset {
Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text,
std::string *out) const {
CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null");
CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null.");
UErrorCode icu_error = U_ZERO_ERROR;
icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8(text);
matcher->reset(unicode_text);
@ -35,17 +35,18 @@ Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std:
} else {
unicode_out = matcher->replaceFirst(replace_, icu_error);
}
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed");
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed.");
unicode_out.toUTF8String(*out);
return Status::OK();
}
Status RegexReplaceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
IO_CHECK(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
UErrorCode icu_error = U_ZERO_ERROR;
icu::RegexMatcher matcher(pattern_, 0, icu_error);
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "Create icu RegexMatcher failed, you may input one error pattern");
CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error),
"Create icu RegexMatcher failed, you may input one error pattern.");
std::vector<std::string> strs(input->Size());
int i = 0;
for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {

@ -56,7 +56,7 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
}
if (input->Rank() != 0 || input->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("Input tensor should be scalar string tensor.");
}
std::string_view sentence_v;
@ -67,14 +67,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
std::vector<std::string> pieces;
auto status = processor_.Encode(sentence, &pieces);
if (!status.ok()) {
RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error.");
}
RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output));
} else {
std::vector<int> ids;
auto status = processor_.Encode(sentence, &ids);
if (!status.ok()) {
RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error.");
}
RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output));
}
@ -84,15 +84,15 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
Status SentencePieceTokenizerOp::GetModelRealPath(const std::string &model_path, const std::string &filename) {
char real_path[PATH_MAX] = {0};
if (file_path_.size() >= PATH_MAX) {
RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
}
#if defined(_WIN32) || defined(_WIN64)
if (_fullpath(real_path, common::SafeCStr(model_path), PATH_MAX) == nullptr) {
RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
}
#else
if (realpath(common::SafeCStr(model_path), real_path) == nullptr) {
RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
}
#endif
std::string abs_path = real_path;

@ -29,7 +29,7 @@ Status TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output
std::shared_ptr<Tensor> seq1 = input[0];
std::shared_ptr<Tensor> seq2 = input[1];
CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1,
"Both sequences should be of rank 1");
"Both sequences should be of rank 1.");
dsize_t length1 = seq1->shape()[0];
dsize_t length2 = seq2->shape()[0];
dsize_t outLength1 = length1;

@ -31,9 +31,9 @@ const bool UnicodeCharTokenizerOp::kDefWithOffsets = false;
Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor.");
}
std::string_view str;
RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {}));

@ -35,9 +35,9 @@ const bool WhitespaceTokenizerOp::kDefWithOffsets = false;
Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor");
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor.");
}
std::string_view str;
RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {}));

@ -117,7 +117,7 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uin
Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor");
RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor.");
}
dsize_t count = 0;
std::vector<std::string> out_tokens;

@ -95,9 +95,9 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestOutputShapeAndTypePass) {
// +- ( 4) <RandomDataOp>: [workers: 4] [total rows: 44]
//
// verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not
EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos);
EXPECT_EQ(ss_str.find("RepeatOp"), ss_str.npos);
// verify that no ops are removed: Shuffle, Repeat, Project and Batch all remain in the tree
EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos);
EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos);
EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos);
EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);
}
@ -129,8 +129,8 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestDatasetSizePass) {
exe_tree->Print(ss);
std::string ss_str = ss.str();
// verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not
EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos);
// verify that no ops are removed: Shuffle, Repeat, Project and Batch all remain in the tree
EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos);
EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos);
EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos);
EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);

Loading…
Cancel
Save