From 9774505ecd0304b7e6716d97d158d24e42643afc Mon Sep 17 00:00:00 2001
From: anthonyaje
Date: Mon, 10 Aug 2020 16:54:14 -0400
Subject: [PATCH] Fixed dataset cpp api iterator build function

---
 .../ccsrc/minddata/dataset/api/iterator.cc    | 25 +++++---
 .../ut/cpp/dataset/c_api_dataset_ops_test.cc  | 52 +++++++++++++++++++
 2 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc
index e60ddc7643..9d1e4a96bf 100644
--- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc
@@ -61,13 +61,22 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
       RETURN_STATUS_UNEXPECTED("Node operation returned nothing");
     }
 
-    auto root_op = root_ops.front();
-
-    RETURN_UNEXPECTED_IF_NULL(root_op);
-
-    RETURN_IF_NOT_OK(tree_->AssociateNode(root_op));
-
-    q.push(std::make_pair(ds, root_op));
+    // Iterate through all the DatasetOps returned by Dataset's Build(), associate them
+    // with the execution tree and add the child and parent relationship between the nodes
+    // Note that some Dataset objects might return more than one DatasetOps
+    // e.g. MapDataset will return [ProjectOp, MapOp] if project_columns is set for MapDataset
+    std::shared_ptr<DatasetOp> prev_op = nullptr;
+    for (const auto &op : root_ops) {
+      // Guard every op against null, as the replaced code did for the root op
+      RETURN_UNEXPECTED_IF_NULL(op);
+      RETURN_IF_NOT_OK(tree_->AssociateNode(op));
+      if (prev_op != nullptr) {
+        RETURN_IF_NOT_OK(prev_op->AddChild(op));
+      }
+      prev_op = op;
+    }
+    // Push the last DatasetOp onto the queue used by the BFS traversal below.
+    q.push(std::make_pair(ds, root_ops.back()));
 
     // Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes)
     while (!q.empty()) {
@@ -94,7 +103,7 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
         q.push(std::make_pair(child, child_ops.back()));
       }
     }
-    RETURN_IF_NOT_OK(tree_->AssignRoot(root_op));
+    RETURN_IF_NOT_OK(tree_->AssignRoot(root_ops.front()));
   }
 
   // Launch the execution tree.
diff --git a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
index 57f2636ddd..23751cccee 100644
--- a/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
+++ b/tests/ut/cpp/dataset/c_api_dataset_ops_test.cc
@@ -425,6 +425,58 @@ TEST_F(MindDataTestPipeline, TestProjectMap) {
   iter->Stop();
 }
+
+TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline.TestProjectMapAutoInjection";
+
+  // Create an ImageFolder Dataset
+  std::string folder_path = datasets_root_path_ + "/testPK/data/";
+  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, RandomSampler(false, 10));
+  EXPECT_NE(ds, nullptr);
+
+  // Create a Repeat operation on ds
+  int32_t repeat_num = 2;
+  ds = ds->Repeat(repeat_num);
+  EXPECT_NE(ds, nullptr);
+
+  // Create objects for the tensor ops
+  std::shared_ptr<TensorOperation> resize_op = vision::Resize({30, 30});
+  EXPECT_NE(resize_op, nullptr);
+
+  // Create a Map operation on ds
+  // {"image"} is the project columns. This will trigger auto injection of ProjectOp after MapOp.
+  ds = ds->Map({resize_op}, {}, {}, {"image"});
+  EXPECT_NE(ds, nullptr);
+
+  // Create an iterator over the result of the above dataset
+  // This will trigger the creation of the Execution Tree and launch it.
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+
+  // Iterate the dataset and get each row
+  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
+  iter->GetNextRow(&row);
+
+  // 'label' is dropped during the project op
+  EXPECT_EQ(row.find("label"), row.end());
+  // 'image' column should still exist
+  EXPECT_NE(row.find("image"), row.end());
+
+  uint64_t i = 0;
+  while (row.size() != 0) {
+    i++;
+    auto image = row["image"];
+    MS_LOG(INFO) << "Tensor image shape: " << image->shape();
+    EXPECT_EQ(image->shape()[0], 30);
+    iter->GetNextRow(&row);
+  }
+
+  EXPECT_EQ(i, 20);
+
+  // Manually terminate the pipeline
+  iter->Stop();
+}
+
 
 TEST_F(MindDataTestPipeline, TestZipSuccess) {
   // Testing the member zip() function
   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestZipSuccess.";