|
|
|
|
@ -82,6 +82,70 @@ TEST_F(MindDataTestPipeline, TestTextFileDatasetBasic) {
|
|
|
|
|
GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Builds two TextFile datasets over the same file, repeats each a different
// number of times, concatenates them, and checks the total number of rows
// produced by the resulting pipeline.
TEST_F(MindDataTestPipeline, TestTextFileDatasetBasicWithPipeline) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileDatasetBasicWithPipeline.";
  // Test TextFile Dataset with single text file and many default inputs

  // Set configuration
  // The global seed and worker count are saved here so they can be restored at the end of the test.
  uint32_t original_seed = GlobalContext::config_manager()->seed();
  uint32_t original_num_parallel_workers = GlobalContext::config_manager()->num_parallel_workers();
  MS_LOG(DEBUG) << "ORIGINAL seed: " << original_seed << ", num_parallel_workers: " << original_num_parallel_workers;
  GlobalContext::config_manager()->set_seed(987);
  GlobalContext::config_manager()->set_num_parallel_workers(4);

  // Create two TextFile Dataset, with single text file
  // Note: 1.txt has 3 rows
  // Use 2 samples
  // Use defaults for other input parameters
  std::string tf_file1 = datasets_root_path_ + "/testTextFileDataset/1.txt";
  std::shared_ptr<Dataset> ds1 = TextFile({tf_file1}, 2);
  std::shared_ptr<Dataset> ds2 = TextFile({tf_file1}, 2);
  EXPECT_NE(ds1, nullptr);
  EXPECT_NE(ds2, nullptr);

  // Create two Repeat operation on ds
  int32_t repeat_num = 2;
  ds1 = ds1->Repeat(repeat_num);
  EXPECT_NE(ds1, nullptr);
  repeat_num = 3;
  ds2 = ds2->Repeat(repeat_num);
  EXPECT_NE(ds2, nullptr);

  // Create a Concat operation on the ds
  ds1 = ds1->Concat({ds2});
  EXPECT_NE(ds1, nullptr);

  // Create an iterator over the result of the above dataset.
  // This will trigger the creation of the Execution Tree and launch it.
  std::shared_ptr<Iterator> iter = ds1->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Iterate the dataset and get each row
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);

  EXPECT_NE(row.find("text"), row.end());

  // Only the number of rows is verified; the text content of each row is just logged.
  uint64_t i = 0;
  while (row.size() != 0) {
    auto text = row["text"];
    MS_LOG(INFO) << "Tensor text shape: " << text->shape();
    i++;
    iter->GetNextRow(&row);
  }

  // Expect 10 samples: 2 samples x 2 repeats (ds1) + 2 samples x 3 repeats (ds2)
  // Unsigned literal keeps the comparison with the uint64_t counter sign-consistent.
  EXPECT_EQ(i, 10u);

  // Manually terminate the pipeline
  iter->Stop();

  // Restore configuration
  GlobalContext::config_manager()->set_seed(original_seed);
  GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
|
|
|
|
|
|
|
|
|
|
TEST_F(MindDataTestPipeline, TestTextFileGetters) {
|
|
|
|
|
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileGetters.";
|
|
|
|
|
// Test TextFile Dataset with single text file and many default inputs
|
|
|
|
|
|