!8540 Added GetColumnNames to C++

From: @mahdirahmanihanzaki
Reviewed-by: @nsyca,@jonyguo
Signed-off-by: @nsyca
pull/8540/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit c54a4a4494

@ -274,6 +274,25 @@ int64_t Dataset::GetNumClasses() {
return rc.IsError() ? -1 : num_classes;
}
std::vector<std::string> Dataset::GetColumnNames() {
std::vector<std::string> col_names;
auto ds = shared_from_this();
Status rc;
std::unique_ptr<NativeRuntimeContext> runtime_context = std::make_unique<NativeRuntimeContext>();
rc = runtime_context->Init();
if (rc.IsError()) {
MS_LOG(ERROR) << "GetColumnNames: Initializing RuntimeContext failed.";
return std::vector<std::string>();
}
rc = tree_getters_->Init(ds->IRNode());
if (rc.IsError()) {
MS_LOG(ERROR) << "GetColumnNames: Initializing TreeGetters failed.";
return std::vector<std::string>();
}
rc = tree_getters_->GetColumnNames(&col_names);
return rc.IsError() ? std::vector<std::string>() : col_names;
}
std::vector<std::pair<std::string, std::vector<int32_t>>> Dataset::GetClassIndexing() {
std::vector<std::pair<std::string, std::vector<int32_t>>> output_class_indexing;
auto ds = shared_from_this();

@ -466,6 +466,22 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) {
return Status::OK();
}
/// \brief Collects the column names of the tree's root operator, ordered by the integer id
///     each name maps to in the root's column_name_id_map.
/// \param[out] output Vector the names are appended to; elements already present are kept.
/// \return Status::OK() on success; an unexpected-error Status if output is null, the root
///     operator is null, or the column name/id map is empty.
Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
  if (output == nullptr) {
    RETURN_STATUS_UNEXPECTED("GetColumnNames: output is a nullptr.");
  }
  std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
  // Guard the dereference below, matching the root check done by GetClassIndexing.
  if (root == nullptr) {
    RETURN_STATUS_UNEXPECTED("Root is a nullptr.");
  }
  const std::unordered_map<std::string, int32_t> &column_name_id_map = root->column_name_id_map();
  if (column_name_id_map.empty()) {
    RETURN_STATUS_UNEXPECTED("GetColumnNames: column_name_id map was empty.");
  }

  // The map is unordered, so copy its entries into a vector and sort them by id.
  std::vector<std::pair<std::string, int32_t>> column_name_id_vector(column_name_id_map.begin(),
                                                                     column_name_id_map.end());
  std::sort(column_name_id_vector.begin(), column_name_id_vector.end(),
            [](const std::pair<std::string, int32_t> &a, const std::pair<std::string, int32_t> &b) {
              return a.second < b.second;
            });

  output->reserve(output->size() + column_name_id_vector.size());
  for (auto &item : column_name_id_vector) {
    // The sorted vector is a local scratch copy, so its strings can be moved out.
    output->push_back(std::move(item.first));
  }
  return Status::OK();
}
Status TreeGetters::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");

@ -166,6 +166,7 @@ class TreeGetters : public TreeConsumer {
Status GetBatchSize(int64_t *batch_size);
Status GetRepeatCount(int64_t *repeat_count);
Status GetNumClasses(int64_t *num_classes);
Status GetColumnNames(std::vector<std::string> *output);
Status GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing);
bool isInitialized();
std::string Name() override { return "TreeGetters"; }

@ -119,6 +119,10 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \return number of classes. If failed, return -1
int64_t GetNumClasses();
/// \brief Gets the column names
/// \return Names of the columns. If failed, return an empty vector
std::vector<std::string> GetColumnNames();
/// \brief Gets the class indexing
/// \return a map of ClassIndexing. If failed, return an empty map
std::vector<std::pair<std::string, std::vector<int32_t>>> GetClassIndexing();

@ -56,8 +56,8 @@ TEST_F(MindDataTestPipeline, TestAlbumBasic) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumgetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumgetters.";
TEST_F(MindDataTestPipeline, TestAlbumGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumGetters.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
@ -72,6 +72,7 @@ TEST_F(MindDataTestPipeline, TestAlbumgetters) {
EXPECT_EQ(batch_size, 1);
int64_t repeat_count = ds->GetRepeatCount();
EXPECT_EQ(repeat_count, 1);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestAlbumDecode) {

@ -81,6 +81,7 @@ TEST_F(MindDataTestPipeline, TestCifar10Getters) {
EXPECT_EQ(ds->GetDatasetSize(), 10000);
std::vector<DataType> types = ds->GetOutputTypes();
std::vector<TensorShape> shapes = ds->GetOutputShapes();
std::vector<std::string> column_names = {"image", "label"};
int64_t num_classes = ds->GetNumClasses();
EXPECT_EQ(types.size(), 2);
EXPECT_EQ(types[0].ToString(), "uint8");
@ -97,6 +98,7 @@ TEST_F(MindDataTestPipeline, TestCifar10Getters) {
EXPECT_EQ(ds->GetOutputShapes(), shapes);
EXPECT_EQ(ds->GetNumClasses(), -1);
EXPECT_EQ(ds->GetColumnNames(), column_names);
EXPECT_EQ(ds->GetDatasetSize(), 10000);
EXPECT_EQ(ds->GetOutputTypes(), types);
EXPECT_EQ(ds->GetOutputShapes(), shapes);
@ -141,15 +143,32 @@ TEST_F(MindDataTestPipeline, TestCifar100Dataset) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCifar100GetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100GetDatasetSize.";
// Exercises the Dataset getter APIs (output types, output shapes, number of classes,
// batch size, repeat count, dataset size and column names) on a Cifar100 dataset.
TEST_F(MindDataTestPipeline, TestCifar100Getters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCifar100Getters.";
// Create a Cifar100 Dataset
std::string folder_path = datasets_root_path_ + "/testCifar100Data/";
// NOTE(review): the 10 passed to RandomSampler presumably caps the sample count, which would
// explain the expected dataset size of 10 below - confirm against the sampler API.
std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", RandomSampler(false, 10));
EXPECT_NE(ds, nullptr);
// Expected column names, in the order GetColumnNames() should report them.
std::vector<std::string> column_names = {"image", "coarse_label", "fine_label"};
std::vector<DataType> types = ds->GetOutputTypes();
std::vector<TensorShape> shapes = ds->GetOutputShapes();
int64_t num_classes = ds->GetNumClasses();
// One type and one shape entry is expected per column.
EXPECT_EQ(types.size(), 3);
EXPECT_EQ(types[0].ToString(), "uint8");
EXPECT_EQ(types[1].ToString(), "uint32");
EXPECT_EQ(types[2].ToString(), "uint32");
EXPECT_EQ(shapes.size(), 3);
EXPECT_EQ(shapes[0].ToString(), "<32,32,3>");
EXPECT_EQ(shapes[1].ToString(), "<>");
EXPECT_EQ(shapes[2].ToString(), "<>");
// GetNumClasses() is expected to return -1 for this dataset.
EXPECT_EQ(num_classes, -1);
EXPECT_EQ(ds->GetBatchSize(), 1);
EXPECT_EQ(ds->GetRepeatCount(), 1);
EXPECT_EQ(ds->GetDatasetSize(), 10);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestCifar100DatasetFail) {

@ -147,17 +147,19 @@ TEST_F(MindDataTestPipeline, TestCLUEDatasetBasic) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCLUEGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCLUEGetDatasetSize.";
// Checks the dataset size and column names reported by a CLUE dataset built from a
// single AFQMC training file.
TEST_F(MindDataTestPipeline, TestCLUEGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCLUEGetters.";
// Create a CLUEFile Dataset, with single CLUE file
std::string clue_file = datasets_root_path_ + "/testCLUE/afqmc/train.json";
std::string task = "AFQMC";
std::string usage = "train";
// NOTE(review): the trailing 2 presumably limits the number of samples, which would explain
// the expected dataset size of 2 below - confirm against the CLUE() signature.
std::shared_ptr<Dataset> ds = CLUE({clue_file}, task, usage, 2);
// Expected column names, in the order GetColumnNames() should report them.
std::vector<std::string> column_names = {"label", "sentence1", "sentence2"};
EXPECT_NE(ds, nullptr);
EXPECT_EQ(ds->GetDatasetSize(), 2);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestCLUEDatasetCMNLI) {

@ -61,8 +61,8 @@ TEST_F(MindDataTestPipeline, TestCocoDefault) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCocoGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoGetDatasetSize.";
TEST_F(MindDataTestPipeline, TestCocoGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCocoGetters.";
// Create a Coco Dataset
std::string folder_path = datasets_root_path_ + "/testCOCO/train";
std::string annotation_file = datasets_root_path_ + "/testCOCO/annotations/train.json";
@ -70,7 +70,9 @@ TEST_F(MindDataTestPipeline, TestCocoGetDatasetSize) {
std::shared_ptr<Dataset> ds = Coco(folder_path, annotation_file);
EXPECT_NE(ds, nullptr);
std::vector<std::string> column_names = {"image", "bbox", "category_id", "iscrowd"};
EXPECT_EQ(ds->GetDatasetSize(), 6);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestCocoDetection) {

@ -70,8 +70,8 @@ TEST_F(MindDataTestPipeline, TestCSVDatasetBasic) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestCSVGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCSVGetDatasetSize.";
TEST_F(MindDataTestPipeline, TestCSVGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCSVGetters.";
// Create a CSVDataset, with single CSV file
std::string train_file = datasets_root_path_ + "/testCSV/1.csv";
@ -80,6 +80,7 @@ TEST_F(MindDataTestPipeline, TestCSVGetDatasetSize) {
EXPECT_NE(ds, nullptr);
EXPECT_EQ(ds->GetDatasetSize(), 3);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestCSVDatasetMultiFiles) {

@ -62,10 +62,12 @@ TEST_F(MindDataTestPipeline, TestManifestGetters) {
// Create a Manifest Dataset
std::shared_ptr<Dataset> ds1 = Manifest(file_path1);
std::shared_ptr<Dataset> ds2 = Manifest(file_path2);
std::vector<std::string> column_names = {"image", "label"};
EXPECT_NE(ds1, nullptr);
EXPECT_EQ(ds1->GetDatasetSize(), 2);
EXPECT_EQ(ds1->GetNumClasses(), 2);
EXPECT_EQ(ds1->GetColumnNames(), column_names);
EXPECT_NE(ds2, nullptr);
EXPECT_EQ(ds2->GetDatasetSize(), 4);

@ -57,8 +57,8 @@ TEST_F(MindDataTestPipeline, TestMindDataSuccess1) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestMindDataGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataGetDatasetSize with string file pattern.";
TEST_F(MindDataTestPipeline, TestMindDataGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMindDataGetters with string file pattern.";
// Create a MindData Dataset
// Pass one mindrecord shard file to parse dataset info, and search for other mindrecord files with same dataset info,
@ -67,7 +67,10 @@ TEST_F(MindDataTestPipeline, TestMindDataGetDatasetSize) {
std::shared_ptr<Dataset> ds = MindData(file_path);
EXPECT_NE(ds, nullptr);
std::vector<std::string> column_names = {"data", "file_name", "label"};
EXPECT_EQ(ds->GetDatasetSize(), 20);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestMindDataSuccess2) {

@ -69,8 +69,8 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic1) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestRandomDatasetGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetGetDatasetSize.";
TEST_F(MindDataTestPipeline, TestRandomDatasetGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetGetters.";
// Create a RandomDataset
std::shared_ptr<SchemaObj> schema = Schema();
@ -79,7 +79,9 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetGetDatasetSize) {
std::shared_ptr<Dataset> ds = RandomData(50, schema);
EXPECT_NE(ds, nullptr);
std::vector<std::string> column_names = {"image", "label"};
EXPECT_EQ(ds->GetDatasetSize(), 50);
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestRandomDatasetBasic2) {

@ -82,8 +82,8 @@ TEST_F(MindDataTestPipeline, TestTextFileDatasetBasic) {
GlobalContext::config_manager()->set_num_parallel_workers(original_num_parallel_workers);
}
TEST_F(MindDataTestPipeline, TestTextFileGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileGetDatasetSize.";
TEST_F(MindDataTestPipeline, TestTextFileGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTextFileGetters.";
// Test TextFile Dataset with single text file and many default inputs
// Set configuration
@ -101,7 +101,9 @@ TEST_F(MindDataTestPipeline, TestTextFileGetDatasetSize) {
std::shared_ptr<Dataset> ds = TextFile({tf_file1}, 2);
EXPECT_NE(ds, nullptr);
std::vector<std::string> column_names = {"text"};
EXPECT_EQ(ds->GetDatasetSize(), 2);
EXPECT_EQ(ds->GetColumnNames(), column_names);
// Restore configuration
GlobalContext::config_manager()->set_seed(original_seed);

@ -84,8 +84,8 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasic) {
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasicGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordDatasetBasicGetDatasetSize.";
TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasicGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTFRecordDatasetBasicGetters.";
// Create a TFRecord Dataset
std::string file_path = datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data";
@ -112,6 +112,8 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetBasicGetDatasetSize) {
EXPECT_NE(ds, nullptr);
EXPECT_EQ(ds->GetDatasetSize(), 6);
std::vector<std::string> column_names = {"image"};
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestTFRecordDatasetShuffle) {

@ -94,8 +94,8 @@ TEST_F(MindDataTestPipeline, TestVOCGetClassIndex) {
EXPECT_EQ(class_index1[2].second[0], 9);
}
TEST_F(MindDataTestPipeline, TestVOCGetDatasetSize) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCGetDatasetSize.";
TEST_F(MindDataTestPipeline, TestVOCGetters) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestVOCGetters.";
// Create a VOC Dataset
std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
@ -111,6 +111,8 @@ TEST_F(MindDataTestPipeline, TestVOCGetDatasetSize) {
ds = ds->Repeat(2);
EXPECT_EQ(ds->GetDatasetSize(), 6);
std::vector<std::string> column_names = {"image", "bbox", "label", "difficult", "truncate"};
EXPECT_EQ(ds->GetColumnNames(), column_names);
}
TEST_F(MindDataTestPipeline, TestVOCDetection) {

Loading…
Cancel
Save