/** * Copyright 2020 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include "common/common.h" #include "minddata/dataset/core/client.h" #include "minddata/dataset/core/global_context.h" #include "minddata/dataset/engine/datasetops/source/album_op.h" #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" #include "minddata/dataset/util/path.h" #include "minddata/dataset/util/status.h" #include "gtest/gtest.h" #include "utils/log_adapter.h" #include "securec.h" #include "minddata/dataset/include/datasets.h" #include "minddata/dataset/include/transforms.h" using namespace mindspore::dataset; using mindspore::MsLogLevel::ERROR; using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; std::shared_ptr Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); std::shared_ptr Repeat(int repeat_cnt); std::shared_ptr Build(std::vector> ops); std::shared_ptr Album(int64_t num_works, int64_t rows, 
int64_t conns, std::string path, bool shuf = false, std::unique_ptr sampler = nullptr, bool decode = false) { std::shared_ptr so; AlbumOp::Builder builder; Status rc = builder.SetNumWorkers(num_works) .SetAlbumDir(path) .SetRowsPerBuffer(rows) .SetOpConnectorSize(conns) .SetExtensions({".json"}) .SetSampler(std::move(sampler)) .SetDecode(decode) .Build(&so); return so; } std::shared_ptr AlbumSchema(int64_t num_works, int64_t rows, int64_t conns, std::string path, std::string schema_file, std::vector column_names = {}, bool shuf = false, std::unique_ptr sampler = nullptr, bool decode = false) { std::shared_ptr so; AlbumOp::Builder builder; Status rc = builder.SetNumWorkers(num_works) .SetSchemaFile(schema_file) .SetColumnsToLoad(column_names) .SetAlbumDir(path) .SetRowsPerBuffer(rows) .SetOpConnectorSize(conns) .SetExtensions({".json"}) .SetSampler(std::move(sampler)) .SetDecode(decode) .Build(&so); return so; } class MindDataTestAlbum : public UT::DatasetOpTesting { protected: }; TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) { std::string folder_path = datasets_root_path_ + "/testAlbum/images"; std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json"; std::vector column_names = {"image", "label", "id"}; auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file, column_names, false), Repeat(2)}); tree->Prepare(); Status rc = tree->Launch(); if (rc.IsError()) { MS_LOG(ERROR) << "Return code error detected during tree launch: " << "."; EXPECT_TRUE(false); } else { DatasetIterator di(tree); TensorMap tensor_map; di.GetNextAsMap(&tensor_map); EXPECT_TRUE(rc.IsOk()); uint64_t i = 0; int32_t label = 0; while (tensor_map.size() != 0) { tensor_map["label"]->GetItemAt(&label, {}); MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" << tensor_map["label"] << "\n"; i++; di.GetNextAsMap(&tensor_map); } MS_LOG(INFO) << "got rows" << i << "\n"; EXPECT_TRUE(i == 14); } } 
// Reads the album with datasetSchema.json and no explicit column list, repeated twice,
// and expects 14 rows in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});

  Status rc = tree->Prepare();
  ASSERT_TRUE(rc.IsOk()) << "Tree prepare failed: " << rc.ToString();

  rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  ASSERT_TRUE(rc.IsOk());

  constexpr uint64_t kExpectedRows = 14;
  uint64_t i = 0;
  int32_t label = 0;
  while (!tensor_map.empty()) {
    // The status of GetItemAt is left unchecked, as before.
    // NOTE(review): confirm the column is a scalar before asserting on it.
    tensor_map["label"]->GetItemAt(&label, {});
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "\n";
    i++;
    // Stop on a failed fetch: the map might otherwise stay non-empty and the loop would never end.
    rc = di.GetNextAsMap(&tensor_map);
    ASSERT_TRUE(rc.IsOk());
  }
  MS_LOG(INFO) << "got rows" << i << "\n";
  EXPECT_EQ(i, kExpectedRows);
}

// Reads the album with floatSchema.json, which additionally exposes the "_priority" column (read as double),
// repeated twice, and expects 14 rows in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // add the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});

  Status rc = tree->Prepare();
  ASSERT_TRUE(rc.IsOk()) << "Tree prepare failed: " << rc.ToString();

  rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  ASSERT_TRUE(rc.IsOk());

  constexpr uint64_t kExpectedRows = 14;
  uint64_t i = 0;
  int32_t label = 0;
  double priority = 0;
  while (!tensor_map.empty()) {
    // The status of GetItemAt is left unchecked, as before.
    // NOTE(review): confirm the columns are scalars before asserting on them.
    tensor_map["label"]->GetItemAt(&label, {});
    tensor_map["_priority"]->GetItemAt(&priority, {});
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "priority: " << priority << "\n";
    i++;
    // Stop on a failed fetch: the map might otherwise stay non-empty and the loop would never end.
    rc = di.GetNextAsMap(&tensor_map);
    ASSERT_TRUE(rc.IsOk());
  }
  MS_LOG(INFO) << "got rows" << i << "\n";
  EXPECT_EQ(i, kExpectedRows);
}

// Reads the album with fullSchema.json and touches the "label", "_priority", "id", "image" and "_embedding"
// columns of every row; the dataset is repeated twice and 14 rows are expected in total.
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
  std::string folder_path = datasets_root_path_ + "/testAlbum/images";
  // add the priority column
  std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
  auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});

  Status rc = tree->Prepare();
  ASSERT_TRUE(rc.IsOk()) << "Tree prepare failed: " << rc.ToString();

  rc = tree->Launch();
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
  }
  ASSERT_TRUE(rc.IsOk());

  DatasetIterator di(tree);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
  ASSERT_TRUE(rc.IsOk());

  constexpr uint64_t kExpectedRows = 14;
  uint64_t i = 0;
  int32_t label = 0;
  double priority = 0;
  int64_t id = 0;
  while (!tensor_map.empty()) {
    // The status of GetItemAt is left unchecked, as before.
    // NOTE(review): confirm the columns are scalars before asserting on them.
    tensor_map["label"]->GetItemAt(&label, {});
    tensor_map["_priority"]->GetItemAt(&priority, {});
    tensor_map["id"]->GetItemAt(&id, {});
    // Per-row trace is logged at DEBUG, like the other tests in this file, rather than at ERROR.
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
                  << tensor_map["label"]->shape() << "priority: " << priority
                  << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n";
    i++;
    // Stop on a failed fetch: the map might otherwise stay non-empty and the loop would never end.
    rc = di.GetNextAsMap(&tensor_map);
    ASSERT_TRUE(rc.IsOk());
  }
  MS_LOG(INFO) << "got rows" << i << "\n";
  EXPECT_EQ(i, kExpectedRows);
}