diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index f8f74c86c0..696237e4ad 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -547,6 +547,7 @@ std::shared_ptr Dataset::Zip(const std::vector } Status Dataset::AddCacheOp(std::vector> *node_ops) { if (cache_ != nullptr) { + RETURN_IF_NOT_OK(cache_->Build()); std::shared_ptr cache_op; RETURN_IF_NOT_OK(cache_->CreateCacheOp(num_workers_, &cache_op)); node_ops->push_back(cache_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc index e46fb580ea..c1715f0547 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc @@ -31,7 +31,7 @@ namespace api { CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr &sampler, const bool &decode, const std::set &extensions, const std::shared_ptr &cache) - : Dataset(cache), + : Dataset(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler), @@ -60,6 +60,8 @@ std::vector> CelebANode::Build() { RETURN_EMPTY_IF_ERROR( schema->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); + RETURN_EMPTY_IF_ERROR(AddCacheOp(&node_ops)); + node_ops.push_back(std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, decode_, usage_, extensions_, std::move(schema), std::move(sampler_->Build()))); diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index 85b08cc1a1..24d3a07e62 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -533,9 +533,10 @@ std::shared_ptr VOC(const std::string &dataset_dir, const std::string & /// \param prefetch_sz optional prefetch size /// \return Shared pointer to DatasetCache. If error, nullptr is returned. std::shared_ptr CreateDatasetCache(session_id_type id, uint64_t mem_sz, bool spill, - std::optional hostname, std::optional port, - std::optional num_connections, - std::optional prefetch_sz); + std::optional hostname = std::nullopt, + std::optional port = std::nullopt, + std::optional num_connections = std::nullopt, + std::optional prefetch_sz = std::nullopt); #endif /// \brief Function to create a ZipNode diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 3c21213759..a0009da8ff 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -119,6 +119,7 @@ SET(DE_UT_SRCS c_api_datasets_test.cc c_api_dataset_iterator_test.cc c_api_text_vocab_test.cc + c_api_cache_test.cc tensor_op_fusion_pass_test.cc sliding_window_op_test.cc epoch_ctrl_op_test.cc diff --git a/tests/ut/cpp/dataset/c_api_cache_test.cc b/tests/ut/cpp/dataset/c_api_cache_test.cc new file mode 100644 index 0000000000..cb2ed8b2a7 --- /dev/null +++ b/tests/ut/cpp/dataset/c_api_cache_test.cc @@ -0,0 +1,386 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common/common.h" +#include "minddata/dataset/include/datasets.h" + +// IR leaf nodes + +#include "minddata/dataset/engine/ir/datasetops/source/celeba_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/cifar10_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/cifar100_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/voc_node.h" + +using namespace mindspore::dataset; +using namespace mindspore::dataset::api; + +// Helper function to get the session id from SESSION_ID env variable +Status GetSessionFromEnv(session_id_type *session_id); + +class MindDataTestCacheOp : public UT::DatasetOpTesting { + public: + void SetUp() override { + DatasetOpTesting::SetUp(); + GlobalInit(); + } +}; + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCApiSamplerNull) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true, "127.0.0.1", 50053, 1, 1); + EXPECT_NE(some_cache, nullptr); + + // Create an ImageFolder Dataset, this folder_path only has 2 images in it + std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; + std::shared_ptr ds = ImageFolder(folder_path, false, nullptr, {}, {}, some_cache); + EXPECT_EQ(ds, nullptr); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheImageFolderCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create an ImageFolder Dataset, this folder_path only has 2 images in it + std::string folder_path = datasets_root_path_ + "/testImageNetData/train/"; + std::shared_ptr ds = ImageFolder(folder_path, false, RandomSampler(), {}, {}, some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCocoCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a Coco Dataset, this folder_path has 6 images in it + std::string folder_path = datasets_root_path_ + "/testCOCO/train/"; + std::string annotation_file_path = datasets_root_path_ + "/testCOCO/annotations/train.json"; + std::shared_ptr ds = + Coco(folder_path, annotation_file_path, "Detection", false, RandomSampler(), some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 12); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheMnistCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a Mnist Dataset + std::string folder_path = datasets_root_path_ + "/testMnistData/"; + std::shared_ptr ds = Mnist(folder_path, "all", RandomSampler(false, 10), some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a CelebA Dataset, this folder_path has 4 records in it + std::string folder_path = datasets_root_path_ + "/testCelebAData/"; + std::shared_ptr ds = CelebA(folder_path, "all", RandomSampler(false, 10), false, {}, some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 8); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheManifestCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a Manifest Dataset, this file_path has 2 records in it + std::string file_path = datasets_root_path_ + "/testManifestData/cpp.json"; + std::shared_ptr ds = Manifest(file_path, "train", RandomSampler(), {}, false, some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 4); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar10CApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a Cifar10 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar10Data/"; + std::shared_ptr ds = Cifar10(folder_path, "all", RandomSampler(false, 10), some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCifar100CApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a Cifar100 Dataset + std::string folder_path = datasets_root_path_ + "/testCifar100Data/"; + std::shared_ptr ds = Cifar100(folder_path, "all", RandomSampler(false, 10), some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 20); + + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) { + session_id_type env_session; + Status s = GetSessionFromEnv(&env_session); + EXPECT_EQ(s, Status::OK()); + + std::shared_ptr some_cache = CreateDatasetCache(env_session, 0, true); + EXPECT_NE(some_cache, nullptr); + + // Create a VOC Dataset, this folder_path has 9 records in it + std::string folder_path = datasets_root_path_ + "/testVOC2012/"; + std::shared_ptr ds = VOC(folder_path, "Detection", "train", {}, false, RandomSampler(), some_cache); + EXPECT_NE(ds, nullptr); + + // Create a Repeat operation on ds + int32_t repeat_num = 2; + ds = ds->Repeat(repeat_num); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 18); + + // Manually terminate the pipeline + iter->Stop(); +}