From a21eb2d527ab0b364e57c166afc95c36ef143f1b Mon Sep 17 00:00:00 2001 From: Eric Date: Wed, 11 Nov 2020 14:48:21 -0500 Subject: [PATCH] Added random node fix --- mindspore/ccsrc/minddata/dataset/api/datasets.cc | 14 +++++++------- mindspore/ccsrc/minddata/dataset/api/vision.cc | 4 ++++ .../engine/ir/datasetops/source/random_node.cc | 10 ++++++++-- .../engine/ir/datasetops/source/random_node.h | 10 ++++------ .../ccsrc/minddata/dataset/include/datasets.h | 12 ++++-------- tests/ut/cpp/dataset/c_api_cache_test.cc | 2 +- .../cpp/dataset/c_api_dataset_randomdata_test.cc | 12 ------------ 7 files changed, 28 insertions(+), 36 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index c0e0519ced..02e4bc57d1 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -691,10 +691,12 @@ std::shared_ptr Dataset::BuildVocab(const std::vector &colum } return vocab; } + +#endif std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remainder) { return std::make_shared(shared_from_this(), batch_size, drop_remainder); } -#endif + SchemaObj::SchemaObj(const std::string &schema_file) : schema_file_(schema_file), num_rows_(0), dataset_type_("") {} // SchemaObj init function @@ -969,16 +971,14 @@ VOCDataset::VOCDataset(const std::string &dataset_dir, const std::string &task, #endif RandomDataDataset::RandomDataDataset(const int32_t &total_rows, std::shared_ptr schema, const std::vector &columns_list, - const std::shared_ptr &sampler, std::shared_ptr cache) { - auto ds = - std::make_shared(total_rows, std::move(schema), std::move(columns_list), std::move(sampler), cache); + std::shared_ptr cache) { + auto ds = std::make_shared(total_rows, std::move(schema), std::move(columns_list), cache); ir_node_ = std::static_pointer_cast(ds); } RandomDataDataset::RandomDataDataset(const int32_t &total_rows, std::string schema_path, const std::vector &columns_list, - const std::shared_ptr &sampler, std::shared_ptr cache) { - auto ds = std::make_shared(total_rows, std::move(schema_path), std::move(columns_list), - std::move(sampler), cache); + std::shared_ptr cache) { + auto ds = std::make_shared(total_rows, std::move(schema_path), std::move(columns_list), cache); ir_node_ = std::static_pointer_cast(ds); } #ifndef ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index 2ca4a63160..d7d354df43 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -158,6 +158,7 @@ std::shared_ptr MixUpBatch(float alpha) { // Input validation return op->ValidateParams() ? op : nullptr; } +#endif // Function to create NormalizeOperation. std::shared_ptr Normalize(std::vector mean, std::vector std) { @@ -166,6 +167,7 @@ std::shared_ptr Normalize(std::vector mean, std::vect return op->ValidateParams() ? op : nullptr; } +#ifndef ENABLE_ANDROID // Function to create PadOperation. std::shared_ptr Pad(std::vector padding, std::vector fill_value, BorderType padding_mode) { @@ -702,6 +704,7 @@ Status MixUpBatchOperation::ValidateParams() { std::shared_ptr MixUpBatchOperation::Build() { return std::make_shared(alpha_); } +#endif // NormalizeOperation NormalizeOperation::NormalizeOperation(std::vector mean, std::vector std) : mean_(mean), std_(std) {} @@ -736,6 +739,7 @@ std::shared_ptr NormalizeOperation::Build() { return std::make_shared(mean_[0], mean_[1], mean_[2], std_[0], std_[1], std_[2]); } +#ifndef ENABLE_ANDROID // PadOperation PadOperation::PadOperation(std::vector padding, std::vector fill_value, BorderType padding_mode) : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc index 83afdbdd2a..be2b096f52 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc @@ -36,8 +36,6 @@ Status RandomNode::ValidateParams() { RETURN_STATUS_SYNTAX_ERROR(err_msg); } - RETURN_IF_NOT_OK(ValidateDatasetSampler("RandomNode", sampler_)); - if (!columns_list_.empty()) { RETURN_IF_NOT_OK(ValidateDatasetColumnParam("RandomNode", "columns_list", columns_list_)); } @@ -89,6 +87,14 @@ std::vector> RandomNode::Build() { data_schema->LoadSchemaString(schema_json_string, columns_to_load); } } + + // RandomOp by itself is a non-mappable dataset that does not support sampling. + // However, if a cache operator is injected at some other place higher in the tree, that cache can + // inherit this sampler from the leaf, providing sampling support from the caching layer. + // That is why we save the sampler here in a leaf node that does not use sampling. + // RandomOp doesn't support sampler, should not support sharding, select sampler should just be sequential. + std::shared_ptr sampler_ = SelectSampler(total_rows_, false, 1, 0); + std::shared_ptr op; op = std::make_shared(num_workers_, connector_que_size_, rows_per_buffer_, total_rows_, std::move(data_schema), std::move(sampler_->Build())); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h index 4b798e7002..0ec2eb34fe 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h @@ -36,22 +36,20 @@ class RandomNode : public DatasetNode { /// \brief Constructor RandomNode(const int32_t &total_rows, std::shared_ptr schema, const std::vector &columns_list, - const std::shared_ptr &sampler, std::shared_ptr cache) + std::shared_ptr cache) : DatasetNode(std::move(cache)), total_rows_(total_rows), schema_path_(""), schema_(std::move(schema)), - columns_list_(columns_list), - sampler_(std::move(sampler)) {} + columns_list_(columns_list) {} /// \brief Constructor RandomNode(const int32_t &total_rows, std::string schema_path, const std::vector &columns_list, - const std::shared_ptr &sampler, std::shared_ptr cache) + std::shared_ptr cache) : DatasetNode(std::move(cache)), total_rows_(total_rows), schema_path_(schema_path), - columns_list_(columns_list), - sampler_(std::move(sampler)) {} + columns_list_(columns_list) {} /// \brief Destructor ~RandomNode() = default; diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index ccdaf22f54..511102fdaf 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -810,11 +810,10 @@ std::shared_ptr operator+(const std::shared_ptr &dataset class RandomDataDataset : public Dataset { public: RandomDataDataset(const int32_t &total_rows, std::shared_ptr schema, - const std::vector &columns_list, const std::shared_ptr &sampler, - std::shared_ptr cache); + const std::vector &columns_list, std::shared_ptr cache); RandomDataDataset(const int32_t &total_rows, std::string schema_path, const std::vector &columns_list, - const std::shared_ptr &sampler, std::shared_ptr cache); + std::shared_ptr cache); }; /// \brief Function to create a RandomDataset @@ -829,16 +828,13 @@ class RandomDataDataset : public Dataset { template > std::shared_ptr RandomData(const int32_t &total_rows = 0, const T &schema = nullptr, const std::vector &columns_list = {}, - const std::shared_ptr &sampler = RandomSampler(), const std::shared_ptr &cache = nullptr) { std::shared_ptr ds; if constexpr (std::is_same::value || std::is_same>::value) { std::shared_ptr schema_obj = schema; - ds = std::make_shared(total_rows, std::move(schema_obj), std::move(columns_list), - std::move(sampler), cache); + ds = std::make_shared(total_rows, std::move(schema_obj), std::move(columns_list), cache); } else { - ds = std::make_shared(total_rows, std::move(schema), std::move(columns_list), std::move(sampler), - cache); + ds = std::make_shared(total_rows, std::move(schema), std::move(columns_list), cache); } return ds; } diff --git a/tests/ut/cpp/dataset/c_api_cache_test.cc b/tests/ut/cpp/dataset/c_api_cache_test.cc index 04cf3a7a54..6cd3d3c740 100644 --- a/tests/ut/cpp/dataset/c_api_cache_test.cc +++ b/tests/ut/cpp/dataset/c_api_cache_test.cc @@ -434,7 +434,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) { std::shared_ptr schema = Schema(); schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}); schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); - std::shared_ptr ds = RandomData(4, schema, {}, RandomSampler(), some_cache); + std::shared_ptr ds = RandomData(4, schema, {}, some_cache); EXPECT_NE(ds, nullptr); // Create a Repeat operation on ds diff --git a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc index 59d017ec5c..b8422f5c91 100644 --- a/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc +++ b/tests/ut/cpp/dataset/c_api_dataset_randomdata_test.cc @@ -402,18 +402,6 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic7) { GlobalContext::config_manager()->set_seed(curr_seed); } -TEST_F(MindDataTestPipeline, TestRandomDatasetWithNullSampler) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetWithNullSampler."; - - // Create a RandomDataset - std::shared_ptr schema = Schema(); - schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}); - schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1}); - std::shared_ptr ds = RandomData(50, schema, {}, nullptr); - // Expect failure: sampler can not be nullptr - EXPECT_EQ(ds->CreateIterator(), nullptr); -} - TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomDatasetDuplicateColumnName.";