diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc index ca06deb0ee..64616b8d84 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/core/bindings.cc @@ -42,7 +42,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) { .def("get_numa_enable", &ConfigManager::numa_enable) .def("set_numa_enable", &ConfigManager::set_numa_enable) .def("get_op_connector_size", &ConfigManager::op_connector_size) - .def("get_rows_per_buffer", &ConfigManager::rows_per_buffer) .def("get_seed", &ConfigManager::seed) .def("set_rank_id", &ConfigManager::set_rank_id) .def("get_worker_connector_size", &ConfigManager::worker_connector_size) @@ -54,7 +53,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) { .def("get_profiler_file_status", &ConfigManager::get_profiler_file_status) .def("set_num_parallel_workers", &ConfigManager::set_num_parallel_workers) .def("set_op_connector_size", &ConfigManager::set_op_connector_size) - .def("set_rows_per_buffer", &ConfigManager::set_rows_per_buffer) .def("set_seed", &ConfigManager::set_seed) .def("set_worker_connector_size", &ConfigManager::set_worker_connector_size) .def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); }); diff --git a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc index 87dd6e1345..f4f8469daa 100644 --- a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc @@ -31,8 +31,7 @@ namespace mindspore { namespace dataset { ConfigManager::ConfigManager() - : rows_per_buffer_(kCfgRowsPerBuffer), - num_parallel_workers_(kCfgParallelWorkers), + : num_parallel_workers_(kCfgParallelWorkers), worker_connector_size_(kCfgWorkerConnectorSize), op_connector_size_(kCfgOpConnectorSize), rank_id_(kCfgDefaultRankId), @@ -70,7 +69,6 @@ void ConfigManager::Print(std::ostream &out) const { // Don't show the test/internal ones. Only display the main ones here. // fyi, boolalpha tells the output stream to write "true" and "false" for bools out << "\nClient config settings :" - << "\nDataCache Rows per buffer : " << rows_per_buffer_ << "\nParallelOp workers : " << num_parallel_workers_ << "\nParallelOp worker connector size : " << worker_connector_size_ << "\nSize of each Connector : " << op_connector_size_ << std::endl; @@ -78,7 +76,6 @@ void ConfigManager::Print(std::ostream &out) const { // Private helper function that takes a nlohmann json format and populates the settings Status ConfigManager::FromJson(const nlohmann::json &j) { - set_rows_per_buffer(j.value("rowsPerBuffer", rows_per_buffer_)); set_num_parallel_workers(j.value("numParallelWorkers", num_parallel_workers_)); set_worker_connector_size(j.value("workerConnectorSize", worker_connector_size_)); set_op_connector_size(j.value("opConnectorSize", op_connector_size_)); @@ -115,9 +112,6 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) { return rc; } -// Setter function -void ConfigManager::set_rows_per_buffer(int32_t rows_per_buffer) { rows_per_buffer_ = rows_per_buffer; } - // Setter function void ConfigManager::set_num_parallel_workers(int32_t num_parallel_workers) { num_parallel_workers_ = num_parallel_workers; diff --git a/mindspore/ccsrc/minddata/dataset/core/config_manager.h b/mindspore/ccsrc/minddata/dataset/core/config_manager.h index 21d2f06aa4..59d9ee056a 100644 --- a/mindspore/ccsrc/minddata/dataset/core/config_manager.h +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.h @@ -74,10 +74,6 @@ class ConfigManager { // @return Status error code Status LoadFile(const std::string &settingsFile); - // getter function - // @return The rows per buffer setting - int32_t rows_per_buffer() const { return rows_per_buffer_; } - // getter function // @return The number of workers setting int32_t num_parallel_workers() const { return num_parallel_workers_; } @@ -112,10 +108,6 @@ class ConfigManager { /// \return auto_num_workers_ bool auto_num_workers() const { return auto_num_workers_; } - // setter function - // @param rows_per_buffer - The setting to apply to the config - void set_rows_per_buffer(int32_t rows_per_buffer); - // setter function // @param num_parallel_workers - The setting to apply to the config void set_num_parallel_workers(int32_t num_parallel_workers); @@ -230,7 +222,6 @@ class ConfigManager { void set_auto_worker_config_(uint8_t cfg) { auto_worker_config_ = cfg; } private: - int32_t rows_per_buffer_; int32_t num_parallel_workers_; int32_t worker_connector_size_; int32_t op_connector_size_; diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc index 83701a46ad..640975c9a5 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.cc @@ -35,7 +35,7 @@ TensorRow::TensorRow(row_id_type id, const std::initializer_list &ls TensorRow::TensorRow(const TensorRow &tr) : id_(tr.id_), path_(tr.path_), row_(tr.row_), tensor_row_flag_(tr.tensor_row_flag_) {} -TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : tensor_row_flag_(flag) {} +TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : id_(kDefaultRowId), path_({}), tensor_row_flag_(flag) {} TensorRow &TensorRow::operator=(const TensorRow &tr) { if (this == &tr) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_perf_run.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_perf_run.cc index 76c6becdf8..97edf22cba 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_perf_run.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_perf_run.cc @@ -540,8 +540,7 @@ Status CachePerfRun::Run() { int64_t elapse_time = std::chrono::duration_cast(end_tick - start_tick).count(); std::cout << "Epoch one (build phase) elapsed time " << elapse_time << " seconds" << std::endl; - std::cout << "Epoch one (build phase) per pipeline per worker summary. Buffer size = " << cfg_.rows_per_buffer() - << std::endl; + std::cout << "Epoch one (build phase) per pipeline per worker summary." << std::endl; PrintEpochSummary(); // Get some stat but we need to connect. The server will thinks it is the (n+1) pipeline diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_pipeline_run.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_pipeline_run.cc index 0005f1b342..27c1887625 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_pipeline_run.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/perf/cache_pipeline_run.cc @@ -228,16 +228,13 @@ Status CachePipelineRun::RunFirstEpoch() { } std::vector keys; - auto rows_per_buffer = cfg_.rows_per_buffer(); - keys.reserve(rows_per_buffer); + keys.reserve(1); int32_t worker_id = 0; for (auto i = start_row_; i <= end_row_; ++i) { keys.push_back(i); - if (keys.size() == rows_per_buffer) { - auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); - RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); - keys.clear(); - } + auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); + RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); + keys.clear(); } if (!keys.empty()) { auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); @@ -355,9 +352,8 @@ Status CachePipelineRun::WriterWorkerEntry(int32_t worker_id) { Status CachePipelineRun::RunReadEpoch() { std::vector keys; - auto rows_per_buffer = cc_->GetPrefetchSize(); // We will use prefetch size to read. auto num_workers = cfg_.num_parallel_workers(); - keys.reserve(rows_per_buffer); + keys.reserve(1); // Spawn workers auto f = std::bind(&CachePipelineRun::ReaderWorkerEntry, this, std::placeholders::_1); std::vector worker_threads; @@ -381,11 +377,9 @@ Status CachePipelineRun::RunReadEpoch() { int32_t worker_id = 0; for (auto id : all_keys) { keys.push_back(id); - if (keys.size() == rows_per_buffer) { - auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); - RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); - keys.clear(); - } + auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); + RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk))); + keys.clear(); } if (!keys.empty()) { auto blk = std::make_unique(IOBlock(keys, IOBlock::kDeIoBlockNone)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index af3d8defc3..935f93152d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -31,7 +31,6 @@ BarrierOp::Builder::Builder() { // using the various builder set methods. std::shared_ptr cfg = GlobalContext::config_manager(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -39,17 +38,13 @@ Status BarrierOp::Builder::SanityCheck() const { return Status::OK(); } Status BarrierOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); - *ptr = std::make_shared(builder_rows_per_buffer_, builder_op_connector_size_, builder_condition_name_, - builder_condition_func_); + *ptr = std::make_shared(builder_op_connector_size_, builder_condition_name_, builder_condition_func_); return Status::OK(); } // Construct BarrierOp here, local variables initialized in operator due to tree construction restrictions -BarrierOp::BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name, - py::function condition_func) +BarrierOp::BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func) : PipelineOp(op_connector_size), - rows_per_buffer_(rows_per_buffer), - buffer_id_(0), clean_up_(false), eof_(false), condition_name_(condition_name), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h index 6b13654db2..3df5fee858 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.h @@ -98,16 +98,13 @@ class BarrierOp : public PipelineOp { }; // Constructor for BarrierOp - // @param rows_per_buffer - number of rows in output buffer // @param op_connector_size - connector size // @param condition_name - the condition name associated with this operator // @param condition_func - the blocking condition check per row - // @note - currently rows_per_buffer should = 1 for barrier. // The reason for this is having other values would complicate how the pipeline behaves with other operators // One example of such case is having batch after barrier. Batch would be waiting for data and having // rows per buffer in this case can result in hanging - BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name, - py::function condition_func); + BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func); // Destructor ~BarrierOp(); @@ -156,10 +153,6 @@ class BarrierOp : public PipelineOp { bool clean_up_; // end of file state, we stop reading data and shut down bool eof_; - // rows per buffer - int32_t rows_per_buffer_; - // buffer_id - int32_t buffer_id_; // iterator to pull new rows, we only have one child std::unique_ptr child_iterator_; // condition name, to support multiple barriers diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index ac402eec0d..c7c19ba5dd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -248,7 +248,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) { RETURN_IF_NOT_OK(out_connector_->SendEOF(workerId)); } else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) { TensorRow new_row; - RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &new_row)); + RETURN_IF_NOT_OK(MakeBatchedRow(std::move(table_pair), &new_row)); RETURN_IF_NOT_OK(out_connector_->Add(std::move(new_row), workerId)); } RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair)); @@ -256,7 +256,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) { return Status::OK(); } -Status BatchOp::MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, TensorRow *new_row) { +Status BatchOp::MakeBatchedRow(std::pair, CBatchInfo> table_pair, TensorRow *new_row) { RETURN_UNEXPECTED_IF_NULL(table_pair.first); #ifdef ENABLE_PYTHON if (!in_col_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h index 454d9403e2..7f261a22d0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.h @@ -225,7 +225,7 @@ class BatchOp : public ParallelOp { // Generate buffer with batched tensors // @return Status The status code returned - Status MakeBatchedBuffer(std::pair, CBatchInfo> table_pair, TensorRow *new_row); + Status MakeBatchedRow(std::pair, CBatchInfo> table_pair, TensorRow *new_row); #ifdef ENABLE_PYTHON // Function that calls pyfunc to perform map on batch diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc index 3150b06556..07914ef71f 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.cc @@ -45,14 +45,13 @@ Status CacheBase::Reset() { MS_LOG(DEBUG) << Name() << " performing a self-reset."; return Status::OK(); } -CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, - std::shared_ptr cache_client, std::shared_ptr sampler) +CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr cache_client, + std::shared_ptr sampler) : ParallelOp(num_workers, op_connector_size, std::move(sampler)), row_cnt_(0), num_cache_miss_(0), cache_client_(std::move(cache_client)), - rows_per_buffer_(rows_per_buf), - prefetch_size_(rows_per_buffer_), + prefetch_size_(1), num_prefetchers_(num_workers_) { // Adjust the prefetch size based on the number of workers. auto prefetch_sz_per_thread = cache_client_->GetPrefetchSize() / num_prefetchers_; @@ -92,7 +91,7 @@ Status CacheBase::FetchSamplesToWorkers() { row_cnt_ = 0; ++wait_cnt; std::vector keys; - keys.reserve(rows_per_buffer_); + keys.reserve(1); std::vector prefetch_keys; prefetch_keys.reserve(prefetch_size_); std::unique_ptr sampler_buffer; @@ -107,15 +106,11 @@ Status CacheBase::FetchSamplesToWorkers() { // Batch enough rows for performance reason. if (row_cnt_ % prefetch_size_ == 0) { RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys)); - // Now we tell the WorkerEntry to wait for them to come back. If prefetch_size_ is a multiple - // of rows_per_buffer_, the keys vector will always be empty. But it can be partially filled. - // The only requirement we set up is rows_per_buffer_ is less than or equal to prefetch_size_. + // Now we tell the WorkerEntry to wait for them to come back. for (auto row_id : prefetch_keys) { keys.push_back(row_id); - if (keys.size() == rows_per_buffer_) { - RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); - keys.clear(); - } + RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); + keys.clear(); } prefetch_keys.clear(); } @@ -127,10 +122,8 @@ Status CacheBase::FetchSamplesToWorkers() { RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys)); for (auto row_id : prefetch_keys) { keys.push_back(row_id); - if (keys.size() == rows_per_buffer_) { - RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); - keys.clear(); - } + RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys)); + keys.clear(); } } if (!keys.empty()) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h index f838b52c50..1494320b60 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_base_op.h @@ -42,11 +42,10 @@ class CacheBase : public ParallelOp { /// \brief Base class constructor /// \param num_workers Number of parallel workers /// \param op_connector_size Connector size - /// \param rows_per_buf Number of rows per buffer /// \param cache_client CacheClient for communication to the CacheServer /// \param sampler Sampler which is mandatory - CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, - std::shared_ptr cache_client, std::shared_ptr sampler); + CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr cache_client, + std::shared_ptr sampler); /// \brief Destructor ~CacheBase(); @@ -87,7 +86,6 @@ class CacheBase : public ParallelOp { int64_t row_cnt_; std::atomic num_cache_miss_; std::shared_ptr cache_client_; - int32_t rows_per_buffer_; std::unique_ptr>> keys_miss_; /// \brief Common function to register resources for interrupt diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc index 53782e390b..d96b68e2dc 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.cc @@ -31,7 +31,6 @@ namespace dataset { CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); build_num_workers_ = cfg->num_parallel_workers(); - rows_per_buffer_ = cfg->rows_per_buffer(); build_op_connector_size_ = cfg->op_connector_size(); } @@ -52,8 +51,8 @@ Status CacheLookupOp::Builder::SanityCheck() const { // The builder "build" method creates the final object and does some init on it Status CacheLookupOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); - *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, rows_per_buffer_, - build_cache_client_, build_sampler_); + *ptr = + std::make_shared(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_); return Status::OK(); } Status CacheLookupOp::operator()() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h index f3211ddd38..7e867d28f5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_lookup_op.h @@ -74,7 +74,6 @@ class CacheLookupOp : public CacheBase, public SamplerRT { private: int32_t build_num_workers_; - int32_t rows_per_buffer_; int32_t build_op_connector_size_; std::shared_ptr build_cache_client_; std::shared_ptr build_sampler_; @@ -86,9 +85,9 @@ class CacheLookupOp : public CacheBase, public SamplerRT { /// \brief Constructor /// \note It takes the same argument as the base class. /// \see CacheBase - CacheLookupOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, - std::shared_ptr cache_client, std::shared_ptr sampler) - : CacheBase(num_workers, op_connector_size, rows_per_buf, cache_client, sampler), SamplerRT(*(sampler.get())) {} + CacheLookupOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr cache_client, + std::shared_ptr sampler) + : CacheBase(num_workers, op_connector_size, cache_client, sampler), SamplerRT(*(sampler.get())) {} ~CacheLookupOp() = default; // As a parallel op, we override these two functions Status operator()() override; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc index cccf77f343..c05ac0f961 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.cc @@ -33,7 +33,6 @@ namespace dataset { CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); build_num_workers_ = cfg->num_parallel_workers(); - rows_per_buffer_ = cfg->rows_per_buffer(); build_op_connector_size_ = cfg->op_connector_size(); } @@ -54,17 +53,16 @@ Status CacheOp::Builder::SanityCheck() const { // The builder "build" method creates the final object and does some init on it Status CacheOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); - *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, rows_per_buffer_, build_cache_client_, - build_sampler_); + *ptr = std::make_shared(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_); RETURN_IF_NOT_OK((*ptr)->InitCache()); return Status::OK(); } // Constructor of CacheOp -CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, - std::shared_ptr cache_client, std::shared_ptr sampler) - : CacheBase(num_workers, op_connector_size, rows_per_buf, std::move(cache_client), std::move(sampler)), +CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr cache_client, + std::shared_ptr sampler) + : CacheBase(num_workers, op_connector_size, std::move(cache_client), std::move(sampler)), num_guys_in_(0), phase_(Phase::kBuildPhase) {} diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h index fd7045ca59..3017b2ffb5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/cache_op.h @@ -70,14 +70,6 @@ class CacheOp : public CacheBase, public RandomAccessOp { return *this; } - /// \brief Setter method - /// \param rows_per_buffer - /// \return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - rows_per_buffer_ = rows_per_buffer; - return *this; - } - /// \brief Setter method /// \param sampler /// \return Builder setter method returns reference to the builder. @@ -93,7 +85,6 @@ class CacheOp : public CacheBase, public RandomAccessOp { private: int32_t build_num_workers_; - int32_t rows_per_buffer_; int32_t build_op_connector_size_; std::shared_ptr build_cache_client_; std::shared_ptr build_sampler_; @@ -107,8 +98,8 @@ class CacheOp : public CacheBase, public RandomAccessOp { /// \note The builder class should be used to call it. /// \param num_workers The number of worker threads. /// \param op_connector_size The size of each queue in the connector. - CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf, - std::shared_ptr cache_client, std::shared_ptr sampler); + CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr cache_client, + std::shared_ptr sampler); // Destructor ~CacheOp(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index 842468c892..4ddeed74a7 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -41,7 +41,6 @@ constexpr int32_t ShuffleOp::kShuffleStateDrain; ShuffleOp::Builder::Builder() : build_shuffle_size_(0), build_reshuffle_each_epoch_(true) { std::shared_ptr cfg = GlobalContext::config_manager(); build_op_connector_size_ = cfg->op_connector_size(); - build_rows_per_buffer_ = cfg->rows_per_buffer(); build_shuffle_seed_ = GetSeed(); } @@ -56,20 +55,17 @@ Status ShuffleOp::Builder::SanityCheck() const { Status ShuffleOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); *ptr = std::make_shared(build_shuffle_size_, build_shuffle_seed_, build_op_connector_size_, - build_reshuffle_each_epoch_, build_rows_per_buffer_); + build_reshuffle_each_epoch_); return Status::OK(); } // Constructor of the ShuffleOp -ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch, - int32_t rows_per_buffer) +ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch) : PipelineOp(op_connector_size), shuffle_size_(shuffle_size), shuffle_seed_(shuffle_seed), reshuffle_each_epoch_(reset_every_epoch), rng_(shuffle_seed), - buffer_counter_(0), - rows_per_buffer_(rows_per_buffer), shuffle_buffer_(std::make_unique()), shuffle_last_row_idx_(0), shuffle_buffer_state_(kShuffleStateInit) {} @@ -87,7 +83,6 @@ Status ShuffleOp::SelfReset() { } shuffle_buffer_ = std::make_unique(); - buffer_counter_ = 0; shuffle_last_row_idx_ = 0; shuffle_buffer_state_ = kShuffleStateInit; return Status::OK(); @@ -104,8 +99,8 @@ void ShuffleOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info PipelineOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nShuffle size: " << shuffle_size_ << "\nRows per buffer: " << rows_per_buffer_ - << "\nShuffle buffer state: " << shuffle_buffer_state_ << "\nShuffle seed: " << shuffle_seed_ << "\n\n"; + out << "\nShuffle size: " << shuffle_size_ << "\nShuffle buffer state: " << shuffle_buffer_state_ + << "\nShuffle seed: " << shuffle_seed_ << "\n\n"; } } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h index 9b733a7981..253d3d65e9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.h @@ -121,9 +121,7 @@ class ShuffleOp : public PipelineOp { // @param shuffle_size - The size for the shuffle buffer // @param shuffle_seed - The seed to use for random number generation // @param op_connector_size - The output connector queue size - // @param rows_per_buffer - The requested number of rows per buffer - ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch, - int32_t rows_per_buffer); + ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch); // Destructor ~ShuffleOp() = default; @@ -183,8 +181,6 @@ class ShuffleOp : public PipelineOp { // (ie uniform_int_distribution) because we will need to create up to |dataset| instances // of the distribution object in the common case of a perfect shuffle std::mt19937_64 rng_; - int32_t buffer_counter_; // For creating new buffer id's - int32_t rows_per_buffer_; // Number of rows to pack into output buffer // A single (potentially large) buffer of tensor rows for performing shuffling. std::unique_ptr shuffle_buffer_; int32_t shuffle_last_row_idx_; // Internal tracking of the last slot of our shuffle buffer diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index 968afc0b70..229ef94e20 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -32,7 +32,6 @@ namespace dataset { AlbumOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -52,9 +51,8 @@ Status AlbumOp::Builder::Build(std::shared_ptr *ptr) { MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << "."; builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_); } - *ptr = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, - builder_op_connector_size_, builder_decode_, builder_extensions_, - std::move(builder_schema_), std::move(builder_sampler_)); + *ptr = std::make_shared(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_, + builder_extensions_, std::move(builder_schema_), std::move(builder_sampler_)); return Status::OK(); } @@ -69,10 +67,10 @@ Status AlbumOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } -AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, +AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set &exts, std::unique_ptr data_schema, std::shared_ptr sampler) - : MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer), + : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)), folder_path_(file_dir), decode_(do_decode), extensions_(exts), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h index bfe88760cb..9442ea86bf 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h @@ -58,14 +58,6 @@ class AlbumOp : public MappableLeafOp { /// \brief Destructor. ~Builder() = default; - /// \brief Setter method - /// \param[in] rows_per_buffer - /// \return Builder setter method returns reference to the builder - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - /// \brief Setter method /// \param[in] size /// \return Builder setter method returns reference to the builder @@ -154,16 +146,14 @@ class AlbumOp : public MappableLeafOp { /// \brief Constructor /// \param[in] num_wkrs - Num of workers reading images in parallel - /// \param[in] rows_per_buffer Number of images (rows) in each buffer /// \param[in] file_dir - directory of Album /// \param[in] queue_size - connector size /// \param[in] do_decode - decode image files /// \param[in] exts - set of file extensions to read, if empty, read everything under the dir /// \param[in] data_schema - schema of dataset /// \param[in] sampler - sampler tells AlbumOp what to read - AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode, - const std::set &exts, std::unique_ptr data_schema, - std::shared_ptr sampler); + AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set &exts, + std::unique_ptr data_schema, std::shared_ptr sampler); /// \brief Destructor. ~AlbumOp() = default; @@ -273,7 +263,6 @@ class AlbumOp : public MappableLeafOp { /// \return Status The status code returned Status ComputeColMap() override; - int32_t rows_per_buffer_; std::string folder_path_; // directory of image folder bool decode_; std::set extensions_; // extensions allowed diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index 33ed0f1241..93a0450244 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -34,7 +34,6 @@ namespace dataset { CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -54,9 +53,9 @@ Status CelebAOp::Builder::Build(std::shared_ptr *op) { // label is like this:0 1 0 0 1...... RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); - *op = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, - builder_op_connector_size_, builder_decode_, builder_usage_, builder_extensions_, - std::move(builder_schema_), std::move(builder_sampler_)); + *op = std::make_shared(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_, + builder_usage_, builder_extensions_, std::move(builder_schema_), + std::move(builder_sampler_)); if (*op == nullptr) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "CelebAOp init failed."); } @@ -76,10 +75,10 @@ Status CelebAOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } -CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, - bool decode, const std::string &usage, const std::set &exts, - std::unique_ptr schema, std::shared_ptr sampler) - : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), +CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, + const std::string &usage, const std::set &exts, std::unique_ptr schema, + std::shared_ptr sampler) + : MappableLeafOp(num_workers, queue_size, std::move(sampler)), folder_path_(dir), decode_(decode), extensions_(exts), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h index c2375fc62a..ea980ab419 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.h @@ -53,14 +53,6 @@ class CelebAOp : public MappableLeafOp { // Destructor. ~Builder() = default; - // Setter method - // @param int32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method // @param int32_t size // @return Builder setter method returns reference to the builder. @@ -139,13 +131,11 @@ class CelebAOp : public MappableLeafOp { // Constructor // @param int32_t - num_workers - Num of workers reading images in parallel - // @param int32_t - rows_per_buffer Number of images (rows) in each buffer // @param std::string - dir directory of celeba dataset // @param int32_t queueSize - connector queue size // @param std::unique_ptr sampler - sampler tells CelebAOp what to read - CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, bool decode, - const std::string &usage, const std::set &exts, std::unique_ptr schema, - std::shared_ptr sampler); + CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage, + const std::set &exts, std::unique_ptr schema, std::shared_ptr sampler); ~CelebAOp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 585b22547b..50b8468260 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -39,7 +39,6 @@ constexpr uint32_t kCifarImageSize = kCifarImageHeight * kCifarImageWidth * kCif CifarOp::Builder::Builder() : sampler_(nullptr), usage_("") { std::shared_ptr cfg = GlobalContext::config_manager(); num_workers_ = cfg->num_parallel_workers(); - rows_per_buffer_ = cfg->rows_per_buffer(); op_connect_size_ = cfg->op_connector_size(); cifar_type_ = kCifar10; } @@ -65,8 +64,8 @@ Status CifarOp::Builder::Build(std::shared_ptr *ptr) { ColDescriptor("fine_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &another_scalar))); } - *ptr = std::make_shared(cifar_type_, usage_, num_workers_, rows_per_buffer_, dir_, op_connect_size_, - std::move(schema_), std::move(sampler_)); + *ptr = std::make_shared(cifar_type_, usage_, num_workers_, dir_, op_connect_size_, std::move(schema_), + std::move(sampler_)); return Status::OK(); } @@ -85,10 +84,9 @@ Status CifarOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } -CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf, - const std::string &file_dir, int32_t queue_size, std::unique_ptr data_schema, - std::shared_ptr sampler) - : MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buf), +CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir, + int32_t queue_size, std::unique_ptr data_schema, std::shared_ptr sampler) + : MappableLeafOp(num_works, queue_size, std::move(sampler)), cifar_type_(type), usage_(usage), folder_path_(file_dir), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h index ff80d6c104..993638d95a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.h @@ -49,14 +49,6 @@ class CifarOp : public MappableLeafOp { // Destructor. ~Builder() = default; - // Setter method - // @param uint32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method // @param uint32_t size // @return Builder setter method returns reference to the builder. @@ -122,7 +114,6 @@ class CifarOp : public MappableLeafOp { std::string dir_; std::string usage_; int32_t num_workers_; - int32_t rows_per_buffer_; int32_t op_connect_size_; std::shared_ptr sampler_; std::unique_ptr schema_; @@ -133,13 +124,11 @@ class CifarOp : public MappableLeafOp { // @param CifarType type - Cifar10 or Cifar100 // @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all' // @param uint32_t numWorks - Num of workers reading images in parallel - // @param uint32_t - rowsPerBuffer Number of images (rows) in each buffer // @param std::string - dir directory of cifar dataset // @param uint32_t - queueSize - connector queue size // @param std::unique_ptr sampler - sampler tells ImageFolderOp what to read - CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf, - const std::string &file_dir, int32_t queue_size, std::unique_ptr data_schema, - std::shared_ptr sampler); + CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir, int32_t queue_size, + std::unique_ptr data_schema, std::shared_ptr sampler); // Destructor. ~CifarOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc index 76b43176e3..6d088d73ea 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.cc @@ -36,7 +36,6 @@ ClueOp::Builder::Builder() std::shared_ptr config_manager = GlobalContext::config_manager(); builder_num_workers_ = config_manager->num_parallel_workers(); builder_op_connector_size_ = config_manager->op_connector_size(); - builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_worker_connector_size_ = config_manager->worker_connector_size(); } @@ -67,9 +66,8 @@ Status ClueOp::Builder::Build(std::shared_ptr *op) { } std::shared_ptr clue_op = std::make_shared( - builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, ck_map, - builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, - builder_device_id_); + builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, ck_map, builder_clue_files_list_, + builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_); RETURN_IF_NOT_OK(clue_op->Init()); *op = std::move(clue_op); @@ -87,11 +85,11 @@ std::vector ClueOp::Builder::split(const std::string &s, char delim return res; } -ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, - ColKeyMap cols_to_keyword, std::vector clue_files_list, int32_t op_connector_size, - bool shuffle_files, int32_t num_devices, int32_t device_id) - : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size, - shuffle_files, num_devices, device_id), +ClueOp::ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword, + std::vector clue_files_list, int32_t op_connector_size, bool shuffle_files, + int32_t num_devices, int32_t device_id) + : NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices, + device_id), clue_files_list_(std::move(clue_files_list)), cols_to_keyword_(cols_to_keyword) {} @@ -200,8 +198,7 @@ void ClueOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info ParallelOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_ - << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ + out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nClue files list:\n"; for (int i = 0; i < clue_files_list_.size(); ++i) { out << " " << clue_files_list_[i]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h index 1519452d1a..fb5df5080b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/clue_op.h @@ -138,9 +138,9 @@ class ClueOp : public NonMappableLeafOp { }; // Constructor of ClueOp - ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, - ColKeyMap cols_to_keyword, std::vector clue_files_list, int32_t op_connector_size, - bool shuffle_files, int32_t num_devices, int32_t device_id); + ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword, + std::vector clue_files_list, int32_t op_connector_size, bool shuffle_files, int32_t num_devices, + int32_t device_id); // Default destructor ~ClueOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index c2dfaf6ec8..64d9875a92 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -50,7 +50,6 @@ const unsigned int kPadValueZero = 0; CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); builder_task_type_ = TaskType::Detection; } @@ -100,8 +99,8 @@ Status CocoOp::Builder::Build(std::shared_ptr *ptr) { RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic."); } *ptr = std::make_shared(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_, - builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_, - std::move(builder_schema_), std::move(builder_sampler_)); + builder_op_connector_size_, builder_decode_, std::move(builder_schema_), + std::move(builder_sampler_)); return Status::OK(); } @@ -122,9 +121,9 @@ Status CocoOp::Builder::SanityCheck() { } CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, - int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, - std::unique_ptr data_schema, std::shared_ptr sampler) - : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), + int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr data_schema, + std::shared_ptr sampler) + : MappableLeafOp(num_workers, queue_size, std::move(sampler)), decode_(decode), task_type_(task_type), image_folder_path_(image_folder_path), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h index bd6cd99adb..29e04ee396 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.h @@ -109,14 +109,6 @@ class CocoOp : public MappableLeafOp { return *this; } - // Setter method. - // @param int32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method. // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. @@ -159,15 +151,14 @@ class CocoOp : public MappableLeafOp { // @param std::string image_folder_path - image folder path of Coco // @param std::string annotation_path - annotation json path of Coco // @param int32_t num_workers - number of workers reading images in parallel - // @param int32_t rows_per_buffer - number of images (rows) in each buffer // @param int32_t queue_size - connector queue size // @param int64_t num_samples - number of samples to read // @param bool decode - whether to decode images // @param std::unique_ptr data_schema - the schema of the Coco dataset // @param std::shared_ptr sampler - sampler tells CocoOp what to read CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path, - int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode, - std::unique_ptr data_schema, std::shared_ptr sampler); + int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr data_schema, + std::shared_ptr sampler); // Destructor ~CocoOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc index 801844741a..17a24dab4d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc @@ -32,7 +32,6 @@ CsvOp::Builder::Builder() std::shared_ptr config_manager = GlobalContext::config_manager(); builder_num_workers_ = config_manager->num_parallel_workers(); builder_op_connector_size_ = config_manager->op_connector_size(); - builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_worker_connector_size_ = config_manager->worker_connector_size(); } @@ -59,8 +58,8 @@ Status CsvOp::Builder::Build(std::shared_ptr *op) { std::shared_ptr csv_op = std::make_shared( builder_csv_files_list_, builder_field_delim_, builder_column_default_list_, builder_column_name_list_, - builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, - builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_); + builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, builder_op_connector_size_, + builder_shuffle_files_, builder_num_devices_, builder_device_id_); RETURN_IF_NOT_OK(csv_op->Init()); *op = std::move(csv_op); @@ -69,11 +68,11 @@ Status CsvOp::Builder::Build(std::shared_ptr *op) { CsvOp::CsvOp(const std::vector &csv_files_list, char field_delim, const std::vector> &column_default, - const std::vector &column_name, int32_t num_workers, int64_t rows_per_buffer, - int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files, - int32_t num_devices, int32_t device_id) - : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size, - shuffle_files, num_devices, device_id), + const std::vector &column_name, int32_t num_workers, int64_t num_samples, + int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files, int32_t num_devices, + int32_t device_id) + : NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices, + device_id), csv_files_list_(std::move(csv_files_list)), field_delim_(field_delim), column_default_list_(column_default), @@ -91,11 +90,10 @@ Status CsvOp::Init() { return Status::OK(); } -CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim, +CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim, std::vector> column_default, std::string file_path) : worker_id_(worker_id), buffer_connector_(connector), - csv_rows_per_buffer_(rows_per_buffer), csv_field_delim_(field_delim), column_default_(column_default), file_path_(file_path), @@ -469,8 +467,7 @@ Status CsvOp::CsvParser::InitCsvParser() { } Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) { - CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_, - file); + CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file); csv_parser.SetStartOffset(start_offset); csv_parser.SetEndOffset(end_offset); std::ifstream ifs; @@ -516,8 +513,7 @@ void CsvOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info ParallelOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_ - << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ + out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nCsv files list:\n"; for (int i = 0; i < csv_files_list_.size(); ++i) { out << " " << csv_files_list_[i]; @@ -592,7 +588,7 @@ Status CsvOp::CalculateNumRowsPerShard() { } int64_t CsvOp::CountTotalRows(const std::string &file) { - CsvParser csv_parser(0, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_, file); + CsvParser csv_parser(0, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file); std::ifstream ifs; ifs.open(file, std::ifstream::in); if (!ifs.is_open()) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h index a6a61c3501..ce019229e9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.h @@ -65,7 +65,7 @@ class CsvOp : public NonMappableLeafOp { public: CsvParser() = delete; - CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim, + CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim, std::vector> column_default, std::string file_path); ~CsvParser() = default; @@ -128,7 +128,6 @@ class CsvOp : public NonMappableLeafOp { int32_t worker_id_; JaggedConnector *buffer_connector_; - int64_t csv_rows_per_buffer_; const char csv_field_delim_; std::vector> column_default_; State cur_state_; @@ -261,8 +260,8 @@ class CsvOp : public NonMappableLeafOp { CsvOp(const std::vector &csv_files_list, char field_delim, const std::vector> &column_default, const std::vector &column_name, - int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size, - int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id); + int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size, + bool shuffle_files, int32_t num_devices, int32_t device_id); // Default destructor ~CsvOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 81b56c82da..48e24d6840 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -28,7 +28,6 @@ namespace dataset { ImageFolderOp::Builder::Builder() : builder_decode_(false), builder_recursive_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -45,10 +44,9 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr *ptr) { builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK(builder_schema_->AddColumn( ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar))); - *ptr = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_dir_, - builder_op_connector_size_, builder_recursive_, builder_decode_, - builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), - std::move(builder_sampler_)); + *ptr = std::make_shared( + builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_recursive_, builder_decode_, + builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_)); return Status::OK(); } @@ -64,11 +62,10 @@ Status ImageFolderOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } -ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, - bool recursive, bool do_decode, const std::set &exts, - const std::map &map, std::unique_ptr data_schema, - std::shared_ptr sampler) - : MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer), +ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode, + const std::set &exts, const std::map &map, + std::unique_ptr data_schema, std::shared_ptr sampler) + : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)), folder_path_(file_dir), recursive_(recursive), decode_(do_decode), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h index 1cf6b366a5..32f742e47b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.h @@ -63,14 +63,6 @@ class ImageFolderOp : public MappableLeafOp { // Destructor. ~Builder() = default; - // Setter method - // @param int32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method // @param int32_t size // @return Builder setter method returns reference to the builder. @@ -159,13 +151,12 @@ class ImageFolderOp : public MappableLeafOp { // Constructor // @param int32_t num_wkrs - Num of workers reading images in parallel - // @param int32_t - rows_per_buffer Number of images (rows) in each buffer // @param std::string - dir directory of ImageNetFolder // @param int32_t queue_size - connector queue size // @param std::set exts - set of file extensions to read, if empty, read everything under the dir // @param td::unique_ptr sampler - sampler tells ImageFolderOp what to read - ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool recursive, - bool do_decode, const std::set &exts, const std::map &map, + ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode, + const std::set &exts, const std::map &map, std::unique_ptr, std::shared_ptr sampler); // Destructor. diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index 3c57e9eff5..4ffba9cfb5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -33,7 +33,6 @@ namespace dataset { ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_decode_(false) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -49,9 +48,9 @@ Status ManifestOp::Builder::Build(std::shared_ptr *ptr) { builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); - *ptr = std::make_shared(builder_num_workers_, builder_rows_per_buffer_, builder_file_, - builder_op_connector_size_, builder_decode_, builder_labels_to_read_, - std::move(builder_schema_), std::move(builder_sampler_), builder_usage_); + *ptr = std::make_shared(builder_num_workers_, builder_file_, builder_op_connector_size_, builder_decode_, + builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_), + builder_usage_); return Status::OK(); } @@ -64,10 +63,10 @@ Status ManifestOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } -ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, +ManifestOp::ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode, const std::map &class_index, std::unique_ptr data_schema, std::shared_ptr sampler, std::string usage) - : MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buffer), + : MappableLeafOp(num_works, queue_size, std::move(sampler)), io_block_pushed_(0), sampler_ind_(0), data_schema_(std::move(data_schema)), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h index 2ccb0fda8f..76562b2d0b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.h @@ -46,14 +46,6 @@ class ManifestOp : public MappableLeafOp { // Destructor ~Builder() = default; - // Setter method - // @param int32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method // @param int32_t size // @return Builder setter method returns reference to the builder. @@ -134,11 +126,10 @@ class ManifestOp : public MappableLeafOp { // Constructor // @param int32_t num_works - Num of workers reading images in parallel - // @param int32_t - rows_per_buffer Number of images (rows) in each buffer // @param std::string - file list of Manifest // @param int32_t queue_size - connector queue size // @param td::unique_ptr sampler - sampler tells ImageFolderOp what to read - ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode, + ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode, const std::map &class_index, std::unique_ptr data_schema, std::shared_ptr sampler, std::string usage); // Destructor. diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.cc index 5f6a131d79..b2d63cbc41 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.cc @@ -24,9 +24,8 @@ namespace mindspore { namespace dataset { -MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr sampler, - int32_t rows_per_buffer) - : ParallelOp(num_wkrs, queue_size, std::move(sampler)), rows_per_buffer_(rows_per_buffer) {} +MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr sampler) + : ParallelOp(num_wkrs, queue_size, std::move(sampler)) {} // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work Status MappableLeafOp::operator()() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.h index 9ff9a61c10..fd1356361b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mappable_leaf_op.h @@ -47,16 +47,13 @@ namespace dataset { template class Queue; -using ImageLabelPair = std::shared_ptr>; -using FolderImagesPair = std::shared_ptr>>; - class MappableLeafOp : public ParallelOp, public RandomAccessOp { public: /// Constructor /// \param int32_t num_wkrs - Num of workers reading images in parallel /// \param int32_t queue_size - connector queue size /// \param td::unique_ptr sampler - sampler tells the source what to read - MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr sampler, int32_t rows_per_buffer); + MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr sampler); /// Destructor. ~MappableLeafOp() = default; @@ -94,10 +91,6 @@ class MappableLeafOp : public ParallelOp, public RandomAccessOp { /// Reset function to be called after every epoch to reset the source op after /// \return Status The status code returned Status Reset() override; - - int32_t rows_per_buffer_; - int64_t row_cnt_; - int64_t buf_cnt_; }; } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index 00140c38cc..246c8714ee 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -50,7 +50,6 @@ MindRecordOp::Builder::Builder() : build_dataset_file_({}) { std::shared_ptr cfg = GlobalContext::config_manager(); build_num_mind_record_workers_ = kDefaultMindRecordWorkers; - build_rows_per_buffer_ = cfg->rows_per_buffer(); build_op_connector_queue_size_ = cfg->op_connector_size(); builder_num_workers_ = 0; build_load_dataset_ = false; @@ -114,7 +113,7 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, std::vector &columns_to_load, const std::vector> &operators, int64_t num_padded, const mindrecord::json &sample_json, const std::map &sample_bytes) - : MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared(0, 0), 1), + : MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared(0, 0)), dataset_file_(dataset_file), load_dataset_(load_dataset), columns_to_load_(columns_to_load), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc index c7498deb06..0ba04252bb 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.cc @@ -35,7 +35,6 @@ const int32_t kMnistImageCols = 28; MnistOp::Builder::Builder() : builder_sampler_(nullptr), builder_usage_("") { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -52,8 +51,8 @@ Status MnistOp::Builder::Build(std::shared_ptr *ptr) { TensorShape scalar = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(builder_schema_->AddColumn( ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar))); - *ptr = std::make_shared(builder_usage_, builder_num_workers_, builder_rows_per_buffer_, builder_dir_, - builder_op_connector_size_, std::move(builder_schema_), std::move(builder_sampler_)); + *ptr = std::make_shared(builder_usage_, builder_num_workers_, builder_dir_, builder_op_connector_size_, + std::move(builder_schema_), std::move(builder_sampler_)); return Status::OK(); } @@ -73,9 +72,9 @@ Status MnistOp::Builder::SanityCheck() { return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg); } -MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, - int32_t queue_size, std::unique_ptr data_schema, std::shared_ptr sampler) - : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), +MnistOp::MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size, + std::unique_ptr data_schema, std::shared_ptr sampler) + : MappableLeafOp(num_workers, queue_size, std::move(sampler)), usage_(usage), folder_path_(folder_path), image_path_({}), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h index c95b305e66..03b200c34d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mnist_op.h @@ -52,14 +52,6 @@ class MnistOp : public MappableLeafOp { // Destructor. ~Builder() = default; - // Setter method - // @param int32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method // @param int32_t op_connector_size // @return Builder setter method returns reference to the builder. @@ -121,13 +113,12 @@ class MnistOp : public MappableLeafOp { // Constructor // @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all' // @param int32_t num_workers - number of workers reading images in parallel - // @param int32_t rows_per_buffer - number of images (rows) in each buffer // @param std::string folder_path - dir directory of mnist // @param int32_t queue_size - connector queue size // @param std::unique_ptr data_schema - the schema of the mnist dataset // @param td::unique_ptr sampler - sampler tells MnistOp what to read - MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path, - int32_t queue_size, std::unique_ptr data_schema, std::shared_ptr sampler); + MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size, + std::unique_ptr data_schema, std::shared_ptr sampler); // Destructor. ~MnistOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc index 226f0ddba6..395c458b2c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.cc @@ -36,13 +36,12 @@ namespace mindspore { namespace dataset { -NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, - int64_t total_num_rows, int32_t op_connector_size, bool shuffle_files, - int32_t num_devices, int32_t device_id) +NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, + int32_t op_connector_size, bool shuffle_files, int32_t num_devices, + int32_t device_id) : ParallelOp(num_workers, op_connector_size), device_id_(device_id), num_devices_(num_devices), - rows_per_buffer_(rows_per_buffer), filename_index_(std::make_unique()), load_io_block_queue_(true), load_jagged_connector_(true), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.h index 3b8645adfa..ac552aa8c8 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/nonmappable_leaf_op.h @@ -49,14 +49,13 @@ class NonMappableLeafOp : public ParallelOp { // @note The builder class should be used to call this constructor. // @param num_workers - number of worker threads reading data from tf_file files. // @param worker_connector_size - size of each internal queue. - // @param rows_per_buffer - number of rows that a full buffer will contain. // @param total_num_rows - Number of rows to read // @param dataset_files_list - list of filepaths for the dataset files. // @param op_connector_size - size of each queue in the connector that the child operator pulls from. // @param columns_to_load - the names of the columns to load data from. // @param shuffle_files - whether or not to shuffle the files before reading data. // @param equal_rows_per_shard - whether or not to get equal rows for each process. - NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows, + NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id); // Default destructor @@ -77,9 +76,6 @@ class NonMappableLeafOp : public ParallelOp { // @return Status - the error code returned. Status Reset() override; - // Getter method - int64_t rows_per_buffer() const { return rows_per_buffer_; } - // Op name getter // @return Name of the current Op std::string Name() const override { return "NonMappableLeafOp"; } @@ -157,7 +153,6 @@ class NonMappableLeafOp : public ParallelOp { bool finished_reading_dataset_; int64_t total_rows_; - int64_t rows_per_buffer_; WaitPost io_block_queue_wait_post_; bool load_io_block_queue_; std::mutex load_io_block_queue_mutex_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc index 0158905bfa..4c7ec3520b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc @@ -37,7 +37,6 @@ RandomDataOp::Builder::Builder() // Some arguments to the RandomDataOp have a default argument that is taken from the config. // The user may override these defaults by using the builder set methods. std::shared_ptr cfg = GlobalContext::config_manager(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_num_workers_ = cfg->num_parallel_workers(); builder_op_connector_size_ = cfg->op_connector_size(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h index 005e940fb4..6fd597599e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.h @@ -97,16 +97,6 @@ class RandomDataOp : public ParallelOp { return *this; } - /** - * Builder set method - * @param rows_per_buffer - The number of rows in each DataBuffer - * @return Builder - The modified builder by reference - */ - Builder &SetRowsPerBuffer(int64_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - /** * Builder set method * @param total_rows - The total number of rows in the dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index 5aa7e6210d..904b6891dc 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -36,7 +36,6 @@ TextFileOp::Builder::Builder() std::shared_ptr config_manager = GlobalContext::config_manager(); builder_num_workers_ = config_manager->num_parallel_workers(); builder_op_connector_size_ = config_manager->op_connector_size(); - builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_worker_connector_size_ = config_manager->worker_connector_size(); } @@ -65,21 +64,21 @@ Status TextFileOp::Builder::Build(std::shared_ptr *op) { RETURN_IF_NOT_OK( builder_schema_->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); - std::shared_ptr text_file_op = std::make_shared( - builder_num_workers_, builder_rows_per_buffer_, builder_total_rows_, builder_worker_connector_size_, - std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, builder_shuffle_files_, - builder_num_devices_, builder_device_id_); + std::shared_ptr text_file_op = + std::make_shared(builder_num_workers_, builder_total_rows_, builder_worker_connector_size_, + std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, + builder_shuffle_files_, builder_num_devices_, builder_device_id_); RETURN_IF_NOT_OK(text_file_op->Init()); *op = std::move(text_file_op); return Status::OK(); } -TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size, +TextFileOp::TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr schema, std::vector text_files_list, int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id) - : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_rows, op_connector_size, - shuffle_files, num_devices, device_id), + : NonMappableLeafOp(num_workers, worker_connector_size, total_rows, op_connector_size, shuffle_files, num_devices, + device_id), text_files_list_(std::move(text_files_list)), data_schema_(std::move(schema)) {} @@ -94,9 +93,8 @@ void TextFileOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info ParallelOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nRows per buffer: " << rows_per_buffer_ << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ - << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") - << "\nText files list:\n"; + out << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ + << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nText files list:\n"; for (int i = 0; i < text_files_list_.size(); ++i) { out << " " << text_files_list_[i]; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h index e6f6092c67..99e2670e85 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.h @@ -129,7 +129,6 @@ class TextFileOp : public NonMappableLeafOp { // Constructor of TextFileOp // @note The builder class should be used to call this constructor. // @param num_workers - number of worker threads reading data from tf_file files. - // @param rows_per_buffer - number of rows that a full buffer will contain. // @param total_num_rows - number of rows to read // @param dataset_files_list - list of filepaths for the dataset files. // @param data_schema - the data schema object. @@ -137,9 +136,9 @@ class TextFileOp : public NonMappableLeafOp { // @param columns_to_load - the names of the columns to load data from. // @param shuffle_files - whether or not to shuffle the files before reading data. // @param equal_rows_per_shard - whether or not to get equal rows for each process. - TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size, - std::unique_ptr, std::vector text_files_list, int32_t op_connector_size, - bool shuffle_files, int32_t num_devices, int32_t device_id); + TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr, + std::vector text_files_list, int32_t op_connector_size, bool shuffle_files, + int32_t num_devices, int32_t device_id); // Default destructor ~TextFileOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index 04ccd429a1..dc779ca382 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -47,7 +47,6 @@ TFReaderOp::Builder::Builder() builder_num_workers_ = config_manager->num_parallel_workers(); builder_worker_connector_size_ = config_manager->worker_connector_size(); builder_op_connector_size_ = config_manager->op_connector_size(); - builder_rows_per_buffer_ = config_manager->rows_per_buffer(); builder_shuffle_files_ = false; builder_data_schema_ = std::make_unique(); } @@ -114,22 +113,21 @@ Status TFReaderOp::Builder::Build(std::shared_ptr *out_tf_reader_op) } std::shared_ptr new_tf_reader_op = std::make_shared( - builder_num_workers_, builder_worker_connector_size_, builder_rows_per_buffer_, builder_total_rows_, - builder_dataset_files_list_, std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, - builder_shuffle_files_, builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_); + builder_num_workers_, builder_worker_connector_size_, builder_total_rows_, builder_dataset_files_list_, + std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, builder_shuffle_files_, + builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_); RETURN_IF_NOT_OK(new_tf_reader_op->Init()); *out_tf_reader_op = std::move(new_tf_reader_op); return Status::OK(); } -TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, - int64_t total_num_rows, std::vector dataset_files_list, - std::unique_ptr data_schema, int32_t op_connector_size, - std::vector columns_to_load, bool shuffle_files, int32_t num_devices, - int32_t device_id, bool equal_rows_per_shard) - : NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_num_rows, op_connector_size, - shuffle_files, num_devices, device_id), +TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, + std::vector dataset_files_list, std::unique_ptr data_schema, + int32_t op_connector_size, std::vector columns_to_load, bool shuffle_files, + int32_t num_devices, int32_t device_id, bool equal_rows_per_shard) + : NonMappableLeafOp(num_workers, worker_connector_size, total_num_rows, op_connector_size, shuffle_files, + num_devices, device_id), dataset_files_list_(std::move(dataset_files_list)), columns_to_load_(std::move(columns_to_load)), data_schema_(std::move(data_schema)), @@ -146,8 +144,8 @@ void TFReaderOp::Print(std::ostream &out, bool show_all) const { // Call the super class for displaying any common detailed info ParallelOp::Print(out, show_all); // Then show any custom derived-internal stuff - out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ - << "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") + out << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_ + << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nDataset files list: Size: " << dataset_files_list_.size() << "\n"; for (int i = 0; i < dataset_files_list_.size(); ++i) { out << " " << dataset_files_list_[i]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h index 56cd516cf6..02e02ba6aa 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.h @@ -173,7 +173,6 @@ class TFReaderOp : public NonMappableLeafOp { // @note The builder class should be used to call this constructor. // @param num_workers - number of worker threads reading data from tf_file files. // @param worker_connector_size - size of each internal queue. - // @param rows_per_buffer - number of rows that a full buffer will contain. // @param total_num_rows - Number of rows to read // @param dataset_files_list - list of filepaths for the dataset files. // @param data_schema - the data schema object. @@ -181,7 +180,7 @@ class TFReaderOp : public NonMappableLeafOp { // @param columns_to_load - the names of the columns to load data from. // @param shuffle_files - whether or not to shuffle the files before reading data. // @param equal_rows_per_shard - whether or not to get equal rows for each process. - TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows, + TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows, std::vector dataset_files_list, std::unique_ptr data_schema, int32_t op_connector_size, std::vector columns_to_load, bool shuffle_files, int32_t num_devices, int32_t device_id, bool equal_rows_per_shard); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index 56c6ee2833..cbc005a77a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -47,7 +47,6 @@ const char kImageSetsExtension[] = ".txt"; VOCOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) { std::shared_ptr cfg = GlobalContext::config_manager(); builder_num_workers_ = cfg->num_parallel_workers(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); builder_task_type_ = TaskType::Segmentation; } @@ -78,8 +77,8 @@ Status VOCOp::Builder::Build(std::shared_ptr *ptr) { ColDescriptor(std::string(kColumnTruncate), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1))); } *ptr = std::make_shared(builder_task_type_, builder_usage_, builder_dir_, builder_labels_to_read_, - builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_, - builder_decode_, std::move(builder_schema_), std::move(builder_sampler_)); + builder_num_workers_, builder_op_connector_size_, builder_decode_, + std::move(builder_schema_), std::move(builder_sampler_)); return Status::OK(); } @@ -96,10 +95,9 @@ Status VOCOp::Builder::SanityCheck() { } VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, - const std::map &class_index, int32_t num_workers, int32_t rows_per_buffer, - int32_t queue_size, bool decode, std::unique_ptr data_schema, - std::shared_ptr sampler) - : MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer), + const std::map &class_index, int32_t num_workers, int32_t queue_size, bool decode, + std::unique_ptr data_schema, std::shared_ptr sampler) + : MappableLeafOp(num_workers, queue_size, std::move(sampler)), decode_(decode), task_type_(task_type), usage_(task_mode), diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h index 7ba853449e..34b34c0a8d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.h @@ -112,14 +112,6 @@ class VOCOp : public MappableLeafOp { return *this; } - // Setter method. - // @param int32_t rows_per_buffer - // @return Builder setter method returns reference to the builder. - Builder &SetRowsPerBuffer(int32_t rows_per_buffer) { - builder_rows_per_buffer_ = rows_per_buffer; - return *this; - } - // Setter method. // @param std::shared_ptr sampler // @return Builder setter method returns reference to the builder. @@ -164,14 +156,13 @@ class VOCOp : public MappableLeafOp { // @param std::string folder_path - dir directory of VOC // @param std::map class_index - input class-to-index of annotation // @param int32_t num_workers - number of workers reading images in parallel - // @param int32_t rows_per_buffer - number of images (rows) in each buffer // @param int32_t queue_size - connector queue size // @param bool decode - whether to decode images // @param std::unique_ptr data_schema - the schema of the VOC dataset // @param std::shared_ptr sampler - sampler tells VOCOp what to read VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path, - const std::map &class_index, int32_t num_workers, int32_t rows_per_buffer, - int32_t queue_size, bool decode, std::unique_ptr data_schema, std::shared_ptr sampler); + const std::map &class_index, int32_t num_workers, int32_t queue_size, bool decode, + std::unique_ptr data_schema, std::shared_ptr sampler); // Destructor ~VOCOp() = default; @@ -255,11 +246,9 @@ class VOCOp : public MappableLeafOp { bool decode_; int64_t row_cnt_; - int64_t buf_cnt_; std::string folder_path_; TaskType task_type_; std::string usage_; - int32_t rows_per_buffer_; std::unique_ptr data_schema_; std::vector image_ids_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc index d329cdd102..b76b953640 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.cc @@ -33,7 +33,6 @@ ZipOp::Builder::Builder() { // using the various builder set methods. std::shared_ptr cfg = GlobalContext::config_manager(); - builder_rows_per_buffer_ = cfg->rows_per_buffer(); builder_op_connector_size_ = cfg->op_connector_size(); } @@ -41,18 +40,13 @@ Status ZipOp::Builder::SanityCheck() const { return Status::OK(); } Status ZipOp::Builder::Build(std::shared_ptr *ptr) { RETURN_IF_NOT_OK(SanityCheck()); - *ptr = std::make_shared(builder_rows_per_buffer_, builder_op_connector_size_); + *ptr = std::make_shared(builder_op_connector_size_); return Status::OK(); } // Construct ZipOp here, local variables initialized in operator due to tree construction restrictions -ZipOp::ZipOp(int32_t rows_per_buffer, int32_t op_connector_size) - : PipelineOp(op_connector_size), - children_num_(0), - rows_per_buffer_(rows_per_buffer), - buffer_id_(0), - draining_(false), - eof_(false) {} +ZipOp::ZipOp(int32_t op_connector_size) + : PipelineOp(op_connector_size), children_num_(0), draining_(false), eof_(false) {} // destructor ZipOp::~ZipOp() {} diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h index 2591e5cf47..f14adc70fd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/zip_op.h @@ -76,9 +76,8 @@ class ZipOp : public PipelineOp { }; // Constructor for ZipOp - // @param rows_per_buffer - number of rows in output buffer // @param op_connector_size - connector size - ZipOp(int32_t rows_per_buffer, int32_t op_connector_size); + explicit ZipOp(int32_t op_connector_size); // Destructor ~ZipOp(); @@ -136,8 +135,6 @@ class ZipOp : public PipelineOp { Status ComputeColMap() override; int32_t children_num_; - int32_t rows_per_buffer_; - int32_t buffer_id_; bool draining_; bool eof_; std::vector> child_iterators_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc index 2a1fb556ac..36aae6e9d2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc @@ -58,13 +58,13 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro // Helper function to inject a shuffle operator over top of current operator being built Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, - int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr *shuffle_op) { + int32_t connector_que_size, std::shared_ptr *shuffle_op) { std::shared_ptr new_shuffle_op = nullptr; int64_t shuffle_size = 0; RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size)); MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size; // Add the shuffle op - *shuffle_op = std::make_shared(shuffle_size, GetSeed(), connector_que_size, true, rows_per_buffer); + *shuffle_op = std::make_shared(shuffle_size, GetSeed(), connector_que_size, true); return Status::OK(); } @@ -231,7 +231,6 @@ DatasetNode::DatasetNode() // Fetch some default value from config manager std::shared_ptr cfg = GlobalContext::config_manager(); num_workers_ = cfg->num_parallel_workers(); - rows_per_buffer_ = cfg->rows_per_buffer(); connector_que_size_ = cfg->op_connector_size(); worker_connector_size_ = cfg->worker_connector_size(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h index 0695335288..43c81afa6c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h @@ -92,7 +92,7 @@ constexpr char kTFRecordNode[] = "TFRecordDataset"; constexpr char kVOCNode[] = "VOCDataset"; Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, - int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr *shuffle_op); + int32_t connector_que_size, std::shared_ptr *shuffle_op); // Helper function to validate dataset files parameter Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector &dataset_files); @@ -323,7 +323,6 @@ class DatasetNode : public std::enable_shared_from_this { std::shared_ptr cache_; int64_t dataset_size_; int32_t num_workers_; - int32_t rows_per_buffer_; int32_t connector_que_size_; int32_t worker_connector_size_; int32_t total_repeats_; // Number of times required to run this operator diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc index 85267c0785..dc5c3f9595 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/shuffle_node.cc @@ -44,8 +44,7 @@ void ShuffleNode::Print(std::ostream &out) const { // Function to build the ShuffleOp Status ShuffleNode::Build(std::vector> *const node_ops) { - auto op = std::make_shared(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_, - rows_per_buffer_); + auto op = std::make_shared(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_); op->set_total_repeats(GetTotalRepeats()); op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc index 8fdd53a93d..017847d1ae 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc @@ -74,8 +74,8 @@ Status AlbumNode::Build(std::vector> *const node_ops) std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - auto album_op = std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, decode_, - extensions, std::move(schema), std::move(sampler_rt)); + auto album_op = std::make_shared(num_workers_, dataset_dir_, connector_que_size_, decode_, extensions, + std::move(schema), std::move(sampler_rt)); album_op->set_total_repeats(GetTotalRepeats()); album_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(album_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc index 8f451f41a5..cd46800d4d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/celeba_node.cc @@ -69,8 +69,8 @@ Status CelebANode::Build(std::vector> *const node_ops std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - auto celeba_op = std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, - decode_, usage_, extensions_, std::move(schema), std::move(sampler_rt)); + auto celeba_op = std::make_shared(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_, + extensions_, std::move(schema), std::move(sampler_rt)); celeba_op->set_total_repeats(GetTotalRepeats()); celeba_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(celeba_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc index 991be365ce..bd459d24e1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar100_node.cc @@ -66,9 +66,8 @@ Status Cifar100Node::Build(std::vector> *const node_o std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - auto cifar_op = - std::make_shared(CifarOp::CifarType::kCifar100, usage_, num_workers_, rows_per_buffer_, dataset_dir_, - connector_que_size_, std::move(schema), std::move(sampler_rt)); + auto cifar_op = std::make_shared(CifarOp::CifarType::kCifar100, usage_, num_workers_, dataset_dir_, + connector_que_size_, std::move(schema), std::move(sampler_rt)); cifar_op->set_total_repeats(GetTotalRepeats()); cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(cifar_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc index 3616a4a7b1..0ae429a7b2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cifar10_node.cc @@ -64,9 +64,8 @@ Status Cifar10Node::Build(std::vector> *const node_op std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - auto cifar_op = - std::make_shared(CifarOp::CifarType::kCifar10, usage_, num_workers_, rows_per_buffer_, dataset_dir_, - connector_que_size_, std::move(schema), std::move(sampler_rt)); + auto cifar_op = std::make_shared(CifarOp::CifarType::kCifar10, usage_, num_workers_, dataset_dir_, + connector_que_size_, std::move(schema), std::move(sampler_rt)); cifar_op->set_total_repeats(GetTotalRepeats()); cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(cifar_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc index a1b10b0f5c..e60e6f94b3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/clue_node.cc @@ -177,8 +177,8 @@ Status CLUENode::Build(std::vector> *const node_ops) std::sort(sorted_dataset_files.begin(), sorted_dataset_files.end()); std::shared_ptr clue_op = - std::make_shared(num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, ck_map, - sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_); + std::make_shared(num_workers_, num_samples_, worker_connector_size_, ck_map, sorted_dataset_files, + connector_que_size_, shuffle_files, num_shards_, shard_id_); RETURN_IF_NOT_OK(clue_op->Init()); @@ -191,8 +191,8 @@ Status CLUENode::Build(std::vector> *const node_ops) RETURN_IF_NOT_OK(ClueOp::CountAllFileRows(sorted_dataset_files, &num_rows)); // Add the shuffle op after this op - RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, - rows_per_buffer_, &shuffle_op)); + RETURN_IF_NOT_OK( + AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); shuffle_op->set_total_repeats(GetTotalRepeats()); shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(shuffle_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc index 715c15e240..88ac78ffa4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/coco_node.cc @@ -123,8 +123,8 @@ Status CocoNode::Build(std::vector> *const node_ops) RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); std::shared_ptr op = - std::make_shared(task_type, dataset_dir_, annotation_file_, num_workers_, rows_per_buffer_, - connector_que_size_, decode_, std::move(schema), std::move(sampler_rt)); + std::make_shared(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_, + std::move(schema), std::move(sampler_rt)); op->set_total_repeats(GetTotalRepeats()); op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc index fd6471c455..99e999c04c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/csv_node.cc @@ -114,8 +114,8 @@ Status CSVNode::Build(std::vector> *const node_ops) { } std::shared_ptr csv_op = std::make_shared( - sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, rows_per_buffer_, - num_samples_, worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_); + sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, num_samples_, + worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_); RETURN_IF_NOT_OK(csv_op->Init()); @@ -128,8 +128,8 @@ Status CSVNode::Build(std::vector> *const node_ops) { RETURN_IF_NOT_OK(CsvOp::CountAllFileRows(sorted_dataset_files, column_names_.empty(), &num_rows)); // Add the shuffle op after this op - RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, - rows_per_buffer_, &shuffle_op)); + RETURN_IF_NOT_OK( + AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); shuffle_op->set_total_repeats(GetTotalRepeats()); shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(shuffle_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc index ad6d539a3d..258d3e9412 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/image_folder_node.cc @@ -72,9 +72,8 @@ Status ImageFolderNode::Build(std::vector> *const nod std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - auto op = - std::make_shared(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, recursive_, - decode_, exts_, class_indexing_, std::move(schema), std::move(sampler_rt)); + auto op = std::make_shared(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_, + class_indexing_, std::move(schema), std::move(sampler_rt)); op->set_total_repeats(GetTotalRepeats()); op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc index 2d33cc7567..36312d80e4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/manifest_node.cc @@ -94,8 +94,8 @@ Status ManifestNode::Build(std::vector> *const node_o std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - manifest_op = std::make_shared(num_workers_, rows_per_buffer_, dataset_file_, connector_que_size_, - decode_, class_index_, std::move(schema), std::move(sampler_rt), usage_); + manifest_op = std::make_shared(num_workers_, dataset_file_, connector_que_size_, decode_, class_index_, + std::move(schema), std::move(sampler_rt), usage_); manifest_op->set_total_repeats(GetTotalRepeats()); manifest_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(manifest_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc index c0e59d195f..e3fa2eca3a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc @@ -60,8 +60,8 @@ Status MnistNode::Build(std::vector> *const node_ops) std::shared_ptr sampler_rt = nullptr; RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); - auto op = std::make_shared(usage_, num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, - std::move(schema), std::move(sampler_rt)); + auto op = std::make_shared(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema), + std::move(sampler_rt)); op->set_total_repeats(GetTotalRepeats()); op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc index 315d220949..58e80b2a4d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/text_file_node.cc @@ -82,9 +82,9 @@ Status TextFileNode::Build(std::vector> *const node_o RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1))); // Create and initialize TextFileOp - std::shared_ptr text_file_op = std::make_shared( - num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, std::move(schema), sorted_dataset_files, - connector_que_size_, shuffle_files, num_shards_, shard_id_); + std::shared_ptr text_file_op = + std::make_shared(num_workers_, num_samples_, worker_connector_size_, std::move(schema), + sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_); RETURN_IF_NOT_OK(text_file_op->Init()); if (cache_ == nullptr && shuffle_ == ShuffleMode::kGlobal && !IsDescendantOfCache()) { @@ -96,8 +96,8 @@ Status TextFileNode::Build(std::vector> *const node_o RETURN_IF_NOT_OK(TextFileOp::CountAllFileRows(sorted_dataset_files, &num_rows)); // Add the shuffle op after this op - RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, - rows_per_buffer_, &shuffle_op)); + RETURN_IF_NOT_OK( + AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); shuffle_op->set_total_repeats(GetTotalRepeats()); shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(shuffle_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc index 2d2ce25144..db336343e6 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc @@ -124,8 +124,8 @@ Status TFRecordNode::Build(std::vector> *const node_o // Create and initialize TFReaderOp std::shared_ptr tf_reader_op = std::make_shared( - num_workers_, worker_connector_size_, rows_per_buffer_, num_samples_, sorted_dir_files, std::move(data_schema), - connector_que_size_, columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_); + num_workers_, worker_connector_size_, num_samples_, sorted_dir_files, std::move(data_schema), connector_que_size_, + columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_); RETURN_IF_NOT_OK(tf_reader_op->Init()); @@ -139,8 +139,7 @@ Status TFRecordNode::Build(std::vector> *const node_o RETURN_IF_NOT_OK(TFReaderOp::CountTotalRows(&num_rows, sorted_dir_files)); // Add the shuffle op after this op - RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_, - rows_per_buffer_, &shuffle_op)); + RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op)); shuffle_op->set_total_repeats(GetTotalRepeats()); shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(shuffle_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc index ab1a92c92a..8144dfec17 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/voc_node.cc @@ -112,8 +112,8 @@ Status VOCNode::Build(std::vector> *const node_ops) { RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); std::shared_ptr voc_op; - voc_op = std::make_shared(task_type_, usage_, dataset_dir_, class_index_, num_workers_, rows_per_buffer_, - connector_que_size_, decode_, std::move(schema), std::move(sampler_rt)); + voc_op = std::make_shared(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_, + decode_, std::move(schema), std::move(sampler_rt)); voc_op->set_total_repeats(GetTotalRepeats()); voc_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(voc_op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc index 79871503fe..c58ee30a66 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/sync_wait_node.cc @@ -43,11 +43,9 @@ void SyncWaitNode::Print(std::ostream &out) const { // Function to build the BarrierOp Status SyncWaitNode::Build(std::vector> *const node_ops) { - // Right now barrier should only take num_rows_per_buffer = 1 // The reason for this is because having it otherwise can lead to blocking issues // See barrier_op.h for more details - const int32_t rows_per_buffer = 1; - auto op = std::make_shared(rows_per_buffer, connector_que_size_, condition_name_, callback_); + auto op = std::make_shared(connector_que_size_, condition_name_, callback_); op->set_total_repeats(GetTotalRepeats()); op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(op); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc index 9aef58823f..88877773d1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/zip_node.cc @@ -58,7 +58,7 @@ Status ZipNode::ValidateParams() { } Status ZipNode::Build(std::vector> *const node_ops) { - auto op = std::make_shared(rows_per_buffer_, connector_que_size_); + auto op = std::make_shared(connector_que_size_); op->set_total_repeats(GetTotalRepeats()); op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); node_ops->push_back(op); diff --git a/tests/ut/cpp/dataset/album_op_test.cc b/tests/ut/cpp/dataset/album_op_test.cc index d347dcb3f6..b28d3ee2e0 100644 --- a/tests/ut/cpp/dataset/album_op_test.cc +++ b/tests/ut/cpp/dataset/album_op_test.cc @@ -31,7 +31,7 @@ using mindspore::MsLogLevel::ERROR; using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; -std::shared_ptr Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); +std::shared_ptr Batch(int batch_size = 1, bool drop = false); std::shared_ptr Repeat(int repeat_cnt); @@ -42,10 +42,10 @@ std::shared_ptr Album(int64_t num_works, int64_t rows, int64_t conns, s std::shared_ptr so; AlbumOp::Builder builder; Status rc = builder.SetNumWorkers(num_works) - .SetAlbumDir(path) - .SetRowsPerBuffer(rows) - .SetOpConnectorSize(conns) - .SetExtensions({".json"}) + .SetAlbumDir(path) + + .SetOpConnectorSize(conns) + .SetExtensions({".json"}) .SetSampler(std::move(sampler)) .SetDecode(decode) .Build(&so); @@ -59,12 +59,12 @@ std::shared_ptr AlbumSchema(int64_t num_works, int64_t rows, int64_t co std::shared_ptr so; AlbumOp::Builder builder; Status rc = builder.SetNumWorkers(num_works) - .SetSchemaFile(schema_file) - .SetColumnsToLoad(column_names) - .SetAlbumDir(path) - .SetRowsPerBuffer(rows) - .SetOpConnectorSize(conns) - .SetExtensions({".json"}) + .SetSchemaFile(schema_file) + .SetColumnsToLoad(column_names) + .SetAlbumDir(path) + + .SetOpConnectorSize(conns) + .SetExtensions({".json"}) .SetSampler(std::move(sampler)) .SetDecode(decode) .Build(&so); @@ -180,8 +180,8 @@ TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) { EXPECT_OK(tensor_map["_priority"]->GetItemAt(&priority, {})); EXPECT_OK(tensor_map["id"]->GetItemAt(&id, {})); MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape" - << tensor_map["label"] << "priority: " << priority << " embedding : " - << tensor_map["_embedding"]->shape() << " id: " << id << "\n"; + << tensor_map["label"] << "priority: " << priority + << " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n"; i++; di.GetNextAsMap(&tensor_map); } diff --git a/tests/ut/cpp/dataset/batch_op_test.cc b/tests/ut/cpp/dataset/batch_op_test.cc index 2b65e8b3a8..3aac382005 100644 --- a/tests/ut/cpp/dataset/batch_op_test.cc +++ b/tests/ut/cpp/dataset/batch_op_test.cc @@ -34,7 +34,7 @@ class MindDataTestBatchOp : public UT::DatasetOpTesting { protected: }; -std::shared_ptr Batch(int32_t batch_size = 1, bool drop = false, int rows_per_buf = 2) { +std::shared_ptr Batch(int32_t batch_size = 1, bool drop = false) { Status rc; std::shared_ptr op; rc = de::BatchOp::Builder(batch_size).SetDrop(drop).Build(&op); @@ -50,10 +50,10 @@ std::shared_ptr Repeat(int repeat_cnt = 1) { return op; } -std::shared_ptr TFReader(std::string schema, int rows_per_buf = 2, int num_works = 8) { +std::shared_ptr TFReader(std::string schema, int num_works = 8) { std::shared_ptr so; de::TFReaderOp::Builder builder; - builder.SetDatasetFilesList({schema}).SetRowsPerBuffer(rows_per_buf).SetNumWorkers(num_works); + builder.SetDatasetFilesList({schema}).SetNumWorkers(num_works); Status rc = builder.Build(&so); return so; } @@ -111,7 +111,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) { bool success = false; auto op1 = TFReader(schema_file); auto op2 = Repeat(2); - auto op3 = Batch(7, true, 99); + auto op3 = Batch(7, true); op1->set_total_repeats(2); op1->set_num_repeats_per_epoch(2); auto tree = Build({op1, op2, op3}); @@ -161,7 +161,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) { bool success = false; auto op1 = TFReader(schema_file); auto op2 = Repeat(2); - auto op3 = Batch(7, false, 99); + auto op3 = Batch(7, false); op1->set_total_repeats(2); op1->set_num_repeats_per_epoch(2); auto tree = Build({op1, op2, op3}); @@ -217,7 +217,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) { std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; bool success = false; auto op1 = TFReader(schema_file); - auto op2 = Batch(7, false, 99); + auto op2 = Batch(7, false); auto op3 = Repeat(2); op1->set_total_repeats(2); op1->set_num_repeats_per_epoch(2); @@ -270,7 +270,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) { std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data"; bool success = false; auto op1 = TFReader(schema_file); - auto op2 = Batch(5, true, 99); + auto op2 = Batch(5, true); auto op3 = Repeat(2); op1->set_total_repeats(2); op1->set_num_repeats_per_epoch(2); diff --git a/tests/ut/cpp/dataset/cache_op_test.cc b/tests/ut/cpp/dataset/cache_op_test.cc index 44af34c222..afa69efffc 100644 --- a/tests/ut/cpp/dataset/cache_op_test.cc +++ b/tests/ut/cpp/dataset/cache_op_test.cc @@ -253,7 +253,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) { // RandomDataOp std::shared_ptr myRandomDataOp; rc = RandomDataOp::Builder() - .SetRowsPerBuffer(4) + .SetNumWorkers(4) .SetDataSchema(std::move(testSchema)) .SetTotalRows(50) // 50 samples for now @@ -277,7 +277,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) { rc = CacheOp::Builder() .SetNumWorkers(5) .SetClient(myClient) - .SetRowsPerBuffer(1) + .SetSampler(std::move(seq_sampler)) .Build(&myCacheOp); ASSERT_TRUE(rc.IsOk()); @@ -379,7 +379,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) { // RandomDataOp std::shared_ptr myRandomDataOp; rc = RandomDataOp::Builder() - .SetRowsPerBuffer(2) + .SetNumWorkers(4) .SetDataSchema(std::move(testSchema)) .SetTotalRows(10) @@ -401,7 +401,6 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) { rc = CacheOp::Builder() .SetNumWorkers(4) .SetClient(myClient) - .SetRowsPerBuffer(3) .SetSampler(std::move(seq_sampler)) .Build(&myCacheOp); ASSERT_TRUE(rc.IsOk()); @@ -484,7 +483,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) { ImageFolderOp::Builder builder; builder.SetOpConnectorSize(3) .SetNumWorkers(3) - .SetRowsPerBuffer(2) + .SetExtensions({".jpg", ".JPEG"}) .SetRecursive(true) .SetImageFolderDir(datasets_root_path_ + "/testPK/data"); diff --git a/tests/ut/cpp/dataset/celeba_op_test.cc b/tests/ut/cpp/dataset/celeba_op_test.cc index 202a6a8c95..9d69b8bcda 100644 --- a/tests/ut/cpp/dataset/celeba_op_test.cc +++ b/tests/ut/cpp/dataset/celeba_op_test.cc @@ -26,41 +26,45 @@ #include "securec.h" using namespace mindspore::dataset; -using mindspore::MsLogLevel::ERROR; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; std::shared_ptr Repeat(int repeat_cnt); std::shared_ptr Build(std::vector> ops); -std::shared_ptr Celeba(int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, - const std::string &dir, std::shared_ptr sampler = nullptr, - bool decode = false, const std::string &dataset_type = "all") { +std::shared_ptr Celeba(int32_t num_workers, int32_t queue_size, const std::string &dir, + std::shared_ptr sampler = nullptr, bool decode = false, + const std::string &dataset_type = "all") { std::shared_ptr so; CelebAOp::Builder builder; Status rc = builder.SetNumWorkers(num_workers) .SetCelebADir(dir) - .SetRowsPerBuffer(rows_per_buffer) .SetOpConnectorSize(queue_size) .SetSampler(std::move(sampler)) .SetDecode(decode) - .SetUsage(dataset_type).Build(&so); + .SetUsage(dataset_type) + .Build(&so); return so; } class MindDataTestCelebaDataset : public UT::DatasetOpTesting { -protected: + protected: }; TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) { std::string dir = datasets_root_path_ + "/testCelebAData/"; - uint32_t expect_labels[4][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, - {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, - {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, - {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}}; + uint32_t expect_labels[4][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}}; uint32_t count = 0; - auto tree = Build({Celeba(16, 2, 32, dir)}); + auto tree = Build({Celeba(16, 2, dir)}); tree->Prepare(); Status rc = tree->Launch(); if (rc.IsError()) { @@ -86,16 +90,24 @@ TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) { TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) { std::string dir = datasets_root_path_ + "/testCelebAData/"; - uint32_t expect_labels[8][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, - {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, - {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, - {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, - {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}, - {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, - {0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1}, - {0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}}; + uint32_t expect_labels[8][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}, + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, + {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, + 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}}; uint32_t count = 0; - auto op1 = Celeba(16, 2, 32, dir); + auto op1 = Celeba(16, 2, dir); auto op2 = Repeat(2); auto tree = Build({op1, op2}); op1->set_total_repeats(2); @@ -131,7 +143,7 @@ TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) { 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}}; std::string dir = datasets_root_path_ + "/testCelebAData/"; uint32_t count = 0; - auto tree = Build({Celeba(16, 2, 32, dir, std::move(sampler))}); + auto tree = Build({Celeba(16, 2, dir, std::move(sampler))}); tree->Prepare(); Status rc = tree->Launch(); if (rc.IsError()) { diff --git a/tests/ut/cpp/dataset/cifar_op_test.cc b/tests/ut/cpp/dataset/cifar_op_test.cc index caa77a3aed..f807174a5b 100644 --- a/tests/ut/cpp/dataset/cifar_op_test.cc +++ b/tests/ut/cpp/dataset/cifar_op_test.cc @@ -47,7 +47,7 @@ std::shared_ptr Cifarop(uint64_t num_works, uint64_t rows, uint64_t con CifarOp::Builder builder; Status rc = builder.SetNumWorkers(num_works) .SetCifarDir(path) - .SetRowsPerBuffer(rows) + .SetOpConnectorSize(conns) .SetSampler(std::move(sampler)) .SetCifarType(cifar10) diff --git a/tests/ut/cpp/dataset/client_config_test.cc b/tests/ut/cpp/dataset/client_config_test.cc index a22d9f6b51..3dbe94defa 100644 --- a/tests/ut/cpp/dataset/client_config_test.cc +++ b/tests/ut/cpp/dataset/client_config_test.cc @@ -45,20 +45,17 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) { std::shared_ptr my_conf = GlobalContext::config_manager(); ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers); - ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer); ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize); ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize); ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed); my_conf->set_num_parallel_workers(2); - my_conf->set_rows_per_buffer(1); my_conf->set_worker_connector_size(3); my_conf->set_op_connector_size(4); my_conf->set_seed(5); ASSERT_EQ(my_conf->num_parallel_workers(), 2); - ASSERT_EQ(my_conf->rows_per_buffer(), 1); ASSERT_EQ(my_conf->worker_connector_size(), 3); ASSERT_EQ(my_conf->op_connector_size(), 4); ASSERT_EQ(my_conf->seed(), 5); @@ -67,7 +64,6 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) { ASSERT_TRUE(my_conf->LoadFile(file)); ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers); - ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer); ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize); ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize); ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed); diff --git a/tests/ut/cpp/dataset/clue_op_test.cc b/tests/ut/cpp/dataset/clue_op_test.cc index e9be170cd6..5433a84e56 100644 --- a/tests/ut/cpp/dataset/clue_op_test.cc +++ b/tests/ut/cpp/dataset/clue_op_test.cc @@ -50,7 +50,7 @@ TEST_F(MindDataTestCLUEOp, TestCLUEBasic) { std::shared_ptr op; ClueOp::Builder builder; builder.SetClueFilesList({dataset_path}) - .SetRowsPerBuffer(16) + .SetOpConnectorSize(2) .SetColsKeyMap(key_map); diff --git a/tests/ut/cpp/dataset/coco_op_test.cc b/tests/ut/cpp/dataset/coco_op_test.cc index 4b2f1c7bcd..7910ef8678 100644 --- a/tests/ut/cpp/dataset/coco_op_test.cc +++ b/tests/ut/cpp/dataset/coco_op_test.cc @@ -43,7 +43,7 @@ using mindspore::MsLogLevel::ERROR; using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; -std::shared_ptr Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); +std::shared_ptr Batch(int batch_size = 1, bool drop = false); std::shared_ptr Build(std::vector> ops); diff --git a/tests/ut/cpp/dataset/concat_op_test.cc b/tests/ut/cpp/dataset/concat_op_test.cc index 301d8be560..e1730b22d6 100644 --- a/tests/ut/cpp/dataset/concat_op_test.cc +++ b/tests/ut/cpp/dataset/concat_op_test.cc @@ -51,7 +51,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) { // TFReaderOp1 std::shared_ptr my_tfreader_op1; TFReaderOp::Builder builder1; - builder1.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); + builder1.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); std::unique_ptr schema1 = std::make_unique(); schema1->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); builder1.SetDataSchema(std::move(schema1)); @@ -63,7 +63,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) { // TFReaderOp2 std::shared_ptr my_tfreader_op2; TFReaderOp::Builder builder2; - builder2.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); + builder2.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); std::unique_ptr schema2 = std::make_unique(); schema2->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {}); builder2.SetDataSchema(std::move(schema2)); diff --git a/tests/ut/cpp/dataset/csv_op_test.cc b/tests/ut/cpp/dataset/csv_op_test.cc index 8e5f8ebddc..40aa6eec03 100644 --- a/tests/ut/cpp/dataset/csv_op_test.cc +++ b/tests/ut/cpp/dataset/csv_op_test.cc @@ -52,7 +52,7 @@ TEST_F(MindDataTestCSVOp, TestCSVBasic) { std::shared_ptr op; CsvOp::Builder builder; builder.SetCsvFilesList({dataset_path}) - .SetRowsPerBuffer(16) + .SetShuffleFiles(false) .SetOpConnectorSize(2) .SetFieldDelim(',') diff --git a/tests/ut/cpp/dataset/execution_tree_test.cc b/tests/ut/cpp/dataset/execution_tree_test.cc index b871dd00d8..87fd5cb785 100644 --- a/tests/ut/cpp/dataset/execution_tree_test.cc +++ b/tests/ut/cpp/dataset/execution_tree_test.cc @@ -44,19 +44,15 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) { uint32_t shuffle_size = 32; uint32_t connector_size = 8; - - std::shared_ptr leaf_op1 = - std::make_shared(shuffle_size, 0, connector_size, false, 32); + std::shared_ptr leaf_op1 = std::make_shared(shuffle_size, 0, connector_size, false); ASSERT_NE(leaf_op1, nullptr); my_tree->AssociateNode(leaf_op1); shuffle_size = 16; - std::shared_ptr leaf_op2 = - std::make_shared(shuffle_size, 0, connector_size, false, 32); + std::shared_ptr leaf_op2 = std::make_shared(shuffle_size, 0, connector_size, false); ASSERT_NE(leaf_op2, nullptr); my_tree->AssociateNode(leaf_op2); shuffle_size = 8; - std::shared_ptr parent_op = - std::make_shared(shuffle_size, 0, connector_size, false, 32); + std::shared_ptr parent_op = std::make_shared(shuffle_size, 0, connector_size, false); ASSERT_NE(parent_op, nullptr); my_tree->AssociateNode(parent_op); @@ -68,8 +64,7 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) { parent_op->AddChild(std::move(leaf_op1)); parent_op->AddChild(std::move(leaf_op2)); shuffle_size = 4; - std::shared_ptr root_op = - std::make_shared(shuffle_size, 0, connector_size, false, 32); + std::shared_ptr root_op = std::make_shared(shuffle_size, 0, connector_size, false); my_tree->AssignRoot(root_op); root_op->AddChild(parent_op); ASSERT_NE(root_op, nullptr); @@ -105,10 +100,10 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree2) { std::string dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data"; std::shared_ptr my_tfreader_op; TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(2) - .SetNumWorkers(2) + .SetDatasetFilesList({dataset_path}) + + .SetWorkerConnectorSize(2) + .SetNumWorkers(2) .Build(&my_tfreader_op); my_tree->AssociateNode(my_tfreader_op); diff --git a/tests/ut/cpp/dataset/image_folder_op_test.cc b/tests/ut/cpp/dataset/image_folder_op_test.cc index 8383563440..de9c6eb550 100644 --- a/tests/ut/cpp/dataset/image_folder_op_test.cc +++ b/tests/ut/cpp/dataset/image_folder_op_test.cc @@ -40,7 +40,7 @@ using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; using mindspore::MsLogLevel::ERROR; -std::shared_ptr Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); +std::shared_ptr Batch(int batch_size = 1, bool drop = false); std::shared_ptr Repeat(int repeat_cnt); @@ -53,7 +53,7 @@ std::shared_ptr ImageFolder(int64_t num_works, int64_t rows, int6 ImageFolderOp::Builder builder; Status rc = builder.SetNumWorkers(num_works) .SetImageFolderDir(path) - .SetRowsPerBuffer(rows) + .SetOpConnectorSize(conns) .SetExtensions({".jpg", ".JPEG"}) .SetSampler(std::move(sampler)) diff --git a/tests/ut/cpp/dataset/ir_callback_test.cc b/tests/ut/cpp/dataset/ir_callback_test.cc index 229918f8e9..1fe30e493c 100644 --- a/tests/ut/cpp/dataset/ir_callback_test.cc +++ b/tests/ut/cpp/dataset/ir_callback_test.cc @@ -156,7 +156,7 @@ TEST_F(MindDataTestCallback, TestBasicCallback) { ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape); ASSERT_OK(schema->AddColumn(col)); std::shared_ptr leaf; - rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf); + rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf); EXPECT_TRUE(rc.IsOk()); // config mapOp std::shared_ptr map_op; @@ -208,7 +208,7 @@ TEST_F(MindDataTestCallback, TestMultiEpochCallback) { ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape); ASSERT_OK(schema->AddColumn(col)); std::shared_ptr leaf; - rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); + rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); EXPECT_TRUE(rc.IsOk()); // config mapOp std::shared_ptr map_op; @@ -273,7 +273,7 @@ TEST_F(MindDataTestCallback, TestSelectedCallback) { ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape); ASSERT_OK(schema->AddColumn(col)); std::shared_ptr leaf; - rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); + rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf); EXPECT_TRUE(rc.IsOk()); // config mapOp std::shared_ptr map_op; diff --git a/tests/ut/cpp/dataset/manifest_op_test.cc b/tests/ut/cpp/dataset/manifest_op_test.cc index dbc45345ab..ae6114eadf 100644 --- a/tests/ut/cpp/dataset/manifest_op_test.cc +++ b/tests/ut/cpp/dataset/manifest_op_test.cc @@ -46,9 +46,14 @@ std::shared_ptr Manifest(int32_t num_works, int32_t rows, int32_t co std::map map = {}, bool decode = false) { std::shared_ptr so; ManifestOp::Builder builder; - Status rc = builder.SetNumWorkers(num_works).SetManifestFile(file).SetRowsPerBuffer( - rows).SetOpConnectorSize(conns).SetSampler(std::move(sampler)).SetClassIndex(map).SetDecode(decode) - .SetUsage(usage).Build(&so); + Status rc = builder.SetNumWorkers(num_works) + .SetManifestFile(file) + .SetOpConnectorSize(conns) + .SetSampler(std::move(sampler)) + .SetClassIndex(map) + .SetDecode(decode) + .SetUsage(usage) + .Build(&so); return so; } diff --git a/tests/ut/cpp/dataset/map_op_test.cc b/tests/ut/cpp/dataset/map_op_test.cc index 64dd8bc15a..791170fc9c 100644 --- a/tests/ut/cpp/dataset/map_op_test.cc +++ b/tests/ut/cpp/dataset/map_op_test.cc @@ -110,7 +110,7 @@ class MindDataTestMapOp : public UT::DatasetOpTesting { TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path_}) .SetColumnsToLoad({"image", "label", "A", "B"}) - .SetRowsPerBuffer(2) + .SetWorkerConnectorSize(2) .SetNumWorkers(2); @@ -516,7 +516,7 @@ TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) { TFReaderOp::Builder sobuilder; sobuilder.SetDatasetFilesList({dataset_path_}) .SetColumnsToLoad({"image", "label"}) - .SetRowsPerBuffer(2) + .SetWorkerConnectorSize(2) .SetNumWorkers(2); rc = sobuilder.Build(&my_tfreader_op); diff --git a/tests/ut/cpp/dataset/mind_record_op_test.cc b/tests/ut/cpp/dataset/mind_record_op_test.cc index 81da99453f..caa884bf7b 100644 --- a/tests/ut/cpp/dataset/mind_record_op_test.cc +++ b/tests/ut/cpp/dataset/mind_record_op_test.cc @@ -29,12 +29,11 @@ namespace common = mindspore::common; using namespace mindspore::dataset; -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; -class MindDataTestMindRecordOp : public UT::DatasetOpTesting { -}; +class MindDataTestMindRecordOp : public UT::DatasetOpTesting {}; TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) { // single MindRecord op and nothing else @@ -63,10 +62,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsOk()); @@ -134,11 +132,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordSample) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list) - .SetOperators(operators); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list) + .SetOperators(operators); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsOk()); @@ -206,11 +203,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordShuffle) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list) - .SetOperators(operators); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list) + .SetOperators(operators); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsOk()); @@ -281,11 +277,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordCategory) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list) - .SetOperators(operators); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list) + .SetOperators(operators); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsOk()); @@ -350,10 +345,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsOk()); @@ -364,8 +358,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { uint32_t num_repeats = 2; std::shared_ptr my_repeat_op; - rc = RepeatOp::Builder(num_repeats) - .Build(&my_repeat_op); + rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_repeat_op); EXPECT_TRUE(rc.IsOk()); @@ -375,7 +368,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { rc = my_repeat_op->AddChild(my_mindrecord_op); EXPECT_TRUE(rc.IsOk()); - // Set children/root layout. rc = my_tree->AssignRoot(my_repeat_op); EXPECT_TRUE(rc.IsOk()); @@ -407,7 +399,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) { } } - TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { // single MindRecord op and nothing else // @@ -435,10 +426,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsOk()); @@ -449,8 +439,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) { uint32_t num_repeats = 2; std::shared_ptr my_repeat_op; - rc = RepeatOp::Builder(num_repeats) - .Build(&my_repeat_op); + rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_repeat_op); EXPECT_TRUE(rc.IsOk()); @@ -518,10 +507,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordInvalidColumnList) { std::shared_ptr my_mindrecord_op; MindRecordOp::Builder builder; builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"}) - .SetLoadDataset(true) - .SetRowsPerBuffer(3) - .SetNumMindRecordWorkers(4) - .SetColumnsToLoad(column_list); + .SetLoadDataset(true) + .SetNumMindRecordWorkers(4) + .SetColumnsToLoad(column_list); rc = builder.Build(&my_mindrecord_op); ASSERT_TRUE(rc.IsError()); ASSERT_TRUE(rc.ToString().find_first_of("illegal column list") != std::string::npos); diff --git a/tests/ut/cpp/dataset/mnist_op_test.cc b/tests/ut/cpp/dataset/mnist_op_test.cc index 814bc687f5..13e6a23b21 100644 --- a/tests/ut/cpp/dataset/mnist_op_test.cc +++ b/tests/ut/cpp/dataset/mnist_op_test.cc @@ -42,7 +42,7 @@ using mindspore::MsLogLevel::ERROR; using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; -std::shared_ptr Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); +std::shared_ptr Batch(int batch_size = 1, bool drop = false); std::shared_ptr Repeat(int repeat_cnt); @@ -57,7 +57,7 @@ std::shared_ptr CreateMnist(int64_t num_wrks, int64_t rows, int64_t con MnistOp::Builder builder; Status rc = builder.SetNumWorkers(num_wrks) .SetDir(path) - .SetRowsPerBuffer(rows) + .SetOpConnectorSize(conns) .SetSampler(std::move(sampler)) .Build(&so); diff --git a/tests/ut/cpp/dataset/project_op_test.cc b/tests/ut/cpp/dataset/project_op_test.cc index 1f5d24cf7b..5c8fdabb2d 100644 --- a/tests/ut/cpp/dataset/project_op_test.cc +++ b/tests/ut/cpp/dataset/project_op_test.cc @@ -41,7 +41,7 @@ TEST_F(MindDataTestProjectOp, TestProjectProject) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); + builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); diff --git a/tests/ut/cpp/dataset/random_data_op_test.cc b/tests/ut/cpp/dataset/random_data_op_test.cc index ac1a5013fe..694418728b 100644 --- a/tests/ut/cpp/dataset/random_data_op_test.cc +++ b/tests/ut/cpp/dataset/random_data_op_test.cc @@ -76,11 +76,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) { std::shared_ptr myRandomDataOp; RandomDataOp::Builder builder; - rc = builder.SetRowsPerBuffer(2) - .SetNumWorkers(1) - .SetDataSchema(std::move(testSchema)) - .SetTotalRows(25) - .Build(&myRandomDataOp); + rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(25).Build(&myRandomDataOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myRandomDataOp); @@ -134,9 +130,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) { std::shared_ptr myRandomDataOp; RandomDataOp::Builder builder; - rc = builder.SetRowsPerBuffer(2) - .SetNumWorkers(1) - .Build(&myRandomDataOp); + rc = builder.SetNumWorkers(1).Build(&myRandomDataOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myRandomDataOp); @@ -171,11 +165,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) { std::shared_ptr myRandomDataOp; RandomDataOp::Builder builder; - rc = builder.SetRowsPerBuffer(2) - .SetNumWorkers(1) - .SetDataSchema(std::move(testSchema)) - .SetTotalRows(10) - .Build(&myRandomDataOp); + rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myRandomDataOp); @@ -235,11 +225,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) { std::shared_ptr myRandomDataOp; RandomDataOp::Builder builder; - rc = builder.SetRowsPerBuffer(2) - .SetNumWorkers(1) - .SetDataSchema(std::move(testSchema)) - .SetTotalRows(10) - .Build(&myRandomDataOp); + rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myRandomDataOp); @@ -315,11 +301,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) { std::shared_ptr myRandomDataOp; RandomDataOp::Builder builder; - rc = builder.SetRowsPerBuffer(2) - .SetNumWorkers(4) - .SetDataSchema(std::move(testSchema)) - .SetTotalRows(10) - .Build(&myRandomDataOp); + rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myRandomDataOp); @@ -395,11 +377,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { std::shared_ptr myRandomDataOp; RandomDataOp::Builder builder; - rc = builder.SetRowsPerBuffer(2) - .SetNumWorkers(4) - .SetDataSchema(std::move(testSchema)) - .SetTotalRows(10) - .Build(&myRandomDataOp); + rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myRandomDataOp); @@ -407,9 +385,9 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) { std::shared_ptr myShuffleOp; rc = ShuffleOp::Builder() - .SetRowsPerBuffer(2) - .SetShuffleSize(4) - .Build(&myShuffleOp); + + .SetShuffleSize(4) + .Build(&myShuffleOp); EXPECT_TRUE(rc.IsOk()); rc = myTree->AssociateNode(myShuffleOp); EXPECT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/dataset/rename_op_test.cc b/tests/ut/cpp/dataset/rename_op_test.cc index acb22d4e7b..bd884ff18e 100644 --- a/tests/ut/cpp/dataset/rename_op_test.cc +++ b/tests/ut/cpp/dataset/rename_op_test.cc @@ -53,10 +53,10 @@ TEST_F(MindDataTestRenameOp, TestRenameOpDefault) { std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(16) - .SetNumWorkers(1) + .SetDatasetFilesList({dataset_path}) + + .SetWorkerConnectorSize(16) + .SetNumWorkers(1) .Build(&my_tfreader_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_tfreader_op); diff --git a/tests/ut/cpp/dataset/sentence_piece_vocab_op_test.cc b/tests/ut/cpp/dataset/sentence_piece_vocab_op_test.cc index 6db89eb7b7..f906d5d16e 100644 --- a/tests/ut/cpp/dataset/sentence_piece_vocab_op_test.cc +++ b/tests/ut/cpp/dataset/sentence_piece_vocab_op_test.cc @@ -46,7 +46,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) { std::shared_ptr file_op; TextFileOp::Builder builder_file; - builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2); + builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2); Status rc = builder_file.Build(&file_op); ASSERT_TRUE(rc.IsOk()); @@ -119,7 +119,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) { std::shared_ptr file_op; TextFileOp::Builder builder_file; - builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2); + builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2); Status rc = builder_file.Build(&file_op); ASSERT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/dataset/shuffle_op_test.cc b/tests/ut/cpp/dataset/shuffle_op_test.cc index 6bde46a90e..81515c973f 100644 --- a/tests/ut/cpp/dataset/shuffle_op_test.cc +++ b/tests/ut/cpp/dataset/shuffle_op_test.cc @@ -56,16 +56,16 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) { dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(16) - .SetNumWorkers(1) + .SetDatasetFilesList({dataset_path}) + + .SetWorkerConnectorSize(16) + .SetNumWorkers(1) .Build(&my_tfreader_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_tfreader_op); EXPECT_TRUE(rc.IsOk()); std::shared_ptr my_shuffle_op; - rc = ShuffleOp::Builder().SetRowsPerBuffer(2).SetShuffleSize(4).Build(&my_shuffle_op); + rc = ShuffleOp::Builder().SetShuffleSize(4).Build(&my_shuffle_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_shuffle_op); EXPECT_TRUE(rc.IsOk()); @@ -130,7 +130,6 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) { std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(3) .SetWorkerConnectorSize(16) .SetNumWorkers(2) .Build(&my_tfreader_op); @@ -138,7 +137,7 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) { rc = my_tree->AssociateNode(my_tfreader_op); EXPECT_TRUE(rc.IsOk()); std::shared_ptr my_shuffle_op; - rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).SetRowsPerBuffer(3).Build(&my_shuffle_op); + rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).Build(&my_shuffle_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_shuffle_op); EXPECT_TRUE(rc.IsOk()); @@ -201,14 +200,13 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic3) { std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(3) .SetWorkerConnectorSize(16) .SetNumWorkers(2) .Build(&my_tfreader_op); EXPECT_TRUE(rc.IsOk()); my_tree->AssociateNode(my_tfreader_op); std::shared_ptr my_shuffle_op; - rc = ShuffleOp::Builder().SetShuffleSize(100).SetRowsPerBuffer(3).Build(&my_shuffle_op); + rc = ShuffleOp::Builder().SetShuffleSize(100).Build(&my_shuffle_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_shuffle_op); EXPECT_TRUE(rc.IsOk()); @@ -275,7 +273,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) { std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(3) .SetWorkerConnectorSize(16) .SetNumWorkers(2) .Build(&my_tfreader_op); @@ -286,7 +283,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) { rc = ShuffleOp::Builder() .SetShuffleSize(4) .SetShuffleSeed(100) - .SetRowsPerBuffer(3) .SetReshuffleEachEpoch(true) .Build(&my_shuffle_op); EXPECT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/dataset/skip_op_test.cc b/tests/ut/cpp/dataset/skip_op_test.cc index e5a9811c52..40ee70905a 100644 --- a/tests/ut/cpp/dataset/skip_op_test.cc +++ b/tests/ut/cpp/dataset/skip_op_test.cc @@ -35,7 +35,7 @@ TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); + builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); diff --git a/tests/ut/cpp/dataset/take_op_test.cc b/tests/ut/cpp/dataset/take_op_test.cc index 9b49c39201..ea78cdb02b 100644 --- a/tests/ut/cpp/dataset/take_op_test.cc +++ b/tests/ut/cpp/dataset/take_op_test.cc @@ -42,7 +42,7 @@ TEST_F(MindDataTestTakeOp, TestTakeProject) { // TFReaderOp std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); + builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); diff --git a/tests/ut/cpp/dataset/text_file_op_test.cc b/tests/ut/cpp/dataset/text_file_op_test.cc index 09cd1b770f..f8b79a6bb7 100644 --- a/tests/ut/cpp/dataset/text_file_op_test.cc +++ b/tests/ut/cpp/dataset/text_file_op_test.cc @@ -45,7 +45,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileBasic) { std::shared_ptr op; TextFileOp::Builder builder; - builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2); + builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2); Status rc = builder.Build(&op); ASSERT_TRUE(rc.IsOk()); @@ -94,7 +94,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileFileNotExist) { std::shared_ptr op; TextFileOp::Builder builder; - builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2); + builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2); Status rc = builder.Build(&op); ASSERT_TRUE(rc.IsOk()); diff --git a/tests/ut/cpp/dataset/tfReader_op_test.cc b/tests/ut/cpp/dataset/tfReader_op_test.cc index 4422a81728..546fffdd28 100644 --- a/tests/ut/cpp/dataset/tfReader_op_test.cc +++ b/tests/ut/cpp/dataset/tfReader_op_test.cc @@ -44,7 +44,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16); + builder.SetDatasetFilesList({dataset_path}); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); @@ -148,7 +148,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(1); + builder.SetDatasetFilesList({dataset_path}); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); @@ -200,7 +200,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1).SetRowsPerBuffer(16); + builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); @@ -252,10 +252,11 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(16) - .SetNumWorkers(1); - std::unique_ptr schema = std::make_unique(); + builder + .SetDatasetFilesList({dataset_path}) + + .SetNumWorkers(1); + std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); Status rc = builder.Build(&my_tfreader_op); @@ -307,7 +308,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) { // TFReaderOp std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16); + builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {}); builder.SetDataSchema(std::move(schema)); @@ -378,7 +379,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; builder.SetDatasetFilesList({dataset_path + "/test.data"}) - .SetRowsPerBuffer(16) + .SetNumWorkers(16) .SetDataSchema(std::move(data_schema)); Status rc = builder.Build(&my_tfreader_op); @@ -605,7 +606,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16); + builder.SetDatasetFilesList({dataset_path}); Status rc = builder.Build(&my_tfreader_op); ASSERT_TRUE(rc.IsOk()); @@ -697,7 +698,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) { std::shared_ptr my_tfreader_op; TFReaderOp::Builder builder; - builder.SetDatasetFilesList({invalid_file, valid_file, schema_file}).SetRowsPerBuffer(16); + builder.SetDatasetFilesList({invalid_file, valid_file, schema_file}); std::unique_ptr schema = std::make_unique(); schema->LoadSchemaFile(schema_file, {}); @@ -706,7 +707,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) { Status rc = builder.Build(&my_tfreader_op); ASSERT_TRUE(!rc.IsOk()); - builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file}).SetRowsPerBuffer(16); + builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file}); schema = std::make_unique(); schema->LoadSchemaFile(schema_file, {}); diff --git a/tests/ut/cpp/dataset/voc_op_test.cc b/tests/ut/cpp/dataset/voc_op_test.cc index 2bafbddf6d..ef004b93ad 100644 --- a/tests/ut/cpp/dataset/voc_op_test.cc +++ b/tests/ut/cpp/dataset/voc_op_test.cc @@ -39,11 +39,11 @@ namespace common = mindspore::common; using namespace mindspore::dataset; -using mindspore::MsLogLevel::ERROR; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; -std::shared_ptr Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2); +std::shared_ptr Batch(int batch_size = 1, bool drop = false); std::shared_ptr Build(std::vector> ops); @@ -61,8 +61,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) { std::string task_mode("train"); std::shared_ptr my_voc_op; VOCOp::Builder builder; - Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode) - .Build(&my_voc_op); + Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op); ASSERT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_voc_op); @@ -87,7 +86,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) { while (!tensor_list.empty()) { MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; - //Display the tensor by calling the printer on it + // Display the tensor by calling the printer on it for (int i = 0; i < tensor_list.size(); i++) { std::ostringstream ss; ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; @@ -112,8 +111,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) { std::string task_mode("train"); std::shared_ptr my_voc_op; VOCOp::Builder builder; - Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode) - .Build(&my_voc_op); + Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op); ASSERT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_voc_op); @@ -138,7 +136,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) { while (!tensor_list.empty()) { MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; - //Display the tensor by calling the printer on it + // Display the tensor by calling the printer on it for (int i = 0; i < tensor_list.size(); i++) { std::ostringstream ss; ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; @@ -168,9 +166,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) { std::shared_ptr my_voc_op; VOCOp::Builder builder; Status rc = - builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode) - .SetClassIndex(class_index) - .Build(&my_voc_op); + builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).SetClassIndex(class_index).Build(&my_voc_op); ASSERT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_voc_op); @@ -195,7 +191,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) { while (!tensor_list.empty()) { MS_LOG(DEBUG) << "Row display for row #: " << row_count << "."; - //Display the tensor by calling the printer on it + // Display the tensor by calling the printer on it for (int i = 0; i < tensor_list.size(); i++) { std::ostringstream ss; ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl; diff --git a/tests/ut/cpp/dataset/zip_op_test.cc b/tests/ut/cpp/dataset/zip_op_test.cc index 4a7d34374d..858c6d8179 100644 --- a/tests/ut/cpp/dataset/zip_op_test.cc +++ b/tests/ut/cpp/dataset/zip_op_test.cc @@ -36,22 +36,21 @@ namespace common = mindspore::common; using namespace mindspore::dataset; -using mindspore::MsLogLevel::INFO; -using mindspore::ExceptionType::NoExceptionType; using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::INFO; -class MindDataTestZipOp : public UT::DatasetOpTesting { - }; +class MindDataTestZipOp : public UT::DatasetOpTesting {}; TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { -/* Tree: - * - * - * OpId(2) ZipOp - * / \ - * OpId(0) TFReaderOp OpId(1) TFReaderOp - * Start with an empty execution tree -*/ + /* Tree: + * + * + * OpId(2) ZipOp + * / \ + * OpId(0) TFReaderOp OpId(1) TFReaderOp + * Start with an empty execution tree + */ Status rc; MS_LOG(INFO) << "UT test TestZipBasic."; auto my_tree = std::make_shared(); @@ -61,21 +60,19 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(16) - .SetNumWorkers(1) - .Build(&my_tfreader_op); + .SetDatasetFilesList({dataset_path}) + .SetWorkerConnectorSize(16) + .SetNumWorkers(1) + .Build(&my_tfreader_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_tfreader_op); EXPECT_TRUE(rc.IsOk()); std::shared_ptr my_tfreader_op2; rc = TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path2}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(1) - .SetNumWorkers(1) - .Build(&my_tfreader_op2); + .SetDatasetFilesList({dataset_path2}) + .SetWorkerConnectorSize(1) + .SetNumWorkers(1) + .Build(&my_tfreader_op2); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_tfreader_op2); EXPECT_TRUE(rc.IsOk()); @@ -123,20 +120,19 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) { EXPECT_TRUE(rc.IsOk()); row_count++; } - ASSERT_EQ(row_count, 3); // Should be 3 rows fetched + ASSERT_EQ(row_count, 3); // Should be 3 rows fetched } - TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { -/* Tree: - * OpId(3) Repeat(3) - * - * OpId(2) ZipOp - * / \ - * OpId(0) TFReaderOp OpId(1) TFReaderOp - * - * Start with an empty execution tree -*/ + /* Tree: + * OpId(3) Repeat(3) + * + * OpId(2) ZipOp + * / \ + * OpId(0) TFReaderOp OpId(1) TFReaderOp + * + * Start with an empty execution tree + */ Status rc; MS_LOG(INFO) << "UT test TestZipRepeat."; auto my_tree = std::make_shared(); @@ -146,21 +142,21 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data"; std::shared_ptr my_tfreader_op; rc = TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(16) - .SetNumWorkers(1) - .Build(&my_tfreader_op); + .SetDatasetFilesList({dataset_path}) + + .SetWorkerConnectorSize(16) + .SetNumWorkers(1) + .Build(&my_tfreader_op); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_tfreader_op); EXPECT_TRUE(rc.IsOk()); std::shared_ptr my_tfreader_op2; rc = TFReaderOp::Builder() - .SetDatasetFilesList({dataset_path2}) - .SetRowsPerBuffer(2) - .SetWorkerConnectorSize(1) - .SetNumWorkers(1) - .Build(&my_tfreader_op2); + .SetDatasetFilesList({dataset_path2}) + + .SetWorkerConnectorSize(1) + .SetNumWorkers(1) + .Build(&my_tfreader_op2); EXPECT_TRUE(rc.IsOk()); rc = my_tree->AssociateNode(my_tfreader_op2); EXPECT_TRUE(rc.IsOk()); @@ -221,5 +217,5 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) { EXPECT_TRUE(rc.IsOk()); row_count++; } - ASSERT_EQ(row_count, 9); // Should be 9 rows fetched + ASSERT_EQ(row_count, 9); // Should be 9 rows fetched } diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index 41f0035201..fdeab39e3b 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -43,21 +43,18 @@ def test_basic(): ds.config.load('../data/dataset/declient.cfg') - # assert ds.config.get_rows_per_buffer() == 32 assert ds.config.get_num_parallel_workers() == 8 # assert ds.config.get_worker_connector_size() == 16 assert ds.config.get_prefetch_size() == 16 assert ds.config.get_seed() == 5489 assert ds.config.get_monitor_sampling_interval() == 15 - # ds.config.set_rows_per_buffer(1) ds.config.set_num_parallel_workers(2) # ds.config.set_worker_connector_size(3) ds.config.set_prefetch_size(4) ds.config.set_seed(5) ds.config.set_monitor_sampling_interval(45) - # assert ds.config.get_rows_per_buffer() == 1 assert ds.config.get_num_parallel_workers() == 2 # assert ds.config.get_worker_connector_size() == 3 assert ds.config.get_prefetch_size() == 4