!2143 dataset: remove storage_op c++ part

Merge pull request !2143 from ms_yan/del_storage_c++
pull/2143/MERGE
Authored by mindspore-ci-bot 5 years ago; committed by Gitee
commit 3401e1c80b

@@ -48,7 +48,6 @@ namespace dataset {
using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr<DatasetOp> *);
static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {
{kStorage, &DEPipeline::ParseStorageOp},
{kShuffle, &DEPipeline::ParseShuffleOp},
{kMindrecord, &DEPipeline::ParseMindRecordOp},
{kMap, &DEPipeline::ParseMapOp},
@@ -301,70 +300,6 @@ Status DEPipeline::SetBatchParameters(const py::dict &args) {
return Status::OK();
}
Status DEPipeline::ValidateArgStorageOp(const py::dict &args) {
// Required arguments
if (((args.contains("dataset_files") && args["dataset_files"].is_none()) || args["schema"].is_none()) &&
((args.contains("dataset_dir") && args["dataset_dir"].is_none()) ||
(args["schema"].is_none() && args["schema_json_string"].is_none()))) {
std::string err_msg = "Error: at least one of dataset_files or schema_file is missing";
RETURN_STATUS_UNEXPECTED(err_msg);
}
return Status::OK();
}
Status DEPipeline::ParseStorageOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
RETURN_IF_NOT_OK(ValidateArgStorageOp(args));
std::shared_ptr<StorageOp::Builder> builder;
if (args.contains("dataset_files") && !args["dataset_files"].is_none()) {
builder = std::make_shared<StorageOp::Builder>();
(void)builder->SetDatasetFileList(ToStringVector(args["dataset_files"]));
(void)builder->SetSchemaFile(ToString(args["schema"]));
} else if (args.contains("dataset_dir") && !args["dataset_dir"].is_none()) {
builder = std::make_shared<StorageOp::Builder>();
(void)builder->SetDatasetFilesDir(ToString(args["dataset_dir"]));
if (!args["schema"].is_none()) {
(void)builder->SetSchemaFile(ToString(args["schema"]));
} else if (!args["schema_json_string"].is_none()) {
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
std::string s = ToString(args["schema_json_string"]);
RETURN_IF_NOT_OK(schema->LoadSchemaString(s, std::vector<std::string>()));
(void)builder->SetNumRows(schema->num_rows());
(void)builder->SetSchema(std::move(schema));
}
}
// Optional arguments
for (auto arg : args) {
std::string key = py::str(arg.first);
py::handle value = arg.second;
if (!value.is_none()) {
if (key == "num_parallel_workers") {
(void)builder->SetNumWorkers(ToInt(value));
} else if (key == "prefetch_size") {
(void)builder->SetOpConnectorSize(ToInt(value));
} else if (key == "columns_list") {
(void)builder->SetColumnsToLoad(ToStringVector(value));
} else if (key == "distribution") {
(void)builder->SetDataDistributionFile(ToString(value));
} else if (key == "labels_filename") {
(void)builder->setLabelsFileName(ToString(value));
} else if (key == "dataset_usage") {
(void)builder->SetDatasetUsage(ToString(value));
}
}
}
(void)builder->SetBatchSize(temp_batch_size_);
(void)builder->SetDropRemainder(temp_drop_remainder_);
std::shared_ptr<StorageOp> op;
RETURN_IF_NOT_OK(builder->Build(&op));
num_rows_ = op->num_rows();
num_classes_ = op->num_classes();
*ptr = op;
return Status::OK();
}
Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
std::shared_ptr<ShuffleOp::Builder> builder = std::make_shared<ShuffleOp::Builder>();
if (!args["buffer_size"].is_none()) {

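The removed ParseStorageOp and its validator lean on two different pybind11 checks that are easy to conflate: py::dict::contains() tests whether a key exists at all, while .is_none() tests whether a present value is Python None. A minimal standalone sketch of that distinction, assuming an embedded interpreter (HasUsableKey is a hypothetical helper, not part of the codebase):

#include <pybind11/embed.h>
namespace py = pybind11;

// A key is usable only if it is present in the dict AND not Python None;
// the removed validator combined both checks for every argument it read.
static bool HasUsableKey(const py::dict &args, const char *key) {
  return args.contains(key) && !args[key].is_none();
}

int main() {
  py::scoped_interpreter guard{};  // needed because py::dict touches the interpreter
  py::dict args;
  args["dataset_files"] = py::none();       // present but None -> unusable
  args["schema"] = py::str("schema.json");  // present and set  -> usable
  return (HasUsableKey(args, "schema") && !HasUsableKey(args, "dataset_files")) ? 0 : 1;
}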
@@ -37,7 +37,6 @@ using DsOpPtr = std::shared_ptr<DatasetOp>;
// enum for the dataset operator names
enum OpName {
kStorage = 0,
kShuffle,
kMindrecord,
kBatch,
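A small side effect of this hunk: once kStorage = 0 is gone, kShuffle becomes the first enumerator of OpName and implicitly takes the value 0, so the numeric value of every remaining member shifts down by one (the Python side should be unaffected, since the pybind11 enum below exports names rather than hard-coded numbers). The underlying C++ rule, as a toy illustration:

enum Toy { kFirst, kSecond };  // an unadorned first enumerator defaults to 0
static_assert(kFirst == 0 && kSecond == 1, "enumerators count up from 0");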
@@ -105,8 +104,6 @@ class DEPipeline {
int GetRepeatCount() const;
Status ParseStorageOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
Status ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
Status ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
@@ -181,9 +178,6 @@ class DEPipeline {
std::unique_ptr<DatasetIterator> iterator_;
// Validate required args passed to storage op.
Status ValidateArgStorageOp(const py::dict &args);
static Status ParsePadInfo(py::handle value, PadInfo *pad_info);
int batch_size_;

@@ -826,7 +826,6 @@ PYBIND11_MODULE(_c_dataengine, m) {
(void)py::class_<DatasetOp, std::shared_ptr<DatasetOp>>(m, "DatasetOp");
(void)py::enum_<OpName>(m, "OpName", py::arithmetic())
.value("STORAGE", OpName::kStorage)
.value("SHUFFLE", OpName::kShuffle)
.value("BATCH", OpName::kBatch)
.value("BUCKETBATCH", OpName::kBucketBatch)

@@ -39,7 +39,6 @@
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/storage_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#include "dataset/engine/datasetops/take_op.h"
#include "dataset/engine/datasetops/zip_op.h"

@@ -17,8 +17,6 @@
#include "dataset/util/allocator.h"
#include "dataset/core/global_context.h"
#include "dataset/core/tensor.h"
#include "dataset/engine/datasetops/source/storage_client.h"
#include "dataset/engine/datasetops/source/tf_buffer.h"
namespace mindspore {
namespace dataset {
@@ -26,37 +24,6 @@ namespace dataset {
// Description: This is the main constructor that is used for making a buffer
DataBuffer::DataBuffer(int32_t id, BufferFlags flags) : buffer_id_(id), tensor_table_(nullptr), buffer_flags_(flags) {}
// Name: CreateDataBuffer()
// Description: A static factory method to create the appropriate type of derived class
// buffer. Returns the base class reference for DataBuffer.
Status DataBuffer::CreateDataBuffer(
int32_t id, // In: The id for the new buffer
std::shared_ptr<StorageClient> storage_client, // In: The storage client that is related to this buffer type
std::unique_ptr<DataBuffer> *ptr) {
std::unique_ptr<DataBuffer> new_data_buffer;
try {
DatasetType ds_type = storage_client->schema()->dataset_type();
switch (ds_type) {
case DatasetType::kTf: {
// This type of buffer is for TF record data.
// Allocate derived class version for a TF buffers
new_data_buffer = std::make_unique<TFBuffer>(id, kDeBFlagNone, storage_client);
break;
}
default: {
std::string errMsg("Invalid buffer type");
RETURN_STATUS_UNEXPECTED(errMsg);
}
}
} catch (std::bad_alloc &e) {
return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__, e.what());
} catch (std::exception &e) {
RETURN_STATUS_UNEXPECTED(e.what());
}
*ptr = std::move(new_data_buffer);
return Status::OK();
}
// Name: print()
// Description: A function that prints info about the DataBuffer (base class version)
void DataBuffer::Print(std::ostream &out, // In: The output stream to print to

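For orientation, a hypothetical call site for the factory being deleted here, following the engine's Status-plus-out-pointer convention (storage_client stands for any already-initialized std::shared_ptr<StorageClient>):

std::unique_ptr<DataBuffer> buffer;
RETURN_IF_NOT_OK(DataBuffer::CreateDataBuffer(/*id=*/0, storage_client, &buffer));
// On success, buffer holds a TFBuffer when the schema's dataset type is kTf;
// every other dataset type failed with "Invalid buffer type".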
@@ -29,9 +29,6 @@
namespace mindspore {
namespace dataset {
// Forward declares
class StorageClient;
// The DataBuffer class is a base class that will represent the data for n values based
// on a unique row id for each row of data.
// There can be different types of DataBuffers to abstract over how the data is stored
@@ -53,14 +50,6 @@ class DataBuffer {
// Destructor
virtual ~DataBuffer();
// Name: CreateDataBuffer()
// Description: A factory method to create the appropriate type of derived class
// buffer. Returns the base class reference for DataBuffer.
static Status CreateDataBuffer(
int32_t id, // In: The id for the new buffer
std::shared_ptr<StorageClient>, // In: The StorageClient is used to choose the buffer type to create
std::unique_ptr<DataBuffer> *);
// Name: print()
// Description: A function that prints info about the DataBuffer (base class version)
virtual void Print(std::ostream &out, // In: The output stream to print to

@@ -53,7 +53,7 @@ class IteratorBase {
// messages are encountered (such as eoe or eof), then an empty TensorRow is returned back.
// @return Status - The error code return
// @note The position of a Tensor/column might be different from the initial column order
// in the storageOp. User must be aware that MapOp, ZipOps, and others might change
// in corresponding Dataset Op. User must be aware that MapOp, ZipOps, and others might change
// the column ordering.
virtual Status FetchNextTensorRow(TensorRow *out_row);

@@ -40,7 +40,7 @@ class ConcatOp : public PipelineOp {
~Builder() = default;
// The builder "build" method creates the final object.
// @return shared_ptr to the new StorageOp object
// @return shared_ptr to the new ConcatOp object
Status Build(std::shared_ptr<ConcatOp> *);
private:

@@ -40,7 +40,7 @@ class ProjectOp : public PipelineOp {
~Builder() = default;
// The builder "build" method creates the final object.
// @return shared_ptr to the new StorageOp object.
// @return shared_ptr to the new ProjectOp object.
Status Build(std::shared_ptr<ProjectOp> *);
private:

@@ -67,7 +67,7 @@ class RenameOp : public PipelineOp {
}
// The builder "build" method creates the ZipOp dataset Operator.
// @return shared_ptr to the new StorageOp object
// @return shared_ptr to the new RenameOp object
Status Build(std::shared_ptr<RenameOp> *);
private:

@@ -42,7 +42,7 @@ class RepeatOp : public PipelineOp {
~Builder() = default;
// The builder "build" method creates the final object.
// @return shared_ptr to the new StorageOp object
// @return shared_ptr to the new RepeatOp object
Status Build(std::shared_ptr<RepeatOp> *);
private:

@@ -101,7 +101,7 @@ class ShuffleOp : public PipelineOp {
}
// The builder "build" method creates the final object.
// @return shared_ptr to the new StorageOp object
// @return shared_ptr to the new ShuffleOp object
Status Build(std::shared_ptr<ShuffleOp> *);
private:

@@ -37,7 +37,7 @@ class SkipOp : public PipelineOp {
~Builder() = default;
// The builder "build" method creates the final object.
// @return shared_ptr to the new StorageOp object
// @return shared_ptr to the new SkipOp object
Status Build(std::shared_ptr<SkipOp> *);
private:

@@ -5,10 +5,6 @@ add_library(engine-datasetops-source OBJECT
generator_op.cc
io_block.cc
mindrecord_op.cc
storage_client.cc
storage_op.cc
tf_buffer.cc
tf_client.cc
tf_reader_op.cc
image_folder_op.cc
mnist_op.cc

@@ -25,7 +25,7 @@
namespace mindspore {
namespace dataset {
GeneratorOp::Builder::Builder() {
// Some arguments to the StorageOp constructor have a default argument that is taken
// Some arguments to the GeneratorOp constructor have a default argument that is taken
// from the client config.
build_buffer_size_ = kCfgRowsPerBuffer;
build_op_connector_size_ = kCfgOpConnectorSize;

@@ -72,7 +72,7 @@ class GeneratorOp : public PipelineOp {
}
// The builder "build" method creates the final object.
// @return shared_ptr to the new StorageOp object
// @return shared_ptr to the new GeneratorOp object
Status Build(std::shared_ptr<GeneratorOp> *);
private:

@@ -198,7 +198,7 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp {
// @param show_all
void Print(std::ostream &out, bool show_all) const override;
// This function is a hack! It is to return the num_class and num_rows the old storageOp does. The result
// This function is a hack! It is to return the num_class and num_rows. The result
// returned by this function may not be consistent with what image_folder_op is going to return
// user this at your own risk!
static Status CountRowsAndClasses(const std::string &path, const std::set<std::string> &exts, int64_t *num_rows,

@@ -44,7 +44,7 @@ using mindrecord::ShardReader;
MindRecordOp::Builder::Builder() : build_dataset_file_({}) {
// Some arguments to the MindRecordOp constructor have a default argument that is taken
// from the client config.
// The user may choose to change these values for the construction of the StorageOp by
// The user may choose to change these values for the construction of the MindRecordOp by
// using the various builder set methods.
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();

@@ -45,7 +45,7 @@ class PythonSampler : public Sampler {
Status ResetSampler() override;
// Op calls this to get next Buffer that contains all the sampleIds
// @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to StorageOp
// @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to corresponding Dataset Op
// @param int32_t workerId - not meant to be used
// @return - The error code return
Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) override;

@@ -38,7 +38,7 @@ class RandomAccessOp {
// @return - The error code return
Status GetNumRowsInDataset(int64_t *num_rows) const;
// sampler gets label , imageIds from storageOp, this function is unique to PK
// sampler gets label , imageIds from corresponding Dataset Op, this function is unique to PK
// @param std::map<int64_t, std::vector<int64_t>> * map
// @return - The error code return
virtual Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *map) const {

@@ -44,7 +44,7 @@ class SequentialSampler : public Sampler {
Status ResetSampler() override;
// Op calls this to get next Buffer that contains all the sampleIds
// @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to StorageOp
// @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to corresponding Dataset Op
// @param int32_t workerId - not meant to be used
// @return - The error code return
Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) override;

@@ -1,190 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define MAX_INTEGER_INT32 2147483647
#include <iostream>
#include <memory>
#include <utility>
#include <nlohmann/json.hpp>
#include "dataset/core/constants.h"
#include "dataset/engine/datasetops/source/storage_client.h"
#include "dataset/engine/datasetops/source/storage_op.h"
#include "dataset/engine/datasetops/source/tf_client.h"
#include "dataset/util/status.h"
namespace mindspore {
namespace dataset {
// Name: Constructor
// Description:
StorageClient::StorageClient(std::unique_ptr<DataSchema> schema, // In: The schema for this storage client.
StorageOp *store_op) // In: The StorageOp that's using this client
: data_schema_(std::move(schema)), num_rows_in_dataset_(0), storage_op_(store_op), num_classes_(0) {}
// Name: Print()
// Description: A function that prints info about the StorageClient
// In: The output stream to print to
void StorageClient::Print(std::ostream &out) const {
// not much to show here folks!
// out << "Storage client:\n";
}
// This is a local-only static function to drive the switch statement for creating
// the storage client (not a static member function)
static Status CreateStorageClientSwitch(
std::unique_ptr<DataSchema> schema, // In: The schema to set into the client
StorageOp *store_op, // In: The StorageOp we are operating on
std::shared_ptr<StorageClient> *out_client) { // Out: the created storage client
switch (schema->dataset_type()) {
case DatasetType::kArrow: {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
"Storage client not implemented yet for arrow dataset type.");
}
case DatasetType::kTf: {
// Construct the derived class TFClient, stored as base class StorageClient
store_op->set_rows_per_buffer(32);
*out_client = std::make_unique<TFClient>(std::move(schema), store_op);
break;
}
case DatasetType::kUnknown:
default: {
RETURN_STATUS_UNEXPECTED("Invalid dataset type.");
}
}
if (*out_client) {
RETURN_IF_NOT_OK((*out_client)->Init());
}
return Status::OK();
}
// Name: CreateStorageClient()
// Description: A factory method to create the derived storage client.
// Every dataset has a required field for the dataset type in a config
// file. This type will determine the child class to return for the
// type of storage client. It also creates the schema and sticks it
// into the cache.
Status StorageClient::CreateStorageClient(
StorageOp *store_op, // In: A backpointer to the owning cache for this client.
std::string dataset_schema_path, // In: The path to the schema
std::shared_ptr<StorageClient> *out_client) { // Out: the created storage client
// Make a new schema first. This only assigns the dataset type. It does not
// create the columns yet.
auto new_schema = std::make_unique<DataSchema>();
RETURN_IF_NOT_OK(new_schema->LoadDatasetType(dataset_schema_path));
RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client));
return Status::OK();
}
// Name: CreateStorageClient()
// Description: A factory method to create the derived storage client.
// This creator is a user-override for the schema properties where
// the user has input the layout of the data (typically used in testcases)
Status StorageClient::CreateStorageClient(
StorageOp *store_op, // In: A backpointer to the owning cache for this client.
DatasetType in_type, // In: The type of dataset
std::shared_ptr<StorageClient> *out_client) { // Out: the created storage client
// The dataset type is passed in by the user. Create an empty schema with only
// only the dataset type filled in and then create the client with it.
auto new_schema = std::make_unique<DataSchema>();
new_schema->set_dataset_type(in_type);
RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client));
return Status::OK();
}
// Name: LoadDatasetLayout()
// Description: There are 2 ways to define the properties of the data in the storage
// layer: LoadDatasetLayout() and AssignDatasetLayout().
// LoadDatasetLayout() will parse the json config file that comes with
// the dataset.
Status StorageClient::LoadDatasetLayout() {
// Access the json file to populate our schema, assume the json file is accessible
// locally.
RETURN_IF_NOT_OK(data_schema_->LoadSchemaFile(storage_op_->schema_file(), storage_op_->columns_to_load()));
// The number of rows in the schema file is an optional config. For example,
// maybe the derived storage client will know how to determine the total number
// of rows a different way rather than having it in the schema config json file.
// Thus, mNumRowsInDataset can still be zero and force the derived class override
// to determine it another way.
uint32_t num_rows = 0;
RETURN_IF_NOT_OK(this->numRowsFromFile(num_rows));
CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647");
if (num_rows_in_dataset_ == 0 || num_rows < num_rows_in_dataset_) {
num_rows_in_dataset_ = num_rows;
}
return Status::OK();
}
// Name: AssignDatasetLayout()
// Description: There are 2 ways to define the properties of the data in the storage
// layer: LoadDatasetLayout() and AssignDatasetLayout().
// AssignDatasetLayout() will take input from the caller and assign that
// info into the storage client.
Status StorageClient::AssignDatasetLayout(uint32_t num_rows, // In: The number of rows in the dataset
const DataSchema &schema) { // In: The schema for the dataset
// Since this is just an assignment into the storage client, you probably won't need
// to override this one in a derived class. First some sanity checks
CHECK_FAIL_RETURN_UNEXPECTED(data_schema_->dataset_type() == schema.dataset_type(),
"Assigning a schema into StorageClient with mismatched dataset types!");
CHECK_FAIL_RETURN_UNEXPECTED(data_schema_->NumColumns() == 0,
"Assigning a schema into StorageClient that already has non-empty schema!");
// The current schema was just an empty one with only the dataset field populated.
// Let's copy construct a new one that will be a copy of the input schema (releasing the old
// one) and then set the number of rows that the user requested.
data_schema_ = std::make_unique<DataSchema>(schema);
CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647");
num_rows_in_dataset_ = num_rows;
return Status::OK();
}
// Name: numRowsFromFile()
// Description: Reads the schema json file to see if the optional numRows field has
// been set and returns it.
Status StorageClient::numRowsFromFile(uint32_t &num_rows) const {
std::string schemaFile = storage_op_->schema_file();
try {
std::ifstream in(schemaFile);
nlohmann::json js;
in >> js;
if (js.find("numRows") == js.end()) {
num_rows = MAX_INTEGER_INT32;
} else {
num_rows = js.value("numRows", 0);
}
if (num_rows == 0) {
std::string err_msg =
"Storage client has not properly done dataset "
"handshake to initialize schema and number of rows.";
RETURN_STATUS_UNEXPECTED(err_msg);
}
}
// Catch any exception and rethrow it as our own
catch (const std::exception &err) {
std::ostringstream ss;
ss << "Schema file failed to load:\n" << err.what();
std::string err_msg = ss.str();
RETURN_STATUS_UNEXPECTED(err_msg);
}
return Status::OK();
}
// Get'r function
DataSchema *StorageClient::schema() const { return data_schema_.get(); }
} // namespace dataset
} // namespace mindspore

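numRowsFromFile() above was the only consumer of the optional "numRows" field in the schema JSON. A compact standalone equivalent using nlohmann::json the same way (the schema path parameter is hypothetical, and the try/catch wrapper of the original is omitted for brevity):

#include <cstdint>
#include <fstream>
#include <string>
#include <nlohmann/json.hpp>

uint32_t NumRowsFromSchema(const std::string &path) {
  std::ifstream in(path);
  nlohmann::json js;
  in >> js;
  // A missing "numRows" fell back to MAX_INTEGER_INT32 in the removed code,
  // while an explicit 0 was rejected as an incomplete dataset handshake.
  if (js.find("numRows") == js.end()) return 2147483647u;
  return js.value("numRows", 0u);
}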
@@ -1,128 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_
#define DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "dataset/engine/data_schema.h"
#include "dataset/engine/datasetops/source/storage_op.h"
#include "dataset/util/status.h"
namespace mindspore {
namespace dataset {
// The Storage Client is the interface and base class that the StorageOp
// will use to perform any interactions with the storage layer.
// The different types of datasets will have different derived classes
// under that storage client super class.
class StorageClient {
public:
// Name: Constructor
// Description:
StorageClient(std::unique_ptr<DataSchema> schema, // In: The schema for this storage client.
StorageOp *store_op); // In: The StorageOp that's using this client
// Destructor
virtual ~StorageClient() { storage_op_ = nullptr; }
virtual Status Init() { return Status::OK(); }
// Name: CreateStorageClient()
// Description: A factory method to create the derived storage client.
// Every dataset has a required field for the dataset type in a config
// file. This type will determine the child class to return for the
// type of storage client.
static Status CreateStorageClient(StorageOp *store_op, // In: A backpointer to the owning storage op for this client.
std::string dataset_schema_path, // In: The path to the dataset
std::shared_ptr<StorageClient> *out_client); // Out: the created storage client
// Name: CreateStorageClient()
// Description: A factory method to create the derived storage client.
// This creator is a user-override for the schema properties where
// the user has input the layout of the data (typically used in testcases)
static Status CreateStorageClient(StorageOp *store_op, // In: A backpointer to the owning cache for this client.
DatasetType in_type, // In: The type of dataset
std::shared_ptr<StorageClient> *out_client); // Out: the created storage client
// Name: Print()
// Description: A function that prints info about the StorageClient
virtual void Print(std::ostream &out) const; // In: The output stream to print to
// Provide stream operator for displaying
friend std::ostream &operator<<(std::ostream &out, const StorageClient &storage_client) {
storage_client.Print(out);
return out;
}
// Name: LoadDatasetLayout()
// Description: There are 2 ways to define the properties of the data in the storage
// layer: LoadDatasetLayout() and AssignDatasetLayout().
// LoadDatasetLayout() will parse the json config file that comes with
// the dataset and internally populate row counts and schema.
virtual Status LoadDatasetLayout();
// Name: AssignDatasetLayout()
// Description: There are 2 ways to define the properties of the data in the storage
// layer: LoadDatasetLayout() and AssignDatasetLayout().
// AssignDatasetLayout() will take input from the caller and assign that
virtual Status AssignDatasetLayout(uint32_t num_rows, // In: The number of rows in the dataset
const DataSchema &schema); // In: The schema for the dataset
// Name: Reset()
// Description: Resets any state info inside the client back to it's initialized
// state.
virtual Status Reset() = 0;
// Name: IsMoreData
// Description: General routine to ask if more data exists in the storage side for
// a given buffer id.
virtual bool IsMoreData(uint32_t id) { return true; }
// Name: numRowsFromFile()
// Description: Reads the schema json file to see if the optional numRows field has
// been set and returns it.
Status numRowsFromFile(uint32_t &num_rows) const;
// Get'r functions
DataSchema *schema() const;
uint32_t num_rows() const { return num_rows_in_dataset_; }
// Name: rows_per_buffer()
// Description: This default version simply gives you the count of the requested
// rows per buffer that the user defined in the storage op.
// However, if some condition down in the storage client layers
// could result in a buffer that has a different number of rows,
// then the derived class can override this method to provide their
// own implementation.
virtual uint32_t rows_per_buffer() { return storage_op_->rows_per_buffer(); }
// Description: Get the label classes num. Only manifest and Imagenet dataset support this parameter
virtual uint32_t num_classes() const { return 0; }
protected:
std::unique_ptr<DataSchema> data_schema_; // The schema for the data
uint32_t num_rows_in_dataset_; // The number of rows in the dataset
StorageOp *storage_op_; // Back pointer to the owning storage operator.
std::vector<std::string> col_names_;
uint32_t num_classes_;
};
} // namespace dataset
} // namespace mindspore
#endif // DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_

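rows_per_buffer() is the removed class's row-batching hook: the base version defers to the owning StorageOp, and a derived client overrides it when the storage layer dictates its own buffer size. A hedged sketch of such an override (TfLikeClient is a hypothetical name; the hard-coded 32 mirrors the set_rows_per_buffer(32) call in CreateStorageClientSwitch above):

class TfLikeClient : public StorageClient {
 public:
  using StorageClient::StorageClient;  // reuse the (schema, StorageOp*) constructor
  Status Reset() override { return Status::OK(); }    // must be provided: pure virtual in the base
  uint32_t rows_per_buffer() override { return 32; }  // fixed TF buffer size instead of the op default
};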
Some files were not shown because too many files have changed in this diff.