!2143 dataset: remove storage_op c++ part

Merge pull request !2143 from ms_yan/del_storage_c++
5 years ago · 3401e1c80b
parent 228a1b87f8 cf82aa9035
commit 3401e1c80b
48 changed files with 202 additions and 2882 deletions
--- a/mindspore/ccsrc/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc
@ -48,7 +48,6 @@ namespace dataset {
 using pFunction = Status (DEPipeline::*)(const py::dict &, std::shared_ptr<DatasetOp> *);

 static std::unordered_map<uint32_t, pFunction> g_parse_op_func_ = {
-  {kStorage, &DEPipeline::ParseStorageOp},
  {kShuffle, &DEPipeline::ParseShuffleOp},
  {kMindrecord, &DEPipeline::ParseMindRecordOp},
  {kMap, &DEPipeline::ParseMapOp},
@ -301,70 +300,6 @@ Status DEPipeline::SetBatchParameters(const py::dict &args) {
  return Status::OK();
 }

-Status DEPipeline::ValidateArgStorageOp(const py::dict &args) {
-  // Required arguments
-  if (((args.contains("dataset_files") && args["dataset_files"].is_none()) || args["schema"].is_none()) &&
-      ((args.contains("dataset_dir") && args["dataset_dir"].is_none()) ||
-       (args["schema"].is_none() && args["schema_json_string"].is_none()))) {
-    std::string err_msg = "Error: at least one of dataset_files or schema_file is missing";
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
-
-  return Status::OK();
-}
-
-Status DEPipeline::ParseStorageOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
-  RETURN_IF_NOT_OK(ValidateArgStorageOp(args));
-  std::shared_ptr<StorageOp::Builder> builder;
-  if (args.contains("dataset_files") && !args["dataset_files"].is_none()) {
-    builder = std::make_shared<StorageOp::Builder>();
-    (void)builder->SetDatasetFileList(ToStringVector(args["dataset_files"]));
-    (void)builder->SetSchemaFile(ToString(args["schema"]));
-  } else if (args.contains("dataset_dir") && !args["dataset_dir"].is_none()) {
-    builder = std::make_shared<StorageOp::Builder>();
-    (void)builder->SetDatasetFilesDir(ToString(args["dataset_dir"]));
-    if (!args["schema"].is_none()) {
-      (void)builder->SetSchemaFile(ToString(args["schema"]));
-    } else if (!args["schema_json_string"].is_none()) {
-      std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
-      std::string s = ToString(args["schema_json_string"]);
-      RETURN_IF_NOT_OK(schema->LoadSchemaString(s, std::vector<std::string>()));
-      (void)builder->SetNumRows(schema->num_rows());
-      (void)builder->SetSchema(std::move(schema));
-    }
-  }
-
-  // Optional arguments
-  for (auto arg : args) {
-    std::string key = py::str(arg.first);
-    py::handle value = arg.second;
-    if (!value.is_none()) {
-      if (key == "num_parallel_workers") {
-        (void)builder->SetNumWorkers(ToInt(value));
-      } else if (key == "prefetch_size") {
-        (void)builder->SetOpConnectorSize(ToInt(value));
-      } else if (key == "columns_list") {
-        (void)builder->SetColumnsToLoad(ToStringVector(value));
-      } else if (key == "distribution") {
-        (void)builder->SetDataDistributionFile(ToString(value));
-      } else if (key == "labels_filename") {
-        (void)builder->setLabelsFileName(ToString(value));
-      } else if (key == "dataset_usage") {
-        (void)builder->SetDatasetUsage(ToString(value));
-      }
-    }
-  }
-  (void)builder->SetBatchSize(temp_batch_size_);
-  (void)builder->SetDropRemainder(temp_drop_remainder_);
-
-  std::shared_ptr<StorageOp> op;
-  RETURN_IF_NOT_OK(builder->Build(&op));
-  num_rows_ = op->num_rows();
-  num_classes_ = op->num_classes();
-  *ptr = op;
-  return Status::OK();
-}
-
 Status DEPipeline::ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr) {
  std::shared_ptr<ShuffleOp::Builder> builder = std::make_shared<ShuffleOp::Builder>();
  if (!args["buffer_size"].is_none()) {
--- a/mindspore/ccsrc/dataset/api/de_pipeline.h
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.h
@ -37,7 +37,6 @@ using DsOpPtr = std::shared_ptr<DatasetOp>;

 // enum for the dataset operator names
 enum OpName {
-  kStorage = 0,
  kShuffle,
  kMindrecord,
  kBatch,
@ -105,8 +104,6 @@ class DEPipeline {

  int GetRepeatCount() const;

-  Status ParseStorageOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
-
  Status ParseShuffleOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);

  Status ParseMindRecordOp(const py::dict &args, std::shared_ptr<DatasetOp> *ptr);
@ -181,9 +178,6 @@ class DEPipeline {

  std::unique_ptr<DatasetIterator> iterator_;

-  // Validate required args passed to storage op.
-  Status ValidateArgStorageOp(const py::dict &args);
-
  static Status ParsePadInfo(py::handle value, PadInfo *pad_info);

  int batch_size_;
--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@ -826,7 +826,6 @@ PYBIND11_MODULE(_c_dataengine, m) {
  (void)py::class_<DatasetOp, std::shared_ptr<DatasetOp>>(m, "DatasetOp");

  (void)py::enum_<OpName>(m, "OpName", py::arithmetic())
-    .value("STORAGE", OpName::kStorage)
    .value("SHUFFLE", OpName::kShuffle)
    .value("BATCH", OpName::kBatch)
    .value("BUCKETBATCH", OpName::kBucketBatch)
--- a/mindspore/ccsrc/dataset/core/client.h
+++ b/mindspore/ccsrc/dataset/core/client.h
@ -39,7 +39,6 @@
 #include "dataset/engine/datasetops/shuffle_op.h"
 #include "dataset/engine/datasetops/source/generator_op.h"
 #include "dataset/engine/datasetops/source/mindrecord_op.h"
-#include "dataset/engine/datasetops/source/storage_op.h"
 #include "dataset/engine/datasetops/source/tf_reader_op.h"
 #include "dataset/engine/datasetops/take_op.h"
 #include "dataset/engine/datasetops/zip_op.h"
--- a/mindspore/ccsrc/dataset/engine/data_buffer.cc
+++ b/mindspore/ccsrc/dataset/engine/data_buffer.cc
@ -17,8 +17,6 @@
 #include "dataset/util/allocator.h"
 #include "dataset/core/global_context.h"
 #include "dataset/core/tensor.h"
-#include "dataset/engine/datasetops/source/storage_client.h"
-#include "dataset/engine/datasetops/source/tf_buffer.h"

 namespace mindspore {
 namespace dataset {
@ -26,37 +24,6 @@ namespace dataset {
 // Description: This is the main constructor that is used for making a buffer
 DataBuffer::DataBuffer(int32_t id, BufferFlags flags) : buffer_id_(id), tensor_table_(nullptr), buffer_flags_(flags) {}

-// Name: CreateDataBuffer()
-// Description: A static factory method to create the appropriate type of derived class
-//              buffer.  Returns the base class reference for DataBuffer.
-Status DataBuffer::CreateDataBuffer(
-  int32_t id,                                     // In: The id for the new buffer
-  std::shared_ptr<StorageClient> storage_client,  // In: The storage client that is related to this buffer type
-  std::unique_ptr<DataBuffer> *ptr) {
-  std::unique_ptr<DataBuffer> new_data_buffer;
-  try {
-    DatasetType ds_type = storage_client->schema()->dataset_type();
-    switch (ds_type) {
-      case DatasetType::kTf: {
-        // This type of buffer is for TF record data.
-        // Allocate derived class version for a TF buffers
-        new_data_buffer = std::make_unique<TFBuffer>(id, kDeBFlagNone, storage_client);
-        break;
-      }
-      default: {
-        std::string errMsg("Invalid buffer type");
-        RETURN_STATUS_UNEXPECTED(errMsg);
-      }
-    }
-  } catch (std::bad_alloc &e) {
-    return Status(StatusCode::kOutOfMemory, __LINE__, __FILE__, e.what());
-  } catch (std::exception &e) {
-    RETURN_STATUS_UNEXPECTED(e.what());
-  }
-  *ptr = std::move(new_data_buffer);
-  return Status::OK();
-}
-
 // Name: print()
 // Description: A function that prints info about the DataBuffer (base class version)
 void DataBuffer::Print(std::ostream &out,      // In: The output stream to print to
--- a/mindspore/ccsrc/dataset/engine/data_buffer.h
+++ b/mindspore/ccsrc/dataset/engine/data_buffer.h
@ -29,9 +29,6 @@

 namespace mindspore {
 namespace dataset {
-// Forward declares
-class StorageClient;
-
 // The DataBuffer class is a base class that will represent the data for n values based
 // on a unique row id for each row of data.
 // There can be different types of DataBuffers to abstract over how the data is stored
@ -53,14 +50,6 @@ class DataBuffer {
  // Destructor
  virtual ~DataBuffer();

-  // Name: CreateDataBuffer()
-  // Description: A factory method to create the appropriate type of derived class
-  //              buffer.  Returns the base class reference for DataBuffer.
-  static Status CreateDataBuffer(
-    int32_t id,                      // In: The id for the new buffer
-    std::shared_ptr<StorageClient>,  // In: The StorageClient is used to choose the buffer type to create
-    std::unique_ptr<DataBuffer> *);
-
  // Name: print()
  // Description: A function that prints info about the DataBuffer (base class version)
  virtual void Print(std::ostream &out,     // In: The output stream to print to
--- a/mindspore/ccsrc/dataset/engine/dataset_iterator.h
+++ b/mindspore/ccsrc/dataset/engine/dataset_iterator.h
@ -53,7 +53,7 @@ class IteratorBase {
  // messages are encountered (such as eoe or eof), then an empty TensorRow is returned back.
  // @return Status - The error code return
  // @note The position of a Tensor/column might be different from the initial column order
-  // in the storageOp. User must be aware that MapOp, ZipOps, and others might change
+  // in the corresponding Dataset Op. User must be aware that MapOp, ZipOp, and others might change
  // the column ordering.
  virtual Status FetchNextTensorRow(TensorRow *out_row);

--- a/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/concat_op.h
@ -40,7 +40,7 @@ class ConcatOp : public PipelineOp {
    ~Builder() = default;

    // The builder "build" method creates the final object.
-    // @return shared_ptr to the new StorageOp object
+    // @return shared_ptr to the new ConcatOp object
    Status Build(std::shared_ptr<ConcatOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/project_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/project_op.h
@ -40,7 +40,7 @@ class ProjectOp : public PipelineOp {
    ~Builder() = default;

    // The builder "build" method creates the final object.
-    // @return shared_ptr to the new StorageOp object.
+    // @return shared_ptr to the new ProjectOp object.
    Status Build(std::shared_ptr<ProjectOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/rename_op.h
@ -67,7 +67,7 @@ class RenameOp : public PipelineOp {
    }

    // The builder "build" method creates the RenameOp dataset Operator.
-    // @return shared_ptr to the new StorageOp object
+    // @return shared_ptr to the new RenameOp object
    Status Build(std::shared_ptr<RenameOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/repeat_op.h
@ -42,7 +42,7 @@ class RepeatOp : public PipelineOp {
    ~Builder() = default;

    // The builder "build" method creates the final object.
-    // @return shared_ptr to the new StorageOp object
+    // @return shared_ptr to the new RepeatOp object
    Status Build(std::shared_ptr<RepeatOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/shuffle_op.h
@ -101,7 +101,7 @@ class ShuffleOp : public PipelineOp {
    }

    // The builder "build" method creates the final object.
-    // @return shared_ptr to the new StorageOp object
+    // @return shared_ptr to the new ShuffleOp object
    Status Build(std::shared_ptr<ShuffleOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/skip_op.h
@ -37,7 +37,7 @@ class SkipOp : public PipelineOp {
    ~Builder() = default;

    // The builder "build" method creates the final object.
-    // @return shared_ptr to the new StorageOp object
+    // @return shared_ptr to the new SkipOp object
    Status Build(std::shared_ptr<SkipOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/CMakeLists.txt
@ -5,10 +5,6 @@ add_library(engine-datasetops-source OBJECT
    generator_op.cc
    io_block.cc
    mindrecord_op.cc
-    storage_client.cc
-    storage_op.cc
-    tf_buffer.cc
-    tf_client.cc
    tf_reader_op.cc
    image_folder_op.cc
    mnist_op.cc
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.cc
@ -25,7 +25,7 @@
 namespace mindspore {
 namespace dataset {
 GeneratorOp::Builder::Builder() {
-  // Some arguments to the StorageOp constructor have a default argument that is taken
+  // Some arguments to the GeneratorOp constructor have a default argument that is taken
  // from the client config.
  build_buffer_size_ = kCfgRowsPerBuffer;
  build_op_connector_size_ = kCfgOpConnectorSize;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/generator_op.h
@ -72,7 +72,7 @@ class GeneratorOp : public PipelineOp {
    }

    // The builder "build" method creates the final object.
-    // @return shared_ptr to the new StorageOp object
+    // @return shared_ptr to the new GeneratorOp object
    Status Build(std::shared_ptr<GeneratorOp> *);

   private:
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.h
@ -198,7 +198,7 @@ class ImageFolderOp : public ParallelOp, public RandomAccessOp {
  // @param show_all
  void Print(std::ostream &out, bool show_all) const override;

-  // This function is a hack! It is to return the num_class and num_rows the old storageOp does. The result
+  // This function is a hack! It is to return the num_class and num_rows. The result
  // returned by this function may not be consistent with what image_folder_op is going to return
  // use this at your own risk!
  static Status CountRowsAndClasses(const std::string &path, const std::set<std::string> &exts, int64_t *num_rows,
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mindrecord_op.cc
@ -44,7 +44,7 @@ using mindrecord::ShardReader;
 MindRecordOp::Builder::Builder() : build_dataset_file_({}) {
  // Some arguments to the MindRecordOp constructor have a default argument that is taken
  // from the client config.
-  // The user may choose to change these values for the construction of the StorageOp by
+  // The user may choose to change these values for the construction of the MindRecordOp by
  // using the various builder set methods.

  std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/python_sampler.h
@ -45,7 +45,7 @@ class PythonSampler : public Sampler {
  Status ResetSampler() override;

  // Op calls this to get next Buffer that contains all the sampleIds
-  // @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to StorageOp
+  // @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to the corresponding Dataset Op
  // @param int32_t workerId - not meant to be used
  // @return - The error code return
  Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) override;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.h
@ -38,7 +38,7 @@ class RandomAccessOp {
  // @return - The error code return
  Status GetNumRowsInDataset(int64_t *num_rows) const;

-  // sampler gets label , imageIds from storageOp, this function is unique to PK
+  // sampler gets label, imageIds from the corresponding Dataset Op; this function is unique to PK
  // @param std::map<int64_t, std::vector<int64_t>> * map
  // @return - The error code return
  virtual Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *map) const {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.h
@ -44,7 +44,7 @@ class SequentialSampler : public Sampler {
  Status ResetSampler() override;

  // Op calls this to get next Buffer that contains all the sampleIds
-  // @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to StorageOp
+  // @param std::unique_ptr<DataBuffer> pBuffer - Buffer to be returned to the corresponding Dataset Op
  // @param int32_t workerId - not meant to be used
  // @return - The error code return
  Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) override;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.cc
@ -1,190 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define MAX_INTEGER_INT32 2147483647
-
-#include <iostream>
-#include <memory>
-#include <utility>
-#include <nlohmann/json.hpp>
-#include "dataset/core/constants.h"
-#include "dataset/engine/datasetops/source/storage_client.h"
-#include "dataset/engine/datasetops/source/storage_op.h"
-#include "dataset/engine/datasetops/source/tf_client.h"
-#include "dataset/util/status.h"
-
-namespace mindspore {
-namespace dataset {
-// Name: Constructor
-// Description:
-StorageClient::StorageClient(std::unique_ptr<DataSchema> schema,  // In: The schema for this storage client.
-                             StorageOp *store_op)                 // In: The StorageOp that's using this client
-    : data_schema_(std::move(schema)), num_rows_in_dataset_(0), storage_op_(store_op), num_classes_(0) {}
-
-// Name: Print()
-// Description: A function that prints info about the StorageClient
-// In: The output stream to print to
-void StorageClient::Print(std::ostream &out) const {
-  // not much to show here folks!
-  // out << "Storage client:\n";
-}
-
-// This is a local-only static function to drive the switch statement for creating
-// the storage client (not a static member function)
-static Status CreateStorageClientSwitch(
-  std::unique_ptr<DataSchema> schema,            // In: The schema  to set into the client
-  StorageOp *store_op,                           // In: The StorageOp we are operating on
-  std::shared_ptr<StorageClient> *out_client) {  // Out: the created storage client
-  switch (schema->dataset_type()) {
-    case DatasetType::kArrow: {
-      return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__,
-                    "Storage client not implemented yet for arrow dataset type.");
-    }
-    case DatasetType::kTf: {
-      // Construct the derived class TFClient, stored as base class StorageClient
-      store_op->set_rows_per_buffer(32);
-      *out_client = std::make_unique<TFClient>(std::move(schema), store_op);
-      break;
-    }
-    case DatasetType::kUnknown:
-    default: {
-      RETURN_STATUS_UNEXPECTED("Invalid dataset type.");
-    }
-  }
-  if (*out_client) {
-    RETURN_IF_NOT_OK((*out_client)->Init());
-  }
-  return Status::OK();
-}
-
-// Name: CreateStorageClient()
-// Description: A factory method to create the derived storage client.
-//              Every dataset has a required field for the dataset type in a config
-//              file.  This type will determine the child class to return for the
-//              type of storage client.  It also creates the schema and sticks it
-//              into the cache.
-Status StorageClient::CreateStorageClient(
-  StorageOp *store_op,                           // In: A backpointer to the owning cache for this client.
-  std::string dataset_schema_path,               // In: The path to the schema
-  std::shared_ptr<StorageClient> *out_client) {  // Out: the created storage client
-  // Make a new schema first.  This only assigns the dataset type.  It does not
-  // create the columns yet.
-  auto new_schema = std::make_unique<DataSchema>();
-  RETURN_IF_NOT_OK(new_schema->LoadDatasetType(dataset_schema_path));
-  RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client));
-  return Status::OK();
-}
-
-// Name: CreateStorageClient()
-// Description: A factory method to create the derived storage client.
-//              This creator is a user-override for the schema properties where
-//              the user has input the layout of the data (typically used in testcases)
-Status StorageClient::CreateStorageClient(
-  StorageOp *store_op,                           // In: A backpointer to the owning cache for this client.
-  DatasetType in_type,                           // In: The type of dataset
-  std::shared_ptr<StorageClient> *out_client) {  // Out: the created storage client
-  // The dataset type is passed in by the user.  Create an empty schema with only
-  // only the dataset type filled in and then create the client with it.
-  auto new_schema = std::make_unique<DataSchema>();
-  new_schema->set_dataset_type(in_type);
-  RETURN_IF_NOT_OK(CreateStorageClientSwitch(std::move(new_schema), store_op, out_client));
-  return Status::OK();
-}
-
-// Name: LoadDatasetLayout()
-// Description: There are 2 ways to define the properties of the data in the storage
-//              layer: LoadDatasetLayout() and AssignDatasetLayout().
-//              LoadDatasetLayout() will parse the json config file that comes with
-//              the dataset.
-Status StorageClient::LoadDatasetLayout() {
-  // Access the json file to populate our schema, assume the json file is accessible
-  // locally.
-  RETURN_IF_NOT_OK(data_schema_->LoadSchemaFile(storage_op_->schema_file(), storage_op_->columns_to_load()));
-
-  // The number of rows in the schema file is an optional config.  For example,
-  // maybe the derived storage client will know how to determine the total number
-  // of rows a different way rather than having it in the schema config json file.
-  // Thus, mNumRowsInDataset can still be zero and force the derived class override
-  // to determine it another way.
-  uint32_t num_rows = 0;
-  RETURN_IF_NOT_OK(this->numRowsFromFile(num_rows));
-  CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647");
-  if (num_rows_in_dataset_ == 0 || num_rows < num_rows_in_dataset_) {
-    num_rows_in_dataset_ = num_rows;
-  }
-
-  return Status::OK();
-}
-
-// Name: AssignDatasetLayout()
-// Description: There are 2 ways to define the properties of the data in the storage
-//              layer: LoadDatasetLayout() and AssignDatasetLayout().
-//              AssignDatasetLayout() will take input from the caller and assign that
-//              info into the storage client.
-Status StorageClient::AssignDatasetLayout(uint32_t num_rows,           // In: The number of rows in the dataset
-                                          const DataSchema &schema) {  // In: The schema for the dataset
-  // Since this is just an assignment into the storage client, you probably won't need
-  // to override this one in a derived class.  First some sanity checks
-  CHECK_FAIL_RETURN_UNEXPECTED(data_schema_->dataset_type() == schema.dataset_type(),
-                               "Assigning a schema into StorageClient with mismatched dataset types!");
-  CHECK_FAIL_RETURN_UNEXPECTED(data_schema_->NumColumns() == 0,
-                               "Assigning a schema into StorageClient that already has non-empty schema!");
-
-  // The current schema was just an empty one with only the dataset field populated.
-  // Let's copy construct a new one that will be a copy of the input schema (releasing the old
-  // one) and then set the number of rows that the user requested.
-  data_schema_ = std::make_unique<DataSchema>(schema);
-  CHECK_FAIL_RETURN_UNEXPECTED(num_rows <= MAX_INTEGER_INT32, "numRows exceeds the boundary numRows>2147483647");
-  num_rows_in_dataset_ = num_rows;
-
-  return Status::OK();
-}
-
-// Name: numRowsFromFile()
-// Description: Reads the schema json file to see if the optional numRows field has
-//              been set and returns it.
-Status StorageClient::numRowsFromFile(uint32_t &num_rows) const {
-  std::string schemaFile = storage_op_->schema_file();
-  try {
-    std::ifstream in(schemaFile);
-    nlohmann::json js;
-    in >> js;
-    if (js.find("numRows") == js.end()) {
-      num_rows = MAX_INTEGER_INT32;
-    } else {
-      num_rows = js.value("numRows", 0);
-    }
-    if (num_rows == 0) {
-      std::string err_msg =
-        "Storage client has not properly done dataset "
-        "handshake to initialize schema and number of rows.";
-      RETURN_STATUS_UNEXPECTED(err_msg);
-    }
-  }
-  // Catch any exception and rethrow it as our own
-  catch (const std::exception &err) {
-    std::ostringstream ss;
-    ss << "Schema file failed to load:\n" << err.what();
-    std::string err_msg = ss.str();
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
-  return Status::OK();
-}
-
-// Get'r function
-DataSchema *StorageClient::schema() const { return data_schema_.get(); }
-}  // namespace dataset
-}  // namespace mindspore
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_client.h
@ -1,128 +0,0 @@
-/**
- * Copyright 2019 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_
-#define DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_
-
-#include <iostream>
-#include <memory>
-#include <string>
-#include <vector>
-#include "dataset/engine/data_schema.h"
-#include "dataset/engine/datasetops/source/storage_op.h"
-#include "dataset/util/status.h"
-
-namespace mindspore {
-namespace dataset {
-// The Storage Client is the interface and base class that the StorageOp
-// will use to perform any interactions with the storage layer.
-// The different types of datasets will have different derived classes
-// under that storage client super class.
-class StorageClient {
- public:
-  // Name: Constructor
-  // Description:
-  StorageClient(std::unique_ptr<DataSchema> schema,  // In: The schema for this storage client.
-                StorageOp *store_op);                // In: The StorageOp that's using this client
-
-  // Destructor
-  virtual ~StorageClient() { storage_op_ = nullptr; }
-
-  virtual Status Init() { return Status::OK(); }
-
-  // Name: CreateStorageClient()
-  // Description: A factory method to create the derived storage client.
-  //              Every dataset has a required field for the dataset type in a config
-  //              file.  This type will determine the child class to return for the
-  //              type of storage client.
-  static Status CreateStorageClient(StorageOp *store_op,  // In: A backpointer to the owning storage op for this client.
-                                    std::string dataset_schema_path,              // In: The path to the dataset
-                                    std::shared_ptr<StorageClient> *out_client);  // Out: the created storage client
-
-  // Name: CreateStorageClient()
-  // Description: A factory method to create the derived storage client.
-  //              This creator is a user-override for the schema properties where
-  //              the user has input the layout of the data (typically used in testcases)
-  static Status CreateStorageClient(StorageOp *store_op,  // In: A backpointer to the owning cache for this client.
-                                    DatasetType in_type,  // In: The type of dataset
-                                    std::shared_ptr<StorageClient> *out_client);  // Out: the created storage client
-
-  // Name: Print()
-  // Description: A function that prints info about the StorageClient
-  virtual void Print(std::ostream &out) const;  // In: The output stream to print to
-
-  // Provide stream operator for displaying
-  friend std::ostream &operator<<(std::ostream &out, const StorageClient &storage_client) {
-    storage_client.Print(out);
-    return out;
-  }
-
-  // Name: LoadDatasetLayout()
-  // Description: There are 2 ways to define the properties of the data in the storage
-  //              layer: LoadDatasetLayout() and AssignDatasetLayout().
-  //              LoadDatasetLayout() will parse the json config file that comes with
-  //              the dataset and internally populate row counts and schema.
-  virtual Status LoadDatasetLayout();
-
-  // Name: AssignDatasetLayout()
-  // Description: There are 2 ways to define the properties of the data in the storage
-  //              layer: LoadDatasetLayout() and AssignDatasetLayout().
-  //              AssignDatasetLayout() will take input from the caller and assign that
-  virtual Status AssignDatasetLayout(uint32_t num_rows,          // In: The number of rows in the dataset
-                                     const DataSchema &schema);  // In: The schema for the dataset
-
-  // Name: Reset()
-  // Description: Resets any state info inside the client back to it's initialized
-  //              state.
-  virtual Status Reset() = 0;
-
-  // Name: IsMoreData
-  // Description: General routine to ask if more data exists in the storage side for
-  //              a given buffer id.
-  virtual bool IsMoreData(uint32_t id) { return true; }
-
-  // Name: numRowsFromFile()
-  // Description: Reads the schema json file to see if the optional numRows field has
-  //              been set and returns it.
-  Status numRowsFromFile(uint32_t &num_rows) const;
-
-  // Get'r functions
-  DataSchema *schema() const;
-
-  uint32_t num_rows() const { return num_rows_in_dataset_; }
-
-  // Name: rows_per_buffer()
-  // Description: This default version simply gives you the count of the requested
-  //              rows per buffer that the user defined in the storage op.
-  //              However, if some condition down in the storage client layers
-  //              could result in a buffer that has a different number of rows,
-  //              then the derived class can override this method to provide their
-  //              own implementation.
-  virtual uint32_t rows_per_buffer() { return storage_op_->rows_per_buffer(); }
-
-  // Description: Get the label classes num. Only manifest and Imagenet dataset support this parameter
-  virtual uint32_t num_classes() const { return 0; }
-
- protected:
-  std::unique_ptr<DataSchema> data_schema_;  // The schema for the data
-  uint32_t num_rows_in_dataset_;             // The number of rows in the dataset
-  StorageOp *storage_op_;                    // Back pointer to the owning storage operator.
-  std::vector<std::string> col_names_;
-  uint32_t num_classes_;
-};
-}  // namespace dataset
-}  // namespace mindspore
-
-#endif  // DATASET_ENGINE_DATASETOPS_SOURCE_STORAGE_CLIENT_H_
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.cc
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/storage_op.h
--- a/Show More
+++ b/Show More