!10799 make user-facing headers standalone for minddata

From: @mhmotallebi Reviewed-by: Signed-off-by:
4 years ago · a477a97278
parent f076bc6524 f48ab2b5c9
commit a477a97278
33 changed files with 182 additions and 83 deletions
--- a/mindspore/ccsrc/minddata/dataset/api/config.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/config.cc
@ -17,6 +17,7 @@
 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/include/config.h"
+#include "minddata/dataset/util/log_adapter.h"
 #include "minddata/dataset/util/status.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@ -19,16 +19,33 @@
 #include <fstream>
 #include <unordered_set>
 #include <utility>
+
+#include "minddata/dataset/engine/runtime_context.h"
 #include "minddata/dataset/include/samplers.h"
 #include "minddata/dataset/include/transforms.h"
+#include "minddata/dataset/util/path.h"
+#include "minddata/dataset/util/status.h"
+
+#include "minddata/dataset/core/client.h"
+#include "minddata/dataset/engine/consumers/tree_consumer.h"
+
+#include "minddata/dataset/kernels/c_func_op.h"
+#include "minddata/dataset/kernels/tensor_op.h"

 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/engine/ir/cache/dataset_cache_impl.h"
 #endif

+#ifndef ENABLE_ANDROID
+#include "minddata/dataset/text/sentence_piece_vocab.h"
+#include "minddata/dataset/text/vocab.h"
+#endif
+
 // Sampler headers (in alphabetical order)
 #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"

+#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
+
 // IR non-leaf nodes
 #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
 #ifndef ENABLE_ANDROID
@ -57,7 +74,6 @@
 #endif

 #include "minddata/dataset/core/config_manager.h"
-#include "minddata/dataset/util/path.h"
 #include "minddata/dataset/util/random.h"
 #include "minddata/dataset/util/services.h"

@ -939,6 +955,7 @@ TFRecordDataset::TFRecordDataset(const std::vector<std::string> &dataset_files,
                                           shard_id, shard_equal_rows, cache);
  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }
+
 #endif
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc
@ -16,11 +16,15 @@
 #include "minddata/dataset/include/iterator.h"
 #include "minddata/dataset/core/client.h"
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
+#include "minddata/dataset/engine/runtime_context.h"
 #include "minddata/dataset/include/datasets.h"

 namespace mindspore {
 namespace dataset {

+// Construct an iterator that has no consumer attached yet.
+Iterator::Iterator() : consumer_(nullptr) {}
+
+// Destroying the iterator first shuts it down through Stop().
+Iterator::~Iterator() {
+  Stop();
+}
+
 // Get the next row from the data pipeline.
 bool Iterator::GetNextRow(TensorMap *row) {
  Status rc = consumer_->GetNextAsMap(row);
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/datasets_bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/include/datasets_bindings.cc
@ -23,6 +23,8 @@
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/include/datasets.h"
+#include "minddata/dataset/text/sentence_piece_vocab.h"
+
 // IR non-leaf nodes
 #include "minddata/dataset/engine/ir/datasetops/batch_node.h"
 #include "minddata/dataset/engine/ir/datasetops/concat_node.h"
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/text/bindings.cc
@ -21,6 +21,7 @@
 #include "minddata/dataset/api/python/pybind_register.h"
 #include "minddata/dataset/text/vocab.h"
 #include "minddata/dataset/text/sentence_piece_vocab.h"
+#include "minddata/dataset/include/constants.h"

 namespace mindspore {
 namespace dataset {
--- a/mindspore/ccsrc/minddata/dataset/api/text.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/text.cc
@ -39,6 +39,7 @@
 #include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h"
 #include "minddata/dataset/text/kernels/whitespace_tokenizer_op.h"
 #endif
+#include "minddata/dataset/core/data_type.h"
 #include "minddata/dataset/util/path.h"

 namespace mindspore {
@ -87,7 +88,7 @@ std::shared_ptr<JiebaTokenizerOperation> JiebaTokenizer(const std::string &hmm_p
 }

 std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
-                                        const DataType &data_type) {
+                                        const std::string &data_type) {
  auto op = std::make_shared<LookupOperation>(vocab, unknown_token, data_type);

  return op->ValidateParams() ? op : nullptr;
@ -142,7 +143,7 @@ std::shared_ptr<SlidingWindowOperation> SlidingWindow(const int32_t width, const
  return op->ValidateParams() ? op : nullptr;
 }

-std::shared_ptr<ToNumberOperation> ToNumber(const DataType data_type) {
+std::shared_ptr<ToNumberOperation> ToNumber(const std::string &data_type) {
  auto op = std::make_shared<ToNumberOperation>(data_type);

  return op->ValidateParams() ? op : nullptr;
@ -200,6 +201,19 @@ Status ValidateTokenizerDirParam(const std::string &tokenizer_name, const std::s
  return Status::OK();
 }

+// Helper functions to validate the data type name passed by the user
+// Reports whether data_type is the name of a numeric type.
+// Accepted names are the signed and unsigned 8/16/32/64-bit integers and float16/32/64;
+// any other string (for example "bool" or "string") yields false.
+bool IsTypeNumeric(const std::string &data_type) {
+  const bool is_signed_int =
+    data_type == "int8" || data_type == "int16" || data_type == "int32" || data_type == "int64";
+  const bool is_unsigned_int =
+    data_type == "uint8" || data_type == "uint16" || data_type == "uint32" || data_type == "uint64";
+  const bool is_floating = data_type == "float16" || data_type == "float32" || data_type == "float64";
+  return is_signed_int || is_unsigned_int || is_floating;
+}
+
+// True only when data_type is exactly the name "bool".
+bool IsTypeBoolean(const std::string &data_type) {
+  return data_type.compare("bool") == 0;
+}
+
+// True only when data_type is exactly the name "string".
+bool IsTypeString(const std::string &data_type) {
+  return data_type.compare("string") == 0;
+}
+
 /* ####################################### Derived TensorOperation classes ################################# */

 // (In alphabetical order)
@ -239,6 +253,8 @@ BertTokenizerOperation::BertTokenizerOperation(const std::shared_ptr<Vocab> &voc
      preserve_unused_token_(preserve_unused_token),
      with_offsets_(with_offsets) {}

+BertTokenizerOperation::~BertTokenizerOperation() = default;
+
 Status BertTokenizerOperation::ValidateParams() {
  if (vocab_ == nullptr) {
    std::string err_msg = "BertTokenizer: vocab object type is incorrect or null.";
@ -303,9 +319,11 @@ std::shared_ptr<TensorOp> JiebaTokenizerOperation::Build() {

 // LookupOperation
 LookupOperation::LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
-                                 const DataType &data_type)
+                                 const std::string &data_type)
    : vocab_(vocab), unknown_token_(unknown_token), default_id_(Vocab::kNoTokenExists), data_type_(data_type) {}

+LookupOperation::~LookupOperation() = default;
+
 Status LookupOperation::ValidateParams() {
  if (vocab_ == nullptr) {
    std::string err_msg = "Lookup: vocab object type is incorrect or null.";
@ -320,7 +338,7 @@ Status LookupOperation::ValidateParams() {
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }

-  if (!data_type_.IsNumeric()) {
+  if (!IsTypeNumeric(data_type_)) {
    std::string err_msg = "Lookup does not support a string to string mapping, data_type can only be numeric.";
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
@ -330,7 +348,7 @@ Status LookupOperation::ValidateParams() {
 }

 std::shared_ptr<TensorOp> LookupOperation::Build() {
-  std::shared_ptr<LookupOp> tensor_op = std::make_shared<LookupOp>(vocab_, default_id_, data_type_);
+  std::shared_ptr<LookupOp> tensor_op = std::make_shared<LookupOp>(vocab_, default_id_, DataType(data_type_));
  return tensor_op;
 }

@ -419,6 +437,8 @@ std::shared_ptr<TensorOp> RegexTokenizerOperation::Build() {
 #endif

 // SentencePieceTokenizerOperation
+SentencePieceTokenizerOperation::~SentencePieceTokenizerOperation() = default;
+
 SentencePieceTokenizerOperation::SentencePieceTokenizerOperation(const std::shared_ptr<SentencePieceVocab> &vocab,
                                                                 SPieceTokenizerOutType out_type)
    : vocab_(vocab), vocab_path_(std::string()), load_type_(SPieceTokenizerLoadType::kModel), out_type_(out_type) {}
@ -482,11 +502,11 @@ std::shared_ptr<TensorOp> SlidingWindowOperation::Build() {
 }

 // ToNumberOperation
-ToNumberOperation::ToNumberOperation(DataType data_type) : data_type_(data_type) {}
+ToNumberOperation::ToNumberOperation(std::string data_type) : data_type_(data_type) {}

 Status ToNumberOperation::ValidateParams() {
-  if (!data_type_.IsNumeric() || data_type_.IsBool()) {
-    std::string err_msg = "ToNumber : The parameter data_type must be a numeric type, got: " + data_type_.ToString();
+  if (!IsTypeNumeric(data_type_) || IsTypeBoolean(data_type_)) {
+    std::string err_msg = "ToNumber : The parameter data_type must be a numeric type, got: " + data_type_;
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }
--- a/mindspore/ccsrc/minddata/dataset/core/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/core/constants.h
@ -59,6 +59,9 @@ enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
 // Possible values for SPieceTokenizerLoadType
 enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };

+// Possible values for SentencePieceModel
+enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
+
 // Possible values for NormalizeForm
 enum class NormalizeForm {
  kNone = 0,
--- a/mindspore/ccsrc/minddata/dataset/core/global_context.h
+++ b/mindspore/ccsrc/minddata/dataset/core/global_context.h
@ -19,6 +19,7 @@
 #include <memory>
 #include <mutex>

+#include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/util/allocator.h"
 #include "minddata/dataset/util/status.h"
@ -27,7 +28,6 @@ namespace mindspore {
 namespace dataset {
 // forward declare
 class MemoryPool;
-class ConfigManager;
 class Tensor;
 class CVTensor;

--- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
@ -22,8 +22,10 @@
 #include <utility>
 #include <vector>
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
-#include "minddata/dataset/engine/tree_adapter.h"
+#include "minddata/dataset/engine/datasetops/device_queue_op.h"
 #include "minddata/dataset/engine/opt/pre/getter_pass.h"
+#include "minddata/dataset/engine/tree_adapter.h"
+#include "minddata/mindrecord/include/shard_index_generator.h"

 #ifndef ENABLE_ANDROID
 #include "minddata/mindrecord/include/shard_header.h"
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/build_sentence_piece_vocab_node.h
@ -23,6 +23,7 @@
 #include <unordered_map>
 #include <vector>

+#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
 #include "minddata/dataset/include/datasets.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
@ -24,13 +24,26 @@
 #include <utility>
 #include <vector>

-#include "minddata/dataset/include/datasets.h"
+#include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/engine/consumers/tree_consumer.h"
+#include "minddata/dataset/engine/data_schema.h"
+#include "minddata/dataset/engine/datasetops/filter_op.h"
+#include "minddata/dataset/engine/datasetops/map_op/map_op.h"
+#include "minddata/dataset/engine/datasetops/project_op.h"
+#include "minddata/dataset/engine/datasetops/repeat_op.h"
+#include "minddata/dataset/engine/datasetops/shuffle_op.h"
+#include "minddata/dataset/engine/datasetops/skip_op.h"
+#include "minddata/dataset/engine/datasetops/take_op.h"
+#include "minddata/dataset/engine/ir/cache/dataset_cache.h"
+#include "minddata/dataset/include/datasets.h"
+#include "minddata/dataset/util/path.h"
+#include "minddata/dataset/util/status.h"

 namespace mindspore {
 namespace dataset {

 class Dataset;
+class DatasetCache;
 class SamplerObj;
 class IRNodePass;
 class DatasetSizeGetter;
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/minddata_node.h
@ -22,6 +22,7 @@
 #include <string>
 #include <vector>

+#include "minddata/dataset/engine/datasetops/source/mindrecord_op.h"
 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
@ -24,6 +24,7 @@
 #include "minddata/dataset/engine/datasetops/source/random_data_op.h"
 #include "minddata/dataset/util/random.h"
 #include "minddata/dataset/util/status.h"
+
 namespace mindspore {
 namespace dataset {

--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
@ -22,7 +22,9 @@
 #include <utility>
 #include <vector>

+#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
 #include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
+#include "minddata/dataset/include/samplers.h"

 namespace mindspore {
 namespace dataset {
--- a/mindspore/ccsrc/minddata/dataset/include/config.h
+++ b/mindspore/ccsrc/minddata/dataset/include/config.h
@ -20,8 +20,6 @@
 #include <cstdint>
 #include <string>

-#include "minddata/dataset/util/log_adapter.h"
-
 namespace mindspore {
 namespace dataset {

--- a/mindspore/ccsrc/minddata/dataset/include/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/include/constants.h
@ -59,6 +59,9 @@ enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
 // Possible values for SPieceTokenizerLoadType
 enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };

+// Possible values for SentencePieceModel
+enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
+
 // Possible values for NormalizeForm
 enum class NormalizeForm {
  kNone = 0,
--- a/mindspore/ccsrc/minddata/dataset/include/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h
@ -17,6 +17,7 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_

+#include <sys/stat.h>
 #include <unistd.h>
 #include <map>
 #include <memory>
@ -26,27 +27,18 @@
 #include <unordered_set>
 #include <utility>
 #include <vector>
-#include "minddata/dataset/engine/ir/cache/dataset_cache.h"

-#include "minddata/dataset/core/constants.h"
-#include "minddata/dataset/engine/consumers/tree_consumer.h"
-#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
 #include "minddata/dataset/include/iterator.h"
 #include "minddata/dataset/include/samplers.h"
 #include "minddata/dataset/include/tensor.h"
+#include "minddata/dataset/include/text.h"
 #include "minddata/dataset/include/type_id.h"
-#include "minddata/dataset/kernels/c_func_op.h"
-#include "minddata/dataset/kernels/tensor_op.h"
-#include "minddata/dataset/util/path.h"
-#ifndef ENABLE_ANDROID
-#include "minddata/dataset/text/sentence_piece_vocab.h"
-#include "minddata/dataset/text/vocab.h"
-#endif

 namespace mindspore {
 namespace dataset {

 class Tensor;
+class TensorRow;
 class TensorShape;
 class TreeAdapter;
 class TreeGetters;
@ -54,6 +46,7 @@ class TreeGetters;
 class Vocab;
 #endif

+class DatasetCache;
 class DatasetNode;

 class Iterator;
@ -77,12 +70,20 @@ class ConcatDataset;
 class RenameDataset;
 #endif

+#ifndef ENABLE_ANDROID
+class SentencePieceVocab;
+enum class SentencePieceModel;
+#endif
+
+class DSCallback;
+
 class RepeatDataset;

 #ifndef ENABLE_ANDROID
 class SkipDataset;
 class TakeDataset;
 class ZipDataset;
+
 #endif

 /// \class Dataset datasets.h
@ -969,8 +970,12 @@ std::shared_ptr<TFRecordDataset> TFRecord(const std::vector<std::string> &datase
  } else {
    std::string schema_path = schema;
    if (!schema_path.empty()) {
-      Path schema_file(schema_path);
-      if (!schema_file.Exists()) {
+      struct stat sb;
+      int rc = stat(common::SafeCStr(schema_path), &sb);
+      if (rc == -1 && errno != ENOENT) {
+        MS_LOG(WARNING) << "Unable to query the status of [" << schema_path << "]. Errno = " << errno << ".";
+      }
+      if (rc != 0) {
        MS_LOG(ERROR) << "TFRecordDataset: schema path [" << schema_path << "] is invalid or does not exist.";
        return nullptr;
      }
--- a/mindspore/ccsrc/minddata/dataset/include/de_tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/include/de_tensor.h
@ -14,14 +14,14 @@
 * limitations under the License.
 */

-#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_API_DETENSOR_H_
-#define MINDSPORE_CCSRC_MINDDATA_DATASET_API_DETENSOR_H_
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DETENSOR_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DETENSOR_H_
 #include <string>
 #include <vector>
 #include <memory>
 #include "include/ms_tensor.h"
+#include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/tensor.h"
-#include "minddata/dataset/util/status.h"
 namespace mindspore {
 namespace tensor {
 class DETensor : public mindspore::tensor::MSTensor {
@ -79,4 +79,4 @@ class DETensor : public mindspore::tensor::MSTensor {
 };
 }  // namespace tensor
 }  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_API_DETENSOR_H_
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DETENSOR_H_
--- a/mindspore/ccsrc/minddata/dataset/include/execute.h
+++ b/mindspore/ccsrc/minddata/dataset/include/execute.h
@ -14,12 +14,13 @@
 * limitations under the License.
 */

-#ifndef DATASET_API_EXECUTE_H_
-#define DATASET_API_EXECUTE_H_
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_

 #include <vector>
 #include <memory>
-#include "minddata/dataset/core/constants.h"
+
+#include "minddata/dataset/include/constants.h"
 #ifdef ENABLE_ANDROID
 #include "minddata/dataset/include/de_tensor.h"
 #endif
@ -55,4 +56,4 @@ class Execute {

 }  // namespace dataset
 }  // namespace mindspore
-#endif  // DATASET_API_EXECUTE_H_
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
--- a/mindspore/ccsrc/minddata/dataset/include/iterator.h
+++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h
@ -21,7 +21,6 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "minddata/dataset/engine/runtime_context.h"
 #include "minddata/dataset/include/status.h"

 namespace mindspore {
@ -45,10 +44,10 @@ using TensorVec = std::vector<std::shared_ptr<Tensor>>;
 class Iterator {
 public:
  /// \brief Constructor
-  Iterator() : consumer_(nullptr) {}
+  Iterator();

  /// \brief Destructor
-  ~Iterator() { Stop(); }
+  ~Iterator();

  /// \brief Method for building and launching the pipeline.
  /// \param[in] ops - a vector of DatasetOp in the data pipeline.
--- a/mindspore/ccsrc/minddata/dataset/include/samplers.h
+++ b/mindspore/ccsrc/minddata/dataset/include/samplers.h
@ -21,10 +21,11 @@
 #include <string>
 #include <vector>

-#include "minddata/dataset/util/status.h"
+#include "minddata/dataset/include/status.h"
 #ifndef ENABLE_ANDROID
 #include "minddata/mindrecord/include/shard_column.h"
 #include "minddata/mindrecord/include/shard_error.h"
+#include "minddata/mindrecord/include/shard_operator.h"
 #include "minddata/mindrecord/include/shard_reader.h"
 #endif

--- a/mindspore/ccsrc/minddata/dataset/include/status.h
+++ b/mindspore/ccsrc/minddata/dataset/include/status.h
@ -51,6 +51,13 @@ namespace dataset {
    }                                                                      \
  } while (false)

+#define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e)                 \
+  do {                                                                 \
+    if (!(_condition)) {                                               \
+      return Status(StatusCode::kSyntaxError, __LINE__, __FILE__, _e); \
+    }                                                                  \
+  } while (false)
+
 #define RETURN_UNEXPECTED_IF_NULL(_ptr)                                         \
  do {                                                                          \
    if ((_ptr) == nullptr) {                                                    \
@ -71,6 +78,15 @@ namespace dataset {
    return Status(StatusCode::kSyntaxError, __LINE__, __FILE__, _e); \
  } while (false)

+#define RETURN_SECOND_IF_ERROR(_s, _r) \
+  do {                                 \
+    Status __rc = (_s);                \
+    if (__rc.IsError()) {              \
+      MS_LOG(ERROR) << __rc;           \
+      return _r;                       \
+    }                                  \
+  } while (false)
+
 enum class StatusCode : char {
  kOK = 0,
  kOutOfMemory = 1,
@ -151,6 +167,12 @@ class Status {
  StatusCode code_;
  std::string err_msg_;
 };
+
+#if !defined(_WIN32) && !defined(_WIN64)
+const float MAX_MEMORY_USAGE_THRESHOLD = 0.95;
+
+float GetMemoryUsage();
+#endif
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
--- a/mindspore/ccsrc/minddata/dataset/include/text.h
+++ b/mindspore/ccsrc/minddata/dataset/include/text.h
@ -22,18 +22,16 @@
 #include <utility>
 #include <vector>

-#include "mindspore/ccsrc/minddata/dataset/core/data_type.h"
-#include "minddata/dataset/core/constants.h"
+#include "minddata/dataset/include/constants.h"
+#include "minddata/dataset/include/status.h"
 #include "minddata/dataset/include/transforms.h"
-#include "minddata/dataset/util/status.h"
-
-#include "minddata/dataset/text/kernels/sentence_piece_tokenizer_op.h"
-#include "minddata/dataset/text/sentence_piece_vocab.h"
-#include "minddata/dataset/text/vocab.h"

 namespace mindspore {
 namespace dataset {

+class Vocab;
+class SentencePieceVocab;
+
 // Transform operations for text
 namespace text {

@ -146,10 +144,11 @@ std::shared_ptr<JiebaTokenizerOperation> JiebaTokenizer(const std::string &hmm_p
 /// \param[in] vocab a Vocab object.
 /// \param[in] unknown_token word to use for lookup if the word being looked up is out of Vocabulary (oov).
 ///   If unknown_token is oov, runtime error will be thrown.
-/// \param[in] DataType type of the tensor after lookup, typically int32.
+/// \param[in] data_type Name of the type of the tensor after lookup, typically "int32".
 /// \return Shared pointer to the current TensorOperation.
+
 std::shared_ptr<LookupOperation> Lookup(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
-                                        const mindspore::dataset::DataType &data_type = DataType("int32"));
+                                        const std::string &data_type = "int32");

 /// \brief TensorOp to generate n-gram from a 1-D string Tensor.
 /// \param[in] ngrams ngrams is a vector of positive integers. For example, if ngrams={4, 3}, then the result
@ -226,9 +225,9 @@ std::shared_ptr<SlidingWindowOperation> SlidingWindow(const int32_t width, const
 ///   https://en.cppreference.com/w/cpp/string/basic_string/stof,
 ///   https://en.cppreference.com/w/cpp/string/basic_string/stoul,
 ///   except that any strings which represent negative numbers cannot be cast to an unsigned integer type.
-/// \param[in] data_type DataType of the tensor to be casted to. Must be a numeric type.
+/// \param[in] data_type Name of the type the tensor is to be cast to. Must be a numeric type.
 /// \return Shared pointer to the current TensorOperation.
-std::shared_ptr<ToNumberOperation> ToNumber(const DataType data_type);
+std::shared_ptr<ToNumberOperation> ToNumber(const std::string &data_type);

 /// \brief Truncate a pair of rank-1 tensors such that the total length is less than max_length.
 /// \param[in] max_length Maximum length required.
@ -285,7 +284,7 @@ class BertTokenizerOperation : public TensorOperation {
                         bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token,
                         bool with_offsets);

-  ~BertTokenizerOperation() = default;
+  ~BertTokenizerOperation();

  std::shared_ptr<TensorOp> Build() override;

@ -342,9 +341,9 @@ class JiebaTokenizerOperation : public TensorOperation {
 class LookupOperation : public TensorOperation {
 public:
  explicit LookupOperation(const std::shared_ptr<Vocab> &vocab, const std::string &unknown_token,
-                           const DataType &data_type);
+                           const std::string &data_type);

-  ~LookupOperation() = default;
+  ~LookupOperation();

  std::shared_ptr<TensorOp> Build() override;

@ -356,7 +355,7 @@ class LookupOperation : public TensorOperation {
  std::shared_ptr<Vocab> vocab_;
  std::string unknown_token_;
  int32_t default_id_;
-  DataType data_type_;
+  std::string data_type_;
 };

 class NgramOperation : public TensorOperation {
@ -439,7 +438,7 @@ class SentencePieceTokenizerOperation : public TensorOperation {

  SentencePieceTokenizerOperation(const std::string &vocab_path, SPieceTokenizerOutType out_type);

-  ~SentencePieceTokenizerOperation() = default;
+  ~SentencePieceTokenizerOperation();

  std::shared_ptr<TensorOp> Build() override;

@ -473,7 +472,7 @@ class SlidingWindowOperation : public TensorOperation {

 class ToNumberOperation : public TensorOperation {
 public:
-  explicit ToNumberOperation(DataType data_type);
+  explicit ToNumberOperation(std::string data_type);

  ~ToNumberOperation() = default;

@ -484,7 +483,7 @@ class ToNumberOperation : public TensorOperation {
  std::string Name() const override { return kToNumberOperation; }

 private:
-  DataType data_type_;
+  std::string data_type_;
 };

 class TruncateSequencePairOperation : public TensorOperation {
--- a/mindspore/ccsrc/minddata/dataset/include/type_id.h
+++ b/mindspore/ccsrc/minddata/dataset/include/type_id.h
@ -16,7 +16,6 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TYPEID_H_

-#include "minddata/dataset/core/data_type.h"
 #include "mindspore/core/ir/dtype/type_id.h"

 namespace mindspore {
--- a/mindspore/ccsrc/minddata/dataset/text/sentence_piece_vocab.h
+++ b/mindspore/ccsrc/minddata/dataset/text/sentence_piece_vocab.h
@ -22,10 +22,11 @@
 #include <vector>
 #include <unordered_map>
 #include "minddata/dataset/util/status.h"
+#include "minddata/dataset/include/constants.h"

 namespace mindspore {
 namespace dataset {
-enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
+
 class SentencePieceVocab {
 public:
  static Status BuildFromFile(const std::vector<std::string> &path_list, const int vocab_size,
--- a/Show More
+++ b/Show More