-Add DE_STRING

-Replace switch-case by indexing - Add test case - Add constructors - Add getItem string - Fix bugs - Add more tests - Tensor iterator - asNumpy - TextFileDataset - Tensor(Numpy) - Super > 2D - Add more test cases for GeneratorDataset - Change StartAddr to GetBuffer and GetMutableBuffer - Raise an error if batch is used with strings - Clean-up work
5 years ago · e8ca243364
parent a3110549ea
commit e8ca243364
43 changed files with 781 additions and 312 deletions
--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@ -237,6 +237,11 @@ void bindTensor(py::module *m) {
    .def("type", &Tensor::type)
    .def("as_array", [](py::object &t) {
      auto &tensor = py::cast<Tensor &>(t);
+      if (tensor.type() == DataType::DE_STRING) {
+        py::array res;
+        tensor.GetDataAsNumpyStrings(&res);
+        return res;
+      }
      py::buffer_info info;
      THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
      return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t);
--- a/mindspore/ccsrc/dataset/core/cv_tensor.cc
+++ b/mindspore/ccsrc/dataset/core/cv_tensor.cc
@ -24,15 +24,15 @@
 namespace mindspore {
 namespace dataset {
 CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) {  // base Tensor is built first
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);  // (void): MatInit's Status is dropped, failure is not reported
 }

 CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) {  // data is forwarded to the base Tensor
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);  // (void): MatInit's Status is dropped, failure is not reported
 }

 CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) {  // tensor is dereferenced unchecked; NOTE(review): presumably leaves *tensor moved-from for other owners - confirm
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);  // (void): MatInit's Status is dropped, failure is not reported
 }

 std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {
@ -83,19 +83,19 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &

 Status CVTensor::Reshape(const TensorShape &shape) {
  RETURN_IF_NOT_OK(Tensor::Reshape(shape));  // reshape the base Tensor first; its error is returned as-is
-  RETURN_IF_NOT_OK(this->MatInit(StartAddr(), shape_, type_, &mat_));
+  RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_));  // re-run MatInit on mat_ with the updated shape_
  return Status::OK();
 }

 Status CVTensor::ExpandDim(const dsize_t &axis) {
  RETURN_IF_NOT_OK(Tensor::ExpandDim(axis));  // expand the base Tensor first; its error is returned as-is
-  RETURN_IF_NOT_OK(this->MatInit(StartAddr(), shape_, type_, &mat_));
+  RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_));  // re-run MatInit on mat_ with the updated shape_
  return Status::OK();
 }

 void CVTensor::Squeeze() {
  Tensor::Squeeze();  // squeeze the base Tensor first
-  (void)this->MatInit(StartAddr(), shape_, type_, &mat_);
+  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);  // (void): this function returns void, so MatInit's Status is dropped
 }
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/core/data_type.cc
+++ b/mindspore/ccsrc/dataset/core/data_type.cc
--- a/mindspore/ccsrc/dataset/core/data_type.h
+++ b/mindspore/ccsrc/dataset/core/data_type.h
@ -16,18 +16,25 @@
 #ifndef DATASET_CORE_DATA_TYPE_H_
 #define DATASET_CORE_DATA_TYPE_H_

+#include <opencv2/core/hal/interface.h>
+
 #include <string>
+
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
+
+#include "dataset/core/constants.h"
 #include "dataset/core/pybind_support.h"

 namespace py = pybind11;
 namespace mindspore {
 namespace dataset {
+
 // Class that represents basic data types in DataEngine.
 class DataType {
 public:
  enum Type : uint8_t {
+    DE_UNKNOWN = 0,
    DE_BOOL,
    DE_INT8,
    DE_UINT8,
@ -40,20 +47,60 @@ class DataType {
    DE_FLOAT16,
    DE_FLOAT32,
    DE_FLOAT64,
-    DE_UNKNOWN
+    DE_STRING,
+    NUM_OF_TYPES
  };

-  static constexpr uint8_t DE_BOOL_SIZE = 1;
-  static constexpr uint8_t DE_UINT8_SIZE = 1;
-  static constexpr uint8_t DE_INT8_SIZE = 1;
-  static constexpr uint8_t DE_UINT16_SIZE = 2;
-  static constexpr uint8_t DE_INT16_SIZE = 2;
-  static constexpr uint8_t DE_UINT32_SIZE = 4;
-  static constexpr uint8_t DE_INT32_SIZE = 4;
-  static constexpr uint8_t DE_INT64_SIZE = 8;
-  static constexpr uint8_t DE_UINT64_SIZE = 8;
-  static constexpr uint8_t DE_FLOAT32_SIZE = 4;
-  static constexpr uint8_t DE_FLOAT64_SIZE = 8;
+  inline static constexpr uint8_t SIZE_IN_BYTES[] = {0,   // DE_UNKNOWN
+                                                     1,   // DE_BOOL
+                                                     1,   // DE_INT8
+                                                     1,   // DE_UINT8
+                                                     2,   // DE_INT16
+                                                     2,   // DE_UINT16
+                                                     4,   // DE_INT32
+                                                     4,   // DE_UINT32
+                                                     8,   // DE_INT64
+                                                     8,   // DE_UINT64
+                                                     2,   // DE_FLOAT16
+                                                     4,   // DE_FLOAT32
+                                                     8,   // DE_FLOAT64
+                                                     0};  // DE_STRING
+
+  inline static const char *TO_STRINGS[] = {"unknown", "bool",  "int8",   "uint8",   "int16",   "uint16",  "int32",
+                                            "uint32",  "int64", "uint64", "float16", "float32", "float64", "string"};
+
+  inline static const char *PYBIND_TYPES[] = {"object", "bool",  "int8",   "uint8",   "int16",   "uint16", "int32",
+                                              "uint32", "int64", "uint64", "float16", "float32", "double", "bytes"};
+
+  inline static const std::string PYBIND_FORMAT_DESCRIPTOR[] = {"",                                        // DE_UNKNOWN
+                                                                py::format_descriptor<bool>::format(),     // DE_BOOL
+                                                                py::format_descriptor<int8_t>::format(),   // DE_INT8
+                                                                py::format_descriptor<uint8_t>::format(),  // DE_UINT8
+                                                                py::format_descriptor<int16_t>::format(),  // DE_INT16
+                                                                py::format_descriptor<uint16_t>::format(),  // DE_UINT16
+                                                                py::format_descriptor<int32_t>::format(),   // DE_INT32
+                                                                py::format_descriptor<uint32_t>::format(),  // DE_UINT32
+                                                                py::format_descriptor<int64_t>::format(),   // DE_INT64
+                                                                py::format_descriptor<uint64_t>::format(),  // DE_UINT64
+                                                                "e",                                      // DE_FLOAT16
+                                                                py::format_descriptor<float>::format(),   // DE_FLOAT32
+                                                                py::format_descriptor<double>::format(),  // DE_FLOAT64
+                                                                "S"};                                     // DE_STRING
+
+  inline static constexpr uint8_t CV_TYPES[] = {kCVInvalidType,   // DE_UNKNOWN
+                                                CV_8U,            // DE_BOOL
+                                                CV_8S,            // DE_INT8
+                                                CV_8U,            // DE_UINT8
+                                                CV_16S,           // DE_INT16
+                                                CV_16U,           // DE_UINT16
+                                                CV_32S,           // DE_INT32
+                                                kCVInvalidType,   // DE_UINT32
+                                                kCVInvalidType,   // DE_INT64
+                                                kCVInvalidType,   // DE_UINT64
+                                                CV_16F,           // DE_FLOAT16
+                                                CV_32F,           // DE_FLOAT32
+                                                CV_64F,           // DE_FLOAT64
+                                                kCVInvalidType};  // DE_STRING

  // No arg constructor to create an unknown shape
  DataType() : type_(DE_UNKNOWN) {}
@ -160,6 +207,8 @@ class DataType {

  bool IsBool() const { return type_ == DataType::DE_BOOL; }

+  // Reports whether this type is anything other than DE_STRING.
+  // Note: DE_UNKNOWN is not excluded, so it is also reported as numeric.
+  bool IsNumeric() const {
+    const bool is_string = (type_ == DataType::DE_STRING);
+    return !is_string;
+  }
+
  Type value() const { return type_; }

 private:
@ -226,6 +275,11 @@ inline bool DataType::IsCompatible<uint8_t>() const {
  return type_ == DataType::DE_UINT8;
 }

+// Specialization for std::string_view: compatible only when this type is DE_STRING.
+template <>
+inline bool DataType::IsCompatible<std::string_view>() const {
+  const bool holds_strings = (DataType::DE_STRING == type_);
+  return holds_strings;
+}
+
 template <>
 inline bool DataType::IsLooselyCompatible<bool>() const {
  return type_ == DataType::DE_BOOL;
--- a/mindspore/ccsrc/dataset/core/tensor.cc
+++ b/mindspore/ccsrc/dataset/core/tensor.cc
--- a/mindspore/ccsrc/dataset/core/tensor.h
+++ b/mindspore/ccsrc/dataset/core/tensor.h
--- a/mindspore/ccsrc/dataset/core/tensor_shape.cc
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.cc
@ -215,5 +215,17 @@ TensorShape TensorShape::Squeeze() const {
  }
  return TensorShape(new_shape);
 }
+// Returns one stride per dimension, counted in elements (no type size is applied).
+// Starting from NumOfElements(), the running count is divided by each dimension in turn,
+// so entry i ends up as the element count covered by the dimensions after i.
+std::vector<dsize_t> TensorShape::Strides() {
+  const auto rank = Rank();
+  std::vector<dsize_t> result(rank);
+  dsize_t remaining = NumOfElements();
+  for (dsize_t dim = 0; dim < rank; ++dim) {
+    const auto extent = raw_shape_[dim];
+    // A zero-sized dimension cannot be divided by; the count collapses to 0 from here on.
+    remaining = (extent != 0) ? remaining / extent : 0;
+    result[dim] = remaining;
+  }
+  return result;
+}
 }  // namespace dataset
 }  // namespace mindspore
--- a/mindspore/ccsrc/dataset/core/tensor_shape.h
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.h
@ -156,6 +156,8 @@ class TensorShape {

  TensorShape Squeeze() const;

+  std::vector<dsize_t> Strides();
+
 private:
  // True if known and valid shape, false otherwise
  bool known_;
--- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
@ -74,6 +74,10 @@ Status BatchOp::operator()() {
  std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
  child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
  RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
+  for (const auto &t : new_row) {
+    CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(),
+                                 "[Batch ERROR] Batch does not support Tensor of type string yet.");
+  }
  RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());  // must come after the first fetch above
  int32_t cur_batch_size = 0;
  RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0)));
@ -445,8 +449,8 @@ Status BatchOp::PadHelper(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> d
      src_flat_ind += src_s[i] * cur_ind[i];
      dst_flat_ind += dst_s[i] * cur_ind[i];
    }
-    unsigned char *src_addr = src->StartAddr() + src_flat_ind * type_size;
-    unsigned char *dst_addr = dst->StartAddr() + dst_flat_ind * type_size;
+    unsigned char *src_addr = src->GetMutableBuffer() + src_flat_ind * type_size;
+    unsigned char *dst_addr = dst->GetMutableBuffer() + dst_flat_ind * type_size;
    CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
  } else {  // not the last dimension, keep doing recursion
    dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]);
--- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
@ -85,6 +85,13 @@ Status DeviceQueueOp::operator()() {

 Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer) const {
  // this method checks if the buffer meets the conditions to be sent to TDT
+  // Only row 0 is inspected; an empty buffer passes trivially.
+  // Returns an error if fetching row 0 fails or if any tensor in it is not numeric (i.e. is a string tensor).
+  if (buffer->NumRows() != 0) {
+    TensorRow row;
+    // Propagate a failed fetch instead of validating a row that was never filled in.
+    RETURN_IF_NOT_OK(buffer->GetRow(0, &row));
+    for (const auto &item : row) {
+      CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device.");
+    }
+  }
  return Status::OK();
 }

@ -207,7 +214,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
      return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
    }
    (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_);
-    unsigned char *column_data = curr_row[i]->StartAddr();
+    unsigned char *column_data = curr_row[i]->GetMutableBuffer();
    if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
                 static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
      MS_LOG(ERROR) << "memcpy_s failed!";
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
@ -407,7 +407,7 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t>
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
                                        TensorShape(std::vector<dsize_t>(1, num_elements)),
                                        data_schema_->column(0).type()));
-  (void)handle.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
+  (void)handle.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
  if (decode_ == true) {
    Status rc = Decode(image, &image);
    if (rc.IsError()) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
@ -197,7 +197,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
  std::shared_ptr<Tensor> fine_label;
  std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
  std::shared_ptr<Tensor> copy_image =
-    std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->StartAddr());
+    std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer());
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                        data_schema_->column(1).type(),
                                        reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));
@ -394,7 +394,7 @@ Status CifarOp::ParseCifarData() {
                                            data_schema_->column(0).type()));
      for (int ch = 0; ch < kCifarImageChannel; ++ch) {
        for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) {
-          (image_tensor->StartAddr())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
+          (image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
        }
      }
      cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels));
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
@ -216,7 +216,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) {
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
                                        TensorShape(std::vector<dsize_t>(1, num_elements)),
                                        data_schema_->column(0).type(), nullptr));
-  (void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
+  (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
  fs.close();
  if (decode_ == true) {
    Status rc = Decode(image, &image);
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
@ -210,7 +210,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
                                        TensorShape(std::vector<dsize_t>(1, num_elements)),
                                        data_schema_->column(0).type(), nullptr));
-  (void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
+  (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
  if (fs.fail()) {
    fs.close();
    RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first);
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
@ -170,7 +170,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow)
  int32_t l = mnist_pair.second;
  // make a copy of cached tensor
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(),
-                                        mnist_pair.first->type(), mnist_pair.first->StartAddr()));
+                                        mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer()));
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                        data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
  (*trow) = {std::move(image), std::move(label)};
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.cc
@ -127,7 +127,7 @@ Status RandomDataOp::GenerateSchema() {
    // For each column:
    // - choose a datatype
    // - generate a shape that randomly chooses the number of dimensions and the dimension values.
-    DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, kMaxDataType));
+    // Pick from DE_BOOL (1) .. DE_FLOAT64 (NUM_OF_TYPES - 2): value 0 is DE_UNKNOWN and NUM_OF_TYPES - 1 is
+    // DE_STRING, and neither should be chosen for a random column. Assumes GenRandomInt bounds are inclusive.
+    DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(1, DataType::NUM_OF_TYPES - 2));
    int32_t rank = GenRandomInt(1, kMaxRank);
    std::vector<dsize_t> dims;
    for (int32_t d = 0; d < rank; d++) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/random_data_op.h
@ -43,7 +43,6 @@ class RandomDataOp : public ParallelOp {
  static constexpr int32_t kMaxNumColumns = 4;
  static constexpr int32_t kMaxRank = 4;
  static constexpr int32_t kMaxDimValue = 2048;
-  static constexpr int32_t kMaxDataType = (DataType::DE_UNKNOWN - 1);
  static constexpr int32_t kMaxTotalRows = 1024;

  // A nested builder class to aid in the construction of a RandomDataOp
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc
@ -58,7 +58,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer
    (*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone);
    std::shared_ptr<Tensor> sample_ids;
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_));
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr());
+    int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer());
    while (cnt_ < samples_per_buffer_) {
      int64_t next_id = (num_devices_ * (cnt_++) + device_id_) % num_rows_;
      *(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast<size_t>(next_id)] : next_id;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc
@ -58,7 +58,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) {
    int64_t last_id =
      (samples_per_buffer_ + next_id_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_ + next_id_;
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_));
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr());
+    int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer());
    while (next_id_ < last_id) {
      int64_t cls_id = next_id_++ / samples_per_class_;
      const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]];
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
@ -38,7 +38,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) {
    std::shared_ptr<Tensor> sampleIds;
    int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? num_samples_ : samples_per_buffer_ + next_id_;
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_));
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->StartAddr());
+    int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
    for (int64_t i = 0; i < (last_id - next_id_); i++) {
      *(id_ptr + i) = replacement_ ? (*dist)(rnd_) : shuffled_ids_[static_cast<size_t>(i + next_id_)];
    }
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sampler.cc
@ -40,7 +40,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t
  }
  TensorShape shape(std::vector<dsize_t>(1, num_elements));
  RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type()));
-  (void)(*sample_ids)->StartAddr();  // allocate memory in case user forgets!
+  (void)(*sample_ids)->GetMutableBuffer();  // allocate memory in case user forgets!
  return Status::OK();
 }

--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc
@ -31,7 +31,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer)
    std::shared_ptr<Tensor> sampleIds;
    int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_;
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_));
-    int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->StartAddr());
+    int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
    while (next_id_ < lastId) {
      *(idPtr++) = next_id_++;
    }
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc
@ -78,7 +78,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffe
    RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));

    // Initialize tensor
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr());
+    int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
    while (sample_id_ < last_id) {
      if (indices_[sample_id_] >= num_rows_) {
        std::string err_msg =
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc
@ -111,7 +111,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buf
    RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));

    // Initialize tensor.
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr());
+    int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
    // Assign the data to tensor element.
    while (sample_id_ < last_id) {
      int64_t genId;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/text_file_op.cc
@ -146,10 +146,7 @@ Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTa
  (*tensor_table)->push_back(std::move(tRow));

  std::shared_ptr<Tensor> tensor;
-  RETURN_IF_NOT_OK(
-    Tensor::CreateTensor(&tensor, data_schema_->column(0).tensorImpl(),
-                         TensorShape(std::vector<dsize_t>(1, line.size())), data_schema_->column(0).type(),
-                         const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(common::SafeCStr(line)))));
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar()));
  (**tensor_table)[row][0] = std::move(tensor);
  return Status::OK();
 }
--- a/Show More
+++ b/Show More