!2000 Make GetMutableBuffer protected

Merge pull request !2000 from h.farahat/tensor_mutabble_buffer
5 years ago · 8eb8663fbd
parent 899c95be51 0c5e47111b
commit 8eb8663fbd
32 changed files with 212 additions and 250 deletions
--- a/mindspore/ccsrc/dataset/core/data_type.h
+++ b/mindspore/ccsrc/dataset/core/data_type.h
@ -128,7 +128,9 @@ class DataType {
  // @tparam T
  // @return true or false
  template <typename T>
-  bool IsCompatible() const;
+  bool IsCompatible() const {
+    return type_ == FromCType<T>();
+  }

  // returns true if the template type is the same as the Tensor type_
  // @tparam T
@ -146,6 +148,9 @@ class DataType {
    return out;
  }

+  template <typename T>
+  static DataType FromCType();
+
  // Convert from DataType to Pybind type
  // @return
  py::dtype AsNumpyType() const;
@ -191,68 +196,68 @@ class DataType {
 };

 template <>
-inline bool DataType::IsCompatible<bool>() const {
-  return type_ == DataType::DE_BOOL;
+inline DataType DataType::FromCType<bool>() {
+  return DataType(DataType::DE_BOOL);
 }

 template <>
-inline bool DataType::IsCompatible<double>() const {
-  return type_ == DataType::DE_FLOAT64;
+inline DataType DataType::FromCType<double>() {
+  return DataType(DataType::DE_FLOAT64);
 }

 template <>
-inline bool DataType::IsCompatible<float>() const {
-  return type_ == DataType::DE_FLOAT32;
+inline DataType DataType::FromCType<float>() {
+  return DataType(DataType::DE_FLOAT32);
 }

 template <>
-inline bool DataType::IsCompatible<float16>() const {
-  return type_ == DataType::DE_FLOAT16;
+inline DataType DataType::FromCType<float16>() {
+  return DataType(DataType::DE_FLOAT16);
 }

 template <>
-inline bool DataType::IsCompatible<int64_t>() const {
-  return type_ == DataType::DE_INT64;
+inline DataType DataType::FromCType<int64_t>() {
+  return DataType(DataType::DE_INT64);
 }

 template <>
-inline bool DataType::IsCompatible<uint64_t>() const {
-  return type_ == DataType::DE_UINT64;
+inline DataType DataType::FromCType<uint64_t>() {
+  return DataType(DataType::DE_UINT64);
 }

 template <>
-inline bool DataType::IsCompatible<int32_t>() const {
-  return type_ == DataType::DE_INT32;
+inline DataType DataType::FromCType<int32_t>() {
+  return DataType(DataType::DE_INT32);
 }

 template <>
-inline bool DataType::IsCompatible<uint32_t>() const {
-  return type_ == DataType::DE_UINT32;
+inline DataType DataType::FromCType<uint32_t>() {
+  return DataType(DataType::DE_UINT32);
 }

 template <>
-inline bool DataType::IsCompatible<int16_t>() const {
-  return type_ == DataType::DE_INT16;
+inline DataType DataType::FromCType<int16_t>() {
+  return DataType(DataType::DE_INT16);
 }

 template <>
-inline bool DataType::IsCompatible<uint16_t>() const {
-  return type_ == DataType::DE_UINT16;
+inline DataType DataType::FromCType<uint16_t>() {
+  return DataType(DataType::DE_UINT16);
 }

 template <>
-inline bool DataType::IsCompatible<int8_t>() const {
-  return type_ == DataType::DE_INT8;
+inline DataType DataType::FromCType<int8_t>() {
+  return DataType(DataType::DE_INT8);
 }

 template <>
-inline bool DataType::IsCompatible<uint8_t>() const {
-  return type_ == DataType::DE_UINT8;
+inline DataType DataType::FromCType<uint8_t>() {
+  return DataType(DataType::DE_UINT8);
 }

 template <>
-inline bool DataType::IsCompatible<std::string_view>() const {
-  return type_ == DataType::DE_STRING;
+inline DataType DataType::FromCType<std::string_view>() {
+  return DataType(DataType::DE_STRING);
 }

 template <>
--- a/mindspore/ccsrc/dataset/core/tensor.cc
+++ b/mindspore/ccsrc/dataset/core/tensor.cc
@ -18,6 +18,7 @@
 #include <algorithm>
 #include <iomanip>
 #include <iostream>
+#include <fstream>
 #include <memory>
 #include <vector>
 #include <utility>
@ -311,6 +312,50 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::Byte
  return Status::OK();
 }

+// Create a 1-D uint8 tensor whose content is the raw bytes of a file.
+// @param ptr output argument to hold the created Tensor
+// @param file_path path of the file to read in binary mode
+// @return Status Code
+Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &file_path) {
+  std::ifstream fs;
+  fs.open(file_path, std::ios::binary | std::ios::in);
+  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + file_path);
+  // Seek to the end to learn the size. tellg() yields -1 when the position cannot be determined, and that
+  // value must never reach TensorShape or read() as a byte count.
+  int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
+  CHECK_FAIL_RETURN_UNEXPECTED(num_bytes >= 0 && fs.seekg(0, std::ios::beg).good(), "Fail to find size of file");
+  RETURN_IF_NOT_OK(
+    Tensor::CreateTensor(ptr, TensorImpl::kFlexible, TensorShape{num_bytes}, DataType(DataType::DE_UINT8)));
+  // Read the whole file straight into the tensor buffer and verify nothing was truncated.
+  int64_t written_bytes = fs.read(reinterpret_cast<char *>((*ptr)->GetMutableBuffer()), num_bytes).gcount();
+  CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), "Error in writing to tensor");
+  fs.close();
+  return Status::OK();
+}
+
+// Create a tensor from a protobuf BytesList. Each element is copied into the tensor buffer and then
+// right-padded with spaces so that every element occupies exactly pad_size bytes.
+// @param ptr output argument to hold the created Tensor
+// @param bytes_list the list of strings to load
+// @param shape shape of the tensor
+// @param type type of the tensor
+// @param pad_size number of bytes each element occupies after padding
+// @return Status Code
+Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
+                            const TensorShape &shape, const DataType &type, dsize_t pad_size) {
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(ptr, TensorImpl::kFlexible, shape, type));
+
+  unsigned char *current_tensor_addr = (*ptr)->GetMutableBuffer();
+  int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
+
+  for (int i = 0; i < bytes_list.value_size(); i++) {
+    const std::string &current_element = bytes_list.value(i);
+
+    // An element longer than pad_size cannot be padded; reject it before anything is written.
+    int64_t chars_to_pad = pad_size - current_element.size();
+    CHECK_FAIL_RETURN_UNEXPECTED(chars_to_pad >= 0, "bytesList element is longer than pad_size");
+
+    // read string data into tensor
+    int return_code =
+      memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
+    CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when reading bytesList element into Tensor");
+
+    current_tensor_addr += current_element.size();
+    tensor_bytes_remaining -= current_element.size();
+
+    // pad the rest of this element's slot with spaces
+    return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
+    CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memset_s failed when padding Tensor");
+
+    current_tensor_addr += chars_to_pad;
+    tensor_bytes_remaining -= chars_to_pad;
+  }
+
+  return Status::OK();
+}
+
 // Memcpy the given strided array's used part to consecutive memory
 // Consider a 3-d array
 // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
--- a/mindspore/ccsrc/dataset/core/tensor.h
+++ b/mindspore/ccsrc/dataset/core/tensor.h
@ -135,9 +135,41 @@ class Tensor {
  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
                             const TensorShape &shape = TensorShape::CreateUnknownRankShape());

+  // create tensor from protobuf bytelist with strings
  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
                             const TensorShape &shape);

+  // A static factory method to create a Tensor from a given list of numbers.
+  // @tparam T element type; mapped to the tensor type through DataType::FromCType<T>()
+  // @param ptr output argument to hold the created Tensor
+  // @param items elements of the tensor
+  // @param shape_req requested shape of the tensor; when it is not known, a 1-D shape of
+  //                  items.size() elements is used instead
+  // @return Status Code
+  template <typename T>
+  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<T> &items,
+                             const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) {
+    DataType type = DataType::FromCType<T>();
+    // data() is well defined for an empty vector, whereas &items[0] is not.
+    auto items_ptr = reinterpret_cast<const uchar *>(items.data());
+    TensorShape shape = shape_req;
+    if (!shape.known()) {
+      shape = TensorShape({static_cast<dsize_t>(items.size())});
+    }
+    return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr);
+  }
+
+  // A static factory method to create a Tensor from a given number.
+  // @param ptr output argument to hold the created Tensor
+  // @param item value
+  // @return Status Code
+  template <typename T>
+  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
+    return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
+  }
+  // Create tensor from protobuf bytelist with uint8 or int8 types
+  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
+                             const TensorShape &shape, const DataType &type, dsize_t pad_size);
+
+  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &path);
+
  // Copy raw data of a array based on shape and strides to the destination pointer
  // @param dst Pointer to the destination array where the content is to be copied
  // @param src Pointer to the source of strided array to be copied
@ -260,11 +292,6 @@ class Tensor {
  // @return const unsigned char*
  const unsigned char *GetBuffer() const;

-  // Get the starting memory address for the data of the tensor.  This potentially
-  // drives an allocation if the data area.
-  // @return unsigned char*
-  unsigned char *GetMutableBuffer();
-
  // Getter of the type
  // @return
  DataType type() const { return type_; }
@ -518,6 +545,7 @@ class Tensor {
  // @return TensorIterator
  template <typename T>
  TensorIterator<T> begin() {
+    AllocateBuffer(SizeInBytes());
    return TensorIterator<T>(data_);
  }

@ -536,6 +564,11 @@ class Tensor {
  Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);

 protected:
+  // Get the starting memory address for the data of the tensor.  This potentially
+  // drives an allocation if the data is null.
+  // @return unsigned char*
+  unsigned char *GetMutableBuffer();
+
  // A function that prints Tensor recursively, first called by print
  // @param out
  // @param cur_dim
--- a/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/device_queue_op.cc
@ -254,7 +254,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
      return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
    }
    (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_);
-    unsigned char *column_data = curr_row[i]->GetMutableBuffer();
+    const unsigned char *column_data = curr_row[i]->GetBuffer();
    if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
                 static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
      MS_LOG(ERROR) << "memcpy_s failed!";
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/celeba_op.cc
@ -363,19 +363,7 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t>

  Path path(folder_path_);
  Path image_path = path / image_label.first;
-  std::ifstream handle(image_path.toString(), std::ios::binary | std::ios::in);
-  if (handle.fail()) {
-    std::string err_msg = "Fail to open file: " + image_path.toString();
-    return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, err_msg);
-  }
-
-  (void)handle.seekg(0, std::ios::end);
-  int64_t num_elements = handle.tellg();
-  (void)handle.seekg(0, std::ios::beg);
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
-                                        TensorShape(std::vector<dsize_t>(1, num_elements)),
-                                        data_schema_->column(0).type()));
-  (void)handle.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, image_path.toString()));
  if (decode_ == true) {
    Status rc = Decode(image, &image);
    if (rc.IsError()) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/cifar_op.cc
@ -195,7 +195,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
  std::shared_ptr<Tensor> fine_label;
  std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
  std::shared_ptr<Tensor> copy_image =
-    std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer());
+    std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetBuffer());
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                        data_schema_->column(1).type(),
                                        reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));
@ -366,10 +366,9 @@ Status CifarOp::ParseCifarData() {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(&image_tensor, data_schema_->column(0).tensorImpl(),
                                            TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
                                            data_schema_->column(0).type()));
-      for (int ch = 0; ch < kCifarImageChannel; ++ch) {
-        for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) {
-          (image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
-        }
+      auto itr = image_tensor->begin<uint8_t>();
+      for (; itr != image_tensor->end<uint8_t>(); itr++) {
+        *itr = block[cur_block_index++];
      }
      cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels));
    }
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/coco_op.cc
@ -591,17 +591,8 @@ Status CocoOp::LaunchThreadsAndInitOp() {
 }

 Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
-  std::ifstream fs;
-  fs.open(path, std::ios::binary | std::ios::in);
-  if (fs.fail()) {
-    RETURN_STATUS_UNEXPECTED("Fail to open file: " + path);
-  }
-  int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
-  (void)fs.seekg(0, std::ios::beg);
-  RETURN_IF_NOT_OK(
-    Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type()));
-  (void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements);
-  fs.close();
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path));
+
  if (decode_ == true) {
    Status rc = Decode(*tensor, tensor);
    if (rc.IsError()) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/image_folder_op.cc
@ -204,18 +204,8 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) {
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                        data_schema_->column(1).type(),
                                        reinterpret_cast<unsigned char *>(&pairPtr->second)));
-  std::ifstream fs;
-  fs.open(folder_path_ + (pairPtr->first), std::ios::binary | std::ios::in);
-  if (fs.fail()) {
-    RETURN_STATUS_UNEXPECTED("Fail to open file: " + pairPtr->first);
-  }
-  int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
-  (void)fs.seekg(0, std::ios::beg);
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
-                                        TensorShape(std::vector<dsize_t>(1, num_elements)),
-                                        data_schema_->column(0).type(), nullptr));
-  (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
-  fs.close();
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, folder_path_ + (pairPtr->first)));
+
  if (decode_ == true) {
    Status rc = Decode(image, &image);
    if (rc.IsError()) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/manifest_op.cc
@ -198,23 +198,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s
      data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&label_index[0])));
  }

-  std::ifstream fs;
-  fs.open(data.first, std::ios::binary | std::ios::in);
-  if (!fs.is_open()) {
-    RETURN_STATUS_UNEXPECTED("Fail to open file: " + data.first);
-  }
-
-  int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
-  (void)fs.seekg(0, std::ios::beg);
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
-                                        TensorShape(std::vector<dsize_t>(1, num_elements)),
-                                        data_schema_->column(0).type(), nullptr));
-  (void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
-  if (fs.fail()) {
-    fs.close();
-    RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first);
-  }
-  fs.close();
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data.first));
  if (decode_ == true) {
    Status rc = Decode(image, &image);
    if (rc.IsError()) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/mnist_op.cc
@ -167,7 +167,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow)
  int32_t l = mnist_pair.second;
  // make a copy of cached tensor
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(),
-                                        mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer()));
+                                        mnist_pair.first->type(), mnist_pair.first->GetBuffer()));
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
                                        data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
  (*trow) = {std::move(image), std::move(label)};
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/distributed_sampler.cc
@ -68,8 +68,8 @@ Status DistributedSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer
    (*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone);
    std::shared_ptr<Tensor> sample_ids;
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_));
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer());
-    while (cnt_ < samples_per_buffer_) {
+    auto id_ptr = sample_ids->begin<int64_t>();
+    while (cnt_ < samples_per_buffer_ && id_ptr != sample_ids->end<int64_t>()) {
      int64_t sampled_id = (num_devices_ * cnt_ + device_id_) % num_rows_;
      if (shuffle_) {
        sampled_id = shuffle_vec_[static_cast<size_t>(sampled_id)];
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/pk_sampler.cc
@ -73,8 +73,8 @@ Status PKSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
    std::shared_ptr<Tensor> sample_ids;
    int64_t last_id = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_;
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_));
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer());
-    while (next_id_ < last_id) {
+    auto id_ptr = sample_ids->begin<int64_t>();
+    while (next_id_ < last_id && id_ptr != sample_ids->end<int64_t>()) {
      int64_t cls_id = next_id_++ / samples_per_class_;
      const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]];
      int64_t rnd_ind = std::uniform_int_distribution<int64_t>(0, samples.size() - 1)(rnd_);
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/random_sampler.cc
@ -45,7 +45,7 @@ Status RandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
    std::shared_ptr<Tensor> sampleIds;
    int64_t last_id = std::min(samples_per_buffer_ + next_id_, num_samples_);
    RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_));
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
+    auto id_ptr = sampleIds->begin<int64_t>();

    for (int64_t i = 0; i < (last_id - next_id_); i++) {
      int64_t sampled_id = 0;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/sequential_sampler.cc
@ -42,7 +42,7 @@ Status SequentialSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer)
    int64_t num_elements = std::min(remaining_ids, samples_per_buffer_);

    RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, num_elements));
-    int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
+    auto idPtr = sampleIds->begin<int64_t>();
    for (int64_t i = 0; i < num_elements; i++) {
      int64_t sampled_id = current_id_;
      if (HasChildSampler()) {
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/subset_random_sampler.cc
@ -94,7 +94,7 @@ Status SubsetRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffe
    RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));

    // Initialize tensor
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
+    auto id_ptr = outputIds->begin<int64_t>();
    while (sample_id_ < last_id) {
      if (indices_[sample_id_] >= num_rows_) {
        std::string err_msg =
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/sampler/weighted_random_sampler.cc
@ -125,7 +125,7 @@ Status WeightedRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buf
    RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));

    // Initialize tensor.
-    int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
+    auto id_ptr = outputIds->begin<int64_t>();
    // Assign the data to tensor element.
    while (sample_id_ < last_id) {
      int64_t genId;
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.cc
@ -771,53 +771,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
  // know how many elements there are and the total bytes, create tensor here:
  TensorShape current_shape = TensorShape::CreateScalar();
  RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape));
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
-
-  // Tensors are lazily allocated, this eagerly allocates memory for the tensor.
-  unsigned char *current_tensor_addr = (*tensor)->GetMutableBuffer();
-  int64_t tensor_bytes_remaining = (*num_elements) * pad_size;
-
-  if (current_tensor_addr == nullptr) {
-    std::string err_msg = "tensor memory allocation failed";
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
-
-  RETURN_IF_NOT_OK(LoadAndPadBytes(current_tensor_addr, bytes_list, tensor_bytes_remaining, pad_size));
-
-  return Status::OK();
-}
-
-Status TFReaderOp::LoadAndPadBytes(unsigned char *current_tensor_addr, const dataengine::BytesList &bytes_list,
-                                   int64_t tensor_bytes_remaining, int64_t pad_size) {
-  if (current_tensor_addr == nullptr) {
-    std::string err_msg = "current_tensor_addr is null";
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
-
-  for (int i = 0; i < bytes_list.value_size(); i++) {
-    // read string data into tensor
-    const std::string &current_element = bytes_list.value(i);
-    int return_code =
-      memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
-    if (return_code != 0) {
-      std::string err_msg = "memcpy_s failed when reading bytesList element into Tensor";
-      RETURN_STATUS_UNEXPECTED(err_msg);
-    }
-
-    current_tensor_addr += current_element.size();
-    tensor_bytes_remaining -= current_element.size();
-
-    // pad
-    int64_t chars_to_pad = pad_size - current_element.size();
-    return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
-    if (return_code != 0) {
-      std::string err_msg = "memset_s failed when padding bytesList in Tensor";
-      RETURN_STATUS_UNEXPECTED(err_msg);
-    }
-
-    current_tensor_addr += chars_to_pad;
-    tensor_bytes_remaining -= chars_to_pad;
-  }
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, current_shape, current_col.type(), pad_size));

  return Status::OK();
 }
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/tf_reader_op.h
@ -296,17 +296,8 @@ class TFReaderOp : public ParallelOp {
  // @param column_values_list - the cell that contains the bytes list to read from.
  // @param elementStr - the string we read the value into.
  // @return Status - the error code returned.
-  Status LoadBytesList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
-                       int32_t *num_elements, std::shared_ptr<Tensor> *tensor);
-
-  // Loads all the strings in bytes_list into the memory at current_tensor_addr.
-  // @param current_tensor_addr - the memory address to load the strings to.
-  // @param bytes_list - the list of strings to load.
-  // @param tensor_bytes_remaining - the number of bytes available for this function to use.
-  // @param pad_size - number of bytes to pad to.
-  // @return Status - the error code returned.
-  Status LoadAndPadBytes(unsigned char *current_tensor_addr, const dataengine::BytesList &bytes_list,
-                         int64_t tensor_bytes_remaining, int64_t pad_size);
+  static Status LoadBytesList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
+                              int32_t *num_elements, std::shared_ptr<Tensor> *tensor);

  // Reads values from a float list
  // @param current_col - the column descriptor containing the expected shape and type of the data.
--- a/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/source/voc_op.cc
@ -368,17 +368,7 @@ Status VOCOp::LaunchThreadsAndInitOp() {
 }

 Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
-  std::ifstream fs;
-  fs.open(path, std::ios::binary | std::ios::in);
-  if (fs.fail()) {
-    RETURN_STATUS_UNEXPECTED("Fail to open file: " + path);
-  }
-  int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
-  (void)fs.seekg(0, std::ios::beg);
-  RETURN_IF_NOT_OK(
-    Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type()));
-  (void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements);
-  fs.close();
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path));
  if (decode_ == true) {
    Status rc = Decode(*tensor, tensor);
    if (rc.IsError()) {
--- a/mindspore/ccsrc/dataset/engine/gnn/graph.cc
+++ b/mindspore/ccsrc/dataset/engine/gnn/graph.cc
@ -59,7 +59,7 @@ Status Graph::CreateTensorByVector(const std::vector<std::vector<T>> &data, Data
  size_t n = data[0].size();
  RETURN_IF_NOT_OK(Tensor::CreateTensor(
    &tensor, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, nullptr));
-  T *ptr = reinterpret_cast<T *>(tensor->GetMutableBuffer());
+  auto ptr = tensor->begin<T>();
  for (const auto &id_m : data) {
    CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size");
    for (const auto &id_n : id_m) {
--- a/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc
+++ b/mindspore/ccsrc/dataset/engine/tdt/tdt_plugin.cc
@ -119,7 +119,7 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i
    data_item.tensorShape_ = dataShapes;
    data_item.tensorType_ = datatype;
    data_item.dataLen_ = ts->SizeInBytes();
-    data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->GetMutableBuffer()), [](void *elem) {});
+    data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](void *elem) {});
    items.emplace_back(data_item);
    MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is "
                 << ts->Size() << ".";
--- a/mindspore/ccsrc/dataset/kernels/data/data_utils.h
+++ b/mindspore/ccsrc/dataset/kernels/data/data_utils.h
@ -101,11 +101,11 @@ Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor>
                    const std::vector<dsize_t> &pad_shape, const std::string &pad_val);

 // recursive helper function for padding string tensors. This function could be very expensive if called on a
-// multi-dimensional tensor it is only meant to be called by PadEndNumeric.
+// multi-dimensional tensor it is only meant to be called by PadEndString.
 // @tparam T - type of tensor and fill value
 // @param std::shared_ptr<Tensor> src - Tensor to pad from
 // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
-// @param std::vector<dsize_t> cur_ind - recursion helper
+// @param std::vector<dsize_t> cur_ind - recursion helper
 // @param std::string pad_val - value to pad tensor with
 // @param size_t cur_dim - recursion helper
 // @return Status - The error code return
--- a/mindspore/ccsrc/dataset/kernels/image/image_utils.cc
+++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.cc
@ -119,17 +119,14 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
  }
 }

-bool HasJpegMagic(const unsigned char *data, size_t data_size) {
+// Report whether the tensor's raw bytes begin with the 3-byte JPEG magic number FF D8 FF.
+// @param input tensor holding the encoded image bytes
+// @return true if the buffer is present, at least 3 bytes long and starts with the JPEG magic
+bool HasJpegMagic(const std::shared_ptr<Tensor> &input) {
  const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF";
  constexpr size_t kJpegMagicLen = 3;
-  return data_size >= kJpegMagicLen && memcmp(data, kJpegMagic, kJpegMagicLen) == 0;
+  // NOTE(review): GetBuffer() presumably returns null when the tensor data was never allocated - confirm.
+  // Decode() no longer checks for a null buffer, and memcmp on a null pointer is undefined, so guard here.
+  const unsigned char *data = input->GetBuffer();
+  return data != nullptr && input->SizeInBytes() >= kJpegMagicLen && memcmp(data, kJpegMagic, kJpegMagicLen) == 0;
 }

 Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
-  if (input->GetMutableBuffer() == nullptr) {
-    RETURN_STATUS_UNEXPECTED("Tensor is nullptr");
-  }
-  if (HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
+  if (HasJpegMagic(input)) {
    return JpegCropAndDecode(input, output);
  } else {
    return DecodeCv(input, output);
@ -283,7 +280,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
  jerr.pub.error_exit = JpegErrorExitCustom;
  try {
    jpeg_create_decompress(&cinfo);
-    JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
+    JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
    (void)jpeg_read_header(&cinfo, TRUE);
    RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
    jpeg_calc_output_dimensions(&cinfo);
@ -312,7 +309,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
  TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
  auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8));
  const int buffer_size = output_tensor->SizeInBytes();
-  JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->GetMutableBuffer());
+  JSAMPLE *buffer = static_cast<JSAMPLE *>(reinterpret_cast<uchar *>(&(*output_tensor->begin<uint8_t>())));
  const int max_scanlines_to_read = skipped_scanlines + crop_h;
  // stride refers to output tensor, which has 3 components at most
  const int stride = crop_w * kOutNumComponents;
--- a/mindspore/ccsrc/dataset/kernels/image/image_utils.h
+++ b/mindspore/ccsrc/dataset/kernels/image/image_utils.h
@ -96,7 +96,7 @@ Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out

 Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);

-bool HasJpegMagic(const unsigned char *data, size_t data_size);
+bool HasJpegMagic(const std::shared_ptr<Tensor> &input);

 void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size);

--- a/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc
+++ b/mindspore/ccsrc/dataset/kernels/image/random_crop_decode_resize_op.cc
@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
  if (input == nullptr) {
    RETURN_STATUS_UNEXPECTED("input tensor is null");
  }
-  if (!HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
+  if (!HasJpegMagic(input)) {
    DecodeOp op(true);
    std::shared_ptr<Tensor> decoded;
    RETURN_IF_NOT_OK(op.Compute(input, &decoded));
@ -43,7 +43,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
    jerr.pub.error_exit = JpegErrorExitCustom;
    try {
      jpeg_create_decompress(&cinfo);
-      JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
+      JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
      (void)jpeg_read_header(&cinfo, TRUE);
      jpeg_calc_output_dimensions(&cinfo);
    } catch (std::runtime_error &e) {
--- a/Show More
+++ b/Show More