diff --git a/mindspore/ccsrc/minddata/dataset/api/execute.cc b/mindspore/ccsrc/minddata/dataset/api/execute.cc
index c4fb4c8c67..fc6861f08e 100644
--- a/mindspore/ccsrc/minddata/dataset/api/execute.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/execute.cc
@@ -21,7 +21,7 @@
 #include "minddata/dataset/core/device_resource.h"
 #include "minddata/dataset/core/device_tensor.h"
 #include "minddata/dataset/core/tensor_row.h"
-#include "minddata/dataset/include/tensor.h"
+#include "minddata/dataset/core/tensor.h"
 #include "minddata/dataset/include/type_id.h"
 #include "minddata/dataset/kernels/ir/tensor_operation.h"
 #include "minddata/dataset/kernels/tensor_op.h"
diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc
index 39638350ce..045c2ae396 100644
--- a/mindspore/ccsrc/minddata/dataset/api/transforms.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc
@@ -35,9 +35,10 @@ struct Compose::Data {
 };
 
 Compose::Compose(const std::vector<TensorTransform *> &transforms) : data_(std::make_shared<Data>()) {
-  (void)std::transform(
-    transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
-    [](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
+  (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
+                       [](TensorTransform *const op) -> std::shared_ptr<TensorOperation> {
+                         return op != nullptr ? op->Parse() : nullptr;
+                       });
 }
 
 Compose::Compose(const std::vector<std::shared_ptr<TensorTransform>> &transforms) : data_(std::make_shared<Data>()) {
@@ -78,9 +79,10 @@ struct RandomApply::Data {
 
 RandomApply::RandomApply(const std::vector<TensorTransform *> &transforms, double prob)
     : data_(std::make_shared<Data>()) {
-  (void)std::transform(
-    transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
-    [](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
+  (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
+                       [](TensorTransform *const op) -> std::shared_ptr<TensorOperation> {
+                         return op != nullptr ? op->Parse() : nullptr;
+                       });
   data_->prob_ = prob;
 }
 
@@ -110,9 +112,10 @@ struct RandomChoice::Data {
 };
 
 RandomChoice::RandomChoice(const std::vector<TensorTransform *> &transforms) : data_(std::make_shared<Data>()) {
-  (void)std::transform(
-    transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
-    [](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op != nullptr ? op->Parse() : nullptr; });
+  (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
+                       [](TensorTransform *const op) -> std::shared_ptr<TensorOperation> {
+                         return op != nullptr ? op->Parse() : nullptr;
+                       });
 }
 
 RandomChoice::RandomChoice(const std::vector<std::shared_ptr<TensorTransform>> &transforms)
diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc
index 7c1d6b82a7..093d6726fd 100644
--- a/mindspore/ccsrc/minddata/dataset/api/vision.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc
@@ -813,7 +813,7 @@ UniformAugment::UniformAugment(const std::vector<TensorTransform *> &transforms,
     : data_(std::make_shared<Data>()) {
   (void)std::transform(
     transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_),
-    [](TensorTransform *op) -> std::shared_ptr<TensorOperation> { return op ? op->Parse() : nullptr; });
+    [](TensorTransform *const op) -> std::shared_ptr<TensorOperation> { return op ? op->Parse() : nullptr; });
   data_->num_ops_ = num_ops;
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/core/de_tensor.h b/mindspore/ccsrc/minddata/dataset/core/de_tensor.h
index 65b733404a..ea22585e3c 100644
--- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.h
@@ -22,7 +22,7 @@
 #include "include/api/status.h"
 #include "include/api/types.h"
 #include "mindspore/core/ir/api_tensor_impl.h"
-#include "minddata/dataset/include/tensor.h"
+#include "minddata/dataset/core/tensor.h"
 
 namespace mindspore {
 namespace dataset {
diff --git a/mindspore/ccsrc/minddata/dataset/include/tensor.h b/mindspore/ccsrc/minddata/dataset/include/tensor.h
deleted file mode 100644
index b4f517330e..0000000000
--- a/mindspore/ccsrc/minddata/dataset/include/tensor.h
+++ /dev/null
@@ -1,806 +0,0 @@
-/**
- * Copyright 2019-2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
-#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
-
-#include
-#include
-#include
-#include
-#include "./securec.h"
-#ifndef ENABLE_ANDROID
-#include "utils/log_adapter.h"
-#else
-#include "mindspore/lite/src/common/log_adapter.h"
-#endif
-#if defined(_WIN32) || defined(_WIN64)
-#undef HAVE_STDDEF_H
-#undef HAVE_STDLIB_H
-#endif
-
-#ifdef ENABLE_PYTHON
-#include "pybind11/numpy.h"
-#include "pybind11/pybind11.h"
-#include "pybind11/stl.h"
-#endif
-
-#include "utils/ms_utils.h"
-#include "include/api/status.h"
-#include "minddata/dataset/core/constants.h"
-#include "minddata/dataset/core/data_type.h"
-#include "minddata/dataset/core/tensor_helpers.h"
-#include "minddata/dataset/core/tensor_shape.h"
-#include "minddata/dataset/core/de_tensor.h"
-#ifndef ENABLE_ANDROID
-#include "proto/example.pb.h"
-#endif
-
-#ifdef ENABLE_PYTHON
-namespace py = pybind11;
-#endif
-namespace mindspore {
-namespace dataset {
-class Tensor;
-template <typename T>
-class Allocator;
-
-using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
-using TensorAllocPtr = std::shared_ptr<Allocator<std::shared_ptr<Tensor>>>;  // An allocator shared_ptr for Tensors
-using offset_t = uint32_t;  // type of offset values to store strings locations
-using TensorPtr = std::shared_ptr<Tensor>;
-
-class Tensor {
- public:
-  Tensor() = delete;
-  Tensor(const Tensor &other) = delete;
-  Tensor &operator=(const Tensor &other) = delete;
-
-  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
-  /// \note The shape and type information should be known and valid
-  /// \note The constructor does not allocate data
-  /// \param shape TensorShape
-  /// \param type DataType
-  Tensor(const TensorShape &shape, const DataType &type);
-
-  /// Move constructor
-  /// \param other Tensor to be moved
-  Tensor(Tensor &&other) noexcept;
-
-  /// Move assignment operator
-  /// \param other Tensor to be moved
-  Tensor &operator=(Tensor &&other) noexcept;
-
-  /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
- /// \param[in] shape shape of the output tensor - /// \param[in] type type of the output tensor - /// \param[out] out Generated tensor - /// \return Status code - static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out); - - /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type. - /// Data will be copied into the new created tensor. - /// \param[in] shape shape of the output tensor - /// \param[in] type type of the output tensor - /// \param[in] src pointer to the source data - /// \param[out] out Generated tensor - /// \return Status code - static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out); - - /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor. - /// \param[in] shape shape of the output tensor - /// \param[in] type type of the output tensor - /// \param[in] src pointer to the source data - /// \param[in] length length of the src data - /// \param[out] out Generated tensor - /// \return Status code - static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, - const dsize_t &length, TensorPtr *out); - - /// Create a copy of the input tensor - /// \param[in] in original tensor to be copied - /// \param[out] out output tensor to be generated - /// \return Status - static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) { - return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out); - } - -#ifdef ENABLE_PYTHON - /// Create a Tensor from a given py::array - /// \param[in] arr py::array - /// \param[out] out Created tensor - /// \return Status Code - static Status CreateFromNpArray(const py::array &arr, TensorPtr *out); -#endif - -#ifndef ENABLE_ANDROID - /// Create a tensor of type DE_STRING from a BytesList. - /// \param[in] bytes_list protobuf's Bytelist - /// \param[in] shape shape of the output tensor - /// \param[out] out created Tensor - /// \return Status Code - static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out); - - /// Create a tensor of type UINT8 or INT8 from a BytesList. - /// The tensor will be padded with ' ' to reach the required pad_size. - /// \param[in] bytes_list protobuf's Bytelist - /// \param[in] shape shape of the output tensor - /// \param[in] type type of created tensor. Should be DE_UINT8 or INT8 - /// \param[in] pad_size The size of the tensor after padding - /// \param[out] out created Tensor - /// \return Status Code - static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, - const DataType &type, dsize_t pad_size, TensorPtr *out); -#endif - - /// Create a Tensor from a given list of values. - /// \tparam type of the values to be inserted. - /// \param[in] items elements of the tensor - /// \param[in] shape shape of the output tensor - /// \param[out] out output argument to hold the created Tensor - /// \return Status Code - template - static Status CreateFromVector(const std::vector &items, const TensorShape &shape, TensorPtr *out) { - CHECK_FAIL_RETURN_UNEXPECTED( - items.size() == shape.NumOfElements(), - "Number of elements in the vector does not match the number of elements of the shape required"); - DataType type = DataType::FromCType(); - // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case. 
- auto items_ptr = reinterpret_cast(&items[0]); - return CreateFromMemory(shape, type, items_ptr, out); - } - - /// Create a 1D Tensor from a given list of values. - /// \tparam type of the values to be inserted. - /// \param[in] items elements of the tensor - /// \param[out] out output argument to hold the created Tensor - /// \return Status Code - template - static Status CreateFromVector(const std::vector &items, TensorPtr *out) { - return CreateFromVector(items, TensorShape({static_cast(items.size())}), out); - } - - /// Create a 1D boolean Tensor from a given list of boolean values. - /// \param[in] items elements of the tensor - /// \param[in] shape shape of the output tensor - /// \param[out] out output argument to hold the created Tensor - /// \return Status Code - static Status CreateFromVector(const std::vector &items, const TensorShape &shape, TensorPtr *out) { - std::vector temp(items.begin(), items.end()); - RETURN_IF_NOT_OK(CreateFromVector(temp, shape, out)); - (*out)->type_ = DataType(DataType::DE_BOOL); - return Status::OK(); - } - - /// Create a numeric scalar Tensor from the given value. - /// \tparam T type of value - /// \param[in] item value - /// \param[out] out Created tensor - /// \return Status code - template - static Status CreateScalar(const T &item, TensorPtr *out) { - DataType type = DataType::FromCType(); - auto item_ptr = reinterpret_cast(&item); - return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out); - } - - /// Create a tensor from a binary file on disk. - /// \param[in] path file to be read - /// \param[out] out Created Tensor - /// \return Status code - static Status CreateFromFile(const std::string &path, TensorPtr *out); - - /// Destruct the tensor and release the memory using the allocator - virtual ~Tensor(); - - /// Equality operator. compares tensor shape, type and data - /// \param[in] rhs Tensor to be compared with - /// \return bool - bool operator==(const Tensor &rhs) const; - - bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); } - - /// Get item located at `index`, caller needs to provide the type. - /// \tparam T - /// \param[in] index vector - /// \return return the item specified at index - template - Status GetItemAt(T *o, const std::vector &index) const; - - /// Get string located at `index`. 
- /// \param[in] index vector - /// \return return std::string_view specified at index - Status GetItemAt(std::string_view *o, const std::vector &index) const; - - template - Status GetUnsignedIntAt(T *o, const std::vector &index) const; - - template - Status GetSignedIntAt(T *o, const std::vector &index) const; - - template - Status GetFloatAt(T *o, const std::vector &index) const; - - /// set item at location specified by index - /// \tparam `T` - /// \param[in] index - /// \param[in] value of type `T` - template - Status SetItemAt(const std::vector &index, const T &value) { - T *ptr = nullptr; - RETURN_IF_NOT_OK(GetItemPtr(&ptr, index)); - *ptr = value; - return Status::OK(); - } - - /// set string item at location specified by index - /// \param[in] index - /// \param[in] value of type std::string - Status SetItemAt(const std::vector &index, const std::string &value) { - RETURN_UNEXPECTED_IF_NULL(data_); - uchar *ptr = nullptr; - offset_t length = 0; - RETURN_IF_NOT_OK(GetItemPtr(&ptr, index, &length)); - if (value.length() != length) { - RETURN_STATUS_UNEXPECTED("Length of the new string does not match the item."); - } - memcpy_s(reinterpret_cast(ptr), length, value.c_str(), length); - - return Status::OK(); - } - - /// fill tensor with Zeros. Does not support strings. - Status Zero() { - CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings.."); - dsize_t size = SizeInBytes(); - CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(GetMutableBuffer(), size, 0, size) == 0, - "Failed to fill tensor with zeroes."); - return Status::OK(); - } - - /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings. - /// \tparam T - /// \param value[in] - template - Status Fill(const T &value) { - CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings."); - int64_t cellSize = type_.SizeInBytes(); - if ((data_ != nullptr) && type_.IsCompatible()) { - for (dsize_t i = 0; i < Size(); i++) { - CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s((data_ + i * cellSize), cellSize, &value, cellSize) == 0, "memcpy err"); - } - return Status::OK(); - } else { - std::string err; - err += (data_ == nullptr) ? "data_ is nullptr \t" : ""; - err += type_.IsCompatible() ? "data type not compatible\t" : ""; - return Status(StatusCode::kMDUnexpectedError, err); - } - } - - /// Getter function for shape - /// \return - const TensorShape &shape() const { return shape_; } - - /// Check if tensor has data - /// \return bool - true if tensor is not empty - bool HasData() const { return data_ != nullptr; } - - /// Reshape the tensor. The given shape should have the same number of elements in the Tensor - /// \param shape - virtual Status Reshape(const TensorShape &shape); - - /// \return number of elements in this tensor - dsize_t Size() const { return shape().NumOfElements(); } - - /// \return the number of bytes this tensor is needs - dsize_t SizeInBytes() const { - if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements(); - return data_end_ - data_; - } - - /// \return the rank of the tensor - dsize_t Rank() const { return shape().Rank(); } - - /// Get the starting memory address as a constant for the data of the tensor. This potentially - /// drives an allocation if the data area. 
- /// \return const unsigned char* - const unsigned char *GetBuffer() const { return data_; } - - /// Getter of the type - /// \return - DataType type() const { return type_; } - - /// Provide stream operator for displaying it - /// \param output stream - /// \param so the Tensor object to be printed - /// \return output stream - friend std::ostream &operator<<(std::ostream &out, const Tensor &so) { - so.Print(out); - return out; - } - - /// Invalidate this Tensor by setting the type and shape to unknown and MData to null. - /// Calling this method will make the Tensor and its data inaccessible, use it with caution. - void Invalidate(); - - /// Copy input tensor into self at the location index. - /// Index is a vector of axes which can be incomplete: - /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. - /// \param index - /// \param input - /// \param partial_insert: boolean to determine if insertion along the full axis is enforced - /// \return Status code - Status InsertTensor(const std::vector &index, const std::shared_ptr &input, - const bool partial_insert = false); - - /// Find the address of the given index. Used in InsertTensor. - /// Example: - /// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1 - /// \param index incomplete index - /// \param output: startAddrofIndex - /// \param output: remaining - /// \return Status code - Status StartAddrOfIndex(std::vector ind, uchar **start_addr_of_index, TensorShape *remaining); - - /// Expand the shape of the Tensor with one extra dimension. - /// For example, if the shape is <512,512,3>: - /// *- ExpandDim(0) gives: <1,512,512,3> - /// *- ExpandDim(1) gives: <512,1,512,3> - /// *- ExpandDim(3) gives: <512,512,3,1> - /// \param axis location of the dim - virtual Status ExpandDim(const dsize_t &axis); - - virtual void Squeeze(); - - /// Calculates the strides of the Tensor - /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) - /// The strides will be {6,2,1}. - /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) - /// The strides will be {24,8,4}. - /// \return vector of integers - std::vector Strides() const; - - std::string ToString() { - std::stringstream ss; - this->Print(ss); - return ss.str(); - } - - /// Handle negative indices. - /// \param[out] out modified index - /// \param[in] index - /// \param[in] length axis length used to modify index - /// \return dsize_t modified index - static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } - - /// Handle negative indices for a vector of indices. - /// \param[out] out modified vector of indices - /// \param[in] index_vector vector of indices - /// \return std::vector modified vector of indices - static inline std::vector HandleNegIndices(std::vector index_vector, std::vector length) { - std::vector indices(index_vector.size(), 0); - for (int i = 0; i < index_vector.size(); i++) { - indices[i] = HandleNeg(index_vector[i], length[i]); - } - return indices; - } - - /// Slice tensor bases on the given indices. Copy the sliced data into out tensor. - /// Based on the type of tensor, SliceNumeric or SliceString will be called - /// \param[out] out Tensor - /// \param[in] slice_options vector of SliceOption objects - /// \return Status error code - Status Slice(TensorPtr *out, const std::vector slice_options); - - /// Get slice_option according to shape and index. 
- /// \param[in] slice_option input SliceOption object - /// \param[in] slice_index index of SliceOption object - /// \param[out] output slice_option with shape info - /// \return Status error code - Status GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index, SliceOption *slice_option_ptr); - -#ifdef ENABLE_PYTHON - /// Constructs numpy array from input tensor - /// \param[in] data this data is the location of python data - /// \return Status code - Status GetDataAsNumpy(py::array *data); - - Status GetDataAsNumpyStrings(py::array *data); - - static Status GetBufferInfo(Tensor *t, py::buffer_info *out); -#endif - - /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor - /// The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 - /// \tparam T type of values in the Tensor Iterator - template - class TensorIterator { - public: - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using difference_type = ptrdiff_t; - using pointer = T *; - using reference = T &; - - explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast(ptr); } - - TensorIterator(const TensorIterator &raw_iterator) { ptr_ = raw_iterator.ptr_; } - - ~TensorIterator() = default; - - TensorIterator &operator=(const TensorIterator &rhs) { - ptr_ = rhs.ptr_; - return *this; - } - - TensorIterator &operator=(T *rhs) { - ptr_ = rhs; - return *this; - } - - bool operator==(const TensorIterator &rhs) { return ptr_ == rhs.ptr_; } - - bool operator!=(const TensorIterator &rhs) { return !(*this == rhs); } - - operator bool() const { return ptr_ != nullptr; } - - T &operator*() { return *ptr_; } - - const T &operator*() const { return *ptr_; } - - T *operator->() { return ptr_; } - - TensorIterator &operator+=(const ptrdiff_t &inc) { - ptr_ += inc; - return *this; - } - - TensorIterator &operator-=(const ptrdiff_t &inc) { - ptr_ -= inc; - return *this; - } - - TensorIterator &operator++() { - ++ptr_; - return *this; - } - - TensorIterator &operator--() { - --ptr_; - return *this; - } - - TensorIterator operator++(int) { - auto temp(*this); - ++ptr_; - return temp; - } - - TensorIterator operator--(int) { - auto temp(*this); - --ptr_; - return temp; - } - - TensorIterator operator+(const ptrdiff_t &inc) { - auto oldPtr = ptr_; - ptr_ += inc; - auto temp(*this); - ptr_ = oldPtr; - return temp; - } - - TensorIterator operator-(const ptrdiff_t &inc) { - auto oldPtr = ptr_; - ptr_ -= inc; - auto temp(*this); - ptr_ = oldPtr; - return temp; - } - - protected: - T *ptr_; - }; - - // Specialization of TensorIterator for strings. It returns std::string_view for every item. 
- // \tparam DUMMY, used to mbe able to specialize the inner class - template - class TensorIterator { - public: - using iterator_category = std::random_access_iterator_tag; - using value_type = std::string_view; - using difference_type = ptrdiff_t; - using pointer = std::string_view *; - using reference = std::string_view &; - - explicit TensorIterator(uchar *data = nullptr, dsize_t index = 0) { - data_ = reinterpret_cast(data); - index_ = index; - } - - TensorIterator(const TensorIterator &raw_iterator) { - data_ = raw_iterator.data_; - index_ = raw_iterator.index_; - } - - ~TensorIterator() = default; - - bool operator==(const TensorIterator &rhs) { return data_ == rhs.data_ && index_ == rhs.index_; } - - bool operator!=(const TensorIterator &rhs) { return !(*this == rhs); } - - operator bool() const { return data_ != nullptr; } - - std::string_view operator*() const { - auto offset_ = reinterpret_cast(data_); - offset_t start = offset_[index_]; - return std::string_view{data_ + start}; - } - - TensorIterator &operator+=(const dsize_t &inc) { - index_ += inc; - return *this; - } - - TensorIterator &operator-=(const dsize_t &inc) { - index_ -= inc; - return *this; - } - - TensorIterator &operator++() { - ++index_; - return *this; - } - - TensorIterator &operator--() { - --index_; - return *this; - } - - TensorIterator operator++(int) { - auto temp(*this); - ++index_; - return temp; - } - - TensorIterator operator--(int) { - auto temp(*this); - --index_; - return temp; - } - - TensorIterator operator+(const dsize_t &inc) { - auto oldPtr = index_; - index_ += inc; - auto temp(*this); - index_ = oldPtr; - return temp; - } - - TensorIterator operator-(const dsize_t &inc) { - auto oldPtr = index_; - index_ -= inc; - auto temp(*this); - index_ = oldPtr; - return temp; - } - - protected: - dsize_t index_; - const char *data_; - }; - - /// Return a TensorIterator that points to the start of the Tensor. - /// It's the user responsibility to use the correct type that matches the Tensor type - /// \tparam T The type of values in the Tensor - /// \return TensorIterator - template - TensorIterator begin() { - return TensorIterator(data_); - } - - /// Return a linear iterator that points to the place after the last element of the Tensor. - /// \tparam T The type of values in the Tensor - /// \return TensorIterator - template - TensorIterator end() { - return TensorIterator(data_end_); - } - - /// Copies the last dimension at `index` from Tensor `src` to this Tensor. - /// \param[in] src Tensor - /// \param[in] index vector to the start of the dimension. The last dim should be 0 - /// \return Status - Status CopyLastDimAt(const std::shared_ptr &src, const std::vector &index); - - protected: - /// Allocate memory for the tensor using the data_allocator - /// \param[in] length number of bytes to be allocated - /// \return Error Status - Status AllocateBuffer(const dsize_t &length); - - /// Get the starting memory address for the data of the tensor. This potentially - /// drives an allocation if the data is null. 
- /// \return unsigned char* - unsigned char *GetMutableBuffer() { return data_; } - - /// A function that prints Tensor recursively, first called by print - /// \param[in] out - /// \param[in] cur_dim - /// \param[in] cur_index - void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector &cur_index) const; - - /// A function that prints info about the tensor - /// \param[out] out output stream - void Print(std::ostream &out) const; - - /// A function that print the value as specified by its index - /// \param[in] index vector representing the index - /// \param[out] out - void PrintItemAt(const std::vector &index, std::ostream &out) const; - - /// Get pointer to item located at `index`, caller needs to provide the type. - /// \tparam T - /// \param[in] index vector - /// \return return a pointer to the item specified at index of type `T` - template - Status GetItemPtr(T **, const std::vector &index) const; - - /// Get pointer to string located at `index` and the length of string - /// \param[in] index vector - /// \return return a pointer to the string specified at index and the length of the string - Status GetItemPtr(uchar **, const std::vector &index, offset_t *length = nullptr) const; - - /// Given a flat index of an item string, return the start and length of the item - /// \param[in] index flat index of the item - /// \param[out] start address of the ths string - /// \param[out] length of the string - Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const; - - /// Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if - /// the tensor's type is a string, otherwise undefined address would be returned. \return address of the first string - /// of the tensor. - uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; } - - /// all access to shape_ should be via shape - TensorShape shape_; - /// data type of tensor - DataType type_; - /// pointer to the start of the physical data - unsigned char *data_; - /// An allocator for data_ - CharAllocPtr data_allocator_; - /// pointer to the end of the physical data - unsigned char *data_end_ = nullptr; - - /// shape for interpretation of YUV image - std::vector yuv_shape_; - - private: - friend class DETensor; - - /// Slice numeric tensors. 
- Status SliceNumeric(TensorPtr *out, const std::vector> &indices, const TensorShape &shape); - - /// Slice string tensors - Status SliceString(TensorPtr *out, const std::vector> &indices, const TensorShape &shape); - - /// Copy raw data of a array based on shape and strides to the destination pointer - /// \param dst [out] Pointer to the destination array where the content is to be copied - /// \param[in] src Pointer to the source of strided array to be copied - /// \param[in] shape shape of the source array - /// \param[in] strides strides of the source array - /// \param[in] type_size number of bytes needed to store one array element's type - /// \return Status Code - static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector shape, - std::vector strides, uint8_t type_size); - - /// const of the size of the offset variable - static constexpr uint8_t kOffsetSize = sizeof(offset_t); - -#ifdef ENABLE_PYTHON - /// Helper function to create a tensor from Numpy array of strings - /// \param[in] arr Numpy array - /// \param[out] out Created Tensor - /// \return Status - static Status CreateFromNpString(py::array arr, TensorPtr *out); -#endif -}; -template <> -inline Tensor::TensorIterator Tensor::end() { - return TensorIterator(data_, shape_.NumOfElements()); -} - -/// Create a Tensor from a given list of strings. -/// @note: The memory layout of a Tensor of strings consists of the Offset_array followed by the strings. -/// The offset array will store one extra value to find the length of the last string. -/// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n -/// The value of each offset is the start index of the corresponding string -/// Offsets is of type offset_t -/// strings will ne null-terminated -/// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING) -/// |----------------------------------------------------------------| -/// | OFFSET ARRAY | STRINGS | -/// | bytes 0-3 | bytes 3-6 | bytes 7-10 | bytes 11-14 | bytes 15-17 | -/// | 11 | 15 | 18 | abc\0 | de\0 | -/// |----------------------------------------------------------------| -/// \param[in] items elements of the tensor -/// \param[in] shape shape of the output tensor -/// \param[out] out output argument to hold the created Tensor -/// \return Status Code -template <> -inline Status Tensor::CreateFromVector(const std::vector &items, const TensorShape &shape, - TensorPtr *out) { - CHECK_FAIL_RETURN_UNEXPECTED( - items.size() == shape.NumOfElements(), - "Number of elements in the vector does not match the number of elements of the shape required"); - const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); - *out = std::allocate_shared(*alloc, TensorShape({static_cast(items.size())}), - DataType(DataType::DE_STRING)); - if (items.size() == 0) { - if (shape.known()) { - return (*out)->Reshape(shape); - } - } - auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; }; - dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum); - - // total bytes needed = offset array + strings - // offset array needs to store one offset var per element + 1 extra to get the length of the last string. 
- // strings will be null-terminated --> need 1 extra byte per element - dsize_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length; - - (*out)->AllocateBuffer(num_bytes); - auto offset_arr = reinterpret_cast((*out)->data_); - uchar *buf = (*out)->GetStringsBuffer(); - - offset_t offset = buf - (*out)->data_; // the first string will start here - uint32_t i = 0; - for (const auto &str : items) { - // insert the start index of the string. - offset_arr[i++] = offset; - // total bytes are reduced by kOffsetSize - num_bytes -= kOffsetSize; - // insert actual string - int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1); - if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor"; - // next string will be stored right after the current one. - offset = offset + str.length() + 1; - // total bytes are reduced by the length of the string - num_bytes -= str.length() + 1; - } - // store one more offset value so we can get the length of the last string - offset_arr[i] = offset; - - (*out)->data_end_ = (*out)->data_ + offset_arr[i]; - - MS_ASSERT(num_bytes == 0); - if (shape.known()) { - RETURN_IF_NOT_OK((*out)->Reshape(shape)); - } - return Status::OK(); -} -/// Create a string scalar Tensor from the given value. -/// \param[in] item value -/// \param[out] out Created tensor -/// \return Status code -template <> -inline Status Tensor::CreateScalar(const std::string &item, TensorPtr *out) { - return CreateFromVector({item}, TensorShape::CreateScalar(), out); -} -} // namespace dataset -} // namespace mindspore -#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_