@ -35,6 +35,7 @@
# include "dataset/util/allocator.h"
# include "dataset/util/allocator.h"
# include "dataset/util/de_error.h"
# include "dataset/util/de_error.h"
# include "dataset/util/status.h"
# include "dataset/util/status.h"
# include "proto/example.pb.h"
namespace py = pybind11 ;
namespace py = pybind11 ;
namespace mindspore {
namespace mindspore {
@ -64,6 +65,8 @@ class Tensor {
// @param data unsigned char*, pointer to the data.
// @param data unsigned char*, pointer to the data.
Tensor ( const TensorShape & shape , const DataType & type , const unsigned char * data ) ;
Tensor ( const TensorShape & shape , const DataType & type , const unsigned char * data ) ;
Tensor ( const TensorShape & shape , const DataType & type , const unsigned char * data , const dsize_t & length ) ;
Tensor ( const Tensor & other ) = delete ;
Tensor ( const Tensor & other ) = delete ;
Tensor & operator = ( const Tensor & other ) = delete ;
Tensor & operator = ( const Tensor & other ) = delete ;
@ -72,6 +75,8 @@ class Tensor {
Tensor & operator = ( Tensor & & other ) noexcept ;
Tensor & operator = ( Tensor & & other ) noexcept ;
Status AllocateBuffer ( const dsize_t & length ) ;
// type of offset values to store strings information
// type of offset values to store strings information
using offset_t = uint32_t ;
using offset_t = uint32_t ;
// const of the size of the offset variable
// const of the size of the offset variable
@ -84,15 +89,24 @@ class Tensor {
// Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
// Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
// the size of the vector `strings`.
// the size of the vector `strings`.
// The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
// The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
// OFFSET1, OFFSET2, ... String1, String2, ...
// The offset array will store one extra value to find the length of the last string.
// The value of each offset is the end index of the corresponding string
// OFFSET1, OFFSET2, ..., OFFSETn+1, STRING1, STRING2, ..., STRINGn
// The value of each offset is the start index of the corresponding string
// Offsets are of type offset_t
// Offsets are of type offset_t
// strings will be null-terminated
// strings will be null-terminated
// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
// 3 6 a b c \0 d e \0
// |----------------------------------------------------------------|
// | OFFSET ARRAY | STRINGS |
// | bytes 0-3 | bytes 4-7 | bytes 8-11 | bytes 12-15 | bytes 16-18 |
// | 12 | 16 | 19 | abc\0 | de\0 |
// |----------------------------------------------------------------|
explicit Tensor ( const std : : vector < std : : string > & strings ,
explicit Tensor ( const std : : vector < std : : string > & strings ,
const TensorShape & shape = TensorShape : : CreateUnknownRankShape ( ) ) ;
const TensorShape & shape = TensorShape : : CreateUnknownRankShape ( ) ) ;
// Same as Tensor(vector<string>) but the input is protobuf bytelist
explicit Tensor ( const dataengine : : BytesList & bytes_list ,
const TensorShape & shape = TensorShape : : CreateUnknownRankShape ( ) ) ;
// A static factory method to create the given flavour of derived Tensor
// A static factory method to create the given flavour of derived Tensor
// Returns the base class reference for the Tensor.
// Returns the base class reference for the Tensor.
// @param ptr output argument to hold the created Tensor of given tensor_impl
// @param ptr output argument to hold the created Tensor of given tensor_impl
@ -121,6 +135,9 @@ class Tensor {
static Status CreateTensor ( std : : shared_ptr < Tensor > * ptr , const std : : vector < std : : string > & strings ,
static Status CreateTensor ( std : : shared_ptr < Tensor > * ptr , const std : : vector < std : : string > & strings ,
const TensorShape & shape = TensorShape : : CreateUnknownRankShape ( ) ) ;
const TensorShape & shape = TensorShape : : CreateUnknownRankShape ( ) ) ;
static Status CreateTensor ( std : : shared_ptr < Tensor > * ptr , const dataengine : : BytesList & bytes_list ,
const TensorShape & shape ) ;
// Copy raw data of an array based on shape and strides to the destination pointer
// Copy raw data of an array based on shape and strides to the destination pointer
// @param dst Pointer to the destination array where the content is to be copied
// @param dst Pointer to the destination array where the content is to be copied
// @param src Pointer to the source of strided array to be copied
// @param src Pointer to the source of strided array to be copied
@ -166,7 +183,7 @@ class Tensor {
// @param value of type `T`
// @param value of type `T`
template < typename T >
template < typename T >
Status SetItemAt ( const std : : vector < dsize_t > & index , const T & value ) {
Status SetItemAt ( const std : : vector < dsize_t > & index , const T & value ) {
static_cast < void > ( GetMutableBuffer ( ) ) ;
RETURN_IF_NOT_OK ( AllocateBuffer ( SizeInBytes ( ) ) ) ;
T * ptr = nullptr ;
T * ptr = nullptr ;
RETURN_IF_NOT_OK ( GetItemPtr < T > ( & ptr , index ) ) ;
RETURN_IF_NOT_OK ( GetItemPtr < T > ( & ptr , index ) ) ;
* ptr = value ;
* ptr = value ;
@ -203,7 +220,7 @@ class Tensor {
template < typename T >
template < typename T >
Status Fill ( const T & value ) {
Status Fill ( const T & value ) {
CHECK_FAIL_RETURN_UNEXPECTED ( type_ ! = DataType : : DE_STRING , " Cannot use fill on tensor of strings. " ) ;
CHECK_FAIL_RETURN_UNEXPECTED ( type_ ! = DataType : : DE_STRING , " Cannot use fill on tensor of strings. " ) ;
static_cast < void > ( GetMutableBuffer ( ) ) ;
RETURN_IF_NOT_OK ( AllocateBuffer ( SizeInBytes ( ) ) ) ;
int64_t cellSize = type_ . SizeInBytes ( ) ;
int64_t cellSize = type_ . SizeInBytes ( ) ;
if ( ( data_ ! = nullptr ) & & type_ . IsCompatible < T > ( ) ) {
if ( ( data_ ! = nullptr ) & & type_ . IsCompatible < T > ( ) ) {
for ( dsize_t i = 0 ; i < Size ( ) ; i + + ) {
for ( dsize_t i = 0 ; i < Size ( ) ; i + + ) {
@ -418,32 +435,28 @@ class Tensor {
using pointer = std : : string_view * ;
using pointer = std : : string_view * ;
using reference = std : : string_view & ;
using reference = std : : string_view & ;
explicit TensorIterator ( uchar * offset = nullptr , const uchar * buf = nullptr , dsize_t index = 0 ) {
explicit TensorIterator ( uchar * data = nullptr , dsize_t index = 0 ) {
offset_ = reinterpret_cast < offset_t * > ( offset ) ;
data_ = reinterpret_cast < const char * > ( data ) ;
buf_ = reinterpret_cast < const char * > ( buf ) ;
index_ = index ;
index_ = index ;
}
}
TensorIterator ( const TensorIterator < std : : string_view , DUMMY > & raw_iterator ) {
TensorIterator ( const TensorIterator < std : : string_view , DUMMY > & raw_iterator ) {
offset_ = raw_iterator . offset_ ;
data_ = raw_iterator . data_ ;
buf_ = raw_iterator . buf_ ;
index_ = raw_iterator . index_ ;
index_ = raw_iterator . index_ ;
}
}
~ TensorIterator ( ) = default ;
~ TensorIterator ( ) = default ;
bool operator = = ( const TensorIterator < std : : string_view > & rhs ) {
bool operator = = ( const TensorIterator < std : : string_view > & rhs ) { return data_ = = rhs . data_ & & index_ = = rhs . index_ ; }
return buf_ = = rhs . buf_ & & offset_ = = rhs . offset_ & & index_ = = rhs . index_ ;
}
bool operator ! = ( const TensorIterator < std : : string_view > & rhs ) { return ! ( * this = = rhs ) ; }
bool operator ! = ( const TensorIterator < std : : string_view > & rhs ) { return ! ( * this = = rhs ) ; }
operator bool ( ) const { return offset _ ! = nullptr ; }
operator bool ( ) const { return data _ ! = nullptr ; }
std : : string_view operator * ( ) const {
std : : string_view operator * ( ) const {
offset_t start = 0 ;
auto offset_ = reinterpret_cast < const offset_t * > ( data_ ) ;
if ( index_ ! = 0 ) start = offset_ [ index_ - 1 ] + 1 ;
offset_t start = offset_ [ index_ ] ;
return std : : string_view { buf _ + start } ;
return std : : string_view { data _ + start } ;
}
}
TensorIterator < std : : string_view > & operator + = ( const dsize_t & inc ) {
TensorIterator < std : : string_view > & operator + = ( const dsize_t & inc ) {
@ -496,8 +509,7 @@ class Tensor {
protected :
protected :
dsize_t index_ ;
dsize_t index_ ;
offset_t * offset_ ;
const char * data_ ;
const char * buf_ ;
} ;
} ;
// Return a TensorIterator that points to the start of the Tensor.
// Return a TensorIterator that points to the start of the Tensor.
@ -518,11 +530,6 @@ class Tensor {
}
}
protected :
protected :
// Returns the location of the item assuming row major memory layout.
// @param index
// @return
Status ToFlatIndex ( const std : : vector < dsize_t > & index , dsize_t * flat_index ) const ;
// A function that prints Tensor recursively, first called by print
// A function that prints Tensor recursively, first called by print
// @param out
// @param out
// @param cur_dim
// @param cur_dim
@ -559,7 +566,7 @@ class Tensor {
// Skip the offsets and return the start of the buffer where the real strings are stored. Caller needs to check if the
// Skip the offsets and return the start of the buffer where the real strings are stored. Caller needs to check if the
// tensor's type is a string, otherwise undefined address would be returned.
// tensor's type is a string, otherwise undefined address would be returned.
// @return address of the first string of the tensor.
// @return address of the first string of the tensor.
uchar * GetStringsBuffer ( ) const { return data_ + kOffsetSize * shape_ . NumOfElements ( ) ; }
uchar * GetStringsBuffer ( ) const { return data_ + kOffsetSize * shape_ . NumOfElements ( ) + kOffsetSize ; }
// all access to shape_ should be via shape
// all access to shape_ should be via shape
TensorShape shape_ ;
TensorShape shape_ ;
@ -573,14 +580,8 @@ class Tensor {
unsigned char * data_end_ = nullptr ;
unsigned char * data_end_ = nullptr ;
} ;
} ;
template < >
template < >
inline Tensor : : TensorIterator < std : : string_view > Tensor : : begin < std : : string_view > ( ) {
uchar * buf = GetStringsBuffer ( ) ;
return TensorIterator < std : : string_view > ( data_ , buf ) ;
}
template < >
inline Tensor : : TensorIterator < std : : string_view > Tensor : : end < std : : string_view > ( ) {
inline Tensor : : TensorIterator < std : : string_view > Tensor : : end < std : : string_view > ( ) {
uchar * buf = GetStringsBuffer ( ) ;
return TensorIterator < std : : string_view > ( data_ , shape_ . NumOfElements ( ) ) ;
return TensorIterator < std : : string_view > ( data_ , buf , shape_ . NumOfElements ( ) ) ;
}
}
} // namespace dataset
} // namespace dataset
} // namespace mindspore
} // namespace mindspore