!2000 Make GetMutableBuffer protected

Merge pull request !2000 from h.farahat/tensor_mutabble_buffer
pull/2000/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 8eb8663fbd

@ -128,7 +128,9 @@ class DataType {
// @tparam T // @tparam T
// @return true or false // @return true or false
template <typename T> template <typename T>
bool IsCompatible() const; bool IsCompatible() const {
return type_ == FromCType<T>();
}
// returns true if the template type is the same as the Tensor type_ // returns true if the template type is the same as the Tensor type_
// @tparam T // @tparam T
@ -146,6 +148,9 @@ class DataType {
return out; return out;
} }
template <typename T>
static DataType FromCType();
// Convert from DataType to Pybind type // Convert from DataType to Pybind type
// @return // @return
py::dtype AsNumpyType() const; py::dtype AsNumpyType() const;
@ -191,68 +196,68 @@ class DataType {
}; };
template <> template <>
inline bool DataType::IsCompatible<bool>() const { inline DataType DataType::FromCType<bool>() {
return type_ == DataType::DE_BOOL; return DataType(DataType::DE_BOOL);
} }
template <> template <>
inline bool DataType::IsCompatible<double>() const { inline DataType DataType::FromCType<double>() {
return type_ == DataType::DE_FLOAT64; return DataType(DataType::DE_FLOAT64);
} }
template <> template <>
inline bool DataType::IsCompatible<float>() const { inline DataType DataType::FromCType<float>() {
return type_ == DataType::DE_FLOAT32; return DataType(DataType::DE_FLOAT32);
} }
template <> template <>
inline bool DataType::IsCompatible<float16>() const { inline DataType DataType::FromCType<float16>() {
return type_ == DataType::DE_FLOAT16; return DataType(DataType::DE_FLOAT16);
} }
template <> template <>
inline bool DataType::IsCompatible<int64_t>() const { inline DataType DataType::FromCType<int64_t>() {
return type_ == DataType::DE_INT64; return DataType(DataType::DE_INT64);
} }
template <> template <>
inline bool DataType::IsCompatible<uint64_t>() const { inline DataType DataType::FromCType<uint64_t>() {
return type_ == DataType::DE_UINT64; return DataType(DataType::DE_UINT64);
} }
template <> template <>
inline bool DataType::IsCompatible<int32_t>() const { inline DataType DataType::FromCType<int32_t>() {
return type_ == DataType::DE_INT32; return DataType(DataType::DE_INT32);
} }
template <> template <>
inline bool DataType::IsCompatible<uint32_t>() const { inline DataType DataType::FromCType<uint32_t>() {
return type_ == DataType::DE_UINT32; return DataType(DataType::DE_UINT32);
} }
template <> template <>
inline bool DataType::IsCompatible<int16_t>() const { inline DataType DataType::FromCType<int16_t>() {
return type_ == DataType::DE_INT16; return DataType(DataType::DE_INT16);
} }
template <> template <>
inline bool DataType::IsCompatible<uint16_t>() const { inline DataType DataType::FromCType<uint16_t>() {
return type_ == DataType::DE_UINT16; return DataType(DataType::DE_UINT16);
} }
template <> template <>
inline bool DataType::IsCompatible<int8_t>() const { inline DataType DataType::FromCType<int8_t>() {
return type_ == DataType::DE_INT8; return DataType(DataType::DE_INT8);
} }
template <> template <>
inline bool DataType::IsCompatible<uint8_t>() const { inline DataType DataType::FromCType<uint8_t>() {
return type_ == DataType::DE_UINT8; return DataType(DataType::DE_UINT8);
} }
template <> template <>
inline bool DataType::IsCompatible<std::string_view>() const { inline DataType DataType::FromCType<std::string_view>() {
return type_ == DataType::DE_STRING; return DataType(DataType::DE_STRING);
} }
template <> template <>

@ -18,6 +18,7 @@
#include <algorithm> #include <algorithm>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <fstream>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <utility> #include <utility>
@ -311,6 +312,50 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::Byte
return Status::OK(); return Status::OK();
} }
// Create a 1-D uint8 Tensor that holds the raw bytes of a file.
// @param ptr output argument to hold the created Tensor
// @param file_path path of the file whose content is loaded
// @return Status Code
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &file_path) {
  // The stream is closed by its destructor on every return path, so no explicit close() is needed.
  std::ifstream fs;
  fs.open(file_path, std::ios::binary | std::ios::in);
  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + file_path);
  // Seek to the end to learn the size, then rewind for the actual read.
  int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
  // tellg() reports failure as -1; reject it before it is used as a tensor dimension.
  CHECK_FAIL_RETURN_UNEXPECTED(num_bytes >= 0, "Fail to find size of file");
  CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file");
  RETURN_IF_NOT_OK(
    Tensor::CreateTensor(ptr, TensorImpl::kFlexible, TensorShape{num_bytes}, DataType(DataType::DE_UINT8)));
  // GetMutableBuffer() may have to allocate; guard against a null buffer before reading into it.
  unsigned char *buffer = (*ptr)->GetMutableBuffer();
  CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr || num_bytes == 0, "tensor memory allocation failed");
  int64_t written_bytes = fs.read(reinterpret_cast<char *>(buffer), num_bytes).gcount();
  CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), "Error in writing to tensor");
  return Status::OK();
}
// Create a Tensor from a protobuf BytesList. The elements are copied back to back into the tensor buffer and
// each one is right-padded with spaces up to pad_size bytes.
// @param ptr output argument to hold the created Tensor
// @param bytes_list protobuf list of byte strings to copy
// @param shape shape of the tensor
// @param type type of the tensor
// @param pad_size number of bytes every element occupies after padding
// @return Status Code
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
                            const TensorShape &shape, const DataType &type, dsize_t pad_size) {
  RETURN_IF_NOT_OK(Tensor::CreateTensor(ptr, TensorImpl::kFlexible, shape, type));
  unsigned char *current_tensor_addr = (*ptr)->GetMutableBuffer();
  // Bytes this function is allowed to write; assumes the caller sized `shape` accordingly.
  int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
  CHECK_FAIL_RETURN_UNEXPECTED(current_tensor_addr != nullptr || tensor_bytes_remaining <= 0,
                               "tensor memory allocation failed");
  for (int i = 0; i < bytes_list.value_size(); i++) {
    // read string data into tensor
    const std::string &current_element = bytes_list.value(i);
    const int64_t element_size = static_cast<int64_t>(current_element.size());
    // An element longer than its slot would yield a negative pad count below.
    CHECK_FAIL_RETURN_UNEXPECTED(element_size <= static_cast<int64_t>(pad_size),
                                 "bytesList element is longer than pad_size");
    int return_code =
      memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
    CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memcpy_s failed when reading bytesList element into Tensor");
    current_tensor_addr += element_size;
    tensor_bytes_remaining -= element_size;
    // pad the rest of the slot with spaces; nothing to do when the element already fills it
    const int64_t chars_to_pad = static_cast<int64_t>(pad_size) - element_size;
    if (chars_to_pad > 0) {
      return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
      CHECK_FAIL_RETURN_UNEXPECTED(return_code == 0, "memset_s failed when padding Tensor");
      current_tensor_addr += chars_to_pad;
      tensor_bytes_remaining -= chars_to_pad;
    }
  }
  return Status::OK();
}
// Memcpy the given strided array's used part to consecutive memory // Memcpy the given strided array's used part to consecutive memory
// Consider a 3-d array // Consider a 3-d array
// A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]] // A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]

@ -135,9 +135,41 @@ class Tensor {
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
const TensorShape &shape = TensorShape::CreateUnknownRankShape()); const TensorShape &shape = TensorShape::CreateUnknownRankShape());
// create tensor from protobuf bytelist with strings
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list, static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
const TensorShape &shape); const TensorShape &shape);
// A static factory method to create a Tensor from a given list of numbers.
// The Tensor type is derived from T through DataType::FromCType<T>().
// @param ptr output argument to hold the created Tensor
// @param items elements of the tensor
// @param shape_req requested shape of the tensor; when it is not known, a 1-D shape of items.size() is used
// @return Status Code
template <typename T>
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<T> &items,
                           const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) {
  DataType type = DataType::FromCType<T>();
  // data() is well defined for an empty vector, whereas &items[0] is not.
  auto items_ptr = reinterpret_cast<const uchar *>(items.data());
  TensorShape shape = shape_req;
  if (!shape.known()) {
    shape = TensorShape({static_cast<dsize_t>(items.size())});
  }
  return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr);
}
// A static factory method that wraps a single value into a scalar-shaped Tensor.
// It delegates to the vector-based factory with a one-element list.
// @param ptr output argument to hold the created Tensor
// @param item value
// @return Status Code
template <typename T>
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
  const std::vector<T> single_item{item};
  return CreateTensor<T>(ptr, single_item, TensorShape::CreateScalar());
}
// Create tensor from protobuf bytelist with uint8 or int8 types
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
const TensorShape &shape, const DataType &type, dsize_t pad_size);
// Create a 1-D uint8 Tensor from the raw bytes of a file
// @param ptr output argument to hold the created Tensor
// @param path path of the file to read
// @return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &path);
// Copy raw data of a array based on shape and strides to the destination pointer // Copy raw data of a array based on shape and strides to the destination pointer
// @param dst Pointer to the destination array where the content is to be copied // @param dst Pointer to the destination array where the content is to be copied
// @param src Pointer to the source of strided array to be copied // @param src Pointer to the source of strided array to be copied
@ -260,11 +292,6 @@ class Tensor {
// @return const unsigned char* // @return const unsigned char*
const unsigned char *GetBuffer() const; const unsigned char *GetBuffer() const;
// Get the starting memory address for the data of the tensor. This potentially
// drives an allocation if the data area.
// @return unsigned char*
unsigned char *GetMutableBuffer();
// Getter of the type // Getter of the type
// @return // @return
DataType type() const { return type_; } DataType type() const { return type_; }
@ -518,6 +545,7 @@ class Tensor {
// @return TensorIterator // @return TensorIterator
template <typename T> template <typename T>
TensorIterator<T> begin() { TensorIterator<T> begin() {
AllocateBuffer(SizeInBytes());
return TensorIterator<T>(data_); return TensorIterator<T>(data_);
} }
@ -536,6 +564,11 @@ class Tensor {
Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index); Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);
protected: protected:
// Get the starting memory address for the data of the tensor. This potentially
// drives an allocation if the data is null.
// @return unsigned char*
unsigned char *GetMutableBuffer();
// A function that prints Tensor recursively, first called by print // A function that prints Tensor recursively, first called by print
// @param out // @param out
// @param cur_dim // @param cur_dim

@ -254,7 +254,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed."); return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
} }
(void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_); (void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_);
unsigned char *column_data = curr_row[i]->GetMutableBuffer(); const unsigned char *column_data = curr_row[i]->GetBuffer();
if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data, if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) { static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
MS_LOG(ERROR) << "memcpy_s failed!"; MS_LOG(ERROR) << "memcpy_s failed!";

@ -363,19 +363,7 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t>
Path path(folder_path_); Path path(folder_path_);
Path image_path = path / image_label.first; Path image_path = path / image_label.first;
std::ifstream handle(image_path.toString(), std::ios::binary | std::ios::in); RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, image_path.toString()));
if (handle.fail()) {
std::string err_msg = "Fail to open file: " + image_path.toString();
return Status(StatusCode::kFileNotExist, __LINE__, __FILE__, err_msg);
}
(void)handle.seekg(0, std::ios::end);
int64_t num_elements = handle.tellg();
(void)handle.seekg(0, std::ios::beg);
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
TensorShape(std::vector<dsize_t>(1, num_elements)),
data_schema_->column(0).type()));
(void)handle.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(image, &image); Status rc = Decode(image, &image);
if (rc.IsError()) { if (rc.IsError()) {

@ -195,7 +195,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
std::shared_ptr<Tensor> fine_label; std::shared_ptr<Tensor> fine_label;
std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
std::shared_ptr<Tensor> copy_image = std::shared_ptr<Tensor> copy_image =
std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer()); std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetBuffer());
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
data_schema_->column(1).type(), data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0]))); reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));
@ -366,10 +366,9 @@ Status CifarOp::ParseCifarData() {
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image_tensor, data_schema_->column(0).tensorImpl(), RETURN_IF_NOT_OK(Tensor::CreateTensor(&image_tensor, data_schema_->column(0).tensorImpl(),
TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
data_schema_->column(0).type())); data_schema_->column(0).type()));
for (int ch = 0; ch < kCifarImageChannel; ++ch) { auto itr = image_tensor->begin<uint8_t>();
for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) { for (; itr != image_tensor->end<uint8_t>(); itr++) {
(image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++]; *itr = block[cur_block_index++];
}
} }
cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels)); cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels));
} }

@ -591,17 +591,8 @@ Status CocoOp::LaunchThreadsAndInitOp() {
} }
Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) { Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
std::ifstream fs; RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path));
fs.open(path, std::ios::binary | std::ios::in);
if (fs.fail()) {
RETURN_STATUS_UNEXPECTED("Fail to open file: " + path);
}
int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
(void)fs.seekg(0, std::ios::beg);
RETURN_IF_NOT_OK(
Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type()));
(void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements);
fs.close();
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(*tensor, tensor); Status rc = Decode(*tensor, tensor);
if (rc.IsError()) { if (rc.IsError()) {

@ -204,18 +204,8 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
data_schema_->column(1).type(), data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&pairPtr->second))); reinterpret_cast<unsigned char *>(&pairPtr->second)));
std::ifstream fs; RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, folder_path_ + (pairPtr->first)));
fs.open(folder_path_ + (pairPtr->first), std::ios::binary | std::ios::in);
if (fs.fail()) {
RETURN_STATUS_UNEXPECTED("Fail to open file: " + pairPtr->first);
}
int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
(void)fs.seekg(0, std::ios::beg);
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
TensorShape(std::vector<dsize_t>(1, num_elements)),
data_schema_->column(0).type(), nullptr));
(void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
fs.close();
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(image, &image); Status rc = Decode(image, &image);
if (rc.IsError()) { if (rc.IsError()) {

@ -198,23 +198,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&label_index[0]))); data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&label_index[0])));
} }
std::ifstream fs; RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data.first));
fs.open(data.first, std::ios::binary | std::ios::in);
if (!fs.is_open()) {
RETURN_STATUS_UNEXPECTED("Fail to open file: " + data.first);
}
int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
(void)fs.seekg(0, std::ios::beg);
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
TensorShape(std::vector<dsize_t>(1, num_elements)),
data_schema_->column(0).type(), nullptr));
(void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
if (fs.fail()) {
fs.close();
RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first);
}
fs.close();
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(image, &image); Status rc = Decode(image, &image);
if (rc.IsError()) { if (rc.IsError()) {

@ -167,7 +167,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow)
int32_t l = mnist_pair.second; int32_t l = mnist_pair.second;
// make a copy of cached tensor // make a copy of cached tensor
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(), RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(),
mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer())); mnist_pair.first->type(), mnist_pair.first->GetBuffer()));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l))); data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
(*trow) = {std::move(image), std::move(label)}; (*trow) = {std::move(image), std::move(label)};

@ -68,8 +68,8 @@ Status DistributedSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer
(*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone); (*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone);
std::shared_ptr<Tensor> sample_ids; std::shared_ptr<Tensor> sample_ids;
RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_)); RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_));
int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer()); auto id_ptr = sample_ids->begin<int64_t>();
while (cnt_ < samples_per_buffer_) { while (cnt_ < samples_per_buffer_ && id_ptr != sample_ids->end<int64_t>()) {
int64_t sampled_id = (num_devices_ * cnt_ + device_id_) % num_rows_; int64_t sampled_id = (num_devices_ * cnt_ + device_id_) % num_rows_;
if (shuffle_) { if (shuffle_) {
sampled_id = shuffle_vec_[static_cast<size_t>(sampled_id)]; sampled_id = shuffle_vec_[static_cast<size_t>(sampled_id)];

@ -73,8 +73,8 @@ Status PKSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
std::shared_ptr<Tensor> sample_ids; std::shared_ptr<Tensor> sample_ids;
int64_t last_id = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_; int64_t last_id = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_;
RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_)); RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_));
int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer()); auto id_ptr = sample_ids->begin<int64_t>();
while (next_id_ < last_id) { while (next_id_ < last_id && id_ptr != sample_ids->end<int64_t>()) {
int64_t cls_id = next_id_++ / samples_per_class_; int64_t cls_id = next_id_++ / samples_per_class_;
const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]]; const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]];
int64_t rnd_ind = std::uniform_int_distribution<int64_t>(0, samples.size() - 1)(rnd_); int64_t rnd_ind = std::uniform_int_distribution<int64_t>(0, samples.size() - 1)(rnd_);

@ -45,7 +45,7 @@ Status RandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
std::shared_ptr<Tensor> sampleIds; std::shared_ptr<Tensor> sampleIds;
int64_t last_id = std::min(samples_per_buffer_ + next_id_, num_samples_); int64_t last_id = std::min(samples_per_buffer_ + next_id_, num_samples_);
RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_)); RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_));
int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer()); auto id_ptr = sampleIds->begin<int64_t>();
for (int64_t i = 0; i < (last_id - next_id_); i++) { for (int64_t i = 0; i < (last_id - next_id_); i++) {
int64_t sampled_id = 0; int64_t sampled_id = 0;

@ -42,7 +42,7 @@ Status SequentialSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer)
int64_t num_elements = std::min(remaining_ids, samples_per_buffer_); int64_t num_elements = std::min(remaining_ids, samples_per_buffer_);
RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, num_elements)); RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, num_elements));
int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer()); auto idPtr = sampleIds->begin<int64_t>();
for (int64_t i = 0; i < num_elements; i++) { for (int64_t i = 0; i < num_elements; i++) {
int64_t sampled_id = current_id_; int64_t sampled_id = current_id_;
if (HasChildSampler()) { if (HasChildSampler()) {

@ -94,7 +94,7 @@ Status SubsetRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffe
RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));
// Initialize tensor // Initialize tensor
int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer()); auto id_ptr = outputIds->begin<int64_t>();
while (sample_id_ < last_id) { while (sample_id_ < last_id) {
if (indices_[sample_id_] >= num_rows_) { if (indices_[sample_id_] >= num_rows_) {
std::string err_msg = std::string err_msg =

@ -125,7 +125,7 @@ Status WeightedRandomSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buf
RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_)); RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));
// Initialize tensor. // Initialize tensor.
int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer()); auto id_ptr = outputIds->begin<int64_t>();
// Assign the data to tensor element. // Assign the data to tensor element.
while (sample_id_ < last_id) { while (sample_id_ < last_id) {
int64_t genId; int64_t genId;

@ -771,53 +771,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
// know how many elements there are and the total bytes, create tensor here: // know how many elements there are and the total bytes, create tensor here:
TensorShape current_shape = TensorShape::CreateScalar(); TensorShape current_shape = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape));
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, current_shape, current_col.type(), pad_size));
// Tensors are lazily allocated, this eagerly allocates memory for the tensor.
unsigned char *current_tensor_addr = (*tensor)->GetMutableBuffer();
int64_t tensor_bytes_remaining = (*num_elements) * pad_size;
if (current_tensor_addr == nullptr) {
std::string err_msg = "tensor memory allocation failed";
RETURN_STATUS_UNEXPECTED(err_msg);
}
RETURN_IF_NOT_OK(LoadAndPadBytes(current_tensor_addr, bytes_list, tensor_bytes_remaining, pad_size));
return Status::OK();
}
Status TFReaderOp::LoadAndPadBytes(unsigned char *current_tensor_addr, const dataengine::BytesList &bytes_list,
int64_t tensor_bytes_remaining, int64_t pad_size) {
if (current_tensor_addr == nullptr) {
std::string err_msg = "current_tensor_addr is null";
RETURN_STATUS_UNEXPECTED(err_msg);
}
for (int i = 0; i < bytes_list.value_size(); i++) {
// read string data into tensor
const std::string &current_element = bytes_list.value(i);
int return_code =
memcpy_s(current_tensor_addr, tensor_bytes_remaining, common::SafeCStr(current_element), current_element.size());
if (return_code != 0) {
std::string err_msg = "memcpy_s failed when reading bytesList element into Tensor";
RETURN_STATUS_UNEXPECTED(err_msg);
}
current_tensor_addr += current_element.size();
tensor_bytes_remaining -= current_element.size();
// pad
int64_t chars_to_pad = pad_size - current_element.size();
return_code = memset_s(current_tensor_addr, tensor_bytes_remaining, static_cast<int>(' '), chars_to_pad);
if (return_code != 0) {
std::string err_msg = "memset_s failed when padding bytesList in Tensor";
RETURN_STATUS_UNEXPECTED(err_msg);
}
current_tensor_addr += chars_to_pad;
tensor_bytes_remaining -= chars_to_pad;
}
return Status::OK(); return Status::OK();
} }

@ -296,17 +296,8 @@ class TFReaderOp : public ParallelOp {
// @param column_values_list - the cell that contains the bytes list to read from. // @param column_values_list - the cell that contains the bytes list to read from.
// @param elementStr - the string we read the value into. // @param elementStr - the string we read the value into.
// @return Status - the error code returned. // @return Status - the error code returned.
Status LoadBytesList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list, static Status LoadBytesList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
int32_t *num_elements, std::shared_ptr<Tensor> *tensor); int32_t *num_elements, std::shared_ptr<Tensor> *tensor);
// Loads all the strings in bytes_list into the memory at current_tensor_addr.
// @param current_tensor_addr - the memory address to load the strings to.
// @param bytes_list - the list of strings to load.
// @param tensor_bytes_remaining - the number of bytes available for this function to use.
// @param pad_size - number of bytes to pad to.
// @return Status - the error code returned.
Status LoadAndPadBytes(unsigned char *current_tensor_addr, const dataengine::BytesList &bytes_list,
int64_t tensor_bytes_remaining, int64_t pad_size);
// Reads values from a float list // Reads values from a float list
// @param current_col - the column descriptor containing the expected shape and type of the data. // @param current_col - the column descriptor containing the expected shape and type of the data.

@ -368,17 +368,7 @@ Status VOCOp::LaunchThreadsAndInitOp() {
} }
Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) { Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
std::ifstream fs; RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path));
fs.open(path, std::ios::binary | std::ios::in);
if (fs.fail()) {
RETURN_STATUS_UNEXPECTED("Fail to open file: " + path);
}
int64_t num_elements = fs.seekg(0, std::ios::end).tellg();
(void)fs.seekg(0, std::ios::beg);
RETURN_IF_NOT_OK(
Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type()));
(void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements);
fs.close();
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(*tensor, tensor); Status rc = Decode(*tensor, tensor);
if (rc.IsError()) { if (rc.IsError()) {

@ -59,7 +59,7 @@ Status Graph::CreateTensorByVector(const std::vector<std::vector<T>> &data, Data
size_t n = data[0].size(); size_t n = data[0].size();
RETURN_IF_NOT_OK(Tensor::CreateTensor( RETURN_IF_NOT_OK(Tensor::CreateTensor(
&tensor, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, nullptr)); &tensor, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, nullptr));
T *ptr = reinterpret_cast<T *>(tensor->GetMutableBuffer()); auto ptr = tensor->begin<T>();
for (const auto &id_m : data) { for (const auto &id_m : data) {
CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size"); CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size");
for (const auto &id_n : id_m) { for (const auto &id_n : id_m) {

@ -119,7 +119,7 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i
data_item.tensorShape_ = dataShapes; data_item.tensorShape_ = dataShapes;
data_item.tensorType_ = datatype; data_item.tensorType_ = datatype;
data_item.dataLen_ = ts->SizeInBytes(); data_item.dataLen_ = ts->SizeInBytes();
data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->GetMutableBuffer()), [](void *elem) {}); data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<uchar *>(&(*ts->begin<uint8_t>())), [](void *elem) {});
items.emplace_back(data_item); items.emplace_back(data_item);
MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is " MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is "
<< ts->Size() << "."; << ts->Size() << ".";

@ -101,11 +101,11 @@ Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor>
const std::vector<dsize_t> &pad_shape, const std::string &pad_val); const std::vector<dsize_t> &pad_shape, const std::string &pad_val);
// recursive helper function for padding string tensors. This function could be very expensive if called on a // recursive helper function for padding string tensors. This function could be very expensive if called on a
// multi-dimensional tensor it is only meant to be called by PadEndNumeric. // multi-dimensional tensor it is only meant to be called by PadEndString.
// @tparam T - type of tensor and fill value // @tparam T - type of tensor and fill value
// @param std::shared_ptr<Tensor> src - Tensor to pad from // @param std::shared_ptr<Tensor> src - Tensor to pad from
// @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
// @param std::vector<dsize_t> cur_ind - recursion helper // @param std::vector<dsize_t> cur_ind - recursion helper
// @param std::string pad_val - value to pad tensor with // @param std::string pad_val - value to pad tensor with
// @param size_t cur_dim - recursion helper // @param size_t cur_dim - recursion helper
// @return Status - The error code return // @return Status - The error code return

@ -119,17 +119,14 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
} }
} }
bool HasJpegMagic(const unsigned char *data, size_t data_size) { bool HasJpegMagic(const std::shared_ptr<Tensor> &input) {
const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF"; const unsigned char *kJpegMagic = (unsigned char *)"\xFF\xD8\xFF";
constexpr size_t kJpegMagicLen = 3; constexpr size_t kJpegMagicLen = 3;
return data_size >= kJpegMagicLen && memcmp(data, kJpegMagic, kJpegMagicLen) == 0; return input->SizeInBytes() >= kJpegMagicLen && memcmp(input->GetBuffer(), kJpegMagic, kJpegMagicLen) == 0;
} }
Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
if (input->GetMutableBuffer() == nullptr) { if (HasJpegMagic(input)) {
RETURN_STATUS_UNEXPECTED("Tensor is nullptr");
}
if (HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
return JpegCropAndDecode(input, output); return JpegCropAndDecode(input, output);
} else { } else {
return DecodeCv(input, output); return DecodeCv(input, output);
@ -283,7 +280,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
jerr.pub.error_exit = JpegErrorExitCustom; jerr.pub.error_exit = JpegErrorExitCustom;
try { try {
jpeg_create_decompress(&cinfo); jpeg_create_decompress(&cinfo);
JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes()); JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
(void)jpeg_read_header(&cinfo, TRUE); (void)jpeg_read_header(&cinfo, TRUE);
RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo)); RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
jpeg_calc_output_dimensions(&cinfo); jpeg_calc_output_dimensions(&cinfo);
@ -312,7 +309,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents}); TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8)); auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8));
const int buffer_size = output_tensor->SizeInBytes(); const int buffer_size = output_tensor->SizeInBytes();
JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->GetMutableBuffer()); JSAMPLE *buffer = static_cast<JSAMPLE *>(reinterpret_cast<uchar *>(&(*output_tensor->begin<uint8_t>())));
const int max_scanlines_to_read = skipped_scanlines + crop_h; const int max_scanlines_to_read = skipped_scanlines + crop_h;
// stride refers to output tensor, which has 3 components at most // stride refers to output tensor, which has 3 components at most
const int stride = crop_w * kOutNumComponents; const int stride = crop_w * kOutNumComponents;

@ -96,7 +96,7 @@ Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output); Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output);
bool HasJpegMagic(const unsigned char *data, size_t data_size); bool HasJpegMagic(const std::shared_ptr<Tensor> &input);
void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size); void JpegSetSource(j_decompress_ptr c_info, const void *data, int64_t data_size);

@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
if (input == nullptr) { if (input == nullptr) {
RETURN_STATUS_UNEXPECTED("input tensor is null"); RETURN_STATUS_UNEXPECTED("input tensor is null");
} }
if (!HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) { if (!HasJpegMagic(input)) {
DecodeOp op(true); DecodeOp op(true);
std::shared_ptr<Tensor> decoded; std::shared_ptr<Tensor> decoded;
RETURN_IF_NOT_OK(op.Compute(input, &decoded)); RETURN_IF_NOT_OK(op.Compute(input, &decoded));
@ -43,7 +43,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
jerr.pub.error_exit = JpegErrorExitCustom; jerr.pub.error_exit = JpegErrorExitCustom;
try { try {
jpeg_create_decompress(&cinfo); jpeg_create_decompress(&cinfo);
JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes()); JpegSetSource(&cinfo, input->GetBuffer(), input->SizeInBytes());
(void)jpeg_read_header(&cinfo, TRUE); (void)jpeg_read_header(&cinfo, TRUE);
jpeg_calc_output_dimensions(&cinfo); jpeg_calc_output_dimensions(&cinfo);
} catch (std::runtime_error &e) { } catch (std::runtime_error &e) {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save