minddata: add file path to error messages

pull/11492/head
xiefangqi 4 years ago
parent 29e5f0fcdb
commit 3f76abcdee

@ -14,22 +14,22 @@
* limitations under the License.
*/
#include <utility>
#include "minddata/dataset/core/tensor_row.h"
#include <utility>
namespace mindspore {
namespace dataset {
TensorRow::TensorRow() noexcept : id_(kDefaultRowId) {}
TensorRow::TensorRow() noexcept : id_(kDefaultRowId), path_({}) {}
TensorRow::TensorRow(size_type n, TensorRow::value_type t) noexcept : id_(kDefaultRowId), row_(n, t) {}
TensorRow::TensorRow(size_type n, TensorRow::value_type t) noexcept : id_(kDefaultRowId), path_({}), row_(n, t) {}
TensorRow::TensorRow(const TensorRow::vector_type &v) : id_(kDefaultRowId), row_(v) {}
TensorRow::TensorRow(const TensorRow::vector_type &v) : id_(kDefaultRowId), path_({}), row_(v) {}
TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &lst) : id_(id), row_(lst) {}
TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &lst) : id_(id), path_({}), row_(lst) {}
TensorRow::TensorRow(const TensorRow &tr) : id_(tr.id_), row_(tr.row_) {}
TensorRow::TensorRow(const TensorRow &tr) : id_(tr.id_), path_(tr.path_), row_(tr.row_) {}
TensorRow &TensorRow::operator=(const TensorRow &tr) {
if (this == &tr) {
@ -37,6 +37,7 @@ TensorRow &TensorRow::operator=(const TensorRow &tr) {
}
row_ = tr.row_;
id_ = tr.id_;
path_ = tr.path_;
return *this;
}
@ -45,13 +46,14 @@ TensorRow &TensorRow::operator=(const std::initializer_list<TensorRow::value_typ
return *this;
}
TensorRow::TensorRow(TensorRow::vector_type &&v) noexcept : id_(kDefaultRowId), row_(std::move(v)) {}
TensorRow::TensorRow(TensorRow::vector_type &&v) noexcept : id_(kDefaultRowId), path_({}), row_(std::move(v)) {}
TensorRow::TensorRow(row_id_type id, std::initializer_list<value_type> &&lst) noexcept
: id_(id), row_(std::move(lst)) {}
: id_(id), path_({}), row_(std::move(lst)) {}
// Move constructor: copies the row id of `tr` and takes over its file paths and tensors.
// The id stored in `tr` is not modified here.
TensorRow::TensorRow(TensorRow &&tr) noexcept
    : id_(tr.id_), path_(std::move(tr.path_)), row_(std::move(tr.row_)) {}
@ -62,6 +64,7 @@ TensorRow &TensorRow::operator=(TensorRow &&tr) noexcept {
row_ = std::move(tr.row_);
id_ = tr.id_;
tr.id_ = kDefaultRowId;
path_ = std::move(tr.path_);
return *this;
}

@ -19,6 +19,7 @@
#include <deque>
#include <memory>
#include <string>
#include <vector>
#include "minddata/dataset/core/tensor.h"
@ -164,6 +165,10 @@ class TensorRow {
void setId(row_id_type id) { id_ = id; }
// Getter for the file paths recorded for this row.
// @return a copy of path_; every call copies the whole vector, so callers that query it
//         repeatedly may prefer to store the result in a local first
std::vector<std::string> getPath() const { return path_; }
// Setter for the file paths recorded for this row; any previously stored paths are discarded.
// @param path - the new paths. It is taken by value, so its buffer is swapped into path_
//               instead of being copied a second time.
void setPath(std::vector<std::string> path) { path_.swap(path); }
const vector_type &getRow() const { return row_; }
int64_t SizeInBytes() const {
@ -219,6 +224,7 @@ class TensorRow {
protected:
row_id_type id_;
std::vector<std::string> path_;
std::vector<std::shared_ptr<Tensor>> row_;
};
} // namespace dataset

@ -41,11 +41,31 @@ Status CpuMapJob::Run(std::vector<TensorRow> in, std::vector<TensorRow> *out) {
// Call compute function for cpu
Status rc = ops_[i]->Compute(input_row, &result_row);
if (rc.IsError()) {
if (input_row.getId() >= 0) {
MS_LOG(ERROR) << "The TensorRow with id=" + std::to_string(input_row.getId()) + " failed on " +
std::to_string(i) + " TensorOp in Map: " + ops_[i]->Name();
std::string err_msg = "";
std::string op_name = ops_[i]->Name();
std::string abbr_op_name = op_name.substr(0, op_name.length() - 2);
err_msg += "map operation: [" + abbr_op_name + "] failed. ";
if (input_row.getPath().size() > 0 && !input_row.getPath()[0].empty()) {
err_msg += "The corresponding data files: " + input_row.getPath()[0];
if (input_row.getPath().size() > 1) {
std::set<std::string> path_set;
path_set.insert(input_row.getPath()[0]);
for (auto j = 1; j < input_row.getPath().size(); j++) {
if (!input_row.getPath()[j].empty() && path_set.find(input_row.getPath()[j]) == path_set.end()) {
err_msg += ", " + input_row.getPath()[j];
path_set.insert(input_row.getPath()[j]);
}
}
}
err_msg += ". ";
}
return rc;
std::string tensor_err_msg = rc.GetErrDescription();
if (rc.GetLineOfCode() < 0) {
err_msg += "Error description:\n";
}
err_msg += tensor_err_msg;
rc.SetErrDescription(err_msg);
RETURN_IF_NOT_OK(rc);
}
// Assign result_row to to_process for the next TensorOp processing, except for the last TensorOp in the list.

@ -288,6 +288,13 @@ Status MapOp::WorkerCompute(DataBuffer *in_buffer, TensorQTable *new_tensor_tabl
(void)std::transform(to_process_indices_.begin(), to_process_indices_.end(), std::back_inserter(to_process),
[&cur_row](const auto &it) { return std::move(cur_row[it]); });
to_process.setId(cur_row.getId());
std::vector<std::string> cur_row_path = cur_row.getPath();
if (cur_row_path.size() > 0) {
std::vector<std::string> to_process_path;
(void)std::transform(to_process_indices_.begin(), to_process_indices_.end(), std::back_inserter(to_process_path),
[&cur_row_path](const auto &it) { return cur_row_path[it]; });
to_process.setPath(to_process_path);
}
job_input_table.push_back(std::move(to_process));
original_table.push_back(std::move(cur_row));
}

@ -88,6 +88,9 @@ Status ProjectOp::Project(std::unique_ptr<DataBuffer> *data_buffer) {
TensorRow new_row;
(void)std::transform(projected_column_indices_.begin(), projected_column_indices_.end(),
std::back_inserter(new_row), [&current_row](uint32_t x) { return current_row[x]; });
// If the columns were changed by a preceding map, we cannot tell which columns' paths to keep,
// so printing file_path after ProjectOp is not supported for now.
new_row.setPath({});
new_tensor_table->push_back(new_row);
}
(*data_buffer)->set_tensor_table(std::move(new_tensor_table));

@ -95,7 +95,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir
bool StrComp(const std::string &a, const std::string &b) {
// returns 1 if string "a" represent a numeric value less than string "b"
// the following will always return name, provided there is only one "." character in name
// "." character is guaranteed to exist since the extension is checked befor this function call.
// "." character is guaranteed to exist since the extension is checked before this function call.
int64_t value_a = std::stoi(a.substr(1, a.find(".")).c_str());
int64_t value_b = std::stoi(b.substr(1, b.find(".")).c_str());
return value_a < value_b;
@ -441,7 +441,7 @@ Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num,
// Load 1 TensorRow (image,label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer
// possible optimization: the helper functions of LoadTensorRow should be optimized
// to take a reference to a column descriptor?
// the design of this class is to make the code more readable, forgoing minor perfomance gain like
// the design of this class is to make the code more readable, forgoing minor performance gain like
// getting rid of duplicated checks
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, TensorRow *row) {
// testing here is to just print out file path
@ -530,6 +530,8 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, Tenso
}
}
file_handle.close();
std::vector<std::string> path(row->size(), folder_path_ + file);
row->setPath(path);
return Status::OK();
}

@ -87,6 +87,7 @@ CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::stri
extensions_(exts),
data_schema_(std::move(schema)),
num_rows_in_attr_file_(0),
attr_file_(""),
usage_(usage) {
attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size);
io_block_queues_.Init(num_workers_, queue_size);
@ -120,6 +121,7 @@ Status CelebAOp::ParseAttrFile() {
"Invalid file, failed to open Celeba attr file: " + attr_file_name);
}
attr_file_ = (folder_path / "list_attr_celeba.txt").toString();
const auto PushBackToQueue = [this](std::vector<std::string> &vec, std::ifstream &attr_file,
std::ifstream &partition_file) {
Status s = attr_info_queue_->EmplaceBack(vec);
@ -409,6 +411,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string,
label->Squeeze();
(*row) = TensorRow(row_id, {std::move(image), std::move(label)});
// Add file path info
row->setPath({image_path.toString(), attr_file_});
return Status::OK();
}

@ -232,6 +232,7 @@ class CelebAOp : public ParallelOp, RandomAccessOp {
std::vector<std::pair<std::string, std::vector<int32_t>>> image_labels_vec_;
std::string usage_;
std::ifstream partition_file_;
std::string attr_file_;
};
} // namespace dataset
} // namespace mindspore

@ -216,14 +216,19 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
std::shared_ptr<Tensor> fine_label;
std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
std::shared_ptr<Tensor> copy_image;
uint64_t path_index = std::ceil(index / kCifarBlockImageNum);
RETURN_IF_NOT_OK(Tensor::CreateFromTensor(ori_image, &copy_image));
RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[0], &label));
if (cifar_image_label_pairs_[index].second.size() > 1) {
RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[1], &fine_label));
(*trow) = TensorRow(index, {copy_image, std::move(label), std::move(fine_label)});
// Add file path info
trow->setPath({path_record_[path_index], path_record_[path_index], path_record_[path_index]});
} else {
(*trow) = TensorRow(index, {copy_image, std::move(label)});
// Add file path info
trow->setPath({path_record_[path_index], path_record_[path_index]});
}
return Status::OK();
@ -310,6 +315,8 @@ Status CifarOp::ReadCifar10BlockData() {
(void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char));
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar10 file: " + file);
(void)cifar_raw_data_block_->EmplaceBack(image_data);
// Add file path info
path_record_.push_back(file);
}
in.close();
}
@ -350,6 +357,8 @@ Status CifarOp::ReadCifar100BlockData() {
(void)in.read(reinterpret_cast<char *>(&(image_data[0])), block_size * sizeof(unsigned char));
CHECK_FAIL_RETURN_UNEXPECTED(!in.fail(), "Invalid data, failed to read data from cifar100 file: " + file);
(void)cifar_raw_data_block_->EmplaceBack(image_data);
// Add file path info
path_record_.push_back(file);
}
in.close();
}

@ -219,7 +219,7 @@ class CifarOp : public ParallelOp, public RandomAccessOp {
// @return
Status ParseCifarData();
// Method derived from RandomAccess Op, enable Sampler to get all ids for each calss
// Method derived from RandomAccess Op, enable Sampler to get all ids for each class
// @param (std::map<uint64_t, std::vector<uint64_t >> * map - key label, val all ids for this class
// @return Status The status code returned
Status GetClassIds(std::map<int32_t, std::vector<int64_t>> *cls_ids) const override;
@ -238,6 +238,7 @@ class CifarOp : public ParallelOp, public RandomAccessOp {
const std::string usage_; // can only be either "train" or "test"
std::unique_ptr<Queue<std::vector<unsigned char>>> cifar_raw_data_block_;
std::vector<std::string> cifar_files_;
std::vector<std::string> path_record_;
std::vector<std::pair<std::shared_ptr<Tensor>, std::vector<uint32_t>>> cifar_image_label_pairs_;
};
} // namespace dataset

@ -202,6 +202,9 @@ Status ClueOp::LoadFile(const std::string &file, const int64_t start_offset, con
}
int cols_count = cols_to_keyword_.size();
TensorRow tRow(cols_count, nullptr);
// Add file path info
std::vector<std::string> file_path(cols_count, file);
tRow.setPath(file_path);
tensor_table->push_back(std::move(tRow));
int cout = 0;
for (auto &p : cols_to_keyword_) {

@ -97,7 +97,7 @@ Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) {
ColDescriptor(std::string(kJsonAnnoArea), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
break;
default:
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type shoule be Detection, Stuff, Keypoint or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
}
*ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_,
builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_,
@ -263,7 +263,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Te
} else if (task_type_ == TaskType::Panoptic) {
RETURN_IF_NOT_OK(LoadMixTensorRow(row_id, image_id, image, coordinate, trow));
} else {
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type shoule be Detection, Stuff or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff or Panoptic.");
}
return Status::OK();
@ -302,6 +302,8 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd));
(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)});
std::string image_full_path = image_folder_path_ + std::string("/") + image_id;
trow->setPath({image_full_path, annotation_path_, annotation_path_, annotation_path_});
return Status::OK();
}
@ -324,6 +326,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
RETURN_IF_NOT_OK(Tensor::CreateFromVector(item_queue, TensorShape(bbox_dim), &item));
(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)});
std::string image_full_path = image_folder_path_ + std::string("/") + image_id;
trow->setPath({image_full_path, annotation_path_, annotation_path_});
return Status::OK();
}
@ -332,7 +336,7 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
// column ["bbox"] with datatype=float32
// column ["category_id"] with datatype=uint32
// column ["iscrowd"] with datatype=uint32
// column ["area"] with datattype=uint32
// column ["area"] with datatype=uint32
Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id, std::shared_ptr<Tensor> image,
std::shared_ptr<Tensor> coordinate, TensorRow *trow) {
std::shared_ptr<Tensor> category_id, iscrowd, area;
@ -365,6 +369,8 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
(*trow) = TensorRow(
row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)});
std::string image_full_path = image_folder_path_ + std::string("/") + image_id;
trow->setPath({image_full_path, annotation_path_, annotation_path_, annotation_path_, annotation_path_});
return Status::OK();
}
@ -461,7 +467,7 @@ Status CocoOp::ParseAnnotationIds() {
RETURN_IF_NOT_OK(PanopticColumnLoad(annotation, file_name, image_id));
break;
default:
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type shoule be Detection, Stuff, Keypoint or Panoptic.");
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
}
}
for (auto img : image_que) {

@ -110,12 +110,14 @@ Status CsvOp::Init() {
}
CsvOp::CsvParser::CsvParser(int32_t worker_id, std::shared_ptr<JaggedConnector> connector, int64_t rows_per_buffer,
char field_delim, std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default)
char field_delim, std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default,
std::string file_path)
: worker_id_(worker_id),
buffer_connector_(connector),
csv_rows_per_buffer_(rows_per_buffer),
csv_field_delim_(field_delim),
column_default_(column_default),
file_path_(file_path),
cur_state_(START_OF_FILE),
pos_(0),
cur_row_(0),
@ -358,8 +360,11 @@ Status CsvOp::CsvParser::InitCsvParser() {
{{State::START_OF_FILE, Message::MS_NORMAL},
{State::UNQUOTE,
[this](CsvParser &, char c) -> int {
TensorRow row(column_default_.size(), nullptr);
std::vector<std::string> file_path(column_default_.size(), file_path_);
row.setPath(file_path);
this->tensor_table_ = std::make_unique<TensorQTable>();
this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr));
this->tensor_table_->push_back(row);
this->str_buf_[0] = c;
this->pos_ = 1;
return 0;
@ -367,15 +372,21 @@ Status CsvOp::CsvParser::InitCsvParser() {
{{State::START_OF_FILE, Message::MS_DELIM},
{State::DELIM,
[this](CsvParser &, char c) -> int {
TensorRow row(column_default_.size(), nullptr);
std::vector<std::string> file_path(column_default_.size(), file_path_);
row.setPath(file_path);
this->tensor_table_ = std::make_unique<TensorQTable>();
this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr));
this->tensor_table_->push_back(row);
return this->PutRecord(c);
}}},
{{State::START_OF_FILE, Message::MS_QUOTE},
{State::QUOTE,
[this](CsvParser &, char c) -> int {
TensorRow row(column_default_.size(), nullptr);
std::vector<std::string> file_path(column_default_.size(), file_path_);
row.setPath(file_path);
this->tensor_table_ = std::make_unique<TensorQTable>();
this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr));
this->tensor_table_->push_back(row);
this->pos_ = 0;
return 0;
}}},
@ -458,7 +469,10 @@ Status CsvOp::CsvParser::InitCsvParser() {
{State::UNQUOTE,
[this](CsvParser &, char c) -> int {
if (this->total_rows_ > this->start_offset_ && this->total_rows_ <= this->end_offset_) {
this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr));
TensorRow row(column_default_.size(), nullptr);
std::vector<std::string> file_path(column_default_.size(), file_path_);
row.setPath(file_path);
this->tensor_table_->push_back(row);
}
this->str_buf_[0] = c;
this->pos_ = 1;
@ -468,7 +482,10 @@ Status CsvOp::CsvParser::InitCsvParser() {
{State::DELIM,
[this](CsvParser &, char c) -> int {
if (this->total_rows_ > this->start_offset_ && this->total_rows_ <= this->end_offset_) {
this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr));
TensorRow row(column_default_.size(), nullptr);
std::vector<std::string> file_path(column_default_.size(), file_path_);
row.setPath(file_path);
this->tensor_table_->push_back(row);
}
return this->PutRecord(c);
}}},
@ -476,7 +493,10 @@ Status CsvOp::CsvParser::InitCsvParser() {
{State::QUOTE,
[this](CsvParser &, char c) -> int {
if (this->total_rows_ > this->start_offset_ && this->total_rows_ <= this->end_offset_) {
this->tensor_table_->push_back(TensorRow(column_default_.size(), nullptr));
TensorRow row(column_default_.size(), nullptr);
std::vector<std::string> file_path(column_default_.size(), file_path_);
row.setPath(file_path);
this->tensor_table_->push_back(row);
}
return 0;
}}},
@ -497,7 +517,7 @@ Status CsvOp::Reset() {
Status CsvOp::LoadFile(const std::string &file, const int64_t start_offset, const int64_t end_offset,
const int32_t worker_id) {
CsvParser csv_parser(worker_id, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_);
CsvParser csv_parser(worker_id, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_, file);
csv_parser.SetStartOffset(start_offset);
csv_parser.SetEndOffset(end_offset);
std::ifstream ifs;
@ -512,7 +532,7 @@ Status CsvOp::LoadFile(const std::string &file, const int64_t start_offset, cons
csv_parser.Reset();
try {
while (ifs.good()) {
// when ifstream reachs the end of file, the function get() return std::char_traits<char>::eof()
// when ifstream reaches the end of file, the function get() return std::char_traits<char>::eof()
// which is a 32-bit -1, it's not equal to the 8-bit -1 on Euler OS. So instead of char, we use
// int to receive its return value.
int chr = ifs.get();
@ -799,7 +819,7 @@ Status CsvOp::CalculateNumRowsPerShard() {
}
int64_t CsvOp::CountTotalRows(const std::string &file) {
CsvParser csv_parser(0, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_);
CsvParser csv_parser(0, jagged_buffer_connector_, rows_per_buffer_, field_delim_, column_default_list_, file);
std::ifstream ifs;
ifs.open(file, std::ifstream::in);
if (!ifs.is_open()) {

@ -64,7 +64,7 @@ class CsvOp : public ParallelOp {
CsvParser() = delete;
CsvParser(int32_t worker_id, std::shared_ptr<JaggedConnector> connector, int64_t rows_per_buffer, char field_delim,
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default);
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path);
~CsvParser() = default;
@ -142,6 +142,7 @@ class CsvOp : public ParallelOp {
std::unique_ptr<TensorQTable> tensor_table_;
std::unique_ptr<DataBuffer> cur_buffer_;
std::string err_message_;
std::string file_path_;
};
class Builder {

@ -230,6 +230,7 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr,
}
}
(*trow) = TensorRow(row_id, {std::move(image), std::move(label)});
trow->setPath({folder_path_ + (pairPtr->first), std::string("")});
return Status::OK();
}

@ -219,6 +219,7 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string
}
}
(*trow) = TensorRow(row_id, {std::move(image), std::move(label)});
trow->setPath({data.first, file_});
return Status::OK();
}

@ -284,6 +284,8 @@ Status MindRecordOp::GetBufferFromReader(std::unique_ptr<DataBuffer> *fetched_bu
if (task_type == mindrecord::TaskType::kPaddedTask) {
TensorRow tensor_row;
RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, {}, mindrecord::json(), task_type));
std::vector<std::string> file_path(tensor_row.size(), dataset_file_[0]);
tensor_row.setPath(file_path);
tensor_table->push_back(std::move(tensor_row));
}
if (tupled_buffer.empty()) break;
@ -293,6 +295,8 @@ Status MindRecordOp::GetBufferFromReader(std::unique_ptr<DataBuffer> *fetched_bu
mindrecord::json columns_json = std::get<1>(tupled_row);
TensorRow tensor_row;
RETURN_IF_NOT_OK(LoadTensorRow(&tensor_row, columns_blob, columns_json, task_type));
std::vector<std::string> file_path(tensor_row.size(), dataset_file_[0]);
tensor_row.setPath(file_path);
tensor_table->push_back(std::move(tensor_row));
}
}

@ -82,6 +82,8 @@ MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per
row_cnt_(0),
folder_path_(folder_path),
rows_per_buffer_(rows_per_buffer),
image_path_({}),
label_path_({}),
data_schema_(std::move(data_schema)) {
io_block_queues_.Init(num_workers, queue_size);
}
@ -191,6 +193,7 @@ Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pa
RETURN_IF_NOT_OK(Tensor::CreateScalar(mnist_pair.second, &label));
(*trow) = TensorRow(row_id, {std::move(image), std::move(label)});
trow->setPath({image_path_[row_id], label_path_[row_id]});
return Status::OK();
}
@ -346,6 +349,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(),
reinterpret_cast<unsigned char *>(pixels), &image));
image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j]));
image_path_.push_back(image_names_[index]);
label_path_.push_back(label_names_[index]);
}
return Status::OK();
}

@ -251,6 +251,8 @@ class MnistOp : public ParallelOp, public RandomAccessOp {
std::vector<MnistLabelPair> image_label_pairs_;
std::vector<std::string> image_names_;
std::vector<std::string> label_names_;
std::vector<std::string> image_path_;
std::vector<std::string> label_path_;
};
} // namespace dataset
} // namespace mindspore

@ -147,9 +147,6 @@ Status TextFileOp::Reset() {
}
Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTable> *tensor_table, int64_t row) {
TensorRow tRow(1, nullptr);
(*tensor_table)->push_back(std::move(tRow));
std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateScalar(line, &tensor));
(**tensor_table)[row][0] = std::move(tensor);
@ -183,6 +180,9 @@ Status TextFileOp::LoadFile(const std::string &file, const int64_t start_offset,
continue;
}
TensorRow tRow(1, nullptr);
tRow.setPath({file});
tensor_table->push_back(std::move(tRow));
RETURN_IF_NOT_OK(LoadTensor(line, &tensor_table, rows_each_buffer));
rows_each_buffer++;
rows_total++;

@ -599,6 +599,11 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off
std::string errMsg = "Invalid file, failed to parse tfrecord file : " + serialized_example;
RETURN_STATUS_UNEXPECTED(errMsg);
}
int32_t num_columns = data_schema_->NumColumns();
TensorRow newRow(num_columns, nullptr);
std::vector<std::string> file_path(num_columns, filename);
newRow.setPath(file_path);
new_tensor_table->push_back(std::move(newRow));
RETURN_IF_NOT_OK(LoadExample(&tf_file, &new_tensor_table, rows_read));
rows_read++;
}
@ -629,9 +634,6 @@ Status TFReaderOp::LoadFile(const std::string &filename, const int64_t start_off
Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, std::unique_ptr<TensorQTable> *tensor_table,
int64_t row) {
int32_t num_columns = data_schema_->NumColumns();
TensorRow newRow(num_columns, nullptr);
(*tensor_table)->push_back(std::move(newRow));
for (int32_t col = 0; col < num_columns; ++col) {
const ColDescriptor current_col = data_schema_->column(col);
const dataengine::Features &example_features = tf_file->features();

@ -213,6 +213,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Ten
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target));
(*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
trow->setPath({kImageFile, kTargetFile});
} else if (task_type_ == TaskType::Detection) {
std::shared_ptr<Tensor> image;
TensorRow annotation;
@ -223,6 +224,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Ten
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
trow->setId(row_id);
trow->setPath({kImageFile, kAnnotationFile, kAnnotationFile, kAnnotationFile, kAnnotationFile});
trow->push_back(std::move(image));
trow->insert(trow->end(), annotation.begin(), annotation.end());
}

@ -159,6 +159,8 @@ Status ZipOp::fillBuffer(TensorQTable *const table) {
return Status::OK();
}
// else we got a row so pack it into the tensor table.
// Currently we don't support printing error info after zip
new_row.setPath({});
table->push_back(std::move(new_row));
}
return Status::OK();

@ -141,6 +141,12 @@ class Status {
StatusCode get_code() const;
int GetLineOfCode() const { return line_of_code_; }
std::string SetErrDescription(const std::string &err_description);
std::string GetErrDescription() const { return err_description_; }
friend std::ostream &operator<<(std::ostream &os, const Status &s);
explicit operator bool() const { return (get_code() == StatusCode::kOK); }
@ -165,6 +171,9 @@ class Status {
private:
StatusCode code_;
int line_of_code_;
std::string file_name_;
std::string err_description_;
std::string err_msg_;
};

@ -25,11 +25,10 @@ namespace mindspore {
namespace dataset {
// Calls the wrapped function pointer on `input` and stores what it returns in `output`.
// @param input - row passed to c_func_ptr_
// @param output - receives the row returned by c_func_ptr_
// @return Status - OK when the call completes; an unexpected-error status carrying the
//                  exception text when the call throws a std::exception
Status CFuncOp::Compute(const TensorRow &input, TensorRow *output) {
  IO_CHECK_VECTOR(input, output);
  try {
    *output = c_func_ptr_(input);
  } catch (const std::exception &e) {
    // Keep the exception text in the status so the failure reason is not lost.
    RETURN_STATUS_UNEXPECTED("Error raised, " + std::string(e.what()));
  }
  return Status::OK();
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save