|
|
|
@ -62,7 +62,7 @@ AlbumOp::AlbumOp(const std::string &file_dir, bool do_decode, const std::string
|
|
|
|
|
bool StrComp(const std::string &a, const std::string &b) {
|
|
|
|
|
// returns 1 if string "a" represent a numeric value less than string "b"
|
|
|
|
|
// the following will always return name, provided there is only one "." character in name
|
|
|
|
|
// "." character is guaranteed to exist since the extension is checked befor this function call.
|
|
|
|
|
// "." character is guaranteed to exist since the extension is checked before this function call.
|
|
|
|
|
int64_t value_a = std::atoi(a.substr(1, a.find(".")).c_str());
|
|
|
|
|
int64_t value_b = std::atoi(b.substr(1, b.find(".")).c_str());
|
|
|
|
|
return value_a < value_b;
|
|
|
|
@ -385,10 +385,93 @@ Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num,
|
|
|
|
|
return Status::OK();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Status AlbumOp::LoadIntTensorRowByIndex(int index, bool is_array, const nlohmann::json &column_value,
|
|
|
|
|
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
|
|
|
|
|
int i = index;
|
|
|
|
|
// int value
|
|
|
|
|
if (!is_array &&
|
|
|
|
|
(data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
// int array
|
|
|
|
|
if (is_array &&
|
|
|
|
|
(data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
return Status::OK();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Status AlbumOp::LoadTensorRowByIndex(int index, const std::string &file, const nlohmann::json &js,
|
|
|
|
|
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
|
|
|
|
|
int i = index;
|
|
|
|
|
// special case to handle
|
|
|
|
|
if (data_schema_->column(i).name() == "id") {
|
|
|
|
|
// id is internal, special case to load from file
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
// find if key does not exist, insert placeholder nullptr if not found
|
|
|
|
|
if (js.find(data_schema_->column(i).name()) == js.end()) {
|
|
|
|
|
// iterator not found, push nullptr as placeholder
|
|
|
|
|
MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
nlohmann::json column_value = js.at(data_schema_->column(i).name());
|
|
|
|
|
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
|
|
|
|
|
bool is_array = column_value.is_array();
|
|
|
|
|
// load single string
|
|
|
|
|
if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
// load string array
|
|
|
|
|
if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
// load image file
|
|
|
|
|
if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
|
|
|
|
|
std::string image_file_path = column_value;
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
uint32_t orientation = GetOrientation(image_file_path);
|
|
|
|
|
TensorPtr scalar_tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(Tensor::CreateScalar<uint32_t>(orientation, &scalar_tensor));
|
|
|
|
|
(*map_row)["orientation"] = scalar_tensor;
|
|
|
|
|
}
|
|
|
|
|
// load float value
|
|
|
|
|
if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
|
|
|
|
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
// load float array
|
|
|
|
|
if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
|
|
|
|
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIntTensorRowByIndex(i, is_array, column_value, map_row));
|
|
|
|
|
return Status::OK();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Load 1 TensorRow (image,label) using 1 ImageColumns. 1 function call produces 1 TensorRow in a DataBuffer
|
|
|
|
|
// possible optimization: the helper functions of LoadTensorRow should be optimized
|
|
|
|
|
// to take a reference to a column descriptor?
|
|
|
|
|
// the design of this class is to make the code more readable, forgoing minor perfomance gain like
|
|
|
|
|
// the design of this class is to make the code more readable, forgoing minor performance gain like
|
|
|
|
|
// getting rid of duplicated checks
|
|
|
|
|
Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
|
|
|
|
|
std::unordered_map<std::string, std::shared_ptr<Tensor>> *map_row) {
|
|
|
|
@ -414,87 +497,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
|
|
|
|
|
if (!IsReadColumn(data_schema_->column(i).name())) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// special case to handle
|
|
|
|
|
if (data_schema_->column(i).name() == "id") {
|
|
|
|
|
// id is internal, special case to load from file
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// find if key does not exist, insert placeholder nullptr if not found
|
|
|
|
|
if (js.find(data_schema_->column(i).name()) == js.end()) {
|
|
|
|
|
// iterator not found, push nullptr as placeholder
|
|
|
|
|
MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
nlohmann::json column_value = js.at(data_schema_->column(i).name());
|
|
|
|
|
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
|
|
|
|
|
bool is_array = column_value.is_array();
|
|
|
|
|
// load single string
|
|
|
|
|
if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// load string array
|
|
|
|
|
if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// load image file
|
|
|
|
|
if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
|
|
|
|
|
std::string image_file_path = column_value;
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
uint32_t orientation = GetOrientation(image_file_path);
|
|
|
|
|
TensorPtr scalar_tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(Tensor::CreateScalar<uint32_t>(orientation, &scalar_tensor));
|
|
|
|
|
(*map_row)["orientation"] = scalar_tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// load float value
|
|
|
|
|
if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
|
|
|
|
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// load float array
|
|
|
|
|
if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
|
|
|
|
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// int value
|
|
|
|
|
if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
|
|
|
|
|
data_schema_->column(i).type() == DataType::DE_INT32)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
// int array
|
|
|
|
|
if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
|
|
|
|
|
data_schema_->column(i).type() == DataType::DE_INT32)) {
|
|
|
|
|
TensorPtr tensor;
|
|
|
|
|
RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor));
|
|
|
|
|
(*map_row)[data_schema_->column(i).name()] = tensor;
|
|
|
|
|
continue;
|
|
|
|
|
} else {
|
|
|
|
|
MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
RETURN_IF_NOT_OK(LoadTensorRowByIndex(i, file, js, map_row));
|
|
|
|
|
}
|
|
|
|
|
} catch (const std::exception &err) {
|
|
|
|
|
file_handle.close();
|
|
|
|
|