Clean up part 1

Removed extra fields in schema

Fixed test cases

Fixing CI
pull/2733/head
Eric 5 years ago committed by eric
parent a5c1e09469
commit 88bb65768e

@ -183,35 +183,7 @@ TensorShape ColDescriptor::shape() const {
const char DataSchema::DEFAULT_DATA_SCHEMA_FILENAME[] = "datasetSchema.json";
// Constructor 1: Simple constructor. Sets the dataset type to kUnknown and the row count to 0.
DataSchema::DataSchema() : dataset_type_(DatasetType::kUnknown), num_rows_(0) {}
// Translates the dataset type string into its DatasetType enum value.
// "ARROW" maps to kArrow and "TF" maps to kTf; every other string yields kUnknown.
DatasetType DataSchema::GetDatasetTYpeFromString(const std::string &type) const {
  if (type == "ARROW") {
    return DatasetType::kArrow;
  }
  if (type == "TF") {
    return DatasetType::kTf;
  }
  // Anything that is not recognised above is reported as unknown.
  return DatasetType::kUnknown;
}
// Reads the json schema file at schema_file_path and stores the dataset type (as the raw
// string and as its enum value) together with the directory structure string.
// Any std::exception raised while reading or parsing is converted into a Status via
// RETURN_STATUS_UNEXPECTED; otherwise Status::OK() is returned.
Status DataSchema::LoadDatasetType(const std::string &schema_file_path) {
  try {
    std::ifstream schema_stream(schema_file_path);
    nlohmann::json schema_json;
    schema_stream >> schema_json;
    // Both fields use an empty string as the fallback passed to value().
    dataset_type_str_ = schema_json.value("datasetType", "");
    dataset_type_ = GetDatasetTYpeFromString(dataset_type_str_);
    dir_structure_ = schema_json.value("directoryStructure", "");
  } catch (const std::exception &err) {
    // Report the failure through the return code instead of letting the exception escape.
    RETURN_STATUS_UNEXPECTED("Schema file failed to load");
  }
  return Status::OK();
}
// Default constructor: starts with a row count of 0.
DataSchema::DataSchema() : num_rows_(0) {}
// Internal helper function. Parses the json schema file in any order and produces a schema that
// does not follow any particular order (json standard does not enforce any ordering protocol).
@ -399,8 +371,6 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string,
nlohmann::json js = nlohmann::json::parse(schema_json_string);
RETURN_IF_NOT_OK(PreLoadExceptionCheck(js));
num_rows_ = js.value("numRows", 0);
dataset_type_str_ = js.value("datasetType", "");
dataset_type_ = GetDatasetTYpeFromString(dataset_type_str_);
nlohmann::json column_tree = js.at("columns");
if (column_tree.empty()) {
RETURN_STATUS_UNEXPECTED("columns is null");
@ -430,16 +400,10 @@ const ColDescriptor &DataSchema::column(int32_t idx) const {
// A print method typically used for debugging
// Writes, in order, to `out`: the dataset type string (or a placeholder when it is empty),
// every column descriptor on its own line, and the dataset type enum as an unsigned integer.
void DataSchema::Print(std::ostream &out) const {
out << "Dataset type string : (";
// Print a placeholder when no type string is stored.
if (dataset_type_str_.empty()) {
out << "none specified)\n";
} else {
out << dataset_type_str_ << ")\n";
}
out << "Dataset schema: (";
// One column descriptor per line, in the order held by col_descs_.
for (const auto &col_desc : col_descs_) {
out << col_desc << "\n";
}
// The enum is cast so that its numeric value is printed.
out << "Dataset type: " << static_cast<uint32_t>(dataset_type_) << "\n";
}
// Adds a column descriptor to the schema

File diff suppressed because it is too large Load Diff

@ -4019,8 +4019,6 @@ class Schema:
else:
raise RuntimeError("Unknown field %s" % k)
if self.dataset_type is None:
raise RuntimeError("DatasetType field is missing.")
if self.columns is None:
raise RuntimeError("Columns are missing.")
if self.num_rows is not None:

@ -47,6 +47,7 @@ SET(DE_UT_SRCS
rescale_op_test.cc
resize_bilinear_op_test.cc
resize_op_test.cc
schema_test.cc
shuffle_op_test.cc
stand_alone_samplers_test.cc
status_test.cc

@ -0,0 +1,68 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "common/utils.h"
#include "dataset/core/client.h"
#include "dataset/core/global_context.h"
#include "dataset/engine/data_schema.h"
#include "dataset/util/path.h"
#include "dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
// Test fixture for the schema tests below; it adds no members of its own on top of
// UT::DatasetOpTesting.
class MindDataTestSchema : public UT::DatasetOpTesting {
protected:
};
// Loads testDataset2/datasetSchema.json (with an empty second argument to LoadSchemaFile)
// and expects the resulting schema to contain 4 columns.
TEST_F(MindDataTestSchema, TestOldSchema) {
  std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  Status rc = schema->LoadSchemaFile(schema_file, {});
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during schema load: " << common::SafeCStr(rc.ToString()) << ".";
    // Non-fatal failure with a readable message, instead of asserting on a literal `false`.
    ADD_FAILURE() << "Loading schema file failed: " << schema_file;
  } else {
    int32_t num_cols = schema->NumColumns();
    // EXPECT_EQ reports both the actual and the expected count on mismatch.
    EXPECT_EQ(num_cols, 4);
  }
}
// Loads testAlbum/fullSchema.json (with an empty second argument to LoadSchemaFile)
// and expects the resulting schema to contain 7 columns.
TEST_F(MindDataTestSchema, TestAlbumSchema) {
  std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
  std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
  Status rc = schema->LoadSchemaFile(schema_file, {});
  if (rc.IsError()) {
    MS_LOG(ERROR) << "Return code error detected during schema load: " << common::SafeCStr(rc.ToString()) << ".";
    // Non-fatal failure with a readable message, instead of asserting on a literal `false`.
    ADD_FAILURE() << "Loading schema file failed: " << schema_file;
  } else {
    int32_t num_cols = schema->NumColumns();
    MS_LOG(INFO) << "num_cols: " << num_cols << ".";
    // EXPECT_EQ reports both the actual and the expected count on mismatch.
    EXPECT_EQ(num_cols, 7);
  }
}

@ -0,0 +1,20 @@
{
"columns": {
"image": {
"type": "uint8",
"rank": 1
},
"label" : {
"type": "int32",
"rank": 1
},
"id" : {
"type": "int64",
"rank": 0
},
"_priority" : {
"type": "float64",
"rank": 0
}
}
}

@ -0,0 +1,32 @@
{
"columns": {
"image": {
"type": "uint8",
"rank": 1
},
"label" : {
"type": "int32",
"rank": 1
},
"id" : {
"type": "int64",
"rank": 0
},
"_priority" : {
"type": "float64",
"rank": 0
},
"_embedding" : {
"type": "uint8",
"rank": 1
},
"_segmented_image" : {
"type": "uint8",
"rank": 1
},
"_processed_image" : {
"type": "uint8",
"rank": 1
}
}
}

@ -0,0 +1,22 @@
import json
import os
def dump_json_from_dict(structure, file_name):
    """Serialize ``structure`` as JSON into the file ``file_name + '.json'``.

    Args:
        structure: JSON-serializable object to write.
        file_name: Destination path without the ``.json`` suffix; it may contain
            directory components such as ``images/1``.
    """
    target = file_name + '.json'
    # Create the destination folder first: open() raises FileNotFoundError when the
    # parent directory of the target does not exist.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(target, 'w') as out_file:
        json.dump(structure, out_file)
if __name__ == '__main__':
    # Write one JSON record per entry of the image folder; output files are numbered from 1
    # in the order os.listdir returns the entries.
    DIRECTORY = "imagefolder"
    for i, filename in enumerate(os.listdir(DIRECTORY), start=1):
        # The same path is used for the image, segmented image and processed image fields.
        image_path = os.path.join(DIRECTORY, filename)
        default_dict = {
            'dataset': '',
            'image': image_path,
            'label': [1, 2],
            '_priority': 0.8,
            '_embedding': 'sample.bin',
            '_segmented_image': image_path,
            '_processed_image': image_path,
        }
        dump_json_from_dict(default_dict, 'images/' + str(i))

Binary file not shown.

After

Width:  |  Height:  |  Size: 422 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 422 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 429 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 832 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 422 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 147 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_decoded.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_decoded.jpg", "_processed_image": "imagefolder/apple_expect_decoded.jpg"}

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_resize_bilinear.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_resize_bilinear.jpg", "_processed_image": "imagefolder/apple_expect_resize_bilinear.jpg"}

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_changemode.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_changemode.jpg", "_processed_image": "imagefolder/apple_expect_changemode.jpg"}

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_not_flip.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_not_flip.jpg", "_processed_image": "imagefolder/apple_expect_not_flip.jpg"}

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_flipped_horizontal.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_horizontal.jpg", "_processed_image": "imagefolder/apple_expect_flipped_horizontal.jpg"}

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_rescaled.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_rescaled.jpg", "_processed_image": "imagefolder/apple_expect_rescaled.jpg"}

@ -0,0 +1 @@
{"dataset": "", "image": "imagefolder/apple_expect_flipped_vertical.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_vertical.jpg", "_processed_image": "imagefolder/apple_expect_flipped_vertical.jpg"}
Loading…
Cancel
Save