Clean up part 1

Removed extra fields in schema. Fixed test cases. Fixing CI.
5 years ago · 88bb65768e
parent a5c1e09469
commit 88bb65768e
23 changed files with 241 additions and 148 deletions
--- a/mindspore/ccsrc/dataset/engine/data_schema.cc
+++ b/mindspore/ccsrc/dataset/engine/data_schema.cc
@ -183,35 +183,7 @@ TensorShape ColDescriptor::shape() const {
 const char DataSchema::DEFAULT_DATA_SCHEMA_FILENAME[] = "datasetSchema.json";

 // Constructor 1: Simple constructor that only sets num_rows_ to 0; no schema is loaded.
-DataSchema::DataSchema() : dataset_type_(DatasetType::kUnknown), num_rows_(0) {}
-
-DatasetType DataSchema::GetDatasetTYpeFromString(const std::string &type) const {
-  // Convert the string to a more easy to manage enum flavour of the buffer type.
-  if (type == "ARROW") {
-    return DatasetType::kArrow;
-  } else if (type == "TF") {
-    return DatasetType::kTf;
-  } else {
-    return DatasetType::kUnknown;
-  }
-}
-
-Status DataSchema::LoadDatasetType(const std::string &schema_file_path) {
-  try {
-    std::ifstream in(schema_file_path);
-    nlohmann::json js;
-    in >> js;
-    // First, get the column for the type of dataset.
-    dataset_type_str_ = js.value("datasetType", "");
-    dataset_type_ = GetDatasetTYpeFromString(dataset_type_str_);
-    dir_structure_ = js.value("directoryStructure", "");
-  }
-  // Catch any exception and convert to Status return code
-  catch (const std::exception &err) {
-    RETURN_STATUS_UNEXPECTED("Schema file failed to load");
-  }
-  return Status::OK();
-}
+DataSchema::DataSchema() : num_rows_(0) {}

 // Internal helper function. Parses the json schema file in any order and produces a schema that
 // does not follow any particular order (json standard does not enforce any ordering protocol).
@ -399,8 +371,6 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string,
    nlohmann::json js = nlohmann::json::parse(schema_json_string);
    RETURN_IF_NOT_OK(PreLoadExceptionCheck(js));
    num_rows_ = js.value("numRows", 0);
-    dataset_type_str_ = js.value("datasetType", "");
-    dataset_type_ = GetDatasetTYpeFromString(dataset_type_str_);
    nlohmann::json column_tree = js.at("columns");
    if (column_tree.empty()) {
      RETURN_STATUS_UNEXPECTED("columns is null");
@ -430,16 +400,10 @@ const ColDescriptor &DataSchema::column(int32_t idx) const {

 // A print method typically used for debugging
 void DataSchema::Print(std::ostream &out) const {
-  out << "Dataset type string : (";
-  if (dataset_type_str_.empty()) {
-    out << "none specified)\n";
-  } else {
-    out << dataset_type_str_ << ")\n";
-  }
+  // The header opens a parenthesised list; every column descriptor is then written on its own line.
+  out << "Dataset schema: (";
  for (const auto &col_desc : col_descs_) {
    out << col_desc << "\n";
  }
-  out << "Dataset type: " << static_cast<uint32_t>(dataset_type_) << "\n";
+  // Close the parenthesis opened by the header so the printed output is balanced.
+  out << ")\n";
 }

 // Adds a column descriptor to the schema
--- a/mindspore/ccsrc/dataset/engine/data_schema.h
+++ b/mindspore/ccsrc/dataset/engine/data_schema.h
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@ -4019,8 +4019,6 @@ class Schema:
            else:
                raise RuntimeError("Unknown field %s" % k)

-        if self.dataset_type is None:
-            raise RuntimeError("DatasetType field is missing.")
        if self.columns is None:
            raise RuntimeError("Columns are missing.")
        if self.num_rows is not None:
--- a/tests/ut/cpp/dataset/CMakeLists.txt
+++ b/tests/ut/cpp/dataset/CMakeLists.txt
@ -47,6 +47,7 @@ SET(DE_UT_SRCS
        rescale_op_test.cc
        resize_bilinear_op_test.cc
        resize_op_test.cc
+	schema_test.cc
        shuffle_op_test.cc
        stand_alone_samplers_test.cc
        status_test.cc
--- a/tests/ut/cpp/dataset/schema_test.cc
+++ b/tests/ut/cpp/dataset/schema_test.cc
@ -0,0 +1,68 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+#include "common/common.h"
+#include "common/utils.h"
+#include "dataset/core/client.h"
+#include "dataset/core/global_context.h"
+#include "dataset/engine/data_schema.h"
+#include "dataset/util/path.h"
+#include "dataset/util/status.h"
+#include "gtest/gtest.h"
+#include "utils/log_adapter.h"
+#include "securec.h"
+
+namespace common = mindspore::common;
+
+using namespace mindspore::dataset;
+using mindspore::MsLogLevel::ERROR;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::LogStream;
+
+// Test fixture for the DataSchema tests in this file; it adds no members of its own on top of
+// UT::DatasetOpTesting.
+class MindDataTestSchema : public UT::DatasetOpTesting {
+ protected:
+};
+
+// Verifies that the testDataset2 schema file loads successfully and yields 4 columns.
+TEST_F(MindDataTestSchema, TestOldSchema) {
+  const std::string schema_file = datasets_root_path_ + "/testDataset2/datasetSchema.json";
+  DataSchema schema;
+  // The second argument is an empty list; presumably this selects every column - confirm against
+  // LoadSchemaFile.
+  Status rc = schema.LoadSchemaFile(schema_file, {});
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Return code error detected during schema load: " << common::SafeCStr(rc.ToString()) << ".";
+    // FAIL() records the failure with a readable message and leaves the test body.
+    FAIL() << "LoadSchemaFile failed for " << schema_file;
+  }
+  // EXPECT_EQ prints the actual column count on mismatch, which EXPECT_TRUE(a == b) does not.
+  EXPECT_EQ(schema.NumColumns(), 4);
+}
+
+// Verifies that the testAlbum full schema file loads successfully and yields 7 columns.
+TEST_F(MindDataTestSchema, TestAlbumSchema) {
+  const std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
+  DataSchema schema;
+  // The second argument is an empty list; presumably this selects every column - confirm against
+  // LoadSchemaFile.
+  Status rc = schema.LoadSchemaFile(schema_file, {});
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Return code error detected during schema load: " << common::SafeCStr(rc.ToString()) << ".";
+    // FAIL() records the failure with a readable message and leaves the test body.
+    FAIL() << "LoadSchemaFile failed for " << schema_file;
+  }
+  const int32_t num_cols = schema.NumColumns();
+  MS_LOG(INFO) << "num_cols: " << num_cols << ".";
+  // EXPECT_EQ prints the actual column count on mismatch, which EXPECT_TRUE(a == b) does not.
+  EXPECT_EQ(num_cols, 7);
+}
+
--- a/tests/ut/data/dataset/testAlbum/floatSchema.json
+++ b/tests/ut/data/dataset/testAlbum/floatSchema.json
@ -0,0 +1,20 @@
+{
+  "columns": {
+    "image": {
+      "type": "uint8",
+      "rank": 1
+    },
+    "label" : {
+      "type": "int32",
+      "rank": 1
+    },
+    "id" : {
+      "type": "int64",
+      "rank": 0
+    },
+    "_priority" : {
+      "type": "float64",
+      "rank": 0
+    }
+  }
+}
--- a/tests/ut/data/dataset/testAlbum/fullSchema.json
+++ b/tests/ut/data/dataset/testAlbum/fullSchema.json
@ -0,0 +1,32 @@
+{ 
+  "columns": {
+    "image": {
+      "type": "uint8",
+      "rank": 1
+    },
+    "label" : {
+      "type": "int32",
+      "rank": 1
+    },
+    "id" : {
+      "type": "int64",
+      "rank": 0
+    },
+    "_priority" : {
+      "type": "float64",
+      "rank": 0
+    },
+    "_embedding" : {
+      "type": "uint8",
+      "rank": 1
+    },
+    "_segmented_image" : {
+      "type": "uint8",
+      "rank": 1
+    },
+    "_processed_image" : {
+      "type": "uint8",
+      "rank": 1
+    }
+  }
+}
--- a/tests/ut/data/dataset/testAlbum/gen_json.py
+++ b/tests/ut/data/dataset/testAlbum/gen_json.py
@ -0,0 +1,22 @@
+import json
+import os
+
+def dump_json_from_dict(structure, file_name):
+    """Write `structure` as JSON to the file `<file_name>.json`.
+
+    Args:
+        structure: JSON-serializable object to dump.
+        file_name: Output path without the ".json" extension.
+    """
+    target = file_name + '.json'
+    with open(target, 'w') as handle:
+        json.dump(structure, handle)
+
+if __name__ == '__main__':
+    # Write one metadata file images/<n>.json per entry of DIRECTORY, numbered from 1.
+    # Both directories are resolved relative to the current working directory.
+    DIRECTORY = "imagefolder"
+    OUTPUT_DIRECTORY = "images"
+    # dump_json_from_dict opens its target for writing, which fails if the directory is missing.
+    os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)
+    # NOTE(review): os.listdir returns entries in arbitrary order, so the number assigned to each
+    # image is not guaranteed to be stable across runs or platforms.
+    for i, filename in enumerate(os.listdir(DIRECTORY), start=1):
+        image_path = os.path.join(DIRECTORY, filename)
+        default_dict = {
+            'dataset': '',
+            'image': image_path,
+            'label': [1, 2],
+            '_priority': 0.8,
+            '_embedding': 'sample.bin',
+            '_segmented_image': image_path,
+            '_processed_image': image_path,
+        }
+        dump_json_from_dict(default_dict, OUTPUT_DIRECTORY + '/' + str(i))
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_changemode.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_changemode.jpg
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_decoded.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_decoded.jpg
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_flipped_horizontal.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_flipped_horizontal.jpg
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_flipped_vertical.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_flipped_vertical.jpg
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_not_flip.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_not_flip.jpg
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_rescaled.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_rescaled.jpg
--- a/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_resize_bilinear.jpg
+++ b/tests/ut/data/dataset/testAlbum/imagefolder/apple_expect_resize_bilinear.jpg
--- a/tests/ut/data/dataset/testAlbum/images/1.json
+++ b/tests/ut/data/dataset/testAlbum/images/1.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_decoded.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_decoded.jpg", "_processed_image": "imagefolder/apple_expect_decoded.jpg"}
--- a/tests/ut/data/dataset/testAlbum/images/2.json
+++ b/tests/ut/data/dataset/testAlbum/images/2.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_resize_bilinear.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_resize_bilinear.jpg", "_processed_image": "imagefolder/apple_expect_resize_bilinear.jpg"}
--- a/tests/ut/data/dataset/testAlbum/images/3.json
+++ b/tests/ut/data/dataset/testAlbum/images/3.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_changemode.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_changemode.jpg", "_processed_image": "imagefolder/apple_expect_changemode.jpg"}
--- a/tests/ut/data/dataset/testAlbum/images/4.json
+++ b/tests/ut/data/dataset/testAlbum/images/4.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_not_flip.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_not_flip.jpg", "_processed_image": "imagefolder/apple_expect_not_flip.jpg"}
--- a/tests/ut/data/dataset/testAlbum/images/5.json
+++ b/tests/ut/data/dataset/testAlbum/images/5.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_flipped_horizontal.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_horizontal.jpg", "_processed_image": "imagefolder/apple_expect_flipped_horizontal.jpg"}
--- a/tests/ut/data/dataset/testAlbum/images/6.json
+++ b/tests/ut/data/dataset/testAlbum/images/6.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_rescaled.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_rescaled.jpg", "_processed_image": "imagefolder/apple_expect_rescaled.jpg"}
--- a/tests/ut/data/dataset/testAlbum/images/7.json
+++ b/tests/ut/data/dataset/testAlbum/images/7.json
@ -0,0 +1 @@
+{"dataset": "", "image": "imagefolder/apple_expect_flipped_vertical.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_flipped_vertical.jpg", "_processed_image": "imagefolder/apple_expect_flipped_vertical.jpg"}
--- a/tests/ut/data/dataset/testAlbum/sample.bin
+++ b/tests/ut/data/dataset/testAlbum/sample.bin
@ -0,0 +1 @@
+just some random stuff
				`@ -0,0 +1 @@`
				`{"dataset": "", "image": "imagefolder/apple_expect_decoded.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_decoded.jpg", "_processed_image": "imagefolder/apple_expect_decoded.jpg"}`