Initial commit for album

Added linter fix for album dataset

Added testDataset

Adding signature

Added JsonDataset example API

Example dataset

Resolving format

More fixing

Refactor

Small fix

Added compiling album dataset

Running tests

Added linter fix #1

Passing UT

Added dataset API

Addressing clang

Clang part 2

Fixing pass

Fixed tree check

lint fix

Added lint fix part 2
pull/4772/head
Eric 5 years ago
parent e06dfaa80d
commit c79db93c48

@@ -393,7 +393,7 @@ build_mindspore()
CMAKE_VERBOSE="--verbose"
fi
cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
echo "success to build mindspore project!"
echo "success building mindspore project!"
}
checkndk() {

@@ -21,6 +21,7 @@
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/engine/dataset_iterator.h"
// Source dataset headers (in alphabetical order)
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/clue_op.h"
@@ -117,6 +118,15 @@ std::shared_ptr<SchemaObj> Schema(const std::string &schema_file) {
// FUNCTIONS TO CREATE DATASETS FOR LEAF-NODE DATASETS
// (In alphabetical order)
// Function to create an AlbumDataset.
std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names, bool decode,
const std::shared_ptr<SamplerObj> &sampler) {
auto ds = std::make_shared<AlbumDataset>(dataset_dir, data_schema, column_names, decode, sampler);
return ds->ValidateParams() ? ds : nullptr;
}
// Function to create a CelebADataset.
std::shared_ptr<CelebADataset> CelebA(const std::string &dataset_dir, const std::string &dataset_type,
const std::shared_ptr<SamplerObj> &sampler, bool decode,
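For orientation, here is a minimal usage sketch of the new Album() API, modeled on the c_api pipeline test added later in this diff; the dataset and schema paths below are placeholders, not files shipped with the project:

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "minddata/dataset/include/datasets.h"

using namespace mindspore::dataset::api;
using mindspore::dataset::Tensor;

void AlbumUsageSketch() {
  // Placeholder paths: point these at a real album directory and schema file.
  std::string folder_path = "/path/to/testAlbum/images";
  std::string schema_file = "/path/to/testAlbum/datasetSchema.json";
  std::vector<std::string> column_names = {"image", "label", "id"};
  // Album() returns nullptr when ValidateParams() rejects the inputs.
  std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names);
  if (ds == nullptr) {
    return;
  }
  // Creating the iterator builds and launches the execution tree.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  iter->GetNextRow(&row);
  while (!row.empty()) {
    iter->GetNextRow(&row);  // consume rows until the map comes back empty
  }
  iter->Stop();
}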
@@ -687,6 +697,49 @@ bool ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_sha
// DERIVED DATASET CLASSES FOR LEAF-NODE DATASETS
// (In alphabetical order)
// Constructor for AlbumDataset
AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names, bool decode,
const std::shared_ptr<SamplerObj> &sampler)
: dataset_dir_(dataset_dir),
schema_path_(data_schema),
column_names_(column_names),
decode_(decode),
sampler_(sampler) {}
bool AlbumDataset::ValidateParams() {
if (!ValidateDatasetDirParam("AlbumDataset", dataset_dir_)) {
return false;
}
if (!ValidateDatasetFilesParam("AlbumDataset", {schema_path_})) {
return false;
}
return true;
}
// Function to build AlbumDataset
std::vector<std::shared_ptr<DatasetOp>> AlbumDataset::Build() {
// A vector containing shared pointers to the Dataset Ops that this object will create
std::vector<std::shared_ptr<DatasetOp>> node_ops;
// If the user does not specify a sampler, create a default one, i.e., a RandomSampler.
if (sampler_ == nullptr) {
sampler_ = CreateDefaultSampler();
}
auto schema = std::make_unique<DataSchema>();
RETURN_EMPTY_IF_ERROR(schema->LoadSchemaFile(schema_path_, column_names_));
// Argument that is not exposed to the user in the API.
std::set<std::string> extensions = {};
node_ops.push_back(std::make_shared<AlbumOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
decode_, extensions, std::move(schema), std::move(sampler_->Build())));
return node_ops;
}
// Constructor for CelebADataset
CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &dataset_type,
const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
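Build() leans on RETURN_EMPTY_IF_ERROR to abort with an empty op vector when schema loading fails. The expansion below is an assumption for illustration, not the macro's actual definition in the minddata headers:

// Assumed shape of RETURN_EMPTY_IF_ERROR, for illustration only; the real
// definition lives in minddata's error-handling headers and may differ.
#define RETURN_EMPTY_IF_ERROR(_stmt)                \
  do {                                              \
    Status __rc = (_stmt);                          \
    if (__rc.IsError()) {                           \
      MS_LOG(ERROR) << __rc.ToString();             \
      return {}; /* empty vector of DatasetOps */   \
    }                                               \
  } while (false)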

@@ -13,6 +13,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
text_file_op.cc
clue_op.cc
csv_op.cc
album_op.cc
)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES

@@ -134,7 +134,6 @@ Status ImageFolderOp::operator()() {
TensorRow sample_row;
RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row));
std::shared_ptr<Tensor> sample_ids = sample_row[0];
-if (sample_ids->type() != DataType(DataType::DE_INT64)) RETURN_STATUS_UNEXPECTED("Sampler Tensor isn't int64");
for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); ++itr) {
if ((*itr) >= num_rows_) continue; // index out of bound, skipping
keys.push_back(*itr);

@@ -30,6 +30,7 @@
#include "minddata/dataset/engine/datasetops/repeat_op.h"
#include "minddata/dataset/engine/datasetops/skip_op.h"
#include "minddata/dataset/engine/datasetops/shuffle_op.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
@@ -199,6 +200,11 @@ Status NodePass::RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified)
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
// Fall back to the base-class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
}
Status NodePass::RunOnNode(std::shared_ptr<CacheOp> node, bool *modified) {
// Fall back to the base-class visitor by default
return RunOnNode(std::static_pointer_cast<DatasetOp>(node), modified);
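These overloads are the visitor half of a double dispatch: each op's Accept() calls the pass back with the op's concrete type, and types without a dedicated overload fall through to the DatasetOp base version. A self-contained sketch of the pattern, with illustrative names rather than MindSpore's actual classes:

#include <iostream>
#include <memory>

struct AlbumNode;  // forward declaration so the visitor can name it

struct Visitor {
  virtual ~Visitor() = default;
  virtual void Visit(AlbumNode &node) = 0;
};

struct Node {
  virtual ~Node() = default;
  virtual void Accept(Visitor &v) = 0;
};

struct AlbumNode : Node {
  // Accept() is overridden per concrete type, so v.Visit(*this) resolves
  // to the AlbumNode overload (the second dispatch).
  void Accept(Visitor &v) override { v.Visit(*this); }
};

struct PrintVisitor : Visitor {
  void Visit(AlbumNode &) override { std::cout << "Visiting AlbumNode\n"; }
};

int main() {
  std::unique_ptr<Node> n = std::make_unique<AlbumNode>();  // first dispatch: virtual Accept()
  PrintVisitor pv;
  n->Accept(pv);  // prints "Visiting AlbumNode"
  return 0;
}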

@@ -49,6 +49,8 @@ class FilterOp;
class GeneratorOp;
#endif
class AlbumOp;
class RandomDataOp;
class RepeatOp;
@@ -178,6 +180,8 @@ class NodePass : public Pass {
virtual Status RunOnNode(std::shared_ptr<RandomDataOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<TakeOp> node, bool *modified);
virtual Status RunOnNode(std::shared_ptr<ZipOp> node, bool *modified);

@@ -21,6 +21,7 @@
#include "minddata/dataset/engine/datasetops/cache_lookup_op.h"
#include "minddata/dataset/engine/datasetops/cache_merge_op.h"
#include "minddata/dataset/engine/datasetops/cache_op.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/celeba_op.h"
#include "minddata/dataset/engine/datasetops/source/cifar_op.h"
#include "minddata/dataset/engine/datasetops/source/coco_op.h"
@@ -152,6 +153,11 @@ Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<ImageFolderOp> n
return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
}
// Perform leaf node cache transform identification
Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));
}
// Perform leaf node cache transform identification
Status CacheTransformPass::CachePass::RunOnNode(std::shared_ptr<MnistOp> node, bool *modified) {
return MappableCacheLeafSetup(std::static_pointer_cast<DatasetOp>(node));

@@ -79,6 +79,12 @@ class CacheTransformPass : public TreePass {
/// \return Status The error code returned
Status RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified) override;
/// \brief Perform leaf node cache transform identification
/// \param[in] node The node being visited
/// \param[inout] modified Indicator of whether the node was changed
/// \return Status The error code returned
Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override;
/// \brief Perform leaf node cache transform identification
/// \param[in] node The node being visited
/// \param[inout] modified Indicator of whether the node was changed

@@ -111,5 +111,11 @@ Status PrinterPass::RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modifie
std::cout << "Visiting ImageFolderOp" << '\n';
return Status::OK();
}
Status PrinterPass::RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) {
*modified = false;
std::cout << "Visiting ImageFolderOp" << '\n';
return Status::OK();
}
} // namespace dataset
} // namespace mindspore

@@ -58,6 +58,8 @@ class PrinterPass : public NodePass {
Status RunOnNode(std::shared_ptr<DeviceQueueOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<ImageFolderOp> node, bool *modified) override;
Status RunOnNode(std::shared_ptr<AlbumOp> node, bool *modified) override;
};
} // namespace dataset

File diff suppressed because it is too large.

@@ -5,6 +5,7 @@ SET(DE_UT_SRCS
common/cvop_common.cc
common/bboxop_common.cc
auto_contrast_op_test.cc
album_op_test.cc
batch_op_test.cc
bit_functions_test.cc
storage_container_test.cc
@@ -101,6 +102,7 @@ SET(DE_UT_SRCS
c_api_samplers_test.cc
c_api_transforms_test.cc
c_api_dataset_ops_test.cc
c_api_dataset_album_test.cc
c_api_dataset_cifar_test.cc
c_api_dataset_clue_test.cc
c_api_dataset_coco_test.cc

@@ -0,0 +1,208 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include "common/common.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/datasetops/source/album_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
#include "securec.h"
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/transforms.h"
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
std::shared_ptr<AlbumOp> Album(int64_t num_workers, int64_t rows, int64_t conns, std::string path,
bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
bool decode = false) {
std::shared_ptr<AlbumOp> so;
AlbumOp::Builder builder;
Status rc = builder.SetNumWorkers(num_workers)
.SetAlbumDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSampler(std::move(sampler))
.SetDecode(decode)
.Build(&so);
return so;
}
std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_workers, int64_t rows, int64_t conns, std::string path,
std::string schema_file, std::vector<std::string> column_names = {},
bool shuf = false, std::unique_ptr<Sampler> sampler = nullptr,
bool decode = false) {
std::shared_ptr<AlbumOp> so;
AlbumOp::Builder builder;
Status rc = builder.SetNumWorkers(num_workers)
.SetSchemaFile(schema_file)
.SetColumnsToLoad(column_names)
.SetAlbumDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSampler(std::move(sampler))
.SetDecode(decode)
.Build(&so);
return so;
}
class MindDataTestAlbum : public UT::DatasetOpTesting {
protected:
};
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchema) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file, column_names, false), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaNoOrder) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithSchemaFloat) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
// add the priority column
std::string schema_file = datasets_root_path_ + "/testAlbum/floatSchema.json";
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
double priority = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
tensor_map["_priority"]->GetItemAt<double>(&priority, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "priority: " << priority << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}
TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
// add the priority column
std::string schema_file = datasets_root_path_ + "/testAlbum/fullSchema.json";
auto tree = Build({AlbumSchema(16, 2, 32, folder_path, schema_file), Repeat(2)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << ".";
EXPECT_TRUE(false);
} else {
DatasetIterator di(tree);
TensorMap tensor_map;
rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk());
uint64_t i = 0;
int32_t label = 0;
double priority = 0;
while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
tensor_map["_priority"]->GetItemAt<double>(&priority, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "priority: " << priority << " embedding : " <<
tensor_map["_embedding"]->shape() << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}
MS_LOG(INFO) << "got rows" << i << "\n";
EXPECT_TRUE(i == 14);
}
}

@@ -0,0 +1,136 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/include/datasets.h"
using namespace mindspore::dataset::api;
using mindspore::dataset::Tensor;
class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};
TEST_F(MindDataTestPipeline, TestAlbumBasic) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumBasic.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
MS_LOG(INFO) << "Tensor image shape: " << image->shape();
iter->GetNextRow(&row);
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumDecode) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumDecode.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true);
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
auto shape = image->shape();
MS_LOG(INFO) << "Tensor image shape size: " << shape.Size();
MS_LOG(INFO) << "Tensor image shape: " << image->shape();
EXPECT_GT(shape.Size(), 1); // Verify decode=true took effect
iter->GetNextRow(&row);
}
EXPECT_EQ(i, 7);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumNumSamples) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumNumSamples.";
std::string folder_path = datasets_root_path_ + "/testAlbum/images";
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(0, 1));
EXPECT_NE(ds, nullptr);
// Create an iterator over the result of the above dataset
// This will trigger the creation of the Execution Tree and launch it.
std::shared_ptr<Iterator> iter = ds->CreateIterator();
EXPECT_NE(iter, nullptr);
// Iterate the dataset and get each row
std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];
MS_LOG(INFO) << "Tensor image shape: " << image->shape();
iter->GetNextRow(&row);
}
EXPECT_EQ(i, 1);
// Manually terminate the pipeline
iter->Stop();
}
TEST_F(MindDataTestPipeline, TestAlbumError) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAlbumError.";
std::string folder_path = datasets_root_path_ + "/testAlbum/ima";  // intentionally invalid directory
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create an Album Dataset
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, true, SequentialSampler(0, 1));
EXPECT_EQ(ds, nullptr);
}

@@ -32,6 +32,8 @@ export GLOG_v=2
## prepare data for dataset & mindrecord
cp -fr $PROJECT_PATH/tests/ut/data ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/
## prepare album dataset; it uses absolute paths, so it has to be generated
python ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/data/dataset/testAlbum/gen_json.py
if [ $# -gt 0 ]; then
./ut_tests --gtest_filter=$1

@@ -0,0 +1,16 @@
{
"columns": {
"image": {
"type": "uint8",
"rank": 1
},
"label" : {
"type": "string",
"rank": 1
},
"id" : {
"type": "int64",
"rank": 0
}
}
}

@@ -5,7 +5,7 @@
"rank": 1
},
"label" : {
"type": "int32",
"type": "string",
"rank": 1
},
"id" : {

@@ -5,7 +5,7 @@
"rank": 1
},
"label" : {
"type": "int32",
"type": "string",
"rank": 1
},
"id" : {

@@ -2,21 +2,21 @@ import json
import os
def dump_json_from_dict(structure, file_name):
-    with open(file_name + '.json', 'w') as file_path:
-        json.dump(structure, file_path)
+    with open(file_name + '.json', 'w') as fp:
+        json.dump(structure, fp)
if __name__ == '__main__':
-    # iterate over directory
-    DIRECTORY = "imagefolder"
-    i = 0
+    # iterate over DIRECTORY
+    DIRECTORY = os.path.dirname(os.path.realpath(__file__)) + "/original"
+    PARENT_DIR = os.path.dirname(DIRECTORY)
+    i = -1
    for filename in os.listdir(DIRECTORY):
        default_dict = {}
        default_dict.update(dataset='')
-        default_dict.update(image=(os.path.join(DIRECTORY, filename)))
-        default_dict.update(label=[1, 2])
+        default_dict.update(image=os.path.abspath(os.path.join(DIRECTORY, filename)))
+        default_dict.update(label=['3', '2'])
        default_dict.update(_priority=0.8)
-        default_dict.update(_embedding='sample.bin')
-        default_dict.update(_segmented_image=(os.path.join(DIRECTORY, filename)))
-        default_dict.update(_processed_image=(os.path.join(DIRECTORY, filename)))
+        default_dict.update(_embedding=os.path.abspath(os.path.join(PARENT_DIR, 'sample.bin')))
+        default_dict.update(_processed_image=os.path.abspath(os.path.join(DIRECTORY, filename)))
        i = i + 1
-        dump_json_from_dict(default_dict, 'images/'+str(i))
+        dump_json_from_dict(default_dict, PARENT_DIR + '/images/'+str(i))

@@ -0,0 +1 @@
{"dataset": "", "image": "original/apple_expect_decoded.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "sample.bin", "_processed_image": "original/apple_expect_decoded.jpg"}

@@ -1 +1 @@
-{"dataset": "", "image": "imagefolder/apple_expect_decoded.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_decoded.jpg", "_processed_image": "imagefolder/apple_expect_decoded.jpg"}
+{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_resize_bilinear.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_resize_bilinear.jpg"}

@@ -1 +1 @@
-{"dataset": "", "image": "imagefolder/apple_expect_resize_bilinear.jpg", "label": [1, 2], "_priority": 0.8, "_embedding": "sample.bin", "_segmented_image": "imagefolder/apple_expect_resize_bilinear.jpg", "_processed_image": "imagefolder/apple_expect_resize_bilinear.jpg"}
+{"dataset": "", "image": "testAlbum//testAlbum/original/apple_expect_changemode.jpg", "label": ["3", "2"], "_priority": 0.8, "_embedding": "testAlbum//testAlbum/sample.bin", "_processed_image": "testAlbum//testAlbum/original/apple_expect_changemode.jpg"}

Some files were not shown because too many files have changed in this diff.
