Remove datasets.h dependency on tensor.h and other related dependencies

Replace std::function<TensorRow(TensorRow)> with std::function<MSTensorVec(MSTensorVec)> by using std::bind (adapter sketched below)

Replace the std::shared_ptr<Tensor>/TensorShape pair in pad_info with MSTensor/std::vector<int64_t>

Switch TensorShape/dataset::DataType to std::vector<int64_t>/mindspore::DataType

Change TypeId to mindspore::DataType in SchemaObj

Address review comments; add CreateFromMSTensor() in tensor.h/.cc
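
The element_length_function and predicate callbacks keep working through a small adapter. Below is a minimal, self-contained sketch of that std::bind pattern; the MSTensor/Tensor types here are simplified stand-ins for illustration rather than the real MindSpore classes, while FuncPtrConverter/VecToRow/RowToVec mirror the helpers added to datasets.cc in this commit.

#include <functional>
#include <vector>

struct MSTensor {};                         // stand-in for mindspore::MSTensor
using MSTensorVec = std::vector<MSTensor>;  // public tensor container
struct Tensor {};                           // stand-in for dataset::Tensor
using TensorRow = std::vector<Tensor>;      // stand-in for dataset::TensorRow

// Converters mirroring VecToRow/RowToVec in datasets.cc (trivial in this sketch).
TensorRow VecToRow(const MSTensorVec &v) { return TensorRow(v.size()); }
MSTensorVec RowToVec(const TensorRow &r) { return MSTensorVec(r.size()); }

// Same shape as the FuncPtrConverter helper added in datasets.cc.
TensorRow FuncPtrConverter(std::function<MSTensorVec(MSTensorVec)> func, TensorRow in_row) {
  return VecToRow(func(RowToVec(in_row)));
}

int main() {
  // User-facing callback written against the public MSTensorVec type.
  std::function<MSTensorVec(MSTensorVec)> user_fn = [](MSTensorVec in) { return in; };
  // Bind the callback into the internal TensorRow(TensorRow) signature, as done for CFuncOp.
  std::function<TensorRow(TensorRow)> internal_fn =
      std::bind(FuncPtrConverter, user_fn, std::placeholders::_1);
  TensorRow out = internal_fn(TensorRow(3));  // round-trips through both converters
  return out.size() == 3 ? 0 : 1;
}
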
pull/12896/head
Zirui Wu 4 years ago
parent 9c1e73a5b9
commit 8022001450
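
For the user-visible effect of the type migration above (mindspore::DataType in SchemaObj, std::vector<int64_t> shapes, public getter types), here is a short usage sketch modeled on the updated unit tests in this diff; it assumes the minddata headers from this branch are on the include path and is only illustrative.

#include <cstdint>
#include <memory>
#include <vector>
#include "minddata/dataset/include/datasets.h"

int main() {
  using namespace mindspore::dataset;
  // SchemaObj::add_column now takes mindspore::DataType instead of TypeId.
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2});
  schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1});
  std::shared_ptr<Dataset> ds = RandomData(50, schema);
  // Getters now return public types instead of internal DE classes.
  std::vector<mindspore::DataType> types = ds->GetOutputTypes();
  std::vector<std::vector<int64_t>> shapes = ds->GetOutputShapes();
  return (types.size() == shapes.size()) ? 0 : 1;
}
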

@@ -20,12 +20,13 @@
#include <unordered_set>
#include <utility>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/runtime_context.h"
#include "minddata/dataset/include/samplers.h"
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/include/type_id.h"
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/consumers/tree_consumer.h"
@@ -101,6 +102,37 @@
namespace mindspore {
namespace dataset {
// convert MSTensorVec to DE TensorRow, return empty if fails
TensorRow VecToRow(const MSTensorVec &v) {
TensorRow row;
row.reserve(v.size());
for (const MSTensor &t : v) {
std::shared_ptr<Tensor> rt;
Status rc = Tensor::CreateFromMSTensor(t, &rt);
if (rc.IsError()) {
MS_LOG_ERROR << "Convert from MSTensor to DETensor failed:" << rc.ToString() << ".";
return {};
}
row.emplace_back(rt);
}
return row;
}
// convert DE TensorRow to MSTensorVec, won't fail
MSTensorVec RowToVec(const TensorRow &v) {
MSTensorVec rv;
rv.reserve(v.size());
std::transform(v.begin(), v.end(), std::back_inserter(rv), [](std::shared_ptr<Tensor> t) -> MSTensor {
return mindspore::MSTensor(std::make_shared<DETensor>(t));
});
return rv;
}
// Convert a std::function<MSTensorVec(MSTensorVec)> into a std::function<TensorRow(TensorRow)> with this helper
TensorRow FuncPtrConverter(std::function<MSTensorVec(MSTensorVec)> func, TensorRow in_row) {
return VecToRow(func(RowToVec(in_row)));
}
// Function to create the iterator, which will build and launch the execution tree.
std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(std::vector<std::vector<char>> columns, int32_t num_epochs) {
std::shared_ptr<Iterator> iter;
@@ -228,22 +260,29 @@ int64_t Dataset::GetDatasetSize(bool estimate) {
return dataset_size;
}
std::vector<DataType> Dataset::GetOutputTypes() {
std::vector<mindspore::DataType> Dataset::GetOutputTypes() {
std::vector<DataType> types;
std::unique_ptr<NativeRuntimeContext> runtime_context = std::make_unique<NativeRuntimeContext>();
RETURN_SECOND_IF_ERROR(runtime_context->Init(), {});
RETURN_SECOND_IF_ERROR(tree_getters_->Init(this->IRNode()), {});
RETURN_SECOND_IF_ERROR(tree_getters_->GetOutputTypes(&types), {});
return types;
std::vector<mindspore::DataType> ret_types;
std::transform(
types.begin(), types.end(), std::back_inserter(ret_types),
[](const DataType &d) -> mindspore::DataType { return static_cast<mindspore::DataType>(DETypeToMSType(d)); });
return ret_types;
}
std::vector<TensorShape> Dataset::GetOutputShapes() {
std::vector<std::vector<int64_t>> Dataset::GetOutputShapes() {
std::vector<TensorShape> shapes;
std::unique_ptr<NativeRuntimeContext> runtime_context = std::make_unique<NativeRuntimeContext>();
RETURN_SECOND_IF_ERROR(runtime_context->Init(), {});
RETURN_SECOND_IF_ERROR(tree_getters_->Init(this->IRNode()), {});
RETURN_SECOND_IF_ERROR(tree_getters_->GetOutputShapes(&shapes), {});
return shapes;
std::vector<std::vector<int64_t>> ret_shapes;
std::transform(shapes.begin(), shapes.end(), std::back_inserter(ret_shapes),
[](const TensorShape &s) -> std::vector<int64_t> { return s.AsVector(); });
return ret_shapes;
}
int64_t Dataset::GetNumClasses() {
@@ -296,16 +335,31 @@ BatchDataset::BatchDataset(std::shared_ptr<Dataset> input, int32_t batch_size, b
BucketBatchByLengthDataset::BucketBatchByLengthDataset(
std::shared_ptr<Dataset> input, const std::vector<std::vector<char>> &column_names,
const std::vector<int32_t> &bucket_boundaries, const std::vector<int32_t> &bucket_batch_sizes,
std::function<TensorRow(TensorRow)> element_length_function,
const std::map<std::vector<char>, std::pair<TensorShape, std::shared_ptr<Tensor>>> &pad_info,
bool pad_to_bucket_boundary, bool drop_remainder) {
std::function<MSTensorVec(MSTensorVec)> element_length_function,
const std::map<std::vector<char>, std::pair<std::vector<int64_t>, MSTensor>> &pad_info, bool pad_to_bucket_boundary,
bool drop_remainder) {
std::shared_ptr<TensorOp> c_func = nullptr;
if (element_length_function != nullptr) {
c_func = std::make_shared<CFuncOp>(element_length_function);
c_func = std::make_shared<CFuncOp>(std::bind(FuncPtrConverter, element_length_function, std::placeholders::_1));
}
std::map<std::vector<char>, std::pair<TensorShape, std::shared_ptr<Tensor>>> map;
for (auto const &p : pad_info) {
const MSTensor &t = p.second.second;
std::shared_ptr<Tensor> rt;
Status rc = Tensor::CreateFromMemory(TensorShape(t.Shape()), MSTypeToDEType(static_cast<TypeId>(t.DataType())),
(const uchar *)(t.Data().get()), t.DataSize(), &rt);
if (rc.IsError()) {
MS_LOG_ERROR << "Fail to create DETensor from MSTensor for pad_info: " << rc.ToString() << ".";
map.clear();
break;
}
map.insert({p.first, {TensorShape(p.second.first), rt}});
}
auto ds = std::make_shared<BucketBatchByLengthNode>(
input->IRNode(), VectorCharToString(column_names), bucket_boundaries, bucket_batch_sizes, c_func,
PadInfoCharToString(pad_info), pad_to_bucket_boundary, drop_remainder);
auto ds = std::make_shared<BucketBatchByLengthNode>(input->IRNode(), VectorCharToString(column_names),
bucket_boundaries, bucket_batch_sizes, c_func,
PadInfoCharToString(map), pad_to_bucket_boundary, drop_remainder);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
@@ -322,10 +376,10 @@ ConcatDataset::ConcatDataset(const std::vector<std::shared_ptr<Dataset>> &datase
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
FilterDataset::FilterDataset(std::shared_ptr<Dataset> input, std::function<TensorRow(TensorRow)> predicate,
FilterDataset::FilterDataset(std::shared_ptr<Dataset> input, std::function<MSTensorVec(MSTensorVec)> predicate,
const std::vector<std::vector<char>> &input_columns) {
std::shared_ptr<TensorOp> c_func = nullptr;
if (predicate) c_func = std::make_shared<CFuncOp>(predicate);
if (predicate) c_func = std::make_shared<CFuncOp>(std::bind(FuncPtrConverter, predicate, std::placeholders::_1));
auto ds = std::make_shared<FilterNode>(input->IRNode(), c_func, VectorCharToString(input_columns));
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
@@ -528,8 +582,9 @@ Status SchemaObj::Init() {
}
// Function to add a column to schema with a mstype de_type and known shape
Status SchemaObj::add_column_char(const std::vector<char> &name, TypeId de_type, const std::vector<int32_t> &shape) {
DataType data_type = dataset::MSTypeToDEType(de_type);
Status SchemaObj::add_column_char(const std::vector<char> &name, mindspore::DataType de_type,
const std::vector<int32_t> &shape) {
DataType data_type = dataset::MSTypeToDEType(static_cast<TypeId>(de_type));
return add_column_char(name, StringToChar(data_type.ToString()), shape);
}
@@ -550,8 +605,8 @@ Status SchemaObj::add_column_char(const std::vector<char> &name, const std::vect
}
// Function to add a column to schema with a mstype de_type and without shape
Status SchemaObj::add_column_char(const std::vector<char> &name, TypeId de_type) {
DataType data_type = dataset::MSTypeToDEType(de_type);
Status SchemaObj::add_column_char(const std::vector<char> &name, mindspore::DataType de_type) {
DataType data_type = dataset::MSTypeToDEType(static_cast<TypeId>(de_type));
return add_column_char(name, StringToChar(data_type.ToString()));
}

@@ -34,12 +34,16 @@ PYBIND_REGISTER(
THROW_IF_ERROR(schema->Init());
return schema;
}))
.def("add_column", [](SchemaObj &self, std::string name, TypeId de_type,
std::vector<int32_t> shape) { THROW_IF_ERROR(self.add_column(name, de_type, shape)); })
.def("add_column",
[](SchemaObj &self, std::string name, TypeId de_type, std::vector<int32_t> shape) {
THROW_IF_ERROR(self.add_column(name, static_cast<mindspore::DataType>(de_type), shape));
})
.def("add_column", [](SchemaObj &self, std::string name, std::string de_type,
std::vector<int32_t> shape) { THROW_IF_ERROR(self.add_column(name, de_type, shape)); })
.def("add_column",
[](SchemaObj &self, std::string name, TypeId de_type) { THROW_IF_ERROR(self.add_column(name, de_type)); })
[](SchemaObj &self, std::string name, TypeId de_type) {
THROW_IF_ERROR(self.add_column(name, static_cast<mindspore::DataType>(de_type)));
})
.def("add_column", [](SchemaObj &self, std::string name,
std::string de_type) { THROW_IF_ERROR(self.add_column(name, de_type)); })
.def("parse_columns",

@@ -24,6 +24,8 @@
#include <utility>
#include <functional>
#include "minddata/dataset/include/type_id.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/constants.h"
@@ -1040,6 +1042,10 @@ Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::
}
return CreateFromVector(strings, shape, out);
}
Status Tensor::CreateFromMSTensor(const MSTensor &in, TensorPtr *out) {
return Tensor::CreateFromMemory(TensorShape(in.Shape()), MSTypeToDEType(static_cast<TypeId>(in.DataType())),
(const uchar *)(in.Data().get()), in.DataSize(), out);
}
} // namespace dataset
} // namespace mindspore

@@ -117,6 +117,11 @@ class Tensor {
return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
}
/// Create a copy of the input tensor
/// \param[in] in MSTensor to create the DETensor from
/// \return Status
static Status CreateFromMSTensor(const MSTensor &in, TensorPtr *out);
#ifdef ENABLE_PYTHON
/// Create a Tensor from a given py::array
/// \param[in] arr py::array

@@ -30,10 +30,12 @@
#include <utility>
#include <vector>
#include <nlohmann/json.hpp>
#include "include/api/dual_abi_helper.h"
#include "include/api/types.h"
#include "minddata/dataset/include/iterator.h"
#include "minddata/dataset/include/samplers.h"
#include "minddata/dataset/include/tensor.h"
#include "minddata/dataset/include/text.h"
#include "minddata/dataset/include/type_id.h"
@@ -41,7 +43,6 @@ namespace mindspore {
namespace dataset {
class Tensor;
class TensorRow;
class TensorShape;
class TreeAdapter;
class TreeGetters;
@@ -101,11 +102,11 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \brief Gets the output type
/// \return a vector of DataType. If failed, return an empty vector
std::vector<DataType> GetOutputTypes();
std::vector<mindspore::DataType> GetOutputTypes();
/// \brief Gets the output shape
/// \return a vector of TensorShape. If failed, return an empty vector
std::vector<TensorShape> GetOutputShapes();
std::vector<std::vector<int64_t>> GetOutputShapes();
/// \brief Gets the batch size
/// \return int64_t
@@ -196,7 +197,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// 0<i<n, and one bucket for [bucket_boundaries[n-1], inf).
/// \param[in] bucket_batch_sizes A list consisting of the batch sizes for each bucket.
/// Must contain elements equal to the size of bucket_boundaries + 1.
/// \param[in] element_length_function A function pointer that takes in TensorRow and outputs a TensorRow.
/// \param[in] element_length_function A function pointer that takes in MSTensorVec and outputs a MSTensorVec.
/// The output must contain a single tensor containing a single int32_t. If no value is provided,
/// then size of column_names must be 1, and the size of the first dimension of that column will be taken
/// as the length (default=nullptr)
@@ -215,8 +216,8 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
std::shared_ptr<BucketBatchByLengthDataset> BucketBatchByLength(
const std::vector<std::string> &column_names, const std::vector<int32_t> &bucket_boundaries,
const std::vector<int32_t> &bucket_batch_sizes,
std::function<TensorRow(TensorRow)> element_length_function = nullptr,
const std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> &pad_info = {},
std::function<MSTensorVec(MSTensorVec)> element_length_function = nullptr,
const std::map<std::string, std::pair<std::vector<int64_t>, MSTensor>> &pad_info = {},
bool pad_to_bucket_boundary = false, bool drop_remainder = false) {
return std::make_shared<BucketBatchByLengthDataset>(
shared_from_this(), VectorStringToChar(column_names), bucket_boundaries, bucket_batch_sizes,
@@ -276,7 +277,7 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] predicate Function callable which returns a boolean value. If false then filter the element
/// \param[in] input_columns List of names of the input columns to filter
/// \return Shared pointer to the current FilterNode
std::shared_ptr<FilterDataset> Filter(std::function<TensorRow(TensorRow)> predicate,
std::shared_ptr<FilterDataset> Filter(std::function<MSTensorVec(MSTensorVec)> predicate,
const std::vector<std::string> &input_columns = {}) {
return std::make_shared<FilterDataset>(shared_from_this(), predicate, VectorStringToChar(input_columns));
}
@@ -481,35 +482,37 @@ class SchemaObj {
/// \brief Add new column to the schema with unknown shape of rank 1
/// \param[in] name Name of the column.
/// \param[in] de_type Data type of the column(TypeId).
/// \param[in] ms_type Data type of the column(mindspore::DataType).
/// \return Status code
Status add_column(const std::string &name, TypeId de_type) { return add_column_char(StringToChar(name), de_type); }
Status add_column(const std::string &name, mindspore::DataType ms_type) {
return add_column_char(StringToChar(name), ms_type);
}
/// \brief Add new column to the schema with unknown shape of rank 1
/// \param[in] name Name of the column.
/// \param[in] de_type Data type of the column(std::string).
/// \param[in] ms_type Data type of the column(std::string).
/// \param[in] shape Shape of the column.
/// \return Status code
Status add_column(const std::string &name, const std::string &de_type) {
return add_column_char(StringToChar(name), StringToChar(de_type));
Status add_column(const std::string &name, const std::string &ms_type) {
return add_column_char(StringToChar(name), StringToChar(ms_type));
}
/// \brief Add new column to the schema
/// \param[in] name Name of the column.
/// \param[in] de_type Data type of the column(TypeId).
/// \param[in] ms_type Data type of the column(mindspore::DataType).
/// \param[in] shape Shape of the column.
/// \return Status code
Status add_column(const std::string &name, TypeId de_type, const std::vector<int32_t> &shape) {
return add_column_char(StringToChar(name), de_type, shape);
Status add_column(const std::string &name, mindspore::DataType ms_type, const std::vector<int32_t> &shape) {
return add_column_char(StringToChar(name), ms_type, shape);
}
/// \brief Add new column to the schema
/// \param[in] name Name of the column.
/// \param[in] de_type Data type of the column(std::string).
/// \param[in] ms_type Data type of the column(std::string).
/// \param[in] shape Shape of the column.
/// \return Status code
Status add_column(const std::string &name, const std::string &de_type, const std::vector<int32_t> &shape) {
return add_column_char(StringToChar(name), StringToChar(de_type), shape);
Status add_column(const std::string &name, const std::string &ms_type, const std::vector<int32_t> &shape) {
return add_column_char(StringToChar(name), StringToChar(ms_type), shape);
}
/// \brief Get a JSON string of the schema
@@ -556,13 +559,13 @@ class SchemaObj {
explicit SchemaObj(const std::vector<char> &schema_file);
// Char interface of add_column
Status add_column_char(const std::vector<char> &name, TypeId de_type);
Status add_column_char(const std::vector<char> &name, mindspore::DataType ms_type);
Status add_column_char(const std::vector<char> &name, const std::vector<char> &de_type);
Status add_column_char(const std::vector<char> &name, const std::vector<char> &ms_type);
Status add_column_char(const std::vector<char> &name, TypeId de_type, const std::vector<int32_t> &shape);
Status add_column_char(const std::vector<char> &name, mindspore::DataType ms_type, const std::vector<int32_t> &shape);
Status add_column_char(const std::vector<char> &name, const std::vector<char> &de_type,
Status add_column_char(const std::vector<char> &name, const std::vector<char> &ms_type,
const std::vector<int32_t> &shape);
// Char interface of to_json
@@ -589,8 +592,8 @@ class BucketBatchByLengthDataset : public Dataset {
BucketBatchByLengthDataset(
std::shared_ptr<Dataset> input, const std::vector<std::vector<char>> &column_names,
const std::vector<int32_t> &bucket_boundaries, const std::vector<int32_t> &bucket_batch_sizes,
std::function<TensorRow(TensorRow)> element_length_function = nullptr,
const std::map<std::vector<char>, std::pair<TensorShape, std::shared_ptr<Tensor>>> &pad_info = {},
std::function<MSTensorVec(MSTensorVec)> element_length_function = nullptr,
const std::map<std::vector<char>, std::pair<std::vector<int64_t>, MSTensor>> &pad_info = {},
bool pad_to_bucket_boundary = false, bool drop_remainder = false);
~BucketBatchByLengthDataset() = default;
};
@@ -603,7 +606,7 @@ class ConcatDataset : public Dataset {
class FilterDataset : public Dataset {
public:
FilterDataset(std::shared_ptr<Dataset> input, std::function<TensorRow(TensorRow)> predicate,
FilterDataset(std::shared_ptr<Dataset> input, std::function<MSTensorVec(MSTensorVec)> predicate,
const std::vector<std::vector<char>> &input_columns);
~FilterDataset() = default;
};
@@ -1530,12 +1533,8 @@ std::shared_ptr<TFRecordDataset> TFRecord(const std::vector<std::string> &datase
std::string schema_path = schema;
if (!schema_path.empty()) {
struct stat sb;
int rc = stat(common::SafeCStr(schema_path), &sb);
if (rc == -1 && errno != ENOENT) {
MS_LOG(WARNING) << "Unable to query the status of [" << schema_path << "]. Errno = " << errno << ".";
}
int rc = stat(schema_path.c_str(), &sb);
if (rc != 0) {
MS_LOG(ERROR) << "TFRecordDataset: schema path [" << schema_path << "] is invalid or does not exist.";
return nullptr;
}
}

@@ -210,7 +210,8 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheCelebaCApi) {
// Create a CelebA Dataset, this folder_path has 4 records in it
std::string folder_path = datasets_root_path_ + "/testCelebAData/";
std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache);
std::shared_ptr<Dataset> ds =
CelebA(folder_path, "all", std::make_shared<RandomSampler>(false, 10), false, {}, some_cache);
EXPECT_NE(ds, nullptr);
// Create a Repeat operation on ds
@@ -374,7 +375,8 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheVocCApi) {
// Create a VOC Dataset, this folder_path has 9 records in it
std::string folder_path = datasets_root_path_ + "/testVOC2012/";
std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache);
std::shared_ptr<Dataset> ds =
VOC(folder_path, "Detection", "train", {}, false, std::make_shared<RandomSampler>(), some_cache);
EXPECT_NE(ds, nullptr);
// Create a Repeat operation on ds
@@ -417,7 +419,8 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheAlbumCApi) {
std::string schema_file = datasets_root_path_ + "/testAlbum/datasetSchema.json";
std::vector<std::string> column_names = {"image", "label", "id"};
// Create a Album Dataset, 7 records in it
std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache);
std::shared_ptr<Dataset> ds =
Album(folder_path, schema_file, column_names, false, std::make_shared<RandomSampler>(), some_cache);
EXPECT_NE(ds, nullptr);
// Create a Repeat operation on ds
@@ -456,8 +459,9 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCacheRandomDataCApi) {
// Create a RandomDataset
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1});
std::shared_ptr<Dataset> ds = RandomData(4, schema, {}, some_cache);
EXPECT_NE(ds, nullptr);
@@ -826,7 +830,8 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestCApiCacheShare2) {
// Since sampler does not influence the data in the source, these two pipelines can share a common cache.
std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(), {}, {}, some_cache);
EXPECT_NE(ds1, nullptr);
std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache);
std::shared_ptr<Dataset> ds2 =
ImageFolder(folder_path, true, std::make_shared<SequentialSampler>(), {}, {}, some_cache);
EXPECT_NE(ds2, nullptr);
// Create and launch the Execution Tree for ds1

@@ -14,6 +14,7 @@
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/include/datasets.h"
using namespace mindspore::dataset;
@@ -134,8 +135,8 @@ TEST_F(MindDataTestPipeline, TestCifar10Getters) {
EXPECT_NE(ds, nullptr);
EXPECT_EQ(ds->GetDatasetSize(), 10000);
std::vector<DataType> types = ds->GetOutputTypes();
std::vector<TensorShape> shapes = ds->GetOutputShapes();
std::vector<DataType> types = ToDETypes(ds->GetOutputTypes());
std::vector<TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
std::vector<std::string> column_names = {"image", "label"};
int64_t num_classes = ds->GetNumClasses();
EXPECT_EQ(types.size(), 2);
@@ -149,14 +150,14 @@ TEST_F(MindDataTestPipeline, TestCifar10Getters) {
EXPECT_EQ(ds->GetRepeatCount(), 1);
EXPECT_EQ(ds->GetDatasetSize(), 10000);
EXPECT_EQ(ds->GetOutputTypes(), types);
EXPECT_EQ(ds->GetOutputShapes(), shapes);
EXPECT_EQ(ToDETypes(ds->GetOutputTypes()), types);
EXPECT_EQ(ToTensorShapeVec(ds->GetOutputShapes()), shapes);
EXPECT_EQ(ds->GetNumClasses(), -1);
EXPECT_EQ(ds->GetColumnNames(), column_names);
EXPECT_EQ(ds->GetDatasetSize(), 10000);
EXPECT_EQ(ds->GetOutputTypes(), types);
EXPECT_EQ(ds->GetOutputShapes(), shapes);
EXPECT_EQ(ToDETypes(ds->GetOutputTypes()), types);
EXPECT_EQ(ToTensorShapeVec(ds->GetOutputShapes()), shapes);
EXPECT_EQ(ds->GetBatchSize(), 1);
EXPECT_EQ(ds->GetRepeatCount(), 1);
EXPECT_EQ(ds->GetNumClasses(), -1);
@@ -207,8 +208,8 @@ TEST_F(MindDataTestPipeline, TestCifar100Getters) {
EXPECT_NE(ds, nullptr);
std::vector<std::string> column_names = {"image", "coarse_label", "fine_label"};
std::vector<DataType> types = ds->GetOutputTypes();
std::vector<TensorShape> shapes = ds->GetOutputShapes();
std::vector<DataType> types = ToDETypes(ds->GetOutputTypes());
std::vector<TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
int64_t num_classes = ds->GetNumClasses();
EXPECT_EQ(types.size(), 3);

@@ -141,7 +141,8 @@ TEST_F(MindDataTestPipeline, TestIteratorTwoColumns) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorTwoColumns.";
// Create a VOC Dataset
std::string folder_path = datasets_root_path_ + "/testVOC2012_2";
std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 4));
std::shared_ptr<Dataset> ds =
VOC(folder_path, "Detection", "train", {}, false, std::make_shared<SequentialSampler>(0, 4));
EXPECT_NE(ds, nullptr);
// Create a Repeat operation on ds
@@ -198,7 +199,7 @@ TEST_F(MindDataTestPipeline, TestIteratorNumEpoch) {
std::shared_ptr<SchemaObj> schema = Schema();
int32_t random_data_num_row = 2;
int32_t num_epochs = 3;
ASSERT_OK(schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}));
ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
std::shared_ptr<Dataset> ds = RandomData(random_data_num_row, schema)->SetNumWorkers(1);
std::shared_ptr<Iterator> iter = ds->CreateIterator({}, num_epochs);
@@ -228,7 +229,7 @@ TEST_F(MindDataTestPipeline, TestIteratorNumEpochFail) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestIteratorNumEpochFail.";
std::shared_ptr<SchemaObj> schema = Schema();
ASSERT_OK(schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2}));
ASSERT_OK(schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2}));
std::shared_ptr<Dataset> ds = RandomData(3, schema)->SetNumWorkers(1);
// expect nullptr due to incorrect num_epochs value.
EXPECT_EQ(ds->CreateIterator({}, 0), nullptr);

@@ -15,6 +15,8 @@
*/
#include "common/common.h"
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/core/tensor.h"
using namespace mindspore::dataset;
using mindspore::dataset::Tensor;
@@ -283,7 +285,8 @@ TEST_F(MindDataTestPipeline, TestMindDataSuccess7) {
// Pass a list of mindrecord file name, files in list will be read directly but not search for related files
std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
std::vector<std::string> file_list = {file_path1};
std::shared_ptr<Dataset> ds = MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
std::shared_ptr<Dataset> ds =
MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
EXPECT_NE(ds, nullptr);
// Create a Skip operation on ds, skip original data in mindrecord and get padded samples
@@ -332,11 +335,12 @@ TEST_F(MindDataTestPipeline, TestMindDataSuccess8) {
// Pass a list of mindrecord file name, files in list will be read directly but not search for related files
std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
std::vector<std::string> file_list = {file_path1};
std::shared_ptr<Dataset> ds = MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
std::shared_ptr<Dataset> ds =
MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
EXPECT_NE(ds, nullptr);
std::vector<DataType> types = ds->GetOutputTypes();
std::vector<TensorShape> shapes = ds->GetOutputShapes();
std::vector<DataType> types = ToDETypes(ds->GetOutputTypes());
std::vector<TensorShape> shapes = ToTensorShapeVec(ds->GetOutputShapes());
std::vector<std::string> column_names = {"file_name", "label"};
EXPECT_EQ(types.size(), 2);
EXPECT_EQ(types[0].ToString(), "string");
@@ -400,12 +404,14 @@ TEST_F(MindDataTestPipeline, TestMindDataSuccess9) {
// Pass a list of mindrecord file name, files in list will be read directly but not search for related files
std::string file_path1 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
std::vector<std::string> file_list = {file_path1};
std::shared_ptr<Dataset> ds1 = MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
std::shared_ptr<Dataset> ds1 =
MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
EXPECT_NE(ds1, nullptr);
ds1 = ds1->Skip(5);
EXPECT_NE(ds1, nullptr);
std::shared_ptr<Dataset> ds2 = MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
std::shared_ptr<Dataset> ds2 =
MindData(file_list, {"file_name", "label"}, std::make_shared<SequentialSampler>(), pad, 4);
EXPECT_NE(ds2, nullptr);
ds2 = ds2->Skip(5);
EXPECT_NE(ds2, nullptr);
@@ -598,7 +604,8 @@ TEST_F(MindDataTestPipeline, TestMindDataFail4) {
// Create a MindData Dataset
std::string file_path4 = datasets_root_path_ + "/../mindrecord/testMindDataSet/testImageNetData/imagenet.mindrecord0";
std::shared_ptr<Dataset> ds4 = MindData(file_path4, {"file_name", "label"}, std::make_shared<RandomSampler>(), pad2, 1);
std::shared_ptr<Dataset> ds4 =
MindData(file_path4, {"file_name", "label"}, std::make_shared<RandomSampler>(), pad2, 1);
EXPECT_NE(ds4, nullptr);
// Create an iterator over the result of the above dataset

@@ -14,6 +14,7 @@
* limitations under the License.
*/
#include "common/common.h"
#include "include/api/types.h"
#include "minddata/dataset/core/tensor_row.h"
#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
#include "minddata/dataset/include/datasets.h"
@@ -26,19 +27,39 @@ class MindDataTestPipeline : public UT::DatasetOpTesting {
protected:
};
mindspore::dataset::TensorRow BucketBatchTestFunction(mindspore::dataset::TensorRow input) {
TensorRow VecToRow(const MSTensorVec &v) {
TensorRow row;
for (const mindspore::MSTensor &t : v) {
std::shared_ptr<Tensor> rt;
Status rc =
Tensor::CreateFromMemory(TensorShape(t.Shape()), MSTypeToDEType(static_cast<mindspore::TypeId>(t.DataType())),
(const uchar *)(t.Data().get()), t.DataSize(), &rt);
row.emplace_back(rt);
}
return row;
}
MSTensorVec RowToVec(const TensorRow &v) {
MSTensorVec rv; // std::make_shared<DETensor>(de_tensor)
std::transform(v.begin(), v.end(), std::back_inserter(rv), [](std::shared_ptr<Tensor> t) -> mindspore::MSTensor {
return mindspore::MSTensor(std::make_shared<DETensor>(t));
});
return rv;
}
MSTensorVec BucketBatchTestFunction(MSTensorVec input) {
mindspore::dataset::TensorRow output;
std::shared_ptr<Tensor> out;
Tensor::CreateEmpty(mindspore::dataset::TensorShape({1}),
mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_INT32), &out);
out->SetItemAt({0}, 2);
output.push_back(out);
return output;
return RowToVec(output);
}
TensorRow Predicate1(TensorRow input) {
MSTensorVec Predicate1(MSTensorVec in) {
// Return true if input is equal to 3
uint64_t input_value;
TensorRow input = VecToRow(in);
input.at(0)->GetItemAt(&input_value, {0});
bool result = (input_value == 3);
@@ -50,13 +71,14 @@ TensorRow Predicate1(TensorRow input) {
out->SetItemAt({0}, result);
output.push_back(out);
return output;
return RowToVec(output);
}
TensorRow Predicate2(TensorRow input) {
MSTensorVec Predicate2(MSTensorVec in) {
// Return true if label is more than 1
// The index of label in input is 1
uint64_t input_value;
TensorRow input = VecToRow(in);
input.at(1)->GetItemAt(&input_value, {0});
bool result = (input_value > 1);
@@ -68,7 +90,7 @@ TensorRow Predicate2(TensorRow input) {
out->SetItemAt({0}, result);
output.push_back(out);
return output;
return RowToVec(output);
}
TEST_F(MindDataTestPipeline, TestBatchAndRepeat) {
@@ -158,7 +180,7 @@ TEST_F(MindDataTestPipeline, TestBucketBatchByLengthSuccess2) {
EXPECT_NE(ds, nullptr);
// Create a BucketBatchByLength operation on ds
std::map<std::string, std::pair<mindspore::dataset::TensorShape, std::shared_ptr<Tensor>>> pad_info;
std::map<std::string, std::pair<std::vector<int64_t>, mindspore::MSTensor>> pad_info = {};
ds = ds->BucketBatchByLength({"image"}, {1, 2}, {1, 2, 3}, &BucketBatchTestFunction, pad_info, true, true);
EXPECT_NE(ds, nullptr);
@@ -682,7 +704,7 @@ TEST_F(MindDataTestPipeline, TestFilterFail1) {
std::shared_ptr<Dataset> ds = TFRecord({data_file}, schema_file, {"image", "label"}, 0, ShuffleMode::kFalse);
EXPECT_NE(ds, nullptr);
std::function<TensorRow(TensorRow)> predicate_null = nullptr;
std::function<MSTensorVec(MSTensorVec)> predicate_null = nullptr;
// Create a Filter operation on ds
ds = ds->Filter(predicate_null);
@@ -998,22 +1020,22 @@ TEST_F(MindDataTestPipeline, TestMapDuplicateColumnFail) {
}
TEST_F(MindDataTestPipeline, TestMapNullOperation) {
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapNullOperation.";
// Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
EXPECT_NE(ds, nullptr);
// Create a Map operation on ds
std::shared_ptr<TensorTransform> operation = nullptr;
auto ds1 = ds->Map({operation}, {"image"}, {}, {});
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
// Expect failure: Operation is nullptr
EXPECT_EQ(iter1, nullptr);
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMapNullOperation.";
// Create an ImageFolder Dataset
std::string folder_path = datasets_root_path_ + "/testPK/data/";
std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
EXPECT_NE(ds, nullptr);
// Create a Map operation on ds
std::shared_ptr<TensorTransform> operation = nullptr;
auto ds1 = ds->Map({operation}, {"image"}, {}, {});
EXPECT_NE(ds1, nullptr);
// Create an iterator over the result of the above dataset
std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
// Expect failure: Operation is nullptr
EXPECT_EQ(iter1, nullptr);
}
TEST_F(MindDataTestPipeline, TestProjectMapAutoInjection) {

@@ -30,8 +30,8 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasic1) {
// Create a RandomDataset
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1});
std::shared_ptr<Dataset> ds = RandomData(50, schema);
EXPECT_NE(ds, nullptr);
@@ -74,8 +74,8 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetBasicWithPipeline) {
// Create two RandomDataset
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1});
std::shared_ptr<Dataset> ds1 = RandomData(50, schema);
std::shared_ptr<Dataset> ds2 = RandomData(50, schema);
EXPECT_NE(ds1, nullptr);
@@ -132,8 +132,8 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetGetters) {
// Create a RandomDataset
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1});
std::shared_ptr<Dataset> ds = RandomData(50, schema);
EXPECT_NE(ds, nullptr);
@@ -480,8 +480,8 @@ TEST_F(MindDataTestPipeline, TestRandomDatasetDuplicateColumnName) {
// Create a RandomDataset
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("image", mindspore::TypeId::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::TypeId::kNumberTypeUInt8, {1});
schema->add_column("image", mindspore::DataType::kNumberTypeUInt8, {2});
schema->add_column("label", mindspore::DataType::kNumberTypeUInt8, {1});
std::shared_ptr<Dataset> ds = RandomData(50, schema, {"image", "image"});
// Expect failure: duplicate column names
EXPECT_EQ(ds->CreateIterator(), nullptr);

@@ -1651,8 +1651,8 @@ TEST_F(MindDataTestPipeline, TestTruncateSequencePairSuccess1) {
// Create a RandomDataset which has column names "col1" and "col2"
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::TypeId::kNumberTypeInt16, {5});
schema->add_column("col2", mindspore::TypeId::kNumberTypeInt32, {3});
schema->add_column("col1", mindspore::DataType::kNumberTypeInt16, {5});
schema->add_column("col2", mindspore::DataType::kNumberTypeInt32, {3});
std::shared_ptr<Dataset> ds = RandomData(3, schema);
EXPECT_NE(ds, nullptr);
@@ -1719,8 +1719,8 @@ TEST_F(MindDataTestPipeline, TestTruncateSequencePairSuccess2) {
// Create a RandomDataset which has column names "col1" and "col2"
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::TypeId::kNumberTypeInt32, {4});
schema->add_column("col2", mindspore::TypeId::kNumberTypeInt64, {4});
schema->add_column("col1", mindspore::DataType::kNumberTypeInt32, {4});
schema->add_column("col2", mindspore::DataType::kNumberTypeInt64, {4});
std::shared_ptr<Dataset> ds = RandomData(4, schema);
EXPECT_NE(ds, nullptr);
@@ -1780,8 +1780,8 @@ TEST_F(MindDataTestPipeline, TestTruncateSequencePairFail) {
// Create a RandomDataset which has column names "col1" and "col2"
std::shared_ptr<SchemaObj> schema = Schema();
schema->add_column("col1", mindspore::TypeId::kNumberTypeInt8, {3});
schema->add_column("col2", mindspore::TypeId::kNumberTypeInt8, {3});
schema->add_column("col1", mindspore::DataType::kNumberTypeInt8, {3});
schema->add_column("col2", mindspore::DataType::kNumberTypeInt8, {3});
std::shared_ptr<Dataset> ds = RandomData(3, schema);
EXPECT_NE(ds, nullptr);

@@ -22,18 +22,31 @@ extern "C" {
#endif
#endif
void DatasetOpTesting::SetUp() {
std::string install_home = "data/dataset";
datasets_root_path_ = install_home;
mindrecord_root_path_ = "data/mindrecord";
}
std::vector<mindspore::dataset::TensorShape> DatasetOpTesting::ToTensorShapeVec(
const std::vector<std::vector<int64_t>> &v) {
std::vector<mindspore::dataset::TensorShape> ret_v;
std::transform(v.begin(), v.end(), std::back_inserter(ret_v),
[](const auto &s) { return mindspore::dataset::TensorShape(s); });
return ret_v;
}
std::vector<mindspore::dataset::DataType> DatasetOpTesting::ToDETypes(const std::vector<mindspore::DataType> &t) {
std::vector<mindspore::dataset::DataType> ret_t;
std::transform(t.begin(), t.end(), std::back_inserter(ret_t), [](const mindspore::DataType &t) {
return mindspore::dataset::MSTypeToDEType(static_cast<mindspore::TypeId>(t));
});
return ret_t;
}
#ifdef __cplusplus
#if __cplusplus
}
#endif
#endif
} // namespace UT

@@ -18,6 +18,9 @@
#include "gtest/gtest.h"
#include "include/api/status.h"
#include "include/api/types.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/include/type_id.h"
#include "utils/log_adapter.h"
using mindspore::Status;
@@ -62,19 +65,19 @@ using mindspore::StatusCode;
namespace UT {
class Common : public testing::Test {
public:
// every TEST_F macro will enter one
virtual void SetUp();
// every TEST_F macro will enter one
virtual void SetUp();
virtual void TearDown();
virtual void TearDown();
};
class DatasetOpTesting : public Common {
public:
std::string datasets_root_path_;
std::string mindrecord_root_path_;
void SetUp() override;
std::vector<mindspore::dataset::TensorShape> ToTensorShapeVec(const std::vector<std::vector<int64_t>> &v);
std::vector<mindspore::dataset::DataType> ToDETypes(const std::vector<mindspore::DataType> &t);
std::string datasets_root_path_;
std::string mindrecord_root_path_;
void SetUp() override;
};
} // namespace UT
#endif // TESTS_DATASET_UT_CORE_COMMON_DE_UT_COMMON_H_

@@ -329,7 +329,7 @@ TEST_F(MindDataTestCallback, TestCAPICallback) {
std::shared_ptr<DSCallback> cb1 = tst_cb;
// Create a RandomDataset. Use random_data to avoid I/O
std::shared_ptr<SchemaObj> schema = Schema();
ASSERT_OK(schema->add_column("label", mindspore::TypeId::kNumberTypeUInt32, {}));
ASSERT_OK(schema->add_column("label", mindspore::DataType::kNumberTypeUInt32, {}));
std::shared_ptr<Dataset> ds = RandomData(44, schema);
ASSERT_NE(ds, nullptr);
ds = ds->Map({std::make_shared<transforms::TypeCast>("uint64")}, {"label"}, {}, {}, nullptr, {cb1});
