diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index c7a0aaf18d..ae582ad35b 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -16,7 +16,33 @@ set(MIND_DATA_LIB_DIR ${RUNTIME_PKG_NAME}/minddata/lib) set(LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/lib) -if(BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper") +if(BUILD_MINDDATA STREQUAL "full") + install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/liteapi/include/ DESTINATION + ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE) + install(FILES ${TOP_DIR}/include/api/status.h DESTINATION ${MIND_DATA_INC_DIR} + RENAME ms_status.h COMPONENT ${RUNTIME_COMPONENT_NAME}) + + if(PLATFORM_ARM64) + file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so) + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so + DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + elseif(PLATFORM_ARM32) + file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so) + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION + ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + else() + install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION + ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib + RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib + RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() +endif() + +if(BUILD_MINDDATA STREQUAL "wrapper") install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ 
DESTINATION ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE) if(PLATFORM_ARM64) diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index 2c2bfd88ee..4d9d52e9de 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -79,6 +79,7 @@ // IR leaf nodes #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" // IR leaf nodes disabled for android #ifndef ENABLE_ANDROID @@ -89,7 +90,6 @@ #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h" #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" #include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h" @@ -390,7 +390,7 @@ std::shared_ptr MindData(const std::vector &datase return ds; } - +#endif // Function to create a MnistDataset. 
std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr &sampler, @@ -399,6 +399,8 @@ std::shared_ptr Mnist(const std::string &dataset_dir, const std::s return ds; } + +#ifndef ENABLE_ANDROID // Function to overload "+" operator to concat two datasets std::shared_ptr operator+(const std::shared_ptr &datasets1, const std::shared_ptr &datasets2) { @@ -906,12 +908,15 @@ MindDataDataset::MindDataDataset(const std::vector &dataset_files, auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); ir_node_ = std::static_pointer_cast(ds); } +#endif MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto ds = std::make_shared(dataset_dir, usage, sampler, cache); ir_node_ = std::static_pointer_cast(ds); } + +#ifndef ENABLE_ANDROID TextFileDataset::TextFileDataset(const std::vector &dataset_files, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { diff --git a/mindspore/ccsrc/minddata/dataset/api/samplers.cc b/mindspore/ccsrc/minddata/dataset/api/samplers.cc index b02ff7f1b6..fab24cb1f9 100644 --- a/mindspore/ccsrc/minddata/dataset/api/samplers.cc +++ b/mindspore/ccsrc/minddata/dataset/api/samplers.cc @@ -15,6 +15,7 @@ */ #include "minddata/dataset/include/samplers.h" +#include "minddata/dataset/core/config_manager.h" #include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" #include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" @@ -32,8 +33,6 @@ #include "minddata/mindrecord/include/shard_sequential_sample.h" #include "minddata/mindrecord/include/shard_shuffle.h" #include "minddata/dataset/util/random.h" -#else -#include "minddata/dataset/core/config_manager.h" #endif namespace mindspore { diff --git 
a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc index b0196f922f..ac43271aa5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc @@ -25,9 +25,9 @@ #include "minddata/dataset/engine/datasetops/device_queue_op.h" #include "minddata/dataset/engine/opt/pre/getter_pass.h" #include "minddata/dataset/engine/tree_adapter.h" -#include "minddata/mindrecord/include/shard_index_generator.h" #ifndef ENABLE_ANDROID +#include "minddata/mindrecord/include/shard_index_generator.h" #include "minddata/mindrecord/include/shard_header.h" #include "minddata/mindrecord/include/shard_writer.h" #endif @@ -324,10 +324,9 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map &column_name_id_map, - nlohmann::json *row_raw_data, - std::map>> *row_bin_data) { +static Status ValidateInputParams(nlohmann::json *row_raw_data, + std::map>> *row_bin_data, + const std::unordered_map &column_name_id_map) { if (row_raw_data == nullptr) { RETURN_STATUS_UNEXPECTED("Error: row raw data is NULL."); } @@ -337,76 +336,104 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, if (column_name_id_map.empty()) { RETURN_STATUS_UNEXPECTED("Error: column not found"); } + return Status::OK(); +} + +Status SaveToDisk::FetchFloatData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, + std::unique_ptr> *data_ptr) { + auto column_type = tensor->type(); Status s; + if (column_type == DataType::DE_FLOAT32) { + std::unique_ptr data, dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_FLOAT64) { + std::unique_ptr data, dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), 
tensor->Size(), &data, data_ptr, &dummy); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } + return Status::OK(); +} + +Status SaveToDisk::FetchItemData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, + std::map>> *row_bin_data) { + auto column_type = tensor->type(); + Status s; + std::unique_ptr> data_ptr; + if (column_type == DataType::DE_INT8) { + std::unique_ptr data; + std::unique_ptr dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_INT16) { + std::unique_ptr data; + std::unique_ptr dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_UINT16) { + std::unique_ptr data; + std::unique_ptr dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_UINT8) { + std::unique_ptr data, dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_INT32) { + std::unique_ptr data, dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_UINT32) { + std::unique_ptr data; + std::unique_ptr dummy; + s = TransformTensor(tensor->GetBuffer(), 
tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_INT64) { + std::unique_ptr data, dummy; + s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); + RETURN_IF_NOT_OK(s); + if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); + } else if (column_type == DataType::DE_FLOAT32 || column_type == DataType::DE_FLOAT64) { + s = FetchFloatData(tensor, column_name, row_raw_data, &data_ptr); + RETURN_IF_NOT_OK(s); + } else if (column_type == DataType::DE_STRING) { + std::string_view sv; + RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0})); // assume scalar string tensor + std::string ss(sv); + (*row_raw_data)[column_name] = std::move(ss); + return Status::OK(); + } else { + RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); + } + if (data_ptr != nullptr) { + (*row_bin_data)[column_name] = std::move(data_ptr); + } + return Status::OK(); +} + +Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, + const std::unordered_map &column_name_id_map, + nlohmann::json *row_raw_data, + std::map>> *row_bin_data) { + Status s; + s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map); + if (s.IsError()) { + return s; + } for (auto &col : column_name_id_map) { auto idx = col.second; auto column_name = col.first; auto &tensor = row[idx]; - auto column_type = tensor->type(); - - std::unique_ptr> data_ptr; - if (column_type == DataType::DE_INT8) { - std::unique_ptr data; - std::unique_ptr dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_INT16) { - std::unique_ptr data; - std::unique_ptr dummy; - s = TransformTensor(tensor->GetBuffer(), 
tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_UINT16) { - std::unique_ptr data; - std::unique_ptr dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_UINT8) { - std::unique_ptr data, dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_INT32) { - std::unique_ptr data, dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_UINT32) { - std::unique_ptr data; - std::unique_ptr dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_INT64) { - std::unique_ptr data, dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_FLOAT32) { - std::unique_ptr data, dummy; - s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_FLOAT64) { - std::unique_ptr data, dummy; - s = 
TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy); - RETURN_IF_NOT_OK(s); - if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data); - } else if (column_type == DataType::DE_STRING) { - std::string_view sv; - RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0})); // assume scalar string tensor - std::string ss(sv); - (*row_raw_data)[column_name] = std::move(ss); - continue; - } else { - RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); - } + s = FetchItemData(tensor, column_name, row_raw_data, row_bin_data); RETURN_IF_NOT_OK(s); - if (data_ptr != nullptr) { - (*row_bin_data)[column_name] = std::move(data_ptr); - } } return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h index 30d9e2f7a4..0c77266f20 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.h @@ -130,6 +130,12 @@ class SaveToDisk : public TreeConsumer { nlohmann::json *row_raw_data, std::map>> *row_bin_data); + Status FetchFloatData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, + std::unique_ptr> *data_ptr); + + Status FetchItemData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, + std::map>> *row_bin_data); + std::string dataset_path_; int32_t num_files_; std::string dataset_type_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/cpu_map_job.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/cpu_map_job.cc index 1d5967a390..907fe52c1d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/cpu_map_job.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/map_op/cpu_map_job.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include "minddata/dataset/engine/datasetops/map_op/cpu_map_job.h" namespace 
mindspore { diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc index e3e82b3baa..396c3a7a7a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pass.cc @@ -86,8 +86,9 @@ #include "minddata/dataset/engine/datasetops/source/csv_op.h" #include "minddata/dataset/engine/datasetops/source/text_file_op.h" #endif -#include "minddata/dataset/engine/datasetops/source/voc_op.h" + #ifdef ENABLE_PYTHON +#include "minddata/dataset/engine/datasetops/source/voc_op.h" #include "minddata/dataset/engine/datasetops/filter_op.h" #include "minddata/dataset/engine/datasetops/source/generator_op.h" #endif diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc index 907f4612c1..a859095e8c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc @@ -80,9 +80,10 @@ Status ProfilingManager::Initialize() { std::shared_ptr connector_thr_sampling = std::make_shared(tree_); RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling)); +#ifndef ENABLE_ANDROID std::shared_ptr cpu_sampling = std::make_shared(tree_); RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling)); - +#endif return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc index bc0adc221f..ad2d7f00e8 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc @@ -18,14 +18,16 @@ #include "minddata/dataset/core/client.h" #include "minddata/dataset/engine/ir/datasetops/root_node.h" +#ifndef ENABLE_ANDROID #include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h" +#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" +#include 
"minddata/dataset/engine/opt/post/repeat_pass.h" +#endif #include "minddata/dataset/engine/opt/pass.h" #include "minddata/dataset/engine/opt/post/auto_worker_pass.h" -#include "minddata/dataset/engine/opt/post/repeat_pass.h" #ifdef ENABLE_PYTHON #include "minddata/dataset/engine/opt/post/generator_node_pass.h" #endif -#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h" #include "minddata/dataset/engine/opt/pre/cache_validation_pass.h" #include "minddata/dataset/engine/opt/pre/deep_copy_pass.h" #include "minddata/dataset/engine/opt/pre/epoch_ctrl_pass.h" @@ -55,7 +57,9 @@ Status TreeAdapter::PrePass(std::shared_ptr ir) { actions.emplace_back(std::make_unique()); actions.emplace_back(std::make_unique()); if (usage_ == kDeGetter) actions.emplace_back(std::make_unique()); +#ifndef ENABLE_ANDROID actions.emplace_back(std::make_unique()); +#endif // Vector of flags for each action std::vector modified(actions.size(), false); // Apply pre-pass actions @@ -72,7 +76,9 @@ Status TreeAdapter::Optimize(std::shared_ptr ir) { // Vector of optimizations std::vector> optimizations; MS_LOG(INFO) << "Running optimization pass loops"; +#ifndef ENABLE_ANDROID optimizations.emplace_back(std::make_unique()); +#endif // Apply optimization pass actions for (auto i = 0; i < optimizations.size(); i++) { bool modified = false; @@ -95,8 +101,9 @@ Status TreeAdapter::PostPass(std::shared_ptr ir) { #ifdef ENABLE_PYTHON actions.emplace_back(std::make_unique()); #endif +#ifndef ENABLE_ANDROID actions.emplace_back(std::make_unique()); - +#endif // We will gradually move RepeatPass from ExecutionTree::PrepareTreePostAction to here. 
// Vector of flags for each action diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index da139f6a4f..421d3b9193 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -830,6 +830,7 @@ std::shared_ptr MindData(const std::vector &datase const std::vector &columns_list = {}, const std::shared_ptr &sampler = RandomSampler(), nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +#endif class MnistDataset : public Dataset { public: @@ -850,7 +851,7 @@ class MnistDataset : public Dataset { std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", const std::shared_ptr &sampler = RandomSampler(), const std::shared_ptr &cache = nullptr); - +#ifndef ENABLE_ANDROID /// \brief Function to create a ConcatDataset /// \notes Reload "+" operator to concat two datasets /// \param[in] datasets1 Shared pointer to the first dataset to be concatenated diff --git a/mindspore/ccsrc/minddata/dataset/include/samplers.h b/mindspore/ccsrc/minddata/dataset/include/samplers.h index 7062ff3b6b..50d111521f 100644 --- a/mindspore/ccsrc/minddata/dataset/include/samplers.h +++ b/mindspore/ccsrc/minddata/dataset/include/samplers.h @@ -20,6 +20,7 @@ #include #include #include +#include #include "minddata/dataset/include/status.h" #ifndef ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/allocator.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/allocator.h new file mode 100644 index 0000000000..5a6e052bc1 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/allocator.h @@ -0,0 +1,190 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include "include/memory_pool.h" + +namespace mindspore { +namespace dataset { +// The following conforms to the requirements of +// std::allocator. Do not rename/change any needed +// requirements, e.g. function names, typedef etc. +template +class Allocator { + public: + template + friend class Allocator; + + using value_type = T; + using pointer = T *; + using const_pointer = const T *; + using reference = T &; + using const_reference = const T &; + using size_type = uint64_t; + using difference_type = std::ptrdiff_t; + + template + struct rebind { + using other = Allocator; + }; + + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + + explicit Allocator(const std::shared_ptr &b) : pool_(b) {} + + ~Allocator() = default; + + template + explicit Allocator(Allocator const &rhs) : pool_(rhs.pool_) {} + + template + bool operator==(Allocator const &rhs) const { + return pool_ == rhs.pool_; + } + + template + bool operator!=(Allocator const &rhs) const { + return pool_ != rhs.pool_; + } + + pointer allocate(std::size_t n) { + void *p = nullptr; + Status rc = pool_->Allocate(n * sizeof(T), &p); + if (rc.IsOk()) { + return reinterpret_cast(p); + } else if (rc == StatusCode::kMDOutOfMemory) { + throw std::bad_alloc(); + } else { + throw 
std::exception(); + } + } + + void deallocate(pointer p, std::size_t n = 0) noexcept { pool_->Deallocate(p); } + + size_type max_size() { return pool_->get_max_size(); } + + private: + std::shared_ptr pool_; +}; +/// \brief It is a wrapper of unique_ptr with a custom Allocator class defined above +template , typename... Args> +Status MakeUnique(std::unique_ptr> *out, C alloc, size_t n, Args &&... args) { + RETURN_UNEXPECTED_IF_NULL(out); + CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "size must be positive"); + try { + T *data = alloc.allocate(n); + // Some of our implementation of allocator (e.g. NumaAllocator) don't throw std::bad_alloc. + // So we have to catch for null ptr + if (data == nullptr) { + return Status(StatusCode::kMDOutOfMemory); + } + if (!std::is_arithmetic::value) { + for (auto i = 0; i < n; i++) { + std::allocator_traits::construct(alloc, &(data[i]), std::forward(args)...); + } + } + auto deleter = [](T *p, C f_alloc, size_t f_n) { + if (!std::is_arithmetic::value && std::is_destructible::value) { + for (auto i = 0; i < f_n; ++i) { + std::allocator_traits::destroy(f_alloc, &p[i]); + } + } + f_alloc.deallocate(p, f_n); + }; + *out = std::unique_ptr>(data, std::bind(deleter, std::placeholders::_1, alloc, n)); + } catch (const std::bad_alloc &e) { + return Status(StatusCode::kMDOutOfMemory); + } catch (const std::exception &e) { + RETURN_STATUS_UNEXPECTED(e.what()); + } + return Status::OK(); +} + +/// \brief It is a wrapper of the above custom unique_ptr with some additional methods +/// \tparam T The type of object to be allocated +/// \tparam C Allocator. Default to std::allocator +template > +class MemGuard { + public: + using allocator = C; + MemGuard() : n_(0) {} + explicit MemGuard(allocator a) : n_(0), alloc_(a) {} + // There is no copy constructor nor assignment operator because the memory is solely owned by this object. 
+ MemGuard(const MemGuard &) = delete; + MemGuard &operator=(const MemGuard &) = delete; + // On the other hand, We can support move constructor + MemGuard(MemGuard &&lhs) noexcept : n_(lhs.n_), alloc_(std::move(lhs.alloc_)), ptr_(std::move(lhs.ptr_)) {} + MemGuard &operator=(MemGuard &&lhs) noexcept { + if (this != &lhs) { + this->deallocate(); + n_ = lhs.n_; + alloc_ = std::move(lhs.alloc_); + ptr_ = std::move(lhs.ptr_); + } + return *this; + } + /// \brief Explicitly deallocate the memory if allocated + void deallocate() { + if (ptr_) { + ptr_.reset(); + } + } + /// \brief Allocate memory (with emplace feature). Previous one will be released. If size is 0, no new memory is + /// allocated. + /// \param n Number of objects of type T to be allocated + /// \tparam Args Extra arguments pass to the constructor of T + template + Status allocate(size_t n, Args &&... args) noexcept { + deallocate(); + n_ = n; + return MakeUnique(&ptr_, alloc_, n, std::forward(args)...); + } + ~MemGuard() noexcept { deallocate(); } + /// \brief Getter function + /// \return The pointer to the memory allocated + T *GetPointer() const { return ptr_.get(); } + /// \brief Getter function + /// \return The pointer to the memory allocated + T *GetMutablePointer() { return ptr_.get(); } + /// \brief Overload [] operator to access a particular element + /// \param x index to the element. Must be less than number of element allocated. + /// \return pointer to the x-th element + T *operator[](size_t x) { return GetMutablePointer() + x; } + /// \brief Overload [] operator to access a particular element + /// \param x index to the element. Must be less than number of element allocated. 
+ /// \return pointer to the x-th element + T *operator[](size_t x) const { return GetPointer() + x; } + /// \brief Return how many bytes are allocated in total + /// \return Number of bytes allocated in total + size_t GetSizeInBytes() const { return n_ * sizeof(T); } + + private: + size_t n_; + allocator alloc_; + std::unique_ptr> ptr_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/constants.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/constants.h new file mode 100644 index 0000000000..480eb682f0 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/constants.h @@ -0,0 +1,109 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ + +#include +#include +#include + +namespace mindspore { +namespace dataset { +// Various type defines for convenience +using uchar = unsigned char; +using dsize_t = int64_t; + +// Target devices to perform map operation +enum class MapTargetDevice { kCpu, kGpu, kDvpp }; + +// Possible dataset types for holding the data and client type +enum class DatasetType { kUnknown, kArrow, kTf }; + +// Possible flavours of Tensor implementations +enum class TensorImpl { kNone, kFlexible, kCv, kNP }; + +// Possible values for shuffle +enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2 }; + +// Possible values for Border types +enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; + +// Possible values for Image format types in a batch +enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 }; + +// Possible values for Image format types +enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 }; + +// Possible interpolation modes +enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 }; + +// Possible JiebaMode modes +enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 }; + +// Possible values for SPieceTokenizerOutType +enum class SPieceTokenizerOutType { kString = 0, kInt = 1 }; + +// Possible values for SPieceTokenizerLoadType +enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 }; + +// Possible values for SentencePieceModel +enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 }; + +// Possible values for NormalizeForm +enum class NormalizeForm { + kNone = 0, + kNfc, + kNfkc, + kNfd, + kNfkd, +}; + +// convenience functions for 32bit int bitmask +inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; } + +inline void BitSet(uint32_t *bits, uint32_t bitMask) { *bits |= bitMask; } + +inline void BitClear(uint32_t *bits, 
uint32_t bitMask) { *bits &= (~bitMask); } + +constexpr int32_t kDeMaxDim = std::numeric_limits::max(); // 2147483647 or 2^32 -1 +constexpr int32_t kDeMaxRank = std::numeric_limits::max(); +constexpr int64_t kDeMaxFreq = std::numeric_limits::max(); // 9223372036854775807 or 2^(64-1) +constexpr int64_t kDeMaxTopk = std::numeric_limits::max(); + +constexpr uint32_t kCfgRowsPerBuffer = 1; +constexpr uint32_t kCfgParallelWorkers = 4; +constexpr uint32_t kCfgWorkerConnectorSize = 16; +constexpr uint32_t kCfgOpConnectorSize = 16; +constexpr int32_t kCfgDefaultRankId = -1; +constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed; +constexpr uint32_t kCfgMonitorSamplingInterval = 10; +constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds +constexpr int32_t kCfgDefaultCachePort = 50052; +constexpr char kCfgDefaultCacheHost[] = "127.0.0.1"; +constexpr int32_t kDftPrefetchSize = 20; +constexpr int32_t kDftNumConnections = 12; +constexpr int32_t kDftAutoNumWorkers = false; + +// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h) +constexpr uint8_t kCVInvalidType = 255; + +using connection_id_type = uint64_t; +using session_id_type = uint32_t; +using row_id_type = int64_t; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/data_type.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/data_type.h new file mode 100644 index 0000000000..92b91b46a8 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/data_type.h @@ -0,0 +1,291 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_ + +#include + +#include "include/constants.h" +namespace mindspore { +namespace dataset { + +// Class that represents basic data types in DataEngine. +class DataType { + public: + enum Type : uint8_t { + DE_UNKNOWN = 0, + DE_BOOL, + DE_INT8, + DE_UINT8, + DE_INT16, + DE_UINT16, + DE_INT32, + DE_UINT32, + DE_INT64, + DE_UINT64, + DE_FLOAT16, + DE_FLOAT32, + DE_FLOAT64, + DE_STRING, + NUM_OF_TYPES + }; + + struct TypeInfo { + const char *name_; // name to be represent the type while printing + const uint8_t sizeInBytes_; // number of bytes needed for this type + const char *pybindType_; // Python matching type, used in get_output_types + const std::string pybindFormatDescriptor_; // pybind format used for numpy types + const uint8_t cvType_; // OpenCv matching type + }; + + // android and no python + static inline const TypeInfo kTypeInfo[] = { + // name, sizeInBytes, formatDescriptor + {"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN + {"bool", 1, "bool", ""}, // DE_BOOL + {"int8", 1, "int8", ""}, // DE_INT8 + {"uint8", 1, "uint8", ""}, // DE_UINT8 + {"int16", 2, "int16", ""}, // DE_INT16 + {"uint16", 2, "uint16", ""}, // DE_UINT16 + {"int32", 4, "int32", ""}, // DE_INT32 + {"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32 + {"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64 + {"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64 + {"float16", 2, "float16", ""}, // DE_FLOAT16 + {"float32", 4, 
"float32", ""}, // DE_FLOAT32 + {"float64", 8, "double", ""}, // DE_FLOAT64 + {"string", 0, "bytes", "", kCVInvalidType} // DE_STRING + }; + + // No arg constructor to create an unknown shape + DataType() : type_(DE_UNKNOWN) {} + + // Create a type from a given string + /// \param type_str + explicit DataType(const std::string &type_str); + + // Default destructor + ~DataType() = default; + + // Create a type from a given enum + /// \param d + constexpr explicit DataType(Type d) : type_(d) {} + + constexpr bool operator==(const DataType a) const { return type_ == a.type_; } + + constexpr bool operator==(const Type a) const { return type_ == a; } + + constexpr bool operator!=(const DataType a) const { return type_ != a.type_; } + + constexpr bool operator!=(const Type a) const { return type_ != a; } + + // Disable this usage `if(d)` where d is of type DataType + /// \return + operator bool() = delete; + + // To be used in Switch/case + /// \return + operator Type() const { return type_; } + + // The number of bytes needed to store one value of this type + /// \return + uint8_t SizeInBytes() const; + + // Returns a string representation of the type + /// \return + std::string ToString() const; + + // returns true if the template type is the same as the Tensor type_ + /// \tparam T + /// \return true or false + template + bool IsCompatible() const { + return type_ == FromCType(); + } + + // returns true if the template type is the same as the Tensor type_ + /// \tparam T + /// \return true or false + template + bool IsLooselyCompatible() const; + + // << Stream output operator overload + /// \notes This allows you to print the info using stream operators + /// \param out - reference to the output stream being overloaded + /// \param rO - reference to the DataType to display + /// \return - the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const DataType &so) { + out << so.ToString(); + return out; + } + + template + static 
DataType FromCType(); + + // Get the buffer string format of the current type. Used in pybind buffer protocol. + /// \return + std::string GetPybindFormat() const; + + bool IsSignedInt() const { + return type_ == DataType::DE_INT8 || type_ == DataType::DE_INT16 || type_ == DataType::DE_INT32 || + type_ == DataType::DE_INT64; + } + + bool IsUnsignedInt() const { + return type_ == DataType::DE_UINT8 || type_ == DataType::DE_UINT16 || type_ == DataType::DE_UINT32 || + type_ == DataType::DE_UINT64; + } + + bool IsInt() const { return IsSignedInt() || IsUnsignedInt(); } + + bool IsFloat() const { + return type_ == DataType::DE_FLOAT16 || type_ == DataType::DE_FLOAT32 || type_ == DataType::DE_FLOAT64; + } + + bool IsBool() const { return type_ == DataType::DE_BOOL; } + + bool IsNumeric() const { return type_ != DataType::DE_STRING; } + + Type value() const { return type_; } + + private: + Type type_; +}; + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_BOOL); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_FLOAT64); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_FLOAT32); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT64); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT64); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT32); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT32); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT16); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT16); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_INT8); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_UINT8); +} + 
+template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_STRING); +} + +template <> +inline DataType DataType::FromCType() { + return DataType(DataType::DE_STRING); +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_BOOL; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_FLOAT64 || type_ == DataType::DE_FLOAT32; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_FLOAT32; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_INT64 || type_ == DataType::DE_INT32 || type_ == DataType::DE_INT16 || + type_ == DataType::DE_INT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_UINT64 || type_ == DataType::DE_UINT32 || type_ == DataType::DE_UINT16 || + type_ == DataType::DE_UINT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_INT32 || type_ == DataType::DE_INT16 || type_ == DataType::DE_INT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_UINT32 || type_ == DataType::DE_UINT16 || type_ == DataType::DE_UINT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_INT16 || type_ == DataType::DE_INT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_UINT16 || type_ == DataType::DE_UINT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_INT8; +} + +template <> +inline bool DataType::IsLooselyCompatible() const { + return type_ == DataType::DE_UINT8; +} +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h 
b/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h new file mode 100644 index 0000000000..86a5327553 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h @@ -0,0 +1,254 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "include/iterator.h" +#include "include/samplers.h" + +namespace mindspore { +namespace dataset { + +class Tensor; +class TensorShape; +class TreeGetters; + +class DatasetCache; +class DatasetNode; +class Iterator; +class TensorOperation; +class SchemaObj; +class SamplerObj; + +// Dataset classes (in alphabetical order) +class BatchDataset; +class MapDataset; +class ProjectDataset; +class ShuffleDataset; +class DSCallback; + +/// \class Dataset datasets.h +/// \brief A base class to represent a dataset in the data pipeline. 
+class Dataset : public std::enable_shared_from_this { + public: + // need friend class so they can access the children_ field + friend class Iterator; + friend class TransferNode; + + /// \brief Constructor + Dataset(); + + /// \brief Destructor + ~Dataset() = default; + + /// \brief Gets the dataset size + /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting + /// dataset size at the expense of accuracy. + /// \return dataset size. If failed, return -1 + int64_t GetDatasetSize(bool estimate = false); + + // /// \brief Gets the output type + // /// \return a vector of DataType. If failed, return an empty vector + // std::vector GetOutputTypes(); + + /// \brief Gets the output shape + /// \return a vector of TensorShape. If failed, return an empty vector + std::vector GetOutputShapes(); + + /// \brief Gets the batch size + /// \return int64_t + int64_t GetBatchSize(); + + /// \brief Gets the repeat count + /// \return int64_t + int64_t GetRepeatCount(); + + /// \brief Gets the number of classes + /// \return number of classes. If failed, return -1 + int64_t GetNumClasses(); + + /// \brief Gets the column names + /// \return Names of the columns. If failed, return an empty vector + std::vector GetColumnNames(); + + /// \brief Gets the class indexing + /// \return a map of ClassIndexing. If failed, return an empty map + std::vector>> GetClassIndexing(); + + /// \brief Setter function for runtime number of workers + /// \param[in] num_workers The number of threads in this operator + /// \return Shared pointer to the original object + std::shared_ptr SetNumWorkers(int32_t num_workers); + + /// \brief Function to create an Iterator over the Dataset pipeline + /// \param[in] columns List of columns to be used to specify the order of columns + /// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs. 
+ /// An empty row is returned at the end of each epoch + /// \return Shared pointer to the Iterator + std::shared_ptr CreateIterator(std::vector columns = {}, int32_t num_epochs = -1); + + /// \brief Function to create a BatchDataset + /// \notes Combines batch_size number of consecutive rows into batches + /// \param[in] batch_size The number of rows each batch is created with + /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete + /// batch. If true, and if there are less than batch_size rows + /// available to make the last batch, then those rows will + /// be dropped and not propagated to the next node + /// \return Shared pointer to the current BatchDataset + std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); + + /// \brief Function to create a MapDataset + /// \notes Applies each operation in operations to this dataset + /// \param[in] operations Vector of operations to be applied on the dataset. Operations are + /// applied in the order they appear in this list + /// \param[in] input_columns Vector of the names of the columns that will be passed to the first + /// operation as input. The size of this list must match the number of + /// input columns expected by the first operator. The default input_columns + /// is the first column + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation + /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// The size of this list must match the number of output columns of the + /// last operation. The default output_columns will have the same + /// name as the input columns, i.e., the columns will be replaced + /// \param[in] project_columns A list of column names to project + /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
+ /// \return Shared pointer to the current MapDataset + std::shared_ptr Map(std::vector> operations, + const std::vector &input_columns = {}, + const std::vector &output_columns = {}, + const std::vector &project_columns = {}, + const std::shared_ptr &cache = nullptr, + std::vector> callbacks = {}) { + return std::make_shared(shared_from_this(), operations, input_columns, output_columns, project_columns, + cache, callbacks); + } + + /// \brief Function to create a Project Dataset + /// \notes Applies project to the dataset + /// \param[in] columns The name of columns to project + /// \return Shared pointer to the current Dataset + std::shared_ptr Project(const std::vector &columns) { + return std::make_shared(shared_from_this(), columns); + } + + /// \brief Function to create a Shuffle Dataset + /// \notes Randomly shuffles the rows of this dataset + /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling + /// \return Shared pointer to the current ShuffleDataset + std::shared_ptr Shuffle(int32_t buffer_size) { + return std::make_shared(shared_from_this(), buffer_size); + } + + std::shared_ptr IRNode() { return ir_node_; } + + protected: + std::shared_ptr tree_getters_; + std::shared_ptr ir_node_; +}; + +class BatchDataset : public Dataset { + public: + BatchDataset(std::shared_ptr input, int32_t batch_size, bool drop_remainder = false); + ~BatchDataset() = default; +}; + +class MapDataset : public Dataset { + public: + MapDataset(std::shared_ptr input, std::vector> operations, + const std::vector &input_columns, const std::vector &output_columns, + const std::vector &project_columns, const std::shared_ptr &cache, + std::vector> callbacks); + ~MapDataset() = default; +}; + +class ProjectDataset : public Dataset { + public: + ProjectDataset(std::shared_ptr input, const std::vector &columns); + ~ProjectDataset() = default; +}; + +class ShuffleDataset : public Dataset { + public: + ShuffleDataset(std::shared_ptr input, int32_t 
buffer_size); + ~ShuffleDataset() = default; +}; + +/// \brief Function to create a SchemaObj +/// \param[in] schema_file Path of schema file +/// \return Shared pointer to the current schema +std::shared_ptr Schema(const std::string &schema_file = ""); + +class AlbumDataset : public Dataset { + public: + AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names = {}, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + ~AlbumDataset() = default; +}; + +/// \brief Function to create an AlbumDataset +/// \notes The generated dataset is specified through setting a schema +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] data_schema Path to dataset schema file +/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns. +/// (default = {}) +/// \param[in] decode the option to decode the images in dataset (default = false) +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
+/// \return Shared pointer to the current Dataset +std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names = {}, bool decode = false, + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + +class MnistDataset : public Dataset { + public: + explicit MnistDataset(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + ~MnistDataset() = default; +}; + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all"). +/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// \return Shared pointer to the current MnistDataset +std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = RandomSampler(), + const std::shared_ptr &cache = nullptr); + +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h new file mode 100644 index 0000000000..5229b09729 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h @@ -0,0 +1,58 @@ +/** + * Copyright 2020-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_ + +#include +#include +#include "include/api/types.h" +#include "include/constants.h" +#include "dataset/include/transforms.h" + +namespace mindspore { +namespace dataset { + +// class to run tensor operations in eager mode +class Execute { + public: + /// \brief Constructor + explicit Execute(std::shared_ptr op); + + explicit Execute(std::vector> ops); + + /// \brief Destructor + ~Execute() = default; + + /// \brief callable function to execute the TensorOperation in eager mode + /// \param[in] input Tensor to be transformed + /// \param[out] output Transformed tensor + /// \return Status code + Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output); + + /// \brief callable function to execute the TensorOperation in eager mode + /// \param[in] input_tensor_list List of Tensor to be transformed + /// \param[out] out Result tensor after transform + /// \return - Status + Status operator()(const std::vector &input_tensor_list, std::vector *out); + + private: + std::vector> ops_; +}; + +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h new file mode 100644 index 0000000000..fe1b1fe032 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h @@ -0,0 +1,120 @@ +/** + * Copyright 2020 
Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ + +#include +#include +#include +#include +#include "include/status.h" + +namespace mindspore { +namespace dataset { + +// Forward declare +class ExecutionTree; +class DatasetIterator; +class DatasetOp; +class Tensor; + +class NativeRuntimeContext; +class IteratorConsumer; + +class Dataset; + +using TensorMap = std::unordered_map>; +using TensorVec = std::vector>; + +// Abstract class for iterating over the dataset. +class Iterator { + public: + /// \brief Constructor + Iterator(); + + /// \brief Destructor + ~Iterator(); + + /// \brief Method for building and launching the pipeline. + /// \param[in] ops - a vector of DatasetOp in the data pipeline. + /// \return - a Status error code, returns OK if no error encountered. + Status BuildAndLaunchTree(std::shared_ptr ds); + + /// \brief Function to get the next row from the data pipeline. + /// \note Type of return data is a map(with column name). + /// \param[out] row - the output tensor row. + /// \return Returns true if no error encountered else false. + bool GetNextRow(TensorMap *row); + + /// \brief Function to get the next row from the data pipeline. + /// \note Type of return data is a vector(without column name). + /// \param[out] row - the output tensor row. 
+ /// \return Returns true if no error encountered else false. + bool GetNextRow(TensorVec *row); + + /// \brief Function to shut down the data pipeline. + void Stop(); + + class _Iterator { + public: + explicit _Iterator(Iterator *lt) : lt_{lt}, cur_row_{nullptr} { + if (lt_) { + cur_row_ = new TensorMap(); + lt_->GetNextRow(cur_row_); + } + } + + // Destructor + ~_Iterator() { + if (cur_row_) { + delete cur_row_; + } + } + + _Iterator &operator++() { + if (lt_) { + ++ind_; + lt_->GetNextRow(cur_row_); + } + if (cur_row_ && cur_row_->size() == 0) { + delete cur_row_; + cur_row_ = nullptr; + } + return *this; + } // prefix ++ overload + TensorMap &operator*() { return *cur_row_; } // dereference operator + TensorMap *operator->() { return cur_row_; } + + bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; } + + private: + int ind_; // the cur node our Iterator points to + Iterator *lt_; + TensorMap *cur_row_; + }; + + _Iterator begin() { return _Iterator(this); } + + _Iterator end() { return _Iterator(nullptr); } + + private: + std::unique_ptr runtime_context_; + IteratorConsumer *consumer_; +}; +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/memory_pool.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/memory_pool.h new file mode 100644 index 0000000000..3841e18e3d --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/memory_pool.h @@ -0,0 +1,59 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_ + +#include +#include +#include +#include "include/status.h" + +namespace mindspore { +namespace dataset { +// Abstract class of a memory pool +class MemoryPool { + public: + // Allocate a block of size n + virtual Status Allocate(size_t, void **) = 0; + + // Enlarge or shrink a block from oldSz to newSz + virtual Status Reallocate(void **, size_t old_sz, size_t new_sz) = 0; + + // Free a pointer + virtual void Deallocate(void *) = 0; + + // What is the maximum size I can allocate ? 
+ virtual uint64_t get_max_size() const = 0; + + virtual int PercentFree() const = 0; + + // Destructor + virtual ~MemoryPool() {} +}; + +Status DeMalloc(std::size_t s, void **p, bool); +} // namespace dataset +} // namespace mindspore + +void *operator new(std::size_t, mindspore::Status *, std::shared_ptr); + +void *operator new[](std::size_t, mindspore::Status *, std::shared_ptr); + +void operator delete(void *, std::shared_ptr); + +void operator delete[](void *, std::shared_ptr); + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/path.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/path.h new file mode 100644 index 0000000000..85730157de --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/path.h @@ -0,0 +1,126 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_ + +#include +#include +#include + +#include "include/status.h" + +namespace mindspore { +namespace dataset { +class Path { + public: + class DirIterator { + public: + static std::shared_ptr OpenDirectory(Path *f); + + ~DirIterator(); + + bool hasNext(); + + Path next(); + + private: + explicit DirIterator(Path *f); + + Path *dir_; + DIR *dp_; + struct dirent *entry_; + }; + + explicit Path(const std::string &); + + explicit Path(const char *); + + ~Path() = default; + + Path(const Path &); + + Path &operator=(const Path &); + + Path(Path &&) noexcept; + + Path &operator=(Path &&) noexcept; + + std::string toString() const { return path_; } + + Path operator+(const Path &); + + Path operator+(const std::string &); + + Path operator+(const char *); + + Path &operator+=(const Path &rhs); + + Path &operator+=(const std::string &); + + Path &operator+=(const char *); + + Path operator/(const Path &); + + Path operator/(const std::string &); + + Path operator/(const char *); + + bool operator==(const Path &rhs) const { return (path_ == rhs.path_); } + + bool operator!=(const Path &rhs) const { return (path_ != rhs.path_); } + + bool operator<(const Path &rhs) const { return (path_ < rhs.path_); } + + bool operator>(const Path &rhs) const { return (path_ > rhs.path_); } + + bool operator<=(const Path &rhs) const { return (path_ <= rhs.path_); } + + bool operator>=(const Path &rhs) const { return (path_ >= rhs.path_); } + + bool Exists(); + + bool IsDirectory(); + + Status CreateDirectory(); + + Status CreateDirectories(); + + std::string Extension() const; + + std::string ParentPath(); + + Status Remove(); + + Status CreateFile(int *fd); + + Status OpenFile(int *fd, bool create = false); + + Status CloseFile(int fd) const; + + Status TruncateFile(int fd) const; + + std::string Basename(); + + friend std::ostream &operator<<(std::ostream &os, const Path &s); + 
+ private: + static char separator_; + std::string path_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h new file mode 100644 index 0000000000..93b5d88261 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h @@ -0,0 +1,301 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_ + +#include +#include +#include + +#include "include/status.h" + +namespace mindspore { +namespace dataset { + +// Internal Sampler class forward declaration +class SamplerRT; + +class SamplerObj : public std::enable_shared_from_this { + public: + /// \brief Constructor + SamplerObj(); + + /// \brief Destructor + ~SamplerObj() = default; + + /// \brief Pure virtual function for derived class to implement parameters validation + /// \return The Status code of the function. It returns OK status if parameters are valid. 
+ virtual Status ValidateParams() = 0; + + /// \brief Pure virtual function to convert a SamplerObj class into a runtime sampler object + /// \return Shared pointers to the newly created Sampler + virtual std::shared_ptr Build() = 0; + + /// \brief Pure virtual function to copy a SamplerObj class + /// \return Shared pointers to the newly copied SamplerObj + virtual std::shared_ptr Copy() = 0; + + /// \brief Function for derived class to get the shard id of sampler + /// \return The shard id of the derived sampler + virtual int64_t ShardId() { return 0; } + + /// \brief Adds a child to the sampler + /// \param[in] child The sampler to be added as child + /// \return the Status code returned + Status AddChild(std::shared_ptr child); + + protected: + /// \brief A function that calls build on the children of this sampler + /// \param[in] sampler The samplerRT object built from this sampler + void BuildChildren(std::shared_ptr sampler); + + std::vector> children_; +}; + +class DistributedSamplerObj; +class PKSamplerObj; +class PreBuiltSamplerObj; +class RandomSamplerObj; +class SequentialSamplerObj; +class SubsetRandomSamplerObj; +class WeightedRandomSamplerObj; + +/// Function to create a Distributed Sampler. +/// \notes A Sampler that access a shard of the dataset. +/// \param[in] num_shards - Number of shards to divide the dataset into. +/// \param[in] shard_id - Shard ID of the current shard within num_shards. +/// \param[in] shuffle - If true, the indices are shuffled. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] seed - The seed in use when shuffle is true. +/// \param[in] offset - The starting position where access to elements in the dataset begins. +/// \param[in] even_dist - If true, each shard would return the same number of rows (default to true). +/// If false the total rows returned by all the shards would not have overlap. +/// \return Shared pointer to the current Sampler. 
+std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, + int64_t num_samples = 0, uint32_t seed = 1, + int64_t offset = -1, bool even_dist = true); + +/// Function to create a PK Sampler. +/// \notes Samples K elements for each P class in the dataset. +/// This will sample all classes. +/// \param[in] num_val - Number of elements to sample for each class. +/// \param[in] shuffle - If true, the class IDs are shuffled. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); + +/// Function to create a Random Sampler. +/// \notes Samples the elements randomly. +/// \param[in] replacement - If true, put the sample ID back for the next draw. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr RandomSampler(bool replacement = false, int64_t num_samples = 0); + +/// Function to create a Sequential Sampler. +/// \notes Samples the dataset elements sequentially, same as not having a sampler. +/// \param[in] start_index - Index to start sampling at (default to start at first id). +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); + +/// Function to create a Subset Random Sampler. +/// \notes Samples the elements randomly from a sequence of indices. +/// \param[in] indices - A vector sequence of indices. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \return Shared pointer to the current Sampler. +std::shared_ptr SubsetRandomSampler(std::vector indices, int64_t num_samples = 0); + +/// Function to create a Weighted Random Sampler. 
+/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given +/// weights (probabilities). +/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. +/// \param[in] num_samples - The number of samples to draw (default to all elements). +/// \param[in] replacement - If true, put the sample ID back for the next draw. +/// \return Shared pointer to the current Sampler. +std::shared_ptr WeightedRandomSampler(std::vector weights, int64_t num_samples = 0, + bool replacement = true); + +/* ####################################### Derived Sampler classes ################################# */ +class DistributedSamplerObj : public SamplerObj { + public: + DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed, + int64_t offset, bool even_dist); + + ~DistributedSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override { + auto sampler = std::make_shared(num_shards_, shard_id_, shuffle_, num_samples_, seed_, + offset_, even_dist_); + for (auto child : children_) { + sampler->AddChild(child); + } + return sampler; + } + + Status ValidateParams() override; + + /// \brief Function to get the shard id of sampler + /// \return The shard id of sampler + int64_t ShardId() override { return shard_id_; } + + private: + int64_t num_shards_; + int64_t shard_id_; + bool shuffle_; + int64_t num_samples_; + uint32_t seed_; + int64_t offset_; + bool even_dist_; +}; + +class PKSamplerObj : public SamplerObj { + public: + PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); + + ~PKSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override { + auto sampler = std::make_shared(num_val_, shuffle_, num_samples_); + for (auto child : children_) { + sampler->AddChild(child); + } + return sampler; + } + + Status ValidateParams() override; + + private: + int64_t num_val_; + bool shuffle_; + int64_t 
num_samples_; +}; + +class PreBuiltSamplerObj : public SamplerObj { + public: + explicit PreBuiltSamplerObj(std::shared_ptr sampler); + + ~PreBuiltSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override; + + Status ValidateParams() override; + + private: + std::shared_ptr sp_; +}; + +class RandomSamplerObj : public SamplerObj { + public: + RandomSamplerObj(bool replacement, int64_t num_samples); + + ~RandomSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override { + auto sampler = std::make_shared(replacement_, num_samples_); + for (auto child : children_) { + sampler->AddChild(child); + } + return sampler; + } + + Status ValidateParams() override; + + private: + bool replacement_; + int64_t num_samples_; +}; + +class SequentialSamplerObj : public SamplerObj { + public: + SequentialSamplerObj(int64_t start_index, int64_t num_samples); + + ~SequentialSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override { + auto sampler = std::make_shared(start_index_, num_samples_); + for (auto child : children_) { + sampler->AddChild(child); + } + return sampler; + } + + Status ValidateParams() override; + + private: + int64_t start_index_; + int64_t num_samples_; +}; + +class SubsetRandomSamplerObj : public SamplerObj { + public: + SubsetRandomSamplerObj(std::vector indices, int64_t num_samples); + + ~SubsetRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override { + auto sampler = std::make_shared(indices_, num_samples_); + for (auto child : children_) { + sampler->AddChild(child); + } + return sampler; + } + + Status ValidateParams() override; + + private: + const std::vector indices_; + int64_t num_samples_; +}; + +class WeightedRandomSamplerObj : public SamplerObj { + public: + explicit WeightedRandomSamplerObj(std::vector weights, int64_t num_samples = 0, bool replacement = true); + + 
~WeightedRandomSamplerObj() = default; + + std::shared_ptr Build() override; + + std::shared_ptr Copy() override { + auto sampler = std::make_shared(weights_, num_samples_, replacement_); + for (auto child : children_) { + sampler->AddChild(child); + } + return sampler; + } + + Status ValidateParams() override; + + private: + const std::vector weights_; + int64_t num_samples_; + bool replacement_; +}; +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/status.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/status.h new file mode 100644 index 0000000000..9b08e0321a --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/status.h @@ -0,0 +1,105 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_ + +#if defined(__GNUC__) || defined(__clang__) +#define DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) +#define DEPRECATED __declspec(deprecated) +#else +#pragma message("WARNING: You need to implement DEPRECATED for this compiler") +#define DEPRECATED +#endif + +#include +#include +#include + +#include "include/ms_status.h" + +namespace mindspore { +namespace dataset { +#define RETURN_IF_NOT_OK(_s) \ + do { \ + Status __rc = (_s); \ + if (__rc.IsError()) { \ + return __rc; \ + } \ + } while (false) + +#define RETURN_STATUS_UNEXPECTED(_e) \ + do { \ + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e); \ + } while (false) + +#define CHECK_FAIL_RETURN_UNEXPECTED(_condition, _e) \ + do { \ + if (!(_condition)) { \ + return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e); \ + } \ + } while (false) + +#define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e) \ + do { \ + if (!(_condition)) { \ + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \ + } \ + } while (false) + +#define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e) \ + do { \ + if (!(_condition)) { \ + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \ + } \ + } while (false) + +#define RETURN_UNEXPECTED_IF_NULL(_ptr) \ + do { \ + if ((_ptr) == nullptr) { \ + std::string err_msg = "The pointer[" + std::string(#_ptr) + "] is null."; \ + RETURN_STATUS_UNEXPECTED(err_msg); \ + } \ + } while (false) + +#define RETURN_OK_IF_TRUE(_condition) \ + do { \ + if (_condition) { \ + return Status::OK(); \ + } \ + } while (false) + +#define RETURN_STATUS_SYNTAX_ERROR(_e) \ + do { \ + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \ + } while (false) + +#define RETURN_SECOND_IF_ERROR(_s, _r) \ + do { \ + Status __rc = (_s); \ + if (__rc.IsError()) { \ + MS_LOG(ERROR) << __rc; \ + return _r; \ + } \ + } 
while (false) + +#if !defined(_WIN32) && !defined(_WIN64) +const float MAX_MEMORY_USAGE_THRESHOLD = 0.95; +float GetMemoryUsage(); +#endif +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor.h new file mode 100644 index 0000000000..b4ceb772a1 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor.h @@ -0,0 +1,632 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ + +#include +#include +#include +#include + +#if defined(_WIN32) || defined(_WIN64) +#undef HAVE_STDDEF_H +#undef HAVE_STDLIB_H +#endif + +#include "include/constants.h" +#include "include/data_type.h" +#include "include/tensor_helpers.h" +#include "include/tensor_shape.h" +#include "include/status.h" + +namespace mindspore { +namespace dataset { +class Tensor; +template +class Allocator; + +using CharAllocPtr = std::unique_ptr>; +using TensorAllocPtr = std::shared_ptr>; // An allocator shared_ptr for Tensors +using offset_t = uint32_t; // type of offset values to store strings locations +using TensorPtr = std::shared_ptr; + +class Tensor { + public: + Tensor() = delete; + Tensor(const Tensor &other) = delete; + Tensor &operator=(const Tensor &other) = delete; + + /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead + /// \note The shape and type information should be known and valid + /// \note The constructor does not allocate data + /// \param shape TensorShape + /// \param type DataType + Tensor(const TensorShape &shape, const DataType &type); + + /// Move constructor + /// \param other Tensor to be moved + Tensor(Tensor &&other) noexcept; + + /// Move assignment operator + /// \param other Tensor to be moved + Tensor &operator=(Tensor &&other) noexcept; + + /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized. + /// \param[in] shape shape of the output tensor + /// \param[in] type type of the output tensor + /// \param[out] out Generated tensor + /// \return Status code + static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out); + + /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type. + /// Data will be copied into the new created tensor. 
+ /// \param[in] shape shape of the output tensor + /// \param[in] type type of the output tensor + /// \param[in] src pointer to the source data + /// \param[out] out Generated tensor + /// \return Status code + static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out); + + /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor. + /// \param[in] shape shape of the output tensor + /// \param[in] type type of the output tensor + /// \param[in] src pointer to the source data + /// \param[in] length length of the src data + /// \param[out] out Generated tensor + /// \return Status code + static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, + const dsize_t &length, TensorPtr *out); + + /// Create a copy of the input tensor + /// \param[in] in original tensor to be copied + /// \param[out] out output tensor to be generated + /// \return Status + static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) { + return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out); + } + + /// Create a Tensor from a given list of values. + /// \tparam type of the values to be inserted. + /// \param[in] items elements of the tensor + /// \param[in] shape shape of the output tensor + /// \param[out] out output argument to hold the created Tensor + /// \return Status Code + template + static Status CreateFromVector(const std::vector &items, const TensorShape &shape, TensorPtr *out) { + CHECK_FAIL_RETURN_UNEXPECTED( + items.size() == shape.NumOfElements(), + "Number of elements in the vector does not match the number of elements of the shape required"); + // cppcheck-suppress shadowFunction + DataType type = DataType::FromCType(); + // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case. 
+ auto items_ptr = reinterpret_cast(&items[0]); + return CreateFromMemory(shape, type, items_ptr, out); + } + + /// Create a 1D Tensor from a given list of values. + /// \tparam type of the values to be inserted. + /// \param[in] items elements of the tensor + /// \param[out] out output argument to hold the created Tensor + /// \return Status Code + template + static Status CreateFromVector(const std::vector &items, TensorPtr *out) { + return CreateFromVector(items, TensorShape({static_cast(items.size())}), out); + } + + /// Create a 1D boolean Tensor from a given list of boolean values. + /// \param[in] items elements of the tensor + /// \param[in] shape shape of the output tensor + /// \param[out] out output argument to hold the created Tensor + /// \return Status Code + static Status CreateFromVector(const std::vector &items, const TensorShape &shape, TensorPtr *out) { + std::vector temp(items.begin(), items.end()); + RETURN_IF_NOT_OK(CreateFromVector(temp, shape, out)); + (*out)->type_ = DataType(DataType::DE_BOOL); + return Status::OK(); + } + + /// Create a numeric scalar Tensor from the given value. + /// \tparam T type of value + /// \param[in] item value + /// \param[out] out Created tensor + /// \return Status code + template + static Status CreateScalar(const T &item, TensorPtr *out) { + // cppcheck-suppress shadowFunction + DataType type = DataType::FromCType(); + auto item_ptr = reinterpret_cast(&item); + return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out); + } + + /// Create a tensor from a binary file on disk. + /// \param[in] path file to be read + /// \param[out] out Created Tensor + /// \return Status code + static Status CreateFromFile(const std::string &path, TensorPtr *out); + + /// Destruct the tensor and release the memory using the allocator + virtual ~Tensor(); + + /// Equality operator. 
compares tensor shape, type and data + /// \param[in] rhs Tensor to be compared with + /// \return bool + bool operator==(const Tensor &rhs) const; + + bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); } + + /// Get item located at `index`, caller needs to provide the type. + /// \tparam T + /// \param[in] index vector + /// \return return the item specified at index + template + Status GetItemAt(T *o, const std::vector &index) const; + + /// Get string located at `index`. + /// \param[in] index vector + /// \return return std::string_view specified at index + Status GetItemAt(std::string_view *o, const std::vector &index) const; + + template + Status GetUnsignedIntAt(T *o, const std::vector &index) const; + + template + Status GetSignedIntAt(T *o, const std::vector &index) const; + + template + Status GetFloatAt(T *o, const std::vector &index) const; + + /// set item at location specified by index + /// \tparam `T` + /// \param[in] index + /// \param[in] value of type `T` + template + Status SetItemAt(const std::vector &index, const T &value) { + T *ptr = nullptr; + RETURN_IF_NOT_OK(GetItemPtr(&ptr, index)); + *ptr = value; + return Status::OK(); + } + + Status SetItemAt(const std::vector &index, const std::string &value); + + /// fill tensor with Zeros. Does not support strings. + Status Zero(); + + /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings. + /// \tparam T + /// \param value[in] + template + Status Fill(const T &value); + + /// Getter function for shape + /// \return + const TensorShape &shape() const { return shape_; } + + /// Check if tensor has data + /// \return bool - true if tensor is not empty + bool HasData() const { return data_ != nullptr; } + + /// Reshape the tensor. 
The given shape should have the same number of elements in the Tensor + /// \param shape + virtual Status Reshape(const TensorShape &shape); + + /// \return number of elements in this tensor + dsize_t Size() const { return shape().NumOfElements(); } + + /// \return the number of bytes this tensor is needs + dsize_t SizeInBytes() const { + if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements(); + return data_end_ - data_; + } + + /// \return the rank of the tensor + dsize_t Rank() const { return shape().Rank(); } + + /// Get the starting memory address as a constant for the data of the tensor. This potentially + /// drives an allocation if the data area. + /// \return const unsigned char* + const unsigned char *GetBuffer() const { return data_; } + + /// Getter of the type + /// \return + // cppcheck-suppress shadowFunction + DataType type() const { return type_; } + + /// Provide stream operator for displaying it + /// \param output stream + /// \param so the Tensor object to be printed + /// \return output stream + friend std::ostream &operator<<(std::ostream &out, const Tensor &so) { + so.Print(out); + return out; + } + + /// Invalidate this Tensor by setting the type and shape to unknown and MData to null. + /// Calling this method will make the Tensor and its data inaccessible, use it with caution. + void Invalidate(); + + /// Copy input tensor into self at the location index. + /// Index is a vector of axes which can be incomplete: + /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. + /// \param index + /// \param input + /// \param partial_insert: boolean to determine if insertion along the full axis is enforced + /// \return Status code + Status InsertTensor(const std::vector &index, const std::shared_ptr &input, + const bool partial_insert = false); + + /// Find the address of the given index. Used in InsertTensor. 
+ /// Example: + /// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1 + /// \param index incomplete index + /// \param output: startAddrofIndex + /// \param output: remaining + /// \return Status code + Status StartAddrOfIndex(std::vector ind, uchar **start_addr_of_index, TensorShape *remaining); + + /// Expand the shape of the Tensor with one extra dimension. + /// For example, if the shape is <512,512,3>: + /// *- ExpandDim(0) gives: <1,512,512,3> + /// *- ExpandDim(1) gives: <512,1,512,3> + /// *- ExpandDim(3) gives: <512,512,3,1> + /// \param axis location of the dim + virtual Status ExpandDim(const dsize_t &axis); + + virtual void Squeeze(); + + /// Calculates the strides of the Tensor + /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) + /// The strides will be {6,2,1}. + /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) + /// The strides will be {24,8,4}. + /// \return vector of integers + std::vector Strides() const; + + std::string ToString() { + std::stringstream ss; + this->Print(ss); + return ss.str(); + } + + /// Handle negative indices. + /// \param[out] out modified index + /// \param[in] index + /// \param[in] length axis length used to modify index + /// \return dsize_t modified index + static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } + + /// Handle negative indices for a vector of indices. + /// \param[out] out modified vector of indices + /// \param[in] index_vector vector of indices + /// \return std::vector modified vector of indices + static inline std::vector HandleNegIndices(std::vector index_vector, std::vector length) { + std::vector indices(index_vector.size(), 0); + for (int i = 0; i < index_vector.size(); i++) { + indices[i] = HandleNeg(index_vector[i], length[i]); + } + return indices; + } + + /// Slice tensor bases on the given indices. Copy the sliced data into out tensor. 
+ /// Based on the type of tensor, SliceNumeric or SliceString will be called + /// \param[out] out Tensor + /// \param[in] slice_options vector of SliceOption objects + /// \return Status error code + // cppcheck-suppress passedByValue + Status Slice(TensorPtr *out, const std::vector slice_options); + + /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor + /// The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 + /// \tparam T type of values in the Tensor Iterator + template + class TensorIterator { + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T *; + using reference = T &; + + explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast(ptr); } + + TensorIterator(const TensorIterator &raw_iterator) { ptr_ = raw_iterator.ptr_; } + + ~TensorIterator() = default; + + // cppcheck-suppress operatorEqVarError + TensorIterator &operator=(const TensorIterator &rhs) { + ptr_ = rhs.ptr_; + return *this; + } + + TensorIterator &operator=(T *rhs) { + ptr_ = rhs; + return *this; + } + + bool operator==(const TensorIterator &rhs) { return ptr_ == rhs.ptr_; } + + bool operator!=(const TensorIterator &rhs) { return !(*this == rhs); } + + operator bool() const { return ptr_ != nullptr; } + + T &operator*() { return *ptr_; } + + const T &operator*() const { return *ptr_; } + + T *operator->() { return ptr_; } + + TensorIterator &operator+=(const ptrdiff_t &inc) { + ptr_ += inc; + return *this; + } + + TensorIterator &operator-=(const ptrdiff_t &inc) { + ptr_ -= inc; + return *this; + } + + TensorIterator &operator++() { + ++ptr_; + return *this; + } + + TensorIterator &operator--() { + --ptr_; + return *this; + } + + TensorIterator operator++(int) { + auto temp(*this); + ++ptr_; + return temp; + } + + TensorIterator operator--(int) { + auto temp(*this); + --ptr_; + return temp; + 
} + + TensorIterator operator+(const ptrdiff_t &inc) { + auto oldPtr = ptr_; + ptr_ += inc; + auto temp(*this); + ptr_ = oldPtr; + return temp; + } + + TensorIterator operator-(const ptrdiff_t &inc) { + auto oldPtr = ptr_; + ptr_ -= inc; + auto temp(*this); + ptr_ = oldPtr; + return temp; + } + + protected: + T *ptr_; + }; + + // Specialization of TensorIterator for strings. It returns std::string_view for every item. + // \tparam DUMMY, used to mbe able to specialize the inner class + template + class TensorIterator { + public: + using iterator_category = std::random_access_iterator_tag; + using value_type = std::string_view; + using difference_type = ptrdiff_t; + using pointer = std::string_view *; + using reference = std::string_view &; + + explicit TensorIterator(uchar *data = nullptr, dsize_t index = 0) { + data_ = reinterpret_cast(data); + // cppcheck-suppress useInitializationList + index_ = index; + } + + TensorIterator(const TensorIterator &raw_iterator) { + data_ = raw_iterator.data_; + // cppcheck-suppress useInitializationList + index_ = raw_iterator.index_; + } + + ~TensorIterator() = default; + + bool operator==(const TensorIterator &rhs) { return data_ == rhs.data_ && index_ == rhs.index_; } + + bool operator!=(const TensorIterator &rhs) { return !(*this == rhs); } + + operator bool() const { return data_ != nullptr; } + + std::string_view operator*() const { + auto offset_ = reinterpret_cast(data_); + offset_t start = offset_[index_]; + return std::string_view{data_ + start}; + } + + TensorIterator &operator+=(const dsize_t &inc) { + index_ += inc; + return *this; + } + + TensorIterator &operator-=(const dsize_t &inc) { + index_ -= inc; + return *this; + } + + TensorIterator &operator++() { + ++index_; + return *this; + } + + TensorIterator &operator--() { + --index_; + return *this; + } + + TensorIterator operator++(int) { + auto temp(*this); + ++index_; + return temp; + } + + TensorIterator operator--(int) { + auto temp(*this); + --index_; + 
return temp; + } + + TensorIterator operator+(const dsize_t &inc) { + auto oldPtr = index_; + index_ += inc; + auto temp(*this); + index_ = oldPtr; + return temp; + } + + TensorIterator operator-(const dsize_t &inc) { + auto oldPtr = index_; + index_ -= inc; + auto temp(*this); + index_ = oldPtr; + return temp; + } + + protected: + dsize_t index_; + const char *data_; + }; + + /// Return a TensorIterator that points to the start of the Tensor. + /// It's the user responsibility to use the correct type that matches the Tensor type + /// \tparam T The type of values in the Tensor + /// \return TensorIterator + template + TensorIterator begin() { + return TensorIterator(data_); + } + + /// Return a linear iterator that points to the place after the last element of the Tensor. + /// \tparam T The type of values in the Tensor + /// \return TensorIterator + template + TensorIterator end() { + return TensorIterator(data_end_); + } + + /// Copies the last dimension at `index` from Tensor `src` to this Tensor. + /// \param[in] src Tensor + /// \param[in] index vector to the start of the dimension. The last dim should be 0 + /// \return Status + Status CopyLastDimAt(const std::shared_ptr &src, const std::vector &index); + + protected: + /// Allocate memory for the tensor using the data_allocator + /// \param[in] length number of bytes to be allocated + /// \return Error Status + Status AllocateBuffer(const dsize_t &length); + + /// Get the starting memory address for the data of the tensor. This potentially + /// drives an allocation if the data is null. 
+ /// \return unsigned char* + unsigned char *GetMutableBuffer() { return data_; } + + /// A function that prints Tensor recursively, first called by print + /// \param[in] out + /// \param[in] cur_dim + /// \param[in] cur_index + void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector &cur_index) const; + + /// A function that prints info about the tensor + /// \param[out] out output stream + void Print(std::ostream &out) const; + + /// A function that print the value as specified by its index + /// \param[in] index vector representing the index + /// \param[out] out + void PrintItemAt(const std::vector &index, std::ostream &out) const; + + /// Get pointer to item located at `index`, caller needs to provide the type. + /// \tparam T + /// \param[in] index vector + /// \return return a pointer to the item specified at index of type `T` + template + Status GetItemPtr(T **, const std::vector &index) const; + + /// Get pointer to string located at `index` and the length of string + /// \param[in] index vector + /// \return return a pointer to the string specified at index and the length of the string + Status GetItemPtr(uchar **, const std::vector &index, offset_t *length = nullptr) const; + + /// Given a flat index of an item string, return the start and length of the item + /// \param[in] index flat index of the item + /// \param[out] start address of the ths string + /// \param[out] length of the string + Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const; + + /// Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if + /// the tensor's type is a string, otherwise undefined address would be returned. \return address of the first string + /// of the tensor. 
+ uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; } + + /// all access to shape_ should be via shape + TensorShape shape_; + /// data type of tensor + DataType type_; + /// pointer to the start of the physical data + unsigned char *data_; + /// An allocator for data_ + CharAllocPtr data_allocator_; + /// pointer to the end of the physical data + unsigned char *data_end_ = nullptr; + + private: + /// Slice numeric tensors. + Status SliceNumeric(TensorPtr *out, const std::vector> &indices, const TensorShape &shape); + + /// Slice string tensors + Status SliceString(TensorPtr *out, const std::vector> &indices, const TensorShape &shape); + + /// Copy raw data of a array based on shape and strides to the destination pointer + /// \param dst [out] Pointer to the destination array where the content is to be copied + /// \param[in] src Pointer to the source of strided array to be copied + /// \param[in] shape shape of the source array + /// \param[in] strides strides of the source array + /// \param[in] type_size number of bytes needed to store one array element's type + /// \return Status Code + static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector shape, + std::vector strides, uint8_t type_size); + + /// const of the size of the offset variable + static constexpr uint8_t kOffsetSize = sizeof(offset_t); +}; +template <> +inline Tensor::TensorIterator Tensor::end() { + return TensorIterator(data_, shape_.NumOfElements()); +} + +/// Create a string scalar Tensor from the given value. 
+/// \param[in] item value +/// \param[out] out Created tensor +/// \return Status code +template <> +inline Status Tensor::CreateScalar(const std::string &item, TensorPtr *out) { + return CreateFromVector({item}, TensorShape::CreateScalar(), out); +} +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor_helpers.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor_helpers.h new file mode 100644 index 0000000000..66cec76915 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor_helpers.h @@ -0,0 +1,83 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ + +#include +#include + +#include "include/constants.h" + +namespace mindspore { +namespace dataset { +class Slice { + public: + Slice() : start_(0), stop_(0), step_(0) {} + Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {} + Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {} + explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {} + Slice(Slice const &slice) = default; + + ~Slice() = default; + + bool valid() const { return step_ != 0; } + dsize_t start_; + dsize_t stop_; + dsize_t step_; +}; + +class SliceOption { + public: + explicit SliceOption(bool all) : all_(all) {} + explicit SliceOption(std::vector indices) : indices_(indices) {} + explicit SliceOption(Slice slice) : slice_(slice) {} + SliceOption(SliceOption const &slice) = default; + + ~SliceOption() = default; + + // only one of the following will be valid + // given indices to slice the Tensor. + std::vector indices_ = {}; + // Slice object. All start, stop and step are 0 if invalid. + Slice slice_; + bool all_ = false; +}; + +/// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each +/// range represented by slice_options to generate a list of indices to be sliced. +/// \param[out] matrix Generated nested vector of indices +/// Example: For a 4 x 2 tensor, and with slice_list = {SliceOption({0})} (the first row), matrix will become +/// {{0}}. For slice_list = {SliceOption(all), SliceOption({0})} (the first column), matrix will become +/// {{0, 0}, {1, 0}, {2, 0}, {3, 0}}. +/// For slice_list = {SliceOption({0, 2})}, matrix will become {{0}, {2}}. The size of each nested array is always +/// equal to (slice_list).size(). 
+/// \param[in] depth used to keep track of recursion level +/// \param[in] numbers vector used to represent current index +/// \param[in] matrix 2D vector to be populated with desired indices +/// \param[in] slice_options vector of SliceOption objects +void IndexGeneratorHelper(int8_t depth, std::vector *numbers, const std::vector &slice_list, + std::vector> *matrix); + +/// Generate indices based on vector of SliceOptions +/// Calls the recursive helper function IndexGeneratorHelper +/// \param[in] slice_list vector of SliceOption objects. Note: If the user passes +/// {SliceOption(true), SliceOption(true)}, it will return a M x 2 vector, instead of reducing it to +/// {SliceOption(true)} first to only generate a M x 1 vector. +/// \return std::vector> 2D vector of generated indices, M x (slice_list).size() +std::vector> IndexGenerator(const std::vector &slice_list); +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor_shape.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor_shape.h new file mode 100644 index 0000000000..70354bf37e --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/tensor_shape.h @@ -0,0 +1,176 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_ + +#include +#include +#include +#include +#include + +#include "include/constants.h" +#include "include/status.h" +#include "include/allocator.h" + +namespace mindspore { +namespace dataset { + +using IntAlloc = Allocator; +// Class that represents a shape of a Tensor. A shape can be: +// -# Known shape (mKnown = true) +// -# Scalar --> empty vector --> <> +// -# n-Dim --> not empty vector --> where di is >= 0\n +// Example: <1,2>, <1>, <1,13,10,11,1> +// -# Unknown shape (mKnown = false) +// -# Rank is unknown --> empty vector --> <> +// -# one or more dim is unknown --> not empty vector --> where di is unknown\n +// Example: <3,?> (the 1st dim is unknown)\n +// <2,?,?,?> (all dims but the 0th dim are unknown) + +/// \brief TensorShape supports any dim > 0 and < 2^31-1 + +class TensorShape { + public: + static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension + + // Force the compiler to not create a no-arg constructor + TensorShape() = delete; + + /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}). + /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list + explicit TensorShape(const std::initializer_list &list); + + /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector({2,2}) ). + /// If one of the dims is set to DIM_UNKNOWN, the shape will flagged as unKnown + /// \param[in] list + explicit TensorShape(const std::vector &list); + + /// \brief Copy constructor + /// \param[in] shape + TensorShape(const TensorShape &shape); + + ~TensorShape() = default; + + /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true) + /// \return TensorShape + static TensorShape CreateScalar() { return TensorShape({}); } + + /// \brief Create a shape with an unknown rank. 
+ /// \return TensorShape + static TensorShape CreateUnknownRankShape(); + + /// \brief Create a shape with a known rank . + /// \return TensorShape + static TensorShape CreateUnknownShapeWithRank(dsize_t rank); + + /// \brief Insert a new dim into a copy of the current shape. + /// \param[in] dim to be added + /// \param[in] axis the index where dim should be added + /// \return New modified shape + TensorShape InsertDim(dsize_t axis, dsize_t dim) const; + + /// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4> + /// \param[in] dim + /// \return + TensorShape PrependDim(dsize_t dim) const; + + /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4> + /// \param[in] dim + /// \return + TensorShape AppendDim(dsize_t dim) const; + + dsize_t Size() const { return raw_shape_.size(); } + + dsize_t Rank() const { return raw_shape_.size(); } + + bool known() const { return known_; } + + bool empty() const { return raw_shape_.empty(); } + + dsize_t NumOfElements() const; + + bool operator==(const TensorShape &rhs) const { return known_ == rhs.known_ && raw_shape_ == rhs.raw_shape_; } + + bool operator!=(const TensorShape &rhs) const { return !(rhs == *this); } + + dsize_t operator[](const dsize_t index) const { + if (index < 0) return raw_shape_[raw_shape_.size() + index]; + return raw_shape_[index]; + } + + /// \brief Return the Shape as a vector + /// \return + std::vector AsVector() const; + + /// \brief Returns the class info as a string + /// \return + std::string ToString() const { + std::stringstream ss; + ss << *this; + return ss.str(); + } + + /// \brief Actual print function used by operator<< + /// \param out output string stream + void Print(std::ostream &out) const; + + /// \brief << Stream output operator overload + /// This allows you to print the info using stream operators + /// \param[in] out - reference to the output stream being overloaded + /// \param[in] rO - reference to 
the TensorShape to display + /// \return - the output stream must be returned + friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) { + so.Print(out); + return out; + } + + /// \brief Checks if the given index is a valid index for this tensor. + /// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not. + /// \param[in] index + /// \return bool + bool IsValidIndex(const std::vector &index) const; + + TensorShape Squeeze() const; + + std::vector Strides() const; + + /// \brief Returns the location of the item assuming row major memory layout. + /// \param[in] index + /// \param[out] flat_index + /// \return + Status ToFlatIndex(const std::vector &index, dsize_t *flat_index) const; + + private: + // True if known and valid shape, false otherwise + bool known_; + // Vector to keep the dims of the shape. + std::vector raw_shape_; + // Vector to keep the strides of the shape. The size is rank+1 + std::vector strides_; + + /// \brief Internal utility function to iterate over a list, + /// check if the dim is valid and then insert it into the shape. + /// \param[in] list Iterable list + /// \return true if the shape is valid and no overflow would be generated when counting the number of elements. + /// False otherwise. + template + void AddListToShape(const T &list); +}; +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h new file mode 100644 index 0000000000..f39a99a765 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h @@ -0,0 +1,252 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_ + +#include +#include +#include +#include "include/constants.h" +#include "include/status.h" + +namespace mindspore { +namespace dataset { + +class TensorOp; + +// Char arrays storing name of corresponding classes (in alphabetical order) +constexpr char kComposeOperation[] = "Compose"; +constexpr char kDuplicateOperation[] = "Duplicate"; +constexpr char kOneHotOperation[] = "OneHot"; +constexpr char kPreBuiltOperation[] = "PreBuilt"; +constexpr char kRandomApplyOperation[] = "RandomApply"; +constexpr char kRandomChoiceOperation[] = "RandomChoice"; +constexpr char kRandomSelectSubpolicyOperation[] = "RandomSelectSubpolicy"; +constexpr char kTypeCastOperation[] = "TypeCast"; +constexpr char kUniqueOperation[] = "Unique"; + +// Abstract class to represent a dataset in the data pipeline. +class TensorOperation : public std::enable_shared_from_this { + public: + /// \brief Constructor + TensorOperation() : random_op_(false) {} + + /// \brief Constructor + explicit TensorOperation(bool random) : random_op_(random) {} + + /// \brief Destructor + ~TensorOperation() = default; + + /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. + /// \return shared pointer to the newly created TensorOp. + virtual std::shared_ptr Build() = 0; + + virtual Status ValidateParams() = 0; + + virtual std::string Name() const = 0; + + /// \brief Check whether the operation is deterministic. 
+ /// \return true if this op is a random op (returns non-deterministic result e.g. RandomCrop) + bool IsRandomOp() const { return random_op_; } + + protected: + bool random_op_; +}; + +// Helper function to validate fill value +Status ValidateVectorFillvalue(const std::string &transform_name, const std::vector &fill_value); + +// Helper function to validate probability +Status ValidateProbability(const std::string &transform_name, const float &probability); + +// Helper function to validate padding +Status ValidateVectorPadding(const std::string &transform_name, const std::vector &padding); + +// Helper function to validate size +Status ValidateVectorPositive(const std::string &transform_name, const std::vector &size); + +// Helper function to validate transforms +Status ValidateVectorTransforms(const std::string &transform_name, + const std::vector> &transforms); + +// Helper function to compare float value +bool CmpFloat(const float &a, const float &b, float epsilon = 0.0000000001f); + +// Transform operations for performing data transformation. +namespace transforms { + +// Transform Op classes (in alphabetical order) +class ComposeOperation; +class DuplicateOperation; +class OneHotOperation; +class PreBuiltOperation; +class RandomApplyOperation; +class RandomChoiceOperation; +class TypeCastOperation; + +/// \brief Function to create a Compose TensorOperation. +/// \notes Compose a list of transforms into a single transform. +/// \param[in] transforms A vector of transformations to be applied. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Compose(const std::vector> &transforms); + +/// \brief Function to create a Duplicate TensorOperation. +/// \notes Duplicate the input tensor to a new output tensor. +/// The input tensor is carried over to the output list. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Duplicate(); + +/// \brief Function to create a OneHot TensorOperation. 
+/// \notes Convert the labels into OneHot format. +/// \param[in] num_classes number of classes. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr OneHot(int32_t num_classes); + +/// \brief Function to create a RandomApply TensorOperation. +/// \notes Randomly perform a series of transforms with a given probability. +/// \param[in] transforms A vector of transformations to be applied. +/// \param[in] prob The probability to apply the transformation list (default=0.5) +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomApply(const std::vector> &transforms, + double prob = 0.5); + +/// \brief Function to create a RandomChoice TensorOperation. +/// \notes Randomly selects one transform from a list of transforms to perform operation. +/// \param[in] transforms A vector of transformations to be chosen from to apply. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr RandomChoice(const std::vector> &transforms); + +/// \brief Function to create a TypeCast TensorOperation. +/// \notes Tensor operation to cast to a given MindSpore data type. +/// \param[in] data_type mindspore.dtype to be cast to. +/// \return Shared pointer to the current TensorOperation. 
+std::shared_ptr TypeCast(std::string data_type); + +/* ####################################### Derived TensorOperation classes ################################# */ + +class ComposeOperation : public TensorOperation { + public: + explicit ComposeOperation(const std::vector> &transforms); + + ~ComposeOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kComposeOperation; } + + private: + std::vector> transforms_; +}; + +class DuplicateOperation : public TensorOperation { + public: + DuplicateOperation() = default; + + ~DuplicateOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kDuplicateOperation; } +}; + +class OneHotOperation : public TensorOperation { + public: + explicit OneHotOperation(int32_t num_classes_); + + ~OneHotOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kOneHotOperation; } + + private: + float num_classes_; +}; + +class PreBuiltOperation : public TensorOperation { + public: + explicit PreBuiltOperation(std::shared_ptr tensor_op); + + ~PreBuiltOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kPreBuiltOperation; } + + private: + std::shared_ptr op_; +}; + +class RandomApplyOperation : public TensorOperation { + public: + explicit RandomApplyOperation(const std::vector> &transforms, double prob); + + ~RandomApplyOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kRandomApplyOperation; } + + private: + std::vector> transforms_; + double prob_; +}; + +class RandomChoiceOperation : public TensorOperation { + public: + explicit RandomChoiceOperation(const std::vector> &transforms); + + 
~RandomChoiceOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kRandomChoiceOperation; } + + private: + std::vector> transforms_; +}; +class TypeCastOperation : public TensorOperation { + public: + explicit TypeCastOperation(std::string data_type); + + ~TypeCastOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kTypeCastOperation; } + + private: + std::string data_type_; +}; +} // namespace transforms +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h new file mode 100644 index 0000000000..3960994729 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h @@ -0,0 +1,198 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_ + +#include +#include +#include +#include +#include + +#include "include/transforms.h" + +namespace mindspore { +namespace dataset { + +// Transform operations for performing computer vision. 
+namespace vision { + +// Char arrays storing name of corresponding classes (in alphabetical order) +constexpr char kCenterCropOperation[] = "CenterCrop"; +constexpr char kCropOperation[] = "Crop"; +constexpr char kDecodeOperation[] = "Decode"; +constexpr char kNormalizeOperation[] = "Normalize"; +constexpr char kResizeOperation[] = "Resize"; +constexpr char kRotateOperation[] = "Rotate"; +// Transform Op classes (in alphabetical order) +class CenterCropOperation; +class CropOperation; +class DecodeOperation; +class NormalizeOperation; +class ResizeOperation; +class RotateOperation; + +/// \brief Function to create a CenterCrop TensorOperation. +/// \notes Crops the input image at the center to the given size. +/// \param[in] size A vector representing the output size of the cropped image. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr CenterCrop(std::vector size); + +/// \brief Function to create a Crop TensorOp +/// \notes Crop an image based on location and crop size +/// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor} +/// \param[in] size Size of the cropped area. +/// If size is a single value, a square crop of size (size, size) is returned. +/// If size has 2 values, it should be (height, width). +/// \return Shared pointer to the current TensorOp +std::shared_ptr Crop(std::vector coordinates, std::vector size); + +/// \brief Function to create a Decode TensorOperation. +/// \notes Decode the input image in RGB mode. +/// \param[in] rgb A boolean of whether to decode in RGB mode or not. +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Decode(bool rgb = true); + +/// \brief Function to create a Normalize TensorOperation. +/// \notes Normalize the input image with respect to mean and standard deviation. 
+/// \param[in] mean A vector of mean values for each channel, w.r.t channel order. +/// The mean values must be in range [0.0, 255.0]. +/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order. +/// The standard deviation values must be in range (0.0, 255.0] +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Normalize(std::vector mean, std::vector std); + +/// \brief Function to create a Resize TensorOperation. +/// \notes Resize the input image to the given size. +/// \param[in] size A vector representing the output size of the resized image. +/// If size is a single value, the image will be resized to this value with +/// the same image aspect ratio. If size has 2 values, it should be (height, width). +/// \param[in] interpolation An enum for the mode of interpolation +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr Resize(std::vector size, + InterpolationMode interpolation = InterpolationMode::kLinear); +/// \brief Applies an rotate transformation to an image. +/// \notes Rotate the input image using a specified angle id. +/// \return Shared pointer to the current TensorOperation. 
+std::shared_ptr Rotate(); + +class CenterCropOperation : public TensorOperation { + public: + explicit CenterCropOperation(std::vector size); + + ~CenterCropOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kCenterCropOperation; } + + private: + std::vector size_; +}; + +class CropOperation : public TensorOperation { + public: + CropOperation(std::vector coordinates, std::vector size); + + ~CropOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kCropOperation; } + + private: + std::vector coordinates_; + std::vector size_; +}; +class DecodeOperation : public TensorOperation { + public: + explicit DecodeOperation(bool rgb = true); + + ~DecodeOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kDecodeOperation; } + + private: + bool rgb_; +}; + +class NormalizeOperation : public TensorOperation { + public: + NormalizeOperation(std::vector mean, std::vector std); + + ~NormalizeOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kNormalizeOperation; } + + private: + std::vector mean_; + std::vector std_; +}; + +class ResizeOperation : public TensorOperation { + public: + explicit ResizeOperation(std::vector size, + InterpolationMode interpolation_mode = InterpolationMode::kLinear); + + ~ResizeOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return kResizeOperation; } + + private: + std::vector size_; + InterpolationMode interpolation_; +}; + +class RotateOperation : public TensorOperation { + public: + RotateOperation(); + + ~RotateOperation() = default; + + std::shared_ptr Build() override; + + Status 
ValidateParams() override; + + std::string Name() const override { return kRotateOperation; } + + void setAngle(uint64_t angle_id); + + private: + std::shared_ptr rotate_op; +}; +} // namespace vision +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/util/task.cc b/mindspore/ccsrc/minddata/dataset/util/task.cc index 6d2c0bcaa0..17fabbd77d 100644 --- a/mindspore/ccsrc/minddata/dataset/util/task.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task.cc @@ -14,6 +14,8 @@ * limitations under the License. */ #include "minddata/dataset/util/task.h" + +#include #include "utils/ms_utils.h" #include "minddata/dataset/util/log_adapter.h" #include "minddata/dataset/util/task_manager.h" diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index 418ee00089..75779ca54a 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -99,233 +99,148 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES) AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES) -if(BUILD_MINDDATA STREQUAL "full") - set(BUILD_MINDDATA "wrapper") -endif() if(BUILD_MINDDATA STREQUAL "full") - include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image") - list(REMOVE_ITEM MINDDATA_API_SRC_FILES - "${MINDDATA_DIR}/api/text.cc" - "${MINDDATA_DIR}/api/minddata_eager.cc" - ) - - list(REMOVE_ITEM MINDDATA_CALLBACK_SRC_FILES - "${MINDDATA_DIR}/callback/py_ds_callback.cc" - ) - - list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES - "${MINDDATA_DIR}/core/cv_tensor.cc" - ) - - list(REMOVE_ITEM MINDDATA_KERNELS_SRC_FILES "${MINDDATA_DIR}/kernels/py_func_op.cc") - list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SRC_FILES - "${MINDDATA_DIR}/engine/datasetops/build_sentence_piece_vocab_op.cc" - "${MINDDATA_DIR}/engine/datasetops/filter_op.cc" - 
"${MINDDATA_DIR}/engine/datasetops/barrier_op.cc" - "${MINDDATA_DIR}/engine/datasetops/bucket_batch_by_length_op.cc" - "${MINDDATA_DIR}/engine/datasetops/build_vocab_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_merge_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_base_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_lookup_op.cc" - "${MINDDATA_DIR}/engine/datasetops/cache_op.cc" - "${MINDDATA_DIR}/engine/datasetops/concat_op.cc" - "${MINDDATA_DIR}/engine/datasetops/rename_op.cc" - "${MINDDATA_DIR}/engine/datasetops/skip_op.cc" - "${MINDDATA_DIR}/engine/datasetops/take_op.cc" - "${MINDDATA_DIR}/engine/datasetops/zip_op.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES - "${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc" - "${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES - "${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES - "${MINDDATA_DIR}/engine/opt/post/generator_node_pass.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES - "${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc" - ) - - list(REMOVE_ITEM 
MINDDATA_ENGINE_OPT_PRE_SRC_FILES - "${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc" - "${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES - "${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc" - "${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES - "${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc" - ) - list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES - "${MINDDATA_DIR}/kernels/image/affine_op.cc" - "${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc" - "${MINDDATA_DIR}/kernels/image/bounding_box_op.cc" - "${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc" - "${MINDDATA_DIR}/kernels/image/concatenate_op.cc" - "${MINDDATA_DIR}/kernels/image/cut_out_op.cc" - "${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc" - "${MINDDATA_DIR}/kernels/image/equalize_op.cc" - "${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc" - "${MINDDATA_DIR}/kernels/image/image_utils.cc" - 
"${MINDDATA_DIR}/kernels/image/invert_op.cc" - "${MINDDATA_DIR}/kernels/image/math_utils.cc" - "${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc" - "${MINDDATA_DIR}/kernels/image/pad_op.cc" - "${MINDDATA_DIR}/kernels/image/posterize_op.cc" - "${MINDDATA_DIR}/kernels/image/normalize_pad_op.cc" - "${MINDDATA_DIR}/kernels/image/random_affine_op.cc" - "${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_op.cc" - "${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc" - "${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_posterize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_resize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_rotation_op.cc" - "${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc" - "${MINDDATA_DIR}/kernels/image/random_solarize_op.cc" - "${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc" - "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" - "${MINDDATA_DIR}/kernels/image/rescale_op.cc" - "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" - "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" - "${MINDDATA_DIR}/kernels/image/sharpness_op.cc" - "${MINDDATA_DIR}/kernels/image/solarize_op.cc" - "${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc" - "${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc" - "${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc" - "${MINDDATA_DIR}/kernels/image/random_color_op.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES - 
"${MINDDATA_DIR}/engine/ir/datasetops/bucket_batch_by_length_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/build_sentence_piece_vocab_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/build_vocab_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/filter_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/sync_wait_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/skip_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/take_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/transfer_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/zip_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/rename_node.cc" - "${MINDDATA_DIR}/engine/ir/datasetops/concat_node.cc" - ) - list(REMOVE_ITEM MINDDATA_ENGINE_CONSUMERS_SRC_FILES - "${MINDDATA_DIR}/engine/consumers/python_tree_consumer.cc" - ) - - list(REMOVE_ITEM MINDDATA_ENGINE_SRC_FILES - "${MINDDATA_DIR}/engine/python_runtime_context.cc" - ) - - list(REMOVE_ITEM MINDDATA_KERNELS_DATA_SRC_FILES - "${MINDDATA_DIR}/kernels/data/unique_op.cc" - ) - - list(REMOVE_ITEM MINDDATA_UTIL_SRC_FILES - "${MINDDATA_DIR}/util/numa_interface.cc" - ) - include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache") - - if(BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64)) - set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc) - endif() + include_directories("${MINDDATA_DIR}/kernels/image") + include_directories("${MINDDATA_DIR}/liteapi") + include_directories("${TOP_DIR}") + + set(MINDDATA_FULL_SRC + ${TOP_DIR}/mindspore/lite/src/cxx_api/types.cc + ${TOP_DIR}/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc + ${TOP_DIR}/mindspore/lite/src/tensor.cc + ${CORE_DIR}/utils/status.cc + ${MINDDATA_DIR}/api/datasets.cc + ${MINDDATA_DIR}/kernels/data/data_utils.cc + ${MINDDATA_DIR}/api/samplers.cc + ${MINDDATA_DIR}/api/iterator.cc + ${MINDDATA_DIR}/api/execute.cc + ${MINDDATA_DIR}/core/de_tensor.cc + ${MINDDATA_DIR}/core/tensor_shape.cc + ${MINDDATA_DIR}/util/memory_pool.cc + ${MINDDATA_DIR}/core/config_manager.cc + 
${MINDDATA_DIR}/core/data_type.cc + ${MINDDATA_DIR}/core/tensor_helpers.cc + ${MINDDATA_DIR}/core/tensor.cc + ${MINDDATA_DIR}/core/global_context.cc + ${MINDDATA_DIR}/core/client.cc + ${MINDDATA_DIR}/engine/consumers/tree_consumer.cc + ${MINDDATA_DIR}/engine/ir/datasetops/dataset_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/epoch_ctrl_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/batch_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/map_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/root_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/repeat_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/project_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/shuffle_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/source/album_node.cc + ${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc + ${MINDDATA_DIR}/engine/datasetops/dataset_op.cc + ${MINDDATA_DIR}/engine/datasetops/repeat_op.cc + ${MINDDATA_DIR}/engine/datasetops/epoch_ctrl_op.cc + ${MINDDATA_DIR}/engine/datasetops/device_queue_op.cc + ${MINDDATA_DIR}/engine/datasetops/project_op.cc + ${MINDDATA_DIR}/engine/datasetops/shuffle_op.cc + ${MINDDATA_DIR}/engine/datasetops/pipeline_op.cc + ${MINDDATA_DIR}/engine/datasetops/batch_op.cc + ${MINDDATA_DIR}/engine/datasetops/parallel_op.cc + ${MINDDATA_DIR}/engine/datasetops/map_op/map_op.cc + ${MINDDATA_DIR}/engine/datasetops/map_op/cpu_map_job.cc + ${MINDDATA_DIR}/engine/datasetops/source/album_op.cc + ${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc + + ${MINDDATA_DIR}/engine/datasetops/source/io_block.cc + ${MINDDATA_DIR}/engine/opt/pre/getter_pass.cc + ${MINDDATA_DIR}/engine/opt/pre/input_validation_pass.cc + ${MINDDATA_DIR}/engine/opt/pre/cache_validation_pass.cc + ${MINDDATA_DIR}/engine/opt/pre/node_removal_pass.cc + ${MINDDATA_DIR}/engine/opt/pre/epoch_ctrl_pass.cc + ${MINDDATA_DIR}/engine/opt/pre/deep_copy_pass.cc + ${MINDDATA_DIR}/engine/opt/post/auto_worker_pass.cc + ${MINDDATA_DIR}/engine/opt/pass.cc + ${MINDDATA_DIR}/engine/perf/profiling.cc + 
${MINDDATA_DIR}/engine/perf/monitor.cc + ${MINDDATA_DIR}/engine/perf/device_queue_tracing.cc + ${MINDDATA_DIR}/engine/perf/connector_size.cc + ${MINDDATA_DIR}/engine/perf/connector_throughput.cc + ${MINDDATA_DIR}/engine/perf/dataset_iterator_tracing.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/distributed_sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/pk_sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/random_sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/sequential_sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_random_sampler.cc + ${MINDDATA_DIR}/engine/datasetops/source/sampler/weighted_random_sampler.cc + ${MINDDATA_DIR}/engine/runtime_context.cc + ${MINDDATA_DIR}/engine/tree_adapter.cc + ${MINDDATA_DIR}/engine/data_buffer.cc + ${MINDDATA_DIR}/engine/execution_tree.cc + ${MINDDATA_DIR}/engine/dataset_iterator.cc + ${MINDDATA_DIR}/core/tensor_row.cc + ${MINDDATA_DIR}/api/vision.cc + ${MINDDATA_DIR}/api/transforms.cc + ${MINDDATA_DIR}/util/path.cc + ${MINDDATA_DIR}/util/status.cc + ${MINDDATA_DIR}/util/service.cc + ${MINDDATA_DIR}/util/data_helper.cc + ${MINDDATA_DIR}/util/cond_var.cc + ${MINDDATA_DIR}/engine/data_schema.cc + ${MINDDATA_DIR}/kernels/tensor_op.cc + ${MINDDATA_DIR}/kernels/image/lite_image_utils.cc + ${MINDDATA_DIR}/kernels/image/center_crop_op.cc + ${MINDDATA_DIR}/kernels/image/crop_op.cc + ${MINDDATA_DIR}/kernels/image/normalize_op.cc + ${MINDDATA_DIR}/kernels/image/resize_op.cc + ${MINDDATA_DIR}/kernels/image/rotate_op.cc + ${MINDDATA_DIR}/kernels/data/compose_op.cc + ${MINDDATA_DIR}/kernels/data/duplicate_op.cc + ${MINDDATA_DIR}/kernels/data/one_hot_op.cc + ${MINDDATA_DIR}/kernels/data/random_apply_op.cc + ${MINDDATA_DIR}/kernels/data/random_choice_op.cc + ${MINDDATA_DIR}/kernels/data/type_cast_op.cc + 
${MINDDATA_DIR}/kernels/image/exif_utils.cc + ${MINDDATA_DIR}/callback/callback_manager.cc + ${MINDDATA_DIR}/util/task_manager.cc + ${MINDDATA_DIR}/util/services.cc + ${MINDDATA_DIR}/util/wait_post.cc + ${MINDDATA_DIR}/util/task.cc + ${MINDDATA_DIR}/util/circular_pool.cc + ${MINDDATA_DIR}/util/lock.cc + ${MINDDATA_DIR}/util/intrp_service.cc + ${MINDDATA_DIR}/util/arena.cc + ) add_library(minddata-lite SHARED - ${MINDDATA_API_SRC_FILES} - ${MINDDATA_CALLBACK_SRC_FILES} - ${MINDDATA_CORE_SRC_FILES} - ${MINDDATA_ENGINE_SRC_FILES} - #${MINDDATA_ENGINE_CACHE_SRC_FILES} - ${MINDDATA_ENGINE_CONSUMERS_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES} - ${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES} - ${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES} - ${MINDDATA_ENGINE_IR_CACHE_SRC_FILES} - ${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES} - ${MINDDATA_ENGINE_OPT_SRC_FILES} - ${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES} - ${MINDDATA_ENGINE_OPT_POST_SRC_FILES} - ${MINDDATA_ENGINE_OPT_PRE_SRC_FILES} - ${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES} - ${MINDDATA_ENGINE_PERF_SRC_FILES} - ${MINDDATA_KERNELS_SRC_FILES} - ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} - ${MINDDATA_KERNELS_IMAGE_SRC_FILES} - ${MINDDATA_KERNELS_DATA_SRC_FILES} - ${MINDDATA_UTIL_SRC_FILES} - ${MINDDATA_EXAMPLE_SRC} - ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc - ${CORE_DIR}/utils/ms_utils.cc - ) + ${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES} + ${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc + ${CORE_DIR}/utils/ms_utils.cc + ${MINDDATA_FULL_SRC} + ) find_package(Threads REQUIRED) target_link_libraries(minddata-lite - securec - mindspore::jpeg_turbo - mindspore::turbojpeg - mindspore::json - Threads::Threads - ) + securec + mindspore::jpeg_turbo + mindspore::turbojpeg + mindspore::json + Threads::Threads + ) # ref: https://github.com/android/ndk/issues/1202 
if(PLATFORM_ARM32) - file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a) - if(LIBCLANG_RT_LIB STREQUAL "") - MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-androi2d.a in $ENV{ANDROID_NDK}") - endif() - target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB}) + file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a) + if(LIBCLANG_RT_LIB STREQUAL "") + MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-android.a in $ENV{ANDROID_NDK}") + endif() + target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB}) endif() if(PLATFORM_ARM32 OR PLATFORM_ARM64) - target_link_libraries(minddata-lite log) - elseif(BUILD_MINDDATA_EXAMPLE) - endif() + target_link_libraries(minddata-lite log) +endif() elseif(BUILD_MINDDATA STREQUAL "wrapper") include_directories("${MINDDATA_DIR}/kernels/image") include_directories("${MINDDATA_DIR}/util") diff --git a/mindspore/lite/minddata/example/CMakeLists.txt b/mindspore/lite/minddata/example/CMakeLists.txt new file mode 100644 index 0000000000..02a842b6b0 --- /dev/null +++ b/mindspore/lite/minddata/example/CMakeLists.txt @@ -0,0 +1,22 @@ +cmake_minimum_required(VERSION 3.14.1) +project(testlenet) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror +-Wall -Wno-deprecated-declarations -fPIC") + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") + +set(DepDIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.1.0-inference-linux-x64/minddata") + +include_directories(${DepDIR}) + + + +add_executable(testlenet + ${CMAKE_CURRENT_SOURCE_DIR}/testlenet.cpp + ) + +target_link_libraries(testlenet + ${DepDIR}/lib/libminddata-lite.so + ${DepDIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62 + ${DepDIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0 + pthread) \ No newline at end of file diff --git a/mindspore/lite/minddata/example/testlenet.cpp b/mindspore/lite/minddata/example/testlenet.cpp new file mode 100644 index 
0000000000..99c8807ebe --- /dev/null +++ b/mindspore/lite/minddata/example/testlenet.cpp @@ -0,0 +1,62 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <algorithm> +#include <cstdint> +#include <cstdlib> +#include <fstream> +#include <iostream> +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include "include/datasets.h" +#include "include/iterator.h" +#include "include/vision_lite.h" +#include "include/transforms.h" +#include "include/tensor.h" + +using mindspore::dataset::Dataset; +using mindspore::dataset::Iterator; +using mindspore::dataset::Mnist; +using mindspore::dataset::Tensor; +using mindspore::dataset::TensorOperation; + +int main(int argc, char **argv) { + std::string folder_path = "./testMnistData/"; + std::shared_ptr<Dataset> ds = Mnist(folder_path, "all"); + + std::shared_ptr<TensorOperation> resize = mindspore::dataset::vision::Resize({32, 32}); + ds = ds->Map({resize}); + + ds->Shuffle(2); + ds->Batch(2); + + std::shared_ptr<Iterator> iter = ds->CreateIterator(); + + std::unordered_map<std::string, std::shared_ptr<Tensor>> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + iter->GetNextRow(&row); + } + + iter->Stop(); +}