add code for MindData lite C++ include and API

pull/11998/head
xulei2020 4 years ago
parent b2fb825d89
commit ee09b38f00

@ -16,7 +16,33 @@ set(MIND_DATA_LIB_DIR ${RUNTIME_PKG_NAME}/minddata/lib)
set(LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/lib)
if(BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
if(BUILD_MINDDATA STREQUAL "full")
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/liteapi/include/ DESTINATION
${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE)
install(FILES ${TOP_DIR}/include/api/status.h DESTINATION ${MIND_DATA_INC_DIR}
RENAME ms_status.h COMPONENT ${RUNTIME_COMPONENT_NAME})
if(PLATFORM_ARM64)
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so
DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
elseif(PLATFORM_ARM32)
file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
else()
install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib
RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib
RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()
if(BUILD_MINDDATA STREQUAL "wrapper")
install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR}
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE)
if(PLATFORM_ARM64)

@ -79,6 +79,7 @@
// IR leaf nodes
#include "minddata/dataset/engine/ir/datasetops/source/album_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
// IR leaf nodes disabled for android
#ifndef ENABLE_ANDROID
@ -89,7 +90,6 @@
#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/random_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h"
@ -390,7 +390,7 @@ std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &datase
return ds;
}
#endif
// Function to create a MnistDataset.
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage,
const std::shared_ptr<SamplerObj> &sampler,
@ -399,6 +399,8 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::s
return ds;
}
#ifndef ENABLE_ANDROID
// Function to overload "+" operator to concat two datasets
std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1,
const std::shared_ptr<Dataset> &datasets2) {
@ -906,12 +908,15 @@ MindDataDataset::MindDataDataset(const std::vector<std::string> &dataset_files,
auto ds = std::make_shared<MindDataNode>(dataset_files, columns_list, sampler, padded_sample, num_padded);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
#endif
MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage,
const std::shared_ptr<SamplerObj> &sampler, const std::shared_ptr<DatasetCache> &cache) {
auto ds = std::make_shared<MnistNode>(dataset_dir, usage, sampler, cache);
ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
#ifndef ENABLE_ANDROID
TextFileDataset::TextFileDataset(const std::vector<std::string> &dataset_files, int64_t num_samples,
ShuffleMode shuffle, int32_t num_shards, int32_t shard_id,
const std::shared_ptr<DatasetCache> &cache) {
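
With the Mnist() factory and the MnistDataset constructor moved outside the ENABLE_ANDROID guard, the MNIST pipeline becomes reachable from the lite C++ API. A minimal usage sketch follows; the include path, the "./mnist" data directory, and the Dataset::CreateIterator() call are assumptions not shown in this diff.

// Hedged sketch: build and drain an MnistDataset through the lite C++ API.
#include <iostream>
#include <memory>
#include "minddata/dataset/include/datasets.h"  // assumed install location of the liteapi headers

using mindspore::dataset::Dataset;
using mindspore::dataset::Iterator;
using mindspore::dataset::Mnist;
using mindspore::dataset::TensorMap;

int main() {
  // "./mnist" is a placeholder directory containing the MNIST files.
  std::shared_ptr<Dataset> ds = Mnist("./mnist", "all");
  // CreateIterator() is assumed to exist on Dataset (it is not part of this diff).
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  if (iter == nullptr) return 1;

  TensorMap row;
  int64_t count = 0;
  iter->GetNextRow(&row);
  while (!row.empty()) {
    ++count;
    iter->GetNextRow(&row);
  }
  std::cout << "rows: " << count << std::endl;
  iter->Stop();
  return 0;
}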

@ -15,6 +15,7 @@
*/
#include "minddata/dataset/include/samplers.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
@ -32,8 +33,6 @@
#include "minddata/mindrecord/include/shard_sequential_sample.h"
#include "minddata/mindrecord/include/shard_shuffle.h"
#include "minddata/dataset/util/random.h"
#else
#include "minddata/dataset/core/config_manager.h"
#endif
namespace mindspore {

@ -25,9 +25,9 @@
#include "minddata/dataset/engine/datasetops/device_queue_op.h"
#include "minddata/dataset/engine/opt/pre/getter_pass.h"
#include "minddata/dataset/engine/tree_adapter.h"
#include "minddata/mindrecord/include/shard_index_generator.h"
#ifndef ENABLE_ANDROID
#include "minddata/mindrecord/include/shard_index_generator.h"
#include "minddata/mindrecord/include/shard_header.h"
#include "minddata/mindrecord/include/shard_writer.h"
#endif
@ -324,10 +324,9 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map<std::string,
return Status::OK();
}
Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
const std::unordered_map<std::string, int32_t> &column_name_id_map,
nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
static Status ValidateInputParams(nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data,
const std::unordered_map<std::string, int32_t> &column_name_id_map) {
if (row_raw_data == nullptr) {
RETURN_STATUS_UNEXPECTED("Error: row raw data is NULL.");
}
@ -337,76 +336,104 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
if (column_name_id_map.empty()) {
RETURN_STATUS_UNEXPECTED("Error: column not found");
}
return Status::OK();
}
Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::unique_ptr<std::vector<uint8_t>> *data_ptr) {
auto column_type = tensor->type();
Status s;
if (column_type == DataType::DE_FLOAT32) {
std::unique_ptr<float> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_FLOAT64) {
std::unique_ptr<double> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
}
return Status::OK();
}
Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
auto column_type = tensor->type();
Status s;
std::unique_ptr<std::vector<uint8_t>> data_ptr;
if (column_type == DataType::DE_INT8) {
std::unique_ptr<int32_t> data;
std::unique_ptr<int8_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_INT16) {
std::unique_ptr<int32_t> data;
std::unique_ptr<int16_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_UINT16) {
std::unique_ptr<int32_t> data;
std::unique_ptr<uint16_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_UINT8) {
std::unique_ptr<uint8_t> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_INT32) {
std::unique_ptr<int32_t> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_UINT32) {
std::unique_ptr<int64_t> data;
std::unique_ptr<uint32_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_INT64) {
std::unique_ptr<int64_t> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_FLOAT32 || column_type == DataType::DE_FLOAT64) {
s = FetchFloatData(tensor, column_name, row_raw_data, &data_ptr);
RETURN_IF_NOT_OK(s);
} else if (column_type == DataType::DE_STRING) {
std::string_view sv;
RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0})); // assume scalar string tensor
std::string ss(sv);
(*row_raw_data)[column_name] = std::move(ss);
return Status::OK();
} else {
RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
}
if (data_ptr != nullptr) {
(*row_bin_data)[column_name] = std::move(data_ptr);
}
return Status::OK();
}
Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
const std::unordered_map<std::string, int32_t> &column_name_id_map,
nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
Status s;
s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map);
if (s.IsError()) {
return s;
}
for (auto &col : column_name_id_map) {
auto idx = col.second;
auto column_name = col.first;
auto &tensor = row[idx];
auto column_type = tensor->type();
std::unique_ptr<std::vector<uint8_t>> data_ptr;
if (column_type == DataType::DE_INT8) {
std::unique_ptr<int32_t> data;
std::unique_ptr<int8_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_INT16) {
std::unique_ptr<int32_t> data;
std::unique_ptr<int16_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_UINT16) {
std::unique_ptr<int32_t> data;
std::unique_ptr<uint16_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_UINT8) {
std::unique_ptr<uint8_t> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_INT32) {
std::unique_ptr<int32_t> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_UINT32) {
std::unique_ptr<int64_t> data;
std::unique_ptr<uint32_t> dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_INT64) {
std::unique_ptr<int64_t> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_FLOAT32) {
std::unique_ptr<float> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_FLOAT64) {
std::unique_ptr<double> data, dummy;
s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
RETURN_IF_NOT_OK(s);
if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
} else if (column_type == DataType::DE_STRING) {
std::string_view sv;
RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0})); // assume scalar string tensor
std::string ss(sv);
(*row_raw_data)[column_name] = std::move(ss);
continue;
} else {
RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
}
s = FetchItemData(tensor, column_name, row_raw_data, row_bin_data);
RETURN_IF_NOT_OK(s);
if (data_ptr != nullptr) {
(*row_bin_data)[column_name] = std::move(data_ptr);
}
}
return Status::OK();
}

@ -130,6 +130,12 @@ class SaveToDisk : public TreeConsumer {
nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data);
Status FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::unique_ptr<std::vector<uint8_t>> *data_ptr);
Status FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data);
std::string dataset_path_;
int32_t num_files_;
std::string dataset_type_;

@ -17,6 +17,7 @@
#include <memory>
#include <vector>
#include <utility>
#include <set>
#include "minddata/dataset/engine/datasetops/map_op/cpu_map_job.h"
namespace mindspore {

@ -86,8 +86,9 @@
#include "minddata/dataset/engine/datasetops/source/csv_op.h"
#include "minddata/dataset/engine/datasetops/source/text_file_op.h"
#endif
#include "minddata/dataset/engine/datasetops/source/voc_op.h"
#ifdef ENABLE_PYTHON
#include "minddata/dataset/engine/datasetops/source/voc_op.h"
#include "minddata/dataset/engine/datasetops/filter_op.h"
#include "minddata/dataset/engine/datasetops/source/generator_op.h"
#endif

@ -80,9 +80,10 @@ Status ProfilingManager::Initialize() {
std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_);
RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling));
#ifndef ENABLE_ANDROID
std::shared_ptr<Sampling> cpu_sampling = std::make_shared<CpuSampling>(tree_);
RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling));
#endif
return Status::OK();
}

@ -18,14 +18,16 @@
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/ir/datasetops/root_node.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h"
#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h"
#include "minddata/dataset/engine/opt/post/repeat_pass.h"
#endif
#include "minddata/dataset/engine/opt/pass.h"
#include "minddata/dataset/engine/opt/post/auto_worker_pass.h"
#include "minddata/dataset/engine/opt/post/repeat_pass.h"
#ifdef ENABLE_PYTHON
#include "minddata/dataset/engine/opt/post/generator_node_pass.h"
#endif
#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h"
#include "minddata/dataset/engine/opt/pre/cache_validation_pass.h"
#include "minddata/dataset/engine/opt/pre/deep_copy_pass.h"
#include "minddata/dataset/engine/opt/pre/epoch_ctrl_pass.h"
@ -55,7 +57,9 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
actions.emplace_back(std::make_unique<NodeRemovalPass>());
actions.emplace_back(std::make_unique<EpochCtrlPass>());
if (usage_ == kDeGetter) actions.emplace_back(std::make_unique<GetterPass>());
#ifndef ENABLE_ANDROID
actions.emplace_back(std::make_unique<CacheTransformPass>());
#endif
// Vector of flags for each action
std::vector<bool> modified(actions.size(), false);
// Apply pre-pass actions
@ -72,7 +76,9 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
// Vector of optimizations
std::vector<std::unique_ptr<IRNodePass>> optimizations;
MS_LOG(INFO) << "Running optimization pass loops";
#ifndef ENABLE_ANDROID
optimizations.emplace_back(std::make_unique<TensorOpFusionPass>());
#endif
// Apply optimization pass actions
for (auto i = 0; i < optimizations.size(); i++) {
bool modified = false;
@ -95,8 +101,9 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
#ifdef ENABLE_PYTHON
actions.emplace_back(std::make_unique<GeneratorNodePass>());
#endif
#ifndef ENABLE_ANDROID
actions.emplace_back(std::make_unique<RepeatPass>());
#endif
// We will gradually move RepeatPass from ExecutionTree::PrepareTreePostAction to here.
// Vector of flags for each action

@ -830,6 +830,7 @@ std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &datase
const std::vector<std::string> &columns_list = {},
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
nlohmann::json padded_sample = nullptr, int64_t num_padded = 0);
#endif
class MnistDataset : public Dataset {
public:
@ -850,7 +851,7 @@ class MnistDataset : public Dataset {
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all",
const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
const std::shared_ptr<DatasetCache> &cache = nullptr);
#ifndef ENABLE_ANDROID
/// \brief Function to create a ConcatDataset
/// \notes Overload "+" operator to concat two datasets
/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated

@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>
#include "minddata/dataset/include/status.h"
#ifndef ENABLE_ANDROID

@ -0,0 +1,190 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_
#include <cstdlib>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include "include/memory_pool.h"
namespace mindspore {
namespace dataset {
// The following conforms to the requirements of
// std::allocator. Do not rename/change any needed
// requirements, e.g. function names, typedef etc.
template <typename T>
class Allocator {
public:
template <typename U>
friend class Allocator;
using value_type = T;
using pointer = T *;
using const_pointer = const T *;
using reference = T &;
using const_reference = const T &;
using size_type = uint64_t;
using difference_type = std::ptrdiff_t;
template <typename U>
struct rebind {
using other = Allocator<U>;
};
using propagate_on_container_copy_assignment = std::true_type;
using propagate_on_container_move_assignment = std::true_type;
using propagate_on_container_swap = std::true_type;
explicit Allocator(const std::shared_ptr<MemoryPool> &b) : pool_(b) {}
~Allocator() = default;
template <typename U>
explicit Allocator(Allocator<U> const &rhs) : pool_(rhs.pool_) {}
template <typename U>
bool operator==(Allocator<U> const &rhs) const {
return pool_ == rhs.pool_;
}
template <typename U>
bool operator!=(Allocator<U> const &rhs) const {
return pool_ != rhs.pool_;
}
pointer allocate(std::size_t n) {
void *p = nullptr;
Status rc = pool_->Allocate(n * sizeof(T), &p);
if (rc.IsOk()) {
return reinterpret_cast<pointer>(p);
} else if (rc == StatusCode::kMDOutOfMemory) {
throw std::bad_alloc();
} else {
throw std::exception();
}
}
void deallocate(pointer p, std::size_t n = 0) noexcept { pool_->Deallocate(p); }
size_type max_size() { return pool_->get_max_size(); }
private:
std::shared_ptr<MemoryPool> pool_;
};
/// \brief It is a wrapper of unique_ptr with a custom Allocator class defined above
template <typename T, typename C = std::allocator<T>, typename... Args>
Status MakeUnique(std::unique_ptr<T[], std::function<void(T *)>> *out, C alloc, size_t n, Args &&... args) {
RETURN_UNEXPECTED_IF_NULL(out);
CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "size must be positive");
try {
T *data = alloc.allocate(n);
// Some of our allocator implementations (e.g. NumaAllocator) don't throw std::bad_alloc,
// so we have to check for a null pointer.
if (data == nullptr) {
return Status(StatusCode::kMDOutOfMemory);
}
if (!std::is_arithmetic<T>::value) {
for (auto i = 0; i < n; i++) {
std::allocator_traits<C>::construct(alloc, &(data[i]), std::forward<Args>(args)...);
}
}
auto deleter = [](T *p, C f_alloc, size_t f_n) {
if (!std::is_arithmetic<T>::value && std::is_destructible<T>::value) {
for (auto i = 0; i < f_n; ++i) {
std::allocator_traits<C>::destroy(f_alloc, &p[i]);
}
}
f_alloc.deallocate(p, f_n);
};
*out = std::unique_ptr<T[], std::function<void(T *)>>(data, std::bind(deleter, std::placeholders::_1, alloc, n));
} catch (const std::bad_alloc &e) {
return Status(StatusCode::kMDOutOfMemory);
} catch (const std::exception &e) {
RETURN_STATUS_UNEXPECTED(e.what());
}
return Status::OK();
}
/// \brief It is a wrapper of the above custom unique_ptr with some additional methods
/// \tparam T The type of object to be allocated
/// \tparam C Allocator. Default to std::allocator
template <typename T, typename C = std::allocator<T>>
class MemGuard {
public:
using allocator = C;
MemGuard() : n_(0) {}
explicit MemGuard(allocator a) : n_(0), alloc_(a) {}
// There is no copy constructor nor assignment operator because the memory is solely owned by this object.
MemGuard(const MemGuard &) = delete;
MemGuard &operator=(const MemGuard &) = delete;
// On the other hand, we can support the move constructor
MemGuard(MemGuard &&lhs) noexcept : n_(lhs.n_), alloc_(std::move(lhs.alloc_)), ptr_(std::move(lhs.ptr_)) {}
MemGuard &operator=(MemGuard &&lhs) noexcept {
if (this != &lhs) {
this->deallocate();
n_ = lhs.n_;
alloc_ = std::move(lhs.alloc_);
ptr_ = std::move(lhs.ptr_);
}
return *this;
}
/// \brief Explicitly deallocate the memory if allocated
void deallocate() {
if (ptr_) {
ptr_.reset();
}
}
/// \brief Allocate memory (with emplace feature). Previous one will be released. If size is 0, no new memory is
/// allocated.
/// \param n Number of objects of type T to be allocated
/// \tparam Args Extra arguments pass to the constructor of T
template <typename... Args>
Status allocate(size_t n, Args &&... args) noexcept {
deallocate();
n_ = n;
return MakeUnique(&ptr_, alloc_, n, std::forward<Args>(args)...);
}
~MemGuard() noexcept { deallocate(); }
/// \brief Getter function
/// \return The pointer to the memory allocated
T *GetPointer() const { return ptr_.get(); }
/// \brief Getter function
/// \return The pointer to the memory allocated
T *GetMutablePointer() { return ptr_.get(); }
/// \brief Overload [] operator to access a particular element
/// \param x index to the element. Must be less than number of element allocated.
/// \return pointer to the x-th element
T *operator[](size_t x) { return GetMutablePointer() + x; }
/// \brief Overload [] operator to access a particular element
/// \param x index to the element. Must be less than number of element allocated.
/// \return pointer to the x-th element
T *operator[](size_t x) const { return GetPointer() + x; }
/// \brief Return how many bytes are allocated in total
/// \return Number of bytes allocated in total
size_t GetSizeInBytes() const { return n_ * sizeof(T); }
private:
size_t n_;
allocator alloc_;
std::unique_ptr<T[], std::function<void(T *)>> ptr_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_
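
A short usage sketch of MemGuard with its default std::allocator follows (no custom MemoryPool is needed in that case); the include path is an assumption about where this header lands in the lite package.

// Hedged sketch: MemGuard managing a small arithmetic buffer via std::allocator.
#include <iostream>
#include "include/allocator.h"  // assumed installed path

using mindspore::dataset::MemGuard;

int main() {
  MemGuard<int> buf;                 // the allocator type C defaults to std::allocator<int>
  auto rc = buf.allocate(8);         // allocate space for 8 ints
  if (rc.IsError()) return 1;
  for (size_t i = 0; i < 8; ++i) {
    *buf[i] = static_cast<int>(i);   // operator[] returns a pointer to the i-th element
  }
  std::cout << "bytes held: " << buf.GetSizeInBytes() << std::endl;  // 8 * sizeof(int)
  return 0;
}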

@ -0,0 +1,109 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
#include <cstdint>
#include <limits>
#include <random>
namespace mindspore {
namespace dataset {
// Various type defines for convenience
using uchar = unsigned char;
using dsize_t = int64_t;
// Target devices to perform map operation
enum class MapTargetDevice { kCpu, kGpu, kDvpp };
// Possible dataset types for holding the data and client type
enum class DatasetType { kUnknown, kArrow, kTf };
// Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
// Possible values for shuffle
enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2 };
// Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
// Possible values for Image format types in a batch
enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 };
// Possible values for Image format types
enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 };
// Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
// Possible JiebaMode modes
enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 };
// Possible values for SPieceTokenizerOutType
enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
// Possible values for SPieceTokenizerLoadType
enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };
// Possible values for SentencePieceModel
enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
// Possible values for NormalizeForm
enum class NormalizeForm {
kNone = 0,
kNfc,
kNfkc,
kNfd,
kNfkd,
};
// convenience functions for 32bit int bitmask
inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; }
inline void BitSet(uint32_t *bits, uint32_t bitMask) { *bits |= bitMask; }
inline void BitClear(uint32_t *bits, uint32_t bitMask) { *bits &= (~bitMask); }
constexpr int32_t kDeMaxDim = std::numeric_limits<int32_t>::max(); // 2147483647 or 2^31 - 1
constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max();
constexpr int64_t kDeMaxFreq = std::numeric_limits<int64_t>::max(); // 9223372036854775807 or 2^63 - 1
constexpr int64_t kDeMaxTopk = std::numeric_limits<int64_t>::max();
constexpr uint32_t kCfgRowsPerBuffer = 1;
constexpr uint32_t kCfgParallelWorkers = 4;
constexpr uint32_t kCfgWorkerConnectorSize = 16;
constexpr uint32_t kCfgOpConnectorSize = 16;
constexpr int32_t kCfgDefaultRankId = -1;
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
constexpr int32_t kCfgDefaultCachePort = 50052;
constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
constexpr int32_t kDftPrefetchSize = 20;
constexpr int32_t kDftNumConnections = 12;
constexpr int32_t kDftAutoNumWorkers = false;
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;
using connection_id_type = uint64_t;
using session_id_type = uint32_t;
using row_id_type = int64_t;
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
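
The 32-bit bitmask helpers above are self-contained; a small example follows (the flag names and the include path are illustrative assumptions).

// Example of the 32-bit bitmask helpers.
#include <cstdint>
#include <iostream>
#include "include/constants.h"  // assumed installed path

using mindspore::dataset::BitClear;
using mindspore::dataset::BitSet;
using mindspore::dataset::BitTest;

int main() {
  constexpr uint32_t kFlagShuffle = 1u << 0;  // flag names here are made up for illustration
  constexpr uint32_t kFlagRepeat = 1u << 3;
  uint32_t flags = 0;
  BitSet(&flags, kFlagShuffle | kFlagRepeat);
  std::cout << std::boolalpha << BitTest(flags, kFlagRepeat) << std::endl;  // true
  BitClear(&flags, kFlagRepeat);
  std::cout << BitTest(flags, kFlagRepeat) << std::endl;                    // false
  return 0;
}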

File diff suppressed because it is too large.

@ -0,0 +1,58 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
#include <vector>
#include <memory>
#include "include/api/types.h"
#include "include/constants.h"
#include "dataset/include/transforms.h"
namespace mindspore {
namespace dataset {
// class to run tensor operations in eager mode
class Execute {
public:
/// \brief Constructor
explicit Execute(std::shared_ptr<TensorOperation> op);
explicit Execute(std::vector<std::shared_ptr<TensorOperation>> ops);
/// \brief Destructor
~Execute() = default;
/// \brief callable function to execute the TensorOperation in eager mode
/// \param[in] input Tensor to be transformed
/// \param[out] output Transformed tensor
/// \return Status code
Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output);
/// \brief callable function to execute the TensorOperation in eager mode
/// \param[in] input_tensor_list List of Tensor to be transformed
/// \param[out] out Result tensor after transform
/// \return - Status
Status operator()(const std::vector<mindspore::MSTensor> &input_tensor_list, std::vector<mindspore::MSTensor> *out);
private:
std::vector<std::shared_ptr<TensorOperation>> ops_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
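
A hedged sketch of eager usage: any factory that yields a std::shared_ptr<TensorOperation> (for example a vision transform from the lite transforms API, which is not shown in this diff) can be passed to Execute and applied to an MSTensor.

// Hedged sketch: apply a single TensorOperation eagerly to one MSTensor.
#include <memory>
#include "dataset/include/execute.h"  // include path follows the liteapi layout (assumption)

using mindspore::MSTensor;
using mindspore::dataset::Execute;
using mindspore::dataset::TensorOperation;

// 'op' is expected to come from a transform factory elsewhere in the lite API.
bool ApplyEagerly(const std::shared_ptr<TensorOperation> &op, const MSTensor &input, MSTensor *output) {
  Execute transform(op);               // a std::vector of ops is also accepted
  auto rc = transform(input, output);  // returns a Status
  return rc.IsOk();
}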

@ -0,0 +1,120 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "include/status.h"
namespace mindspore {
namespace dataset {
// Forward declare
class ExecutionTree;
class DatasetIterator;
class DatasetOp;
class Tensor;
class NativeRuntimeContext;
class IteratorConsumer;
class Dataset;
using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;
using TensorVec = std::vector<std::shared_ptr<Tensor>>;
// Abstract class for iterating over the dataset.
class Iterator {
public:
/// \brief Constructor
Iterator();
/// \brief Destructor
~Iterator();
/// \brief Method for building and launching the pipeline.
/// \param[in] ops - a vector of DatasetOp in the data pipeline.
/// \return - a Status error code, returns OK if no error encountered.
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);
/// \brief Function to get the next row from the data pipeline.
/// \note Type of return data is a map(with column name).
/// \param[out] row - the output tensor row.
/// \return Returns true if no error encountered else false.
bool GetNextRow(TensorMap *row);
/// \brief Function to get the next row from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \param[out] row - the output tensor row.
/// \return Returns true if no error encountered else false.
bool GetNextRow(TensorVec *row);
/// \brief Function to shut down the data pipeline.
void Stop();
class _Iterator {
public:
explicit _Iterator(Iterator *lt) : ind_{0}, lt_{lt}, cur_row_{nullptr} {
if (lt_) {
cur_row_ = new TensorMap();
lt_->GetNextRow(cur_row_);
}
}
// Destructor
~_Iterator() {
if (cur_row_) {
delete cur_row_;
}
}
_Iterator &operator++() {
if (lt_) {
++ind_;
lt_->GetNextRow(cur_row_);
}
if (cur_row_ && cur_row_->size() == 0) {
delete cur_row_;
cur_row_ = nullptr;
}
return *this;
} // prefix ++ overload
TensorMap &operator*() { return *cur_row_; } // dereference operator
TensorMap *operator->() { return cur_row_; }
bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; }
private:
int ind_; // the cur node our Iterator points to
Iterator *lt_;
TensorMap *cur_row_;
};
_Iterator begin() { return _Iterator(this); }
_Iterator end() { return _Iterator(nullptr); }
private:
std::unique_ptr<NativeRuntimeContext> runtime_context_;
IteratorConsumer *consumer_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_
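
The nested _Iterator together with begin()/end() enables range-based iteration; the sketch below assumes the Iterator was obtained and launched elsewhere (e.g. via Dataset::CreateIterator(), which is not part of this header).

// Hedged sketch: drain a launched Iterator with range-based for.
#include <cstdint>
#include <iostream>
#include <memory>
#include "dataset/include/iterator.h"  // assumed installed path

using mindspore::dataset::Iterator;
using mindspore::dataset::TensorMap;

void CountRows(const std::shared_ptr<Iterator> &iter) {
  int64_t rows = 0;
  for (TensorMap &row : *iter) {  // each row maps column name -> Tensor
    (void)row;                    // inspect row["image"], row["label"], etc. as needed
    ++rows;
  }
  std::cout << "rows consumed: " << rows << std::endl;
  iter->Stop();  // shut down the pipeline explicitly
}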

@ -0,0 +1,59 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_
#include <cstddef>
#include <cstdint>
#include <memory>
#include "include/status.h"
namespace mindspore {
namespace dataset {
// Abstract class of a memory pool
class MemoryPool {
public:
// Allocate a block of size n
virtual Status Allocate(size_t, void **) = 0;
// Enlarge or shrink a block from oldSz to newSz
virtual Status Reallocate(void **, size_t old_sz, size_t new_sz) = 0;
// Free a pointer
virtual void Deallocate(void *) = 0;
// What is the maximum size I can allocate ?
virtual uint64_t get_max_size() const = 0;
virtual int PercentFree() const = 0;
// Destructor
virtual ~MemoryPool() {}
};
Status DeMalloc(std::size_t s, void **p, bool);
} // namespace dataset
} // namespace mindspore
void *operator new(std::size_t, mindspore::Status *, std::shared_ptr<mindspore::dataset::MemoryPool>);
void *operator new[](std::size_t, mindspore::Status *, std::shared_ptr<mindspore::dataset::MemoryPool>);
void operator delete(void *, std::shared_ptr<mindspore::dataset::MemoryPool>);
void operator delete[](void *, std::shared_ptr<mindspore::dataset::MemoryPool>);
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_
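
To illustrate the abstract interface, here is a minimal malloc-backed pool; it is a sketch only (the real lite pools are implemented elsewhere), and the StatusCode spelling follows the usage seen in allocator.h above.

// Hedged sketch: trivial MemoryPool backed by malloc/realloc/free.
#include <cstdint>
#include <cstdlib>
#include <limits>
#include "include/memory_pool.h"  // assumed installed path

namespace mindspore {
namespace dataset {

class MallocPool : public MemoryPool {
 public:
  Status Allocate(size_t n, void **p) override {
    *p = std::malloc(n);
    return (*p != nullptr) ? Status::OK() : Status(StatusCode::kMDOutOfMemory);
  }
  Status Reallocate(void **p, size_t old_sz, size_t new_sz) override {
    void *q = std::realloc(*p, new_sz);
    if (q == nullptr) return Status(StatusCode::kMDOutOfMemory);
    *p = q;
    return Status::OK();
  }
  void Deallocate(void *p) override { std::free(p); }
  uint64_t get_max_size() const override { return std::numeric_limits<uint64_t>::max(); }
  int PercentFree() const override { return 100; }  // not meaningful for plain malloc
};

}  // namespace dataset
}  // namespace mindspore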

@ -0,0 +1,126 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_
#include <dirent.h>
#include <memory>
#include <string>
#include "include/status.h"
namespace mindspore {
namespace dataset {
class Path {
public:
class DirIterator {
public:
static std::shared_ptr<DirIterator> OpenDirectory(Path *f);
~DirIterator();
bool hasNext();
Path next();
private:
explicit DirIterator(Path *f);
Path *dir_;
DIR *dp_;
struct dirent *entry_;
};
explicit Path(const std::string &);
explicit Path(const char *);
~Path() = default;
Path(const Path &);
Path &operator=(const Path &);
Path(Path &&) noexcept;
Path &operator=(Path &&) noexcept;
std::string toString() const { return path_; }
Path operator+(const Path &);
Path operator+(const std::string &);
Path operator+(const char *);
Path &operator+=(const Path &rhs);
Path &operator+=(const std::string &);
Path &operator+=(const char *);
Path operator/(const Path &);
Path operator/(const std::string &);
Path operator/(const char *);
bool operator==(const Path &rhs) const { return (path_ == rhs.path_); }
bool operator!=(const Path &rhs) const { return (path_ != rhs.path_); }
bool operator<(const Path &rhs) const { return (path_ < rhs.path_); }
bool operator>(const Path &rhs) const { return (path_ > rhs.path_); }
bool operator<=(const Path &rhs) const { return (path_ <= rhs.path_); }
bool operator>=(const Path &rhs) const { return (path_ >= rhs.path_); }
bool Exists();
bool IsDirectory();
Status CreateDirectory();
Status CreateDirectories();
std::string Extension() const;
std::string ParentPath();
Status Remove();
Status CreateFile(int *fd);
Status OpenFile(int *fd, bool create = false);
Status CloseFile(int fd) const;
Status TruncateFile(int fd) const;
std::string Basename();
friend std::ostream &operator<<(std::ostream &os, const Path &s);
private:
static char separator_;
std::string path_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_
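
A short sketch of typical Path usage (join, existence check, directory iteration); the directory layout is hypothetical.

// Hedged sketch: list .jpg files under <root>/images with Path and DirIterator.
#include <iostream>
#include <string>
#include "include/path.h"  // assumed installed path

using mindspore::dataset::Path;

void ListJpegs(const std::string &root) {
  Path dir = Path(root) / "images";  // operator/ joins with the platform separator
  if (!dir.Exists() || !dir.IsDirectory()) {
    std::cout << dir.toString() << " is not a directory" << std::endl;
    return;
  }
  auto it = Path::DirIterator::OpenDirectory(&dir);
  while (it != nullptr && it->hasNext()) {
    Path entry = it->next();
    if (entry.Extension() == ".jpg") {
      std::cout << entry.toString() << std::endl;
    }
  }
}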

File diff suppressed because it is too large.

@ -0,0 +1,105 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
#if defined(__GNUC__) || defined(__clang__)
#define DEPRECATED __attribute__((deprecated))
#elif defined(_MSC_VER)
#define DEPRECATED __declspec(deprecated)
#else
#pragma message("WARNING: You need to implement DEPRECATED for this compiler")
#define DEPRECATED
#endif
#include <iostream>
#include <string>
#include <utility>
#include "include/ms_status.h"
namespace mindspore {
namespace dataset {
#define RETURN_IF_NOT_OK(_s) \
do { \
Status __rc = (_s); \
if (__rc.IsError()) { \
return __rc; \
} \
} while (false)
#define RETURN_STATUS_UNEXPECTED(_e) \
do { \
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e); \
} while (false)
#define CHECK_FAIL_RETURN_UNEXPECTED(_condition, _e) \
do { \
if (!(_condition)) { \
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e); \
} \
} while (false)
#define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e) \
do { \
if (!(_condition)) { \
return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \
} \
} while (false)
#define RETURN_UNEXPECTED_IF_NULL(_ptr) \
do { \
if ((_ptr) == nullptr) { \
std::string err_msg = "The pointer[" + std::string(#_ptr) + "] is null."; \
RETURN_STATUS_UNEXPECTED(err_msg); \
} \
} while (false)
#define RETURN_OK_IF_TRUE(_condition) \
do { \
if (_condition) { \
return Status::OK(); \
} \
} while (false)
#define RETURN_STATUS_SYNTAX_ERROR(_e) \
do { \
return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e); \
} while (false)
#define RETURN_SECOND_IF_ERROR(_s, _r) \
do { \
Status __rc = (_s); \
if (__rc.IsError()) { \
MS_LOG(ERROR) << __rc; \
return _r; \
} \
} while (false)
#if !defined(_WIN32) && !defined(_WIN64)
const float MAX_MEMORY_USAGE_THRESHOLD = 0.95;
float GetMemoryUsage();
#endif
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
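
A brief sketch of how these macros compose in a validation helper (the function names are illustrative only; the include path is an assumption).

// Hedged sketch: composing the status macros in an illustrative helper.
#include <string>
#include "include/status.h"  // assumed installed path

namespace mindspore {
namespace dataset {

Status CheckDatasetDir(const std::string &dir) {
  // Fail fast with a syntax-error status if the argument is empty.
  CHECK_FAIL_RETURN_SYNTAX_ERROR(!dir.empty(), "dataset_dir must not be empty");
  return Status::OK();
}

Status LoadSomething(const std::string &dir) {
  RETURN_IF_NOT_OK(CheckDatasetDir(dir));  // propagate any error status to the caller
  RETURN_OK_IF_TRUE(dir == ".");           // early-out example
  return Status::OK();
}

}  // namespace dataset
}  // namespace mindspore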

File diff suppressed because it is too large.

@ -0,0 +1,83 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
#include <memory>
#include <vector>
#include "include/constants.h"
namespace mindspore {
namespace dataset {
class Slice {
public:
Slice() : start_(0), stop_(0), step_(0) {}
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
Slice(Slice const &slice) = default;
~Slice() = default;
bool valid() const { return step_ != 0; }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
class SliceOption {
public:
explicit SliceOption(bool all) : all_(all) {}
explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
explicit SliceOption(Slice slice) : slice_(slice) {}
SliceOption(SliceOption const &slice) = default;
~SliceOption() = default;
// only one of the following will be valid
// given indices to slice the Tensor.
std::vector<dsize_t> indices_ = {};
// Slice object. All start, stop and step are 0 if invalid.
Slice slice_;
bool all_ = false;
};
/// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each
/// range represented by slice_options to generate a list of indices to be sliced.
/// \param[out] matrix Generated nested vector of indices
/// Example: For a 4 x 2 tensor, and with slice_list = {SliceOption({0})} (the first row), matrix will become
/// {{0}}. For slice_list = {SliceOption(all), SliceOption({0})} (the first column), matrix will become
/// {{0, 0}, {1, 0}, {2, 0}, {3, 0}}.
/// For slice_list = {SliceOption({0, 2})}, matrix will become {{0}, {2}}. The size of each nested array is always
/// equal to (slice_list).size().
/// \param[in] depth used to keep track of recursion level
/// \param[in] numbers vector used to represent current index
/// \param[in] matrix 2D vector to be populated with desired indices
/// \param[in] slice_options vector of SliceOption objects
void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers, const std::vector<SliceOption> &slice_list,
std::vector<std::vector<dsize_t>> *matrix);
/// Generate indices based on vector of SliceOptions
/// Calls the recursive helper function IndexGeneratorHelper
/// \param[in] slice_list vector of SliceOption objects. Note: If the user passes
/// {SliceOption(true), SliceOption(true)}, it will return a M x 2 vector, instead of reducing it to
/// {SliceOption(true)} first to only generate a M x 1 vector.
/// \return std::vector<std::vector<dsize_t>> 2D vector of generated indices, M x (slice_list).size()
std::vector<std::vector<dsize_t>> IndexGenerator(const std::vector<SliceOption> &slice_list);
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
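
A small sketch exercising IndexGenerator with explicit indices, mirroring the {SliceOption({0, 2})} example in the comment above; the include path is an assumption.

// Hedged sketch: generate indices for SliceOption({0, 2}).
#include <iostream>
#include <vector>
#include "include/tensor_helpers.h"  // assumed installed path

using mindspore::dataset::dsize_t;
using mindspore::dataset::IndexGenerator;
using mindspore::dataset::SliceOption;

int main() {
  std::vector<SliceOption> slice_list = {SliceOption(std::vector<dsize_t>{0, 2})};
  std::vector<std::vector<dsize_t>> matrix = IndexGenerator(slice_list);
  // Per the documentation above, matrix should be {{0}, {2}}.
  for (const auto &row : matrix) {
    for (dsize_t v : row) std::cout << v << ' ';
    std::cout << std::endl;
  }
  return 0;
}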

@ -0,0 +1,176 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_
#include <cstdint>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>
#include "include/constants.h"
#include "include/status.h"
#include "include/allocator.h"
namespace mindspore {
namespace dataset {
using IntAlloc = Allocator<dsize_t>;
// Class that represents a shape of a Tensor. A shape can be:
// -# Known shape (mKnown = true)
// -# Scalar --> empty vector --> <>
// -# n-Dim --> not empty vector --> <d1, d2, d3, ...> where di is >= 0\n
// Example: <1,2>, <1>, <1,13,10,11,1>
// -# Unknown shape (mKnown = false)
// -# Rank is unknown --> empty vector --> <>
// -# one or more dims are unknown --> not empty vector --> <d1, d2, d3, ...> where di is unknown\n
// Example: <3,?> (the 1st dim is unknown)\n
// <2,?,?,?> (all dims but the 0th dim are unknown)
/// \brief TensorShape supports any dim > 0 and < 2^31-1
class TensorShape {
public:
static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension
// Force the compiler to not create a no-arg constructor
TensorShape() = delete;
/// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
/// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown
/// \param[in] list
explicit TensorShape(const std::initializer_list<dsize_t> &list);
/// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
/// If one of the dims is set to kDimUnknown, the shape will be flagged as unknown
/// \param[in] list
explicit TensorShape(const std::vector<dsize_t> &list);
/// \brief Copy constructor
/// \param[in] shape
TensorShape(const TensorShape &shape);
~TensorShape() = default;
/// \brief Create a scalar Shape (i.e., empty shape with mKnown = true)
/// \return TensorShape
static TensorShape CreateScalar() { return TensorShape({}); }
/// \brief Create a shape with an unknown rank.
/// \return TensorShape
static TensorShape CreateUnknownRankShape();
/// \brief Create a shape with a known rank .
/// \return TensorShape
static TensorShape CreateUnknownShapeWithRank(dsize_t rank);
/// \brief Insert a new dim into a copy of the current shape.
/// \param[in] dim to be added
/// \param[in] axis the index where dim should be added
/// \return New modified shape
TensorShape InsertDim(dsize_t axis, dsize_t dim) const;
/// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
/// \param[in] dim
/// \return
TensorShape PrependDim(dsize_t dim) const;
/// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
/// \param[in] dim
/// \return
TensorShape AppendDim(dsize_t dim) const;
dsize_t Size() const { return raw_shape_.size(); }
dsize_t Rank() const { return raw_shape_.size(); }
bool known() const { return known_; }
bool empty() const { return raw_shape_.empty(); }
dsize_t NumOfElements() const;
bool operator==(const TensorShape &rhs) const { return known_ == rhs.known_ && raw_shape_ == rhs.raw_shape_; }
bool operator!=(const TensorShape &rhs) const { return !(rhs == *this); }
dsize_t operator[](const dsize_t index) const {
if (index < 0) return raw_shape_[raw_shape_.size() + index];
return raw_shape_[index];
}
/// \brief Return the Shape as a vector
/// \return
std::vector<dsize_t> AsVector() const;
/// \brief Returns the class info as a string
/// \return
std::string ToString() const {
std::stringstream ss;
ss << *this;
return ss.str();
}
/// \brief Actual print function used by operator<<
/// \param out output string stream
void Print(std::ostream &out) const;
/// \brief << Stream output operator overload
/// This allows you to print the info using stream operators
/// \param[in] out - reference to the output stream being overloaded
/// \param[in] so - reference to the TensorShape to display
/// \return - the output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) {
so.Print(out);
return out;
}
/// \brief Checks if the given index is a valid index for this tensor.
/// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
/// \param[in] index
/// \return bool
bool IsValidIndex(const std::vector<dsize_t> &index) const;
TensorShape Squeeze() const;
std::vector<dsize_t> Strides() const;
/// \brief Returns the location of the item assuming row major memory layout.
/// \param[in] index
/// \param[out] flat_index
/// \return
Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
private:
// True if known and valid shape, false otherwise
bool known_;
// Vector to keep the dims of the shape.
std::vector<dsize_t, IntAlloc> raw_shape_;
// Vector to keep the strides of the shape. The size is rank+1
std::vector<dsize_t, IntAlloc> strides_;
/// \brief Internal utility function to iterate over a list,
/// check if the dim is valid and then insert it into the shape.
/// \param[in] list Iterable list
/// \return true if the shape is valid and no overflow would be generated when counting the number of elements.
/// False otherwise.
template <typename T>
void AddListToShape(const T &list);
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_
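
A short sketch of TensorShape manipulation grounded in the declarations above; the include path is an assumption.

// Hedged sketch: build, extend, and index into a TensorShape.
#include <iostream>
#include "include/tensor_shape.h"  // assumed installed path

using mindspore::dataset::dsize_t;
using mindspore::dataset::TensorShape;

int main() {
  TensorShape hw({28, 28});            // known 2-D shape <28,28>
  TensorShape chw = hw.PrependDim(1);  // <28,28> -> <1,28,28>
  std::cout << chw.ToString() << " rank=" << chw.Rank()
            << " elements=" << chw.NumOfElements() << std::endl;

  dsize_t flat = 0;
  // Row-major offset of element (0, 2, 3) in <1,28,28> is 2 * 28 + 3 = 59.
  if (chw.ToFlatIndex({0, 2, 3}, &flat).IsOk()) {
    std::cout << "flat index: " << flat << std::endl;
  }
  return 0;
}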

Some files were not shown because too many files have changed in this diff.
