!13220 Add data helper to C++ API

From: @ezphlow
Reviewed-by: 
Signed-off-by:
pull/13220/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 98a87ad969

@ -29,6 +29,7 @@ endif()
add_library(cpp-API OBJECT
config.cc
data_helper.cc
datasets.cc
execute.cc
iterator.cc

@ -0,0 +1,193 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/include/data_helper.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include "minddata/dataset/util/json_helper.h"
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/path.h"
#include "include/api/status.h"
namespace mindspore {
namespace dataset {
// Create a numbered json file from image folder.
// Both directory arguments are converted with CharToString and handed to JsonHelper::CreateAlbum;
// the Status produced by that call is returned unchanged.
Status DataHelper::CreateAlbumIF(const std::vector<char> &in_dir,
                                 const std::vector<char> &out_dir) {
  return JsonHelper().CreateAlbum(CharToString(in_dir), CharToString(out_dir));
}
// Debug print: writes the fixed banner " Data Helper" followed by a newline to `out`.
// No object state is read, and the stream is not flushed here.
void DataHelper::Print(std::ostream &out) const {
  // Two separate insertions, so any width set on the stream applies to the banner only.
  out << " Data Helper";
  out << "\n";
}
// UpdateArrayIF overload for a list of strings supplied as char vectors.
// in_file, key and out_file are converted with CharToString and `value` with VectorCharToString;
// the call is then delegated to JsonHelper::UpdateArray and its Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<std::vector<char>> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), VectorCharToString(value),
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<bool> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<bool> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<int8_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<int8_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<uint8_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<uint8_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<int16_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<int16_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<uint16_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<uint16_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<int32_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<int32_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<uint32_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<uint32_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<int64_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<int64_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<uint64_t> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<uint64_t> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<float> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<float> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateArrayIF overload for std::vector<double> values.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateArray, whose Status is returned unchanged.
Status DataHelper::UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<double> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateArray(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a string value supplied as a char vector.
// All four char-vector arguments (in_file, key, value, out_file) are converted with CharToString
// and passed to JsonHelper::UpdateValue; its Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const std::vector<char> &value,
                                 const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), CharToString(value),
                                  CharToString(out_file));
}
// UpdateValueIF overload for a bool value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const bool &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for an int8_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const int8_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a uint8_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const uint8_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for an int16_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const int16_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a uint16_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const uint16_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for an int32_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const int32_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a uint32_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const uint32_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for an int64_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const int64_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a uint64_t value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const uint64_t &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a float value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const float &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// UpdateValueIF overload for a double value.
// in_file, key and out_file are converted with CharToString; `value` is forwarded as is to
// JsonHelper::UpdateValue, whose Status is returned unchanged.
Status DataHelper::UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key,
                                 const double &value, const std::vector<char> &out_file) {
  return JsonHelper().UpdateValue(CharToString(in_file), CharToString(key), value,
                                  CharToString(out_file));
}
// Char-vector entry point for key removal.
// in_file, key and out_file are converted with CharToString and passed to JsonHelper::RemoveKey;
// the Status produced by that call is returned unchanged.
Status DataHelper::RemoveKeyIF(const std::vector<char> &in_file,
                               const std::vector<char> &key,
                               const std::vector<char> &out_file) {
  return JsonHelper().RemoveKey(CharToString(in_file), CharToString(key), CharToString(out_file));
}
// Forwards the four arguments untouched to JsonHelper::DumpData and returns its size_t result.
// No validation of the pointers or sizes happens in this wrapper.
// NOTE(review): presumably copies tensor bytes into `addr` and reports how many were written -
// confirm against JsonHelper::DumpData.
size_t DataHelper::DumpData(const unsigned char *tensor_addr, const size_t &tensor_size,
                            void *addr, const size_t &buffer_size) {
  return JsonHelper().DumpData(tensor_addr, tensor_size, addr, buffer_size);
}
} // namespace dataset
} // namespace mindspore

@ -22,7 +22,7 @@
#include "minddata/dataset/core/client.h" // DE client
#include "minddata/dataset/util/status.h"
#include "pybind11/numpy.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
namespace mindspore {
namespace dataset {

@ -20,7 +20,7 @@
#include "minddata/dataset/api/python/pybind_conversion.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/callback/py_ds_callback.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/serdes.h"
#include "minddata/dataset/include/datasets.h"

@ -20,7 +20,7 @@
#include "minddata/dataset/api/python/pybind_conversion.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/callback/py_ds_callback.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/serdes.h"
#include "minddata/dataset/include/datasets.h"

@ -21,7 +21,7 @@
#include "minddata/dataset/api/python/pybind_conversion.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/callback/py_ds_callback.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"

@ -19,7 +19,7 @@
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/api/python/pybind_conversion.h"
#include "minddata/dataset/include/datasets.h"

@ -19,7 +19,7 @@
// client.h
// Include file for DE client functions
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_shape.h"

@ -22,7 +22,7 @@
#include <nlohmann/json.hpp>
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/util/status.h"
@ -148,7 +148,7 @@ class ConfigManager {
void set_numa_enable(bool numa_enable);
/// getter function
/// Now we want to seperate the numa link to _c_dataengine in the CMakeLists,
/// Now we want to separate the numa link to _c_dataengine in the CMakeLists,
/// so we want user to choose whether to open numa switch.
/// @return Get the current numa switch state.
bool numa_enable() const { return numa_enable_; }

@ -1,112 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
#include <cstdint>
#include <limits>
#include <random>
namespace mindspore {
namespace dataset {
// Various type defines for convenience
using uchar = unsigned char;
using dsize_t = int64_t;
// Target devices to perform map operation
// (no explicit values: enumerators number from 0 in declaration order)
enum class MapTargetDevice { kCpu, kGpu, kDvpp };
// Possible dataset types for holding the data and client type
enum class DatasetType { kUnknown, kArrow, kTf };
// Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
// Possible values for shuffle
enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2 };
// Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
// Possible values for Image format types in a batch
enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 };
// Possible values for Image format types
// NOTE(review): these enumerators do not carry the `k` prefix used by every other enum here.
enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 };
// Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
// Possible JiebaMode modes
enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 };
// Possible values for SPieceTokenizerOutType
enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
// Possible values for SPieceTokenizerLoadType
enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };
// Possible values for SentencePieceModel
enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
// Possible values for NormalizeForm
// Only kNone is given an explicit value; the remaining enumerators follow consecutively (1..4).
enum class NormalizeForm {
kNone = 0,
kNfc,
kNfkc,
kNfd,
kNfkd,
};
// Possible values for SamplingStrategy
enum class SamplingStrategy { kRandom = 0, kEdgeWeight = 1 };
// convenience functions for 32bit int bitmask
// Returns true when every bit set in bitMask is also set in bits (an empty mask always matches).
inline bool BitTest(uint32_t bits, uint32_t bitMask) {
  // Bits requested by the mask that are absent from `bits`.
  const uint32_t missing = bitMask & ~bits;
  return missing == 0;
}
// Turns on, in the word pointed to by bits, every bit that is set in bitMask.
// The pointer is dereferenced unconditionally, so it must be valid.
inline void BitSet(uint32_t *bits, uint32_t bitMask) {
  const uint32_t current = *bits;
  *bits = current | bitMask;
}
// Turns off, in the word pointed to by bits, every bit that is set in bitMask.
// The pointer is dereferenced unconditionally, so it must be valid.
inline void BitClear(uint32_t *bits, uint32_t bitMask) {
  // Bits outside the mask are the ones that survive.
  const uint32_t keep = ~bitMask;
  *bits = *bits & keep;
}
// Upper bounds, all taken from std::numeric_limits of the declared type.
constexpr int32_t kDeMaxDim = std::numeric_limits<int32_t>::max(); // 2147483647 or 2^31 - 1
constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max();
constexpr int64_t kDeMaxFreq = std::numeric_limits<int64_t>::max(); // 9223372036854775807 or 2^63 - 1
constexpr int64_t kDeMaxTopk = std::numeric_limits<int64_t>::max();
// Default configuration values (kCfg*)
constexpr uint32_t kCfgRowsPerBuffer = 1;
constexpr uint32_t kCfgParallelWorkers = 8;
constexpr uint32_t kCfgWorkerConnectorSize = 16;
constexpr uint32_t kCfgOpConnectorSize = 16;
constexpr int32_t kCfgDefaultRankId = -1;
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
constexpr int32_t kCfgDefaultCachePort = 50052;
constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
// Other defaults (kDft*)
constexpr int32_t kDftPrefetchSize = 20;
constexpr int32_t kDftNumConnections = 12;
// NOTE(review): declared as int32_t but initialized from `false` (i.e. 0) - confirm whether bool was intended.
constexpr int32_t kDftAutoNumWorkers = false;
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;
// Integer aliases for connection, session and row identifiers
using connection_id_type = uint64_t;
using session_id_type = uint32_t;
using row_id_type = int64_t;
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_

@ -18,7 +18,7 @@
#include <memory>
#include <vector>
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/tensor.h"
namespace mindspore {

@ -24,7 +24,7 @@
#include "./securec.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"

@ -29,7 +29,7 @@ namespace py = pybind11;
#else
#include "base/float16.h"
#endif
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
namespace mindspore {
namespace dataset {

@ -16,7 +16,7 @@
#include "minddata/dataset/core/de_tensor.h"
#include "minddata/dataset/core/device_tensor.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/include/type_id.h"
#include "mindspore/core/ir/dtype/type_id.h"

@ -20,7 +20,7 @@
#include <utility>
#include <vector>
#include "include/api/status.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/util/status.h"

@ -21,7 +21,7 @@
#include "include/api/status.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/util/allocator.h"
namespace mindspore {

@ -27,7 +27,7 @@
#include "minddata/dataset/include/type_id.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/core/cv_tensor.h"

@ -37,7 +37,7 @@
#include "pybind11/stl.h"
#endif
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/core/tensor_shape.h"

@ -19,7 +19,7 @@
#include <memory>
#include <vector>
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
namespace mindspore {
namespace dataset {

@ -25,7 +25,7 @@
#else
#include "mindspore/lite/src/common/log_adapter.h"
#endif
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
namespace mindspore {
namespace dataset {

@ -31,7 +31,7 @@
namespace py = pybind11;
#endif
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/util/allocator.h"

@ -31,7 +31,7 @@
#include "minddata/dataset/engine/cache/cache_server.h"
#include "minddata/dataset/engine/cache/cache_ipc.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
namespace mindspore {
namespace dataset {

@ -22,7 +22,7 @@
#include <cstdlib>
#include <cstring>
#include <thread>
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/engine/cache/cache_client.h"
#include "minddata/dataset/engine/cache/cache_fbb.h"
namespace mindspore {

@ -18,7 +18,7 @@
#include <functional>
#include <limits>
#include <vector>
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/engine/cache/cache_ipc.h"
#include "minddata/dataset/engine/cache/cache_service.h"
#include "minddata/dataset/engine/cache/cache_request.h"

@ -1,5 +1,5 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,7 +23,7 @@
#include <vector>
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/core/constants.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_row.h"
@ -51,7 +51,7 @@ class DataBuffer {
~DataBuffer() = default;
/// \brief A method for debug printing of the buffer
/// \param[inout] out The stream to write to
/// \param[in/out] out The stream to write to
/// \param[in] show_all A boolean to toggle between details and summary printing
void Print(std::ostream &out, bool show_all) const;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save