!12349 [MD] Push down IR files for transforms and text

From: @tina_mengting_zhang
Reviewed-by: 
Signed-off-by:
pull/12349/MERGE
Authored by mindspore-ci-bot, committed by Gitee
commit ed7fef5d5e
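This merge pushes the transform and text IR node definitions down out of the public API files into dedicated kernels/ir/data and text/ir directories, and wires the new object libraries (kernels-ir-data, text-ir, text-ir-kernels) into the build. Below is a minimal sketch of how a caller might reach a relocated IR class after this change; the BuildOneHot helper is illustrative and not part of the diff:

#include <cstdint>
#include <memory>
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"

using mindspore::dataset::TensorOp;
using mindspore::dataset::transforms::OneHotOperation;

// Construct the IR node, validate it, then lower it to the runtime kernel,
// mirroring the ValidateParams()/Build() pattern used throughout this patch.
std::shared_ptr<TensorOp> BuildOneHot(int32_t num_classes) {
  auto op = std::make_shared<OneHotOperation>(num_classes);
  return op->ValidateParams() ? op->Build() : nullptr;
}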

@@ -92,11 +92,14 @@ add_dependencies(engine core)
add_dependencies(callback core)
add_dependencies(text core)
add_dependencies(text-kernels core)
add_dependencies(text-ir core)
add_dependencies(text-ir-kernels core)
add_dependencies(cpp-API core)
add_dependencies(engine-ir-datasetops core)
add_dependencies(engine-ir-datasetops-source core)
add_dependencies(engine-ir-cache core)
add_dependencies(kernels-ir core)
add_dependencies(kernels-ir-data core)
add_dependencies(kernels-ir-vision core)
if(ENABLE_ACL)
@@ -146,7 +149,10 @@ set(submodules
$<TARGET_OBJECTS:engine>
$<TARGET_OBJECTS:text>
$<TARGET_OBJECTS:text-kernels>
$<TARGET_OBJECTS:text-ir>
$<TARGET_OBJECTS:text-ir-kernels>
$<TARGET_OBJECTS:kernels-ir>
$<TARGET_OBJECTS:kernels-ir-data>
$<TARGET_OBJECTS:kernels-ir-vision>
)

@@ -17,9 +17,9 @@
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/kernels/py_func_op.h"
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
#include "minddata/dataset/kernels/ir/vision/vision_ir.h"
namespace mindspore {

@@ -18,7 +18,7 @@
#include "pybind11/stl.h"
#include "pybind11/stl_bind.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/include/text.h"
#include "minddata/dataset/text/ir/kernels/text_ir.h"
#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h"
#include "minddata/dataset/text/sentence_piece_vocab.h"
#include "minddata/dataset/text/vocab.h"

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,10 +30,10 @@
#include "pybind11/stl_bind.h"
#include "minddata/dataset/include/datasets.h"
#include "minddata/dataset/include/samplers.h"
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/api/python/pybind_register.h"
#include "minddata/dataset/engine/ir/cache/pre_built_dataset_cache.h"
#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
#include "minddata/dataset/kernels/py_func_op.h"
namespace py = pybind11;

File diff suppressed because it is too large

@@ -15,18 +15,6 @@
*/
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/kernels/ir/validators.h"
// Kernel data headers (in alphabetical order)
#include "minddata/dataset/kernels/data/compose_op.h"
#include "minddata/dataset/kernels/data/duplicate_op.h"
#include "minddata/dataset/kernels/data/one_hot_op.h"
#include "minddata/dataset/kernels/data/random_apply_op.h"
#include "minddata/dataset/kernels/data/random_choice_op.h"
#include "minddata/dataset/kernels/data/type_cast_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/unique_op.h"
#endif
namespace mindspore {
namespace dataset {
@@ -88,122 +76,6 @@ std::shared_ptr<UniqueOperation> Unique() {
return op->ValidateParams() ? op : nullptr;
}
#endif
/* ####################################### Validator Functions ############################################ */
/* ####################################### Derived TensorOperation classes ################################# */
// (In alphabetical order)
// ComposeOperation
ComposeOperation::ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms)
: transforms_(transforms) {}
Status ComposeOperation::ValidateParams() {
RETURN_IF_NOT_OK(ValidateVectorTransforms("Compose", transforms_));
return Status::OK();
}
std::shared_ptr<TensorOp> ComposeOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<ComposeOp>(tensor_ops);
}
// DuplicateOperation
Status DuplicateOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> DuplicateOperation::Build() { return std::make_shared<DuplicateOp>(); }
// OneHotOperation
OneHotOperation::OneHotOperation(int32_t num_classes) : num_classes_(num_classes) {}
Status OneHotOperation::ValidateParams() {
if (num_classes_ <= 0) {
std::string err_msg = "OneHot: Number of classes must be greater than 0, but got: " + std::to_string(num_classes_);
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
return Status::OK();
}
std::shared_ptr<TensorOp> OneHotOperation::Build() { return std::make_shared<OneHotOp>(num_classes_); }
// PreBuiltOperation
PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(tensor_op) {}
Status PreBuiltOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> PreBuiltOperation::Build() { return op_; }
std::string PreBuiltOperation::Name() const { return op_ ? op_->Name() : kPreBuiltOperation; }
Status PreBuiltOperation::to_json(nlohmann::json *out_json) {
RETURN_IF_NOT_OK(op_->to_json(out_json));
return Status::OK();
}
// RandomApplyOperation
RandomApplyOperation::RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob)
: TensorOperation(true), transforms_(transforms), prob_(prob) {}
Status RandomApplyOperation::ValidateParams() {
RETURN_IF_NOT_OK(ValidateVectorTransforms("RandomApply", transforms_));
RETURN_IF_NOT_OK(ValidateProbability("RandomApply", prob_));
return Status::OK();
}
std::shared_ptr<TensorOp> RandomApplyOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<RandomApplyOp>(prob_, tensor_ops);
}
// RandomChoiceOperation
RandomChoiceOperation::RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms)
: TensorOperation(true), transforms_(transforms) {}
Status RandomChoiceOperation::ValidateParams() {
RETURN_IF_NOT_OK(ValidateVectorTransforms("RandomChoice", transforms_));
return Status::OK();
}
std::shared_ptr<TensorOp> RandomChoiceOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<RandomChoiceOp>(tensor_ops);
}
// TypeCastOperation
TypeCastOperation::TypeCastOperation(std::string data_type) : data_type_(data_type) {}
Status TypeCastOperation::ValidateParams() {
std::vector<std::string> predefine_type = {"bool", "int8", "uint8", "int16", "uint16", "int32", "uint32",
"int64", "uint64", "float16", "float32", "float64", "string"};
auto itr = std::find(predefine_type.begin(), predefine_type.end(), data_type_);
if (itr == predefine_type.end()) {
std::string err_msg = "TypeCast: Invalid data type: " + data_type_;
MS_LOG(ERROR) << "TypeCast: Only supports data type bool, int8, uint8, int16, uint16, int32, uint32, "
<< "int64, uint64, float16, float32, float64, string, but got: " << data_type_;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
return Status::OK();
}
std::shared_ptr<TensorOp> TypeCastOperation::Build() { return std::make_shared<TypeCastOp>(data_type_); }
#ifndef ENABLE_ANDROID
// UniqueOperation
Status UniqueOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> UniqueOperation::Build() { return std::make_shared<UniqueOp>(); }
#endif
} // namespace transforms
} // namespace dataset
} // namespace mindspore

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@
#include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h"
#include "minddata/dataset/engine/ir/datasetops/map_node.h"
#include "minddata/dataset/include/transforms.h"
#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h"
#include "minddata/dataset/kernels/image/random_crop_decode_resize_op.h"
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
#include "minddata/dataset/kernels/ir/vision/vision_ir.h"
namespace mindspore {

File diff suppressed because it is too large

@@ -25,40 +25,12 @@
#include "include/api/status.h"
#include "minddata/dataset/include/constants.h"
// (TEMPORARY) will be removed when Tensor op ir moved down
#include "minddata/dataset/kernels/ir/tensor_operation.h"
#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
#define INCLUDE_NLOHMANN_JSON_FWD_HPP_
namespace nlohmann {
template <typename T = void, typename SFINAE = void>
struct adl_serializer;
template <template <typename U, typename V, typename... Args> class ObjectType = std::map,
template <typename U, typename... Args> class ArrayType = std::vector, class StringType = std::string,
class BooleanType = bool, class NumberIntegerType = std::int64_t, class NumberUnsignedType = std::uint64_t,
class NumberFloatType = double, template <typename U> class AllocatorType = std::allocator,
template <typename T, typename SFINAE = void> class JSONSerializer = adl_serializer>
class basic_json;
template <typename BasicJsonType>
class json_pointer;
using json = basic_json<>;
} // namespace nlohmann
#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_
// FIXME - This internal IR header will be removed when external API classes are provided
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
namespace mindspore {
namespace dataset {
// Char arrays storing name of corresponding classes (in alphabetical order)
constexpr char kComposeOperation[] = "Compose";
constexpr char kDuplicateOperation[] = "Duplicate";
constexpr char kOneHotOperation[] = "OneHot";
constexpr char kPreBuiltOperation[] = "PreBuilt";
constexpr char kRandomApplyOperation[] = "RandomApply";
constexpr char kRandomChoiceOperation[] = "RandomChoice";
constexpr char kRandomSelectSubpolicyOperation[] = "RandomSelectSubpolicy";
constexpr char kTypeCastOperation[] = "TypeCast";
constexpr char kUniqueOperation[] = "Unique";
// Transform operations for performing data transformation.
namespace transforms {
@@ -119,134 +91,6 @@ std::shared_ptr<TypeCastOperation> TypeCast(std::string data_type);
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<UniqueOperation> Unique();
#endif
/* ####################################### Derived TensorOperation classes ################################# */
class ComposeOperation : public TensorOperation {
public:
explicit ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);
~ComposeOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kComposeOperation; }
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class DuplicateOperation : public TensorOperation {
public:
DuplicateOperation() = default;
~DuplicateOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kDuplicateOperation; }
};
class OneHotOperation : public TensorOperation {
public:
explicit OneHotOperation(int32_t num_classes_);
~OneHotOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kOneHotOperation; }
private:
float num_classes_;
};
class PreBuiltOperation : public TensorOperation {
public:
explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op);
~PreBuiltOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override;
Status to_json(nlohmann::json *out_json) override;
private:
std::shared_ptr<TensorOp> op_;
};
class RandomApplyOperation : public TensorOperation {
public:
explicit RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob);
~RandomApplyOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kRandomApplyOperation; }
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
double prob_;
};
class RandomChoiceOperation : public TensorOperation {
public:
explicit RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);
~RandomChoiceOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kRandomChoiceOperation; }
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class TypeCastOperation : public TensorOperation {
public:
explicit TypeCastOperation(std::string data_type);
~TypeCastOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kTypeCastOperation; }
private:
std::string data_type_;
};
#ifndef ENABLE_ANDROID
class UniqueOperation : public TensorOperation {
public:
UniqueOperation() = default;
~UniqueOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kUniqueOperation; }
};
#endif
} // namespace transforms
} // namespace dataset
} // namespace mindspore
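Because the public transforms.h above now includes the internal IR header (the FIXME include), sources that only include the public header should still see the IR classes until the external API classes land. A hedged sketch under that assumption; MakeCastNode is illustrative only:

#include <memory>
#include "minddata/dataset/include/transforms.h"  // transitively provides transforms_ir.h for now

std::shared_ptr<mindspore::dataset::TensorOperation> MakeCastNode() {
  // TypeCastOperation remains reachable through the public header after the push-down.
  return std::make_shared<mindspore::dataset::transforms::TypeCastOperation>("int32");
}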

@@ -1,3 +1,4 @@
add_subdirectory(data)
add_subdirectory(vision)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)

@@ -0,0 +1,8 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
set(DATASET_KERNELS_IR_DATA_SRC_FILES
transforms_ir.cc
)
add_library(kernels-ir-data OBJECT ${DATASET_KERNELS_IR_DATA_SRC_FILES})

@@ -0,0 +1,155 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
// Kernel data headers (in alphabetical order)
#include "minddata/dataset/kernels/data/compose_op.h"
#include "minddata/dataset/kernels/data/duplicate_op.h"
#include "minddata/dataset/kernels/data/one_hot_op.h"
#include "minddata/dataset/kernels/data/random_apply_op.h"
#include "minddata/dataset/kernels/data/random_choice_op.h"
#include "minddata/dataset/kernels/data/type_cast_op.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/data/unique_op.h"
#endif
#include "minddata/dataset/kernels/ir/validators.h"
namespace mindspore {
namespace dataset {
// Transform operations for data.
namespace transforms {
/* ####################################### Derived TensorOperation classes ################################# */
// (In alphabetical order)
// ComposeOperation
ComposeOperation::ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms)
: transforms_(transforms) {}
Status ComposeOperation::ValidateParams() {
RETURN_IF_NOT_OK(ValidateVectorTransforms("Compose", transforms_));
return Status::OK();
}
std::shared_ptr<TensorOp> ComposeOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<ComposeOp>(tensor_ops);
}
// DuplicateOperation
Status DuplicateOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> DuplicateOperation::Build() { return std::make_shared<DuplicateOp>(); }
// OneHotOperation
OneHotOperation::OneHotOperation(int32_t num_classes) : num_classes_(num_classes) {}
Status OneHotOperation::ValidateParams() {
if (num_classes_ <= 0) {
std::string err_msg = "OneHot: Number of classes must be greater than 0, but got: " + std::to_string(num_classes_);
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
return Status::OK();
}
std::shared_ptr<TensorOp> OneHotOperation::Build() { return std::make_shared<OneHotOp>(num_classes_); }
// PreBuiltOperation
PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(tensor_op) {}
Status PreBuiltOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> PreBuiltOperation::Build() { return op_; }
std::string PreBuiltOperation::Name() const { return op_ ? op_->Name() : kPreBuiltOperation; }
Status PreBuiltOperation::to_json(nlohmann::json *out_json) {
RETURN_IF_NOT_OK(op_->to_json(out_json));
return Status::OK();
}
// RandomApplyOperation
RandomApplyOperation::RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob)
: TensorOperation(true), transforms_(transforms), prob_(prob) {}
Status RandomApplyOperation::ValidateParams() {
RETURN_IF_NOT_OK(ValidateVectorTransforms("RandomApply", transforms_));
RETURN_IF_NOT_OK(ValidateProbability("RandomApply", prob_));
return Status::OK();
}
std::shared_ptr<TensorOp> RandomApplyOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<RandomApplyOp>(prob_, tensor_ops);
}
// RandomChoiceOperation
RandomChoiceOperation::RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms)
: TensorOperation(true), transforms_(transforms) {}
Status RandomChoiceOperation::ValidateParams() {
RETURN_IF_NOT_OK(ValidateVectorTransforms("RandomChoice", transforms_));
return Status::OK();
}
std::shared_ptr<TensorOp> RandomChoiceOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<RandomChoiceOp>(tensor_ops);
}
// TypeCastOperation
TypeCastOperation::TypeCastOperation(std::string data_type) : data_type_(data_type) {}
Status TypeCastOperation::ValidateParams() {
std::vector<std::string> predefine_type = {"bool", "int8", "uint8", "int16", "uint16", "int32", "uint32",
"int64", "uint64", "float16", "float32", "float64", "string"};
auto itr = std::find(predefine_type.begin(), predefine_type.end(), data_type_);
if (itr == predefine_type.end()) {
std::string err_msg = "TypeCast: Invalid data type: " + data_type_;
MS_LOG(ERROR) << "TypeCast: Only supports data type bool, int8, uint8, int16, uint16, int32, uint32, "
<< "int64, uint64, float16, float32, float64, string, but got: " << data_type_;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
return Status::OK();
}
std::shared_ptr<TensorOp> TypeCastOperation::Build() { return std::make_shared<TypeCastOp>(data_type_); }
#ifndef ENABLE_ANDROID
// UniqueOperation
Status UniqueOperation::ValidateParams() { return Status::OK(); }
std::shared_ptr<TensorOp> UniqueOperation::Build() { return std::make_shared<UniqueOp>(); }
#endif
} // namespace transforms
} // namespace dataset
} // namespace mindspore
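A short usage sketch for the relocated implementations above, composing two IR nodes and lowering them to a runtime op; the operation choices and values are illustrative only:

#include <memory>
#include <vector>
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"

using namespace mindspore::dataset;
using namespace mindspore::dataset::transforms;

std::shared_ptr<TensorOp> BuildComposedOp() {
  std::vector<std::shared_ptr<TensorOperation>> ops = {
      std::make_shared<OneHotOperation>(10),            // num_classes must be > 0
      std::make_shared<TypeCastOperation>("float32")};  // must be one of the predefined type names
  auto compose = std::make_shared<ComposeOperation>(ops);
  // Same ValidateParams()/Build() flow the factory functions in transforms.cc use.
  return compose->ValidateParams() ? compose->Build() : nullptr;
}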

@@ -0,0 +1,172 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IR_DATA_TRANSFORMS_IR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IR_DATA_TRANSFORMS_IR_H_
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "minddata/dataset/kernels/ir/tensor_operation.h"
namespace mindspore {
namespace dataset {
// Char arrays storing name of corresponding classes (in alphabetical order)
constexpr char kComposeOperation[] = "Compose";
constexpr char kDuplicateOperation[] = "Duplicate";
constexpr char kOneHotOperation[] = "OneHot";
constexpr char kPreBuiltOperation[] = "PreBuilt";
constexpr char kRandomApplyOperation[] = "RandomApply";
constexpr char kRandomChoiceOperation[] = "RandomChoice";
constexpr char kTypeCastOperation[] = "TypeCast";
constexpr char kUniqueOperation[] = "Unique";
// Transform operations for performing data transformation.
namespace transforms {
/* ####################################### Derived TensorOperation classes ################################# */
class ComposeOperation : public TensorOperation {
public:
explicit ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);
~ComposeOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kComposeOperation; }
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class DuplicateOperation : public TensorOperation {
public:
DuplicateOperation() = default;
~DuplicateOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kDuplicateOperation; }
};
class OneHotOperation : public TensorOperation {
public:
explicit OneHotOperation(int32_t num_classes_);
~OneHotOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kOneHotOperation; }
private:
float num_classes_;
};
class PreBuiltOperation : public TensorOperation {
public:
explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op);
~PreBuiltOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override;
Status to_json(nlohmann::json *out_json) override;
private:
std::shared_ptr<TensorOp> op_;
};
class RandomApplyOperation : public TensorOperation {
public:
explicit RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob);
~RandomApplyOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kRandomApplyOperation; }
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
double prob_;
};
class RandomChoiceOperation : public TensorOperation {
public:
explicit RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);
~RandomChoiceOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kRandomChoiceOperation; }
private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class TypeCastOperation : public TensorOperation {
public:
explicit TypeCastOperation(std::string data_type);
~TypeCastOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kTypeCastOperation; }
private:
std::string data_type_;
};
#ifndef ENABLE_ANDROID
class UniqueOperation : public TensorOperation {
public:
UniqueOperation() = default;
~UniqueOperation() = default;
std::shared_ptr<TensorOp> Build() override;
Status ValidateParams() override;
std::string Name() const override { return kUniqueOperation; }
};
#endif
} // namespace transforms
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IR_DATA_TRANSFORMS_IR_H_
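A new IR node added under this layout would follow the same shape as the classes above: a name constant plus a TensorOperation subclass overriding Build(), ValidateParams() and Name(). A hypothetical sketch, assumed to live inside namespace mindspore::dataset::transforms; NoOpOperation and kNoOpOperation do not exist in the codebase:

constexpr char kNoOpOperation[] = "NoOp";

class NoOpOperation : public TensorOperation {
 public:
  NoOpOperation() = default;
  ~NoOpOperation() = default;
  // A real node would return its matching kernel here, e.g. std::make_shared<SomeKernelOp>().
  std::shared_ptr<TensorOp> Build() override { return nullptr; }
  Status ValidateParams() override { return Status::OK(); }
  std::string Name() const override { return kNoOpOperation; }
};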

@@ -1,3 +1,4 @@
add_subdirectory(ir)
add_subdirectory(kernels)
file(GLOB _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")

@@ -0,0 +1,6 @@
add_subdirectory(kernels)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(text-ir OBJECT validators.cc)

@@ -0,0 +1,8 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
set(DATASET_TEXT_IR_KERNELS_SRC_FILES
text_ir.cc
)
add_library(text-ir-kernels OBJECT ${DATASET_TEXT_IR_KERNELS_SRC_FILES})

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,60 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/text/ir/validators.h"
namespace mindspore {
namespace dataset {
/* ####################################### Validator Functions ############################################ */
// Helper function to validate tokenizer directory parameter
Status ValidateTokenizerDirParam(const std::string &tokenizer_name, const std::string &tokenizer_file) {
if (tokenizer_file.empty()) {
std::string err_msg = tokenizer_name + ": tokenizer_file is not specified.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
Path file(tokenizer_file);
if (!file.Exists()) {
std::string err_msg = tokenizer_name + ": tokenizer_file: [" + tokenizer_file + "] is an invalid directory path.";
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
if (access(tokenizer_file.c_str(), R_OK) == -1) {
std::string err_msg = tokenizer_name + ": No access to specified tokenizer path: " + tokenizer_file;
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
return Status::OK();
}
// Helper functions to help validate data type passed by user
bool IsTypeNumeric(const std::string &data_type) {
if (data_type == "int8" || data_type == "uint8" || data_type == "int16" || data_type == "uint16" ||
data_type == "int32" || data_type == "uint32" || data_type == "int64" || data_type == "uint64" ||
data_type == "float16" || data_type == "float32" || data_type == "float64")
return true;
return false;
}
bool IsTypeBoolean(const std::string &data_type) { return data_type == "bool"; }
bool IsTypeString(const std::string &data_type) { return data_type == "string"; }
} // namespace dataset
} // namespace mindspore
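A hypothetical caller of the helpers above, shaped like a text IR node's ValidateParams() and assuming the same namespace and includes as validators.cc; the "Lookup" name and its parameters are illustrative, not taken from this patch:

Status ValidateLookupParams(const std::string &vocab_file, const std::string &data_type) {
  // Reuse the shared tokenizer-file check, then require a numeric output type.
  RETURN_IF_NOT_OK(ValidateTokenizerDirParam("Lookup", vocab_file));
  if (!IsTypeNumeric(data_type)) {
    std::string err_msg = "Lookup: invalid data_type, numeric type expected but got: " + data_type;
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }
  return Status::OK();
}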

@@ -0,0 +1,41 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_IR_VALIDATORS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_IR_VALIDATORS_H_
#include <string>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/util/status.h"
namespace mindspore {
namespace dataset {
// Helper function to validate tokenizer directory parameter
Status ValidateTokenizerDirParam(const std::string &tokenizer_name, const std::string &tokenizer_file);
// Helper function to validate data type passed by user
bool IsTypeNumeric(const std::string &data_type);
// Helper function to validate data type is boolean
bool IsTypeBoolean(const std::string &data_type);
// Helper function to validate data type is string
bool IsTypeString(const std::string &data_type);
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_IR_VALIDATORS_H_

@@ -202,6 +202,7 @@ if(BUILD_MINDDATA STREQUAL "full")
${MINDDATA_DIR}/kernels/data/type_cast_op.cc
${MINDDATA_DIR}/kernels/image/exif_utils.cc
${MINDDATA_DIR}/kernels/ir/validators.cc
${MINDDATA_DIR}/kernels/ir/data/transforms_ir.cc
${MINDDATA_DIR}/kernels/ir/vision/vision_ir.cc
${MINDDATA_DIR}/callback/callback_manager.cc
${MINDDATA_DIR}/util/task_manager.cc
@@ -281,6 +282,7 @@ elseif(BUILD_MINDDATA STREQUAL "wrapper")
${MINDDATA_DIR}/kernels/data/data_utils.cc
${MINDDATA_DIR}/kernels/image/exif_utils.cc
${MINDDATA_DIR}/kernels/ir/validators.cc
${MINDDATA_DIR}/kernels/ir/data/transforms_ir.cc
${MINDDATA_DIR}/kernels/ir/vision/vision_ir.cc
${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc
${CMAKE_CURRENT_SOURCE_DIR}/wrapper/album_op_android.cc
@@ -393,6 +395,7 @@ elseif(BUILD_MINDDATA STREQUAL "lite")
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
${MINDDATA_DIR}/kernels/ir/validators.cc
${MINDDATA_DIR}/kernels/ir/data/transforms_ir.cc
${MINDDATA_DIR}/kernels/ir/vision/vision_ir.cc
)
