!12349 [MD] Push down IR files for transforms and text
From: @tina_mengting_zhang Reviewed-by: Signed-off-by:pull/12349/MERGE
commit
ed7fef5d5e
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,8 @@
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
|
||||
set(DATASET_KERNELS_IR_DATA_SRC_FILES
|
||||
transforms_ir.cc
|
||||
)
|
||||
|
||||
add_library(kernels-ir-data OBJECT ${DATASET_KERNELS_IR_DATA_SRC_FILES})
|
@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
|
||||
|
||||
// Kernel data headers (in alphabetical order)
|
||||
#include "minddata/dataset/kernels/data/compose_op.h"
|
||||
#include "minddata/dataset/kernels/data/duplicate_op.h"
|
||||
#include "minddata/dataset/kernels/data/one_hot_op.h"
|
||||
#include "minddata/dataset/kernels/data/random_apply_op.h"
|
||||
#include "minddata/dataset/kernels/data/random_choice_op.h"
|
||||
#include "minddata/dataset/kernels/data/type_cast_op.h"
|
||||
#ifndef ENABLE_ANDROID
|
||||
#include "minddata/dataset/kernels/data/unique_op.h"
|
||||
#endif
|
||||
|
||||
#include "minddata/dataset/kernels/ir/validators.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Transform operations for data.
|
||||
namespace transforms {
|
||||
|
||||
/* ####################################### Derived TensorOperation classes ################################# */
|
||||
|
||||
// (In alphabetical order)
|
||||
|
||||
// ComposeOperation
|
||||
ComposeOperation::ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms)
|
||||
: transforms_(transforms) {}
|
||||
|
||||
Status ComposeOperation::ValidateParams() {
|
||||
RETURN_IF_NOT_OK(ValidateVectorTransforms("Compose", transforms_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> ComposeOperation::Build() {
|
||||
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
|
||||
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
|
||||
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
|
||||
return std::make_shared<ComposeOp>(tensor_ops);
|
||||
}
|
||||
|
||||
// DuplicateOperation
|
||||
Status DuplicateOperation::ValidateParams() { return Status::OK(); }
|
||||
|
||||
std::shared_ptr<TensorOp> DuplicateOperation::Build() { return std::make_shared<DuplicateOp>(); }
|
||||
|
||||
// OneHotOperation
|
||||
OneHotOperation::OneHotOperation(int32_t num_classes) : num_classes_(num_classes) {}
|
||||
|
||||
Status OneHotOperation::ValidateParams() {
|
||||
if (num_classes_ <= 0) {
|
||||
std::string err_msg = "OneHot: Number of classes must be greater than 0, but got: " + std::to_string(num_classes_);
|
||||
MS_LOG(ERROR) << err_msg;
|
||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> OneHotOperation::Build() { return std::make_shared<OneHotOp>(num_classes_); }
|
||||
|
||||
// PreBuiltOperation
|
||||
PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(tensor_op) {}
|
||||
|
||||
Status PreBuiltOperation::ValidateParams() { return Status::OK(); }
|
||||
|
||||
std::shared_ptr<TensorOp> PreBuiltOperation::Build() { return op_; }
|
||||
|
||||
std::string PreBuiltOperation::Name() const { return op_ ? op_->Name() : kPreBuiltOperation; }
|
||||
|
||||
Status PreBuiltOperation::to_json(nlohmann::json *out_json) {
|
||||
RETURN_IF_NOT_OK(op_->to_json(out_json));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// RandomApplyOperation
|
||||
RandomApplyOperation::RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob)
|
||||
: TensorOperation(true), transforms_(transforms), prob_(prob) {}
|
||||
|
||||
Status RandomApplyOperation::ValidateParams() {
|
||||
RETURN_IF_NOT_OK(ValidateVectorTransforms("RandomApply", transforms_));
|
||||
RETURN_IF_NOT_OK(ValidateProbability("RandomApply", prob_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> RandomApplyOperation::Build() {
|
||||
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
|
||||
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
|
||||
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
|
||||
return std::make_shared<RandomApplyOp>(prob_, tensor_ops);
|
||||
}
|
||||
|
||||
// RandomChoiceOperation
|
||||
RandomChoiceOperation::RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms)
|
||||
: TensorOperation(true), transforms_(transforms) {}
|
||||
|
||||
Status RandomChoiceOperation::ValidateParams() {
|
||||
RETURN_IF_NOT_OK(ValidateVectorTransforms("RandomChoice", transforms_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> RandomChoiceOperation::Build() {
|
||||
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
|
||||
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
|
||||
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
|
||||
return std::make_shared<RandomChoiceOp>(tensor_ops);
|
||||
}
|
||||
|
||||
// TypeCastOperation
|
||||
TypeCastOperation::TypeCastOperation(std::string data_type) : data_type_(data_type) {}
|
||||
|
||||
Status TypeCastOperation::ValidateParams() {
|
||||
std::vector<std::string> predefine_type = {"bool", "int8", "uint8", "int16", "uint16", "int32", "uint32",
|
||||
"int64", "uint64", "float16", "float32", "float64", "string"};
|
||||
auto itr = std::find(predefine_type.begin(), predefine_type.end(), data_type_);
|
||||
if (itr == predefine_type.end()) {
|
||||
std::string err_msg = "TypeCast: Invalid data type: " + data_type_;
|
||||
MS_LOG(ERROR) << "TypeCast: Only supports data type bool, int8, uint8, int16, uint16, int32, uint32, "
|
||||
<< "int64, uint64, float16, float32, float64, string, but got: " << data_type_;
|
||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::shared_ptr<TensorOp> TypeCastOperation::Build() { return std::make_shared<TypeCastOp>(data_type_); }
|
||||
|
||||
#ifndef ENABLE_ANDROID
|
||||
// UniqueOperation
|
||||
Status UniqueOperation::ValidateParams() { return Status::OK(); }
|
||||
|
||||
std::shared_ptr<TensorOp> UniqueOperation::Build() { return std::make_shared<UniqueOp>(); }
|
||||
#endif
|
||||
|
||||
} // namespace transforms
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
@ -0,0 +1,172 @@
|
||||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IR_DATA_TRANSFORMS_IR_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IR_DATA_TRANSFORMS_IR_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "minddata/dataset/kernels/ir/tensor_operation.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Char arrays storing name of corresponding classes (in alphabetical order)
|
||||
constexpr char kComposeOperation[] = "Compose";
|
||||
constexpr char kDuplicateOperation[] = "Duplicate";
|
||||
constexpr char kOneHotOperation[] = "OneHot";
|
||||
constexpr char kPreBuiltOperation[] = "PreBuilt";
|
||||
constexpr char kRandomApplyOperation[] = "RandomApply";
|
||||
constexpr char kRandomChoiceOperation[] = "RandomChoice";
|
||||
constexpr char kTypeCastOperation[] = "TypeCast";
|
||||
constexpr char kUniqueOperation[] = "Unique";
|
||||
|
||||
// Transform operations for performing data transformation.
|
||||
namespace transforms {
|
||||
/* ####################################### Derived TensorOperation classes ################################# */
|
||||
|
||||
class ComposeOperation : public TensorOperation {
|
||||
public:
|
||||
explicit ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);
|
||||
|
||||
~ComposeOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kComposeOperation; }
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TensorOperation>> transforms_;
|
||||
};
|
||||
|
||||
class DuplicateOperation : public TensorOperation {
|
||||
public:
|
||||
DuplicateOperation() = default;
|
||||
|
||||
~DuplicateOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kDuplicateOperation; }
|
||||
};
|
||||
|
||||
class OneHotOperation : public TensorOperation {
|
||||
public:
|
||||
explicit OneHotOperation(int32_t num_classes_);
|
||||
|
||||
~OneHotOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kOneHotOperation; }
|
||||
|
||||
private:
|
||||
float num_classes_;
|
||||
};
|
||||
|
||||
class PreBuiltOperation : public TensorOperation {
|
||||
public:
|
||||
explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op);
|
||||
|
||||
~PreBuiltOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override;
|
||||
|
||||
Status to_json(nlohmann::json *out_json) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<TensorOp> op_;
|
||||
};
|
||||
|
||||
class RandomApplyOperation : public TensorOperation {
|
||||
public:
|
||||
explicit RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob);
|
||||
|
||||
~RandomApplyOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kRandomApplyOperation; }
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TensorOperation>> transforms_;
|
||||
double prob_;
|
||||
};
|
||||
|
||||
class RandomChoiceOperation : public TensorOperation {
|
||||
public:
|
||||
explicit RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);
|
||||
|
||||
~RandomChoiceOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kRandomChoiceOperation; }
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<TensorOperation>> transforms_;
|
||||
};
|
||||
class TypeCastOperation : public TensorOperation {
|
||||
public:
|
||||
explicit TypeCastOperation(std::string data_type);
|
||||
|
||||
~TypeCastOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kTypeCastOperation; }
|
||||
|
||||
private:
|
||||
std::string data_type_;
|
||||
};
|
||||
|
||||
#ifndef ENABLE_ANDROID
|
||||
class UniqueOperation : public TensorOperation {
|
||||
public:
|
||||
UniqueOperation() = default;
|
||||
|
||||
~UniqueOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kUniqueOperation; }
|
||||
};
|
||||
#endif
|
||||
} // namespace transforms
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IR_DATA_TRANSFORMS_IR_H_
|
@ -0,0 +1,6 @@
|
||||
add_subdirectory(kernels)
|
||||
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
|
||||
add_library(text-ir OBJECT validators.cc)
|
@ -0,0 +1,8 @@
|
||||
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||
|
||||
set(DATASET_TEXT_IR_KERNELS_SRC_FILES
|
||||
text_ir.cc
|
||||
)
|
||||
|
||||
add_library(text-ir-kernels OBJECT ${DATASET_TEXT_IR_KERNELS_SRC_FILES})
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,60 @@
|
||||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "minddata/dataset/text/ir/validators.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
/* ####################################### Validator Functions ############################################ */
|
||||
|
||||
// Helper function to validate tokenizer directory parameter
|
||||
Status ValidateTokenizerDirParam(const std::string &tokenizer_name, const std::string &tokenizer_file) {
|
||||
if (tokenizer_file.empty()) {
|
||||
std::string err_msg = tokenizer_name + ": tokenizer_file is not specified.";
|
||||
MS_LOG(ERROR) << err_msg;
|
||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||
}
|
||||
|
||||
Path file(tokenizer_file);
|
||||
if (!file.Exists()) {
|
||||
std::string err_msg = tokenizer_name + ": tokenizer_file: [" + tokenizer_file + "] is an invalid directory path.";
|
||||
MS_LOG(ERROR) << err_msg;
|
||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||
}
|
||||
|
||||
if (access(tokenizer_file.c_str(), R_OK) == -1) {
|
||||
std::string err_msg = tokenizer_name + ": No access to specified tokenizer path: " + tokenizer_file;
|
||||
MS_LOG(ERROR) << err_msg;
|
||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Helper functions to help validate data type passed by user
|
||||
bool IsTypeNumeric(const std::string &data_type) {
|
||||
if (data_type == "int8" || data_type == "uint8" || data_type == "int16" || data_type == "uint16" ||
|
||||
data_type == "int32" || data_type == "uint32" || data_type == "int64" || data_type == "uint64" ||
|
||||
data_type == "float16" || data_type == "float32" || data_type == "float64")
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool IsTypeBoolean(const std::string &data_type) { return data_type == "bool"; }
|
||||
|
||||
bool IsTypeString(const std::string &data_type) { return data_type == "string"; }
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_IR_VALIDATORS_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_IR_VALIDATORS_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "minddata/dataset/core/tensor.h"
|
||||
#include "minddata/dataset/util/status.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Helper function to validate tokenizer directory parameter
|
||||
Status ValidateTokenizerDirParam(const std::string &tokenizer_name, const std::string &tokenizer_file);
|
||||
|
||||
// Helper function to validate data type passed by user
|
||||
bool IsTypeNumeric(const std::string &data_type);
|
||||
|
||||
// Helper function to validate data type is boolean
|
||||
bool IsTypeBoolean(const std::string &data_type);
|
||||
|
||||
// Helper function to validate data type is string
|
||||
bool IsTypeString(const std::string &data_type);
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_TEXT_IR_VALIDATORS_H_
|
Loading…
Reference in new issue