From 9ffdf0b6d4e7d9e4bfef5f5344d058263d1811dd Mon Sep 17 00:00:00 2001 From: shenwei41 Date: Wed, 4 Nov 2020 10:41:45 +0800 Subject: [PATCH] Add transform API --- .../ccsrc/minddata/dataset/api/vision.cc | 76 +++++++++++++ .../ccsrc/minddata/dataset/include/vision.h | 51 +++++++++ tests/ut/cpp/dataset/c_api_vision_test.cc | 106 ++++++++++++++++++ 3 files changed, 233 insertions(+) diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index 0d16605e1f..2b2cb3b190 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -22,6 +22,7 @@ // Kernel image headers (in alphabetical order) #ifndef ENABLE_ANDROID #include "minddata/dataset/kernels/image/auto_contrast_op.h" +#include "minddata/dataset/kernels/image/bounding_box_augment_op.h" #include "minddata/dataset/kernels/image/center_crop_op.h" #endif #include "minddata/dataset/kernels/image/crop_op.h" @@ -57,6 +58,7 @@ #endif #include "minddata/dataset/kernels/image/resize_op.h" #ifndef ENABLE_ANDROID +#include "minddata/dataset/kernels/image/resize_with_bbox_op.h" #include "minddata/dataset/kernels/image/rgba_to_bgr_op.h" #include "minddata/dataset/kernels/image/rgba_to_rgb_op.h" #include "minddata/dataset/kernels/image/swap_red_blue_op.h" @@ -82,6 +84,17 @@ std::shared_ptr AutoContrast(float cutoff, std::vector BoundingBoxAugment(std::shared_ptr transform, + float ratio) { + auto op = std::make_shared(transform, ratio); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + // Function to create CenterCropOperation. std::shared_ptr CenterCrop(std::vector size) { auto op = std::make_shared(size); @@ -381,6 +394,16 @@ std::shared_ptr Resize(std::vector size, Interpolation } #ifndef ENABLE_ANDROID +// Function to create ResizeWithBBoxOperation. 
+std::shared_ptr ResizeWithBBox(std::vector size, InterpolationMode interpolation) { + auto op = std::make_shared(size, interpolation); + // Input validation + if (!op->ValidateParams()) { + return nullptr; + } + return op; +} + // Function to create RgbaToBgrOperation. std::shared_ptr RGBA2BGR() { auto op = std::make_shared(); @@ -525,6 +548,30 @@ std::shared_ptr AutoContrastOperation::Build() { return tensor_op; } +// BoundingBoxAugmentOperation +BoundingBoxAugmentOperation::BoundingBoxAugmentOperation(std::shared_ptr transform, float ratio) + : transform_(transform), ratio_(ratio) {} + +Status BoundingBoxAugmentOperation::ValidateParams() { + if (transform_ == nullptr) { + std::string err_msg = "BoundingBoxAugment: transform must not be null."; + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + + if (ratio_ < 0.0 || ratio_ > 1.0) { + std::string err_msg = "BoundingBoxAugment: ratio has to be between 0.0 and 1.0, got: " + std::to_string(ratio_); + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + return Status::OK(); +} + +std::shared_ptr BoundingBoxAugmentOperation::Build() { + std::shared_ptr tensor_op = std::make_shared(transform_->Build(), ratio_); + return tensor_op; +} + // CenterCropOperation CenterCropOperation::CenterCropOperation(std::vector size) : size_(size) {} @@ -1634,6 +1681,35 @@ std::shared_ptr ResizeOperation::Build() { } #ifndef ENABLE_ANDROID +// ResizeWithBBoxOperation +ResizeWithBBoxOperation::ResizeWithBBoxOperation(std::vector size, InterpolationMode interpolation) + : size_(size), interpolation_(interpolation) {} + +Status ResizeWithBBoxOperation::ValidateParams() { + // size: must contain one or two values + if (size_.empty() || size_.size() > 2) { + std::string err_msg = + "ResizeWithBBox: size must be a vector of one or two values, got: " + std::to_string(size_.size()); + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + RETURN_IF_NOT_OK(ValidateVectorPositive("ResizeWithBBox", size_)); + + return Status::OK(); 
+} + +std::shared_ptr ResizeWithBBoxOperation::Build() { + int32_t height = size_[0]; + int32_t width = 0; + + // User specified the width value. + if (size_.size() == 2) { + width = size_[1]; + } + + return std::make_shared(height, width, interpolation_); +} + // RgbaToBgrOperation. RgbaToBgrOperation::RgbaToBgrOperation() {} diff --git a/mindspore/ccsrc/minddata/dataset/include/vision.h b/mindspore/ccsrc/minddata/dataset/include/vision.h index 348a194281..05d9ae1d82 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision.h @@ -32,6 +32,7 @@ namespace vision { // Transform Op classes (in alphabetical order) #ifndef ENABLE_ANDROID class AutoContrastOperation; +class BoundingBoxAugmentOperation; class CenterCropOperation; #endif class CropOperation; @@ -67,6 +68,7 @@ class RescaleOperation; #endif class ResizeOperation; #ifndef ENABLE_ANDROID +class ResizeWithBBoxOperation; class RgbaToBgrOperation; class RgbaToRgbOperation; class SwapRedBlueOperation; @@ -79,6 +81,14 @@ class UniformAugOperation; /// \return Shared pointer to the current TensorOperation. std::shared_ptr AutoContrast(float cutoff = 0.0, std::vector ignore = {}); +/// \brief Function to create a BoundingBoxAugment TensorOperation. +/// \notes Apply a given image transform on a random selection of bounding box regions of a given image. +/// \param[in] transform A TensorOperation transform. +/// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3). +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr BoundingBoxAugment(std::shared_ptr transform, + float ratio = 0.3); + /// \brief Function to create a CenterCrop TensorOperation. /// \notes Crops the input image at the center to the given size. /// \param[in] size A vector representing the output size of the cropped image. 
@@ -360,6 +370,16 @@ std::shared_ptr Resize(std::vector size, InterpolationMode interpolation = InterpolationMode::kLinear); #ifndef ENABLE_ANDROID +/// \brief Function to create a ResizeWithBBox TensorOperation. +/// \notes Resize the input image to the given size and adjust bounding boxes accordingly. +/// \param[in] size The output size of the resized image. +/// If size has one value, smaller edge of the image will be resized to this value with the same image aspect ratio. +/// If size has two values, they should be (height, width). +/// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear). +/// \return Shared pointer to the current TensorOperation. +std::shared_ptr ResizeWithBBox(std::vector size, + InterpolationMode interpolation = InterpolationMode::kLinear); + /// \brief Function to create a RgbaToBgr TensorOperation. /// \notes Changes the input 4 channel RGBA tensor to 3 channel BGR. /// \return Shared pointer to the current TensorOperation. 
@@ -400,6 +420,21 @@ class AutoContrastOperation : public TensorOperation { std::vector ignore_; }; +class BoundingBoxAugmentOperation : public TensorOperation { + public: + explicit BoundingBoxAugmentOperation(std::shared_ptr transform, float ratio = 0.3); + + ~BoundingBoxAugmentOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + private: + std::shared_ptr transform_; + float ratio_; +}; + class CenterCropOperation : public TensorOperation { public: explicit CenterCropOperation(std::vector size); @@ -829,6 +864,22 @@ class ResizeOperation : public TensorOperation { }; #ifndef ENABLE_ANDROID +class ResizeWithBBoxOperation : public TensorOperation { + public: + explicit ResizeWithBBoxOperation(std::vector size, + InterpolationMode interpolation_mode = InterpolationMode::kLinear); + + ~ResizeWithBBoxOperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + private: + std::vector size_; + InterpolationMode interpolation_; +}; + class RgbaToBgrOperation : public TensorOperation { public: RgbaToBgrOperation(); diff --git a/tests/ut/cpp/dataset/c_api_vision_test.cc b/tests/ut/cpp/dataset/c_api_vision_test.cc index ca37dd2204..9c8d4bfe92 100644 --- a/tests/ut/cpp/dataset/c_api_vision_test.cc +++ b/tests/ut/cpp/dataset/c_api_vision_test.cc @@ -137,6 +137,56 @@ TEST_F(MindDataTestPipeline, TestAutoContrastFail) { EXPECT_EQ(auto_contrast2, nullptr); } +TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBoundingBoxAugmentSuccess."; + // Create an VOC Dataset + std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; + std::shared_ptr ds = VOC(folder_path, "Detection", "train", {}, true, SequentialSampler(0, 3)); + EXPECT_NE(ds, nullptr); + + // Create objects for the tensor ops + std::shared_ptr bound_box_augment = vision::BoundingBoxAugment(vision::RandomRotation({90.0}), 1.0); + EXPECT_NE(bound_box_augment, 
nullptr); + + // Create a Map operation on ds + ds = ds->Map({bound_box_augment}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 3); + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestBoundingBoxAugmentFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestBoundingBoxAugmentFail with invalid params."; + // Testing invalid ratio < 0.0 + std::shared_ptr bound_box_augment = vision::BoundingBoxAugment(vision::RandomRotation({90.0}), -1.0); + EXPECT_EQ(bound_box_augment, nullptr); + // Testing invalid ratio > 1.0 + std::shared_ptr bound_box_augment1 = vision::BoundingBoxAugment(vision::RandomRotation({90.0}), 2.0); + EXPECT_EQ(bound_box_augment1, nullptr); + // Testing invalid transform + std::shared_ptr bound_box_augment2 = vision::BoundingBoxAugment(nullptr, 0.5); + EXPECT_EQ(bound_box_augment2, nullptr); +} + TEST_F(MindDataTestPipeline, TestCenterCrop) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCenterCrop with single integer input."; @@ -2000,6 +2050,62 @@ TEST_F(MindDataTestPipeline, TestResizeFail) { EXPECT_EQ(resize_op, nullptr); } +TEST_F(MindDataTestPipeline, TestResizeWithBBoxSuccess) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestResizeWithBBoxSuccess."; + // Create an VOC Dataset + std::string folder_path = datasets_root_path_ + "/testVOC2012_2"; + std::shared_ptr ds = VOC(folder_path, "Detection", "train", {}, true, SequentialSampler(0, 3)); + EXPECT_NE(ds, nullptr); + + // 
Create objects for the tensor ops + std::shared_ptr resize_with_bbox_op = vision::ResizeWithBBox({30}); + EXPECT_NE(resize_with_bbox_op, nullptr); + + std::shared_ptr resize_with_bbox_op1 = vision::ResizeWithBBox({30, 30}); + EXPECT_NE(resize_with_bbox_op1, nullptr); + + // Create a Map operation on ds + ds = ds->Map({resize_with_bbox_op, resize_with_bbox_op1}, {"image", "bbox"}, {"image", "bbox"}, {"image", "bbox"}); + EXPECT_NE(ds, nullptr); + + // Create an iterator over the result of the above dataset + // This will trigger the creation of the Execution Tree and launch it. + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + + // Iterate the dataset and get each row + std::unordered_map> row; + iter->GetNextRow(&row); + + uint64_t i = 0; + while (row.size() != 0) { + i++; + auto image = row["image"]; + MS_LOG(INFO) << "Tensor image shape: " << image->shape(); + iter->GetNextRow(&row); + } + + EXPECT_EQ(i, 3); + // Manually terminate the pipeline + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestResizeWithBBoxFail) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestResizeWithBBoxFail with invalid parameters."; + // Testing negative resize value in a two-value size + std::shared_ptr resize_with_bbox_op = vision::ResizeWithBBox({10, -10}); + EXPECT_EQ(resize_with_bbox_op, nullptr); + // Testing negative resize value in a single-value size + std::shared_ptr resize_with_bbox_op1 = vision::ResizeWithBBox({-10}); + EXPECT_EQ(resize_with_bbox_op1, nullptr); + // Testing zero resize value + std::shared_ptr resize_with_bbox_op2 = vision::ResizeWithBBox({0, 10}); + EXPECT_EQ(resize_with_bbox_op2, nullptr); + // Testing resize with 3 values + std::shared_ptr resize_with_bbox_op3 = vision::ResizeWithBBox({10, 10, 10}); + EXPECT_EQ(resize_with_bbox_op3, nullptr); +} + TEST_F(MindDataTestPipeline, TestRandomVerticalFlipWithBBoxSuccess) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomVerticalFlipWithBBoxSuccess."; // Create an VOC Dataset