From d1bb9d470ee81847fd4d856cc4ba9491e8f257a5 Mon Sep 17 00:00:00 2001 From: shenwei41 Date: Tue, 16 Mar 2021 11:34:48 +0800 Subject: [PATCH] Add resizefiller --- .../ccsrc/minddata/dataset/api/vision.cc | 16 + .../minddata/dataset/include/vision_lite.h | 25 +- .../dataset/kernels/image/CMakeLists.txt | 1 + .../kernels/image/lite_cv/image_process.cc | 288 ++++++++++++++++++ .../kernels/image/lite_cv/image_process.h | 17 ++ .../dataset/kernels/image/lite_cv/lite_mat.h | 8 + .../dataset/kernels/image/lite_image_utils.cc | 37 +++ .../dataset/kernels/image/lite_image_utils.h | 9 + .../kernels/image/resize_preserve_ar_op.cc | 39 +++ .../kernels/image/resize_preserve_ar_op.h | 55 ++++ .../dataset/kernels/ir/vision/vision_ir.cc | 20 ++ .../dataset/kernels/ir/vision/vision_ir.h | 21 ++ .../minddata/dataset/kernels/tensor_op.h | 1 + .../dataset/liteapi/include/vision_lite.h | 23 ++ mindspore/lite/minddata/CMakeLists.txt | 3 + .../lite/minddata/example/CMakeLists.txt | 17 +- .../lite/minddata/example/testresize.cpp | 68 +++++ tests/ut/cpp/dataset/image_process_test.cc | 17 ++ 18 files changed, 661 insertions(+), 4 deletions(-) create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h create mode 100644 mindspore/lite/minddata/example/testresize.cpp diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index 0c50415e77..e371c5ed01 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -735,6 +735,22 @@ std::shared_ptr Resize::Parse(const MapTargetDevice &env) { return std::make_shared(data_->size_, data_->interpolation_); } +// ResizePreserveAR Transform Operation. 
+struct ResizePreserveAR::Data { + Data(int32_t height, int32_t width, int32_t img_orientation) + : height_(height), width_(width), img_orientation_(img_orientation) {} + int32_t height_; + int32_t width_; + int32_t img_orientation_; +}; + +ResizePreserveAR::ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation) + : data_(std::make_shared(height, width, img_orientation)) {} + +std::shared_ptr ResizePreserveAR::Parse() { + return std::make_shared(data_->height_, data_->width_, data_->img_orientation_); +} + #ifdef ENABLE_ANDROID // Rotate Transform Operation. Rotate::Rotate() {} diff --git a/mindspore/ccsrc/minddata/dataset/include/vision_lite.h b/mindspore/ccsrc/minddata/dataset/include/vision_lite.h index c8ccfe28cc..cf95b6ea20 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision_lite.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision_lite.h @@ -93,7 +93,7 @@ class CenterCrop final : public TensorTransform { /// \brief RGB2GRAY TensorTransform. /// \notes Convert RGB image or color image to grayscale image -class RGB2GRAY : public TensorTransform { +class RGB2GRAY final : public TensorTransform { public: /// \brief Constructor. RGB2GRAY() = default; @@ -244,6 +244,29 @@ class Resize final : public TensorTransform { std::shared_ptr data_; }; +/// \brief ResizePreserveAR TensorTransform. +/// \notes Keep the original picture ratio and fill the rest. +class ResizePreserveAR final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] height The height of image output value after resizing. + /// \param[in] width The width of image output value after resizing. + /// \param[in] img_orientation Angle method of image rotation. + ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation = 0); + + /// \brief Destructor. + ~ResizePreserveAR() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. 
+ /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + /// \brief Rotate TensorTransform. /// \notes Rotate the input image using a specified angle id. class Rotate final : public TensorTransform { diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt index 36bd1b3b5c..a16066a818 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt @@ -44,6 +44,7 @@ add_library(kernels-image OBJECT random_sharpness_op.cc rescale_op.cc resize_op.cc + resize_preserve_ar_op.cc rgb_to_gray_op.cc rgba_to_bgr_op.cc rgba_to_rgb_op.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc index 057ec63d1b..bb3afa8f39 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc @@ -1692,5 +1692,293 @@ bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, Lit return true; } +void UpdateOrientationAfineMat(const LiteMat &src, int *rotationDstWidth, int *rotationDstHeight, float (*varM)[2][3], + int img_orientation) { + int srcOrientation = img_orientation; + if (IM_TOOL_EXIF_ORIENTATION_0_DEG_MIRROR == srcOrientation) { + (*varM)[0][0] *= -1; + (*varM)[0][2] += *rotationDstWidth - 1; + } else if ((IM_TOOL_EXIF_ORIENTATION_180_DEG == srcOrientation) || + (IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR == srcOrientation)) { + // 0, 1, 2 is the matrix index of varM + (*varM)[0][0] = -1; + (*varM)[0][1] = 0; + (*varM)[0][2] = *rotationDstWidth - 1; + (*varM)[1][0] = 0; + (*varM)[1][1] = -1; + (*varM)[1][2] = *rotationDstHeight - 1; // y offset spans the height, not the width + if (IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR == srcOrientation) { + /* with 
Mirror */ + (*varM)[0][0] *= -1; + (*varM)[0][2] -= *rotationDstWidth - 1; + } + } else if ((IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR == srcOrientation) || + (IM_TOOL_EXIF_ORIENTATION_90_DEG == srcOrientation)) { + /* 90 Deg rotation */ + *rotationDstWidth = src.height_; + *rotationDstHeight = src.width_; + (*varM)[0][0] = 0; + (*varM)[0][1] = -1; + (*varM)[0][2] = *rotationDstWidth - 1; + (*varM)[1][0] = 1; + (*varM)[1][1] = 0; + (*varM)[1][2] = 0; + if (IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR == srcOrientation) { + /* with Mirror */ + (*varM)[0][1] *= -1; + (*varM)[0][2] -= *rotationDstWidth - 1; + } + } else if ((IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR == srcOrientation) || + (IM_TOOL_EXIF_ORIENTATION_270_DEG == srcOrientation)) { + /* 270 Deg rotation */ + *rotationDstWidth = src.height_; + *rotationDstHeight = src.width_; + (*varM)[0][0] = 0; + (*varM)[0][1] = 1; + (*varM)[0][2] = 0; + (*varM)[1][0] = -1; + (*varM)[1][1] = 0; + (*varM)[1][2] = *rotationDstHeight - 1; // y offset spans the rotated height (src width) + if (IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR == srcOrientation) { + /* with Mirror */ + (*varM)[0][1] *= -1; + (*varM)[0][2] += *rotationDstWidth - 1; + } + } +} + +void ImageToolsConvertImage(const LiteMat &src, const LiteMat &dst, imageToolsImage_t *imageIn, + imageToolsImage_t *imageOut) { + imageIn->image_buff = src.data_ptr_; + imageIn->h = src.height_; + imageIn->w = src.width_; + imageIn->stride = src.width_; + imageIn->dataType = IM_TOOL_DATA_TYPE_UINT8; + + imageOut->image_buff = dst.data_ptr_; + imageOut->h = dst.height_; + imageOut->w = dst.width_; + imageOut->stride = dst.width_; + imageOut->dataType = IM_TOOL_DATA_TYPE_FLOAT; +} + +void InvAffine2x3(float M[2][3], float invM[][3]) { + float inv_det = M[0][0] * M[1][1] - M[1][0] * M[0][1]; + invM[1][1] = M[0][0] / inv_det; + invM[0][1] = -M[0][1] / inv_det; + invM[1][0] = -M[1][0] / inv_det; + invM[0][0] = M[1][1] / inv_det; + invM[0][2] = (M[0][1] * M[1][2] - M[1][1] * M[0][2]) / inv_det; + 
invM[1][2] = -(M[0][0] * M[1][2] - 
M[1][0] * M[0][2]) / inv_det; +} + +static float *CalDst(float *dst, float v1, float v2, float v3) { + *dst++ = v1; + *dst++ = v2; + *dst++ = v3; + return dst; +} + +static void ImageWarpAffineHWCFloat(imageToolsImage_t image, imageToolsImage_t warped_image, float invM[2][3]) { + // 3 is r, g, b + warped_image.stride *= 3; + image.stride *= 3; + + float *warped_image_buff = reinterpret_cast(warped_image.image_buff); + + float *image_buff = reinterpret_cast(image.image_buff); + for (int y0 = 0; y0 < warped_image.h; y0++) { + // Init pointers to start of rows + float *dst = warped_image_buff + y0 * warped_image.stride; + + for (int x0 = 0; x0 < warped_image.w; x0++) { + // number 0, 1, 2 is the index of MATRIX 'invM' + float fPosx = (static_cast(x0) * invM[0][0]) + (static_cast(y0) * invM[0][1]) + invM[0][2]; + float fPosy = (static_cast(x0) * invM[1][0]) + (static_cast(y0) * invM[1][1]) + invM[1][2]; + int iPosy = static_cast(fPosy + 2) - 2; // for floor like result until -2. + int iPosx = static_cast(fPosx + 2) - 2; // for floor like result until -2. 
+ if ((iPosx < -1) || (iPosx >= image.w) || (iPosy < -1) || (iPosy >= image.h)) { + dst = CalDst(dst, 0.0f, 0.0f, 0.0f); + continue; + } + float fRsiduy = fPosy - iPosy; + float fRsidux = fPosx - iPosx; + float fOut0 = 0; + float fOut1 = 0; + float fOut2 = 0; + float *fTopeLeft = image_buff + iPosy * image.stride + iPosx * 3; + float fCoeff = 1 - fRsidux - fRsiduy + fRsidux * fRsiduy; + if ((iPosx >= 0) && (iPosy >= 0)) { + // number 0, 1, 2 is the index of MATRIX 'fTopeLeft' + fOut0 += fCoeff * fTopeLeft[0]; + fOut1 += fCoeff * fTopeLeft[1]; + fOut2 += fCoeff * fTopeLeft[2]; + } + float fSum = fCoeff; + fCoeff = fRsiduy - fRsidux * fRsiduy; + if ((iPosx >= 0) && (iPosy < image.h - 1)) { + // Image channel G and B could be accessed by adding number of 1, 2 + fOut0 += fCoeff * fTopeLeft[image.stride]; + fOut1 += fCoeff * fTopeLeft[image.stride + 1]; + fOut2 += fCoeff * fTopeLeft[image.stride + 2]; + } + fSum += fCoeff; + fCoeff = fRsidux - fRsidux * fRsiduy; + if ((iPosx < image.w - 1) && (iPosy >= 0)) { + // Image channel G and B could be accessed by adding number of 1, 2 + fOut0 += fCoeff * fTopeLeft[3]; + fOut1 += fCoeff * fTopeLeft[3 + 1]; + fOut2 += fCoeff * fTopeLeft[3 + 2]; + } + fSum += fCoeff; + if ((iPosx < image.w - 1) && (iPosy < image.h - 1)) { + // Image channel G and B could be accessed by adding number of 1, 2 + fOut0 += (1 - fSum) * fTopeLeft[image.stride + 3]; + fOut1 += (1 - fSum) * fTopeLeft[image.stride + 3 + 1]; + fOut2 += (1 - fSum) * fTopeLeft[image.stride + 3 + 2]; + } + dst = CalDst(dst, fOut0, fOut1, fOut2); + } + } +} + +static void ImageWarpAffineHWCUint8(imageToolsImage_t image, imageToolsImage_t warped_image, float invM[2][3]) { + // 3 is r, g, b + warped_image.stride *= 3; + image.stride *= 3; + float *warped_image_buff = reinterpret_cast(warped_image.image_buff); + + uint8_t *image_buff = reinterpret_cast(image.image_buff); + for (int y0 = 0; y0 < warped_image.h; y0++) { + // Init pointers to start of rows + float *dst = 
warped_image_buff + y0 * warped_image.stride; + + for (int x0 = 0; x0 < warped_image.w; x0++) { + float fPosx = (static_cast(x0) * invM[0][0]) + (static_cast(y0) * invM[0][1]) + invM[0][2]; + float fPosy = (static_cast(x0) * invM[1][0]) + (static_cast(y0) * invM[1][1]) + invM[1][2]; + + int iPosy = static_cast(fPosy + 2) - 2; // for floor like result until -2. + int iPosx = static_cast(fPosx + 2) - 2; // for floor like result until -2. + if ((iPosx < -1) || (iPosx >= image.w) || (iPosy < -1) || (iPosy >= image.h)) { + dst = CalDst(dst, 0.0f, 0.0f, 0.0f); + continue; + } + float fRsiduy = fPosy - iPosy; + float fRsidux = fPosx - iPosx; + float fOut0 = 0; + float fOut1 = 0; + float fOut2 = 0; + uint8_t *uiTopeLeft = image_buff + iPosy * image.stride + iPosx * 3; + float fCoeff = 1 - fRsidux - fRsiduy + fRsidux * fRsiduy; + if ((iPosx >= 0) && (iPosy >= 0)) { + // number 0, 1, 2 is the index of MATRIX round. + fOut0 += fCoeff * static_cast(uiTopeLeft[0]); + fOut1 += fCoeff * static_cast(uiTopeLeft[1]); + fOut2 += fCoeff * static_cast(uiTopeLeft[2]); + } + float fSum = fCoeff; + fCoeff = fRsiduy - fRsidux * fRsiduy; + if ((iPosx >= 0) && (iPosy < image.h - 1)) { + fOut0 += fCoeff * static_cast(uiTopeLeft[image.stride]); + fOut1 += fCoeff * static_cast(uiTopeLeft[image.stride + 1]); + fOut2 += fCoeff * static_cast(uiTopeLeft[image.stride + 2]); + } + fSum += fCoeff; + fCoeff = fRsidux - fRsidux * fRsiduy; + if ((iPosx < image.w - 1) && (iPosy >= 0)) { + fOut0 += fCoeff * static_cast(uiTopeLeft[3]); + fOut1 += fCoeff * static_cast(uiTopeLeft[3 + 1]); + fOut2 += fCoeff * static_cast(uiTopeLeft[3 + 2]); + } + fSum += fCoeff; + if ((iPosx < image.w - 1) && (iPosy < image.h - 1)) { + fOut0 += (1 - fSum) * static_cast(uiTopeLeft[image.stride + 3]); + fOut1 += (1 - fSum) * static_cast(uiTopeLeft[image.stride + 3 + 1]); + fOut2 += (1 - fSum) * static_cast(uiTopeLeft[image.stride + 3 + 2]); + } + dst = CalDst(dst, fOut0, fOut1, fOut2); + } + } +} + +int 
ImageWarpAffineHWC(imageToolsImage_t image, imageToolsImage_t warped_image, float M[2][3], bool bIsMInv) { + if ((IM_TOOL_DATA_TYPE_FLOAT != warped_image.dataType) || + ((IM_TOOL_DATA_TYPE_FLOAT != image.dataType) && (IM_TOOL_DATA_TYPE_UINT8 != image.dataType))) { + return IM_TOOL_RETURN_STATUS_INVALID_INPUT; + } + float invM[2][3]; + if (bIsMInv) { + for (int iy = 0; iy < 2; iy++) { + for (int ix = 0; ix < 3; ix++) { + invM[iy][ix] = M[iy][ix]; + } + } + } else { + InvAffine2x3(M, invM); + } + + if (IM_TOOL_DATA_TYPE_FLOAT == image.dataType) { + ImageWarpAffineHWCFloat(image, warped_image, invM); + } else { + ImageWarpAffineHWCUint8(image, warped_image, invM); + } + return IM_TOOL_RETURN_STATUS_SUCCESS; +} + +bool ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3], + float (*invM)[2][3], int img_orientation) { + if (dst.IsEmpty()) { + dst.Init(w, h, src.channel_, LDataType::FLOAT32); + } + // uint8_t *dst_ptr = dst; + float varM[2][3] = {{1.0, 0, 0}, {0, 1.0, 0}}; + float divisor = 2.0; + int rotationDstWidth = src.width_; + int rotationDstHeight = src.height_; + if (img_orientation > IM_TOOL_EXIF_ORIENTATION_0_DEG) { + UpdateOrientationAfineMat(src, &rotationDstWidth, &rotationDstHeight, &varM, img_orientation); + } + + /* Resize after orientation fix */ + float srcAR = static_cast(rotationDstWidth) / static_cast(rotationDstHeight); + float dstAR = static_cast(dst.width_) / static_cast(dst.height_); + auto dstActiveWidth = static_cast(dst.width_); + auto dstActiveHeight = static_cast(dst.height_); + float ratio, shiftW, shiftH; + if (srcAR < dstAR) { + ratio = static_cast(dst.height_) / static_cast(rotationDstHeight); + dstActiveWidth = static_cast(rotationDstWidth) * ratio; + } else { + ratio = static_cast(dst.width_) / static_cast(rotationDstWidth); + dstActiveHeight = static_cast(rotationDstHeight) * ratio; + } + shiftW = (static_cast(dst.width_) - dstActiveWidth) / divisor; + shiftH = (static_cast(dst.height_) - 
dstActiveHeight) / divisor; + for (auto &iy : varM) { + for (float &ix : iy) { + // cppcheck-suppress useStlAlgorithm + ix *= ratio; + } + } + + varM[0][2] += shiftW; + varM[1][2] += shiftH; + /* Resize and shift by affine transform */ + imageToolsImage_t imageIn, imageOut; + ImageToolsConvertImage(src, dst, &imageIn, &imageOut); + InvAffine2x3(varM, *invM); + int retVal = ImageWarpAffineHWC(imageIn, imageOut, *invM, true); + if (retVal != 0) { + return false; + } + + // 0, 1, 2 is the index of corresponding elem in ratioShiftWShiftH + (*ratioShiftWShiftH)[0] = ratio; + (*ratioShiftWShiftH)[1] = shiftW; + (*ratioShiftWShiftH)[2] = shiftH; + + return true; +} + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h index 324b9dcd96..08f4820d4c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.h @@ -28,6 +28,19 @@ namespace mindspore { namespace dataset { #define CV_PI 3.1415926535897932384626433832795 +#define IM_TOOL_EXIF_ORIENTATION_0_DEG 1 +#define IM_TOOL_EXIF_ORIENTATION_0_DEG_MIRROR 2 +#define IM_TOOL_EXIF_ORIENTATION_180_DEG 3 +#define IM_TOOL_EXIF_ORIENTATION_180_DEG_MIRROR 4 +#define IM_TOOL_EXIF_ORIENTATION_90_DEG_MIRROR 5 +#define IM_TOOL_EXIF_ORIENTATION_90_DEG 6 +#define IM_TOOL_EXIF_ORIENTATION_270_DEG_MIRROR 7 +#define IM_TOOL_EXIF_ORIENTATION_270_DEG 8 +#define NUM_OF_RGB_CHANNELS 9 +#define IM_TOOL_DATA_TYPE_FLOAT (1) +#define IM_TOOL_DATA_TYPE_UINT8 (2) +#define IM_TOOL_RETURN_STATUS_SUCCESS (0) +#define IM_TOOL_RETURN_STATUS_INVALID_INPUT (1) #define INT16_CAST(X) \ static_cast(::std::min(::std::max(static_cast(X + (X >= 0.f ? 
0.5f : -0.5f)), -32768), 32767)); @@ -140,6 +153,10 @@ bool Sobel(const LiteMat &src, LiteMat &dst, int flag_x, int flag_y, int ksize, /// \brief Convert RGB image or color image to grayscale image bool ConvertRgbToGray(const LiteMat &src, LDataType data_type, int w, int h, LiteMat &mat); +/// \brief Resize preserve AR with filler +bool ResizePreserveARWithFiller(LiteMat &src, LiteMat &dst, int h, int w, float (*ratioShiftWShiftH)[3], + float (*invM)[2][3], int img_orientation); + } // namespace dataset } // namespace mindspore #endif // IMAGE_PROCESS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h index c60566acc3..393d442c5e 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h @@ -63,6 +63,14 @@ struct Point { Point(float _x, float _y) : x(_x), y(_y) {} }; +typedef struct imageToolsImage { + int w; + int h; + int stride; + int dataType; + void *image_buff; +} imageToolsImage_t; + using BOOL_C1 = Chn1; using BOOL_C2 = Chn2; using BOOL_C3 = Chn3; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc index bc1078810a..68c273a9f9 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc @@ -421,6 +421,43 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out return Status::OK(); } +Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation, + TensorRow *outputs) { + outputs->resize(3); + std::shared_ptr input = inputs[0]; + LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2], + const_cast(reinterpret_cast(input->GetBuffer())), + GetLiteCVDataType(input->type())); + + LiteMat lite_mat_dst; + 
std::shared_ptr image_tensor; + TensorShape new_shape = TensorShape({height, width, input->shape()[2]}); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, DataType(DataType::DE_FLOAT32), &image_tensor)); + uint8_t *buffer = reinterpret_cast(&(*image_tensor->begin())); + lite_mat_dst.Init(width, height, input->shape()[2], reinterpret_cast(buffer), LDataType::FLOAT32); + + float ratioShiftWShiftH[3] = {0}; + float invM[2][3] = {{0, 0, 0}, {0, 0, 0}}; + bool ret = + ResizePreserveARWithFiller(lite_mat_src, lite_mat_dst, height, width, &ratioShiftWShiftH, &invM, img_orientation); + CHECK_FAIL_RETURN_UNEXPECTED(ret, "Resize: bilinear resize failed."); + + std::shared_ptr ratio_tensor; + TensorShape ratio_shape = TensorShape({3}); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(ratio_shape, DataType(DataType::DE_FLOAT32), + reinterpret_cast(&ratioShiftWShiftH), &ratio_tensor)); + + std::shared_ptr invM_tensor; + TensorShape invM_shape = TensorShape({2, 3}); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(invM_shape, DataType(DataType::DE_FLOAT32), + reinterpret_cast(&invM), &invM_tensor)); + + (*outputs)[0] = image_tensor; + (*outputs)[1] = ratio_tensor; + (*outputs)[2] = invM_tensor; + return Status::OK(); +} + Status RgbToGray(const std::shared_ptr &input, std::shared_ptr *output) { if (input->Rank() != 3) { RETURN_STATUS_UNEXPECTED("RgbToGray: input image is not in shape of "); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h index c3dc9abdc2..e90fb64da9 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.h @@ -95,6 +95,15 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out int32_t output_width, double fx = 0.0, double fy = 0.0, InterpolationMode mode = InterpolationMode::kLinear); +/// \brief Returns Resized image. 
+/// \param[in] inputs input TensorRow +/// \param[in] height Height of output +/// \param[in] width Width of output +/// \param[in] img_orientation Angle method of image rotation +/// \param[out] outputs Resized image of shape and same type as input +Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation, + TensorRow *outputs); + /// \brief Take in a 3 channel image in RBG to GRAY /// \param[in] input The input image /// \param[out] output The output image diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc new file mode 100644 index 0000000000..8e09463a35 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc @@ -0,0 +1,39 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "minddata/dataset/kernels/image/resize_preserve_ar_op.h" + +#ifdef ENABLE_ANDROID +#include "minddata/dataset/kernels/image/lite_image_utils.h" +#endif +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +const int32_t ResizePreserveAROp::kDefImgorientation = 0; + +ResizePreserveAROp::ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation) + : height_(height), width_(width), img_orientation_(img_orientation) {} + +Status ResizePreserveAROp::Compute(const TensorRow &inputs, TensorRow *outputs) { + IO_CHECK_VECTOR(inputs, outputs); +#ifdef ENABLE_ANDROID + return ResizePreserve(inputs, height_, width_, img_orientation_, outputs); +#endif + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h new file mode 100644 index 0000000000..d473c80c35 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h @@ -0,0 +1,55 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_RESIZE_PRESERVE_AR_OP_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_RESIZE_PRESERVE_AR_OP_H_ + +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" +#ifndef ENABLE_ANDROID +#include "minddata/dataset/kernels/image/image_utils.h" +#else +#include "minddata/dataset/kernels/image/lite_image_utils.h" +#endif +#include "minddata/dataset/kernels/tensor_op.h" +#include "minddata/dataset/util/status.h" + +namespace mindspore { +namespace dataset { +class ResizePreserveAROp : public TensorOp { + public: + // Default values, also used by python_bindings.cc + static const int32_t kDefImgorientation; + + ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgorientation); + + ~ResizePreserveAROp() override = default; + + Status Compute(const TensorRow &input, TensorRow *output) override; + + std::string Name() const override { return kResizePreserveAROp; } + + protected: + int32_t height_; + int32_t width_; + int32_t img_orientation_; +}; +} // namespace dataset +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_KERNELS_IMAGE_RESIZE_PRESERVE_AR_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc index 00fbde7c28..bb023de165 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.cc @@ -72,6 +72,7 @@ #include "minddata/dataset/kernels/image/rgba_to_bgr_op.h" #include "minddata/dataset/kernels/image/rgba_to_rgb_op.h" #endif +#include "minddata/dataset/kernels/image/resize_preserve_ar_op.h" #include "minddata/dataset/kernels/image/rgb_to_gray_op.h" #include "minddata/dataset/kernels/image/rotate_op.h" #ifndef ENABLE_ANDROID @@ -1421,6 +1422,25 @@ Status ResizeOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } +// 
ResizePreserveAROperation +ResizePreserveAROperation::ResizePreserveAROperation(int32_t height, int32_t width, int32_t img_orientation) + : height_(height), width_(width), img_orientation_(img_orientation) {} + +Status ResizePreserveAROperation::ValidateParams() { return Status::OK(); } + +std::shared_ptr ResizePreserveAROperation::Build() { + return std::make_shared(height_, width_, img_orientation_); +} + +Status ResizePreserveAROperation::to_json(nlohmann::json *out_json) { + nlohmann::json args; + args["height"] = height_; + args["width"] = width_; + args["img_orientation"] = img_orientation_; + *out_json = args; + return Status::OK(); +} + // RotateOperation RotateOperation::RotateOperation() { rotate_op = std::make_shared(0); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.h index b18ca219f5..80bfd1fde5 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vision_ir.h @@ -71,6 +71,7 @@ constexpr char kRandomVerticalFlipOperation[] = "RandomVerticalFlip"; constexpr char kRandomVerticalFlipWithBBoxOperation[] = "RandomVerticalFlipWithBBox"; constexpr char kRescaleOperation[] = "Rescale"; constexpr char kResizeOperation[] = "Resize"; +constexpr char kResizePreserveAROperation[] = "ResizePreserveAR"; constexpr char kResizeWithBBoxOperation[] = "ResizeWithBBox"; constexpr char kRgbaToBgrOperation[] = "RgbaToBgr"; constexpr char kRgbaToRgbOperation[] = "RgbaToRgb"; @@ -781,6 +782,26 @@ class ResizeOperation : public TensorOperation { InterpolationMode interpolation_; }; +class ResizePreserveAROperation : public TensorOperation { + public: + ResizePreserveAROperation(int32_t height, int32_t width, int32_t img_orientation); + + ~ResizePreserveAROperation() = default; + + std::shared_ptr Build() override; + + Status ValidateParams() override; + + std::string Name() const override { return 
kResizePreserveAROperation; } + + Status to_json(nlohmann::json *out_json) override; + + private: + int32_t height_; + int32_t width_; + int32_t img_orientation_; +}; + class ResizeWithBBoxOperation : public TensorOperation { public: explicit ResizeWithBBoxOperation(std::vector size, InterpolationMode interpolation_mode); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index b564376d76..2f919bbe14 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -94,6 +94,7 @@ constexpr char kRandomVerticalFlipWithBBoxOp[] = "RandomVerticalFlipWithBBoxOp"; constexpr char kRescaleOp[] = "RescaleOp"; constexpr char kResizeBilinearOp[] = "ResizeBilinearOp"; constexpr char kResizeOp[] = "ResizeOp"; +constexpr char kResizePreserveAROp[] = "ResizePreserveAROp"; constexpr char kResizeWithBBoxOp[] = "ResizeWithBBoxOp"; constexpr char kRgbaToBgrOp[] = "RgbaToBgrOp"; constexpr char kRgbaToRgbOp[] = "RgbaToRgbOp"; diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h index 6587e394d5..06d6797d2d 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h @@ -88,6 +88,29 @@ class CenterCrop : public TensorTransform { std::shared_ptr data_; }; +/// \brief ResizePreserveAR TensorTransform. +/// \notes Keep the original picture ratio and fill the rest. +class ResizePreserveAR final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] height The height of image output value after resizing. + /// \param[in] width The width of image output value after resizing. + /// \param[in] img_orientation Angle method of image rotation. + ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation = 0); + + /// \brief Destructor. 
+ ~ResizePreserveAR() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + /// \brief RGB2GRAY TensorTransform. /// \notes Convert RGB image or color image to grayscale image class RGB2GRAY : public TensorTransform { diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index 58fca90347..28ad8cc5b9 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -199,6 +199,7 @@ if(BUILD_MINDDATA STREQUAL "full") ${MINDDATA_DIR}/kernels/image/decode_op.cc ${MINDDATA_DIR}/kernels/image/normalize_op.cc ${MINDDATA_DIR}/kernels/image/resize_op.cc + ${MINDDATA_DIR}/kernels/image/resize_preserve_ar_op.cc ${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc ${MINDDATA_DIR}/kernels/image/rotate_op.cc ${MINDDATA_DIR}/kernels/image/random_affine_op.cc @@ -282,6 +283,7 @@ elseif(BUILD_MINDDATA STREQUAL "wrapper") ${MINDDATA_DIR}/kernels/image/crop_op.cc ${MINDDATA_DIR}/kernels/image/normalize_op.cc ${MINDDATA_DIR}/kernels/image/resize_op.cc + ${MINDDATA_DIR}/kernels/image/resize_preserve_ar_op.cc ${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc ${MINDDATA_DIR}/kernels/image/rotate_op.cc ${MINDDATA_DIR}/kernels/data/compose_op.cc @@ -381,6 +383,7 @@ elseif(BUILD_MINDDATA STREQUAL "lite") "${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc" "${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc" "${MINDDATA_DIR}/kernels/image/rescale_op.cc" + "${MINDDATA_DIR}/kernels/image/resize_preserve_ar_op.cc" "${MINDDATA_DIR}/kernels/image/rgb_to_gray_op.cc" "${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc" "${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc" diff --git a/mindspore/lite/minddata/example/CMakeLists.txt b/mindspore/lite/minddata/example/CMakeLists.txt index 8464bc7631..e4745bb583 
100644 --- a/mindspore/lite/minddata/example/CMakeLists.txt +++ b/mindspore/lite/minddata/example/CMakeLists.txt @@ -1,11 +1,11 @@ cmake_minimum_required(VERSION 3.14.1) project(testlenet) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -fPIC") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -fPIC -std=c++17") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") -set(MD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.1.0-inference-linux-x64/minddata") -set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.1.0-inference-linux-x64/") +set(MD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-inference-linux-x64/minddata") +set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-inference-linux-x64/") include_directories(${MD_DIR}) include_directories(${MS_DIR}) @@ -16,6 +16,17 @@ add_executable(testlenet ) target_link_libraries(testlenet + ${MD_DIR}/lib/libminddata-lite.so + ${MD_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62 + ${MD_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0 + ${MS_DIR}/lib/libmindspore-lite.so + pthread) + +add_executable(testresize + ${CMAKE_CURRENT_SOURCE_DIR}/testresize.cpp + ) + +target_link_libraries(testresize ${MD_DIR}/lib/libminddata-lite.so ${MD_DIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62 ${MD_DIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0 diff --git a/mindspore/lite/minddata/example/testresize.cpp b/mindspore/lite/minddata/example/testresize.cpp new file mode 100644 index 0000000000..d54dc93f1b --- /dev/null +++ b/mindspore/lite/minddata/example/testresize.cpp @@ -0,0 +1,68 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "include/datasets.h" +#include "include/iterator.h" +#include "include/vision_lite.h" +#include "include/transforms.h" +#include "include/api/types.h" + +using mindspore::dataset::Album; +using mindspore::dataset::Dataset; +using mindspore::dataset::Iterator; +using mindspore::dataset::SequentialSampler; +using mindspore::dataset::TensorTransform; +using mindspore::dataset::vision::ResizePreserveAR; + + +int main(int argc, char **argv) { + std::string folder_path = "./testAlbum/images"; + std::string schema_file = "./testAlbum/datasetSchema.json"; + std::vector column_names = {"image", "label", "id"}; + + // Create an Album Dataset over folder_path, using schema_file and the three columns listed above + std::shared_ptr ds = + Album(folder_path, schema_file, column_names, true, std::make_shared(0, 1)); + ds = ds->SetNumWorkers(1); + + std::shared_ptr resize(new ResizePreserveAR(1000, 1000)); + ds = ds->Map({resize}, {"image"}, {"image", "ratio", "invM"}); + + std::shared_ptr iter = ds->CreateIterator(); + + std::unordered_map row; + iter->GetNextRow(&row);  // fetch the first row; the call's result is not checked + + uint64_t i = 0;  // incremented once per loop pass below; never read afterwards + while (row.size() != 0) {  // keep fetching until GetNextRow leaves row empty + i++; + iter->GetNextRow(&row); + } + + iter->Stop(); +} diff --git a/tests/ut/cpp/dataset/image_process_test.cc b/tests/ut/cpp/dataset/image_process_test.cc index 437f41affc..dc83ba4521 100644 --- a/tests/ut/cpp/dataset/image_process_test.cc +++ b/tests/ut/cpp/dataset/image_process_test.cc @@ -1736,3 +1736,20 @@ TEST_F(MindDataImageProcess, testConvertRgbToGray) { 
cv::imwrite("./mindspore_image.jpg", dst_image); CompareMat(rgb_mat, lite_mat_gray); } + +TEST_F(MindDataImageProcess, testResizePreserveARWithFiller) { + std::string filename = "data/dataset/apple.jpg"; + cv::Mat image = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR); + + LiteMat lite_mat_rgb; + lite_mat_rgb.Init(image.cols, image.rows, image.channels(), image.data, LDataType::UINT8); + LiteMat lite_mat_resize;  // default-constructed destination; its fields are read back below + float ratioShiftWShiftH[3] = {0};  // zero-initialised; passed by address to ResizePreserveARWithFiller + float invM[2][3] = {{0, 0, 0}, {0, 0, 0}}; + int h = 1000; + int w = 1000; + bool ret = ResizePreserveARWithFiller(lite_mat_rgb, lite_mat_resize, h, w, &ratioShiftWShiftH, &invM, 0); + ASSERT_TRUE(ret == true); + cv::Mat dst_image(lite_mat_resize.height_, lite_mat_resize.width_, CV_32FC3, lite_mat_resize.data_ptr_); + cv::imwrite("./mindspore_image.jpg", dst_image); +}