From c410c6a6e2c48b03fc97f0475eafee7305f43b12 Mon Sep 17 00:00:00 2001 From: mayang Date: Sun, 16 Aug 2020 23:44:26 +0800 Subject: [PATCH] soft_dvpp_decode_resize_jpeg_op issue resolved, input size can be an int, break the constraints of input size being sequence (height, width) --- ..._dvpp_decode_random_crop_resize_jpeg_op.cc | 4 +- .../soft_dvpp_decode_resize_jpeg_op.cc | 38 +++++++++++++++++-- .../soft_dvpp_decode_resize_jpeg_op.h | 2 + .../dataset/transforms/vision/c_transforms.py | 2 +- tests/ut/python/dataset/test_soft_dvpp.py | 31 +++++++++++++++ 5 files changed, 70 insertions(+), 7 deletions(-) diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc index 8980329a8c..0ae7195196 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc @@ -43,8 +43,8 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::GetCropInfo(const std::shared_ptrleft = x; crop_info->up = y; - crop_info->right = crop_info->left + crop_widht; - crop_info->down = crop_info->up + crop_heigh; + crop_info->right = crop_info->left + crop_widht - 1; + crop_info->down = crop_info->up + crop_heigh - 1; return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc index 84bb189ac2..3da38af095 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc @@ -15,6 +15,7 @@ */ #include "minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h" #include +#include #include "./utils/external_soft_dp.h" #include "opencv2/opencv.hpp" @@ -35,14 +36,32 @@ Status SoftDvppDecodeResizeJpegOp::Compute(const std::shared_ptr &input, SoftDpProcsessInfo info; info.input_buffer = static_cast(buffer); info.input_buffer_size = input->SizeInBytes(); - info.output_width = target_width_; - info.output_height = target_height_; + + int input_w = 0; + int input_h = 0; + RETURN_IF_NOT_OK(GetJpegImageInfo(input, &input_w, &input_h)); SoftDpCropInfo crop_info{0, 0, 0, 0}; - cv::Mat out_rgb_img(target_height_, target_width_, CV_8UC3); + if (target_width_ == 0) { + if (input_h < input_w) { + CHECK_FAIL_RETURN_UNEXPECTED(input_h != 0, "The input height is 0"); + info.output_height = target_height_; + info.output_width = static_cast(std::lround(static_cast(input_w) / input_h * info.output_height)); + } else { + CHECK_FAIL_RETURN_UNEXPECTED(input_w != 0, "The input width is 0"); + info.output_width = target_height_; + info.output_height = static_cast(std::lround(static_cast(input_h) / input_w * info.output_width)); + } + } else { + info.output_height = target_height_; + info.output_width = target_width_; + } + + cv::Mat out_rgb_img(info.output_height, info.output_width, CV_8UC3); info.output_buffer = out_rgb_img.data; - info.output_buffer_size = target_width_ * target_height_ * 3; + info.output_buffer_size = info.output_height * info.output_width * 3; + info.is_v_before_u = true; int ret = DecodeAndResizeJpeg(&info); std::string error_info("Soft dvpp DecodeAndResizeJpeg failed with return code: "); @@ -56,5 +75,16 @@ Status SoftDvppDecodeResizeJpegOp::Compute(const std::shared_ptr &input, } return Status::OK(); } + +Status SoftDvppDecodeResizeJpegOp::OutputShape(const std::vector &inputs, + std::vector &outputs) { + RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); + outputs.clear(); + TensorShape out({-1, -1, 3}); // we don't know what is output image size, but we know it should be 3 channels + if (inputs[0].Rank() == 1) outputs.emplace_back(out); + if (!outputs.empty()) return Status::OK(); + return Status(StatusCode::kUnexpectedError, "Input has a wrong shape"); +} + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h index 976c3ec53d..21bb54c222 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h @@ -18,6 +18,7 @@ #include #include +#include #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/kernels/tensor_op.h" @@ -34,6 +35,7 @@ class SoftDvppDecodeResizeJpegOp : public TensorOp { ~SoftDvppDecodeResizeJpegOp() = default; Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; + Status OutputShape(const std::vector &inputs, std::vector &outputs) override; std::string Name() const override { return kSoftDvppDecodeReiszeJpegOp; } diff --git a/mindspore/dataset/transforms/vision/c_transforms.py b/mindspore/dataset/transforms/vision/c_transforms.py index d7f944adda..d7c0033c84 100644 --- a/mindspore/dataset/transforms/vision/c_transforms.py +++ b/mindspore/dataset/transforms/vision/c_transforms.py @@ -943,7 +943,7 @@ class SoftDvppDecodeResizeJpeg(cde.SoftDvppDecodeResizeJpegOp): @check_resize def __init__(self, size): if isinstance(size, int): - size = (size, size) + size = (size, 0) self.size = size super().__init__(*size) diff --git a/tests/ut/python/dataset/test_soft_dvpp.py b/tests/ut/python/dataset/test_soft_dvpp.py index 1c8cfa9c9c..6a5c93b13f 100644 --- a/tests/ut/python/dataset/test_soft_dvpp.py +++ b/tests/ut/python/dataset/test_soft_dvpp.py @@ -84,7 +84,38 @@ def test_soft_dvpp_decode_random_crop_resize_jpeg(plot=False): visualize_image(image1, image2, mse) num_iter += 1 +def test_soft_dvpp_decode_resize_jpeg_supplement(plot=False): + """ + Test SoftDvppDecodeResizeJpeg op + """ + logger.info("test_random_decode_resize_op") + + # First dataset + data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + decode_op = vision.Decode() + resize_op = vision.Resize(256) + data1 = data1.map(input_columns=["image"], operations=[decode_op, resize_op]) + + # Second dataset + data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) + soft_dvpp_decode_resize_op = vision.SoftDvppDecodeResizeJpeg(256) + data2 = data2.map(input_columns=["image"], operations=soft_dvpp_decode_resize_op) + + num_iter = 0 + for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + if num_iter > 0: + break + image1 = item1["image"] + image2 = item2["image"] + mse = diff_mse(image1, image2) + assert mse <= 0.02 + logger.info("random_crop_decode_resize_op_{}, mse: {}".format(num_iter + 1, mse)) + if plot: + visualize_image(image1, image2, mse) + num_iter += 1 if __name__ == "__main__": test_soft_dvpp_decode_resize_jpeg(plot=True) test_soft_dvpp_decode_random_crop_resize_jpeg(plot=True) + test_soft_dvpp_decode_resize_jpeg_supplement(plot=True) + \ No newline at end of file