From 2474a0237f98a922075f8ed14ecaa546b997a85f Mon Sep 17 00:00:00 2001
From: Zhenglong Li
Date: Mon, 8 Mar 2021 19:25:08 +0800
Subject: [PATCH] Fix dvpp input validation and Aipp config generation issue

---
 .../ccsrc/minddata/dataset/api/execute.cc    | 49 ++++++++++++-----
 .../minddata/dataset/core/ascend_resource.cc |  5 ++
 .../ccsrc/minddata/dataset/core/de_tensor.cc |  4 +-
 .../minddata/dataset/core/device_tensor.cc   | 55 +++++++++++++++++++
 .../minddata/dataset/core/device_tensor.h    |  9 +++
 tests/st/cpp/dataset/test_de.cc              | 50 +++++++++++------
 6 files changed, 139 insertions(+), 33 deletions(-)

diff --git a/mindspore/ccsrc/minddata/dataset/api/execute.cc b/mindspore/ccsrc/minddata/dataset/api/execute.cc
index c4fb4c8c67..602b910b4a 100644
--- a/mindspore/ccsrc/minddata/dataset/api/execute.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/execute.cc
@@ -32,6 +32,7 @@
 #endif
 #ifdef ENABLE_ACL
 #include "minddata/dataset/core/ascend_resource.h"
+#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
 #include "minddata/dataset/kernels/ir/vision/ascend_vision_ir.h"
 #endif
 
@@ -42,6 +43,15 @@ using json = nlohmann::json;
 struct Execute::ExtraInfo {
   std::multimap<std::string, std::vector<uint32_t>> aipp_cfg_;
   bool init_with_shared_ptr_ = true;  // Initial execute object with shared_ptr as default
+#ifdef ENABLE_ACL
+  std::multimap<std::string, std::string> op2para_map_ = {{vision::kDvppCropJpegOperation, "size"},
+                                                          {vision::kDvppDecodeResizeOperation, "size"},
+                                                          {vision::kDvppDecodeResizeCropOperation, "crop_size"},
+                                                          {vision::kDvppDecodeResizeCropOperation, "resize_size"},
+                                                          {vision::kDvppNormalizeOperation, "mean"},
+                                                          {vision::kDvppNormalizeOperation, "std"},
+                                                          {vision::kDvppResizeJpegOperation, "size"}};
+#endif
 };
 
 // FIXME - Temporarily overload Execute to support both TensorOperation and TensorTransform
@@ -221,6 +231,7 @@ Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor
   // Parse TensorTransform transforms_ into TensorOperation ops_
   if (info_->init_with_shared_ptr_) {
     RETURN_IF_NOT_OK(ParseTransforms_());
+    info_->init_with_shared_ptr_ = false;
   }
   CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided");
 
@@ -285,11 +296,13 @@ Status Execute::operator()(const mindspore::MSTensor &input, mindspore::MSTensor
       device_input = std::move(device_output);
     }
     CHECK_FAIL_RETURN_UNEXPECTED(device_input->HasDeviceData(), "Apply transform failed, output tensor has no data");
-    std::shared_ptr<dataset::Tensor> host_output;
-    RETURN_IF_NOT_OK(device_resource_->Pop(device_input, &host_output));
-    *output = mindspore::MSTensor(std::make_shared<DETensor>(host_output));
-    // *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true));  Use in the future
+    // TODO(lizhenglong) waiting for computing department development, hence we pop data onto host temporarily.
+    // std::shared_ptr<dataset::Tensor> host_output;
+    // RETURN_IF_NOT_OK(device_resource_->Pop(device_input, &host_output));
+    // *output = mindspore::MSTensor(std::make_shared<DETensor>(host_output));
+
+    *output = mindspore::MSTensor(std::make_shared<DETensor>(device_input, true));
 #endif
   }
   return Status::OK();
@@ -306,6 +319,7 @@ Status Execute::operator()(const std::vector<MSTensor> &input_tensor_list, std::
   // Parse TensorTransform transforms_ into TensorOperation ops_
   if (info_->init_with_shared_ptr_) {
     RETURN_IF_NOT_OK(ParseTransforms_());
+    info_->init_with_shared_ptr_ = false;
   }
   CHECK_FAIL_RETURN_UNEXPECTED(!ops_.empty(), "Input TensorOperation should be provided");
 
@@ -386,6 +400,7 @@ Status Execute::operator()(const std::vector<MSTensor> &input_tensor_list, std::
 
 std::vector<uint32_t> AippSizeFilter(const std::vector<uint32_t> &resize_para, const std::vector<uint32_t> &crop_para) {
   std::vector<uint32_t> aipp_size;
+
   // Special condition where (no Crop and no Resize) or (no Crop and resize with fixed ratio) will lead to dynamic input
   if ((resize_para.size() == 0 || resize_para.size() == 1) && crop_para.size() == 0) {
     aipp_size = {0, 0};
@@ -408,6 +423,11 @@ std::vector<uint32_t> AippSizeFilter(const std::vector<uint32_t> &resize_para, c
                       : crop_para;
     }
   }
+
+#ifdef ENABLE_ACL
+  aipp_size[0] = DVPP_ALIGN_UP(aipp_size[0], VPC_HEIGHT_ALIGN);  // H
+  aipp_size[1] = DVPP_ALIGN_UP(aipp_size[1], VPC_WIDTH_ALIGN);   // W
+#endif
   return aipp_size;
 }
 
@@ -489,6 +509,7 @@ std::string Execute::AippCfgGenerator() {
 #ifdef ENABLE_ACL
   if (info_->init_with_shared_ptr_) {
     ParseTransforms_();
+    info_->init_with_shared_ptr_ = false;
   }
   std::vector<uint32_t> paras;  // Record the parameters value of each Ascend operators
   for (int32_t i = 0; i < ops_.size(); i++) {
@@ -501,15 +522,9 @@ std::string Execute::AippCfgGenerator() {
       // Define map between operator name and parameter name
       ops_[i]->to_json(&ir_info);
 
-      std::multimap<std::string, std::string> op_list = {{vision::kDvppCropJpegOperation, "size"},
-                                                         {vision::kDvppDecodeResizeOperation, "size"},
-                                                         {vision::kDvppDecodeResizeCropOperation, "crop_size"},
-                                                         {vision::kDvppDecodeResizeCropOperation, "resize_size"},
-                                                         {vision::kDvppNormalizeOperation, "mean"},
-                                                         {vision::kDvppNormalizeOperation, "std"},
-                                                         {vision::kDvppResizeJpegOperation, "size"}};
+
       // Collect the information of operators
-      for (auto pos = op_list.equal_range(ops_[i]->Name()); pos.first != pos.second; ++pos.first) {
+      for (auto pos = info_->op2para_map_.equal_range(ops_[i]->Name()); pos.first != pos.second; ++pos.first) {
         auto paras_key_word = pos.first->second;
         paras = ir_info[paras_key_word].get<std::vector<uint32_t>>();
         info_->aipp_cfg_.insert(std::make_pair(ops_[i]->Name(), paras));
@@ -578,6 +593,11 @@ std::string Execute::AippCfgGenerator() {
     }
     outfile << "}";
     outfile.close();
+  } else {  // For case GPU or CPU
+    outfile << "aipp_op {" << std::endl << "}";
+    outfile.close();
+    MS_LOG(WARNING) << "Your runtime environment is not Ascend310, this config file will lead to undefined behavior on "
+                       "computing result. Please check that.";
   }
 #endif
   return config_location;
@@ -608,8 +628,9 @@ Status Execute::ParseTransforms_() {
 }
 
 Status Execute::validate_device_() {
-  if (device_type_ != MapTargetDevice::kCpu && device_type_ != MapTargetDevice::kAscend310) {
-    std::string err_msg = "Your input device is not supported. (Option: CPU or Ascend310)";
+  if (device_type_ != MapTargetDevice::kCpu && device_type_ != MapTargetDevice::kAscend310 &&
+      device_type_ != MapTargetDevice::kGpu) {
+    std::string err_msg = "Your input device is not supported. (Option: CPU or GPU or Ascend310)";
     MS_LOG(ERROR) << err_msg;
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
diff --git a/mindspore/ccsrc/minddata/dataset/core/ascend_resource.cc b/mindspore/ccsrc/minddata/dataset/core/ascend_resource.cc
index aecf6c565a..4d18eb0911 100644
--- a/mindspore/ccsrc/minddata/dataset/core/ascend_resource.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/ascend_resource.cc
@@ -18,6 +18,7 @@
 #include "include/api/types.h"
 #include "minddata/dataset/include/type_id.h"
 #include "minddata/dataset/core/ascend_resource.h"
+#include "minddata/dataset/kernels/image/image_utils.h"
 
 namespace mindspore {
 namespace dataset {
@@ -59,6 +60,10 @@ Status AscendResource::Sink(const mindspore::MSTensor &host_input, std::shared_p
                                            MSTypeToDEType(static_cast<TypeId>(host_input.DataType())),
                                            (const uchar *)(host_input.Data().get()), &de_input);
   RETURN_IF_NOT_OK(rc);
+  if (!IsNonEmptyJPEG(de_input)) {
+    RETURN_STATUS_UNEXPECTED("Dvpp operators can only support processing JPEG image");
+  }
+
   APP_ERROR ret = processor_->H2D_Sink(de_input, *device_input);
   if (ret != APP_ERR_OK) {
     ascend_resource_->Release();
diff --git a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
index 8ac6fd5f77..d65306cbf6 100644
--- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
@@ -87,8 +87,8 @@ const std::vector<int64_t> &DETensor::Shape() const { return shape_; }
 std::shared_ptr<const void> DETensor::Data() const {
 #ifndef ENABLE_ANDROID
   if (is_device_) {
-    MS_LOG(ERROR) << "Data() always return the data on the host.";
-    return nullptr;
+    ASSERT_NULL(device_tensor_impl_);
+    return std::shared_ptr<const void>(device_tensor_impl_->GetHostBuffer(), [](const void *) {});
   }
 #endif
   return std::shared_ptr<const void>(tensor_impl_->GetBuffer(), [](const void *) {});
diff --git a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc
index fc048eb219..65d5bbf615 100644
--- a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc
@@ -16,6 +16,9 @@
 
 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/core/device_tensor.h"
+#ifdef ENABLE_ACL
+#include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h"
+#endif
 #include "minddata/dataset/util/status.h"
 
 namespace mindspore {
@@ -25,6 +28,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten
   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
   data_allocator_ = std::make_unique<Allocator<uint8_t>>(global_pool);
   device_data_type_ = type;
+  host_data_tensor_ = nullptr;
 }
 
 Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) {
@@ -80,6 +84,20 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
   return Status::OK();
 }
 
+const unsigned char *DeviceTensor::GetHostBuffer() {
+#ifdef ENABLE_ACL
+  Status rc = DataPop_(&host_data_tensor_);
+  if (!rc.IsOk()) {
+    MS_LOG(ERROR) << "Pop device data onto host fail, a nullptr will be returned";
+    return nullptr;
+  }
+#endif
+  if (!host_data_tensor_) {
+    return nullptr;
+  }
+  return host_data_tensor_->GetBuffer();
+}
+
 uint8_t *DeviceTensor::GetDeviceBuffer() { return device_data_; }
 
 uint8_t *DeviceTensor::GetDeviceMutableBuffer() { return device_data_; }
 
@@ -109,5 +127,42 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) {
   size_ = new_size;
   return Status::OK();
 }
+
+#ifdef ENABLE_ACL
+Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
+  void *resHostBuf = nullptr;
+  APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize());
+  if (ret != APP_ERR_OK) {
+    MS_LOG(ERROR) << "Failed to allocate memory from host ret = " << ret;
+    return Status(StatusCode::kMDNoSpace);
+  }
+  std::shared_ptr<void> outBuf(resHostBuf, aclrtFreeHost);
+  auto processedInfo_ = outBuf;
+  // Memcpy the output data from device to host
+  ret = aclrtMemcpy(outBuf.get(), this->DeviceDataSize(), this->GetDeviceBuffer(), this->DeviceDataSize(),
+                    ACL_MEMCPY_DEVICE_TO_HOST);
+  if (ret != APP_ERR_OK) {
+    MS_LOG(ERROR) << "Failed to copy memory from device to host, ret = " << ret;
+    return Status(StatusCode::kMDOutOfMemory);
+  }
+  auto data = std::static_pointer_cast<unsigned char>(processedInfo_);
+  unsigned char *ret_ptr = data.get();
+
+  mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize();
+  const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1});
+  uint32_t _output_width_ = this->GetYuvStrideShape()[0];
+  uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1];
+  uint32_t _output_height_ = this->GetYuvStrideShape()[2];
+  uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3];
+  const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8);
+  mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor);
+  (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_);
+  if (!(*host_tensor)->HasData()) {
+    return Status(StatusCode::kMCDeviceError);
+  }
+  MS_LOG(INFO) << "Successfully pop DeviceTensor data onto host";
+  return Status::OK();
+}
+#endif
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/core/device_tensor.h b/mindspore/ccsrc/minddata/dataset/core/device_tensor.h
index 9490f578e0..32d0a7b63a 100644
--- a/mindspore/ccsrc/minddata/dataset/core/device_tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/core/device_tensor.h
@@ -43,6 +43,8 @@ class DeviceTensor : public Tensor {
                                       const uint32_t &dataSize, const std::vector<uint32_t> &attributes,
                                       std::shared_ptr<DeviceTensor> *out);
 
+  const unsigned char *GetHostBuffer();
+
   uint8_t *GetDeviceBuffer();
 
   uint8_t *GetDeviceMutableBuffer();
@@ -61,6 +63,10 @@ class DeviceTensor : public Tensor {
   Status SetYuvStrideShape_(const uint32_t &width, const uint32_t &widthStride, const uint32_t &height,
                             const uint32_t &heightStride);
 
+#ifdef ENABLE_ACL
+  Status DataPop_(std::shared_ptr<Tensor> *host_tensor);
+#endif
+
   std::vector<uint32_t> YUV_shape_;  // YUV_shape_ = {width, widthStride, height, heightStride}
 
   uint8_t *device_data_;
@@ -68,6 +74,9 @@ class DeviceTensor : public Tensor {
   uint32_t size_;
 
   DataType device_data_type_;
+
+  // We use this Tensor to store device_data when DeviceTensor pop onto host
+  std::shared_ptr<Tensor> host_data_tensor_;
 };
 
 }  // namespace dataset
diff --git a/tests/st/cpp/dataset/test_de.cc b/tests/st/cpp/dataset/test_de.cc
index c4b66f6889..5d24a8c333 100644
--- a/tests/st/cpp/dataset/test_de.cc
+++ b/tests/st/cpp/dataset/test_de.cc
@@ -67,7 +67,8 @@ TEST_F(TestDE, TestDvpp) {
 #ifdef ENABLE_ACL
   // Read images from target directory
   std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
-  mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
+  Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
+  ASSERT_TRUE(rc.IsOk());
   auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
 
   // Define dvpp transform
@@ -77,13 +78,13 @@ TEST_F(TestDE, TestDvpp) {
   mindspore::dataset::Execute Transform(decode_resize_crop, MapTargetDevice::kAscend310);
 
   // Apply transform on images
-  Status rc = Transform(image, &image);
+  rc = Transform(image, &image);
   std::string aipp_cfg = Transform.AippCfgGenerator();
   ASSERT_EQ(aipp_cfg, "./aipp.cfg");
 
   // Check image info
   ASSERT_TRUE(rc.IsOk());
-  ASSERT_EQ(image.Shape().size(), 3);
+  ASSERT_EQ(image.Shape().size(), 2);
   int32_t real_h = 0;
   int32_t real_w = 0;
   int32_t remainder = crop_paras[crop_paras.size() - 1] % 16;
@@ -94,15 +95,21 @@ TEST_F(TestDE, TestDvpp) {
     real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1;
     real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder;
   }
-  /* TODO Use in the future after compute college finish their job
+
   ASSERT_EQ(image.Shape()[0], real_h);  // For image in YUV format, each pixel takes 1.5 byte
   ASSERT_EQ(image.Shape()[1], real_w);
   ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);
-  */
+
+  ASSERT_TRUE(image.Data().get() != nullptr);
+  ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8);
+  ASSERT_EQ(image.IsDevice(), true);
+
+  /* This is the criterion for previous method(Without pop)
   ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w);  // For image in YUV format, each pixel takes 1.5 byte
   ASSERT_EQ(image.Shape()[1], 1);
   ASSERT_EQ(image.Shape()[2], 1);
   ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);
+  */
 #endif
 }
@@ -110,7 +117,8 @@ TEST_F(TestDE, TestDvppSinkMode) {
 #ifdef ENABLE_ACL
   // Read images from target directory
   std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
-  mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
+  Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
+  ASSERT_TRUE(rc.IsOk());
   auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
 
   // Define dvpp transform
@@ -123,11 +131,11 @@ TEST_F(TestDE, TestDvppSinkMode) {
   mindspore::dataset::Execute Transform(trans_list, MapTargetDevice::kAscend310);
 
   // Apply transform on images
-  Status rc = Transform(image, &image);
+  rc = Transform(image, &image);
 
   // Check image info
   ASSERT_TRUE(rc.IsOk());
-  ASSERT_EQ(image.Shape().size(), 3);
+  ASSERT_EQ(image.Shape().size(), 2);
   int32_t real_h = 0;
   int32_t real_w = 0;
   int32_t remainder = crop_paras[crop_paras.size() - 1] % 16;
@@ -138,10 +146,13 @@ TEST_F(TestDE, TestDvppSinkMode) {
     real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1;
     real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder;
   }
-  ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w);  // For image in YUV format, each pixel takes 1.5 byte
-  ASSERT_EQ(image.Shape()[1], 1);
-  ASSERT_EQ(image.Shape()[2], 1);
+  ASSERT_EQ(image.Shape()[0], real_h);  // For image in YUV format, each pixel takes 1.5 byte
+  ASSERT_EQ(image.Shape()[1], real_w);
   ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);
+
+  ASSERT_TRUE(image.Data().get() != nullptr);
+  ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8);
+  ASSERT_EQ(image.IsDevice(), true);
   Transform.DeviceMemoryRelease();
 #endif
 }
@@ -149,7 +160,8 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) {
 #ifdef ENABLE_ACL
   std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
-  mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
+  Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
+  ASSERT_TRUE(rc.IsOk());
   auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
 
   // Define dvpp transform
@@ -170,11 +182,11 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) {
   ASSERT_EQ(aipp_cfg, "./aipp.cfg");
 
   // Apply transform on images
-  Status rc = Transform(image, &image);
+  rc = Transform(image, &image);
 
   // Check image info
   ASSERT_TRUE(rc.IsOk());
-  ASSERT_EQ(image.Shape().size(), 3);
+  ASSERT_EQ(image.Shape().size(), 2);
   int32_t real_h = 0;
   int32_t real_w = 0;
   int32_t remainder = crop_paras[crop_paras.size() - 1] % 16;
@@ -185,10 +197,14 @@ TEST_F(TestDE, TestDvppDecodeResizeCropNormalize) {
     real_h = (crop_paras[0] % 2 == 0) ? crop_paras[0] : crop_paras[0] + 1;
     real_w = (remainder == 0) ? crop_paras[1] : crop_paras[1] + 16 - remainder;
   }
-  ASSERT_EQ(image.Shape()[0], 1.5 * real_h * real_w);  // For image in YUV format, each pixel takes 1.5 byte
-  ASSERT_EQ(image.Shape()[1], 1);
-  ASSERT_EQ(image.Shape()[2], 1);
+
+  ASSERT_EQ(image.Shape()[0], real_h);  // For image in YUV format, each pixel takes 1.5 byte
+  ASSERT_EQ(image.Shape()[1], real_w);
   ASSERT_EQ(image.DataSize(), real_h * real_w * 1.5);
+
+  ASSERT_TRUE(image.Data().get() != nullptr);
+  ASSERT_EQ(image.DataType(), mindspore::DataType::kNumberTypeUInt8);
+  ASSERT_EQ(image.IsDevice(), true);
   Transform.DeviceMemoryRelease();
 #endif
 }
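
Usage sketch (illustrative only, not part of the patch): after this change a dvpp transform run
on Ascend310 returns an MSTensor that keeps its data on the device, and calling MSTensor::Data()
goes through DETensor::Data() -> DeviceTensor::GetHostBuffer() -> DataPop_() to copy the YUV
result back to the host. The snippet mirrors tests/st/cpp/dataset/test_de.cc; the image path,
the crop/resize sizes and the exact DvppDecodeResizeCropJpeg constructor are assumptions that
may differ between MindSpore versions, and includes/namespaces are omitted as in the test.

    // Sketch, assuming an Ascend310 environment built with ENABLE_ACL.
    std::shared_ptr<mindspore::dataset::Tensor> de_tensor;
    Status rc = mindspore::dataset::Tensor::CreateFromFile("./data/dataset/apple.jpg", &de_tensor);
    ASSERT_TRUE(rc.IsOk());  // non-JPEG input is now rejected later in AscendResource::Sink
    auto image = MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));

    // Hypothetical parameters; the real test defines its own crop_paras/resize_paras.
    std::vector<uint32_t> resize_paras = {256, 256};
    std::vector<uint32_t> crop_paras = {224, 224};
    auto decode_resize_crop =
      std::make_shared<mindspore::dataset::vision::DvppDecodeResizeCropJpeg>(crop_paras, resize_paras);
    mindspore::dataset::Execute transform(decode_resize_crop, MapTargetDevice::kAscend310);

    rc = transform(image, &image);  // output tensor now reports IsDevice() == true
    ASSERT_TRUE(rc.IsOk());
    std::string aipp_cfg = transform.AippCfgGenerator();  // H/W are aligned via DVPP_ALIGN_UP

    const void *host_data = image.Data().get();  // pops device data onto the host via GetHostBuffer()
    ASSERT_TRUE(host_data != nullptr);

    transform.DeviceMemoryRelease();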