Delete the detection_output_op, which had been split into several operators. (#9121)
	
		
	
				
					
				
			
							parent
							
								
									26a9208394
								
							
						
					
					
						commit
						7c1a0b77a0
					
				| @ -1,21 +0,0 @@ | |||||||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 |  | ||||||
| 
 |  | ||||||
| Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| you may not use this file except in compliance with the License. |  | ||||||
| You may obtain a copy of the License at |  | ||||||
| 
 |  | ||||||
|     http://www.apache.org/licenses/LICENSE-2.0
 |  | ||||||
| 
 |  | ||||||
| Unless required by applicable law or agreed to in writing, software |  | ||||||
| distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| See the License for the specific language governing permissions and |  | ||||||
| limitations under the License. */ |  | ||||||
| 
 |  | ||||||
| #include "paddle/fluid/operators/detection_output_op.h" |  | ||||||
| 
 |  | ||||||
// Hands two instantiations of DetectionOutputKernel (CUDADeviceContext with
// float and with double) to the REGISTER_OP_CUDA_KERNEL macro under the
// operator name `detection_output`. The macro itself is defined elsewhere.
| namespace ops = paddle::operators; |  | ||||||
| REGISTER_OP_CUDA_KERNEL( |  | ||||||
|     detection_output, |  | ||||||
|     ops::DetectionOutputKernel<paddle::platform::CUDADeviceContext, float>, |  | ||||||
|     ops::DetectionOutputKernel<paddle::platform::CUDADeviceContext, double>); |  | ||||||
| @ -1,167 +0,0 @@ | |||||||
| /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 |  | ||||||
| 
 |  | ||||||
|    Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
|    you may not use this file except in compliance with the License. |  | ||||||
|    You may obtain a copy of the License at |  | ||||||
| 
 |  | ||||||
|    http://www.apache.org/licenses/LICENSE-2.0
 |  | ||||||
| 
 |  | ||||||
|    Unless required by applicable law or agreed to in writing, software |  | ||||||
|    distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
|    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
|    See the License for the specific language governing permissions and |  | ||||||
|    limitations under the License. */ |  | ||||||
| 
 |  | ||||||
| #pragma once |  | ||||||
| #include "paddle/fluid/framework/op_registry.h" |  | ||||||
| #include "paddle/fluid/framework/tensor.h" |  | ||||||
| #include "paddle/fluid/operators/math/detection_util.h" |  | ||||||
| #include "paddle/fluid/operators/math/math_function.h" |  | ||||||
| #include "paddle/fluid/operators/math/softmax.h" |  | ||||||
| #include "paddle/fluid/operators/strided_memcpy.h" |  | ||||||
| namespace paddle { |  | ||||||
| namespace operators { |  | ||||||
// Re-lays out `src` slice by slice and packs the results into `dst`.
//
// For every index j along axis 0 of `src`, the slice [j, j + 1) is transposed
// with axis order {0, 1, 3, 4, 2} into a temporary tensor allocated on
// context.GetPlace(), and that temporary is then copied into `dst` with
// StridedMemcpy, starting `offset` elements past dst->data<T>().
//
// `src` is indexed with dims()[0..4] and transposed with a rank-5 functor, so
// it is expected to be a 5-D tensor. `dst` must already own its buffer:
// dst->data<T>() is read here and nothing in this function allocates it.
| template <typename DeviceContext, typename T> |  | ||||||
| inline void transpose_fun(const framework::ExecutionContext& context, |  | ||||||
|                           const framework::Tensor& src, |  | ||||||
|                           framework::Tensor* dst) { |  | ||||||
|   int input_nums = src.dims()[0]; |  | ||||||
    // Running element offset into dst at which the next slice is written.
|   int offset = 0; |  | ||||||
|   for (int j = 0; j < input_nums; ++j) { |  | ||||||
|     framework::Tensor in_p_tensor = src.Slice(j, j + 1); |  | ||||||
      // Shape of the slice after moving its axis 2 to the last position.
|     std::vector<int64_t> shape_vec( |  | ||||||
|         {in_p_tensor.dims()[0], in_p_tensor.dims()[1], in_p_tensor.dims()[3], |  | ||||||
|          in_p_tensor.dims()[4], in_p_tensor.dims()[2]}); |  | ||||||
|     framework::DDim shape(framework::make_ddim(shape_vec)); |  | ||||||
|     framework::Tensor in_p_tensor_transpose; |  | ||||||
|     in_p_tensor_transpose.mutable_data<T>(shape, context.GetPlace()); |  | ||||||
      // Axis permutation matching shape_vec above.
|     std::vector<int> shape_axis({0, 1, 3, 4, 2}); |  | ||||||
|     math::Transpose<DeviceContext, T, 5> trans5; |  | ||||||
|     trans5(context.template device_context<DeviceContext>(), in_p_tensor, |  | ||||||
|            &in_p_tensor_transpose, shape_axis); |  | ||||||
      // The copy reads with the temporary's strides and writes with dst's
      // strides, so the slice lands inside dst's own layout.
|     auto dst_stride = framework::stride(dst->dims()); |  | ||||||
|     auto src_stride = framework::stride(in_p_tensor_transpose.dims()); |  | ||||||
|     StridedMemcpy<T>(context.device_context(), in_p_tensor_transpose.data<T>(), |  | ||||||
|                      src_stride, in_p_tensor_transpose.dims(), dst_stride, |  | ||||||
|                      dst->data<T>() + offset); |  | ||||||
      // Advance by (last-axis length) * (last-axis stride) of the temporary.
      // NOTE(review): presumably src_stride[4] is 1, making this the slice's
      // last-axis length; framework::stride is not visible here, so confirm.
|     offset += in_p_tensor_transpose.dims()[4] * src_stride[4]; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
// Kernel that turns location predictions, class confidences and prior boxes
// into a table of kept detections.
//
// Inputs read from the context: "Loc", "Conf", "PriorBox". Output: "Out".
// Attributes read: num_classes, top_k, nms_top_k, background_label_id,
// nms_threshold, confidence_threshold.
//
// Compute() proceeds as follows:
//   1. Re-lays out Loc and Conf into temporaries with transpose_fun.
//   2. Applies softmax in place to Conf viewed as
//      (numel / num_classes, num_classes).
//   3. When running on a GPU place, copies Loc, Conf and PriorBox into CPU
//      tensors; everything after that reads through host pointers.
//   4. Decodes one box per (batch item, prior) with math::DecodeBBoxWithVar.
//   5. Calls math::GetDetectionIndices to choose the detections to keep, then
//      math::GetDetectionOutput to fill a (num_kept, 7) result.
// When num_kept <= 0, Out is only resized to (0, 0) and the function returns
// without allocating or writing it.
| template <typename DeviceContext, typename T> |  | ||||||
| class DetectionOutputKernel : public framework::OpKernel<T> { |  | ||||||
|  public: |  | ||||||
|   void Compute(const framework::ExecutionContext& context) const override { |  | ||||||
|     const framework::Tensor* in_loc = context.Input<framework::Tensor>("Loc"); |  | ||||||
|     const framework::Tensor* in_conf = context.Input<framework::Tensor>("Conf"); |  | ||||||
|     const framework::Tensor* in_priorbox = |  | ||||||
|         context.Input<framework::Tensor>("PriorBox"); |  | ||||||
|     auto* out = context.Output<framework::Tensor>("Out"); |  | ||||||
|     int num_classes = context.template Attr<int>("num_classes"); |  | ||||||
|     int top_k = context.template Attr<int>("top_k"); |  | ||||||
|     int nms_top_k = context.template Attr<int>("nms_top_k"); |  | ||||||
|     int background_label_id = context.template Attr<int>("background_label_id"); |  | ||||||
|     float nms_threshold = context.template Attr<float>("nms_threshold"); |  | ||||||
|     float confidence_threshold = |  | ||||||
|         context.template Attr<float>("confidence_threshold"); |  | ||||||
      // The batch size is taken from axis 1 of Conf, not axis 0.
|     size_t batch_size = in_conf->dims()[1]; |  | ||||||
|     int conf_sum_size = in_conf->numel(); |  | ||||||
|     // 2-D view used for softmax: (numel / num_classes, num_classes)
 |  | ||||||
|     std::vector<int64_t> conf_shape_softmax_vec( |  | ||||||
|         {conf_sum_size / num_classes, num_classes}); |  | ||||||
|     framework::DDim conf_shape_softmax( |  | ||||||
|         framework::make_ddim(conf_shape_softmax_vec)); |  | ||||||
|     // for knchw => nhwc: target shape is (1, d1, d3, d4, d2 * d0) of the input
 |  | ||||||
|     std::vector<int64_t> loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3], |  | ||||||
|                                         in_loc->dims()[4], |  | ||||||
|                                         in_loc->dims()[2] * in_loc->dims()[0]}); |  | ||||||
|     std::vector<int64_t> conf_shape_vec( |  | ||||||
|         {1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4], |  | ||||||
|          in_conf->dims()[2] * in_conf->dims()[0]}); |  | ||||||
|     framework::DDim loc_shape(framework::make_ddim(loc_shape_vec)); |  | ||||||
|     framework::DDim conf_shape(framework::make_ddim(conf_shape_vec)); |  | ||||||
|     framework::Tensor loc_tensor; |  | ||||||
|     framework::Tensor conf_tensor; |  | ||||||
|     loc_tensor.mutable_data<T>(loc_shape, context.GetPlace()); |  | ||||||
|     conf_tensor.mutable_data<T>(conf_shape, context.GetPlace()); |  | ||||||
|     // host-side staging tensors, filled only when running on a GPU place
 |  | ||||||
|     framework::Tensor loc_cpu; |  | ||||||
|     framework::Tensor conf_cpu; |  | ||||||
|     framework::Tensor priorbox_cpu; |  | ||||||
|     const T* priorbox_data = in_priorbox->data<T>(); |  | ||||||
|     transpose_fun<DeviceContext, T>(context, *in_loc, &loc_tensor); |  | ||||||
|     transpose_fun<DeviceContext, T>(context, *in_conf, &conf_tensor); |  | ||||||
      // Softmax is computed in place: conf_tensor is both input and output.
|     conf_tensor.Resize(conf_shape_softmax); |  | ||||||
|     math::SoftmaxFunctor<DeviceContext, T>()( |  | ||||||
|         context.template device_context<DeviceContext>(), &conf_tensor, |  | ||||||
|         &conf_tensor); |  | ||||||
|     T* loc_data = loc_tensor.data<T>(); |  | ||||||
|     T* conf_data = conf_tensor.data<T>(); |  | ||||||
      // On GPU, redirect the three data pointers to CPU copies because the
      // loops below dereference them on the host.
      // NOTE(review): the host reads follow these copies immediately; whether
      // TensorCopy has completed by then is not visible here, so confirm.
|     if (platform::is_gpu_place(context.GetPlace())) { |  | ||||||
|       loc_cpu.mutable_data<T>(loc_tensor.dims(), platform::CPUPlace()); |  | ||||||
|       framework::TensorCopy(loc_tensor, platform::CPUPlace(), |  | ||||||
|                             context.device_context(), &loc_cpu); |  | ||||||
|       loc_data = loc_cpu.data<T>(); |  | ||||||
|       conf_cpu.mutable_data<T>(conf_tensor.dims(), platform::CPUPlace()); |  | ||||||
|       framework::TensorCopy(conf_tensor, platform::CPUPlace(), |  | ||||||
|                             context.device_context(), &conf_cpu); |  | ||||||
|       conf_data = conf_cpu.data<T>(); |  | ||||||
|       priorbox_cpu.mutable_data<T>(in_priorbox->dims(), platform::CPUPlace()); |  | ||||||
|       framework::TensorCopy(*in_priorbox, platform::CPUPlace(), |  | ||||||
|                             context.device_context(), &priorbox_cpu); |  | ||||||
|       priorbox_data = priorbox_cpu.data<T>(); |  | ||||||
|     } |  | ||||||
|     // decode one bounding box per (batch item, prior)
 |  | ||||||
      // Each prior occupies 8 consecutive values of PriorBox.
      // NOTE(review): presumably 4 box coordinates followed by 4 variances;
      // confirm against GetBBoxFromPriorData / GetBBoxVarFromPriorData.
|     size_t num_priors = in_priorbox->numel() / 8; |  | ||||||
|     std::vector<std::vector<operators::math::BBox<T>>> all_decoded_bboxes; |  | ||||||
|     for (size_t n = 0; n < batch_size; ++n) { |  | ||||||
|       std::vector<operators::math::BBox<T>> decoded_bboxes; |  | ||||||
|       for (size_t i = 0; i < num_priors; ++i) { |  | ||||||
|         size_t prior_offset = i * 8; |  | ||||||
          // Four location values per prior, laid out batch-major.
|         size_t loc_pred_offset = n * num_priors * 4 + i * 4; |  | ||||||
|         std::vector<math::BBox<T>> prior_bbox_vec; |  | ||||||
|         math::GetBBoxFromPriorData<T>(priorbox_data + prior_offset, 1, |  | ||||||
|                                       prior_bbox_vec); |  | ||||||
|         std::vector<std::vector<T>> prior_bbox_var; |  | ||||||
|         math::GetBBoxVarFromPriorData<T>(priorbox_data + prior_offset, 1, |  | ||||||
|                                          prior_bbox_var); |  | ||||||
|         std::vector<T> loc_pred_data; |  | ||||||
|         for (size_t j = 0; j < 4; ++j) |  | ||||||
|           loc_pred_data.push_back(*(loc_data + loc_pred_offset + j)); |  | ||||||
|         math::BBox<T> bbox = math::DecodeBBoxWithVar<T>( |  | ||||||
|             prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data); |  | ||||||
|         decoded_bboxes.push_back(bbox); |  | ||||||
|       } |  | ||||||
|       all_decoded_bboxes.push_back(decoded_bboxes); |  | ||||||
|     } |  | ||||||
      // One map per entry of all_indices, filled by GetDetectionIndices.
      // NOTE(review): presumably one entry per batch item, keyed by class
      // label with the kept prior indices as values; confirm in detection_util.h.
|     std::vector<std::map<size_t, std::vector<size_t>>> all_indices; |  | ||||||
|     int num_kept = math::GetDetectionIndices<T>( |  | ||||||
|         conf_data, num_priors, num_classes, background_label_id, batch_size, |  | ||||||
|         confidence_threshold, nms_top_k, nms_threshold, top_k, |  | ||||||
|         all_decoded_bboxes, &all_indices); |  | ||||||
| 
 |  | ||||||
      // Nothing kept: give Out a (0, 0) shape and leave it unallocated.
|     if (num_kept <= 0) { |  | ||||||
|       std::vector<int64_t> out_shape_vec({0, 0}); |  | ||||||
|       framework::DDim out_shape(framework::make_ddim(out_shape_vec)); |  | ||||||
|       out->Resize(out_shape); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
      // Seven values are written per kept detection.
|     std::vector<int64_t> out_shape_vec({num_kept, 7}); |  | ||||||
|     framework::DDim out_shape(framework::make_ddim(out_shape_vec)); |  | ||||||
|     out->mutable_data<T>(out_shape, context.GetPlace()); |  | ||||||
|     framework::Tensor out_cpu; |  | ||||||
|     T* out_data = out->data<T>(); |  | ||||||
      // On GPU the result is assembled in a host tensor first.
|     if (platform::is_gpu_place(context.GetPlace())) { |  | ||||||
|       out_cpu.mutable_data<T>(out->dims(), platform::CPUPlace()); |  | ||||||
|       out_data = out_cpu.data<T>(); |  | ||||||
|     } |  | ||||||
|     math::GetDetectionOutput<T>(conf_data, num_kept, num_priors, num_classes, |  | ||||||
|                                 batch_size, all_indices, all_decoded_bboxes, |  | ||||||
|                                 out_data); |  | ||||||
      // Copy the host result back into Out when running on GPU.
      // NOTE(review): the destination is a default-constructed CUDAPlace,
      // while Out was allocated on context.GetPlace() above; confirm both
      // refer to the same device.
|     if (platform::is_gpu_place(context.GetPlace())) { |  | ||||||
|       framework::TensorCopy(out_cpu, platform::CUDAPlace(), |  | ||||||
|                             context.device_context(), out); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
| }  // namespace operators
 |  | ||||||
| }  // namespace paddle
 |  | ||||||
											
												
													File diff suppressed because it is too large
													Load Diff
												
											
										
									
								| @ -1,71 +0,0 @@ | |||||||
| #   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. |  | ||||||
| # |  | ||||||
| # Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| # you may not use this file except in compliance with the License. |  | ||||||
| # You may obtain a copy of the License at |  | ||||||
| # |  | ||||||
| #     http://www.apache.org/licenses/LICENSE-2.0 |  | ||||||
| # |  | ||||||
| # Unless required by applicable law or agreed to in writing, software |  | ||||||
| # distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| # See the License for the specific language governing permissions and |  | ||||||
| # limitations under the License. |  | ||||||
| 
 |  | ||||||
| import unittest |  | ||||||
| import numpy as np |  | ||||||
| from op_test import OpTest |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
# Op test for the "detection_output" operator: feeds fixed Loc, Conf and
# PriorBox arrays plus the attributes from init_test_case, and checks the
# operator's "Out" against one hard-coded expected row.
# NOTE(review): the class is named TestUnpoolOp although op_type below is
# "detection_output"; this looks like a copy-paste leftover, so confirm.
| class TestUnpoolOp(OpTest): |  | ||||||
      # Builds self.inputs, self.attrs and self.outputs for the op under test.
|     def setUp(self): |  | ||||||
|         self.op_type = "detection_output" |  | ||||||
          # Must run before self.attrs is built: it sets the attribute values
          # (num_classes, top_k, ...) that the attrs dict reads.
|         self.init_test_case() |  | ||||||
| 
 |  | ||||||
|         #loc.shape ((1, 4, 4, 1, 1)) |  | ||||||
|         #conf.shape ((1, 4, 2, 1, 1)) |  | ||||||
| 
 |  | ||||||
|         loc = np.array([[[[[0.1]], [[0.1]], [[0.1]], [[0.1]]], |  | ||||||
|                          [[[0.1]], [[0.1]], [[0.1]], [[0.1]]], |  | ||||||
|                          [[[0.1]], [[0.1]], [[0.1]], [[0.1]]], |  | ||||||
|                          [[[0.1]], [[0.1]], [[0.1]], [[0.1]]]]]) |  | ||||||
|         conf = np.array([[[[[0.1]], [[0.9]]], [[[0.2]], [[0.8]]], |  | ||||||
|                           [[[0.3]], [[0.7]]], [[[0.4]], [[0.6]]]]]) |  | ||||||
          # Flat array of 32 values.
          # NOTE(review): presumably 4 priors x 8 values (4 box coordinates
          # followed by 4 variances); confirm against the operator.
|         priorbox = np.array([ |  | ||||||
|             0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.6, 0.6, 0.1, |  | ||||||
|             0.1, 0.2, 0.2, 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, 0.4, 0.4, |  | ||||||
|             0.8, 0.8, 0.1, 0.1, 0.2, 0.2 |  | ||||||
|         ]) |  | ||||||
| 
 |  | ||||||
          # Expected result: a single row of 7 values.
          # NOTE(review): presumably [image index, label, score, xmin, ymin,
          # xmax, ymax]; confirm against the operator.
|         output = np.array([ |  | ||||||
|             0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031 |  | ||||||
|         ]) |  | ||||||
          # All arrays are cast to float32 before being handed to OpTest.
|         self.inputs = { |  | ||||||
|             'Loc': loc.astype('float32'), |  | ||||||
|             'Conf': conf.astype('float32'), |  | ||||||
|             'PriorBox': priorbox.astype('float32') |  | ||||||
|         } |  | ||||||
|         self.attrs = { |  | ||||||
|             'num_classes': self.num_classes, |  | ||||||
|             'top_k': self.top_k, |  | ||||||
|             'nms_top_k': self.nms_top_k, |  | ||||||
|             'background_label_id': self.background_label_id, |  | ||||||
|             'nms_threshold': self.nms_threshold, |  | ||||||
|             'confidence_threshold': self.confidence_threshold, |  | ||||||
|         } |  | ||||||
|         self.outputs = {'Out': output.astype('float32')} |  | ||||||
| 
 |  | ||||||
      # Delegates to the check_output helper inherited from OpTest.
|     def test_check_output(self): |  | ||||||
|         self.check_output() |  | ||||||
| 
 |  | ||||||
      # Sets the attribute values that setUp copies into self.attrs.
|     def init_test_case(self): |  | ||||||
|         self.num_classes = 2 |  | ||||||
|         self.top_k = 10 |  | ||||||
|         self.nms_top_k = 20 |  | ||||||
|         self.background_label_id = 0 |  | ||||||
|         self.nms_threshold = 0.01 |  | ||||||
|         self.confidence_threshold = 0.01 |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
# Runs the unittest runner when this file is executed as a script.
| if __name__ == '__main__': |  | ||||||
|     unittest.main() |  | ||||||
					Loading…
					
					
				
		Reference in new issue