Merge pull request #6488 from sweetsky0901/detection_output
add Detection output op for SSDdel_some_in_makelist
commit
90a33dddad
@ -0,0 +1,21 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
// CUDA kernel registration for the detection_output operator.
#include "paddle/operators/detection_output_op.h"

namespace ops = paddle::operators;
// Register the GPU kernels, instantiated for float and double element types.
// The kernel body lives in detection_output_op.h and is shared with the CPU
// build; only the device context differs.
REGISTER_OP_CUDA_KERNEL(
    detection_output,
    ops::DetectionOutputKernel<paddle::platform::CUDADeviceContext, float>,
    ops::DetectionOutputKernel<paddle::platform::CUDADeviceContext, double>);
|
@ -0,0 +1,167 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/framework/tensor.h"
|
||||
#include "paddle/operators/math/detection_util.h"
|
||||
#include "paddle/operators/math/math_function.h"
|
||||
#include "paddle/operators/math/softmax.h"
|
||||
#include "paddle/operators/strided_memcpy.h"
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
template <typename DeviceContext, typename T>
|
||||
inline void transpose_fun(const framework::ExecutionContext& context,
|
||||
const framework::Tensor& src,
|
||||
framework::Tensor* dst) {
|
||||
int input_nums = src.dims()[0];
|
||||
int offset = 0;
|
||||
for (int j = 0; j < input_nums; ++j) {
|
||||
framework::Tensor in_p_tensor = src.Slice(j, j + 1);
|
||||
std::vector<int64_t> shape_vec(
|
||||
{in_p_tensor.dims()[0], in_p_tensor.dims()[1], in_p_tensor.dims()[3],
|
||||
in_p_tensor.dims()[4], in_p_tensor.dims()[2]});
|
||||
framework::DDim shape(framework::make_ddim(shape_vec));
|
||||
framework::Tensor in_p_tensor_transpose;
|
||||
in_p_tensor_transpose.mutable_data<T>(shape, context.GetPlace());
|
||||
std::vector<int> shape_axis({0, 1, 3, 4, 2});
|
||||
math::Transpose<DeviceContext, T, 5> trans5;
|
||||
trans5(context.template device_context<DeviceContext>(), in_p_tensor,
|
||||
&in_p_tensor_transpose, shape_axis);
|
||||
auto dst_stride = framework::stride(dst->dims());
|
||||
auto src_stride = framework::stride(in_p_tensor_transpose.dims());
|
||||
StridedMemcpy<T>(context.device_context(), in_p_tensor_transpose.data<T>(),
|
||||
src_stride, in_p_tensor_transpose.dims(), dst_stride,
|
||||
dst->data<T>() + offset);
|
||||
offset += in_p_tensor_transpose.dims()[4] * src_stride[4];
|
||||
}
|
||||
}
|
||||
template <typename DeviceContext, typename T>
|
||||
class DetectionOutputKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& context) const override {
|
||||
const framework::Tensor* in_loc = context.Input<framework::Tensor>("Loc");
|
||||
const framework::Tensor* in_conf = context.Input<framework::Tensor>("Conf");
|
||||
const framework::Tensor* in_priorbox =
|
||||
context.Input<framework::Tensor>("PriorBox");
|
||||
auto* out = context.Output<framework::Tensor>("Out");
|
||||
int num_classes = context.template Attr<int>("num_classes");
|
||||
int top_k = context.template Attr<int>("top_k");
|
||||
int nms_top_k = context.template Attr<int>("nms_top_k");
|
||||
int background_label_id = context.template Attr<int>("background_label_id");
|
||||
float nms_threshold = context.template Attr<float>("nms_threshold");
|
||||
float confidence_threshold =
|
||||
context.template Attr<float>("confidence_threshold");
|
||||
size_t batch_size = in_conf->dims()[1];
|
||||
int conf_sum_size = in_conf->numel();
|
||||
// for softmax
|
||||
std::vector<int64_t> conf_shape_softmax_vec(
|
||||
{conf_sum_size / num_classes, num_classes});
|
||||
framework::DDim conf_shape_softmax(
|
||||
framework::make_ddim(conf_shape_softmax_vec));
|
||||
// for knchw => nhwc
|
||||
std::vector<int64_t> loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3],
|
||||
in_loc->dims()[4],
|
||||
in_loc->dims()[2] * in_loc->dims()[0]});
|
||||
std::vector<int64_t> conf_shape_vec(
|
||||
{1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4],
|
||||
in_conf->dims()[2] * in_conf->dims()[0]});
|
||||
framework::DDim loc_shape(framework::make_ddim(loc_shape_vec));
|
||||
framework::DDim conf_shape(framework::make_ddim(conf_shape_vec));
|
||||
framework::Tensor loc_tensor;
|
||||
framework::Tensor conf_tensor;
|
||||
loc_tensor.mutable_data<T>(loc_shape, context.GetPlace());
|
||||
conf_tensor.mutable_data<T>(conf_shape, context.GetPlace());
|
||||
// for cpu
|
||||
framework::Tensor loc_cpu;
|
||||
framework::Tensor conf_cpu;
|
||||
framework::Tensor priorbox_cpu;
|
||||
const T* priorbox_data = in_priorbox->data<T>();
|
||||
transpose_fun<DeviceContext, T>(context, *in_loc, &loc_tensor);
|
||||
transpose_fun<DeviceContext, T>(context, *in_conf, &conf_tensor);
|
||||
conf_tensor.Resize(conf_shape_softmax);
|
||||
math::SoftmaxFunctor<DeviceContext, T>()(
|
||||
context.template device_context<DeviceContext>(), &conf_tensor,
|
||||
&conf_tensor);
|
||||
T* loc_data = loc_tensor.data<T>();
|
||||
T* conf_data = conf_tensor.data<T>();
|
||||
if (platform::is_gpu_place(context.GetPlace())) {
|
||||
loc_cpu.mutable_data<T>(loc_tensor.dims(), platform::CPUPlace());
|
||||
framework::CopyFrom(loc_tensor, platform::CPUPlace(),
|
||||
context.device_context(), &loc_cpu);
|
||||
loc_data = loc_cpu.data<T>();
|
||||
conf_cpu.mutable_data<T>(conf_tensor.dims(), platform::CPUPlace());
|
||||
framework::CopyFrom(conf_tensor, platform::CPUPlace(),
|
||||
context.device_context(), &conf_cpu);
|
||||
conf_data = conf_cpu.data<T>();
|
||||
priorbox_cpu.mutable_data<T>(in_priorbox->dims(), platform::CPUPlace());
|
||||
framework::CopyFrom(*in_priorbox, platform::CPUPlace(),
|
||||
context.device_context(), &priorbox_cpu);
|
||||
priorbox_data = priorbox_cpu.data<T>();
|
||||
}
|
||||
// get decode bboxes
|
||||
size_t num_priors = in_priorbox->numel() / 8;
|
||||
std::vector<std::vector<operators::math::BBox<T>>> all_decoded_bboxes;
|
||||
for (size_t n = 0; n < batch_size; ++n) {
|
||||
std::vector<operators::math::BBox<T>> decoded_bboxes;
|
||||
for (size_t i = 0; i < num_priors; ++i) {
|
||||
size_t prior_offset = i * 8;
|
||||
size_t loc_pred_offset = n * num_priors * 4 + i * 4;
|
||||
std::vector<math::BBox<T>> prior_bbox_vec;
|
||||
math::GetBBoxFromPriorData<T>(priorbox_data + prior_offset, 1,
|
||||
prior_bbox_vec);
|
||||
std::vector<std::vector<T>> prior_bbox_var;
|
||||
math::GetBBoxVarFromPriorData<T>(priorbox_data + prior_offset, 1,
|
||||
prior_bbox_var);
|
||||
std::vector<T> loc_pred_data;
|
||||
for (size_t j = 0; j < 4; ++j)
|
||||
loc_pred_data.push_back(*(loc_data + loc_pred_offset + j));
|
||||
math::BBox<T> bbox = math::DecodeBBoxWithVar<T>(
|
||||
prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data);
|
||||
decoded_bboxes.push_back(bbox);
|
||||
}
|
||||
all_decoded_bboxes.push_back(decoded_bboxes);
|
||||
}
|
||||
std::vector<std::map<size_t, std::vector<size_t>>> all_indices;
|
||||
int num_kept = math::GetDetectionIndices<T>(
|
||||
conf_data, num_priors, num_classes, background_label_id, batch_size,
|
||||
confidence_threshold, nms_top_k, nms_threshold, top_k,
|
||||
all_decoded_bboxes, &all_indices);
|
||||
|
||||
if (num_kept <= 0) {
|
||||
std::vector<int64_t> out_shape_vec({0, 0});
|
||||
framework::DDim out_shape(framework::make_ddim(out_shape_vec));
|
||||
out->Resize(out_shape);
|
||||
return;
|
||||
}
|
||||
std::vector<int64_t> out_shape_vec({num_kept, 7});
|
||||
framework::DDim out_shape(framework::make_ddim(out_shape_vec));
|
||||
out->mutable_data<T>(out_shape, context.GetPlace());
|
||||
framework::Tensor out_cpu;
|
||||
T* out_data = out->data<T>();
|
||||
if (platform::is_gpu_place(context.GetPlace())) {
|
||||
out_cpu.mutable_data<T>(out->dims(), platform::CPUPlace());
|
||||
out_data = out_cpu.data<T>();
|
||||
}
|
||||
math::GetDetectionOutput<T>(conf_data, num_kept, num_priors, num_classes,
|
||||
batch_size, all_indices, all_decoded_bboxes,
|
||||
out_data);
|
||||
if (platform::is_gpu_place(context.GetPlace())) {
|
||||
framework::CopyFrom(out_cpu, platform::CUDAPlace(),
|
||||
context.device_context(), out);
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,57 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from op_test import OpTest
|
||||
|
||||
|
||||
class TestDetectionOutputOp(OpTest):
    # Forward-only test for the detection_output op (SSD detection output).
    # Bug fix: renamed from TestUnpoolOp — the old name was a copy-paste
    # leftover from the unpool op test and misdescribed this test; unittest
    # discovers TestCase subclasses by inheritance, so discovery still works.

    def setUp(self):
        self.op_type = "detection_output"
        self.init_test_case()

        # Input layouts (k, N, C, H, W):
        #   loc.shape  == (1, 4, 4, 1, 1)  -- 4 location offsets per prior
        #   conf.shape == (1, 4, 2, 1, 1)  -- 2 class scores per prior
        loc = np.array([[[[[0.1]], [[0.1]], [[0.1]], [[0.1]]],
                         [[[0.1]], [[0.1]], [[0.1]], [[0.1]]],
                         [[[0.1]], [[0.1]], [[0.1]], [[0.1]]],
                         [[[0.1]], [[0.1]], [[0.1]], [[0.1]]]]])
        conf = np.array([[[[[0.1]], [[0.9]]], [[[0.2]], [[0.8]]],
                          [[[0.3]], [[0.7]]], [[[0.4]], [[0.6]]]]])
        # Four priors, 8 values each: 4 box coordinates + 4 variances.
        priorbox = np.array([
            0.1, 0.1, 0.5, 0.5, 0.1, 0.1, 0.2, 0.2, 0.2, 0.2, 0.6, 0.6, 0.1,
            0.1, 0.2, 0.2, 0.3, 0.3, 0.7, 0.7, 0.1, 0.1, 0.2, 0.2, 0.4, 0.4,
            0.8, 0.8, 0.1, 0.1, 0.2, 0.2
        ])

        # Expected single detection row:
        # [image_id, label, score, xmin, ymin, xmax, ymax].
        output = np.array([
            0, 1, 0.68997443, 0.099959746, 0.099959746, 0.50804031, 0.50804031
        ])
        self.inputs = {
            'Loc': loc.astype('float32'),
            'Conf': conf.astype('float32'),
            'PriorBox': priorbox.astype('float32')
        }
        self.attrs = {
            'num_classes': self.num_classes,
            'top_k': self.top_k,
            'nms_top_k': self.nms_top_k,
            'background_label_id': self.background_label_id,
            'nms_threshold': self.nms_threshold,
            'confidence_threshold': self.confidence_threshold,
        }
        self.outputs = {'Out': output.astype('float32')}

    def test_check_output(self):
        # Compare the op's forward result against the precomputed output.
        self.check_output()

    def init_test_case(self):
        # Operator attributes, grouped here so subclasses can override them
        # to test other configurations.
        self.num_classes = 2
        self.top_k = 10
        self.nms_top_k = 20
        self.background_label_id = 0
        self.nms_threshold = 0.01
        self.confidence_threshold = 0.01
|
||||
|
||||
|
||||
# Standard unittest entry point so the file can be run directly.
if __name__ == '__main__':
    unittest.main()
|
Loading…
Reference in new issue