parent
6665c49299
commit
ca535d18ab
@ -0,0 +1,91 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "paddle/operators/detection_output_op.h"
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
|
||||||
|
class Detection_output_OpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  // Declares the inputs, outputs, and attributes of the detection_output
  // operator (SSD-style post-processing: decode predicted boxes against
  // prior boxes, softmax the class scores, then run per-class NMS).
  //
  // NOTE(review): the original descriptions were copy-pasted from the spp
  // (spatial pyramid pooling) operator and described the wrong operator;
  // they are corrected here. Input/output/attribute names are unchanged.
  Detection_output_OpMaker(framework::OpProto* proto,
                           framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "Loc",
        "(Tensor) Predicted bounding-box location offsets. "
        "The format of the input tensor is NCHW, where N is batch size, C is "
        "the number of channels, and H and W are the height and width of the "
        "feature.");
    AddInput(
        "Conf",
        "(Tensor) Per-class confidence predictions. "
        "The format of the input tensor is NCHW, where N is batch size, C is "
        "the number of channels, and H and W are the height and width of the "
        "feature.");
    AddInput(
        "PriorBox",
        "(Tensor) Prior (anchor) boxes and their variances. "
        "The format of the input tensor is NCHW, where N is batch size, C is "
        "the number of channels, and H and W are the height and width of the "
        "feature.");
    AddOutput("Out",
              "(Tensor) The output tensor of the detection_output operator: "
              "one row of 7 values per kept detection, i.e. shape "
              "(num_kept, 7).");
    AddAttr<int>("background_label_id",
                 "(int) Label id treated as background and skipped during "
                 "detection.");
    AddAttr<int>("num_classes", "(int) Number of classes to predict.");
    AddAttr<float>("nms_threshold",
                   "(float) IoU threshold used by non-maximum suppression.");
    AddAttr<float>("confidence_threshold",
                   "(float) Detections scored below this value are "
                   "discarded.");
    AddAttr<int>("top_k",
                 "(int) Maximum number of detections kept per image.");
    AddAttr<int>("nms_top_k",
                 "(int) Maximum number of detections considered by NMS.");
    AddComment(R"DOC(
detection_output operator

Decodes the predicted bounding boxes (Loc) against the prior boxes
(PriorBox), applies softmax to the class confidences (Conf), and runs
per-class non-maximum suppression to produce the final detections.

Output shape: $(num\_kept, 7)$ — one row per kept detection.
)DOC");
  }
};
|
||||||
|
|
||||||
|
class Detection_output_Op : public framework::OperatorWithKernel {
|
||||||
|
public:
|
||||||
|
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||||
|
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||||
|
PADDLE_ENFORCE(ctx->HasInput("X"),
|
||||||
|
"Input(X) of Detection_output_Op"
|
||||||
|
"should not be null.");
|
||||||
|
PADDLE_ENFORCE(ctx->HasOutput("Out"),
|
||||||
|
"Output(Out) of Detection_output_Op should not be null.");
|
||||||
|
auto in_x_dims = ctx->GetInputDim("X");
|
||||||
|
int pyramid_height = ctx->Attrs().Get<int>("pyramid_height");
|
||||||
|
PADDLE_ENFORCE(in_x_dims.size() == 4,
|
||||||
|
"Detection_output_ing intput must be of 4-dimensional.");
|
||||||
|
int outlen = ((std::pow(4, pyramid_height) - 1) / (4 - 1)) * in_x_dims[1];
|
||||||
|
std::vector<int64_t> output_shape({in_x_dims[0], outlen});
|
||||||
|
ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
namespace ops = paddle::operators;
// Register the forward-only operator (no gradient kernel is provided).
REGISTER_OP_WITHOUT_GRADIENT(detection_output, ops::Detection_output_Op,
                             ops::Detection_output_OpMaker);
// CPU kernels for float and double element types; the shared kernel body
// lives in detection_output_op.h.
REGISTER_OP_CPU_KERNEL(
    detection_output,
    ops::Detection_output_Kernel<paddle::platform::CPUPlace, float>,
    ops::Detection_output_Kernel<paddle::platform::CPUPlace, double>);
|
@ -0,0 +1,21 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#include "paddle/operators/detection_output_op.h"
|
||||||
|
|
||||||
|
namespace ops = paddle::operators;
// GPU kernels for float and double element types; the shared kernel body
// lives in detection_output_op.h.
REGISTER_OP_GPU_KERNEL(
    detection_output,
    ops::Detection_output_Kernel<paddle::platform::GPUPlace, float>,
    ops::Detection_output_Kernel<paddle::platform::GPUPlace, double>);
|
@ -0,0 +1,114 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "paddle/framework/op_registry.h"
|
||||||
|
#include "paddle/framework/tensor.h"
|
||||||
|
#include "paddle/operators/math/detection_util.h"
|
||||||
|
#include "paddle/operators/math/math_function.h"
|
||||||
|
#include "paddle/operators/math/softmax.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
// Forward kernel for the detection_output operator (SSD-style
// post-processing): flattens/permutes Loc and Conf, softmaxes class scores,
// decodes predicted boxes against prior boxes, runs per-class NMS via
// getDetectionIndices, and writes one 7-value row per kept detection.
template <typename Place, typename T>
class Detection_output_Kernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    // Inputs: location predictions, class confidences, prior (anchor) boxes.
    const framework::Tensor* in_loc = context.Input<framework::Tensor>("Loc");
    const framework::Tensor* in_conf = context.Input<framework::Tensor>("Conf");
    const framework::Tensor* in_priorbox =
        context.Input<framework::Tensor>("PriorBox");
    auto* out = context.Output<framework::Tensor>("Out");
    // Post-processing attributes (see the OpMaker for their meanings).
    int num_classes = context.template Attr<int>("num_classes");
    int top_k = context.template Attr<int>("top_k");
    int nms_top_k = context.template Attr<int>("nms_top_k");
    int background_label_id = context.template Attr<int>("background_label_id");
    float nms_threshold = context.template Attr<float>("nms_threshold");
    float confidence_threshold =
        context.template Attr<float>("confidence_threshold");

    // NOTE(review): dims()[0]/dims()[1] are read as (input_num, batch_size),
    // implying a KNCHW layout for Loc — confirm against the callers.
    int input_num = in_loc->dims()[0];
    int batch_size = in_loc->dims()[1];
    int loc_sum_size = in_loc->numel();
    int conf_sum_size = in_conf->numel();
    // Working buffers: Loc flattened to a single row; Conf reshaped to
    // (num_boxes, num_classes) so softmax runs per box.
    std::vector<int64_t> loc_shape_vec({1, loc_sum_size});
    std::vector<int64_t> conf_shape_vec(
        {conf_sum_size / num_classes, num_classes});
    framework::DDim loc_shape(framework::make_ddim(loc_shape_vec));
    framework::DDim conf_shape(framework::make_ddim(conf_shape_vec));
    framework::Tensor loc_tensor;
    framework::Tensor conf_tensor;
    loc_tensor.mutable_data<T>(loc_shape, context.GetPlace());
    conf_tensor.mutable_data<T>(conf_shape, context.GetPlace());

    // KNCHW ==> NHWC
    // NOTE(review): every iteration calls appendWithPermute with identical
    // arguments — unless appendWithPermute keeps internal append state, this
    // repeats the same work input_num times. Verify against its definition
    // in math/detection_util.h.
    for (int i = 0; i < input_num; ++i) {
      math::appendWithPermute<T>(*in_loc, &loc_tensor);
      math::appendWithPermute<T>(*in_conf, &conf_tensor);
    }
    // softmax
    // In-place softmax over the class dimension of conf_tensor.
    math::SoftmaxFunctor<Place, T>()(context.device_context(), &conf_tensor,
                                     &conf_tensor);
    // get decode bboxes
    // Each prior occupies 8 values (4 box coords + 4 variances), hence /8.
    size_t num_priors = in_priorbox->numel() / 8;
    std::vector<std::vector<operators::math::BBox<T>>> all_decoded_bboxes;
    // NOTE(review): size_t n vs int batch_size is a signed/unsigned
    // comparison; harmless while batch_size >= 0 but worth normalizing.
    for (size_t n = 0; n < batch_size; ++n) {
      std::vector<operators::math::BBox<T>> decoded_bboxes;
      for (size_t i = 0; i < num_priors; ++i) {
        // 8 values per prior box; 4 location offsets per prediction.
        size_t prior_offset = i * 8;
        size_t loc_pred_offset = n * num_priors * 4 + i * 4;
        std::vector<math::BBox<T>> prior_bbox_vec;
        math::getBBoxFromPriorData<T>(in_priorbox->data<T>() + prior_offset, 1,
                                      prior_bbox_vec);
        std::vector<std::vector<T>> prior_bbox_var;
        math::getBBoxVarFromPriorData<T>(in_priorbox->data<T>() + prior_offset,
                                         1, prior_bbox_var);
        // Gather the 4 predicted offsets for this (image, prior) pair.
        std::vector<T> loc_pred_data;
        for (size_t j = 0; j < 4; ++j)
          loc_pred_data.push_back(
              *(loc_tensor.data<T>() + loc_pred_offset + j));
        // Decode the prediction into an absolute box using the prior and
        // its variance.
        math::BBox<T> bbox = math::decodeBBoxWithVar<T>(
            prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data);
        decoded_bboxes.push_back(bbox);
      }
      all_decoded_bboxes.push_back(decoded_bboxes);
    }

    // Per image: class id -> indices of boxes kept after confidence
    // filtering and NMS. num_kept is the total across the batch.
    std::vector<std::map<size_t, std::vector<size_t>>> all_indices;
    int num_kept = math::getDetectionIndices<T>(
        conf_tensor.data<T>(), num_priors, num_classes, background_label_id,
        batch_size, confidence_threshold, nms_top_k, nms_threshold, top_k,
        all_decoded_bboxes, &all_indices);

    framework::Tensor out_tmp;
    if (num_kept <= 0) {
      // No detections survived: resize Out to empty and return.
      // NOTE(review): Out is only Resize()d here, never allocated via
      // mutable_data — confirm downstream consumers tolerate an
      // unallocated empty tensor.
      std::vector<int64_t> out_shape_vec({0, 0});
      framework::DDim out_shape(framework::make_ddim(out_shape_vec));
      out->Resize(out_shape);
      return;
    }
    // One 7-value row per kept detection.
    std::vector<int64_t> out_shape_vec({num_kept, 7});
    framework::DDim out_shape(framework::make_ddim(out_shape_vec));
    out_tmp.mutable_data<T>(out_shape, context.GetPlace());

    T* out_data = out_tmp.data<T>();
    math::getDetectionOutput<T>(conf_tensor.data<T>(), num_kept, num_priors,
                                num_classes, batch_size, all_indices,
                                all_decoded_bboxes, out_data);
    // Publish the staged result: allocate Out, then share out_tmp's buffer.
    out->mutable_data<T>(out_shape, context.GetPlace());
    out->ShareDataWith(out_tmp);
  }
};
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue