|
|
@ -109,17 +109,18 @@ std::vector<std::vector<int>> SampleFgBgGt(
|
|
|
|
const platform::CPUDeviceContext& context, Tensor* iou,
|
|
|
|
const platform::CPUDeviceContext& context, Tensor* iou,
|
|
|
|
const Tensor& is_crowd, const int batch_size_per_im,
|
|
|
|
const Tensor& is_crowd, const int batch_size_per_im,
|
|
|
|
const float fg_fraction, const float fg_thresh, const float bg_thresh_hi,
|
|
|
|
const float fg_fraction, const float fg_thresh, const float bg_thresh_hi,
|
|
|
|
const float bg_thresh_lo, std::minstd_rand engine, const bool use_random) {
|
|
|
|
const float bg_thresh_lo, std::minstd_rand engine, const bool use_random,
|
|
|
|
|
|
|
|
const bool is_cascade_rcnn, const Tensor& rpn_rois) {
|
|
|
|
std::vector<int> fg_inds;
|
|
|
|
std::vector<int> fg_inds;
|
|
|
|
std::vector<int> bg_inds;
|
|
|
|
std::vector<int> bg_inds;
|
|
|
|
std::vector<int> gt_inds;
|
|
|
|
std::vector<int> mapped_gt_inds;
|
|
|
|
int64_t gt_num = is_crowd.numel();
|
|
|
|
int64_t gt_num = is_crowd.numel();
|
|
|
|
const int* crowd_data = is_crowd.data<int>();
|
|
|
|
const int* crowd_data = is_crowd.data<int>();
|
|
|
|
T* proposal_to_gt_overlaps = iou->data<T>();
|
|
|
|
T* proposal_to_gt_overlaps = iou->data<T>();
|
|
|
|
int64_t row = iou->dims()[0];
|
|
|
|
int64_t row = iou->dims()[0];
|
|
|
|
int64_t col = iou->dims()[1];
|
|
|
|
int64_t col = iou->dims()[1];
|
|
|
|
float epsilon = 0.00001;
|
|
|
|
float epsilon = 0.00001;
|
|
|
|
|
|
|
|
const T* rpn_rois_dt = rpn_rois.data<T>();
|
|
|
|
// Follow the Faster RCNN's implementation
|
|
|
|
// Follow the Faster RCNN's implementation
|
|
|
|
for (int64_t i = 0; i < row; ++i) {
|
|
|
|
for (int64_t i = 0; i < row; ++i) {
|
|
|
|
const T* v = proposal_to_gt_overlaps + i * col;
|
|
|
|
const T* v = proposal_to_gt_overlaps + i * col;
|
|
|
@ -127,24 +128,37 @@ std::vector<std::vector<int>> SampleFgBgGt(
|
|
|
|
if ((i < gt_num) && (crowd_data[i])) {
|
|
|
|
if ((i < gt_num) && (crowd_data[i])) {
|
|
|
|
max_overlap = -1.0;
|
|
|
|
max_overlap = -1.0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (max_overlap > fg_thresh) {
|
|
|
|
if (is_cascade_rcnn &&
|
|
|
|
|
|
|
|
((rpn_rois_dt[i * 4 + 2] - rpn_rois_dt[i * 4 + 0] + 1) <= 0 ||
|
|
|
|
|
|
|
|
(rpn_rois_dt[i * 4 + 3] - rpn_rois_dt[i * 4 + 1] + 1) <= 0)) {
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (max_overlap >= fg_thresh) {
|
|
|
|
|
|
|
|
// fg mapped gt label index
|
|
|
|
for (int64_t j = 0; j < col; ++j) {
|
|
|
|
for (int64_t j = 0; j < col; ++j) {
|
|
|
|
T val = proposal_to_gt_overlaps[i * col + j];
|
|
|
|
T val = proposal_to_gt_overlaps[i * col + j];
|
|
|
|
auto diff = std::abs(max_overlap - val);
|
|
|
|
auto diff = std::abs(max_overlap - val);
|
|
|
|
if (diff < epsilon) {
|
|
|
|
if (diff < epsilon) {
|
|
|
|
fg_inds.emplace_back(i);
|
|
|
|
fg_inds.emplace_back(i);
|
|
|
|
gt_inds.emplace_back(j);
|
|
|
|
mapped_gt_inds.emplace_back(j);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) {
|
|
|
|
if ((max_overlap >= bg_thresh_lo) && (max_overlap < bg_thresh_hi)) {
|
|
|
|
|
|
|
|
bg_inds.emplace_back(i);
|
|
|
|
bg_inds.emplace_back(i);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::vector<std::vector<int>> res;
|
|
|
|
|
|
|
|
if (is_cascade_rcnn) {
|
|
|
|
|
|
|
|
res.emplace_back(fg_inds);
|
|
|
|
|
|
|
|
res.emplace_back(bg_inds);
|
|
|
|
|
|
|
|
res.emplace_back(mapped_gt_inds);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
// Reservoir Sampling
|
|
|
|
// Reservoir Sampling
|
|
|
|
|
|
|
|
// sampling fg
|
|
|
|
std::uniform_real_distribution<float> uniform(0, 1);
|
|
|
|
std::uniform_real_distribution<float> uniform(0, 1);
|
|
|
|
int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
|
|
|
|
int fg_rois_per_im = std::floor(batch_size_per_im * fg_fraction);
|
|
|
|
int fg_rois_this_image = fg_inds.size();
|
|
|
|
int fg_rois_this_image = fg_inds.size();
|
|
|
@ -156,19 +170,22 @@ std::vector<std::vector<int>> SampleFgBgGt(
|
|
|
|
int rng_ind = std::floor(uniform(engine) * i);
|
|
|
|
int rng_ind = std::floor(uniform(engine) * i);
|
|
|
|
if (rng_ind < fg_rois_per_this_image) {
|
|
|
|
if (rng_ind < fg_rois_per_this_image) {
|
|
|
|
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
|
|
|
|
std::iter_swap(fg_inds.begin() + rng_ind, fg_inds.begin() + i);
|
|
|
|
std::iter_swap(gt_inds.begin() + rng_ind, gt_inds.begin() + i);
|
|
|
|
std::iter_swap(mapped_gt_inds.begin() + rng_ind,
|
|
|
|
|
|
|
|
mapped_gt_inds.begin() + i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::vector<int> new_fg_inds(fg_inds.begin(),
|
|
|
|
std::vector<int> new_fg_inds(fg_inds.begin(),
|
|
|
|
fg_inds.begin() + fg_rois_per_this_image);
|
|
|
|
fg_inds.begin() + fg_rois_per_this_image);
|
|
|
|
std::vector<int> new_gt_inds(gt_inds.begin(),
|
|
|
|
std::vector<int> new_gt_inds(
|
|
|
|
gt_inds.begin() + fg_rois_per_this_image);
|
|
|
|
mapped_gt_inds.begin(),
|
|
|
|
|
|
|
|
mapped_gt_inds.begin() + fg_rois_per_this_image);
|
|
|
|
|
|
|
|
// sampling bg
|
|
|
|
int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
|
|
|
|
int bg_rois_per_image = batch_size_per_im - fg_rois_per_this_image;
|
|
|
|
int bg_rois_this_image = bg_inds.size();
|
|
|
|
int bg_rois_this_image = bg_inds.size();
|
|
|
|
int bg_rois_per_this_image = std::min(bg_rois_per_image, bg_rois_this_image);
|
|
|
|
int bg_rois_per_this_image =
|
|
|
|
|
|
|
|
std::min(bg_rois_per_image, bg_rois_this_image);
|
|
|
|
if (use_random) {
|
|
|
|
if (use_random) {
|
|
|
|
const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
|
|
|
|
const int64_t bg_size = static_cast<int64_t>(bg_inds.size());
|
|
|
|
if (bg_size > bg_rois_per_this_image) {
|
|
|
|
if (bg_size > bg_rois_per_this_image) {
|
|
|
@ -181,10 +198,12 @@ std::vector<std::vector<int>> SampleFgBgGt(
|
|
|
|
}
|
|
|
|
}
|
|
|
|
std::vector<int> new_bg_inds(bg_inds.begin(),
|
|
|
|
std::vector<int> new_bg_inds(bg_inds.begin(),
|
|
|
|
bg_inds.begin() + bg_rois_per_this_image);
|
|
|
|
bg_inds.begin() + bg_rois_per_this_image);
|
|
|
|
std::vector<std::vector<int>> res;
|
|
|
|
//
|
|
|
|
res.emplace_back(new_fg_inds);
|
|
|
|
res.emplace_back(new_fg_inds);
|
|
|
|
res.emplace_back(new_bg_inds);
|
|
|
|
res.emplace_back(new_bg_inds);
|
|
|
|
res.emplace_back(new_gt_inds);
|
|
|
|
res.emplace_back(new_gt_inds);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return res;
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -231,35 +250,50 @@ std::vector<Tensor> SampleRoisForOneImage(
|
|
|
|
const Tensor& im_info, const int batch_size_per_im, const float fg_fraction,
|
|
|
|
const Tensor& im_info, const int batch_size_per_im, const float fg_fraction,
|
|
|
|
const float fg_thresh, const float bg_thresh_hi, const float bg_thresh_lo,
|
|
|
|
const float fg_thresh, const float bg_thresh_hi, const float bg_thresh_lo,
|
|
|
|
const std::vector<float>& bbox_reg_weights, const int class_nums,
|
|
|
|
const std::vector<float>& bbox_reg_weights, const int class_nums,
|
|
|
|
std::minstd_rand engine, bool use_random) {
|
|
|
|
std::minstd_rand engine, bool use_random, bool is_cascade_rcnn,
|
|
|
|
|
|
|
|
bool is_cls_agnostic) {
|
|
|
|
|
|
|
|
// 1.1 map to original image
|
|
|
|
auto im_scale = im_info.data<T>()[2];
|
|
|
|
auto im_scale = im_info.data<T>()[2];
|
|
|
|
|
|
|
|
Tensor rpn_rois_slice;
|
|
|
|
Tensor rpn_rois;
|
|
|
|
Tensor rpn_rois;
|
|
|
|
rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
|
|
|
|
|
|
|
|
|
|
|
|
if (is_cascade_rcnn) {
|
|
|
|
|
|
|
|
// slice rpn_rois from gt_box_num refer to detectron
|
|
|
|
|
|
|
|
rpn_rois_slice =
|
|
|
|
|
|
|
|
rpn_rois_in.Slice(gt_boxes.dims()[0], rpn_rois_in.dims()[0]);
|
|
|
|
|
|
|
|
rpn_rois.mutable_data<T>(rpn_rois_slice.dims(), context.GetPlace());
|
|
|
|
|
|
|
|
const T* rpn_rois_in_dt = rpn_rois_slice.data<T>();
|
|
|
|
T* rpn_rois_dt = rpn_rois.data<T>();
|
|
|
|
T* rpn_rois_dt = rpn_rois.data<T>();
|
|
|
|
|
|
|
|
for (int i = 0; i < rpn_rois.numel(); ++i) {
|
|
|
|
|
|
|
|
rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
rpn_rois.mutable_data<T>(rpn_rois_in.dims(), context.GetPlace());
|
|
|
|
const T* rpn_rois_in_dt = rpn_rois_in.data<T>();
|
|
|
|
const T* rpn_rois_in_dt = rpn_rois_in.data<T>();
|
|
|
|
|
|
|
|
T* rpn_rois_dt = rpn_rois.data<T>();
|
|
|
|
for (int i = 0; i < rpn_rois.numel(); ++i) {
|
|
|
|
for (int i = 0; i < rpn_rois.numel(); ++i) {
|
|
|
|
rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale;
|
|
|
|
rpn_rois_dt[i] = rpn_rois_in_dt[i] / im_scale;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Tensor boxes;
|
|
|
|
// 1.2 compute overlaps
|
|
|
|
int proposals_num = gt_boxes.dims()[0] + rpn_rois.dims()[0];
|
|
|
|
int proposals_num = gt_boxes.dims()[0] + rpn_rois.dims()[0];
|
|
|
|
|
|
|
|
Tensor boxes;
|
|
|
|
boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
|
|
|
|
boxes.mutable_data<T>({proposals_num, kBoxDim}, context.GetPlace());
|
|
|
|
Concat<T>(context, gt_boxes, rpn_rois, &boxes);
|
|
|
|
Concat<T>(context, gt_boxes, rpn_rois, &boxes);
|
|
|
|
|
|
|
|
|
|
|
|
// Overlaps
|
|
|
|
|
|
|
|
Tensor proposal_to_gt_overlaps;
|
|
|
|
Tensor proposal_to_gt_overlaps;
|
|
|
|
proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]},
|
|
|
|
proposal_to_gt_overlaps.mutable_data<T>({proposals_num, gt_boxes.dims()[0]},
|
|
|
|
context.GetPlace());
|
|
|
|
context.GetPlace());
|
|
|
|
BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps);
|
|
|
|
BboxOverlaps<T>(boxes, gt_boxes, &proposal_to_gt_overlaps);
|
|
|
|
|
|
|
|
|
|
|
|
// Generate proposal index
|
|
|
|
// Generate proposal index
|
|
|
|
std::vector<std::vector<int>> fg_bg_gt = SampleFgBgGt<T>(
|
|
|
|
std::vector<std::vector<int>> fg_bg_gt =
|
|
|
|
context, &proposal_to_gt_overlaps, is_crowd, batch_size_per_im,
|
|
|
|
SampleFgBgGt<T>(context, &proposal_to_gt_overlaps, is_crowd,
|
|
|
|
fg_fraction, fg_thresh, bg_thresh_hi, bg_thresh_lo, engine, use_random);
|
|
|
|
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
|
|
|
|
|
|
|
|
bg_thresh_lo, engine, use_random, is_cascade_rcnn, boxes);
|
|
|
|
std::vector<int> fg_inds = fg_bg_gt[0];
|
|
|
|
std::vector<int> fg_inds = fg_bg_gt[0];
|
|
|
|
std::vector<int> bg_inds = fg_bg_gt[1];
|
|
|
|
std::vector<int> bg_inds = fg_bg_gt[1];
|
|
|
|
std::vector<int> gt_inds = fg_bg_gt[2];
|
|
|
|
std::vector<int> mapped_gt_inds = fg_bg_gt[2]; // mapped_gt_labels
|
|
|
|
|
|
|
|
|
|
|
|
// Gather boxes and labels
|
|
|
|
// Gather boxes and labels
|
|
|
|
Tensor sampled_boxes, sampled_labels, sampled_gts;
|
|
|
|
Tensor sampled_boxes, sampled_labels, sampled_gts;
|
|
|
@ -271,7 +305,8 @@ std::vector<Tensor> SampleRoisForOneImage(
|
|
|
|
sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
|
|
|
|
sampled_labels.mutable_data<int>({boxes_num}, context.GetPlace());
|
|
|
|
sampled_gts.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
|
|
|
|
sampled_gts.mutable_data<T>({fg_num, kBoxDim}, context.GetPlace());
|
|
|
|
GatherBoxesLabels<T>(context, boxes, gt_boxes, gt_classes, fg_inds, bg_inds,
|
|
|
|
GatherBoxesLabels<T>(context, boxes, gt_boxes, gt_classes, fg_inds, bg_inds,
|
|
|
|
gt_inds, &sampled_boxes, &sampled_labels, &sampled_gts);
|
|
|
|
mapped_gt_inds, &sampled_boxes, &sampled_labels,
|
|
|
|
|
|
|
|
&sampled_gts);
|
|
|
|
|
|
|
|
|
|
|
|
// Compute targets
|
|
|
|
// Compute targets
|
|
|
|
Tensor bbox_targets_single;
|
|
|
|
Tensor bbox_targets_single;
|
|
|
@ -305,6 +340,9 @@ std::vector<Tensor> SampleRoisForOneImage(
|
|
|
|
for (int64_t i = 0; i < boxes_num; ++i) {
|
|
|
|
for (int64_t i = 0; i < boxes_num; ++i) {
|
|
|
|
int label = sampled_labels_data[i];
|
|
|
|
int label = sampled_labels_data[i];
|
|
|
|
if (label > 0) {
|
|
|
|
if (label > 0) {
|
|
|
|
|
|
|
|
if (is_cls_agnostic) {
|
|
|
|
|
|
|
|
label = 1;
|
|
|
|
|
|
|
|
}
|
|
|
|
int dst_idx = i * width + kBoxDim * label;
|
|
|
|
int dst_idx = i * width + kBoxDim * label;
|
|
|
|
int src_idx = kBoxDim * i;
|
|
|
|
int src_idx = kBoxDim * i;
|
|
|
|
bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx];
|
|
|
|
bbox_targets_data[dst_idx] = bbox_targets_single_data[src_idx];
|
|
|
@ -356,7 +394,8 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
|
|
|
|
context.Attr<std::vector<float>>("bbox_reg_weights");
|
|
|
|
context.Attr<std::vector<float>>("bbox_reg_weights");
|
|
|
|
int class_nums = context.Attr<int>("class_nums");
|
|
|
|
int class_nums = context.Attr<int>("class_nums");
|
|
|
|
bool use_random = context.Attr<bool>("use_random");
|
|
|
|
bool use_random = context.Attr<bool>("use_random");
|
|
|
|
|
|
|
|
bool is_cascade_rcnn = context.Attr<bool>("is_cascade_rcnn");
|
|
|
|
|
|
|
|
bool is_cls_agnostic = context.Attr<bool>("is_cls_agnostic");
|
|
|
|
PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
|
|
|
|
PADDLE_ENFORCE_EQ(rpn_rois->lod().size(), 1UL,
|
|
|
|
"GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
|
|
|
|
"GenerateProposalLabelsOp rpn_rois needs 1 level of LoD");
|
|
|
|
PADDLE_ENFORCE_EQ(
|
|
|
|
PADDLE_ENFORCE_EQ(
|
|
|
@ -411,7 +450,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel<T> {
|
|
|
|
dev_ctx, rpn_rois_slice, gt_classes_slice, is_crowd_slice,
|
|
|
|
dev_ctx, rpn_rois_slice, gt_classes_slice, is_crowd_slice,
|
|
|
|
gt_boxes_slice, im_info_slice, batch_size_per_im, fg_fraction,
|
|
|
|
gt_boxes_slice, im_info_slice, batch_size_per_im, fg_fraction,
|
|
|
|
fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
|
|
|
|
fg_thresh, bg_thresh_hi, bg_thresh_lo, bbox_reg_weights, class_nums,
|
|
|
|
engine, use_random);
|
|
|
|
engine, use_random, is_cascade_rcnn, is_cls_agnostic);
|
|
|
|
Tensor sampled_rois = tensor_output[0];
|
|
|
|
Tensor sampled_rois = tensor_output[0];
|
|
|
|
Tensor sampled_labels_int32 = tensor_output[1];
|
|
|
|
Tensor sampled_labels_int32 = tensor_output[1];
|
|
|
|
Tensor sampled_bbox_targets = tensor_output[2];
|
|
|
|
Tensor sampled_bbox_targets = tensor_output[2];
|
|
|
@ -513,6 +552,13 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
|
|
|
|
"use_random",
|
|
|
|
"use_random",
|
|
|
|
"Use random sampling to choose foreground and background boxes.")
|
|
|
|
"Use random sampling to choose foreground and background boxes.")
|
|
|
|
.SetDefault(true);
|
|
|
|
.SetDefault(true);
|
|
|
|
|
|
|
|
AddAttr<bool>("is_cascade_rcnn",
|
|
|
|
|
|
|
|
"cascade rcnn sampling policy changed from stage 2.")
|
|
|
|
|
|
|
|
.SetDefault(false);
|
|
|
|
|
|
|
|
AddAttr<bool>(
|
|
|
|
|
|
|
|
"is_cls_agnostic",
|
|
|
|
|
|
|
|
"the box regress will only include fg and bg locations if set true ")
|
|
|
|
|
|
|
|
.SetDefault(false);
|
|
|
|
|
|
|
|
|
|
|
|
AddComment(R"DOC(
|
|
|
|
AddComment(R"DOC(
|
|
|
|
This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
|
|
|
|
This operator can be, for given the GenerateProposalOp output bounding boxes and groundtruth,
|
|
|
|