|
|
|
@ -286,7 +286,8 @@ static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
|
|
|
|
|
}
|
|
|
|
|
int *keep = keep_out->mutable_data<int>({num_to_keep}, ctx.GetPlace());
|
|
|
|
|
memory::Copy(place, keep, platform::CPUPlace(), keep_vec.data(),
|
|
|
|
|
sizeof(int) * num_to_keep, 0);
|
|
|
|
|
sizeof(int) * num_to_keep, ctx.stream());
|
|
|
|
|
ctx.Wait();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
@ -329,7 +330,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
|
|
|
|
|
int keep_num;
|
|
|
|
|
const auto gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
|
|
|
|
|
memory::Copy(platform::CPUPlace(), &keep_num, gpu_place,
|
|
|
|
|
keep_num_t.data<int>(), sizeof(int), 0);
|
|
|
|
|
keep_num_t.data<int>(), sizeof(int), ctx.stream());
|
|
|
|
|
ctx.Wait();
|
|
|
|
|
keep_index.Resize({keep_num});
|
|
|
|
|
|
|
|
|
|
Tensor scores_filter, proposals_filter;
|
|
|
|
@ -438,9 +440,12 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
|
|
|
|
|
Tensor &scores = box_score_pair.second;
|
|
|
|
|
|
|
|
|
|
memory::Copy(place, rpn_rois_data + num_proposals * 4, place,
|
|
|
|
|
proposals.data<T>(), sizeof(T) * proposals.numel(), 0);
|
|
|
|
|
proposals.data<T>(), sizeof(T) * proposals.numel(),
|
|
|
|
|
dev_ctx.stream());
|
|
|
|
|
memory::Copy(place, rpn_roi_probs_data + num_proposals, place,
|
|
|
|
|
scores.data<T>(), sizeof(T) * scores.numel(), 0);
|
|
|
|
|
scores.data<T>(), sizeof(T) * scores.numel(),
|
|
|
|
|
dev_ctx.stream());
|
|
|
|
|
dev_ctx.Wait();
|
|
|
|
|
num_proposals += proposals.dims()[0];
|
|
|
|
|
offset.emplace_back(num_proposals);
|
|
|
|
|
}
|
|
|
|
|