Refine target_assign_op to unify classification and regression target assignment. (#8326)

* Refine target_assign_op to unify classification and regression target assignment.

* Fix the unit tests.

* Fix conflicts.
Branch: emailweixu-patch-1
Committed by: qingqing01 (via GitHub)
Parent: 9030a65586
Commit: 72bcf72c66

File diff suppressed because it is too large

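As context for the diff below: after this change the op takes a LoD input X, an (N, M) MatchIndices tensor, and a scalar mismatch_value attribute, and produces Out plus OutWeight for either box targets (K = 4) or label targets (K = 1). A minimal NumPy sketch of that unified semantics, written for this note only and not part of the commit (the helper name is made up):

import numpy as np

def target_assign_sketch(x, x_lod, match_indices, mismatch_value):
    # x: LoD tensor of shape (total rows, P, K); match_indices: (N, M), -1 means "no match"
    n, m = match_indices.shape
    _, p, k = x.shape
    out = np.full((n, m, k), mismatch_value, dtype=x.dtype)
    out_wt = np.zeros((n, m, 1), dtype='float32')
    for i in range(n):
        off = x_lod[i]                    # start of the i-th LoD segment of X
        for j in range(m):
            idx = match_indices[i, j]
            if idx > -1:                  # matched entity: copy its row, weight 1
                out[i, j, :] = x[off + idx, j % p, :]
                out_wt[i, j, 0] = 1.0
    return out, out_wt

When a NegIndices input is also fed (the classification case), the negative priors listed there additionally get Out set to mismatch_value with weight 1, which is what the CUDA kernel in the first hunk below does.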
@@ -17,39 +17,41 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-template <typename T>
+template <typename T, typename WT>
 __global__ void NegTargetAssignKernel(const int* neg_indices, const size_t* lod,
-                                      const int num, const int num_prior_box,
-                                      const int background_label,
-                                      int* out_label, T* out_label_wt) {
+                                      const int N, const int M, const int K,
+                                      const int mismatch_value, T* out,
+                                      WT* out_wt) {
   int bidx = blockIdx.x;
   int st = lod[bidx];
   int ed = lod[bidx + 1];

-  int row_start = bidx * num_prior_box;
+  int row_start = bidx * M;
   for (int i = st + threadIdx.x; i < ed; i += blockDim.x) {
     int id = row_start + neg_indices[i];
-    out_label[id] = background_label;
-    out_label_wt[id] = 1.;
+    for (int k = 0; k < K; ++k) {
+      out[id * K + k] = T(mismatch_value);
+      out_wt[id * K + k] = WT(1.);
+    }
   }
 }

-template <typename T>
-struct NegTargetAssignFunctor<platform::CUDADeviceContext, T> {
+template <typename T, typename WT>
+struct NegTargetAssignFunctor<platform::CUDADeviceContext, T, WT> {
   void operator()(const platform::CUDADeviceContext& ctx,
-                  const int* neg_indices, const size_t* lod, const int num,
-                  const int num_prior_box, const int background_label,
-                  int* out_label, T* out_label_wt) {
+                  const int* neg_indices, const size_t* lod, const int N,
+                  const int M, const int K, const int mismatch_value, T* out,
+                  WT* out_wt) {
     const int block_size = 256;
-    const int grid_size = num;
-    NegTargetAssignKernel<T><<<grid_size, block_size, 0, ctx.stream()>>>(
-        neg_indices, lod, num, num_prior_box, background_label, out_label,
-        out_label_wt);
+    const int grid_size = N;
+    NegTargetAssignKernel<T, WT><<<grid_size, block_size, 0, ctx.stream()>>>(
+        neg_indices, lod, N, M, K, mismatch_value, out, out_wt);
   }
 };

-template struct NegTargetAssignFunctor<platform::CUDADeviceContext, float>;
-template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
+template struct NegTargetAssignFunctor<platform::CUDADeviceContext, int, float>;
+template struct NegTargetAssignFunctor<platform::CUDADeviceContext, float,
+                                       float>;

 }  // namespace operators
 }  // namespace paddle
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
     target_assign,
-    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, int, float>,
+    ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, float, float>);

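For readers unfamiliar with the launch convention in the CUDA hunks above: grid_size = N gives one block per LoD segment, and the threads of a block stride over that segment's negative indices. A rough sequential Python equivalent of NegTargetAssignKernel, illustrative only and not part of the patch (the helper name is made up):

def neg_target_assign_sketch(neg_indices, lod, M, K, mismatch_value, out, out_wt):
    # out and out_wt are viewed as arrays of shape (N * M, K); lod holds N + 1 segment offsets
    for bidx in range(len(lod) - 1):               # one CUDA block per LoD segment
        row_start = bidx * M
        for i in range(lod[bidx], lod[bidx + 1]):  # threads stride over [st, ed)
            idx = row_start + neg_indices[i]
            for k in range(K):
                out[idx, k] = mismatch_value       # negatives get the mismatch value
                out_wt[idx, k] = 1.0               # but still count in the loss (weight 1)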
@@ -19,140 +19,113 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-template <typename T>
+template <typename T, typename WT>
 struct TargetAssignFunctor {
-  const T* gt_box_;
-  const int* gt_label_;
+  const T* in_;
   const int* match_indices_;
   const size_t* lod_;
-  const int background_label_;
-  const int64_t num_;
-  const int64_t num_prior_box_;
-
-  T* out_box_;
-  T* out_box_wt_;
-  int* out_label_;
-  T* out_label_wt_;
-
-  TargetAssignFunctor(const T* gt_box, const int* gt_label,
-                      const int* match_indices, const size_t* lod,
-                      const int background_label, const int64_t num,
-                      const int64_t np, T* out_box, T* out_box_wt,
-                      int* out_label, T* out_label_wt)
-      : gt_box_(gt_box),
-        gt_label_(gt_label),
+  const int mismatch_value_;
+  const int64_t N_;
+  const int64_t M_;
+  const int64_t P_;
+  const int64_t K_;
+
+  T* out_;
+  WT* out_wt_;
+
+  TargetAssignFunctor(const T* input, const int* match_indices,
+                      const size_t* lod, const int mismatch_value,
+                      const int64_t N, const int64_t M, const int64_t P,
+                      const int64_t K, T* out, WT* out_wt)
+      : in_(input),
         match_indices_(match_indices),
         lod_(lod),
-        background_label_(background_label),
-        num_(num),
-        num_prior_box_(np),
-        out_box_(out_box),
-        out_box_wt_(out_box_wt),
-        out_label_(out_label),
-        out_label_wt_(out_label_wt) {}
+        mismatch_value_(mismatch_value),
+        N_(N),
+        M_(M),
+        P_(P),
+        K_(K),
+        out_(out),
+        out_wt_(out_wt) {}

   HOSTDEVICE void operator()(size_t i) const {
-    int row = i / num_prior_box_;
-    int col = i - row * num_prior_box_;
+    int h = i / M_;
+    int w = i - h * M_;

-    size_t row_off = lod_[row];
-    int offset = row * num_prior_box_ + col;
+    size_t off = lod_[h];
+    int id = match_indices_[i];

-    int id = match_indices_[offset];
-    T* obox = out_box_ + offset * 4;
-    int* olabel = out_label_ + offset;
-    T* obox_wt = out_box_wt_ + offset;
-    T* olabel_wt = out_label_wt_ + offset;
+    T* out = out_ + i * K_;
+    WT* out_wt = out_wt_ + i;

     if (id > -1) {
-      const T* gtbox = gt_box_ + ((row_off + id) * num_prior_box_ + col) * 4;
-
-      obox[0] = gtbox[0];
-      obox[1] = gtbox[1];
-      obox[2] = gtbox[2];
-      obox[3] = gtbox[3];
-
-      olabel[0] = gt_label_[row_off + id];
-      obox_wt[0] = static_cast<T>(1.);
-      olabel_wt[0] = static_cast<T>(1.);
+      int w_off = w % P_;
+      const T* in = in_ + ((off + id) * P_ + w_off) * K_;
+      for (int64_t k = 0; k < K_; ++k) {
+        out[k] = in[k];
+      }
+      out_wt[0] = static_cast<WT>(1.);
     } else {
-      obox[0] = static_cast<T>(0.);
-      obox[1] = static_cast<T>(0.);
-      obox[2] = static_cast<T>(0.);
-      obox[3] = static_cast<T>(0.);
-      olabel[0] = background_label_;
-      obox_wt[0] = static_cast<T>(0.);
-      olabel_wt[0] = static_cast<T>(0.);
+      for (int64_t k = 0; k < K_; ++k) {
+        out[k] = static_cast<T>(mismatch_value_);
+      }
+      out_wt[0] = static_cast<WT>(0.);
     }
   }
 };

-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, typename WT>
 struct NegTargetAssignFunctor {
   void operator()(const platform::DeviceContext& ctx, const int* neg_indices,
-                  const size_t* lod, const int num, const int num_prior_box,
-                  const int background_label, int* out_label,
-                  T* out_label_wt) const;
+                  const size_t* lod, const int N, const int M, const int K,
+                  const int mismatch_value, T* out, WT* out_wt) const;
 };

-template <typename DeviceContext, typename T>
+template <typename DeviceContext, typename T, typename WT>
 class TargetAssignKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* enc_gt_box = ctx.Input<framework::LoDTensor>("EncodedGTBBox");
-    auto* gt_label = ctx.Input<framework::LoDTensor>("GTScoreLabel");
+    auto* x = ctx.Input<framework::LoDTensor>("X");
     auto* match_indices = ctx.Input<framework::Tensor>("MatchIndices");
-    auto* neg_indices = ctx.Input<framework::LoDTensor>("NegIndices");

-    auto* out_box = ctx.Output<framework::Tensor>("PredBBoxLabel");
-    auto* out_box_wt = ctx.Output<framework::Tensor>("PredBBoxWeight");
-    auto* out_label = ctx.Output<framework::Tensor>("PredScoreLabel");
-    auto* out_label_wt = ctx.Output<framework::Tensor>("PredScoreWeight");
-
-    PADDLE_ENFORCE_EQ(enc_gt_box->lod().size(), 1UL);
-    PADDLE_ENFORCE_EQ(gt_label->lod().size(), 1UL);
-    PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
-
-    int background_label = ctx.Attr<int>("background_label");
+    auto* out = ctx.Output<framework::Tensor>("Out");
+    auto* out_wt = ctx.Output<framework::Tensor>("OutWeight");

-    const T* box_data = enc_gt_box->data<T>();
-    const int* label_data = gt_label->data<int>();
+    PADDLE_ENFORCE_EQ(x->lod().size(), 1UL);
+    int mismatch_value = ctx.Attr<int>("mismatch_value");
+
+    const T* x_data = x->data<T>();
     const int* match_idx_data = match_indices->data<int>();
-    const int* neg_idx_data = neg_indices->data<int>();

-    T* obox_data = out_box->mutable_data<T>(ctx.GetPlace());
-    T* obox_wt_data = out_box_wt->mutable_data<T>(ctx.GetPlace());
-    int* olabel_data = out_label->mutable_data<int>(ctx.GetPlace());
-    T* olabel_wt_data = out_label_wt->mutable_data<T>(ctx.GetPlace());
+    T* out_data = out->mutable_data<T>(ctx.GetPlace());
+    WT* out_wt_data = out_wt->mutable_data<WT>(ctx.GetPlace());

-    int64_t num = match_indices->dims()[0];
-    int64_t num_prior_box = match_indices->dims()[1];
+    int64_t n = match_indices->dims()[0];
+    int64_t m = match_indices->dims()[1];
+    int64_t p = x->dims()[1];
+    int64_t k = x->dims()[2];

-    auto gt_lod = enc_gt_box->lod().back();
-    auto gt_label_lod = gt_label->lod().back();
-    auto neg_lod = neg_indices->lod().back();
-    for (size_t i = 0; i < gt_lod.size(); ++i) {
-      PADDLE_ENFORCE_EQ(gt_lod.data()[i], gt_label_lod.data()[i]);
-    }
-
-    size_t* gt_lod_data = gt_lod.MutableData(ctx.GetPlace());
-    size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
+    auto x_lod = x->lod().back();
+    size_t* x_lod_data = x_lod.MutableData(ctx.GetPlace());

-    TargetAssignFunctor<T> functor(box_data, label_data, match_idx_data,
-                                   gt_lod_data, background_label, num,
-                                   num_prior_box, obox_data, obox_wt_data,
-                                   olabel_data, olabel_wt_data);
+    TargetAssignFunctor<T, WT> functor(x_data, match_idx_data, x_lod_data,
+                                       mismatch_value, n, m, p, k, out_data,
+                                       out_wt_data);

     auto& device_ctx = ctx.template device_context<DeviceContext>();
-    platform::ForRange<DeviceContext> for_range(device_ctx,
-                                                num * num_prior_box);
+    platform::ForRange<DeviceContext> for_range(device_ctx, n * m);
     for_range(functor);

-    NegTargetAssignFunctor<DeviceContext, T> neg_trg_functor;
-    neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, num, num_prior_box,
-                    background_label, olabel_data, olabel_wt_data);
+    auto* neg_indices = ctx.Input<framework::LoDTensor>("NegIndices");
+    if (neg_indices) {
+      PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
+      const int* neg_idx_data = neg_indices->data<int>();
+      auto neg_lod = neg_indices->lod().back();
+      size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
+      NegTargetAssignFunctor<DeviceContext, T, WT> neg_trg_functor;
+      neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, n, m, k,
+                      mismatch_value, out_data, out_wt_data);
+    }
   }
 };

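To make the index arithmetic in TargetAssignFunctor concrete, here is a small worked example with made-up sizes (M priors per instance, P columns and K channels in X):

M, P, K = 120, 120, 4   # e.g. box targets: one 4-vector per prior
i = 250                 # linear index handed to the functor by ForRange
h = i // M              # = 2, the batch item (row of MatchIndices)
w = i - h * M           # = 10, the prior within that item
w_off = w % P           # = 10, the column of X that is read
# With off = lod_[h] and id = match_indices_[i] > -1, the functor copies K values
# starting at in_[((off + id) * P + w_off) * K] into out_[i * K], and sets
# out_wt_[i] = 1; otherwise it writes mismatch_value with weight 0.

The Python reference implementation and the tests in the hunks below compute the same targets with plain array indexing.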
@@ -43,7 +43,7 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
 def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
-                  neg_lod, background_label):
+                  neg_lod, mismatch_value):
     batch_size, num_prior = match_indices.shape

     # init target bbox
@@ -52,7 +52,7 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
     trg_box_wt = np.zeros((batch_size, num_prior, 1)).astype('float32')

     # init target label
     trg_label = np.ones((batch_size, num_prior, 1)).astype('int32')
-    trg_label = trg_label * background_label
+    trg_label = trg_label * mismatch_value
     # init weight for target label
     trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32')
@@ -65,53 +65,90 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
         # target bbox
         for v, c in zip(col_val + gt_start, col_ids[0].tolist()):
             trg_box[i][c][:] = encoded_box[v][c][:]
         # weight for target bbox
         trg_box_wt[i][col_ids] = 1.0

         trg_label[i][col_ids] = gt_label[col_val + gt_start]
         trg_label_wt[i][col_ids] = 1.0
         # set target label weight to 1.0 for the negative samples
-        neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]]
-        trg_label_wt[i][neg_ids] = 1.0
+        if neg_indices is not None:
+            neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]]
+            trg_label_wt[i][neg_ids] = 1.0

     return trg_box, trg_box_wt, trg_label, trg_label_wt


-class TestTargetAssginOp(OpTest):
+class TestTargetAssginFloatType(OpTest):
+    def setUp(self):
+        self.op_type = "target_assign"
+        num_prior = 120
+        num_class = 21
+        gt_lod = [0, 5, 11, 23]
+        neg_lod = [0, 4, 7, 13]
+        mismatch_value = 0
+        batch_size = len(gt_lod) - 1
+        num_gt = gt_lod[-1]
+
+        encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
+        gt_label = np.random.randint(
+            num_class, size=(num_gt, 1)).astype('int32')
+        match_indices, neg_indices = gen_match_and_neg_indices(num_prior,
+                                                               gt_lod, neg_lod)
+        out, out_wt, _, _ = target_assign(encoded_box, gt_label, match_indices,
+                                          neg_indices, gt_lod, neg_lod,
+                                          mismatch_value)
+
+        # assign regression targets
+        x = encoded_box
+        self.inputs = {
+            'X': (x, [gt_lod]),
+            'MatchIndices': match_indices,
+        }
+        self.attrs = {'mismatch_value': mismatch_value}
+        self.outputs = {
+            'Out': out,
+            'OutWeight': out_wt,
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestTargetAssginIntType(OpTest):
     def setUp(self):
         self.op_type = "target_assign"
         num_prior = 120
         num_class = 21
         gt_lod = [0, 5, 11, 23]
         neg_lod = [0, 4, 7, 13]
+        mismatch_value = 0
         batch_size = len(gt_lod) - 1
         num_gt = gt_lod[-1]
-        background_label = 0

         encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
         gt_label = np.random.randint(
             num_class, size=(num_gt, 1)).astype('int32')
         match_indices, neg_indices = gen_match_and_neg_indices(num_prior,
                                                                gt_lod, neg_lod)
-        trg_box, trg_box_wt, trg_label, trg_label_wt = target_assign(
-            encoded_box, gt_label, match_indices, neg_indices, gt_lod, neg_lod,
-            background_label)
+        _, _, out, out_wt = target_assign(encoded_box, gt_label, match_indices,
+                                          neg_indices, gt_lod, neg_lod,
+                                          mismatch_value)
+
+        # assign classification targets
+        x = np.reshape(gt_label, (num_gt, 1, 1))
         self.inputs = {
-            'EncodedGTBBox': (encoded_box, [gt_lod]),
-            'GTScoreLabel': (gt_label, [gt_lod]),
-            'MatchIndices': (match_indices),
+            'X': (x, [gt_lod]),
+            'MatchIndices': match_indices,
             'NegIndices': (neg_indices, [neg_lod]),
         }
-        self.attrs = {'background_label': background_label}
+        self.attrs = {'mismatch_value': mismatch_value}
         self.outputs = {
-            'PredBBoxLabel': (trg_box),
-            'PredBBoxWeight': (trg_box_wt),
-            'PredScoreLabel': (trg_label),
-            'PredScoreWeight': (trg_label_wt),
+            'Out': out,
+            'OutWeight': out_wt,
         }

     def test_check_output(self):
         self.check_output()