@@ -36,11 +36,6 @@ static T SCE(T x, T label) {
   return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x)));
 }
 
-template <typename T>
-static T L1Loss(T x, T y) {
-  return std::abs(y - x);
-}
-
 template <typename T>
 static T L2Loss(T x, T y) {
   return 0.5 * (y - x) * (y - x);
@@ -51,11 +46,6 @@ static T SCEGrad(T x, T label) {
   return 1.0 / (1.0 + std::exp(-x)) - label;
 }
 
-template <typename T>
-static T L1LossGrad(T x, T y) {
-  return x > y ? 1.0 : -1.0;
-}
-
 template <typename T>
 static T L2LossGrad(T x, T y) {
   return x - y;
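
Note: SCE above is the numerically stable form of sigmoid cross-entropy, max(x, 0) - x * label + log(1 + exp(-|x|)), and SCEGrad is its derivative, sigmoid(x) - label. A minimal standalone check (illustrative only; NaiveSCE is a hypothetical comparison helper, not part of the operator):

    #include <cmath>
    #include <cstdio>

    template <typename T>
    static T SCE(T x, T label) {
      // Stable form, copied from the operator above.
      return (x > 0 ? x : 0.0) - x * label +
             std::log(1.0 + std::exp(-std::abs(x)));
    }

    template <typename T>
    static T NaiveSCE(T x, T label) {
      // Textbook form: -label * log(s) - (1 - label) * log(1 - s).
      T s = 1.0 / (1.0 + std::exp(-x));
      return -label * std::log(s) - (1.0 - label) * std::log(1.0 - s);
    }

    int main() {
      // Moderate input: both forms agree (~0.0789).
      std::printf("%f %f\n", SCE<double>(2.5, 1.0), NaiveSCE<double>(2.5, 1.0));
      // Large negative input: exp(-x) overflows, the sigmoid underflows to 0,
      // and the naive form returns inf, while the stable form returns 800.
      std::printf("%f %f\n", SCE<double>(-800.0, 1.0),
                  NaiveSCE<double>(-800.0, 1.0));
      return 0;
    }
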
@@ -131,13 +121,13 @@ template <typename T>
 static void CalcBoxLocationLoss(T* loss, const T* input, Box<T> gt,
                                 std::vector<int> anchors, int an_idx,
                                 int box_idx, int gi, int gj, int grid_size,
-                                int input_size, int stride, T score) {
+                                int input_size, int stride) {
   T tx = gt.x * grid_size - gi;
   T ty = gt.y * grid_size - gj;
   T tw = std::log(gt.w * input_size / anchors[2 * an_idx]);
   T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]);
 
-  T scale = (2.0 - gt.w * gt.h) * score;
+  T scale = (2.0 - gt.w * gt.h);
   loss[0] += SCE<T>(input[box_idx], tx) * scale;
   loss[0] += SCE<T>(input[box_idx + stride], ty) * scale;
   loss[0] += L2Loss<T>(input[box_idx + 2 * stride], tw) * scale;
@@ -148,14 +138,13 @@ template <typename T>
 static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input,
                                     Box<T> gt, std::vector<int> anchors,
                                     int an_idx, int box_idx, int gi, int gj,
-                                    int grid_size, int input_size, int stride,
-                                    T score) {
+                                    int grid_size, int input_size, int stride) {
   T tx = gt.x * grid_size - gi;
   T ty = gt.y * grid_size - gj;
   T tw = std::log(gt.w * input_size / anchors[2 * an_idx]);
   T th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]);
 
-  T scale = (2.0 - gt.w * gt.h) * score;
+  T scale = (2.0 - gt.w * gt.h);
   input_grad[box_idx] = SCEGrad<T>(input[box_idx], tx) * scale * loss;
   input_grad[box_idx + stride] =
       SCEGrad<T>(input[box_idx + stride], ty) * scale * loss;
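
Note: the targets above follow the usual YOLOv3 encoding: tx, ty are the ground-truth center offsets inside cell (gi, gj), tw, th are log ratios of the ground-truth size to the matched anchor, and the (2.0 - gt.w * gt.h) scale up-weights small boxes. An illustrative encode/decode round trip (standalone sketch; the anchor values below are example numbers, not taken from this operator):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    struct BoxD {
      double x, y, w, h;  // normalized to [0, 1], like Box<T> gt in the kernel
    };

    int main() {
      std::vector<int> anchors = {116, 90, 156, 198, 373, 326};  // example anchors
      int an_idx = 1, grid_size = 13, input_size = 416;
      BoxD gt = {0.507, 0.313, 0.4, 0.6};
      int gi = static_cast<int>(gt.x * grid_size);  // cell column, here 6
      int gj = static_cast<int>(gt.y * grid_size);  // cell row, here 4

      // Encode exactly as CalcBoxLocationLoss does.
      double tx = gt.x * grid_size - gi;
      double ty = gt.y * grid_size - gj;
      double tw = std::log(gt.w * input_size / anchors[2 * an_idx]);
      double th = std::log(gt.h * input_size / anchors[2 * an_idx + 1]);

      // Invert the encoding; this recovers the original normalized box.
      BoxD back = {(gi + tx) / grid_size, (gj + ty) / grid_size,
                   anchors[2 * an_idx] * std::exp(tw) / input_size,
                   anchors[2 * an_idx + 1] * std::exp(th) / input_size};
      std::printf("%.3f %.3f %.3f %.3f\n", back.x, back.y, back.w, back.h);
      return 0;
    }
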
@@ -168,11 +157,10 @@ static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input,
 template <typename T>
 static inline void CalcLabelLoss(T* loss, const T* input, const int index,
                                  const int label, const int class_num,
-                                 const int stride, const T pos, const T neg,
-                                 T score) {
+                                 const int stride) {
   for (int i = 0; i < class_num; i++) {
     T pred = input[index + i * stride];
-    loss[0] += SCE<T>(pred, (i == label) ? pos : neg) * score;
+    loss[0] += SCE<T>(pred, (i == label) ? 1.0 : 0.0);
   }
 }
 
@@ -180,12 +168,11 @@ template <typename T>
 static inline void CalcLabelLossGrad(T* input_grad, const T loss,
                                      const T* input, const int index,
                                      const int label, const int class_num,
-                                     const int stride, const T pos, const T neg,
-                                     T score) {
+                                     const int stride) {
   for (int i = 0; i < class_num; i++) {
     T pred = input[index + i * stride];
     input_grad[index + i * stride] =
-        SCEGrad<T>(pred, (i == label) ? pos : neg) * score * loss;
+        SCEGrad<T>(pred, (i == label) ? 1.0 : 0.0) * loss;
   }
 }
 
@@ -201,7 +188,7 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness,
           T obj = objness[k * w + l];
           if (obj > 1e-5) {
             // positive sample: obj = mixup score
-            loss[i] += SCE<T>(input[k * w + l], 1.0) * obj;
+            loss[i] += SCE<T>(input[k * w + l], 1.0);
           } else if (obj > -0.5) {
             // negetive sample: obj = 0
             loss[i] += SCE<T>(input[k * w + l], 0.0);
@@ -226,8 +213,7 @@ static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
         for (int l = 0; l < w; l++) {
           T obj = objness[k * w + l];
           if (obj > 1e-5) {
-            input_grad[k * w + l] =
-                SCEGrad<T>(input[k * w + l], 1.0) * obj * loss[i];
+            input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], 1.0) * loss[i];
           } else if (obj > -0.5) {
             input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], 0.0) * loss[i];
           }
@@ -263,7 +249,6 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("X");
     auto* gt_box = ctx.Input<Tensor>("GTBox");
     auto* gt_label = ctx.Input<Tensor>("GTLabel");
-    auto* gt_score = ctx.Input<Tensor>("GTScore");
     auto* loss = ctx.Output<Tensor>("Loss");
     auto* objness_mask = ctx.Output<Tensor>("ObjectnessMask");
     auto* gt_match_mask = ctx.Output<Tensor>("GTMatchMask");
@@ -272,7 +257,6 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
     int class_num = ctx.Attr<int>("class_num");
     float ignore_thresh = ctx.Attr<float>("ignore_thresh");
     int downsample = ctx.Attr<int>("downsample");
-    bool use_label_smooth = ctx.Attr<bool>("use_label_smooth");
 
     const int n = input->dims()[0];
     const int h = input->dims()[2];
@@ -285,17 +269,9 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
     const int stride = h * w;
     const int an_stride = (class_num + 5) * stride;
 
-    T label_pos = 1.0;
-    T label_neg = 0.0;
-    if (use_label_smooth) {
-      label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
-      label_neg = 1.0 / static_cast<T>(class_num);
-    }
-
     const T* input_data = input->data<T>();
     const T* gt_box_data = gt_box->data<T>();
     const int* gt_label_data = gt_label->data<int>();
-    const T* gt_score_data = gt_score->data<T>();
     T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
     memset(loss_data, 0, loss->numel() * sizeof(T));
     T* obj_mask_data =
@@ -376,20 +352,19 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
         int mask_idx = GetMaskIndex(anchor_mask, best_n);
         gt_match_mask_data[i * b + t] = mask_idx;
         if (mask_idx >= 0) {
-          T score = gt_score_data[i * b + t];
           int box_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
                                       an_stride, stride, 0);
           CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors, best_n,
-                                 box_idx, gi, gj, h, input_size, stride, score);
+                                 box_idx, gi, gj, h, input_size, stride);
 
           int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
-          obj_mask_data[obj_idx] = score;
+          obj_mask_data[obj_idx] = 1.0;
 
           int label = gt_label_data[i * b + t];
           int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
                                         an_stride, stride, 5);
           CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label,
-                           class_num, stride, label_pos, label_neg, score);
+                           class_num, stride);
         }
       }
     }
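
Note: GetEntryIndex itself is defined earlier in this file and is untouched by this patch; its call sites above (entry 0 for the box, entry 5 onward for the class scores, with stride = h * w and an_stride = (class_num + 5) * stride) imply a [N, mask_num * (5 + class_num), H, W] layout for the head tensor. A hypothetical sketch of such an index helper, named EntryIndexSketch to make clear it is not the operator's function:

    #include <cstdio>

    // Hypothetical index helper, consistent with how GetEntryIndex is called
    // above; the real implementation in the operator file may differ.
    static inline int EntryIndexSketch(int batch, int an_idx, int hw_idx,
                                       int an_num, int an_stride, int stride,
                                       int entry) {
      // stride = H * W, an_stride = (class_num + 5) * stride
      return (batch * an_num + an_idx) * an_stride + entry * stride + hw_idx;
    }

    int main() {
      const int class_num = 80, h = 13, w = 13, mask_num = 3;
      const int stride = h * w;
      const int an_stride = (class_num + 5) * stride;
      // Entries of anchor slot 1, sample 0, at cell (gj = 4, gi = 6): x, y, w, h
      // sit at consecutive multiples of stride; objectness presumably at entry 4
      // (not shown in this patch); class scores start at entry 5.
      int base = EntryIndexSketch(0, 1, 4 * w + 6, mask_num, an_stride, stride, 0);
      std::printf("x=%d y=%d w=%d h=%d obj=%d cls0=%d\n", base, base + stride,
                  base + 2 * stride, base + 3 * stride, base + 4 * stride,
                  base + 5 * stride);
      return 0;
    }
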
@@ -406,7 +381,6 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("X");
     auto* gt_box = ctx.Input<Tensor>("GTBox");
     auto* gt_label = ctx.Input<Tensor>("GTLabel");
-    auto* gt_score = ctx.Input<Tensor>("GTScore");
     auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
     auto* objness_mask = ctx.Input<Tensor>("ObjectnessMask");
@@ -415,7 +389,6 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     auto anchor_mask = ctx.Attr<std::vector<int>>("anchor_mask");
     int class_num = ctx.Attr<int>("class_num");
     int downsample = ctx.Attr<int>("downsample");
-    bool use_label_smooth = ctx.Attr<bool>("use_label_smooth");
 
     const int n = input_grad->dims()[0];
     const int c = input_grad->dims()[1];
@@ -428,17 +401,9 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     const int stride = h * w;
     const int an_stride = (class_num + 5) * stride;
 
-    T label_pos = 1.0;
-    T label_neg = 0.0;
-    if (use_label_smooth) {
-      label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
-      label_neg = 1.0 / static_cast<T>(class_num);
-    }
-
     const T* input_data = input->data<T>();
     const T* gt_box_data = gt_box->data<T>();
     const int* gt_label_data = gt_label->data<int>();
-    const T* gt_score_data = gt_score->data<T>();
     const T* loss_grad_data = loss_grad->data<T>();
     const T* obj_mask_data = objness_mask->data<T>();
     const int* gt_match_mask_data = gt_match_mask->data<int>();
@@ -450,24 +415,21 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
       for (int t = 0; t < b; t++) {
         int mask_idx = gt_match_mask_data[i * b + t];
         if (mask_idx >= 0) {
-          T score = gt_score_data[i * b + t];
           Box<T> gt = GetGtBox(gt_box_data, i, b, t);
           int gi = static_cast<int>(gt.x * w);
           int gj = static_cast<int>(gt.y * h);
 
           int box_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
                                       an_stride, stride, 0);
-          CalcBoxLocationLossGrad<T>(input_grad_data, loss_grad_data[i],
-                                     input_data, gt, anchors,
-                                     anchor_mask[mask_idx], box_idx, gi, gj, h,
-                                     input_size, stride, score);
+          CalcBoxLocationLossGrad<T>(
+              input_grad_data, loss_grad_data[i], input_data, gt, anchors,
+              anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride);
 
           int label = gt_label_data[i * b + t];
           int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
                                         an_stride, stride, 5);
           CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data,
-                               label_idx, label, class_num, stride, label_pos,
-                               label_neg, score);
+                               label_idx, label, class_num, stride);
         }
       }
     }