|
|
|
@ -9,7 +9,7 @@
|
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
|
limitations under the License. */
|
|
|
|
|
|
|
|
|
|
#include "paddle/fluid/operators/yolov3_loss_op.h"
|
|
|
|
|
#include "paddle/fluid/operators/detection/yolov3_loss_op.h"
|
|
|
|
|
#include "paddle/fluid/framework/op_registry.h"
|
|
|
|
|
|
|
|
|
|
namespace paddle {
|
|
|
|
@ -29,23 +29,33 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
|
|
|
|
|
"Input(GTLabel) of Yolov3LossOp should not be null.");
|
|
|
|
|
PADDLE_ENFORCE(ctx->HasOutput("Loss"),
|
|
|
|
|
"Output(Loss) of Yolov3LossOp should not be null.");
|
|
|
|
|
PADDLE_ENFORCE(
|
|
|
|
|
ctx->HasOutput("ObjectnessMask"),
|
|
|
|
|
"Output(ObjectnessMask) of Yolov3LossOp should not be null.");
|
|
|
|
|
PADDLE_ENFORCE(ctx->HasOutput("GTMatchMask"),
|
|
|
|
|
"Output(GTMatchMask) of Yolov3LossOp should not be null.");
|
|
|
|
|
|
|
|
|
|
auto dim_x = ctx->GetInputDim("X");
|
|
|
|
|
auto dim_gtbox = ctx->GetInputDim("GTBox");
|
|
|
|
|
auto dim_gtlabel = ctx->GetInputDim("GTLabel");
|
|
|
|
|
auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
|
|
|
|
|
int anchor_num = anchors.size() / 2;
|
|
|
|
|
auto anchor_mask = ctx->Attrs().Get<std::vector<int>>("anchor_mask");
|
|
|
|
|
int mask_num = anchor_mask.size();
|
|
|
|
|
auto class_num = ctx->Attrs().Get<int>("class_num");
|
|
|
|
|
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3],
|
|
|
|
|
"Input(X) dim[3] and dim[4] should be euqal.");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num),
|
|
|
|
|
"Input(X) dim[1] should be equal to (anchor_number * (5 "
|
|
|
|
|
"+ class_num)).");
|
|
|
|
|
PADDLE_ENFORCE_EQ(
|
|
|
|
|
dim_x[1], mask_num * (5 + class_num),
|
|
|
|
|
"Input(X) dim[1] should be equal to (anchor_mask_number * (5 "
|
|
|
|
|
"+ class_num)).");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3,
|
|
|
|
|
"Input(GTBox) should be a 3-D tensor");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_gtbox[2], 4, "Input(GTBox) dim[2] should be 5");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_gtlabel.size(), 2,
|
|
|
|
|
"Input(GTBox) should be a 2-D tensor");
|
|
|
|
|
"Input(GTLabel) should be a 2-D tensor");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_gtlabel[0], dim_gtbox[0],
|
|
|
|
|
"Input(GTBox) and Input(GTLabel) dim[0] should be same");
|
|
|
|
|
PADDLE_ENFORCE_EQ(dim_gtlabel[1], dim_gtbox[1],
|
|
|
|
@ -54,11 +64,22 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
|
|
|
|
|
"Attr(anchors) length should be greater then 0.");
|
|
|
|
|
PADDLE_ENFORCE_EQ(anchors.size() % 2, 0,
|
|
|
|
|
"Attr(anchors) length should be even integer.");
|
|
|
|
|
for (size_t i = 0; i < anchor_mask.size(); i++) {
|
|
|
|
|
PADDLE_ENFORCE_LT(
|
|
|
|
|
anchor_mask[i], anchor_num,
|
|
|
|
|
"Attr(anchor_mask) should not crossover Attr(anchors).");
|
|
|
|
|
}
|
|
|
|
|
PADDLE_ENFORCE_GT(class_num, 0,
|
|
|
|
|
"Attr(class_num) should be an integer greater then 0.");
|
|
|
|
|
|
|
|
|
|
std::vector<int64_t> dim_out({1});
|
|
|
|
|
std::vector<int64_t> dim_out({dim_x[0]});
|
|
|
|
|
ctx->SetOutputDim("Loss", framework::make_ddim(dim_out));
|
|
|
|
|
|
|
|
|
|
std::vector<int64_t> dim_obj_mask({dim_x[0], mask_num, dim_x[2], dim_x[3]});
|
|
|
|
|
ctx->SetOutputDim("ObjectnessMask", framework::make_ddim(dim_obj_mask));
|
|
|
|
|
|
|
|
|
|
std::vector<int64_t> dim_gt_match_mask({dim_gtbox[0], dim_gtbox[1]});
|
|
|
|
|
ctx->SetOutputDim("GTMatchMask", framework::make_ddim(dim_gt_match_mask));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
@ -73,11 +94,11 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
|
|
|
|
|
public:
|
|
|
|
|
void Make() override {
|
|
|
|
|
AddInput("X",
|
|
|
|
|
"The input tensor of YOLO v3 loss operator, "
|
|
|
|
|
"The input tensor of YOLOv3 loss operator, "
|
|
|
|
|
"This is a 4-D tensor with shape of [N, C, H, W]."
|
|
|
|
|
"H and W should be same, and the second dimention(C) stores"
|
|
|
|
|
"box locations, confidence score and classification one-hot"
|
|
|
|
|
"key of each anchor box");
|
|
|
|
|
"keys of each anchor box");
|
|
|
|
|
AddInput("GTBox",
|
|
|
|
|
"The input tensor of ground truth boxes, "
|
|
|
|
|
"This is a 3-D tensor with shape of [N, max_box_num, 5], "
|
|
|
|
@ -89,32 +110,39 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
|
|
|
|
|
AddInput("GTLabel",
|
|
|
|
|
"The input tensor of ground truth label, "
|
|
|
|
|
"This is a 2-D tensor with shape of [N, max_box_num], "
|
|
|
|
|
"and each element shoudl be an integer to indicate the "
|
|
|
|
|
"and each element should be an integer to indicate the "
|
|
|
|
|
"box class id.");
|
|
|
|
|
AddOutput("Loss",
|
|
|
|
|
"The output yolov3 loss tensor, "
|
|
|
|
|
"This is a 1-D tensor with shape of [1]");
|
|
|
|
|
"This is a 1-D tensor with shape of [N]");
|
|
|
|
|
AddOutput("ObjectnessMask",
|
|
|
|
|
"This is an intermediate tensor with shape of [N, M, H, W], "
|
|
|
|
|
"M is the number of anchor masks. This parameter caches the "
|
|
|
|
|
"mask for calculate objectness loss in gradient kernel.")
|
|
|
|
|
.AsIntermediate();
|
|
|
|
|
AddOutput("GTMatchMask",
|
|
|
|
|
"This is an intermediate tensor with shape of [N, B], "
|
|
|
|
|
"B is the max box number of GT boxes. This parameter caches "
|
|
|
|
|
"matched mask index of each GT boxes for gradient calculate.")
|
|
|
|
|
.AsIntermediate();
|
|
|
|
|
|
|
|
|
|
AddAttr<int>("class_num", "The number of classes to predict.");
|
|
|
|
|
AddAttr<std::vector<int>>("anchors",
|
|
|
|
|
"The anchor width and height, "
|
|
|
|
|
"it will be parsed pair by pair.");
|
|
|
|
|
"it will be parsed pair by pair.")
|
|
|
|
|
.SetDefault(std::vector<int>{});
|
|
|
|
|
AddAttr<std::vector<int>>("anchor_mask",
|
|
|
|
|
"The mask index of anchors used in "
|
|
|
|
|
"current YOLOv3 loss calculation.")
|
|
|
|
|
.SetDefault(std::vector<int>{});
|
|
|
|
|
AddAttr<int>("downsample_ratio",
|
|
|
|
|
"The downsample ratio from network input to YOLOv3 loss "
|
|
|
|
|
"input, so 32, 16, 8 should be set for the first, second, "
|
|
|
|
|
"and thrid YOLOv3 loss operators.")
|
|
|
|
|
.SetDefault(32);
|
|
|
|
|
AddAttr<float>("ignore_thresh",
|
|
|
|
|
"The ignore threshold to ignore confidence loss.");
|
|
|
|
|
AddAttr<float>("loss_weight_xy", "The weight of x, y location loss.")
|
|
|
|
|
.SetDefault(1.0);
|
|
|
|
|
AddAttr<float>("loss_weight_wh", "The weight of w, h location loss.")
|
|
|
|
|
.SetDefault(1.0);
|
|
|
|
|
AddAttr<float>(
|
|
|
|
|
"loss_weight_conf_target",
|
|
|
|
|
"The weight of confidence score loss in locations with target object.")
|
|
|
|
|
.SetDefault(1.0);
|
|
|
|
|
AddAttr<float>("loss_weight_conf_notarget",
|
|
|
|
|
"The weight of confidence score loss in locations without "
|
|
|
|
|
"target object.")
|
|
|
|
|
.SetDefault(1.0);
|
|
|
|
|
AddAttr<float>("loss_weight_class", "The weight of classification loss.")
|
|
|
|
|
.SetDefault(1.0);
|
|
|
|
|
"The ignore threshold to ignore confidence loss.")
|
|
|
|
|
.SetDefault(0.7);
|
|
|
|
|
AddComment(R"DOC(
|
|
|
|
|
This operator generate yolov3 loss by given predict result and ground
|
|
|
|
|
truth boxes.
|
|
|
|
@ -147,17 +175,28 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
|
|
|
|
|
thresh, the confidence score loss of this anchor box will be ignored.
|
|
|
|
|
|
|
|
|
|
Therefore, the yolov3 loss consist of three major parts, box location loss,
|
|
|
|
|
confidence score loss, and classification loss. The MSE loss is used for
|
|
|
|
|
box location, and binary cross entropy loss is used for confidence score
|
|
|
|
|
loss and classification loss.
|
|
|
|
|
confidence score loss, and classification loss. The L2 loss is used for
|
|
|
|
|
box coordinates (w, h), and sigmoid cross entropy loss is used for box
|
|
|
|
|
coordinates (x, y), confidence score loss and classification loss.
|
|
|
|
|
|
|
|
|
|
Each groud truth box find a best matching anchor box in all anchors,
|
|
|
|
|
prediction of this anchor box will incur all three parts of losses, and
|
|
|
|
|
prediction of anchor boxes with no GT box matched will only incur objectness
|
|
|
|
|
loss.
|
|
|
|
|
|
|
|
|
|
In order to trade off box coordinate losses between big boxes and small
|
|
|
|
|
boxes, box coordinate losses will be mutiplied by scale weight, which is
|
|
|
|
|
calculated as follow.
|
|
|
|
|
|
|
|
|
|
$$
|
|
|
|
|
weight_{box} = 2.0 - t_w * t_h
|
|
|
|
|
$$
|
|
|
|
|
|
|
|
|
|
Final loss will be represented as follow.
|
|
|
|
|
|
|
|
|
|
$$
|
|
|
|
|
loss = \loss_weight_{xy} * loss_{xy} + \loss_weight_{wh} * loss_{wh}
|
|
|
|
|
+ \loss_weight_{conf_target} * loss_{conf_target}
|
|
|
|
|
+ \loss_weight_{conf_notarget} * loss_{conf_notarget}
|
|
|
|
|
+ \loss_weight_{class} * loss_{class}
|
|
|
|
|
loss = (loss_{xy} + loss_{wh}) * weight_{box}
|
|
|
|
|
+ loss_{conf} + loss_{class}
|
|
|
|
|
$$
|
|
|
|
|
)DOC");
|
|
|
|
|
}
|
|
|
|
@ -196,6 +235,8 @@ class Yolov3LossGradMaker : public framework::SingleGradOpDescMaker {
|
|
|
|
|
op->SetInput("GTBox", Input("GTBox"));
|
|
|
|
|
op->SetInput("GTLabel", Input("GTLabel"));
|
|
|
|
|
op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
|
|
|
|
|
op->SetInput("ObjectnessMask", Output("ObjectnessMask"));
|
|
|
|
|
op->SetInput("GTMatchMask", Output("GTMatchMask"));
|
|
|
|
|
|
|
|
|
|
op->SetAttrMap(Attrs());
|
|
|
|
|
|