Add backward CPU kernel. test=develop

7 years ago · a0284f6fbc
parent 36c46152e1
commit a0284f6fbc
7 changed files with 327 additions and 98 deletions
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -183,6 +183,7 @@ paddle.fluid.layers.similarity_focus ArgSpec(args=['input', 'axis', 'indexes', '
 paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
 paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
+paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'anchors', 'class_num', 'ignore_thresh', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
--- a/paddle/fluid/operators/yolov3_loss_op.cc
+++ b/paddle/fluid/operators/yolov3_loss_op.cc
@ -20,8 +20,6 @@ using framework::Tensor;
 class Yolov3LossOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of Yolov3LossOp should not be null.");
@ -32,7 +30,6 @@ class Yolov3LossOp : public framework::OperatorWithKernel {

    auto dim_x = ctx->GetInputDim("X");
    auto dim_gt = ctx->GetInputDim("GTBox");
-    auto img_height = ctx->Attrs().Get<int>("img_height");
    auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
    auto class_num = ctx->Attrs().Get<int>("class_num");
    PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
@ -43,8 +40,6 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
                      "+ class_num)).");
    PADDLE_ENFORCE_EQ(dim_gt.size(), 3, "Input(GTBox) should be a 3-D tensor");
    PADDLE_ENFORCE_EQ(dim_gt[2], 5, "Input(GTBox) dim[2] should be 5");
-    PADDLE_ENFORCE_GT(img_height, 0,
-                      "Attr(img_height) value should be greater then 0");
    PADDLE_ENFORCE_GT(anchors.size(), 0,
                      "Attr(anchors) length should be greater then 0.");
    PADDLE_ENFORCE_EQ(anchors.size() % 2, 0,
@ -87,13 +82,43 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<std::vector<int>>("anchors",
                              "The anchor width and height, "
                              "it will be parsed pair by pair.");
-    AddAttr<int>("img_height",
-                 "The input image height after crop of yolov3 network.");
    AddAttr<float>("ignore_thresh",
                   "The ignore threshold to ignore confidence loss.");
    AddComment(R"DOC(
         This operator generate yolov3 loss by given predict result and ground
         truth boxes.
+         
+         The output of the previous network has shape [N, C, H, W]. H and W
+         should be equal and specify the grid size. Each grid point predicts a
+         given number of boxes; this number is half the length of anchors and
+         is referred to below as S. In the second dimension (the channel
+         dimension), C should be S * (class_num + 5), where class_num is the
+         number of box categories in the source dataset (such as COCO). The
+         second dimension therefore stores, for each anchor box, the 4 box
+         location coordinates x, y, w, h, the confidence score of the box, and
+         the one-hot class key.
+
+         Given the 4 location coordinates $$t_x, t_y, t_w, t_h$$, the box
+         predictions correspond to:
+
+         $$
+         b_x = \sigma(t_x) + c_x
+         b_y = \sigma(t_y) + c_y
+         b_w = p_w e^{t_w}
+         b_h = p_h e^{t_h}
+         $$
+
+         Here $$c_x, c_y$$ is the top-left corner of the current grid cell and
+         $$p_w, p_h$$ are specified by anchors.
+
+         The confidence score is the logistic regression value of the IoU
+         between anchor boxes and ground truth boxes. The score of the anchor
+         box with the maximum IoU should be 1, and if an anchor box has an IoU
+         larger than the ignore threshold, the confidence score loss of that
+         anchor box is ignored.
+
+         Therefore, the yolov3 loss consists of three major parts: box location
+         loss, confidence score loss, and classification loss. The MSE loss is
+         used for box location, and binary cross entropy loss is used for
+         confidence score loss and classification loss.
         )DOC");
  }
 };
@ -101,8 +126,6 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
 class Yolov3LossOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")),
@ -113,6 +136,7 @@ class Yolov3LossOpGrad : public framework::OperatorWithKernel {
    }
  }

+ protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
@ -120,12 +144,32 @@ class Yolov3LossOpGrad : public framework::OperatorWithKernel {
  }
 };

+// Describes the yolov3_loss_grad op built from the forward yolov3_loss op.
+// The grad op reads X, GTBox and the gradient of Loss, inherits the forward
+// attributes, and writes the gradient of X. The gradient output for GTBox is
+// set to an empty list.
+class Yolov3LossGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    // Own the OpDesc through a unique_ptr from the start so it is released
+    // if any of the setters below throws.
+    std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
+    op->SetType("yolov3_loss_grad");
+    op->SetInput("X", Input("X"));
+    op->SetInput("GTBox", Input("GTBox"));
+    op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
+
+    op->SetAttrMap(Attrs());
+
+    op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    // No gradient variable is bound for GTBox.
+    op->SetOutput(framework::GradVarName("GTBox"), {});
+    return op;
+  }
+};
+
 }  // namespace operators
 }  // namespace paddle

 namespace ops = paddle::operators;
 REGISTER_OPERATOR(yolov3_loss, ops::Yolov3LossOp, ops::Yolov3LossOpMaker,
-                  paddle::framework::DefaultGradOpDescMaker<true>);
+                  ops::Yolov3LossGradMaker);
 REGISTER_OPERATOR(yolov3_loss_grad, ops::Yolov3LossOpGrad);
 REGISTER_OP_CPU_KERNEL(
    yolov3_loss,
--- a/paddle/fluid/operators/yolov3_loss_op.cu
+++ b/paddle/fluid/operators/yolov3_loss_op.cu
@ -17,7 +17,7 @@
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
    yolov3_loss,
-    ops::Yolov3LossOpKernel<paddle::platform::CUDADeviceContext, float>);
+    ops::Yolov3LossKernel<paddle::platform::CUDADeviceContext, float>);
 REGISTER_OP_CUDA_KERNEL(
    yolov3_loss_grad,
-    ops::Yolov3LossGradOpKernel<paddle::platform::CUDADeviceContext, float>);
+    ops::Yolov3LossGradKernel<paddle::platform::CUDADeviceContext, float>);
--- a/paddle/fluid/operators/yolov3_loss_op.h
+++ b/paddle/fluid/operators/yolov3_loss_op.h
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@ -8244,14 +8244,55 @@ def log_loss(input, label, epsilon=1e-4, name=None):
    return loss


-def yolov3_loss(x, gtbox, img_height, anchors, ignore_thresh, name=None):
+@templatedoc(op_type="yolov3_loss")
+def yolov3_loss(x, gtbox, anchors, class_num, ignore_thresh, name=None):
    """
-    **YOLOv3 Loss Layer**
+    ${comment}
+
+    Args:
+        x (Variable): ${x_comment}
+        gtbox (Variable): ground truth boxes, should be in shape of [N, B, 5].
+                          In the third dimension, class_id, x, y, w, h should
+                          be stored, and x, y, w, h should be values relative
+                          to the input image.
+        anchors (list|tuple): ${anchors_comment}
+        class_num (int): ${class_num_comment}
+        ignore_thresh (float): ${ignore_thresh_comment}
+        name (string): the name of yolov3 loss

-    This layer 
+    Returns:
+        Variable: A 1-D tensor with shape [1], the value of yolov3 loss
+
+    Raises:
+        TypeError: Input x of yolov3_loss must be Variable
+        TypeError: Input gtbox of yolov3_loss must be Variable
+        TypeError: Attr anchors of yolov3_loss must be list or tuple
+        TypeError: Attr class_num of yolov3_loss must be an integer
+        TypeError: Attr ignore_thresh of yolov3_loss must be a float number
+
+    Examples:
+    .. code-block:: python
+
+        x = fluid.layers.data(name='x', shape=[10, 255, 13, 13], dtype='float32')
+        gtbox = fluid.layers.data(name='gtbox', shape=[10, 6, 5], dtype='float32')
+        anchors = [10, 13, 16, 30, 33, 23]
+        loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, class_num=80,
+                                        anchors=anchors, ignore_thresh=0.5)
    """
    helper = LayerHelper('yolov3_loss', **locals())

+    if not isinstance(x, Variable):
+        raise TypeError("Input x of yolov3_loss must be Variable")
+    if not isinstance(gtbox, Variable):
+        raise TypeError("Input gtbox of yolov3_loss must be Variable")
+    if not isinstance(anchors, list) and not isinstance(anchors, tuple):
+        raise TypeError("Attr anchors of yolov3_loss must be list or tuple")
+    if not isinstance(class_num, int):
+        raise TypeError("Attr class_num of yolov3_loss must be an integer")
+    if not isinstance(ignore_thresh, float):
+        raise TypeError(
+            "Attr ignore_thresh of yolov3_loss must be a float number")
+
    if name is None:
        loss = helper.create_variable_for_type_inference(dtype=x.dtype)
    else:
@ -8264,8 +8305,8 @@ def yolov3_loss(x, gtbox, img_height, anchors, ignore_thresh, name=None):
                "GTBox": gtbox},
        outputs={'Loss': loss},
        attrs={
-            "img_height": img_height,
            "anchors": anchors,
+            "class_num": class_num,
            "ignore_thresh": ignore_thresh,
        })
    return loss
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@ -911,6 +911,15 @@ class TestBook(unittest.TestCase):
            self.assertIsNotNone(data_1)
        print(str(program))

+    def test_yolov3_loss(self):
+        """Check that a yolov3_loss layer can be built in a fresh program."""
+        prog = Program()
+        with program_guard(prog):
+            feature_map = layers.data(
+                name='x', shape=[30, 7, 7], dtype='float32')
+            gt_boxes = layers.data(
+                name='gtbox', shape=[10, 5], dtype='float32')
+            loss = layers.yolov3_loss(
+                x=feature_map,
+                gtbox=gt_boxes,
+                anchors=[10, 13, 30, 13],
+                class_num=10,
+                ignore_thresh=0.5)
+
+            self.assertIsNotNone(loss)
+
    def test_bilinear_tensor_product_layer(self):
        program = Program()
        with program_guard(program):
--- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py
+++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py
@ -12,10 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import division
+
 import unittest
 import numpy as np
 from op_test import OpTest

+from paddle.fluid import core
+

 def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-1.0 * x))
@ -65,10 +69,9 @@ def box_iou(box1, box2):
 def build_target(gtboxs, attrs, grid_size):
    n, b, _ = gtboxs.shape
    ignore_thresh = attrs["ignore_thresh"]
-    img_height = attrs["img_height"]
    anchors = attrs["anchors"]
    class_num = attrs["class_num"]
-    an_num = len(anchors) / 2
+    an_num = len(anchors) // 2
    obj_mask = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    noobj_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32')
    tx = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
@ -120,7 +123,7 @@ def build_target(gtboxs, attrs, grid_size):

 def YoloV3Loss(x, gtbox, attrs):
    n, c, h, w = x.shape
-    an_num = len(attrs['anchors']) / 2
+    an_num = len(attrs['anchors']) // 2
    class_num = attrs["class_num"]
    x = x.reshape((n, an_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
    pred_x = sigmoid(x[:, :, :, :, 0])
@ -144,13 +147,6 @@ def YoloV3Loss(x, gtbox, attrs):
                          noobj_mask)
    loss_class = bce(pred_cls * obj_mask_expand, tcls * obj_mask_expand,
                     obj_mask_expand)
-    # print "loss_x: ", loss_x
-    # print "loss_y: ", loss_y
-    # print "loss_w: ", loss_w
-    # print "loss_h: ", loss_h
-    # print "loss_conf_obj: ", loss_conf_obj
-    # print "loss_conf_noobj: ", loss_conf_noobj
-    # print "loss_class: ", loss_class

    return loss_x + loss_y + loss_w + loss_h + loss_conf_obj + loss_conf_noobj + loss_class

@ -165,29 +161,35 @@ class TestYolov3LossOp(OpTest):
                                           self.gtbox_shape[:2])

        self.attrs = {
-            "img_height": self.img_height,
            "anchors": self.anchors,
            "class_num": self.class_num,
            "ignore_thresh": self.ignore_thresh,
        }

        self.inputs = {'X': x, 'GTBox': gtbox}
-        self.outputs = {'Loss': np.array([YoloV3Loss(x, gtbox, self.attrs)])}
-        print self.outputs
+        self.outputs = {
+            'Loss':
+            np.array([YoloV3Loss(x, gtbox, self.attrs)]).astype('float32')
+        }

    def test_check_output(self):
-        self.check_output(atol=1e-3)
+        place = core.CPUPlace()
+        self.check_output_with_place(place, atol=1e-3)

-    # def test_check_grad_normal(self):
-    #     self.check_grad(['X', 'Grid'], 'Output', max_relative_error=0.61)
+    def test_check_grad_ignore_gtbox(self):
+        """Check the gradient of Loss w.r.t. X on CPU, with GTBox in no_grad_set."""
+        place = core.CPUPlace()
+        # set("GTBox") would iterate the string and yield single characters;
+        # wrap the name in a list so the set holds the variable name itself.
+        self.check_grad_with_place(
+            place, ['X'],
+            'Loss',
+            no_grad_set=set(["GTBox"]),
+            max_relative_error=0.1)

    def initTestCase(self):
-        self.img_height = 608
-        self.anchors = [10, 13, 16, 30, 33, 23]
+        self.anchors = [10, 13, 12, 12]
        self.class_num = 10
        self.ignore_thresh = 0.5
-        self.x_shape = (5, len(self.anchors) / 2 * (5 + self.class_num), 7, 7)
-        self.gtbox_shape = (5, 10, 5)
+        self.x_shape = (5, len(self.anchors) // 2 * (5 + self.class_num), 7, 7)
+        self.gtbox_shape = (5, 5, 5)


 if __name__ == "__main__":