update detection_map operator

8 years ago · 26f03ea13d
parent 67cbb3e3b6
commit 26f03ea13d
7 changed files with 231 additions and 355 deletions
--- a/paddle/operators/detection_map_op.cc
+++ b/paddle/operators/detection_map_op.cc
@ -24,6 +24,29 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Detection"),
+                   "Input(Detection) of DetectionMAPOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Label"),
+                   "Input(Label) of DetectionMAPOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("MAP"),
+                   "Output(MAP) of DetectionMAPOp should not be null.");
+
+    auto det_dims = ctx->GetInputDim("Detection");
+    PADDLE_ENFORCE_EQ(det_dims.size(), 2UL,
+                      "The rank of Input(Detection) must be 2, "
+                      "the shape is [N, 6].");
+    PADDLE_ENFORCE_EQ(det_dims[1], 6UL,
+                      "The shape of Input(Detection) is [N, 6].");
+    auto label_dims = ctx->GetInputDim("Label");
+    PADDLE_ENFORCE_EQ(label_dims.size(), 2UL,
+                      "The rank of Input(Label) must be 2, "
+                      "the shape is [N, 6].");
+    PADDLE_ENFORCE_EQ(label_dims[1], 6UL,
+                      "The shape of Input(Label) is [N, 6].");
+
+    auto ap_type = GetAPType(ctx->Attrs().Get<std::string>("ap_type"));
+    PADDLE_ENFORCE_NE(ap_type, APType::kNone,
+                      "The ap_type should be 'integral' or '11point'.");
    auto map_dim = framework::make_ddim({1});
    ctx->SetOutputDim("MAP", map_dim);
  }
@ -42,25 +65,49 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
  DetectionMAPOpMaker(framework::OpProto* proto,
                      framework::OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("Detect", "The detection output.");
-    AddInput("Label", "The label data.");
-    AddOutput("MAP", "The MAP evaluate result of the detection.");
-
-    AddAttr<float>("overlap_threshold", "The overlap threshold.")
+    AddInput("Label",
+             "(LoDTensor) A 2-D LoDTensor with shape [N, 6] represents the "
+             "Labeled ground-truth data. Each row has 6 values: "
+             "[label, is_difficult, xmin, ymin, xmax, ymax], N is the total "
+             "number of ground-truth data in this mini-batch. For each "
+             "instance, the offsets in first dimension are called LoD, "
+             "the number of offset is N + 1, if LoD[i + 1] - LoD[i] == 0, "
+             "means there is no ground-truth data.");
+    AddInput("Detection",
+             "(LoDTensor) A 2-D LoDTensor with shape [M, 6] represents the "
+             "detections. Each row has 6 values: "
+             "[label, confidence, xmin, ymin, xmax, ymax], M is the total "
+             "number of detections in this mini-batch. For each instance, "
+             "the offsets in first dimension are called LoD, the number of "
+             "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
+             "no detected data.");
+    AddOutput("MAP",
+              "(Tensor) A tensor with shape [1], store the mAP evaluate "
+              "result of the detection.");
+
+    AddAttr<float>("overlap_threshold",
+                   "(float) "
+                   "The jaccard overlap threshold of detection output and "
+                   "ground-truth data.")
        .SetDefault(.3f);
    AddAttr<bool>("evaluate_difficult",
+                  "(bool, default true) "
                  "Switch to control whether the difficult data is evaluated.")
        .SetDefault(true);
    AddAttr<std::string>("ap_type",
-                         "The AP algorithm type, 'Integral' or '11point'.")
-        .SetDefault("Integral");
-
+                         "(string, default 'integral') "
+                         "The AP algorithm type, 'integral' or '11point'.")
+        .SetDefault("integral")
+        .InEnum({"integral", "11point"});
    AddComment(R"DOC(
-Detection MAP Operator.
-
-Detection MAP evaluator for SSD(Single Shot MultiBox Detector) algorithm.
-Please get more information from the following papers:
-https://arxiv.org/abs/1512.02325.
+Detection mAP evaluation operator.
+The general steps are as follows: first, compute the true positives and
+ false positives from the input detections and labels, then
+ compute the mAP value.
+ Both the '11point' and 'integral' AP algorithms are supported. For more
+ information, see the following articles:
+ https://sanchom.wordpress.com/tag/average-precision/
+ https://arxiv.org/abs/1512.02325

 )DOC");
  }
--- a/paddle/operators/detection_map_op.cu
+++ b/paddle/operators/detection_map_op.cu
@ -1,20 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/detection_map_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    detection_map, ops::DetectionMAPOpKernel<paddle::platform::GPUPlace, float>,
-    ops::DetectionMAPOpKernel<paddle::platform::GPUPlace, double>);
--- a/paddle/operators/detection_map_op.h
+++ b/paddle/operators/detection_map_op.h
--- a/paddle/operators/math/detection_util.cc
+++ b/paddle/operators/math/detection_util.cc
@ -1,22 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/math/detection_util.h"
-#include "paddle/operators/math/math_function.h"
-
-namespace paddle {
-namespace operators {
-namespace math {}  // namespace math
-}  // namespace operators
-}  // namespace paddle
--- a/paddle/operators/math/detection_util.cu
+++ b/paddle/operators/math/detection_util.cu
@ -1,23 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/math/detection_util.h"
-#include "paddle/operators/math/math_function.h"
-#include "paddle/platform/cuda_helper.h"
-
-namespace paddle {
-namespace operators {
-namespace math {}  // namespace math
-}  // namespace operators
-}  // namespace paddle
--- a/paddle/operators/math/detection_util.h
+++ b/paddle/operators/math/detection_util.h
@ -1,128 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include "paddle/framework/selected_rows.h"
-#include "paddle/platform/device_context.h"
-
-namespace paddle {
-namespace operators {
-namespace math {
-
-template <typename T>
-struct BBox {
-  BBox(T x_min, T y_min, T x_max, T y_max)
-      : x_min(x_min),
-        y_min(y_min),
-        x_max(x_max),
-        y_max(y_max),
-        is_difficult(false) {}
-
-  BBox() {}
-
-  T get_width() const { return x_max - x_min; }
-
-  T get_height() const { return y_max - y_min; }
-
-  T get_center_x() const { return (x_min + x_max) / 2; }
-
-  T get_center_y() const { return (y_min + y_max) / 2; }
-
-  T get_area() const { return get_width() * get_height(); }
-
-  // coordinate of bounding box
-  T x_min;
-  T y_min;
-  T x_max;
-  T y_max;
-  // whether difficult object (e.g. object with heavy occlusion is difficult)
-  bool is_difficult;
-};
-
-template <typename T>
-void GetBBoxFromDetectData(const T* detect_data, const size_t num_bboxes,
-                           std::vector<T>& labels, std::vector<T>& scores,
-                           std::vector<BBox<T>>& bboxes) {
-  size_t out_offset = bboxes.size();
-  labels.resize(out_offset + num_bboxes);
-  scores.resize(out_offset + num_bboxes);
-  bboxes.resize(out_offset + num_bboxes);
-  for (size_t i = 0; i < num_bboxes; ++i) {
-    labels[out_offset + i] = *(detect_data + i * 7 + 1);
-    scores[out_offset + i] = *(detect_data + i * 7 + 2);
-    BBox<T> bbox;
-    bbox.x_min = *(detect_data + i * 7 + 3);
-    bbox.y_min = *(detect_data + i * 7 + 4);
-    bbox.x_max = *(detect_data + i * 7 + 5);
-    bbox.y_max = *(detect_data + i * 7 + 6);
-    bboxes[out_offset + i] = bbox;
-  };
-}
-
-template <typename T>
-void GetBBoxFromLabelData(const T* label_data, const size_t num_bboxes,
-                          std::vector<BBox<T>>& bboxes) {
-  size_t out_offset = bboxes.size();
-  bboxes.resize(bboxes.size() + num_bboxes);
-  for (size_t i = 0; i < num_bboxes; ++i) {
-    BBox<T> bbox;
-    bbox.x_min = *(label_data + i * 6 + 1);
-    bbox.y_min = *(label_data + i * 6 + 2);
-    bbox.x_max = *(label_data + i * 6 + 3);
-    bbox.y_max = *(label_data + i * 6 + 4);
-    T is_difficult = *(label_data + i * 6 + 5);
-    if (std::abs(is_difficult - 0.0) < 1e-6)
-      bbox.is_difficult = false;
-    else
-      bbox.is_difficult = true;
-    bboxes[out_offset + i] = bbox;
-  }
-}
-
-template <typename T>
-inline float JaccardOverlap(const BBox<T>& bbox1, const BBox<T>& bbox2) {
-  if (bbox2.x_min > bbox1.x_max || bbox2.x_max < bbox1.x_min ||
-      bbox2.y_min > bbox1.y_max || bbox2.y_max < bbox1.y_min) {
-    return 0.0;
-  } else {
-    float inter_x_min = std::max(bbox1.x_min, bbox2.x_min);
-    float inter_y_min = std::max(bbox1.y_min, bbox2.y_min);
-    float inter_x_max = std::min(bbox1.x_max, bbox2.x_max);
-    float inter_y_max = std::min(bbox1.y_max, bbox2.y_max);
-
-    float inter_width = inter_x_max - inter_x_min;
-    float inter_height = inter_y_max - inter_y_min;
-    float inter_area = inter_width * inter_height;
-
-    float bbox_area1 = bbox1.get_area();
-    float bbox_area2 = bbox2.get_area();
-
-    return inter_area / (bbox_area1 + bbox_area2 - inter_area);
-  }
-}
-
-template <typename T>
-bool SortScorePairDescend(const std::pair<float, T>& pair1,
-                          const std::pair<float, T>& pair2) {
-  return pair1.first > pair2.first;
-}
-
-// template <>
-// bool SortScorePairDescend(const std::pair<float, NormalizedBBox>& pair1,
-//                           const std::pair<float, NormalizedBBox>& pair2) {
-//   return pair1.first > pair2.first;
-// }
-
-}  // namespace math
-}  // namespace operators
-}  // namespace paddle
--- a/python/paddle/v2/fluid/tests/test_detection_map_op.py
+++ b/python/paddle/v2/fluid/tests/test_detection_map_op.py
@ -10,14 +10,14 @@ class TestDetectionMAPOp(OpTest):
    def set_data(self):
        self.init_test_case()

-        self.mAP = [self.calc_map(self.tf_pos)]
+        self.mAP = [self.calc_map(self.tf_pos, self.tf_pos_lod)]
        self.label = np.array(self.label).astype('float32')
        self.detect = np.array(self.detect).astype('float32')
        self.mAP = np.array(self.mAP).astype('float32')

        self.inputs = {
            'Label': (self.label, self.label_lod),
-            'Detect': self.detect
+            'Detection': (self.detect, self.detect_lod)
        }

        self.attrs = {
@ -31,29 +31,29 @@ class TestDetectionMAPOp(OpTest):
    def init_test_case(self):
        self.overlap_threshold = 0.3
        self.evaluate_difficult = True
-        self.ap_type = "Integral"
+        self.ap_type = "integral"

        self.label_lod = [[0, 2, 4]]
-        # label xmin ymin xmax ymax difficult
-        self.label = [[1, 0.1, 0.1, 0.3, 0.3, 0], [1, 0.6, 0.6, 0.8, 0.8, 1],
-                      [2, 0.3, 0.3, 0.6, 0.5, 0], [1, 0.7, 0.1, 0.9, 0.3, 0]]
+        # label difficult xmin ymin xmax ymax
+        self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8],
+                      [2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]]

-        # image_id label score xmin ymin xmax ymax difficult
+        # label score xmin ymin xmax ymax
+        self.detect_lod = [[0, 3, 7]]
        self.detect = [
-            [0, 1, 0.3, 0.1, 0.0, 0.4, 0.3], [0, 1, 0.7, 0.0, 0.1, 0.2, 0.3],
-            [0, 1, 0.9, 0.7, 0.6, 0.8, 0.8], [1, 2, 0.8, 0.2, 0.1, 0.4, 0.4],
-            [1, 2, 0.1, 0.4, 0.3, 0.7, 0.5], [1, 1, 0.2, 0.8, 0.1, 1.0, 0.3],
-            [1, 3, 0.2, 0.8, 0.1, 1.0, 0.3]
+            [1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3],
+            [1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4],
+            [2, 0.1, 0.4, 0.3, 0.7, 0.5], [1, 0.2, 0.8, 0.1, 1.0, 0.3],
+            [3, 0.2, 0.8, 0.1, 1.0, 0.3]
        ]

-        # image_id label score false_pos false_pos
-        # [-1, 1, 3, -1, -1],
-        # [-1, 2, 1, -1, -1]
-        self.tf_pos = [[0, 1, 0.9, 1, 0], [0, 1, 0.7, 1, 0], [0, 1, 0.3, 0, 1],
-                       [1, 1, 0.2, 1, 0], [1, 2, 0.8, 0, 1], [1, 2, 0.1, 1, 0],
-                       [1, 3, 0.2, 0, 1]]
+        # label score true_pos false_pos
+        self.tf_pos_lod = [[0, 3, 7]]
+        self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1],
+                       [1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0],
+                       [3, 0.2, 0, 1]]

-    def calc_map(self, tf_pos):
+    def calc_map(self, tf_pos, tf_pos_lod):
        mAP = 0.0
        count = 0

@ -71,7 +71,7 @@ class TestDetectionMAPOp(OpTest):
            return accu_list

        label_count = collections.Counter()
-        for (label, xmin, ymin, xmax, ymax, difficult) in self.label:
+        for (label, difficult, xmin, ymin, xmax, ymax) in self.label:
            if self.evaluate_difficult:
                label_count[label] += 1
            elif not difficult:
@ -79,7 +79,7 @@ class TestDetectionMAPOp(OpTest):

        true_pos = collections.defaultdict(list)
        false_pos = collections.defaultdict(list)
-        for (image_id, label, score, tp, fp) in tf_pos:
+        for (label, score, tp, fp) in tf_pos:
            true_pos[label].append([score, tp])
            false_pos[label].append([score, fp])

@ -103,22 +103,22 @@ class TestDetectionMAPOp(OpTest):
                recall.append(float(accu_tp_sum[i]) / label_pos_num)

            if self.ap_type == "11point":
-                max_precisions = [11.0, 0.0]
+                max_precisions = [0.0] * 11
                start_idx = len(accu_tp_sum) - 1
-                for j in range(10, 0, -1):
-                    for i in range(start_idx, 0, -1):
-                        if recall[i] < j / 10.0:
+                for j in range(10, -1, -1):
+                    for i in range(start_idx, -1, -1):
+                        if recall[i] < float(j) / 10.0:
                            start_idx = i
                            if j > 0:
                                max_precisions[j - 1] = max_precisions[j]
                                break
-                            else:
-                                if max_precisions[j] < accu_precision[i]:
-                                    max_precisions[j] = accu_precision[i]
-                for j in range(10, 0, -1):
+                        else:
+                            if max_precisions[j] < precision[i]:
+                                max_precisions[j] = precision[i]
+                for j in range(10, -1, -1):
                    mAP += max_precisions[j] / 11
                count += 1
-            elif self.ap_type == "Integral":
+            elif self.ap_type == "integral":
                average_precisions = 0.0
                prev_recall = 0.0
                for i in range(len(accu_tp_sum)):
@ -147,8 +147,17 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp):

        self.evaluate_difficult = False

-        self.tf_pos = [[0, 1, 0.7, 1, 0], [0, 1, 0.3, 0, 1], [1, 1, 0.2, 1, 0],
-                       [1, 2, 0.8, 0, 1], [1, 2, 0.1, 1, 0], [1, 3, 0.2, 0, 1]]
+        self.tf_pos_lod = [[0, 2, 6]]
+        # label score true_pos false_pos
+        self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0],
+                       [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
+
+
+class TestDetectionMAPOp11Point(TestDetectionMAPOp):
+    def init_test_case(self):
+        super(TestDetectionMAPOp11Point, self).init_test_case()
+
+        self.ap_type = "11point"


 if __name__ == '__main__':