retinanet

4 years ago · 03125ee773
parent 4e3abb2434
commit 03125ee773
14 changed files with 2234 additions and 0 deletions
--- a/model_zoo/official/cv/retinanet/README_CN.md
+++ b/model_zoo/official/cv/retinanet/README_CN.md
--- a/model_zoo/official/cv/retinanet/eval.py
+++ b/model_zoo/official/cv/retinanet/eval.py
@ -0,0 +1,113 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Evaluation for retinanet"""
+
+import os
+import argparse
+import time
+import numpy as np
+from mindspore import context, Tensor
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from src.retinanet import  retinanet50, resnet50, retinanetInferWithDecoder
+from src.dataset import create_retinanet_dataset, data_to_mindrecord_byte_image, voc_data_to_mindrecord
+from src.config import config
+from src.coco_eval import metrics
+from src.box_utils import default_boxes
+
+def retinanet_eval(dataset_path, ckpt_path):
+    """Run retinanet inference over a dataset and print the resulting mAP.
+
+    Args:
+        dataset_path: MindRecord file handed to `create_retinanet_dataset`.
+        ckpt_path: checkpoint file whose parameters are loaded into the network.
+    """
+    batch_size = 1
+    eval_ds = create_retinanet_dataset(dataset_path, batch_size=batch_size, repeat_num=1, is_training=False)
+    # Backbone -> detector -> detector wrapped with the box decoder.
+    net = retinanetInferWithDecoder(retinanet50(resnet50(config.num_classes), config),
+                                    Tensor(default_boxes), config)
+    print("Load Checkpoint!")
+    checkpoint_params = load_checkpoint(ckpt_path)
+    net.init_parameters_data()
+    load_param_into_net(net, checkpoint_params)
+    net.set_train(False)
+
+    # `i` counts the images processed so far and is only used for the progress line.
+    i = batch_size
+    total = eval_ds.get_dataset_size() * batch_size
+    start = time.time()
+    pred_data = []
+    print("\n========================================\n")
+    print("total images num: ", total)
+    print("Processing, please wait a moment.")
+    for batch in eval_ds.create_dict_iterator(output_numpy=True):
+        images = batch['image']
+        image_ids = batch['img_id']
+        image_shapes = batch['image_shape']
+
+        output = net(Tensor(images))
+        # Convert the two network outputs to numpy once per batch.
+        boxes_np = output[0].asnumpy()
+        scores_np = output[1].asnumpy()
+        pred_data.extend({"boxes": boxes_np[sample_idx],
+                          "box_scores": scores_np[sample_idx],
+                          "img_id": int(np.squeeze(image_ids[sample_idx])),
+                          "image_shape": image_shapes[sample_idx]}
+                         for sample_idx in range(images.shape[0]))
+
+        percent = round(i / total * 100., 2)
+        print(f'    {str(percent)} [{i}/{total}]', end='\r')
+        i += batch_size
+    cost_time = int((time.time() - start) * 1000)
+    print(f'    100% [{total}/{total}] cost {cost_time} ms')
+    mAP = metrics(pred_data)
+    print("\n========================================\n")
+    print(f"mAP: {mAP}")
+
+
+if __name__ == '__main__':
+    # Entry point: parse the CLI, make sure the evaluation MindRecord exists
+    # (creating it from the configured dataset if needed), then run evaluation.
+    parser = argparse.ArgumentParser(description='retinanet evaluation')
+    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
+    parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.")
+    # `choices` must be a real sequence. A bare string such as ("Ascend") is tested with
+    # `in` against the string itself, so it does not mean "exactly this one value".
+    parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend",),
+                        help="run platform, only support Ascend.")
+    args_opt = parser.parse_args()
+
+    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.run_platform, device_id=args_opt.device_id)
+
+    prefix = "retinanet_eval.mindrecord"
+    mindrecord_dir = config.mindrecord_dir
+    # The file with suffix "0" is the one checked for existence and passed to the evaluation.
+    mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
+    if args_opt.dataset == "voc":
+        # `metrics` reads annotations relative to `config.coco_root`, so point it at the voc root.
+        config.coco_root = config.voc_root
+    if not os.path.exists(mindrecord_file):
+        if not os.path.isdir(mindrecord_dir):
+            os.makedirs(mindrecord_dir)
+        if args_opt.dataset == "coco":
+            if os.path.isdir(config.coco_root):
+                print("Create Mindrecord.")
+                data_to_mindrecord_byte_image("coco", False, prefix)
+                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+            else:
+                print("coco_root not exits.")
+        elif args_opt.dataset == "voc":
+            if os.path.isdir(config.voc_dir) and os.path.isdir(config.voc_root):
+                print("Create Mindrecord.")
+                voc_data_to_mindrecord(mindrecord_dir, False, prefix)
+                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+            else:
+                print("voc_root or voc_dir not exits.")
+        else:
+            # Any other dataset name falls back to the generic image_dir / anno_path layout.
+            if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path):
+                print("Create Mindrecord.")
+                data_to_mindrecord_byte_image("other", False, prefix)
+                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+            else:
+                print("IMAGE_DIR or ANNO_PATH not exits.")
+
+    print("Start Eval!")
+    retinanet_eval(mindrecord_file, config.checkpoint_path)
--- a/model_zoo/official/cv/retinanet/scripts/run_distribute_train.sh
+++ b/model_zoo/official/cv/retinanet/scripts/run_distribute_train.sh
@ -0,0 +1,83 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Launch one background `train.py` process per device for distributed training.
+# Positional arguments: DEVICE_NUM EPOCH_SIZE LR DATASET RANK_TABLE_FILE
+# and, optionally, PRE_TRAINED PRE_TRAINED_EPOCH_SIZE (both or neither).
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE LR DATASET RANK_TABLE_FILE PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
+echo "for example: sh run_distribute_train.sh 8 500 0.1 coco /data/hccl.json /opt/retinanet-500_458.ckpt(optional) 200(optional)"
+echo "It is better to use absolute path."
+echo "================================================================================================================="
+
+# Only the 5-argument (from scratch) and 7-argument (pre-trained) forms are accepted.
+if [ $# != 5 ] && [ $# != 7 ]
+then
+    echo "Usage: sh run_distribute_train.sh [DEVICE_NUM] [EPOCH_SIZE] [LR] [DATASET] \
+[RANK_TABLE_FILE] [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
+    exit 1
+fi
+
+# Before start distribute train, first create mindrecord files.
+# Work from the model root (the parent of this script's directory).
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+python train.py --only_create_dataset=True
+
+echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
+
+# RANK_SIZE and RANK_TABLE_FILE are exported so the child training processes inherit them.
+export RANK_SIZE=$1
+EPOCH_SIZE=$2
+LR=$3
+DATASET=$4
+PRE_TRAINED=$6
+PRE_TRAINED_EPOCH_SIZE=$7
+export RANK_TABLE_FILE=$5
+
+# Each rank gets a fresh LOG<i> directory holding a private copy of the sources,
+# its environment dump (env.log) and its training log (log.txt).
+for((i=0;i<RANK_SIZE;i++))
+do
+    export DEVICE_ID=$i
+    rm -rf LOG$i
+    mkdir ./LOG$i
+    cp ./*.py ./LOG$i
+    cp -r ./src ./LOG$i
+    cp -r ./scripts ./LOG$i
+    cd ./LOG$i || exit
+    export RANK_ID=$i
+    echo "start training for rank $i, device $DEVICE_ID"
+    env > env.log
+    # Train from scratch.
+    if [ $# == 5 ]
+    then
+        python train.py  \
+        --distribute=True  \
+        --lr=$LR \
+        --dataset=$DATASET \
+        --device_num=$RANK_SIZE  \
+        --device_id=$DEVICE_ID  \
+        --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+    fi
+
+    # Resume from a pre-trained checkpoint.
+    if [ $# == 7 ]
+    then
+        python train.py  \
+        --distribute=True  \
+        --lr=$LR \
+        --dataset=$DATASET \
+        --device_num=$RANK_SIZE  \
+        --device_id=$DEVICE_ID  \
+        --pre_trained=$PRE_TRAINED \
+        --pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE \
+        --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+    fi
+
+    cd ../
+done
--- a/model_zoo/official/cv/retinanet/scripts/run_eval.sh
+++ b/model_zoo/official/cv/retinanet/scripts/run_eval.sh
@ -0,0 +1,49 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Run `eval.py` in the background for one dataset on one device.
+# Positional arguments: DATASET DEVICE_ID.
+if [ $# != 2 ]
+then
+    echo "Usage: sh run_eval.sh [DATASET] [DEVICE_ID]"
+exit 1
+fi
+
+DATASET=$1
+echo $DATASET
+
+
+# Single-device settings exported for the evaluation process.
+export DEVICE_NUM=1
+export DEVICE_ID=$2
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+
+# Work from the model root (the parent of this script's directory).
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+
+# Start from a clean eval<DEVICE_ID> working directory.
+if [ -d "eval$2" ];
+then
+    rm -rf ./eval$2
+fi
+
+mkdir ./eval$2
+cp ./*.py ./eval$2
+cp -r ./src ./eval$2
+cd ./eval$2 || exit
+env > env.log
+echo "start inferring for device $DEVICE_ID"
+# Output (stdout and stderr) goes to eval<DEVICE_ID>/log.txt.
+python eval.py \
+    --dataset=$DATASET \
+    --device_id=$2 > log.txt 2>&1 &
+cd ..
--- a/model_zoo/official/cv/retinanet/scripts/run_single_train.sh
+++ b/model_zoo/official/cv/retinanet/scripts/run_single_train.sh
@ -0,0 +1,76 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Launch a single background `train.py` process on one device.
+# Positional arguments: DEVICE_ID EPOCH_SIZE LR DATASET
+# and, optionally, PRE_TRAINED PRE_TRAINED_EPOCH_SIZE (both or neither).
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "sh run_single_train.sh DEVICE_ID EPOCH_SIZE LR DATASET PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
+echo "for example: sh run_single_train.sh 0 500 0.1 coco /opt/retinanet-500_458.ckpt(optional) 200(optional)"
+echo "It is better to use absolute path."
+echo "================================================================================================================="
+
+# Only the 4-argument (from scratch) and 6-argument (pre-trained) forms are accepted.
+if [ $# != 4 ] && [ $# != 6 ]
+then
+    echo "Usage: sh run_single_train.sh [DEVICE_ID] [EPOCH_SIZE] [LR] [DATASET] \
+[PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
+    exit 1
+fi
+
+# Before start single train, first create mindrecord files.
+# Work from the model root (the parent of this script's directory).
+BASE_PATH=$(cd "`dirname $0`" || exit; pwd)
+cd $BASE_PATH/../ || exit
+python train.py --only_create_dataset=True
+
+echo "After running the script, the network runs in the background. The log will be generated in LOGx/log.txt"
+
+export DEVICE_ID=$1
+EPOCH_SIZE=$2
+LR=$3
+DATASET=$4
+PRE_TRAINED=$5
+PRE_TRAINED_EPOCH_SIZE=$6
+
+# Fresh LOG<DEVICE_ID> directory with a private copy of the sources.
+rm -rf LOG$1
+mkdir ./LOG$1
+cp ./*.py ./LOG$1
+cp -r ./src ./LOG$1
+cd ./LOG$1 || exit
+echo "start training for device $1"
+env > env.log
+# Train from scratch.
+if [ $# == 4 ]
+then
+    python train.py  \
+    --distribute=False  \
+    --lr=$LR \
+    --dataset=$DATASET \
+    --device_num=1  \
+    --device_id=$DEVICE_ID  \
+    --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+fi
+
+# Resume from a pre-trained checkpoint.
+if [ $# == 6 ]
+then
+    # Fixed: the script name was misspelled "train,py", so this branch could never start.
+    python train.py  \
+    --distribute=False  \
+    --lr=$LR \
+    --dataset=$DATASET \
+    --device_num=1  \
+    --device_id=$DEVICE_ID  \
+    --pre_trained=$PRE_TRAINED \
+    --pre_trained_epoch_size=$PRE_TRAINED_EPOCH_SIZE  \
+    --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
+fi
+
+cd ../
+
--- a/model_zoo/official/cv/retinanet/src/init.py
+++ b/model_zoo/official/cv/retinanet/src/init.py
--- a/model_zoo/official/cv/retinanet/src/box_utils.py
+++ b/model_zoo/official/cv/retinanet/src/box_utils.py
@ -0,0 +1,165 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Bbox utils"""
+
+import math
+import itertools as it
+import numpy as np
+from .config import config
+
+
+class GeneratDefaultBoxes():
+    """
+    Build the default (anchor) boxes of every retinanet feature level.
+
+    Boxes are appended level by level; inside one level the order is grid row,
+    grid column, then anchor shape (aspect ratio major, scale minor).
+    `self.default_boxes` is a float32 array of shape [num_boxes, 4], the last dimension is [cy, cx, h, w].
+    `self.default_boxes_ltrb` has the same shape, the last dimension is [y1, x1, y2, x2].
+    """
+    def __init__(self):
+        octave_scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])
+        # Image side divided by the stride of each level; used to normalise cell centres.
+        cells_per_side = config.img_shape[0] / np.array(config.steps)
+        anchor_sizes = np.array(config.anchor_size)
+
+        boxes = []
+        for level, feature_size in enumerate(config.feature_size):
+            base_size = anchor_sizes[level] / config.img_shape[0]
+
+            # One (h, w) pair per aspect ratio and scale.
+            shapes = []
+            for aspect_ratio in config.aspect_ratios[level]:
+                ratio_root = math.sqrt(aspect_ratio)
+                for scale in octave_scales:
+                    size = base_size * scale
+                    shapes.append((size / ratio_root, size * ratio_root))
+
+            assert len(shapes) == config.num_default[level]
+
+            for row, col in it.product(range(feature_size), repeat=2):
+                cy = (row + 0.5) / cells_per_side[level]
+                cx = (col + 0.5) / cells_per_side[level]
+                boxes.extend([cy, cx, h, w] for h, w in shapes)
+
+        # Corner form, for IoU calculation.
+        corners = tuple((cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2) for cy, cx, h, w in boxes)
+        self.default_boxes_ltrb = np.array(corners, dtype='float32')
+        self.default_boxes = np.array(boxes, dtype='float32')
+
+
+# Generate the anchors once and reuse the instance for both layouts; building a
+# second `GeneratDefaultBoxes()` would repeat the whole generation at import time.
+_default_boxes_generator = GeneratDefaultBoxes()
+default_boxes_ltrb = _default_boxes_generator.default_boxes_ltrb
+default_boxes = _default_boxes_generator.default_boxes
+# Column vectors of the anchor corners and the anchor areas, used by the IoU matching below.
+y1, x1, y2, x2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1)
+vol_anchors = (x2 - x1) * (y2 - y1)
+matching_threshold = config.match_thershold
+
+
+def retinanet_bboxes_encode(boxes):
+    """
+    Labels anchors with ground truth inputs.
+
+    Args:
+        boxes: ground truth rows of the form [ymin, xmin, ymax, xmax, cls]
+            (corner form; columns 0-3 are compared against the anchor corners).
+
+    Returns:
+        gt_loc: float32 array with shape [num_retinanet_boxes, 4]; encoded offsets for
+            matched anchors (non-zero label), zeros for unmatched anchors.
+        gt_label: int32 class labels with shape [num_retinanet_boxes], 0 where no box matched.
+        num_matched_boxes: int32 array with one element, the number of matched anchors.
+    """
+
+    def jaccard_with_anchors(bbox):
+        """Compute jaccard score a box and the anchors."""
+        # Intersection bbox and volume.
+        ymin = np.maximum(y1, bbox[0])
+        xmin = np.maximum(x1, bbox[1])
+        ymax = np.minimum(y2, bbox[2])
+        xmax = np.minimum(x2, bbox[3])
+        w = np.maximum(xmax - xmin, 0.)
+        h = np.maximum(ymax - ymin, 0.)
+
+        # Volumes.
+        inter_vol = h * w
+        union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol
+        jaccard = inter_vol / union_vol
+        return np.squeeze(jaccard)
+
+    # Best score seen so far per anchor, and the box / label currently assigned to it.
+    pre_scores = np.zeros((config.num_retinanet_boxes), dtype=np.float32)
+    t_boxes = np.zeros((config.num_retinanet_boxes, 4), dtype=np.float32)
+    t_label = np.zeros((config.num_retinanet_boxes), dtype=np.int64)
+    for bbox in boxes:
+        label = int(bbox[4])
+        scores = jaccard_with_anchors(bbox)
+        # Force the best-overlapping anchor to match this box even when its IoU
+        # is below the threshold: 2.0 is larger than any real IoU.
+        idx = np.argmax(scores)
+        scores[idx] = 2.0
+        # An anchor is (re)assigned only if it passes the threshold and beats its previous score.
+        mask = (scores > matching_threshold)
+        mask = mask & (scores > pre_scores)
+        pre_scores = np.maximum(pre_scores, scores * mask)
+        t_label = mask * label + (1 - mask) * t_label
+        for i in range(4):
+            t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i]
+
+    # Anchors with a non-zero label are the matched ones.
+    index = np.nonzero(t_label)
+
+    # Transform the assigned corner boxes to center form [cy, cx, h, w].
+    bboxes = np.zeros((config.num_retinanet_boxes, 4), dtype=np.float32)
+    bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2
+    bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]]
+
+    # Encode features.
+    # Centers become offsets relative to the anchor size, sizes become log ratios;
+    # both are divided by the matching `config.prior_scaling` entry.
+    bboxes_t = bboxes[index]
+    default_boxes_t = default_boxes[index]
+    bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.prior_scaling[0])
+    # Clamp the size ratio so the logarithm below never sees zero.
+    tmp = np.maximum(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4], 0.000001)
+    bboxes_t[:, 2:4] = np.log(tmp) / config.prior_scaling[1]
+    bboxes[index] = bboxes_t
+
+    num_match = np.array([len(np.nonzero(t_label)[0])], dtype=np.int32)
+    return bboxes, t_label.astype(np.int32), num_match
+
+
+def retinanet_bboxes_decode(boxes):
+    """Decode predicted offsets to corner boxes [y1, x1, y2, x2], clipped to [0, 1]."""
+    decoded = boxes.copy()
+    anchors = default_boxes.copy()
+    center_scale = config.prior_scaling[0]
+    size_scale = config.prior_scaling[1]
+
+    # Undo the encoding in place: first the centers, then the sizes.
+    decoded[:, :2] = decoded[:, :2] * center_scale * anchors[:, 2:] + anchors[:, :2]
+    decoded[:, 2:4] = np.exp(decoded[:, 2:4] * size_scale) * anchors[:, 2:4]
+
+    # Center form -> corner form.
+    half_sizes = decoded[:, [2, 3]] / 2
+    corners = np.zeros((len(decoded), 4), dtype=np.float32)
+    corners[:, [0, 1]] = decoded[:, [0, 1]] - half_sizes
+    corners[:, [2, 3]] = decoded[:, [0, 1]] + half_sizes
+
+    return np.clip(corners, 0, 1)
+
+
+def intersect(box_a, box_b):
+    """Return the overlap area between every row of `box_a` and the single box `box_b`."""
+    lower = np.maximum(box_a[:, :2], box_b[:2])
+    upper = np.minimum(box_a[:, 2:4], box_b[2:4])
+    # Negative extents mean no overlap along that axis.
+    extent = np.clip((upper - lower), a_min=0, a_max=np.inf)
+    return extent[:, 0] * extent[:, 1]
+
+
+def jaccard_numpy(box_a, box_b):
+    """Return the jaccard overlap (IoU) of every row of `box_a` with the single box `box_b`."""
+    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
+    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
+    overlap = intersect(box_a, box_b)
+    return overlap / (area_a + area_b - overlap)
--- a/model_zoo/official/cv/retinanet/src/coco_eval.py
+++ b/model_zoo/official/cv/retinanet/src/coco_eval.py
@ -0,0 +1,125 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Coco metrics utils"""
+
+import os
+import json
+import numpy as np
+from .config import config
+
+
+def apply_nms(all_boxes, all_scores, thres, max_boxes):
+    """Greedy non-maximum suppression.
+
+    Args:
+        all_boxes: array whose columns 0-3 are read as [y1, x1, y2, x2].
+        all_scores: one score per box.
+        thres: boxes whose overlap with a kept box exceeds this value are dropped.
+        max_boxes: stop once this many boxes have been kept.
+
+    Returns:
+        list of indices into `all_boxes`, in descending score order.
+    """
+    tops = all_boxes[:, 0]
+    lefts = all_boxes[:, 1]
+    bottoms = all_boxes[:, 2]
+    rights = all_boxes[:, 3]
+    areas = (rights - lefts + 1) * (bottoms - tops + 1)
+
+    remaining = all_scores.argsort()[::-1]
+    keep = []
+
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        keep.append(best)
+
+        if len(keep) >= max_boxes:
+            break
+
+        # Intersection of the kept box with every remaining candidate.
+        inter_w = np.maximum(0.0, np.minimum(rights[best], rights[rest]) - np.maximum(lefts[best], lefts[rest]) + 1)
+        inter_h = np.maximum(0.0, np.minimum(bottoms[best], bottoms[rest]) - np.maximum(tops[best], tops[rest]) + 1)
+        inter = inter_w * inter_h
+
+        overlap = inter / (areas[best] + areas[rest] - inter)
+
+        # Keep only the candidates that do not overlap the kept box too much.
+        remaining = rest[overlap <= thres]
+    return keep
+
+
+def metrics(pred_data):
+    """Calculate mAP of predicted bboxes.
+
+    Args:
+        pred_data: iterable of dicts with the keys 'boxes', 'box_scores', 'img_id'
+            and 'image_shape' (height, width), one dict per image.
+
+    Returns:
+        The first entry of the COCOeval summary statistics (`E.stats[0]`).
+
+    Side effects:
+        Writes the detections to 'predictions.json' in the current directory.
+    """
+    # Imported lazily so pycocotools is only required when evaluating.
+    from pycocotools.coco import COCO
+    from pycocotools.cocoeval import COCOeval
+    num_classes = config.num_classes
+
+    coco_root = config.coco_root
+    data_type = config.val_data_type
+
+    #Classes need to train or test.
+    # Map class index -> class name.
+    val_cls = config.coco_classes
+    val_cls_dict = {}
+    for i, cls in enumerate(val_cls):
+        val_cls_dict[i] = cls
+
+    anno_json = os.path.join(coco_root, config.instances_set.format(data_type))
+    coco_gt = COCO(anno_json)
+    # Map class name -> category id used by the annotation file.
+    classs_dict = {}
+    cat_ids = coco_gt.loadCats(coco_gt.getCatIds())
+    for cat in cat_ids:
+        classs_dict[cat["name"]] = cat["id"]
+
+    predictions = []
+    img_ids = []
+
+    for sample in pred_data:
+        pred_boxes = sample['boxes']
+        box_scores = sample['box_scores']
+        img_id = sample['img_id']
+        h, w = sample['image_shape']
+
+        final_boxes = []
+        final_label = []
+        final_score = []
+        img_ids.append(img_id)
+
+        # Index 0 is skipped: it is 'background' in `config.coco_classes`.
+        for c in range(1, num_classes):
+            class_box_scores = box_scores[:, c]
+            score_mask = class_box_scores > config.min_score
+            class_box_scores = class_box_scores[score_mask]
+            # Scale the boxes by the image height and width.
+            class_boxes = pred_boxes[score_mask] * [h, w, h, w]
+
+            if score_mask.any():
+                nms_index = apply_nms(class_boxes, class_box_scores, config.nms_thershold, config.max_boxes)
+                class_boxes = class_boxes[nms_index]
+                class_box_scores = class_box_scores[nms_index]
+
+                final_boxes += class_boxes.tolist()
+                final_score += class_box_scores.tolist()
+                final_label += [classs_dict[val_cls_dict[c]]] * len(class_box_scores)
+
+        for loc, label, score in zip(final_boxes, final_label, final_score):
+            res = {}
+            res['image_id'] = img_id
+            # [y1, x1, y2, x2] -> [x, y, width, height].
+            res['bbox'] = [loc[1], loc[0], loc[3] - loc[1], loc[2] - loc[0]]
+            res['score'] = score
+            res['category_id'] = label
+            predictions.append(res)
+    with open('predictions.json', 'w') as f:
+        json.dump(predictions, f)
+
+    coco_dt = coco_gt.loadRes('predictions.json')
+    E = COCOeval(coco_gt, coco_dt, iouType='bbox')
+    # Restrict the evaluation to the images that were actually predicted.
+    E.params.imgIds = img_ids
+    E.evaluate()
+    E.accumulate()
+    E.summarize()
+    return E.stats[0]
--- a/model_zoo/official/cv/retinanet/src/config.py
+++ b/model_zoo/official/cv/retinanet/src/config.py
@ -0,0 +1,86 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Config parameters for retinanet models."""
+
+from easydict import EasyDict as ed
+
+# Single shared configuration object; the other modules read it as `config.<key>`.
+config = ed({
+    "img_shape": [600, 600],
+    "num_retinanet_boxes": 67995,
+    # IoU threshold for anchor matching / overlap threshold for NMS (key spellings are part of the API).
+    "match_thershold": 0.5,
+    "nms_thershold": 0.6,
+    "min_score": 0.1,
+    "max_boxes": 100,
+
+    # learning rate settings
+    "global_step": 0,
+    "lr_init": 1e-6,
+    "lr_end_rate": 5e-3,
+    "warmup_epochs1": 2,
+    "warmup_epochs2": 5,
+    "warmup_epochs3": 23,
+    "warmup_epochs4": 60,
+    "warmup_epochs5": 160,
+    "momentum": 0.9,
+    "weight_decay": 1.5e-4,
+
+    # network
+    # The per-level lists below have one entry per feature level.
+    "num_default": [9, 9, 9, 9, 9],
+    "extras_out_channels": [256, 256, 256, 256, 256],
+    "feature_size": [75, 38, 19, 10, 5],
+    "aspect_ratios": [(0.5, 1.0, 2.0), (0.5, 1.0, 2.0), (0.5, 1.0, 2.0), (0.5, 1.0, 2.0), (0.5, 1.0, 2.0)],
+    "steps": (8, 16, 32, 64, 128),
+    "anchor_size": (32, 64, 128, 256, 512),
+    "prior_scaling": (0.1, 0.2),
+    "gamma": 2.0,
+    "alpha": 0.75,
+
+    # `mindrecord_dir` and `coco_root` are better to use absolute path.
+    "mindrecord_dir": "/data/hitwh/retinanet/MindRecord_COCO",
+    "coco_root": "/data/dataset/coco2017",
+    "train_data_type": "train2017",
+    "val_data_type": "val2017",
+    # `{}` is filled with the data type (e.g. `val_data_type`) when the annotation file is opened.
+    "instances_set": "/data/dataset/coco2017/annotations/instances_{}.json",
+    "coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+                     'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
+                     'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
+                     'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
+                     'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
+                     'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+                     'kite', 'baseball bat', 'baseball glove', 'skateboard',
+                     'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
+                     'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+                     'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+                     'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
+                     'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
+                     'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
+                     'refrigerator', 'book', 'clock', 'vase', 'scissors',
+                     'teddy bear', 'hair drier', 'toothbrush'),
+    # Length of `coco_classes`, background included.
+    "num_classes": 81,
+    # The annotation.json position of voc validation dataset.
+    "voc_root": "",
+    # voc original dataset.
+    "voc_dir": "",
+    # if coco or voc used, `image_dir` and `anno_path` are useless.
+    "image_dir": "",
+    "anno_path": "",
+    "save_checkpoint": True,
+    "save_checkpoint_epochs": 1,
+    "keep_checkpoint_max": 1,
+    "save_checkpoint_path": "./model",
+    "finish_epoch": 0,
+    # Checkpoint loaded by the evaluation script.
+    "checkpoint_path": "/home/hitwh1/1.0/ckpt_0/retinanet-500_458_59.ckpt"
+})
--- a/model_zoo/official/cv/retinanet/src/dataset.py
+++ b/model_zoo/official/cv/retinanet/src/dataset.py
--- a/model_zoo/official/cv/retinanet/src/init_params.py
+++ b/model_zoo/official/cv/retinanet/src/init_params.py
@ -0,0 +1,35 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Parameters utils"""
+
+from mindspore.common.initializer import initializer, TruncatedNormal
+
+def init_net_param(network, initialize_mode='TruncatedNormal'):
+    """Init the parameters in net.
+
+    Every trainable parameter whose name contains none of 'beta', 'gamma' or
+    'bias' is re-initialized; the others are left untouched.
+
+    Args:
+        network: network whose `trainable_params()` are re-initialized in place.
+        initialize_mode: 'TruncatedNormal' (default) uses `TruncatedNormal()`;
+            any other value is passed to `initializer` as the init method.
+    """
+    params = network.trainable_params()
+    for p in params:
+        if 'beta' not in p.name and 'gamma' not in p.name and 'bias' not in p.name:
+            if initialize_mode == 'TruncatedNormal':
+                init_method = TruncatedNormal()
+            else:
+                init_method = initialize_mode
+            # Fixed: the non-default branch used to hand the mode, shape and dtype straight
+            # to `set_data`; the new value has to be built by `initializer` first.
+            p.set_data(initializer(init_method, p.data.shape, p.data.dtype))
+
+
+
+def filter_checkpoint_parameter(param_dict):
+    """Delete, in place, every entry whose key names a multi-box loc/cls layer."""
+    unwanted_markers = ('multi_loc_layers', 'multi_cls_layers')
+    # Collect the keys first so the dict is not mutated while it is being iterated.
+    stale_keys = [key for key in param_dict if any(marker in key for marker in unwanted_markers)]
+    for key in stale_keys:
+        del param_dict[key]
--- a/model_zoo/official/cv/retinanet/src/lr_schedule.py
+++ b/model_zoo/official/cv/retinanet/src/lr_schedule.py
@ -0,0 +1,74 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Learning rate schedule"""
+
+import math
+import numpy as np
+
+
+def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs1, warmup_epochs2,
+           warmup_epochs3, warmup_epochs4, warmup_epochs5, total_epochs, steps_per_epoch):
+    """
+    generate learning rate array
+
+    The schedule has five consecutive warmup phases followed by a cosine decay
+    from `lr_max` to `lr_end`.
+
+    Args:
+       global_step(int): step to resume from; the returned array starts at this step
+       lr_init(float): init learning rate
+       lr_end(float): end learning rate
+       lr_max(float): max learning rate
+       warmup_epochs1..warmup_epochs5: length, in epochs, of each of the five warmup phases
+       total_epochs(int): total epoch of training
+       steps_per_epoch(int): steps of one epoch
+
+    Returns:
+       np.array, float32 learning rate for every step from `global_step` to the last step
+    """
+    lr_each_step = []
+    total_steps = steps_per_epoch * total_epochs
+    # Cumulative step index at which each warmup phase ends.
+    warmup_steps1 = steps_per_epoch * warmup_epochs1
+    warmup_steps2 = warmup_steps1 + steps_per_epoch * warmup_epochs2
+    warmup_steps3 = warmup_steps2 + steps_per_epoch * warmup_epochs3
+    warmup_steps4 = warmup_steps3 + steps_per_epoch * warmup_epochs4
+    warmup_steps5 = warmup_steps4 + steps_per_epoch * warmup_epochs5
+    for i in range(total_steps):
+        # Each warmup phase blends linearly from a fixed start value towards a
+        # fraction of lr_max that grows by 10x per phase (1e-4, 1e-3, ..., 1).
+        if i < warmup_steps1:
+            lr = lr_init*(warmup_steps1-i) / (warmup_steps1) + \
+            (lr_max*1e-4) * i / (warmup_steps1*3)
+        elif warmup_steps1 <= i < warmup_steps2:
+            lr = 1e-5*(warmup_steps2-i) / (warmup_steps2 - warmup_steps1) + \
+            (lr_max*1e-3) * (i-warmup_steps1) / (warmup_steps2 - warmup_steps1)
+        elif warmup_steps2 <= i < warmup_steps3:
+            lr = 1e-4*(warmup_steps3-i) / (warmup_steps3 - warmup_steps2) + \
+            (lr_max*1e-2) * (i-warmup_steps2) / (warmup_steps3 - warmup_steps2)
+        elif warmup_steps3 <= i < warmup_steps4:
+            lr = 1e-3*(warmup_steps4-i) / (warmup_steps4 - warmup_steps3) + \
+            (lr_max*1e-1) * (i-warmup_steps3) / (warmup_steps4 - warmup_steps3)
+        elif warmup_steps4 <= i < warmup_steps5:
+            lr = 1e-2*(warmup_steps5-i) / (warmup_steps5 - warmup_steps4) + \
+            lr_max  * (i-warmup_steps4) / (warmup_steps5 - warmup_steps4)
+        else:
+            # Cosine decay from lr_max down to lr_end over the remaining steps.
+            lr = lr_end + \
+                 (lr_max - lr_end) * \
+                 (1. + math.cos(math.pi * (i-warmup_steps5) / (total_steps - warmup_steps5))) / 2.
+        # Never emit a negative learning rate.
+        if lr < 0.0:
+            lr = 0.0
+        lr_each_step.append(lr)
+
+    # Drop the steps that were already trained.
+    current_step = global_step
+    lr_each_step = np.array(lr_each_step).astype(np.float32)
+    learning_rate = lr_each_step[current_step:]
+
+    return learning_rate
--- a/model_zoo/official/cv/retinanet/src/retinanet.py
+++ b/model_zoo/official/cv/retinanet/src/retinanet.py
--- a/model_zoo/official/cv/retinanet/train.py
+++ b/model_zoo/official/cv/retinanet/train.py
@ -0,0 +1,153 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""Train retinanet and get checkpoint files."""
+
+import os
+import argparse
+import ast
+import mindspore
+import mindspore.nn as nn
+from mindspore import context, Tensor
+from mindspore.communication.management import init, get_rank
+from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMonitor, TimeMonitor, Callback
+from mindspore.train import Model
+from mindspore.context import ParallelMode
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from mindspore.common import set_seed
+from src.retinanet import  retinanetWithLossCell, TrainingWrapper, retinanet50, resnet50
+from src.config import config
+from src.dataset import create_retinanet_dataset, create_mindrecord
+from src.lr_schedule import get_lr
+from src.init_params import init_net_param, filter_checkpoint_parameter
+
+
+# Seed MindSpore (mindspore.common.set_seed) with a fixed value at import time,
+# presumably so that training runs are reproducible -- TODO confirm intent.
+set_seed(1)
+class Monitor(Callback):
+    """
+    Print the scheduled learning rate at the end of every training step.
+
+    Args:
+        lr_init (numpy array): per-step learning rates; the entry at index
+            ``cur_step_num - 1`` is printed. May be None, in which case nothing is printed.
+
+    Returns:
+        None
+
+    Examples:
+        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
+    """
+
+    def __init__(self, lr_init=None):
+        super(Monitor, self).__init__()
+        self.lr_init = lr_init
+        # len(None) raises TypeError, so treat the default as an empty schedule.
+        self.lr_init_len = 0 if lr_init is None else len(lr_init)
+
+    def step_end(self, run_context):
+        """Print the learning rate of the step that just finished, if the schedule covers it."""
+        cb_params = run_context.original_args()
+        step_index = cb_params.cur_step_num - 1
+        # The schedule may be shorter than the run; skip the report rather than abort
+        # training with an IndexError or silently wrap around on a negative index.
+        if 0 <= step_index < self.lr_init_len:
+            print("lr:[{:8.6f}]".format(self.lr_init[step_index]), flush=True)
+
+def main():
+    """
+    Parse the command line and train retinanet on Ascend.
+
+    The function configures the MindSpore context (single device or data parallel),
+    creates the MindRecord file and, unless ``--only_create_dataset`` is true, builds the
+    network, optionally loads a pre-trained checkpoint, builds the learning-rate schedule
+    and the Momentum optimizer, and runs ``Model.train`` with time/loss/lr monitors and a
+    checkpoint callback.
+
+    Raises:
+        ValueError: if the run platform is not "Ascend".
+        KeyError: if ``--pre_trained`` is given while ``--pre_trained_epoch_size`` is not positive.
+    """
+    parser = argparse.ArgumentParser(description="retinanet training")
+    parser.add_argument("--only_create_dataset", type=ast.literal_eval, default=False,
+                        help="If set it true, only create Mindrecord, default is False.")
+    parser.add_argument("--distribute", type=ast.literal_eval, default=False,
+                        help="Run distribute, default is False.")
+    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
+    parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
+    parser.add_argument("--lr", type=float, default=0.1, help="Learning rate, default is 0.1.")
+    # NOTE(review): --mode is parsed but never read below; training always runs with
+    # dataset_sink_mode=True.
+    parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink.")
+    parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.")
+    parser.add_argument("--epoch_size", type=int, default=500, help="Epoch size, default is 500.")
+    parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.")
+    parser.add_argument("--pre_trained", type=str, default=None, help="Pretrained Checkpoint file path.")
+    parser.add_argument("--pre_trained_epoch_size", type=int, default=0, help="Pretrained epoch size.")
+    parser.add_argument("--save_checkpoint_epochs", type=int, default=1, help="Save checkpoint epochs, default is 1.")
+    parser.add_argument("--loss_scale", type=int, default=1024, help="Loss scale, default is 1024.")
+    parser.add_argument("--filter_weight", type=ast.literal_eval, default=False,
+                        help="Filter weight parameters, default is False.")
+    # choices must be a real tuple: ("Ascend") is just the string "Ascend", which makes
+    # argparse accept any substring of it and render the help as {A,s,c,e,n,d}.
+    parser.add_argument("--run_platform", type=str, default="Ascend", choices=("Ascend",),
+                        help="run platform, only support Ascend.")
+    args_opt = parser.parse_args()
+
+    if args_opt.run_platform == "Ascend":
+        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+        if args_opt.distribute:
+            # In distributed runs the device is taken from the DEVICE_ID environment
+            # variable (when it is numeric) instead of from --device_id.
+            if os.getenv("DEVICE_ID", "not_set").isdigit():
+                context.set_context(device_id=int(os.getenv("DEVICE_ID")))
+            init()
+            device_num = args_opt.device_num
+            rank = get_rank()
+            context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
+                                              device_num=device_num)
+        else:
+            rank = 0
+            device_num = 1
+            context.set_context(device_id=args_opt.device_id)
+
+    else:
+        # Defensive: argparse already restricts --run_platform to "Ascend".
+        raise ValueError("Unsupported platform.")
+
+    mindrecord_file = create_mindrecord(args_opt.dataset, "retinanet.mindrecord", True)
+
+    if not args_opt.only_create_dataset:
+        loss_scale = float(args_opt.loss_scale)
+
+        # When create MindDataset, using the first mindrecord file, such as retinanet.mindrecord0.
+        dataset = create_retinanet_dataset(mindrecord_file, repeat_num=1,
+                                           batch_size=args_opt.batch_size, device_num=device_num, rank=rank)
+
+        dataset_size = dataset.get_dataset_size()
+        print("Create dataset done!")
+
+        backbone = resnet50(config.num_classes)
+        retinanet = retinanet50(backbone, config)
+        net = retinanetWithLossCell(retinanet, config)
+        net.to_float(mindspore.float16)
+        init_net_param(net)
+
+        if args_opt.pre_trained:
+            # KeyError is kept (rather than ValueError) so the raised type does not change.
+            if args_opt.pre_trained_epoch_size <= 0:
+                raise KeyError("pre_trained_epoch_size must be greater than 0.")
+            param_dict = load_checkpoint(args_opt.pre_trained)
+            if args_opt.filter_weight:
+                filter_checkpoint_parameter(param_dict)
+            load_param_into_net(net, param_dict)
+
+        # --lr is the peak rate of the schedule; the final rate is a configured fraction of it.
+        lr = Tensor(get_lr(global_step=config.global_step,
+                           lr_init=config.lr_init, lr_end=config.lr_end_rate * args_opt.lr, lr_max=args_opt.lr,
+                           warmup_epochs1=config.warmup_epochs1, warmup_epochs2=config.warmup_epochs2,
+                           warmup_epochs3=config.warmup_epochs3, warmup_epochs4=config.warmup_epochs4,
+                           warmup_epochs5=config.warmup_epochs5, total_epochs=args_opt.epoch_size,
+                           steps_per_epoch=dataset_size))
+        opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
+                          config.momentum, config.weight_decay, loss_scale)
+        net = TrainingWrapper(net, opt, loss_scale)
+        model = Model(net)
+        print("Start train retinanet, the first epoch will be slower because of the graph compilation.")
+        callbacks = [TimeMonitor(), LossMonitor(), Monitor(lr_init=lr.asnumpy())]
+        config_ck = CheckpointConfig(save_checkpoint_steps=dataset_size * args_opt.save_checkpoint_epochs,
+                                     keep_checkpoint_max=config.keep_checkpoint_max)
+        ckpt_cb = ModelCheckpoint(prefix="retinanet", directory=config.save_checkpoint_path, config=config_ck)
+        # Distributed runs save checkpoints from rank 0 only; single-device runs always save.
+        if not args_opt.distribute or rank == 0:
+            callbacks.append(ckpt_cb)
+        model.train(args_opt.epoch_size, dataset, callbacks=callbacks, dataset_sink_mode=True)
+
+# Start training only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()