From ca6c1758098dad15ceff9e01166f3af20ff631e9 Mon Sep 17 00:00:00 2001
From: linqingke <linqingke@huawei.com>
Date: Tue, 9 Mar 2021 16:28:50 +0800
Subject: [PATCH] Fix ResNet pretrained model bug in Faster R-CNN.

---
 .../official/cv/faster_rcnn/src/config.py     | 10 ++---
 .../official/cv/faster_rcnn/src/dataset.py    | 43 +++++++++++++++----
 .../cv/faster_rcnn/src/lr_schedule.py         |  2 +-
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/model_zoo/official/cv/faster_rcnn/src/config.py b/model_zoo/official/cv/faster_rcnn/src/config.py
index 63d727a25b..c1cf903ac8 100644
--- a/model_zoo/official/cv/faster_rcnn/src/config.py
+++ b/model_zoo/official/cv/faster_rcnn/src/config.py
@@ -20,7 +20,7 @@ from easydict import EasyDict as ed
 config = ed({
     "img_width": 1280,
     "img_height": 768,
-    "keep_ratio": False,
+    "keep_ratio": True,
     "flip_ratio": 0.5,
     "expand_ratio": 1.0,
 
@@ -112,17 +112,17 @@ config = ed({
     "rpn_head_weight": 1.0,
 
     # LR
-    "base_lr": 0.02,
+    "base_lr": 0.04,
     "warmup_step": 500,
-    "warmup_ratio": 1/3.0,
+    "warmup_ratio": 1/16.0,
     "sgd_step": [8, 11],
     "sgd_momentum": 0.9,
 
     # train
     "batch_size": 2,
-    "loss_scale": 1,
+    "loss_scale": 256,
     "momentum": 0.91,
-    "weight_decay": 1e-4,
+    "weight_decay": 1e-5,
     "epoch_size": 12,
     "save_checkpoint": True,
     "save_checkpoint_epochs": 1,
diff --git a/model_zoo/official/cv/faster_rcnn/src/dataset.py b/model_zoo/official/cv/faster_rcnn/src/dataset.py
index 18fbd39add..d7abeb043b 100644
--- a/model_zoo/official/cv/faster_rcnn/src/dataset.py
+++ b/model_zoo/official/cv/faster_rcnn/src/dataset.py
@@ -164,16 +164,43 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """rescale operation for image"""
     img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
     if img_data.shape[0] > config.img_height:
-        img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
-        scale_factor = scale_factor * scale_factor2
-    img_shape = np.append(img_shape, scale_factor)
-    img_shape = np.asarray(img_shape, dtype=np.float32)
+        img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_height), return_scale=True)
+        scale_factor = scale_factor*scale_factor2
+
     gt_bboxes = gt_bboxes * scale_factor
+    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_data.shape[1] - 1)
+    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_data.shape[0] - 1)
 
-    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
-    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
+    pad_h = config.img_height - img_data.shape[0]
+    pad_w = config.img_width - img_data.shape[1]
+    assert ((pad_h >= 0) and (pad_w >= 0))
 
-    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+    pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype)
+    pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data
+
+    img_shape = (config.img_height, config.img_width, 1.0)
+    img_shape = np.asarray(img_shape, dtype=np.float32)
+
+    return  (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)
+
+def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
+    """Rescale an eval image, zero-pad it to the config size and append the scale factor to img_shape."""
+    img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
+    if img_data.shape[0] > config.img_height:  # height still exceeds the target after the first pass
+        img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_height), return_scale=True)
+        scale_factor = scale_factor*scale_factor2  # combined factor of both rescale passes
+
+    pad_h = config.img_height - img_data.shape[0]
+    pad_w = config.img_width - img_data.shape[1]
+    assert ((pad_h >= 0) and (pad_w >= 0))  # the rescaled image must fit inside the padded canvas
+
+    pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype)
+    pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data  # top-left corner; the rest stays zero
+
+    img_shape = np.append(img_shape, (scale_factor, scale_factor))  # scale factor stored twice after the input shape
+    img_shape = np.asarray(img_shape, dtype=np.float32)
+
+    return  (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)  # gt_bboxes/gt_label/gt_num pass through unchanged
 
 
 def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
@@ -274,7 +301,7 @@ def preprocess_fn(image, box, is_training):
         input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
 
         if config.keep_ratio:
-            input_data = rescale_column(*input_data)
+            input_data = rescale_column_test(*input_data)
         else:
             input_data = resize_column_test(*input_data)
         input_data = imnormalize_column(*input_data)
diff --git a/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py b/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py
index d5440246de..0bc55f9e8e 100644
--- a/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py
+++ b/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py
@@ -28,7 +28,7 @@ def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps):
 def dynamic_lr(config, steps_per_epoch):
     """dynamic learning rate generator"""
     base_lr = config.base_lr
-    total_steps = steps_per_epoch * config.epoch_size
+    total_steps = steps_per_epoch * (config.epoch_size + 1)
     warmup_steps = int(config.warmup_step)
     lr = []
     for i in range(total_steps):