From ca6c1758098dad15ceff9e01166f3af20ff631e9 Mon Sep 17 00:00:00 2001
From: linqingke
Date: Tue, 9 Mar 2021 16:28:50 +0800
Subject: [PATCH] fix resnet pretrain model bug.

---
 .../official/cv/faster_rcnn/src/config.py     | 10 ++---
 .../official/cv/faster_rcnn/src/dataset.py    | 43 +++++++++++++++----
 .../cv/faster_rcnn/src/lr_schedule.py         |  2 +-
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/model_zoo/official/cv/faster_rcnn/src/config.py b/model_zoo/official/cv/faster_rcnn/src/config.py
index 63d727a25b..c1cf903ac8 100644
--- a/model_zoo/official/cv/faster_rcnn/src/config.py
+++ b/model_zoo/official/cv/faster_rcnn/src/config.py
@@ -20,7 +20,7 @@ from easydict import EasyDict as ed
 config = ed({
     "img_width": 1280,
     "img_height": 768,
-    "keep_ratio": False,
+    "keep_ratio": True,
     "flip_ratio": 0.5,
     "expand_ratio": 1.0,
 
@@ -112,17 +112,17 @@ config = ed({
     "rpn_head_weight": 1.0,
 
     # LR
-    "base_lr": 0.02,
+    "base_lr": 0.04,
     "warmup_step": 500,
-    "warmup_ratio": 1/3.0,
+    "warmup_ratio": 1/16.0,
     "sgd_step": [8, 11],
     "sgd_momentum": 0.9,
 
     # train
     "batch_size": 2,
-    "loss_scale": 1,
+    "loss_scale": 256,
     "momentum": 0.91,
-    "weight_decay": 1e-4,
+    "weight_decay": 1e-5,
     "epoch_size": 12,
     "save_checkpoint": True,
     "save_checkpoint_epochs": 1,
diff --git a/model_zoo/official/cv/faster_rcnn/src/dataset.py b/model_zoo/official/cv/faster_rcnn/src/dataset.py
index 18fbd39add..d7abeb043b 100644
--- a/model_zoo/official/cv/faster_rcnn/src/dataset.py
+++ b/model_zoo/official/cv/faster_rcnn/src/dataset.py
@@ -164,16 +164,43 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
     """rescale operation for image"""
     img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
     if img_data.shape[0] > config.img_height:
-        img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
-        scale_factor = scale_factor * scale_factor2
-    img_shape = np.append(img_shape, scale_factor)
-    img_shape = np.asarray(img_shape, dtype=np.float32)
+        img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_height), return_scale=True)
+        scale_factor = scale_factor*scale_factor2
+
     gt_bboxes = gt_bboxes * scale_factor
+    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_data.shape[1] - 1)
+    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_data.shape[0] - 1)
 
-    gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
-    gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)
+    pad_h = config.img_height - img_data.shape[0]
+    pad_w = config.img_width - img_data.shape[1]
+    assert ((pad_h >= 0) and (pad_w >= 0))
 
-    return (img_data, img_shape, gt_bboxes, gt_label, gt_num)
+    pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype)
+    pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data
+
+    img_shape = (config.img_height, config.img_width, 1.0)
+    img_shape = np.asarray(img_shape, dtype=np.float32)
+
+    return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)
+
+def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
+    """rescale operation for image of eval"""
+    img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
+    if img_data.shape[0] > config.img_height:
+        img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_height), return_scale=True)
+        scale_factor = scale_factor*scale_factor2
+
+    pad_h = config.img_height - img_data.shape[0]
+    pad_w = config.img_width - img_data.shape[1]
+    assert ((pad_h >= 0) and (pad_w >= 0))
+
+    pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype)
+    pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data
+
+    img_shape = np.append(img_shape, (scale_factor, scale_factor))
+    img_shape = np.asarray(img_shape, dtype=np.float32)
+
+    return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num)
 
 
 def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
@@ -274,7 +301,7 @@ def preprocess_fn(image, box, is_training):
         input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert
 
         if config.keep_ratio:
-            input_data = rescale_column(*input_data)
+            input_data = rescale_column_test(*input_data)
         else:
             input_data = resize_column_test(*input_data)
         input_data = imnormalize_column(*input_data)
diff --git a/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py b/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py
index d5440246de..0bc55f9e8e 100644
--- a/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py
+++ b/model_zoo/official/cv/faster_rcnn/src/lr_schedule.py
@@ -28,7 +28,7 @@ def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps):
 def dynamic_lr(config, steps_per_epoch):
     """dynamic learning rate generator"""
     base_lr = config.base_lr
-    total_steps = steps_per_epoch * config.epoch_size
+    total_steps = steps_per_epoch * (config.epoch_size + 1)
     warmup_steps = int(config.warmup_step)
     lr = []
     for i in range(total_steps):