diff --git a/model_zoo/official/cv/ctpn/eval.py b/model_zoo/official/cv/ctpn/eval.py index 9fcb2c557c..17e3bfa075 100644 --- a/model_zoo/official/cv/ctpn/eval.py +++ b/model_zoo/official/cv/ctpn/eval.py @@ -65,7 +65,7 @@ def ctpn_infer_test(dataset_path='', ckpt_path='', img_dir=''): start = time.time() # run net - output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num) + output = net(img_data, gt_bboxes, gt_labels, gt_num) gt_bboxes = gt_bboxes.asnumpy() gt_labels = gt_labels.asnumpy() gt_num = gt_num.asnumpy().astype(bool) diff --git a/model_zoo/official/cv/ctpn/src/ctpn.py b/model_zoo/official/cv/ctpn/src/ctpn.py index 3656a62c99..80c0f96077 100644 --- a/model_zoo/official/cv/ctpn/src/ctpn.py +++ b/model_zoo/official/cv/ctpn/src/ctpn.py @@ -119,19 +119,14 @@ class CTPN(nn.Cell): config.activate_num_classes, config.use_sigmoid_cls) self.proposal_generator_test.set_train_local(config, False) - def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids): - # (1,3,600,900) + def construct(self, img_data, gt_bboxes, gt_labels, gt_valids, img_metas=None): x = self.vgg16_feature_extractor(img_data) x = self.conv(x) x = self.cast(x, mstype.float16) - # (1, 512, 38, 57) x = self.transpose(x, (0, 2, 1, 3)) x = self.reshape(x, (-1, self.input_size, self.num_step)) x = self.transpose(x, (2, 0, 1)) - # (57, 38, 512) x = self.rnn(x) - # (57, 38, 256) - #x = self.cast(x, mstype.float32) rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss = self.rpn_with_loss(x, img_metas, self.anchor_list, diff --git a/model_zoo/official/cv/ctpn/src/dataset.py b/model_zoo/official/cv/ctpn/src/dataset.py index 03acea4b62..79b0db7feb 100644 --- a/model_zoo/official/cv/ctpn/src/dataset.py +++ b/model_zoo/official/cv/ctpn/src/dataset.py @@ -15,7 +15,6 @@ """CTPN dataset""" from __future__ import division -import os import numpy as np from numpy import random import mmcv @@ -23,7 +22,6 @@ import mindspore.dataset as de import mindspore.dataset.vision.c_transforms as C import mindspore.dataset.transforms.c_transforms as CC import mindspore.common.dtype as mstype -from mindspore.mindrecord import FileWriter from src.config import config class PhotoMetricDistortion: @@ -98,7 +96,7 @@ class Expand: boxes += np.tile((left, top), 2) return img, boxes, labels -def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num): +def rescale_column(img, gt_bboxes, gt_label, gt_num, img_shape): """rescale operation for image""" img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True) if img_data.shape[0] > config.img_height: @@ -112,10 +110,10 @@ def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num): gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) - return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + return (img_data, gt_bboxes, gt_label, gt_num, img_shape) -def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num): +def resize_column(img, gt_bboxes, gt_label, gt_num, img_shape): """resize operation for image""" img_data = img img_data, w_scale, h_scale = mmcv.imresize( @@ -129,10 +127,10 @@ def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num): gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) - return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + return (img_data, gt_bboxes, gt_label, gt_num, img_shape) -def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num): +def resize_column_test(img, gt_bboxes, gt_label, gt_num, img_shape): """resize operation for image of eval""" img_data = img img_data, w_scale, h_scale = mmcv.imresize( @@ -149,34 +147,34 @@ def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num): gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) - return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + return (img_data, gt_bboxes, gt_label, gt_num, img_shape) -def flipped_generation(img, img_shape, gt_bboxes, gt_label, gt_num): +def flipped_generation(img, gt_bboxes, gt_label, gt_num, img_shape): """flipped generation""" img_data = img flipped = gt_bboxes.copy() _, w, _ = img_data.shape flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1 flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1 - return (img_data, img_shape, flipped, gt_label, gt_num) + return (img_data, flipped, gt_label, gt_num, img_shape) -def image_bgr_rgb(img, img_shape, gt_bboxes, gt_label, gt_num): +def image_bgr_rgb(img, gt_bboxes, gt_label, gt_num, img_shape): img_data = img[:, :, ::-1] - return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + return (img_data, gt_bboxes, gt_label, gt_num, img_shape) -def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num): +def photo_crop_column(img, gt_bboxes, gt_label, gt_num, img_shape): """photo crop operation for image""" random_photo = PhotoMetricDistortion() img_data, gt_bboxes, gt_label = random_photo(img, gt_bboxes, gt_label) - return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + return (img_data, gt_bboxes, gt_label, gt_num, img_shape) -def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num): +def expand_column(img, gt_bboxes, gt_label, gt_num, img_shape): """expand operation for image""" expand = Expand() img, gt_bboxes, gt_label = expand(img, gt_bboxes, gt_label) - return (img, img_shape, gt_bboxes, gt_label, gt_num) + return (img, gt_bboxes, gt_label, gt_num, img_shape) def split_gtbox_label(gt_bbox_total): """split ground truth box label""" @@ -193,7 +191,7 @@ def split_gtbox_label(gt_bbox_total): gtbox_list.append([x0, gt_bbox[1], x0+15, gt_bbox[3], 1]) return np.array(gtbox_list) -def pad_label(img, img_shape, gt_bboxes, gt_label, gt_valid): +def pad_label(img, gt_bboxes, gt_label, gt_valid, img_shape): """pad ground truth label""" pad_max_number = 256 gt_label = gt_bboxes[:, 4] @@ -208,13 +206,13 @@ def pad_label(img, img_shape, gt_bboxes, gt_label, gt_valid): gt_box = gt_bboxes[0:pad_max_number] gt_label = gt_label[0:pad_max_number] gt_valid = gt_valid[0:pad_max_number] - return (img, img_shape, gt_box[:, :4], gt_label, gt_valid) + return (img, gt_box[:, :4], gt_label, gt_valid, img_shape) def preprocess_fn(image, box, is_training): """Preprocess function for dataset.""" - def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_valid): + def _infer_data(image_bgr, gt_box_new, gt_label_new, gt_valid, image_shape): image_shape = image_shape[:2] - input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_valid + input_data = image_bgr, gt_box_new, gt_label_new, gt_valid, image_shape if config.keep_ratio: input_data = rescale_column(*input_data) else: @@ -234,9 +232,9 @@ def preprocess_fn(image, box, is_training): gt_box = box[:, :4] gt_label = box[:, 4] gt_valid = box[:, 4] - input_data = image_bgr, image_shape, gt_box, gt_label, gt_valid + input_data = image_bgr, gt_box, gt_label, gt_valid, image_shape if not is_training: - return _infer_data(image_bgr, image_shape, gt_box, gt_label, gt_valid) + return _infer_data(image_bgr, gt_box, gt_label, gt_valid, image_shape) expand = (np.random.rand() < config.expand_ratio) if expand: input_data = expand_column(*input_data) @@ -260,46 +258,6 @@ def anno_parser(annos_str): annos.append(anno) return annos -def filter_valid_data(image_dir, anno_path): - """Filter valid image file, which both in image_dir and anno_path.""" - image_files = [] - image_anno_dict = {} - if not os.path.isdir(image_dir): - raise RuntimeError("Path given is not valid.") - if not os.path.isfile(anno_path): - raise RuntimeError("Annotation file is not valid.") - - with open(anno_path, "rb") as f: - lines = f.readlines() - for line in lines: - line_str = line.decode("utf-8").strip() - line_split = str(line_str).split(' ') - file_name = line_split[0] - image_path = os.path.join(image_dir, file_name) - if os.path.isfile(image_path): - image_anno_dict[image_path] = anno_parser(line_split[1:]) - image_files.append(image_path) - return image_files, image_anno_dict - -def data_to_mindrecord_byte_image(is_training=True, prefix="cptn_mlt.mindrecord", file_num=8): - """Create MindRecord file.""" - mindrecord_dir = config.mindrecord_dir - mindrecord_path = os.path.join(mindrecord_dir, prefix) - writer = FileWriter(mindrecord_path, file_num) - image_files, image_anno_dict = create_icdar_test_label() - ctpn_json = { - "image": {"type": "bytes"}, - "annotation": {"type": "int32", "shape": [-1, 6]}, - } - writer.add_schema(ctpn_json, "ctpn_json") - for image_name in image_files: - with open(image_name, 'rb') as f: - img = f.read() - annos = np.array(image_anno_dict[image_name], dtype=np.int32) - row = {"image": img, "annotation": annos} - writer.write_raw_data([row]) - writer.commit() - def create_ctpn_dataset(mindrecord_file, batch_size=1, repeat_num=1, device_num=1, rank_id=0, is_training=True, num_parallel_workers=12): """Creatr ctpn dataset with MindDataset.""" @@ -316,8 +274,8 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, repeat_num=1, device_num= type_cast3 = CC.TypeCast(mstype.bool_) if is_training: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], - output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], + output_columns=["image", "box", "label", "valid_num", "image_shape"], + column_order=["image", "box", "label", "valid_num", "image_shape"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"], @@ -329,8 +287,8 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, repeat_num=1, device_num= else: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], - output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], + output_columns=["image", "box", "label", "valid_num", "image_shape"], + column_order=["image", "box", "label", "valid_num", "image_shape"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) diff --git a/model_zoo/official/cv/ctpn/src/network_define.py b/model_zoo/official/cv/ctpn/src/network_define.py index f352720518..2ab20e8aa2 100644 --- a/model_zoo/official/cv/ctpn/src/network_define.py +++ b/model_zoo/official/cv/ctpn/src/network_define.py @@ -99,8 +99,8 @@ class WithLossCell(nn.Cell): self._backbone = backbone self._loss_fn = loss_fn - def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num): - rpn_loss, _, _, rpn_cls_loss, rpn_reg_loss = self._backbone(x, img_shape, gt_bboxe, gt_label, gt_num) + def construct(self, x, gt_bbox, gt_label, gt_num, img_shape=None): + rpn_loss, _, _, rpn_cls_loss, rpn_reg_loss = self._backbone(x, gt_bbox, gt_label, gt_num, img_shape) return self._loss_fn(rpn_loss, rpn_cls_loss, rpn_reg_loss) @property @@ -144,10 +144,10 @@ class TrainOneStepCell(nn.Cell): if reduce_flag: self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) - def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num): + def construct(self, x, gt_bbox, gt_label, gt_num, img_shape=None): weights = self.weights - rpn_loss, _, _, rpn_cls_loss, rpn_reg_loss = self.backbone(x, img_shape, gt_bboxe, gt_label, gt_num) - grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) + rpn_loss, _, _, rpn_cls_loss, rpn_reg_loss = self.backbone(x, gt_bbox, gt_label, gt_num, img_shape) + grads = self.grad(self.network, weights)(x, gt_bbox, gt_label, gt_num, img_shape, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) return F.depend(rpn_loss, self.optimizer(grads)), rpn_cls_loss, rpn_reg_loss