update config

5 years ago · 6832ca029f
parent 09d8cb6d98
commit 6832ca029f
7 changed files with 197 additions and 132 deletions
--- a/configs/rec/rec_r50fpn_vd_none_srn_pvam_test_all.yml
+++ b/configs/rec/rec_r50fpn_vd_none_srn_pvam_test_all.yml
@@ -17,11 +17,12 @@ Global:
  average_window: 0.15
  max_average_window: 15625
  min_average_window: 10000
-  reader_yml: ./configs/rec/rec_srn_reader.yml
+  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights: 
  checkpoints:
  save_inference_dir:
-  
+  infer_img:
+
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -118,15 +118,14 @@ class LMDBReader(object):
                image_file_list = get_image_file_list(self.infer_img)
                for single_img in image_file_list:
                    img = cv2.imread(single_img)
-                    if img.shape[-1]==1 or len(list(img.shape))==2:
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    if self.loss_type == 'srn':
                        norm_img = process_image_srn(
                            img=img,
                            image_shape=self.image_shape,
                            num_heads=self.num_heads,
-                            max_text_length=self.max_text_length
-                        )
+                            max_text_length=self.max_text_length)
                    else:
                        norm_img = process_image(
                            img=img,
@@ -135,20 +134,20 @@ class LMDBReader(object):
                            tps=self.use_tps,
                            infer_mode=True)
                    yield norm_img
-            elif self.mode == 'test':
-                image_file_list = get_image_file_list(self.infer_img)
-                for single_img in image_file_list:
-                    img = cv2.imread(single_img)
-                    if img.shape[-1]==1 or len(list(img.shape))==2:
-                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                    norm_img = process_image(
-                        img=img,
-                        image_shape=self.image_shape,
-                        char_ops=self.char_ops,
-                        tps=self.use_tps,
-                        infer_mode=True
-                    )
-                    yield norm_img
+            #elif self.mode == 'eval':
+            #    image_file_list = get_image_file_list(self.infer_img)
+            #    for single_img in image_file_list:
+            #        img = cv2.imread(single_img)
+            #        if img.shape[-1]==1 or len(list(img.shape))==2:
+            #            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+            #        norm_img = process_image(
+            #            img=img,
+            #            image_shape=self.image_shape,
+            #            char_ops=self.char_ops,
+            #            tps=self.use_tps,
+            #            infer_mode=True
+            #        )
+            #        yield norm_img
            else:
                lmdb_sets = self.load_hierarchical_lmdb_dataset()
                if process_id == 0:
@@ -169,14 +168,15 @@ class LMDBReader(object):
                            img, label = sample_info
                            outs = []
                            if self.loss_type == "srn":
-                                outs = process_image_srn(img, self.image_shape, self.num_heads,
-                                                         self.max_text_length, label,
-                                                         self.char_ops, self.loss_type)
+                                outs = process_image_srn(
+                                    img, self.image_shape, self.num_heads,
+                                    self.max_text_length, label, self.char_ops,
+                                    self.loss_type)

                            else:
-                                outs = process_image(img, self.image_shape, label,
-                                                    self.char_ops, self.loss_type,
-                                                    self.max_text_length)
+                                outs = process_image(
+                                    img, self.image_shape, label, self.char_ops,
+                                    self.loss_type, self.max_text_length)
                            if outs is None:
                                continue
                            yield outs
@@ -184,6 +184,7 @@ class LMDBReader(object):
                    if finish_read_num == len(lmdb_sets):
                        break
                self.close_lmdb_dataset(lmdb_sets)
+
        def batch_iter_reader():
            batch_outs = []
            for outs in sample_iter_reader():
@@ -311,4 +312,4 @@ class SimpleReader(object):

        if self.infer_img is None:
            return batch_iter_reader
-        return sample_iter_reader
+        return sample_iter_reader
--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@@ -79,17 +79,45 @@ class RecModel(object):
                feed_list = [image, label_in, label_out]
                labels = {'label_in': label_in, 'label_out': label_out}
            elif self.loss_type == "srn":
-                encoder_word_pos = fluid.data(name="encoder_word_pos", shape=[-1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)), 1], dtype="int64")
-                gsrm_word_pos = fluid.data(name="gsrm_word_pos", shape=[-1, self.max_text_length, 1], dtype="int64")
-                gsrm_slf_attn_bias1 = fluid.data(name="gsrm_slf_attn_bias1", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
-                gsrm_slf_attn_bias2 = fluid.data(name="gsrm_slf_attn_bias2", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
-                lbl_weight = fluid.layers.data(name="lbl_weight", shape=[-1, 1], dtype='int64')
+                encoder_word_pos = fluid.data(
+                    name="encoder_word_pos",
+                    shape=[
+                        -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
+                        1
+                    ],
+                    dtype="int64")
+                gsrm_word_pos = fluid.data(
+                    name="gsrm_word_pos",
+                    shape=[-1, self.max_text_length, 1],
+                    dtype="int64")
+                gsrm_slf_attn_bias1 = fluid.data(
+                    name="gsrm_slf_attn_bias1",
+                    shape=[
+                        -1, self.num_heads, self.max_text_length,
+                        self.max_text_length
+                    ])
+                gsrm_slf_attn_bias2 = fluid.data(
+                    name="gsrm_slf_attn_bias2",
+                    shape=[
+                        -1, self.num_heads, self.max_text_length,
+                        self.max_text_length
+                    ])
+                lbl_weight = fluid.layers.data(
+                    name="lbl_weight", shape=[-1, 1], dtype='int64')
                label = fluid.data(
                    name='label', shape=[-1, 1], dtype='int32', lod_level=1)
-                feed_list = [image, label, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight]
-                labels = {'label': label, 'encoder_word_pos': encoder_word_pos,
-                          'gsrm_word_pos': gsrm_word_pos, 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
-                          'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,'lbl_weight':lbl_weight}
+                feed_list = [
+                    image, label, encoder_word_pos, gsrm_word_pos,
+                    gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight
+                ]
+                labels = {
+                    'label': label,
+                    'encoder_word_pos': encoder_word_pos,
+                    'gsrm_word_pos': gsrm_word_pos,
+                    'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
+                    'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,
+                    'lbl_weight': lbl_weight
+                }
            else:
                label = fluid.data(
                    name='label', shape=[None, 1], dtype='int32', lod_level=1)
@@ -112,15 +140,41 @@ class RecModel(object):
                        "We set img_shape to be the same , it may affect the inference effect"
                    )
                    image_shape = deepcopy(self.image_shape)
-                    image = fluid.data(name='image', shape=image_shape, dtype='float32')
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            if self.loss_type == "srn":
-                encoder_word_pos = fluid.data(name="encoder_word_pos", shape=[-1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)), 1], dtype="int64")
-                gsrm_word_pos = fluid.data(name="gsrm_word_pos", shape=[-1, self.max_text_length, 1], dtype="int64")
-                gsrm_slf_attn_bias1 = fluid.data(name="gsrm_slf_attn_bias1", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
-                gsrm_slf_attn_bias2 = fluid.data(name="gsrm_slf_attn_bias2", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
-                feed_list = [image, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2]
-                labels = {'encoder_word_pos': encoder_word_pos, 'gsrm_word_pos': gsrm_word_pos,
-                        'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1, 'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2}
+                encoder_word_pos = fluid.data(
+                    name="encoder_word_pos",
+                    shape=[
+                        -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
+                        1
+                    ],
+                    dtype="int64")
+                gsrm_word_pos = fluid.data(
+                    name="gsrm_word_pos",
+                    shape=[-1, self.max_text_length, 1],
+                    dtype="int64")
+                gsrm_slf_attn_bias1 = fluid.data(
+                    name="gsrm_slf_attn_bias1",
+                    shape=[
+                        -1, self.num_heads, self.max_text_length,
+                        self.max_text_length
+                    ])
+                gsrm_slf_attn_bias2 = fluid.data(
+                    name="gsrm_slf_attn_bias2",
+                    shape=[
+                        -1, self.num_heads, self.max_text_length,
+                        self.max_text_length
+                    ])
+                feed_list = [
+                    image, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
+                    gsrm_slf_attn_bias2
+                ]
+                labels = {
+                    'encoder_word_pos': encoder_word_pos,
+                    'gsrm_word_pos': gsrm_word_pos,
+                    'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
+                    'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2
+                }
        return image, labels, loader

    def __call__(self, mode):
@@ -140,8 +194,13 @@ class RecModel(object):
                label = labels['label']
            if self.loss_type == 'srn':
                total_loss, img_loss, word_loss = self.loss(predicts, labels)
-                outputs = {'total_loss':total_loss, 'img_loss':img_loss, 'word_loss':word_loss,
-                           'decoded_out':decoded_out, 'label':label}
+                outputs = {
+                    'total_loss': total_loss,
+                    'img_loss': img_loss,
+                    'word_loss': word_loss,
+                    'decoded_out': decoded_out,
+                    'label': label
+                }
            else:
                outputs = {'total_loss':loss, 'decoded_out':\
                    decoded_out, 'label':label}
@@ -156,4 +215,4 @@ class RecModel(object):
            predict = predicts['predict']
            if self.loss_type == "ctc":
                predict = fluid.layers.softmax(predict)
-            return loader, {'decoded_out': decoded_out, 'predicts': predict}
+            return loader, {'decoded_out': decoded_out, 'predicts': predict}
--- a/ppocr/modeling/heads/self_attention/model.py
+++ b/ppocr/modeling/heads/self_attention/model.py
--- a/tools/eval_utils/eval_rec_utils.py
+++ b/tools/eval_utils/eval_rec_utils.py
@@ -61,7 +61,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
            img_list.append(data[ino][0])
            label_list.append(data[ino][1])

-        if config['Global']['loss_type'] != "srn": 
+        if config['Global']['loss_type'] != "srn":
            img_list = np.concatenate(img_list, axis=0)
            outs = exe.run(eval_info_dict['program'], \
                       feed={'image': img_list}, \
@@ -75,7 +75,8 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
                preds_lod = outs[0].lod()[0]
            labels, labels_lod = convert_rec_label_to_lod(label_list)
            acc, acc_num, sample_num = cal_predicts_accuracy(
-                char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate)
+                char_ops, preds, preds_lod, labels, labels_lod,
+                is_remove_duplicate)
        else:
            encoder_word_pos_list = []
            gsrm_word_pos_list = []
@@ -89,15 +90,19 @@ def eval_rec_run(exe, config, eval_info_dict, mode):

            img_list = np.concatenate(img_list, axis=0)
            label_list = np.concatenate(label_list, axis=0)
-            encoder_word_pos_list = np.concatenate(encoder_word_pos_list, axis=0).astype(np.int64)
-            gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list, axis=0).astype(np.int64)
-            gsrm_slf_attn_bias1_list = np.concatenate(gsrm_slf_attn_bias1_list, axis=0).astype(np.float32)
-            gsrm_slf_attn_bias2_list = np.concatenate(gsrm_slf_attn_bias2_list, axis=0).astype(np.float32)
+            encoder_word_pos_list = np.concatenate(
+                encoder_word_pos_list, axis=0).astype(np.int64)
+            gsrm_word_pos_list = np.concatenate(
+                gsrm_word_pos_list, axis=0).astype(np.int64)
+            gsrm_slf_attn_bias1_list = np.concatenate(
+                gsrm_slf_attn_bias1_list, axis=0).astype(np.float32)
+            gsrm_slf_attn_bias2_list = np.concatenate(
+                gsrm_slf_attn_bias2_list, axis=0).astype(np.float32)

            labels = label_list

            outs = exe.run(eval_info_dict['program'], \
-                       feed={'image': img_list, 'encoder_word_pos': encoder_word_pos_list, 
+                       feed={'image': img_list, 'encoder_word_pos': encoder_word_pos_list,
                             'gsrm_word_pos': gsrm_word_pos_list, 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1_list,
                             'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2_list}, \
                       fetch_list=eval_info_dict['fetch_varname_list'], \
@@ -108,7 +113,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):

        total_acc_num += acc_num
        total_sample_num += sample_num
-        logger.info("eval batch id: {}, acc: {}".format(total_batch_num, acc))
+        #logger.info("eval batch id: {}, acc: {}".format(total_batch_num, acc))
        total_batch_num += 1
    avg_acc = total_acc_num * 1.0 / total_sample_num
    metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num, \
--- a/tools/program.py
+++ b/tools/program.py
@@ -34,6 +34,7 @@ from ppocr.utils.save_load import save_model
 import numpy as np
 from ppocr.utils.character import cal_predicts_accuracy, cal_predicts_accuracy_srn, CharacterOps

+
 class ArgsParser(ArgumentParser):
    def __init__(self):
        super(ArgsParser, self).__init__(
@@ -196,10 +197,13 @@ def build(config, main_prog, startup_prog, mode):
                if config['Global']["loss_type"] == 'srn':
                    model_average = fluid.optimizer.ModelAverage(
                        config['Global']['average_window'],
-                        min_average_window=config['Global']['min_average_window'],
-                        max_average_window=config['Global']['max_average_window'])
+                        min_average_window=config['Global'][
+                            'min_average_window'],
+                        max_average_window=config['Global'][
+                            'max_average_window'])

-    return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name,model_average)
+    return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name,
+            model_average)


 def build_export(config, main_prog, startup_prog):
@@ -398,6 +402,7 @@ def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
            save_model(train_info_dict['train_program'], save_path)
    return

+
 def preprocess():
    FLAGS = ArgsParser().parse_args()
    config = load_config(FLAGS.config)
@@ -409,8 +414,8 @@ def preprocess():
    check_gpu(use_gpu)

    alg = config['Global']['algorithm']
-    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
-    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
+    assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN']
+    if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN']:
        config['Global']['char_ops'] = CharacterOps(config['Global'])

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
--- a/1
+++ b/1
@@ -0,0 +1 @@
+/workspace/PaddleOCR/train_data/