添加tps网络

4 years ago · 2f9f258ff4
parent ff0f23d495
commit 2f9f258ff4
4 changed files with 398 additions and 6 deletions
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@ -0,0 +1,100 @@
+Global:
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/r34_vd_tps_bilstm_ctc/
+  save_epoch_step: 3
+  # evaluation is run every 2000 iterations after the 0th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  character_dict_path: 
+  character_type: en
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0005
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+    name: TPS
+    num_fiducial: 20
+    loc_lr: 0.1
+    model_name: small
+  Backbone:
+    name: ResNet
+    layers: 34
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 256
+  Head:
+    name: CTCHead
+    fc_decay: 0
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/validation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 4
--- a/ppocr/modeling/architectures/base_model.py
+++ b/ppocr/modeling/architectures/base_model.py
@ -16,13 +16,14 @@ from __future__ import division
 from __future__ import print_function

 from paddle import nn
-
+from ppocr.modeling.transform import build_transform
 from ppocr.modeling.backbones import build_backbone
 from ppocr.modeling.necks import build_neck
 from ppocr.modeling.heads import build_head

 __all__ = ['BaseModel']

+
 class BaseModel(nn.Layer):
    def __init__(self, config):
        """
@ -31,7 +32,7 @@ class BaseModel(nn.Layer):
            config (dict): the super parameters for module.
        """
        super(BaseModel, self).__init__()
-        
+
        in_channels = config.get('in_channels', 3)
        model_type = config['model_type']
        # build transform
@ -50,7 +51,7 @@ class BaseModel(nn.Layer):
        config["Backbone"]['in_channels'] = in_channels
        self.backbone = build_backbone(config["Backbone"], model_type)
        in_channels = self.backbone.out_channels
-        
+
        # build neck
        # for rec, neck can be cnn,rnn or reshape(None)
        # for det, neck can be FPN, BIFPN and so on.
@ -62,7 +63,7 @@ class BaseModel(nn.Layer):
            config['Neck']['in_channels'] = in_channels
            self.neck = build_neck(config['Neck'])
            in_channels = self.neck.out_channels
-        
+
        # build head; a head is needed for det, rec and cls
        config["Head"]['in_channels'] = in_channels
        self.head = build_head(config["Head"])
@ -74,4 +75,4 @@ class BaseModel(nn.Layer):
        if self.use_neck:
            x = self.neck(x)
        x = self.head(x)
-        return x
+        return x
--- a/ppocr/modeling/transform/__init__.py
+++ b/ppocr/modeling/transform/__init__.py
@ -16,7 +16,9 @@ __all__ = ['build_transform']


 def build_transform(config):
-    support_dict = ['']
+    from .tps import TPS
+
+    support_dict = ['TPS']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
--- a/ppocr/modeling/transform/tps.py
+++ b/ppocr/modeling/transform/tps.py