Merge pull request #13 from PaddlePaddle/dygraph

Dygraph
4 years ago · 0458f0cc05
parent 04b0318b56 836839bbf6
commit 0458f0cc05
70 changed files with 14396 additions and 587 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,8 +1,7 @@
 include LICENSE.txt
 include README.md

-recursive-include ppocr/utils *.txt utility.py character.py check.py
-recursive-include ppocr/data/det *.py
+recursive-include ppocr/utils *.txt utility.py logging.py
+recursive-include ppocr/data/ *.py
 recursive-include ppocr/postprocess *.py
-recursive-include ppocr/postprocess/lanms *.*
-recursive-include tools/infer *.py
+recursive-include tools/infer *.py
--- a/configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml
+++ b/configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml
@ -0,0 +1,134 @@
+Global:
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/ch_db_mv3/
+  save_epoch_step: 1200
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [3000, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+
+Architecture:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+    disable_se: True
+  Neck:
+    name: DBFPN
+    out_channels: 96
+  Head:
+    name: DBHead
+    k: 50
+
+Loss:
+  name: DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+    warmup_epoch: 2
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.6
+  max_candidates: 1000
+  unclip_ratio: 1.5
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
+            - { 'type': Resize, 'args': { 'size': [0.5, 3] } }
+      - EastRandomCropData:
+          size: [960, 960]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 8
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+#           image_shape: [736, 1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml
+++ b/configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml
@ -0,0 +1,133 @@
+Global:
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/ch_db_res18/
+  save_epoch_step: 1200
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [3000, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet18_vd_pretrained
+  checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+
+Architecture:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 18
+    disable_se: True
+  Neck:
+    name: DBFPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50
+
+Loss:
+  name: DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+    warmup_epoch: 2
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.6
+  max_candidates: 1000
+  unclip_ratio: 1.5
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
+            - { 'type': Resize, 'args': { 'size': [0.5, 3] } }
+      - EastRandomCropData:
+          size: [960, 960]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 8
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+#           image_shape: [736, 1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_mv3_east.yml
+++ b/configs/det/det_mv3_east.yml
@ -0,0 +1,111 @@
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_mv3/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: EASTFPN
+    model_name: small
+  Head:
+    name: EASTHead
+    model_name: small
+
+Loss:
+  name: EASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 16
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
@ -0,0 +1,110 @@
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_r50_vd/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+  Neck:
+    name: EASTFPN
+    model_name: large
+  Head:
+    name: EASTHead
+    model_name: large
+
+Loss:
+  name: EASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 8
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ b/configs/det/det_r50_vd_sast_icdar15.yml
@ -0,0 +1,110 @@
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_ic15/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
+
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+
+Loss:
+  name: SASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 2
+  nms_thresh: 0.2
+  expand_scale: 1.0
+  shrink_ratio_of_width: 0.3
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
+    data_ratio_list: [0.5, 0.5]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 1536
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
@ -0,0 +1,109 @@
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_tt/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt
+
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+
+Loss:
+  name: SASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 6
+  nms_thresh: 0.2
+  expand_scale: 1.2
+  shrink_ratio_of_width: 0.2
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    label_file_list: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
+    ratio_list: [0.1, 0.45, 0.3, 0.15]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: 
+        - ./train_data/total_text_icdar_14pt/test_label_json.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 768
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yaml
+++ b/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yaml
@ -3,7 +3,7 @@ Global:
  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: ./output/rec_chinese_common_v1.1
+  save_model_dir: ./output/rec_chinese_common_v2.0
  save_epoch_step: 3
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [0, 2000]
--- a/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yaml
+++ b/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yaml
@ -3,7 +3,7 @@ Global:
  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: ./output/rec_chinese_lite_v1.1
+  save_model_dir: ./output/rec_chinese_lite_v2.0
  save_epoch_step: 3
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [0, 2000]
@ -19,7 +19,7 @@ Global:
  character_type: ch
  max_text_length: 25
  infer_mode: False
-  use_space_char: False
+  use_space_char: True


 Optimizer:
--- a/configs/rec/multi_language/rec_en_number_lite_train.yml
+++ b/configs/rec/multi_language/rec_en_number_lite_train.yml
@ -0,0 +1,102 @@
+Global:
+  use_gpu: True
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_en_number_lite
+  save_epoch_step: 3
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img:
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/en_dict.txt
+  character_type: ch
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [1, 2, 2, 2]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTCHead
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/train_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - RecAug: 
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/eval_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 8
--- a/configs/rec/multi_language/rec_french_lite_train.yml
+++ b/configs/rec/multi_language/rec_french_lite_train.yml
@ -0,0 +1,102 @@
+Global:
+  use_gpu: True
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_french_lite
+  save_epoch_step: 3
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  cal_metric_during_train: True
+  pretrained_model: 
+  checkpoints: 
+  save_inference_dir: 
+  use_visualdl: False
+  infer_img:
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/french_dict.txt
+  character_type: french
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [1, 2, 2, 2]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTCHead
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/train_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - RecAug: 
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/eval_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 8
--- a/configs/rec/multi_language/rec_german_lite_train.yml
+++ b/configs/rec/multi_language/rec_german_lite_train.yml
@ -0,0 +1,102 @@
+Global:
+  use_gpu: True
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_german_lite
+  save_epoch_step: 3
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img:
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/german_dict.txt
+  character_type: german
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [1, 2, 2, 2]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTCHead
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/train_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - RecAug: 
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/eval_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 8
--- a/configs/rec/multi_language/rec_japan_lite_train.yml
+++ b/configs/rec/multi_language/rec_japan_lite_train.yml
@ -0,0 +1,102 @@
+Global:
+  use_gpu: True
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_japan_lite
+  save_epoch_step: 3
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img:
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/japan_dict.txt
+  character_type: japan
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [1, 2, 2, 2]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTCHead
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/train_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - RecAug: 
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/eval_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 8
--- a/configs/rec/multi_language/rec_korean_lite_train.yml
+++ b/configs/rec/multi_language/rec_korean_lite_train.yml
@ -0,0 +1,102 @@
+Global:
+  use_gpu: True
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_korean_lite
+  save_epoch_step: 3
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img:
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/korean_dict.txt
+  character_type: korean
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [1, 2, 2, 2]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTCHead
+    fc_decay: 0.00001
+
+Loss:
+  name: CTCLoss
+
+PostProcess:
+  name: CTCLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/train_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - RecAug: 
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: ["./train_data/eval_list.txt"]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 320]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 8
--- a/deploy/cpp_infer/src/ocr_cls.cpp
+++ b/deploy/cpp_infer/src/ocr_cls.cpp
@ -81,7 +81,7 @@ cv::Mat Classifier::Run(cv::Mat &img) {

 void Classifier::LoadModel(const std::string &model_dir) {
  AnalysisConfig config;
-  config.SetModel(model_dir + "/model", model_dir + "/params");
+  config.SetModel(model_dir + ".pdmodel", model_dir + ".pdiparams");

  if (this->use_gpu_) {
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
--- a/deploy/cpp_infer/src/ocr_det.cpp
+++ b/deploy/cpp_infer/src/ocr_det.cpp
@ -18,7 +18,7 @@ namespace PaddleOCR {

 void DBDetector::LoadModel(const std::string &model_dir) {
  AnalysisConfig config;
-  config.SetModel(model_dir + "/model", model_dir + "/params");
+  config.SetModel(model_dir + ".pdmodel", model_dir + ".pdiparams");

  if (this->use_gpu_) {
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@ -103,7 +103,7 @@ void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,

 void CRNNRecognizer::LoadModel(const std::string &model_dir) {
  AnalysisConfig config;
-  config.SetModel(model_dir + "/model", model_dir + "/params");
+  config.SetModel(model_dir + ".pdmodel", model_dir + ".pdiparams");

  if (this->use_gpu_) {
    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
@ -186,4 +186,4 @@ cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage,
  }
 }

-} // namespace PaddleOCR
+} // namespace PaddleOCR
--- a/deploy/docker/hubserving/README.md
+++ b/deploy/docker/hubserving/README.md
@ -0,0 +1,54 @@
+English | [简体中文](README_cn.md)
+
+## Introduction
+Many users hope package the PaddleOCR service into a docker image, so that it can be quickly released and used in the docker or k8s environment.
+
+This page provides some standardized code to achieve this goal. You can quickly publish the PaddleOCR project into a callable Restful API service through the following steps. (At present, the deployment based on the HubServing mode is implemented first, and author plans to increase the deployment of the PaddleServing mode in the futrue)
+
+## 1. Prerequisites
+
+You need to install the following basic components first：
+a. Docker
+b. Graphics driver and CUDA 10.0+（GPU）
+c. NVIDIA Container Toolkit（GPU，Docker 19.03+ can skip this）
+d. cuDNN 7.6+（GPU）
+
+## 2. Build Image
+a. Goto Dockerfile directory（ps：Need to distinguish between cpu and gpu version, the following takes cpu as an example, gpu version needs to replace the keyword）
+```
+cd deploy/docker/hubserving/cpu
+```
+c. Build image
+```
+docker build -t paddleocr:cpu .
+```
+
+## 3. Start container
+a. CPU version
+```
+sudo docker run -dp 8868:8868 --name paddle_ocr paddleocr:cpu
+```
+b. GPU version (base on NVIDIA Container Toolkit)
+```
+sudo nvidia-docker run -dp 8868:8868 --name paddle_ocr paddleocr:gpu
+```
+c. GPU version (Docker 19.03++)
+```
+sudo docker run -dp 8868:8868 --gpus all --name paddle_ocr paddleocr:gpu
+```
+d. Check service status（If you can see the following statement then it means completed：Successfully installed ocr_system && Running on http://0.0.0.0:8868/）
+```
+docker logs -f paddle_ocr
+```
+
+## 4. Test
+a. Calculate the Base64 encoding of the picture to be recognized (if you just test, you can use a free online tool, like：https://freeonlinetools24.com/base64-image/）
+b. Post a service request（sample request in sample_request.txt）
+
+```
+curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"Input image Base64 encode(need to delete the code 'data:image/jpg;base64,'）\"]}" http://localhost:8868/predict/ocr_system
+```
+c. Get resposne（If the call is successful, the following result will be returned）
+```
+{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
+```
--- a/deploy/docker/hubserving/README_cn.md
+++ b/deploy/docker/hubserving/README_cn.md
@ -0,0 +1,53 @@
+[English](README.md) | 简体中文
+
+## Docker化部署服务
+在日常项目应用中，相信大家一般都会希望能通过Docker技术，把PaddleOCR服务打包成一个镜像，以便在Docker或k8s环境里，快速发布上线使用。
+
+本文将提供一些标准化的代码来实现这样的目标。大家通过如下步骤可以把PaddleOCR项目快速发布成可调用的Restful API服务。（目前暂时先实现了基于HubServing模式的部署，后续作者计划增加PaddleServing模式的部署）
+
+## 1.实施前提准备
+
+需要先完成如下基本组件的安装：
+a. Docker环境
+b. 显卡驱动和CUDA 10.0+（GPU）
+c. NVIDIA Container Toolkit（GPU，Docker 19.03以上版本可以跳过此步）
+d. cuDNN 7.6+（GPU）
+
+## 2.制作镜像
+a.切换至Dockerfile目录（注：需要区分cpu或gpu版本，下文以cpu为例，gpu版本需要替换一下关键字即可）
+```
+cd deploy/docker/hubserving/cpu
+```
+c.生成镜像
+```
+docker build -t paddleocr:cpu .
+```
+
+## 3.启动Docker容器
+a. CPU 版本
+```
+sudo docker run -dp 8868:8868 --name paddle_ocr paddleocr:cpu
+```
+b. GPU 版本 (通过NVIDIA Container Toolkit)
+```
+sudo nvidia-docker run -dp 8868:8868 --name paddle_ocr paddleocr:gpu
+```
+c. GPU 版本 (Docker 19.03以上版本，可以直接用如下命令)
+```
+sudo docker run -dp 8868:8869 --gpus all --name paddle_ocr paddleocr:gpu
+```
+d. 检查服务运行情况（出现：Successfully installed ocr_system和Running on http://0.0.0.0:8868 等信息，表示运行成功）
+```
+docker logs -f paddle_ocr
+```
+
+## 4.测试服务
+a. 计算待识别图片的Base64编码（如果只是测试一下效果，可以通过免费的在线工具实现，如：http://tool.chinaz.com/tools/imgtobase/）
+b. 发送服务请求（可参见sample_request.txt中的值）
+```
+curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"填入图片Base64编码(需要删除'data:image/jpg;base64,'）\"]}" http://localhost:8868/predict/ocr_system
+```
+c. 返回结果（如果调用成功，会返回如下结果）
+```
+{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
+```
--- a/deploy/docker/hubserving/cpu/Dockerfile
+++ b/deploy/docker/hubserving/cpu/Dockerfile
@ -0,0 +1,32 @@
+# Version: 1.0.0
+FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev
+
+# PaddleOCR base on Python3.7
+RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
+
+RUN python3.7 -m pip install paddlepaddle==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
+
+RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple
+
+RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
+
+WORKDIR /PaddleOCR
+
+RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
+
+RUN mkdir -p /PaddleOCR/inference/
+# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py）
+ADD {link} /PaddleOCR/inference/
+RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/
+
+# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py）
+ADD {link} /PaddleOCR/inference/
+RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
+
+# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py）
+ADD {link} /PaddleOCR/inference/
+RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
+
+EXPOSE 8868
+
+CMD ["/bin/bash","-c","hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system"]
--- a/deploy/docker/hubserving/gpu/Dockerfile
+++ b/deploy/docker/hubserving/gpu/Dockerfile
@ -0,0 +1,32 @@
+# Version: 1.0.0
+FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev
+
+# PaddleOCR base on Python3.7
+RUN pip3.7 install --upgrade pip -i https://mirror.baidu.com/pypi/simple
+
+RUN python3.7 -m pip install paddlepaddle-gpu==2.0.0rc0 -i https://mirror.baidu.com/pypi/simple
+
+RUN pip3.7 install paddlehub --upgrade -i https://mirror.baidu.com/pypi/simple
+
+RUN git clone https://github.com/PaddlePaddle/PaddleOCR.git /PaddleOCR
+
+WORKDIR /PaddleOCR
+
+RUN pip3.7 install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
+
+RUN mkdir -p /PaddleOCR/inference/
+# Download orc detect model(light version). if you want to change normal version, you can change ch_ppocr_mobile_v1.1_det_infer to ch_ppocr_server_v1.1_det_infer, also remember change det_model_dir in deploy/hubserving/ocr_system/params.py）
+ADD {link} /PaddleOCR/inference/
+RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
+
+# Download direction classifier(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_cls_infer to ch_ppocr_mobile_v1.1_cls_infer, also remember change cls_model_dir in deploy/hubserving/ocr_system/params.py）
+ADD {link} /PaddleOCR/inference/
+RUN tar xf /PaddleOCR/inference/{file} -C /PaddleOCR/inference/
+
+# Download orc recognition model(light version). If you want to change normal version, you can change ch_ppocr_mobile_v1.1_rec_infer to ch_ppocr_server_v1.1_rec_infer, also remember change rec_model_dir in deploy/hubserving/ocr_system/params.py）
+ADD {link} /PaddleOCR/inference/
+RUN tar xf /PaddleOCR/inference/{file}.tar -C /PaddleOCR/inference/
+
+EXPOSE 8868
+
+CMD ["/bin/bash","-c","hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system"]
--- a/deploy/docker/hubserving/sample_request.txt
+++ b/deploy/docker/hubserving/sample_request.txt
--- a/doc/doc_ch/add_new_algorithm.md
+++ b/doc/doc_ch/add_new_algorithm.md
--- a/doc/doc_ch/angle_class.md
+++ b/doc/doc_ch/angle_class.md
@ -45,7 +45,7 @@ train_data/cls/word_002.jpg   180
 ```
 |-train_data
    |-cls
-        |- 和一个cls_gt_test.txt
+        |- cls_gt_test.txt
        |- test
            |- word_001.jpg
            |- word_002.jpg
@ -62,29 +62,36 @@ PaddleOCR提供了训练脚本、评估脚本和预测脚本。
 *如果您安装的是cpu版本，请将配置文件中的 `use_gpu` 字段修改为false*

 ```
-# 设置PYTHONPATH路径
-export PYTHONPATH=$PYTHONPATH:.
-# GPU训练 支持单卡，多卡训练，通过CUDA_VISIBLE_DEVICES指定卡号
-export CUDA_VISIBLE_DEVICES=0,1,2,3
-# 启动训练
-python3 tools/train.py -c configs/cls/cls_mv3.yml
+# GPU训练 支持单卡，多卡训练，通过selected_gpus指定卡号
+# 启动训练，下面的命令已经写入train.sh文件中，只需修改文件里的配置文件路径即可
+python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3,4,5,6,7'  tools/train.py -c configs/cls/cls_mv3.yml
 ```

 - 数据增强

-PaddleOCR提供了多种数据增强方式，如果您希望在训练时加入扰动，请在配置文件中设置 `distort: true`。
+PaddleOCR提供了多种数据增强方式，如果您希望在训练时加入扰动，请在配置文件中取消`Train.dataset.transforms`下的`RecAug`和`RandAugment`字段的注释。

 默认的扰动方式有：颜色空间转换(cvtColor)、模糊(blur)、抖动(jitter)、噪声(Gasuss noise)、随机切割(random crop)、透视(perspective)、颜色反转(reverse),随机数据增强(RandAugment)。

 训练过程中除随机数据增强外每种扰动方式以50%的概率被选择，具体代码实现请参考：
-[randaugment.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/cls/randaugment.py)
-[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+[rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py) 
+[randaugment.py](../../ppocr/data/imaug/randaugment.py)

 *由于OpenCV的兼容性问题，扰动操作暂时只支持linux*

 ### 训练

-PaddleOCR支持训练和评估交替进行, 可以在 `configs/cls/cls_mv3.yml` 中修改 `eval_batch_step` 设置评估频率，默认每500个iter评估一次。评估过程中默认将最佳acc模型，保存为 `output/cls_mv3/best_accuracy` 。
+PaddleOCR支持训练和评估交替进行, 可以在 `configs/cls/cls_mv3.yml` 中修改 `eval_batch_step` 设置评估频率，默认每1000个iter评估一次。训练过程中将会保存如下内容：
+```bash
+├── best_accuracy.pdopt # 最佳模型的优化器参数
+├── best_accuracy.pdparams # 最佳模型的参数
+├── best_accuracy.states # 最佳模型的指标和epoch等信息
+├── config.yml # 本次实验的配置文件
+├── latest.pdopt # 最新模型的优化器参数
+├── latest.pdparams # 最新模型的参数
+├── latest.states # 最新模型的指标和epoch等信息
+└── train.log # 训练日志
+```

 如果验证集很大，测试将会比较耗时，建议减少评估次数，或训练完再进行评估。

@ -92,9 +99,8 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/cls/cls_mv3.yml`

 ### 评估

-评估数据集可以通过`configs/cls/cls_reader.yml`  修改EvalReader中的 `label_file_path` 设置。
+评估数据集可以通过修改`configs/cls/cls_mv3.yml`文件里的`Eval.dataset.label_file_list` 字段设置。

-*注意* 评估时必须确保配置文件中 infer_img 字段为空
 ```
 export CUDA_VISIBLE_DEVICES=0
 # GPU 评估， Global.checkpoints 为待测权重
@ -107,21 +113,20 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/

 使用 PaddleOCR 训练好的模型，可以通过以下脚本进行快速预测。

-默认预测图片存储在 `infer_img` 里，通过 `-o Global.checkpoints` 指定权重：
+通过 `Global.infer_img` 指定预测图片或文件夹路径，通过 `Global.checkpoints` 指定权重：

 ```
 # 预测分类结果
-python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
+python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
 ```

 预测图片：

-![](../imgs_words/en/word_1.png)
+![](../imgs_words/ch/word_1.jpg)

 得到输入图像的预测结果：

 ```
-infer_img: doc/imgs_words/en/word_1.png
-    scores: [[0.93161047 0.06838956]]
-    label: [0]
+infer_img: doc/imgs_words/ch/word_1.jpg
+     result: ('0', 0.9998784)
 ```
--- a/doc/doc_ch/config.md
+++ b/doc/doc_ch/config.md
@ -1,4 +1,4 @@
-# 可选参数列表
+## 可选参数列表

 以下列表可以通过`--help`查看

@ -8,65 +8,115 @@
 |          -o              |      ALL       |  设置配置文件里的参数内容  |  None  |  使用-o配置相较于-c选择的配置文件具有更高的优先级。例如：`-o Global.use_gpu=false`  |  


-## 配置文件 Global 参数介绍
+## 配置文件参数介绍

 以 `rec_chinese_lite_train_v1.1.yml ` 为例
-
+### Global 

 |         字段             |            用途                |      默认值       |            备注            |
 | :----------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|      algorithm           |    设置算法                    |  与配置文件同步   |     选择模型，支持模型请参考[简介](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/README.md) |
-|      use_gpu             |    设置代码运行场所            |       true        |                \                 |
-|      epoch_num           |    最大训练epoch数             |       3000        |                \                 |
+|      use_gpu             |    设置代码是否在gpu运行           |       true        |                \                 |
+|      epoch_num           |    最大训练epoch数             |       500        |                \                 |
 |      log_smooth_window   |    滑动窗口大小            |       20          |                \                 |
 |      print_batch_step    |    设置打印log间隔         |       10          |                \                 |
 |      save_model_dir      |    设置模型保存路径        |  output/{算法名称}  |                \                 |
 |      save_epoch_step     |    设置模型保存间隔        |       3           |                \                 |
 |      eval_batch_step     |    设置模型评估间隔        | 2000 或 [1000, 2000]        | 2000 表示每2000次迭代评估一次，[1000， 2000]表示从1000次迭代开始，每2000次评估一次   |
-|train_batch_size_per_card |  设置训练时单卡batch size    |         256         |                \                 |
-| test_batch_size_per_card |  设置评估时单卡batch size    |         256         |                \                 |
-|      image_shape         |    设置输入图片尺寸        |   [3, 32, 100]    |                \                 |
+|      cal_metric_during_train     |    设置是否在训练过程中评估指标，此时评估的是模型在当前batch下的指标        |       true         |                \                 |
+|      load_static_weights     |   设置预训练模型是否是静态图模式保存(目前仅检测算法需要)        |       true         |                \                 |
+|      pretrained_model    |    设置加载预训练模型路径      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
+|      checkpoints         |    加载模型参数路径            |       None        |    用于中断后加载参数继续训练 |
+|      use_visualdl  |    设置是否启用visualdl进行可视化log展示 |          False        |    [教程地址](https://www.paddlepaddle.org.cn/paddle/visualdl) |
+|      infer_img            |    设置预测图像路径或文件夹路径     |       ./infer_img | \|
+|      character_dict_path |    设置字典路径            |  ./ppocr/utils/ppocr_keys_v1.txt  |    \                 |
 |      max_text_length     |    设置文本最大长度        |       25          |                \                 |
 |      character_type      |    设置字符类型            |       ch          |    en/ch, en时将使用默认dict，ch时使用自定义dict|
-|      character_dict_path |    设置字典路径            |  ./ppocr/utils/ic15_dict.txt  |    \                 |
-|      loss_type           |    设置 loss 类型              |       ctc         |    支持两种loss： ctc / attention |
-|       distort            |    设置是否使用数据增强          |       false       |  设置为true时，将在训练时随机进行扰动，支持的扰动操作可阅读[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)                 |
-|       use_space_char     |    设置是否识别空格             |        false      |          仅在 character_type=ch 时支持空格                 |
+|      use_space_char     |    设置是否识别空格             |        True      |          仅在 character_type=ch 时支持空格                 |
 |      label_list          |    设置方向分类器支持的角度       |    ['0','180']    |     仅在方向分类器中生效 |
-|      average_window      |    ModelAverage优化器中的窗口长度计算比例 |  0.15       |       目前仅应用与SRN |
-|      max_average_window  |    平均值计算窗口长度的最大值   |   15625              | 推荐设置为一轮训练中mini-batchs的数目|
-|      min_average_window  |    平均值计算窗口长度的最小值  |    10000              |      \          |
-|      reader_yml          |    设置reader配置文件          |  ./configs/rec/rec_icdar15_reader.yml  |  \          |
-|      pretrain_weights    |    加载预训练模型路径      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
-|      checkpoints         |    加载模型参数路径            |       None        |    用于中断后加载参数继续训练 |
-|      save_inference_dir  |    inference model 保存路径 |          None        |    用于保存inference model |
+|      save_res_path          |    设置检测模型的结果保存地址       |    ./output/det_db/predicts_db.txt    |     仅在检测模型中生效 |

-## 配置文件 Reader 系列参数介绍
+### Optimizer ([ppocr/optimizer](../../ppocr/optimizer))

-以 `rec_chinese_reader.yml` 为例
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         优化器类名          |  Adam  |  目前支持`Momentum`,`Adam`,`RMSProp`, 见[ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py)  |
+|      beta1           |    设置一阶矩估计的指数衰减率  |       0.9         |               \             |
+|      beta2           |    设置二阶矩估计的指数衰减率  |     0.999         |               \             |
+|      **lr**                |         设置学习率decay方式       |   -    |       \  |
+|        name    |      学习率decay类名   |         Cosine       | 目前支持`Linear`,`Cosine`,`Step`,`Piecewise`, 见[ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
+|        learning_rate      |    基础学习率        |       0.001      |  \        |
+|      **regularizer**      |  设置网络正则化方式        |       -      | \        |
+|        name      |    正则化类名      |       L2     | 目前支持`L1`,`L2`, 见[ppocr/optimizer/regularizer.py](../../ppocr/optimizer/regularizer.py)        |
+|        factor      |    学习率衰减系数       |       0.00004     |  \        |

-|         字段             |            用途                |      默认值       |            备注            |
-| :----------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|      reader_function     |    选择数据读取方式        |  ppocr.data.rec.dataset_traversal,SimpleReader  | 支持SimpleReader / LMDBReader 两种数据读取方式 |
-|      num_workers             |    设置数据读取线程数            |       8        |                \                 |
-|      img_set_dir          |    数据集路径             |       ./train_data        |                \                 |
-|      label_file_path      |    数据标签路径           |       ./train_data/rec_gt_train.txt| \    |
-|      infer_img            |    预测图像文件夹路径     |       ./infer_img | \|

-## 配置文件 Optimizer 系列参数介绍
+### Architecture ([ppocr/modeling](../../ppocr/modeling))
+在ppocr中，网络被划分为Transform,Backbone,Neck和Head四个阶段
+
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      model_type        |         网络类型          |  rec  |  目前支持`rec`,`det`,`cls`  |
+|      algorithm           |    模型名称  |       CRNN         |               支持列表见[algorithm_overview](./algorithm_overview.md)             |
+|      **Transform**           |    设置变换方式  |       -       |               目前仅rec类型的算法支持, 具体见[ppocr/modeling/transform](../../ppocr/modeling/transform)              |
+|        name    |      变换方式类名   |         TPS       | 目前支持`TPS` |
+|        num_fiducial      |    TPS控制点数        |       20      |  上下边各十个       |
+|        loc_lr      |    定位网络学习率        |       0.1      |  \      |
+|        model_name      |    定位网络大小        |       small      |  目前支持`small`,`large`       |
+|      **Backbone**      |  设置网络backbone类名        |       -      | 具体见[ppocr/modeling/backbones](../../ppocr/modeling/backbones)        |
+|        name      |    backbone类名       |       ResNet     | 目前支持`MobileNetV3`,`ResNet`        |
+|        layers      |    resnet层数       |       34     |  支持18,34,50,101,152,200       |
+|        model_name      |    MobileNetV3 网络大小       |       small     |  支持`small`,`large`       |
+|      **Neck**      |  设置网络neck        |       -      | 具体见[ppocr/modeling/necks](../../ppocr/modeling/necks)        |
+|        name      |    neck类名       |       SequenceEncoder     | 目前支持`SequenceEncoder`,`DBFPN`        |
+|        encoder_type      |    SequenceEncoder编码器类型       |       rnn     |  支持`reshape`,`fc`,`rnn`       |
+|        hidden_size      |   rnn内部单元数       |       48     |  \      |
+|        out_channels      |   DBFPN输出通道数       |       256     |  \      |
+|      **Head**      |  设置网络Head        |       -      | 具体见[ppocr/modeling/heads](../../ppocr/modeling/heads)        |
+|        name      |    head类名       |       CTCHead     | 目前支持`CTCHead`,`DBHead`,`ClsHead`        |
+|        fc_decay      |    CTCHead正则化系数       |       0.0004     |  \      |
+|        k      |   DBHead二值化系数       |       50     |  \      |
+|        class_dim      |   ClsHead输出分类数       |       2     |  \      |
+

-以 `rec_icdar15_train.yml` 为例
+### Loss ([ppocr/losses](../../ppocr/losses))
+
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         网络loss类名          |  CTCLoss  |  目前支持`CTCLoss`,`DBLoss`,`ClsLoss`  |
+|      balance_loss        |        DBLossloss中是否对正负样本数量进行均衡(使用OHEM)         |  True  |  \  |
+|      ohem_ratio        |        DBLossloss中的OHEM的负正样本比例         |  3  |  \  |
+|      main_loss_type        |        DBLossloss中shrink_map所采用的的loss        |  DiceLoss  |  支持`DiceLoss`,`BCELoss`  |
+|      alpha        |        DBLossloss中shrink_map_loss的系数       |  5  |  \  |
+|      beta        |        DBLossloss中threshold_map_loss的系数       |  10  |  \  |
+
+### PostProcess ([ppocr/postprocess](../../ppocr/postprocess))
+
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         后处理类名          |  CTCLabelDecode  |  目前支持`CTCLoss`,`AttnLabelDecode`,`DBPostProcess`,`ClsPostProcess`  |
+|      thresh        |        DBPostProcess中分割图进行二值化的阈值         |  0.3  |  \  |
+|      box_thresh        |        DBPostProcess中对输出框进行过滤的阈值，低于此阈值的框不会输出         |  0.7  |  \  |
+|      max_candidates        |        DBPostProcess中输出的最大文本框数量        |  1000  |   |
+|      unclip_ratio        |        DBPostProcess中对文本框进行放大的比例       |  2.0  |  \  |
+
+### Metric ([ppocr/metrics](../../ppocr/metrics))
+
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         指标评估方法名称          |  CTCLabelDecode  |  目前支持`DetMetric`,`RecMetric`,`ClsMetric`  |
+|      main_indicator        |        主要指标,用于选取最优模型         |  acc |  对于检测方法为hmean，识别和分类方法为acc  |

+### Dataset  ([ppocr/data](../../ppocr/data))
 |         字段             |            用途            |      默认值        |            备注             |
 | :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|         function        |         选择优化器          |  pocr.optimizer,AdamDecay  |  目前只支持Adam方式  |
-|         base_lr         |      设置初始学习率          |       0.0005      |               \             |
-|         beta1           |    设置一阶矩估计的指数衰减率  |       0.9         |               \             |
-|         beta2           |    设置二阶矩估计的指数衰减率  |     0.999         |               \             |
-|         decay           |         是否使用decay       |    \              |               \             |
-|      function(decay)    |         设置decay方式       |   -    |       目前支持cosine_decay, cosine_decay_warmup与piecewise_decay  |
-|      step_each_epoch    |      每个epoch包含多少次迭代, cosine_decay/cosine_decay_warmup时有效   |         20       | 计算方式：total_image_num / (batch_size_per_card * card_size) |
-|        total_epoch      |    总共迭代多少个epoch, cosine_decay/cosine_decay_warmup时有效        |       1000      | 与Global.epoch_num 一致        |
-|        warmup_minibatch      |  线性warmup的迭代次数, cosine_decay_warmup时有效        |       1000      | \        |
-|        boundaries      |    学习率下降时的迭代次数间隔, piecewise_decay时有效       |       -      | 参数为列表形式        |
-|        decay_rate      |    学习率衰减系数, piecewise_decay时有效       |       -      |  \        |
+|      **dataset**        |         每次迭代返回一个样本          |  -  |  -  |
+|      name        |        dataset类名         |  SimpleDataSet |  目前支持`SimpleDataSet`和`LMDBDateSet`  |
+|      data_dir        |        数据集图片存放路径         |  ./train_data |  \  |
+|      label_file_list        |        数据标签路径         |  ["./train_data/train_list.txt"] | dataset为LMDBDateSet时不需要此参数   |
+|      ratio_list        |        数据集的比例         |  [1.0] | 若label_file_list中有两个train_list，且ratio_list为[0.4,0.6]，则从train_list1中采样40%，从train_list2中采样60%组合整个dataset   |
+|      transforms        |        对图片和标签进行变换的方法列表         |  [DecodeImage,CTCLabelEncode,RecResizeImg,KeepKeys] |   见[ppocr/data/imaug](../../ppocr/data/imaug)  |
+|      **loader**        |        dataloader相关         |  - |   |
+|      shuffle        |        每个epoch是否将数据集顺序打乱         |  True | \  |
+|      batch_size_per_card        |        训练时单卡batch size         |  256 | \  |
+|      drop_last        |        是否丢弃因数据集样本数不能被 batch_size 整除而产生的最后一个不完整的mini-batch        |  True | \  |
+|      num_workers        |        用于加载数据的子进程个数，若为0即为不开启子进程，在主进程中进行数据加载        |  8 | \  |
--- a/Show More
+++ b/Show More