add east & sast

6 years ago · 021c1132a9
parent 8a5566c974
commit 021c1132a9
36 changed files with 3798 additions and 103 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,8 +1,7 @@
 include LICENSE.txt
 include README.md

-recursive-include ppocr/utils *.txt utility.py character.py check.py
-recursive-include ppocr/data/det *.py
+recursive-include ppocr/utils *.txt utility.py logging.py
+recursive-include ppocr/data/ *.py
 recursive-include ppocr/postprocess *.py
-recursive-include ppocr/postprocess/lanms *.*
-recursive-include tools/infer *.py
+recursive-include tools/infer *.py
--- a/configs/det/det_mv3_east.yml
+++ b/configs/det/det_mv3_east.yml
@ -0,0 +1,111 @@
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_mv3/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: EASTFPN
+    model_name: small
+  Head:
+    name: EASTHead
+    model_name: small
+
+Loss:
+  name: EASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 16
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
@ -0,0 +1,110 @@
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_r50_vd/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+  Neck:
+    name: EASTFPN
+    model_name: large
+  Head:
+    name: EASTHead
+    model_name: large
+
+Loss:
+  name: EASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 8
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ b/configs/det/det_r50_vd_sast_icdar15.yml
@ -0,0 +1,110 @@
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_ic15/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
+
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+
+Loss:
+  name: SASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 2
+  nms_thresh: 0.2
+  expand_scale: 1.0
+  shrink_ratio_of_width: 0.3
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
+    data_ratio_list: [0.5, 0.5]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 1536
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
@ -0,0 +1,109 @@
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_tt/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt
+
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+
+Loss:
+  name: SASTLoss
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 6
+  nms_thresh: 0.2
+  expand_scale: 1.2
+  shrink_ratio_of_width: 0.2
+
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    label_file_list: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
+    ratio_list: [0.1, 0.45, 0.3, 0.15]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: 
+        - ./train_data/total_text_icdar_14pt/test_label_json.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 768
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
--- a/configs/rec/multi_language/rec_en_number_lite_train.yml
+++ b/configs/rec/multi_language/rec_en_number_lite_train.yml
@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/ic15_dict.txt
+  character_dict_path: ppocr/utils/dict/ic15_dict.txt
  character_type: ch
  max_text_length: 25
  infer_mode: False
--- a/configs/rec/multi_language/rec_french_lite_train.yml
+++ b/configs/rec/multi_language/rec_french_lite_train.yml
@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/french_dict.txt
+  character_dict_path: ppocr/utils/dict/french_dict.txt
  character_type: french
  max_text_length: 25
  infer_mode: False
--- a/configs/rec/multi_language/rec_german_lite_train.yml
+++ b/configs/rec/multi_language/rec_german_lite_train.yml
@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/german_dict.txt
+  character_dict_path: ppocr/utils/dict/german_dict.txt
  character_type: german
  max_text_length: 25
  infer_mode: False
--- a/configs/rec/multi_language/rec_japan_lite_train.yml
+++ b/configs/rec/multi_language/rec_japan_lite_train.yml
@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/japan_dict.txt
+  character_dict_path: ppocr/utils/dict/japan_dict.txt
  character_type: japan
  max_text_length: 25
  infer_mode: False
--- a/configs/rec/multi_language/rec_korean_lite_train.yml
+++ b/configs/rec/multi_language/rec_korean_lite_train.yml
@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/korean_dict.txt
+  character_dict_path: ppocr/utils/dict/korean_dict.txt
  character_type: korean
  max_text_length: 25
  infer_mode: False
--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@ -261,6 +261,61 @@ im_show.save('result.jpg')
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
 ```

+### 使用网络图片或者numpy数组作为输入
+
+1. 网络图片
+
+代码使用
+```python
+from paddleocr import PaddleOCR, draw_ocr
+# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
+# 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+
+# 显示结果
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+命令行模式
+```bash
+paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
+```
+
+2. numpy数组
+仅通过代码使用时支持numpy数组作为输入
+```python
+from paddleocr import PaddleOCR, draw_ocr
+# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
+# 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs/11.jpg'
+img = cv2.imread(img_path)
+# img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), 如果你自己训练的模型支持灰度图，可以将这句话的注释取消
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+
+# 显示结果
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+
 ## 参数说明

 | 字段                    | 说明                                                                                                                                                                                                                 | 默认值                  |
@ -285,6 +340,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | max_text_length         | 识别算法能识别的最大文字长度                                                                                                                                                                                         | 25                      |
 | rec_char_dict_path      | 识别模型字典路径，当rec_model_dir使用方式2传参时需要修改为自己的字典路径                                                                                                                                                | ./ppocr/utils/ppocr_keys_v1.txt                        |
 | use_space_char          | 是否识别空格                                                                                                                                                                                                         | TRUE                    |
+| drop_score          | 对输出按照分数(来自于识别模型)进行过滤，低于此分数的不返回                                                                                                                                                                                                         | 0.5                    |
 | use_angle_cls          | 是否加载分类模型                                                                                                                                                                                                         | FALSE                    |
 | cls_model_dir          | 分类模型所在文件夹。传参方式有两种，1. None: 自动下载内置模型到 `~/.paddleocr/cls`；2.自己转换好的inference模型路径，模型路径下必须包含model和params文件                                                                                 | None                    |
 | cls_image_shape          | 分类算法的输入图片尺寸                                                                           | "3, 48, 192"                    |
@ -295,4 +351,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | lang                     | 模型语言类型,目前支持 中文(ch)和英文(en)                                                                                                                                                                                                  | ch                    |
 | det                     | 前向时使用启动检测                                                                                                                                                                                                   | TRUE                    |
 | rec                     | 前向时是否启动识别                                                                                                                                                                                                   | TRUE                    |
-| cls                     | 前向时是否启动分类                                                                                                                                                                                                 | FALSE                    |
+| cls                     | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类)                                                                                                                                                                                                | FALSE                    |
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@ -271,6 +271,59 @@ im_show.save('result.jpg')
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
 ```

+### Use web images or numpy array as input
+
+1. Web image
+
+Use by code
+```python
+from paddleocr import PaddleOCR, draw_ocr
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+
+# show result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+Use by command line
+```bash
+paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
+```
+
+2. Numpy array
+Support numpy array as input only when used by code
+
+```python
+from paddleocr import PaddleOCR, draw_ocr
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs/11.jpg'
+img = cv2.imread(img_path)
+# img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), If your own training model supports grayscale images, you can uncomment this line
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+
+# show result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+
+
 ## Parameter Description

 | Parameter                    | Description                                                                                                                                                                                                                 | Default value                  |
@ -295,6 +348,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | max_text_length         | The maximum text length that the recognition algorithm can recognize                                                                                                                                                                                         | 25                      |
 | rec_char_dict_path      | the alphabet path which needs to be modified to your own path when `rec_model_Name` use mode 2                                                                                                                                              | ./ppocr/utils/ppocr_keys_v1.txt                        |
 | use_space_char          | Whether to recognize spaces                                                                                                                                                                                                         | TRUE                    |
+| drop_score          | Filter the output by score (from the recognition model), and those below this score will not be returned                                                                                                                                                                                                        | 0.5                    |
 | use_angle_cls          | Whether to load classification model                                                                                                                                                                                                       | FALSE                    |
 | cls_model_dir           | the classification inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/cls`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None |
 | cls_image_shape         | image shape of classification algorithm                                                                                                                                                                                            | "3,48,192"              |
@ -305,4 +359,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | lang                     | The support language, now only Chinese(ch)、English(en)、French(french)、German(german)、Korean(korean)、Japanese(japan) are supported                                                                                                                                                                                                  | ch                    |
 | det                     | Enable detction when `ppocr.ocr` func exec                                                                                                                                                                                                   | TRUE                    |
 | rec                     | Enable recognition when `ppocr.ocr` func exec                                                                                                                                                                                                   | TRUE                    |
-| cls                     | Enable classification when `ppocr.ocr` func exec                                                                                                                                                                                                   | FALSE                    |
+| cls                     | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction)                                                                                                                                                                                                   | FALSE                    |
--- a/paddleocr.py
+++ b/paddleocr.py
--- a/ppocr/data/imaug/init.py
+++ b/ppocr/data/imaug/init.py
@ -26,6 +26,9 @@ from .randaugment import RandAugment
 from .operators import *
 from .label_ops import *

+from .east_process import *
+from .sast_process import *
+

 def transform(data, ops=None):
    """ transform """
--- a/ppocr/data/imaug/east_process.py
+++ b/ppocr/data/imaug/east_process.py
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@ -52,6 +52,7 @@ class DetLabelEncode(object):
                txt_tags.append(True)
            else:
                txt_tags.append(False)
+        boxes = self.expand_points_num(boxes)
        boxes = np.array(boxes, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool)

@ -70,6 +71,17 @@ class DetLabelEncode(object):
        rect[3] = pts[np.argmax(diff)]
        return rect

+    def expand_points_num(self, boxes):
+        max_points_num = 0
+        for box in boxes:
+            if len(box) > max_points_num:
+                max_points_num = len(box)
+        ex_boxes = []
+        for box in boxes:
+            ex_box = box + [box[-1]] * (max_points_num - len(box))
+            ex_boxes.append(ex_box)
+        return ex_boxes
+

 class BaseRecLabelEncode(object):
    """ Convert between text-label and text-index """
@ -79,7 +91,9 @@ class BaseRecLabelEncode(object):
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False):
-        support_character_type = ['ch', 'en', 'en_sensitive']
+        support_character_type = [
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
            support_character_type, self.character_str)

@ -87,7 +101,7 @@ class BaseRecLabelEncode(object):
        if character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
-        elif character_type == "ch":
+        elif character_type in ["ch", "french", "german", "japan", "korean"]:
            self.character_str = ""
            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
            with open(character_dict_path, "rb") as fin:
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@ -120,26 +120,37 @@ class DetResizeForTest(object):
        if 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
+        if 'resize_long' in kwargs:
+            self.resize_type = 2
+            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        img = data['image']
+        src_h, src_w, _ = img.shape

        if self.resize_type == 0:
-            img, shape = self.resize_image_type0(img)
+            # img, shape = self.resize_image_type0(img)
+            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
+        elif self.resize_type == 2:
+            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
-            img, shape = self.resize_image_type1(img)
+            # img, shape = self.resize_image_type1(img)
+            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
-        data['shape'] = shape
+        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data

    def resize_image_type1(self, img):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        return img, np.array([ori_h, ori_w])
+        # return img, np.array([ori_h, ori_w])
+        return img, [ratio_h, ratio_w]

    def resize_image_type0(self, img):
        """
@ -182,4 +193,31 @@ class DetResizeForTest(object):
        except:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
-        return img, np.array([h, w])
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        # return img, np.array([h, w])
+        return img, [ratio_h, ratio_w]
+    
+    def resize_image_type2(self, img):
+        h, w, _ = img.shape
+
+        resize_w = w
+        resize_h = h
+
+        # Fix the longer side
+        if resize_h > resize_w:
+            ratio = float(self.resize_long) / resize_h
+        else:
+            ratio = float(self.resize_long) / resize_w
+
+        resize_h = int(resize_h * ratio)
+        resize_w = int(resize_w * ratio)
+
+        max_stride = 128
+        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+
+        return img, [ratio_h, ratio_w]
--- a/ppocr/data/imaug/sast_process.py
+++ b/ppocr/data/imaug/sast_process.py
--- a/ppocr/losses/init.py
+++ b/ppocr/losses/init.py
@ -18,6 +18,8 @@ import copy
 def build_loss(config):
    # det loss
    from .det_db_loss import DBLoss
+    from .det_east_loss import EASTLoss
+    from .det_sast_loss import SASTLoss

    # rec loss
    from .rec_ctc_loss import CTCLoss
@ -25,7 +27,7 @@ def build_loss(config):
    # cls loss
    from .cls_loss import ClsLoss

-    support_dict = ['DBLoss', 'CTCLoss', 'ClsLoss']
+    support_dict = ['DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss']

    config = copy.deepcopy(config)
    module_name = config.pop('name')
--- a/ppocr/losses/det_east_loss.py
+++ b/ppocr/losses/det_east_loss.py
@ -0,0 +1,63 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+from .det_basic_loss import DiceLoss
+
+
+class EASTLoss(nn.Layer):
+    """
+    """
+
+    def __init__(self,
+                 eps=1e-6,
+                 **kwargs):
+        super(EASTLoss, self).__init__()
+        self.dice_loss = DiceLoss(eps=eps)
+
+    def forward(self, predicts, labels):
+        l_score, l_geo, l_mask = labels[1:]
+        f_score = predicts['f_score']
+        f_geo = predicts['f_geo']
+
+        dice_loss = self.dice_loss(f_score, l_score, l_mask)
+
+        #smoooth_l1_loss
+        channels = 8
+        l_geo_split = paddle.split(
+            l_geo, num_or_sections=channels + 1, axis=1)
+        f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1)
+        smooth_l1 = 0
+        for i in range(0, channels):
+            geo_diff = l_geo_split[i] - f_geo_split[i]
+            abs_geo_diff = paddle.abs(geo_diff)
+            smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score)
+            smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32')
+            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
+                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
+            out_loss = l_geo_split[-1] / channels * in_loss * l_score
+            smooth_l1 += out_loss
+        smooth_l1_loss = paddle.mean(smooth_l1 * l_score)
+
+        dice_loss = dice_loss * 0.01
+        total_loss = dice_loss + smooth_l1_loss
+        losses = {"loss":total_loss, \
+                  "dice_loss":dice_loss,\
+                  "smooth_l1_loss":smooth_l1_loss}
+        return losses
--- a/ppocr/losses/det_sast_loss.py
+++ b/ppocr/losses/det_sast_loss.py
@ -0,0 +1,121 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+from .det_basic_loss import DiceLoss
+import paddle.fluid as fluid
+import numpy as np
+
+
+class SASTLoss(nn.Layer):
+    """
+    """
+
+    def __init__(self,
+                 eps=1e-6,
+                 **kwargs):
+        super(SASTLoss, self).__init__()
+        self.dice_loss = DiceLoss(eps=eps)
+
+    def forward(self, predicts, labels):
+        """
+        tcl_pos: N x 128 x 3
+        tcl_mask: N x 128 x 1
+        tcl_label: N x X list or LoDTensor
+        """
+                
+        f_score = predicts['f_score']
+        f_border = predicts['f_border']
+        f_tvo = predicts['f_tvo']
+        f_tco = predicts['f_tco']
+
+        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]
+
+        #score_loss
+        intersection = paddle.sum(f_score * l_score * l_mask)
+        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
+        score_loss = 1.0 - 2 * intersection / (union + 1e-5)
+
+        #border loss
+        l_border_split, l_border_norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1)
+        f_border_split = f_border
+        border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
+        l_border_norm_split = paddle.expand(x=l_border_norm, shape=border_ex_shape)
+        l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)   
+        l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)  
+
+        border_diff = l_border_split - f_border_split
+        abs_border_diff = paddle.abs(border_diff) 
+        border_sign = abs_border_diff < 1.0
+        border_sign = paddle.cast(border_sign, dtype='float32')
+        border_sign.stop_gradient = True
+        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
+                    (abs_border_diff - 0.5) * (1.0 - border_sign)
+        border_out_loss = l_border_norm_split * border_in_loss
+        border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
+                    (paddle.sum(l_border_score * l_border_mask) + 1e-5)
+
+        #tvo_loss
+        l_tvo_split, l_tvo_norm = paddle.split(l_tvo, num_or_sections=[8, 1], axis=1)
+        f_tvo_split = f_tvo
+        tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
+        l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
+        l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)   
+        l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)   
+        #
+        tvo_geo_diff = l_tvo_split - f_tvo_split
+        abs_tvo_geo_diff = paddle.abs(tvo_geo_diff) 
+        tvo_sign = abs_tvo_geo_diff < 1.0
+        tvo_sign = paddle.cast(tvo_sign, dtype='float32')
+        tvo_sign.stop_gradient = True
+        tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
+                    (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
+        tvo_out_loss = l_tvo_norm_split * tvo_in_loss
+        tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
+                    (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)
+
+        #tco_loss
+        l_tco_split, l_tco_norm = paddle.split(l_tco, num_or_sections=[2, 1], axis=1)
+        f_tco_split = f_tco
+        tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
+        l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
+        l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)   
+        l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape) 
+        
+        tco_geo_diff = l_tco_split - f_tco_split
+        abs_tco_geo_diff = paddle.abs(tco_geo_diff) 
+        tco_sign = abs_tco_geo_diff < 1.0
+        tco_sign = paddle.cast(tco_sign, dtype='float32')
+        tco_sign.stop_gradient = True
+        tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
+                    (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
+        tco_out_loss = l_tco_norm_split * tco_in_loss
+        tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
+                    (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)
+
+
+        # total loss
+        tvo_lw, tco_lw = 1.5, 1.5
+        score_lw, border_lw = 1.0, 1.0
+        total_loss = score_loss * score_lw + border_loss * border_lw + \
+                    tvo_loss * tvo_lw + tco_loss * tco_lw
+                    
+        losses = {'loss':total_loss, "score_loss":score_loss,\
+            "border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
+        return losses
--- a/ppocr/modeling/backbones/init.py
+++ b/ppocr/modeling/backbones/init.py
@ -19,6 +19,7 @@ def build_backbone(config, model_type):
    if model_type == 'det':
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
+        from .det_resnet_vd_sast import ResNet_SAST
        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
    elif model_type == 'rec' or model_type == 'cls':
        from .rec_mobilenet_v3 import MobileNetV3
--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/ppocr/modeling/backbones/det_resnet_vd_sast.py
--- a/ppocr/modeling/heads/init.py
+++ b/ppocr/modeling/heads/init.py
@ -18,13 +18,15 @@ __all__ = ['build_head']
 def build_head(config):
    # det head
    from .det_db_head import DBHead
+    from .det_east_head import EASTHead
+    from .det_sast_head import SASTHead

    # rec head
    from .rec_ctc_head import CTCHead

    # cls head
    from .cls_head import ClsHead
-    support_dict = ['DBHead', 'CTCHead', 'ClsHead']
+    support_dict = ['DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('head only support {}'.format(
--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
@ -0,0 +1,121 @@
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name="bn_" + name + "_scale"),
+            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
+            moving_mean_name="bn_" + name + "_mean",
+            moving_variance_name="bn_" + name + "_variance")
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
+
+class EASTHead(nn.Layer):
+    """
+    """
+    def __init__(self, in_channels, model_name, **kwargs):
+        super(EASTHead, self).__init__()
+        self.model_name = model_name
+        if self.model_name == "large":
+            num_outputs = [128, 64, 1, 8]
+        else:
+            num_outputs = [64, 32, 1, 8]
+
+        self.det_conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=num_outputs[0],
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            if_act=True,
+            act='relu',
+            name="det_head1")
+        self.det_conv2 = ConvBNLayer(
+            in_channels=num_outputs[0],
+            out_channels=num_outputs[1],
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            if_act=True,
+            act='relu',
+            name="det_head2")
+        self.score_conv = ConvBNLayer(
+            in_channels=num_outputs[1],
+            out_channels=num_outputs[2],
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name="f_score")
+        self.geo_conv = ConvBNLayer(
+            in_channels=num_outputs[1],
+            out_channels=num_outputs[3],
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name="f_geo")
+
+    def forward(self, x):
+        f_det = self.det_conv1(x)
+        f_det = self.det_conv2(f_det)
+        f_score = self.score_conv(f_det)
+        f_score = F.sigmoid(f_score)
+        f_geo = self.geo_conv(f_det)
+        f_geo = (F.sigmoid(f_geo) - 0.5) * 2 * 800
+
+        pred = {'f_score': f_score, 'f_geo': f_geo}
+        return pred
--- a/Show More
+++ b/Show More