Merge pull request #1105 from dyning/dygraph

updata structure of dygraph
release/2.0-rc1-0
dyning 4 years ago committed by GitHub
commit 96c9190710
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -6,29 +6,19 @@ Global:
save_model_dir: ./output/db_mv3/
save_epoch_step: 1200
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: 8
eval_batch_step: [4000, 5000]
# if pretrained_model was saved in static mode, load_static_weights must be set to True
load_static_weights: True
cal_metric_during_train: False
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints:
checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
save_inference_dir:
use_visualdl: True
use_visualdl: False
infer_img: doc/imgs_en/img_10.jpg
save_res_path: ./output/det_db/predicts_db.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
learning_rate:
lr: 0.001
regularizer:
name: 'L2'
factor: 0
Architecture:
type: det
model_type: det
algorithm: DB
Transform:
Backbone:
@ -36,7 +26,7 @@ Architecture:
scale: 0.5
model_name: large
Neck:
name: FPN
name: DBFPN
out_channels: 256
Head:
name: DBHead
@ -49,6 +39,18 @@ Loss:
alpha: 5
beta: 10
ohem_ratio: 3
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
learning_rate:
# name: Cosine
lr: 0.001
# warmup_epoch: 0
regularizer:
name: 'L2'
factor: 0
PostProcess:
name: DBPostProcess
@ -61,13 +63,13 @@ Metric:
name: DetMetric
main_indicator: hmean
TRAIN:
Train:
dataset:
name: SimpleDataSet
data_dir: ./detection/
file_list:
- ./detection/train_icdar2015_label.txt # dataset1
ratio_list: [1.0]
data_dir: ./train_data/icdar2015/text_localization/
label_file_list:
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list: [0.5]
transforms:
- DecodeImage: # load image
img_mode: BGR
@ -76,10 +78,10 @@ TRAIN:
- IaaAugment:
augmenter_args:
- { 'type': Fliplr, 'args': { 'p': 0.5 } }
- { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
- { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
- { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
- { 'type': Resize, 'args': { 'size': [0.5, 3] } }
- EastRandomCropData:
size: [ 640,640 ]
size: [640, 640]
max_tries: 50
keep_ratio: true
- MakeBorderMap:
@ -91,41 +93,41 @@ TRAIN:
min_text_size: 8
- NormalizeImage:
scale: 1./255.
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- ToCHWImage:
- keepKeys:
keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # dataloader will return list in this order
- KeepKeys:
keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
loader:
shuffle: True
drop_last: False
batch_size: 16
batch_size_per_card: 4
num_workers: 8
EVAL:
Eval:
dataset:
name: SimpleDataSet
data_dir: ./detection/
file_list:
- ./detection/test_icdar2015_label.txt
data_dir: ./train_data/icdar2015/text_localization/
label_file_list:
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- DetLabelEncode: # Class handling label
- DetResizeForTest:
image_shape: [736,1280]
image_shape: [736, 1280]
- NormalizeImage:
scale: 1./255.
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: 'hwc'
- ToCHWImage:
- keepKeys:
keep_keys: ['image','shape','polys','ignore_tags']
- KeepKeys:
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
loader:
shuffle: False
drop_last: False
batch_size: 1 # must be 1
num_workers: 8
batch_size_per_card: 1 # must be 1
num_workers: 2

@ -1,25 +1,25 @@
Global:
use_gpu: true
use_gpu: false
epoch_num: 500
log_smooth_window: 20
print_batch_step: 1
print_batch_step: 10
save_model_dir: ./output/rec/mv3_none_bilstm_ctc/
save_epoch_step: 500
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: 1016
eval_batch_step: 127
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights: True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: True
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
max_text_length: 80
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: 'ch'
use_space_char: True
use_space_char: False
infer_mode: False
use_tps: False
@ -29,7 +29,7 @@ Optimizer:
beta1: 0.9
beta2: 0.999
learning_rate:
lr: 0.0005
lr: 0.001
regularizer:
name: 'L2'
factor: 0.00001
@ -45,8 +45,8 @@ Architecture:
small_stride: [ 1, 2, 2, 2 ]
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 48
encoder_type: fc
hidden_size: 96
Head:
name: CTC
fc_decay: 0.00001
@ -63,9 +63,10 @@ Metric:
TRAIN:
dataset:
name: LMDBDateSet
name: SimpleDataSet
data_dir: ./rec
file_list:
- ./rec/lmdb/train # dataset1
- ./rec/train.txt # dataset1
ratio_list: [ 0.4,0.6 ]
transforms:
- DecodeImage: # load image
@ -85,9 +86,10 @@ TRAIN:
EVAL:
dataset:
name: LMDBDateSet
name: SimpleDataSet
data_dir: ./rec
file_list:
- ./rec/lmdb/val
- ./rec/val.txt
transforms:
- DecodeImage: # load image
img_mode: BGR

@ -1,14 +1,13 @@
Global:
use_gpu: false
epoch_num: 500
use_gpu: true
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/mv3_none_bilstm_ctc/
save_epoch_step: 500
save_epoch_step: 3
# evaluation is run every 1000 iterations, starting from iteration 0
eval_batch_step: 127
eval_batch_step: [0, 1000]
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights: True
cal_metric_during_train: True
pretrained_model:
checkpoints:
@ -16,12 +15,14 @@ Global:
use_visualdl: False
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
max_text_length: 80
character_dict_path: ppocr/utils/ppocr_keys_v1.txt
character_type: 'ch'
use_space_char: False
character_dict_path:
character_type: en
max_text_length: 25
loss_type: ctc
infer_mode: False
use_tps: False
# use_space_char: True
# use_tps: False
Optimizer:
@ -29,27 +30,26 @@ Optimizer:
beta1: 0.9
beta2: 0.999
learning_rate:
lr: 0.001
lr: 0.0005
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
type: rec
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: small
small_stride: [ 1, 2, 2, 2 ]
model_name: large
Neck:
name: SequenceEncoder
encoder_type: fc
encoder_type: rnn
hidden_size: 96
Head:
name: CTC
fc_decay: 0.00001
name: CTCHead
fc_decay: 0.0004
Loss:
name: CTCLoss
@ -61,46 +61,40 @@ Metric:
name: RecMetric
main_indicator: acc
TRAIN:
Train:
dataset:
name: SimpleDataSet
data_dir: ./rec
file_list:
- ./rec/train.txt # dataset1
ratio_list: [ 0.4,0.6 ]
name: LMDBDateSet
data_dir: ./train_data/data_lmdb_release/training/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecAug:
- RecResizeImg:
image_shape: [ 3,32,320 ]
- keepKeys:
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
batch_size: 256
shuffle: True
batch_size_per_card: 256
shuffle: False
drop_last: True
num_workers: 8
EVAL:
Eval:
dataset:
name: SimpleDataSet
data_dir: ./rec
file_list:
- ./rec/val.txt
name: LMDBDateSet
data_dir: ./train_data/data_lmdb_release/validation/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [ 3,32,320 ]
- keepKeys:
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size: 256
num_workers: 8
batch_size_per_card: 256
num_workers: 2

@ -1,103 +0,0 @@
Global:
use_gpu: false
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/mv3_none_none_ctc/
save_epoch_step: 500
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step: 2000
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights: True
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir:
use_visualdl: True
infer_img: doc/imgs_words/ch/word_1.jpg
# for data or label process
max_text_length: 25
character_dict_path:
character_type: 'en'
use_space_char: False
infer_mode: False
use_tps: False
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
learning_rate:
lr: 0.0005
regularizer:
name: 'L2'
factor: 0.00001
Architecture:
type: rec
algorithm: CRNN
Transform:
Backbone:
name: MobileNetV3
scale: 0.5
model_name: large
small_stride: [ 1, 2, 2, 2 ]
Neck:
name: SequenceEncoder
encoder_type: reshape
Head:
name: CTC
fc_decay: 0.00001
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
TRAIN:
dataset:
name: LMDBDateSet
file_list:
- ./rec/train # dataset1
ratio_list: [ 0.4,0.6 ]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecAug:
- RecResizeImg:
image_shape: [ 3,32,100 ]
- keepKeys:
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
loader:
batch_size: 256
shuffle: True
drop_last: True
num_workers: 8
EVAL:
dataset:
name: LMDBDateSet
file_list:
- ./rec/val/
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [ 3,32,100 ]
- keepKeys:
keep_keys: [ 'image','label','length' ] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size: 256
num_workers: 8

@ -1,58 +0,0 @@
English | [简体中文](README_cn.md)
## Introduction
Many users want to package the PaddleOCR service into a Docker image, so that it can be quickly released and used in a Docker or k8s environment.
This page provides some standardized code to achieve this goal. You can quickly publish the PaddleOCR project as a callable RESTful API service through the following steps. (At present, only deployment based on the HubServing mode is implemented; the author plans to add deployment based on the PaddleServing mode in the future.)
## 1. Prerequisites
You need to install the following basic components first
a. Docker
b. Graphics driver and CUDA 10.0+ (GPU)
c. NVIDIA Container Toolkit (GPU; can be skipped with Docker 19.03+)
d. cuDNN 7.6+ (GPU)
## 2. Build Image
a. Download PaddleOCR sourcecode
```
git clone https://github.com/PaddlePaddle/PaddleOCR.git
```
b. Go to the Dockerfile directory (PS: you need to distinguish between the CPU and GPU versions; the following takes the CPU version as an example, and for the GPU version you need to replace the keyword)
```
cd docker/cpu
```
c. Build image
```
docker build -t paddleocr:cpu .
```
## 3. Start container
a. CPU version
```
sudo docker run -dp 8866:8866 --name paddle_ocr paddleocr:cpu
```
b. GPU version (base on NVIDIA Container Toolkit)
```
sudo nvidia-docker run -dp 8866:8866 --name paddle_ocr paddleocr:gpu
```
c. GPU version (Docker 19.03++)
```
sudo docker run -dp 8866:8866 --gpus all --name paddle_ocr paddleocr:gpu
```
d. Check the service status (if you can see the following messages, the service has started successfully: "Successfully installed ocr_system" and "Running on http://0.0.0.0:8866/")
```
docker logs -f paddle_ocr
```
## 4. Test
a. Calculate the Base64 encoding of the picture to be recognized (if you are just testing, you can use a free online tool such as https://freeonlinetools24.com/base64-image/)
b. Post a service request (see the sample request in sample_request.txt)
```
curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"Input image Base64 encode(need to delete the code 'data:image/jpg;base64,'\"]}" http://localhost:8866/predict/ocr_system
```
c. Get the response (if the call is successful, the following result will be returned)
```
{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
```

@ -1,57 +0,0 @@
[English](README.md) | 简体中文
## Docker化部署服务
在日常项目应用中相信大家一般都会希望能通过Docker技术把PaddleOCR服务打包成一个镜像以便在Docker或k8s环境里快速发布上线使用。
本文将提供一些标准化的代码来实现这样的目标。大家通过如下步骤可以把PaddleOCR项目快速发布成可调用的Restful API服务。目前暂时先实现了基于HubServing模式的部署后续作者计划增加PaddleServing模式的部署
## 1.实施前提准备
需要先完成如下基本组件的安装:
a. Docker环境
b. 显卡驱动和CUDA 10.0+(GPU)
c. NVIDIA Container Toolkit(GPU,Docker 19.03以上版本可以跳过此步)
d. cuDNN 7.6+(GPU)
## 2.制作镜像
a.下载PaddleOCR项目代码
```
git clone https://github.com/PaddlePaddle/PaddleOCR.git
```
b.切换至Dockerfile目录需要区分cpu或gpu版本下文以cpu为例gpu版本需要替换一下关键字即可
```
cd docker/cpu
```
c.生成镜像
```
docker build -t paddleocr:cpu .
```
## 3.启动Docker容器
a. CPU 版本
```
sudo docker run -dp 8866:8866 --name paddle_ocr paddleocr:cpu
```
b. GPU 版本 (通过NVIDIA Container Toolkit)
```
sudo nvidia-docker run -dp 8866:8866 --name paddle_ocr paddleocr:gpu
```
c. GPU 版本 (Docker 19.03以上版本,可以直接用如下命令)
```
sudo docker run -dp 8866:8866 --gpus all --name paddle_ocr paddleocr:gpu
```
d. 检查服务运行情况出现Successfully installed ocr_system和Running on http://0.0.0.0:8866/等信息,表示运行成功)
```
docker logs -f paddle_ocr
```
## 4.测试服务
a. 计算待识别图片的Base64编码(如果只是测试一下效果,可以通过免费的在线工具实现,如:http://tool.chinaz.com/tools/imgtobase/)
b. 发送服务请求(可参见sample_request.txt中的值)
```
curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"填入图片Base64编码(需要删除'data:image/jpg;base64,'\"]}" http://localhost:8866/predict/ocr_system
```
c. 返回结果(如果调用成功,会返回如下结果)
```
{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
```

@ -1,28 +0,0 @@
# Version: 1.0.0
FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda9.0-cudnn7-dev
# PaddleOCR base on Python3.7
RUN pip3.7 install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN python3.7 -m pip install paddlepaddle==1.7.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip3.7 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN git clone https://gitee.com/PaddlePaddle/PaddleOCR
WORKDIR /PaddleOCR
RUN pip3.7 install -r requirments.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN mkdir -p /PaddleOCR/inference
# Download the OCR detection model (light version). If you want to switch to the normal version, change ch_det_mv3_db_infer to ch_det_r50_vd_db_infer, and remember to change det_model_dir in deploy/hubserving/ocr_system/params.py
ADD https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar /PaddleOCR/inference
RUN tar xf /PaddleOCR/inference/ch_det_mv3_db_infer.tar -C /PaddleOCR/inference
# Download the OCR recognition model (light version). If you want to switch to the normal version, change ch_rec_mv3_crnn_infer to ch_rec_r34_vd_crnn_enhance_infer, and remember to change rec_model_dir in deploy/hubserving/ocr_system/params.py
ADD https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar /PaddleOCR/inference
RUN tar xf /PaddleOCR/inference/ch_rec_mv3_crnn_infer.tar -C /PaddleOCR/inference
EXPOSE 8866
CMD ["/bin/bash","-c","export PYTHONPATH=. && hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system"]

@ -1,28 +0,0 @@
# Version: 1.0.0
FROM hub.baidubce.com/paddlepaddle/paddle:latest-gpu-cuda10.0-cudnn7-dev
# PaddleOCR base on Python3.7
RUN pip3.7 install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN python3.7 -m pip install paddlepaddle-gpu==1.7.2.post107 -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip3.7 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN git clone https://gitee.com/PaddlePaddle/PaddleOCR
WORKDIR /home/PaddleOCR
RUN pip3.7 install -r requirments.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
RUN mkdir -p /PaddleOCR/inference
# Download the OCR detection model (light version). If you want to switch to the normal version, change ch_det_mv3_db_infer to ch_det_r50_vd_db_infer, and remember to change det_model_dir in deploy/hubserving/ocr_system/params.py
ADD https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar /PaddleOCR/inference
RUN tar xf /PaddleOCR/inference/ch_det_mv3_db_infer.tar -C /PaddleOCR/inference
# Download the OCR recognition model (light version). If you want to switch to the normal version, change ch_rec_mv3_crnn_infer to ch_rec_r34_vd_crnn_enhance_infer, and remember to change rec_model_dir in deploy/hubserving/ocr_system/params.py
ADD https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_infer.tar /PaddleOCR/inference
RUN tar xf /PaddleOCR/inference/ch_rec_mv3_crnn_infer.tar -C /PaddleOCR/inference
EXPOSE 8866
CMD ["/bin/bash","-c","export PYTHONPATH=. && hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system"]

@ -1,58 +0,0 @@
English | [简体中文](README_cn.md)
## Introduction
Many users want to package the PaddleOCR service into a Docker image, so that it can be quickly released and used in a Docker or k8s environment.
This page provides some standardized code to achieve this goal. You can quickly publish the PaddleOCR project as a callable RESTful API service through the following steps. (At present, only deployment based on the HubServing mode is implemented; the author plans to add deployment based on the PaddleServing mode in the future.)
## 1. Prerequisites
You need to install the following basic components first
a. Docker
b. Graphics driver and CUDA 10.0+ (GPU)
c. NVIDIA Container Toolkit (GPU; can be skipped with Docker 19.03+)
d. cuDNN 7.6+ (GPU)
## 2. Build Image
a. Download PaddleOCR sourcecode
```
git clone https://github.com/PaddlePaddle/PaddleOCR.git
```
b. Go to the Dockerfile directory (PS: you need to distinguish between the CPU and GPU versions; the following takes the CPU version as an example, and for the GPU version you need to replace the keyword)
```
cd docker/cpu
```
c. Build image
```
docker build -t paddleocr:cpu .
```
## 3. Start container
a. CPU version
```
sudo docker run -dp 8866:8866 --name paddle_ocr paddleocr:cpu
```
b. GPU version (base on NVIDIA Container Toolkit)
```
sudo nvidia-docker run -dp 8866:8866 --name paddle_ocr paddleocr:gpu
```
c. GPU version (Docker 19.03++)
```
sudo docker run -dp 8866:8866 --gpus all --name paddle_ocr paddleocr:gpu
```
d. Check the service status (if you can see the following messages, the service has started successfully: "Successfully installed ocr_system" and "Running on http://0.0.0.0:8866/")
```
docker logs -f paddle_ocr
```
## 4. Test
a. Calculate the Base64 encoding of the picture to be recognized (if you are just testing, you can use a free online tool such as https://freeonlinetools24.com/base64-image/)
b. Post a service request (see the sample request in sample_request.txt)
```
curl -H "Content-Type:application/json" -X POST --data "{\"images\": [\"Input image Base64 encode(need to delete the code 'data:image/jpg;base64,'\"]}" http://localhost:8866/predict/ocr_system
```
c. Get the response (if the call is successful, the following result will be returned)
```
{"msg":"","results":[[{"confidence":0.8403433561325073,"text":"约定","text_region":[[345,377],[641,390],[634,540],[339,528]]},{"confidence":0.8131805658340454,"text":"最终相遇","text_region":[[356,532],[624,530],[624,596],[356,598]]}]],"status":"0"}
```

File diff suppressed because one or more lines are too long

@ -21,104 +21,72 @@ import os
import sys
import numpy as np
import paddle
import signal
import random
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
import copy
from paddle.io import DataLoader, DistributedBatchSampler, BatchSampler
from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler
import paddle.distributed as dist
from ppocr.data.imaug import transform, create_operators
from ppocr.data.simple_dataset import SimpleDataSet
from ppocr.data.lmdb_dataset import LMDBDateSet
__all__ = ['build_dataloader', 'transform', 'create_operators']
def build_dataset(config, global_config):
from ppocr.data.dataset import SimpleDataSet, LMDBDateSet
support_dict = ['SimpleDataSet', 'LMDBDateSet']
module_name = config.pop('name')
assert module_name in support_dict, Exception(
'DataSet only support {}'.format(support_dict))
def term_mp(sig_num, frame):
""" kill all child processes
"""
pid = os.getpid()
pgid = os.getpgid(os.getpid())
print("main proc {} exit, kill process group " "{}".format(pid, pgid))
os.killpg(pgid, signal.SIGKILL)
dataset = eval(module_name)(config, global_config)
return dataset
signal.signal(signal.SIGINT, term_mp)
signal.signal(signal.SIGTERM, term_mp)
def build_dataloader(config, device, distributed=False, global_config=None):
from ppocr.data.dataset import BatchBalancedDataLoader
def build_dataloader(config, mode, device, logger):
config = copy.deepcopy(config)
dataset_config = config['dataset']
_dataset_list = []
file_list = dataset_config.pop('file_list')
if len(file_list) == 1:
ratio_list = [1.0]
support_dict = ['SimpleDataSet', 'LMDBDateSet']
module_name = config[mode]['dataset']['name']
assert module_name in support_dict, Exception(
'DataSet only support {}'.format(support_dict))
assert mode in ['Train', 'Eval', 'Test'
], "Mode should be Train, Eval or Test."
dataset = eval(module_name)(config, mode, logger)
loader_config = config[mode]['loader']
batch_size = loader_config['batch_size_per_card']
drop_last = loader_config['drop_last']
num_workers = loader_config['num_workers']
if mode == "Train":
#Distribute data to multiple cards
batch_sampler = DistributedBatchSampler(
dataset=dataset,
batch_size=batch_size,
shuffle=False,
drop_last=drop_last)
else:
ratio_list = dataset_config.pop('ratio_list')
for file in file_list:
dataset_config['file_list'] = file
_dataset = build_dataset(dataset_config, global_config)
_dataset_list.append(_dataset)
data_loader = BatchBalancedDataLoader(_dataset_list, ratio_list,
distributed, device, config['loader'])
return data_loader, _dataset.info_dict
def test_loader():
import time
from tools.program import load_config, ArgsParser
FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config)
place = paddle.CPUPlace()
paddle.disable_static(place)
import time
data_loader, _ = build_dataloader(
config['TRAIN'], place, global_config=config['Global'])
start = time.time()
print(len(data_loader))
for epoch in range(1):
print('epoch {} ****************'.format(epoch))
for i, batch in enumerate(data_loader):
if i > len(data_loader):
break
t = time.time() - start
start = time.time()
print('{}, batch : {} ,time {}'.format(i, len(batch[0]), t))
continue
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
import cv2
fig = plt.figure()
# # cv2.imwrite('img.jpg',batch[0].numpy()[0].transpose((1,2,0)))
# # cv2.imwrite('bmap.jpg',batch[1].numpy()[0])
# # cv2.imwrite('bmask.jpg',batch[2].numpy()[0])
# # cv2.imwrite('smap.jpg',batch[3].numpy()[0])
# # cv2.imwrite('smask.jpg',batch[4].numpy()[0])
plt.title('img')
plt.imshow(batch[0].numpy()[0].transpose((1, 2, 0)))
# plt.figure()
# plt.title('bmap')
# plt.imshow(batch[1].numpy()[0],cmap='Greys')
# plt.figure()
# plt.title('bmask')
# plt.imshow(batch[2].numpy()[0],cmap='Greys')
# plt.figure()
# plt.title('smap')
# plt.imshow(batch[3].numpy()[0],cmap='Greys')
# plt.figure()
# plt.title('smask')
# plt.imshow(batch[4].numpy()[0],cmap='Greys')
# plt.show()
# break
if __name__ == '__main__':
test_loader()
#Distribute data to single card
batch_sampler = BatchSampler(
dataset=dataset,
batch_size=batch_size,
shuffle=False,
drop_last=drop_last)
data_loader = DataLoader(
dataset=dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=num_workers,
return_list=True)
return data_loader

File diff suppressed because it is too large Load Diff

@ -148,6 +148,8 @@ class CTCLabelEncode(BaseRecLabelEncode):
text = self.encode(text)
if text is None:
return None
if len(text) > self.max_text_len:
return None
data['length'] = np.array(len(text))
text = text + [0] * (self.max_text_len - len(text))
data['label'] = np.array(text)

@ -29,7 +29,7 @@ class MakeBorderMap(object):
self.thresh_min = thresh_min
self.thresh_max = thresh_max
def __call__(self, data: dict) -> dict:
def __call__(self, data):
img = data['image']
text_polys = data['polys']

@ -99,7 +99,7 @@ class ToCHWImage(object):
return data
class keepKeys(object):
class KeepKeys(object):
def __init__(self, keep_keys, **kwargs):
self.keep_keys = keep_keys

@ -50,16 +50,14 @@ class RecResizeImg(object):
image_shape,
infer_mode=False,
character_type='ch',
use_tps=False,
**kwargs):
self.image_shape = image_shape
self.infer_mode = infer_mode
self.character_type = character_type
self.use_tps = use_tps
def __call__(self, data):
img = data['image']
if self.infer_mode and self.character_type == "ch" and not self.use_tps:
if self.infer_mode and self.character_type == "ch":
norm_img = resize_norm_img_chinese(img, self.image_shape)
else:
norm_img = resize_norm_img(img, self.image_shape)

@ -0,0 +1,119 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
import os
import random
import paddle
from paddle.io import Dataset
import time
import lmdb
import cv2
from .imaug import transform, create_operators
class LMDBDateSet(Dataset):
    """Dataset that serves image/label samples stored in LMDB databases.

    Every leaf directory found under ``data_dir`` is opened as a separate
    LMDB environment. Samples are addressed through an index table whose
    rows are ``(lmdb_idx, sample_idx)`` pairs; ``sample_idx`` is 1-based
    because the databases are queried with keys ``image-%09d`` and
    ``label-%09d`` starting at 1.
    """

    def __init__(self, config, mode, logger):
        """Open the LMDB sets and build the (optionally shuffled) index.

        Args:
            config: full configuration dict; ``config['Global']`` and
                ``config[mode]['dataset'|'loader']`` are read.
            mode: key of the section to use inside ``config``.
            logger: object exposing ``info(msg)``.
        """
        super(LMDBDateSet, self).__init__()

        global_config = config['Global']
        dataset_config = config[mode]['dataset']
        loader_config = config[mode]['loader']
        data_dir = dataset_config['data_dir']
        self.do_shuffle = loader_config['shuffle']

        self.lmdb_sets = self.load_hierarchical_lmdb_dataset(data_dir)
        logger.info("Initialize indexs of datasets:%s" % data_dir)
        self.data_idx_order_list = self.dataset_traversal()
        if self.do_shuffle:
            # Shuffles the rows of the index table in place, once, at
            # construction time.
            np.random.shuffle(self.data_idx_order_list)
        self.ops = create_operators(dataset_config['transforms'], global_config)

    def load_hierarchical_lmdb_dataset(self, data_dir):
        """Open every leaf directory below ``data_dir`` as an LMDB set.

        Returns:
            dict mapping a running integer index to a dict with the keys
            ``dirpath``, ``env``, ``txn`` and ``num_samples``.
        """
        lmdb_sets = {}
        dataset_idx = 0
        for dirpath, dirnames, filenames in os.walk(data_dir + '/'):
            if dirnames:
                # Only directories without sub-directories are opened.
                continue
            env = lmdb.open(
                dirpath,
                max_readers=32,
                readonly=True,
                lock=False,
                readahead=False,
                meminit=False)
            txn = env.begin(write=False)
            num_samples = int(txn.get('num-samples'.encode()))
            lmdb_sets[dataset_idx] = {
                "dirpath": dirpath,
                "env": env,
                "txn": txn,
                "num_samples": num_samples,
            }
            dataset_idx += 1
        return lmdb_sets

    def dataset_traversal(self):
        """Build the index table covering every sample of every LMDB set.

        Returns:
            Integer ``numpy`` array of shape ``(total_samples, 2)``; column 0
            is the LMDB set index, column 1 the 1-based sample index inside
            that set.
        """
        lmdb_num = len(self.lmdb_sets)
        sample_counts = [
            self.lmdb_sets[lno]['num_samples'] for lno in range(lmdb_num)
        ]
        # Integer dtype: both columns are only ever used as indices.
        data_idx_order_list = np.zeros(
            (sum(sample_counts), 2), dtype=np.int64)
        beg_idx = 0
        for lno, tmp_sample_num in enumerate(sample_counts):
            end_idx = beg_idx + tmp_sample_num
            data_idx_order_list[beg_idx:end_idx, 0] = lno
            data_idx_order_list[beg_idx:end_idx, 1] = np.arange(
                1, tmp_sample_num + 1)
            beg_idx = end_idx
        return data_idx_order_list

    def get_img_data(self, value):
        """Decode an encoded image buffer with OpenCV.

        Returns:
            The decoded image, or ``None`` when ``value`` is empty or cannot
            be decoded.
        """
        if not value:
            return None
        imgdata = np.frombuffer(value, dtype='uint8')
        # cv2.imdecode yields None on undecodable input, which is passed on.
        return cv2.imdecode(imgdata, 1)

    def get_lmdb_sample_info(self, txn, index):
        """Fetch the raw image buffer and label stored under ``index``.

        Returns:
            ``(imgbuf, label)`` or ``None`` when either the label or the
            image entry is missing from the database.
        """
        label_key = 'label-%09d'.encode() % index
        label = txn.get(label_key)
        if label is None:
            return None
        label = label.decode('utf-8')
        img_key = 'image-%09d'.encode() % index
        imgbuf = txn.get(img_key)
        if imgbuf is None:
            # A label without an image cannot be transformed; report the
            # sample as missing so the caller draws a replacement.
            return None
        return imgbuf, label

    def __getitem__(self, idx):
        """Return the transformed sample at ``idx``.

        When the sample is missing or the transform pipeline returns
        ``None``, a randomly chosen other sample is returned instead.
        """
        lmdb_idx, file_idx = self.data_idx_order_list[idx]
        lmdb_idx = int(lmdb_idx)
        file_idx = int(file_idx)
        sample_info = self.get_lmdb_sample_info(self.lmdb_sets[lmdb_idx]['txn'],
                                                file_idx)
        if sample_info is None:
            return self.__getitem__(np.random.randint(self.__len__()))
        img, label = sample_info
        data = {'image': img, 'label': label}
        outs = transform(data, self.ops)
        if outs is None:
            return self.__getitem__(np.random.randint(self.__len__()))
        return outs

    def __len__(self):
        """Number of rows in the index table."""
        return self.data_idx_order_list.shape[0]

@ -0,0 +1,121 @@
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
import os
import random
import paddle
from paddle.io import Dataset
import time
from .imaug import transform, create_operators
class SimpleDataSet(Dataset):
    """Dataset backed by one or more plain-text label files.

    Each line of a label file holds an image file name and its label,
    separated by ``delimiter`` (tab by default). When several label files
    are given, samples are interleaved so that a run of ``batch_size``
    consecutive indices draws from each file according to ``ratio_list``.
    """

    def __init__(self, config, mode, logger):
        """Read the label files and build the sample order.

        Args:
            config: full configuration dict; ``config['Global']`` and
                ``config[mode]['dataset'|'loader']`` are read. Note that
                ``label_file_list`` (and ``ratio_list`` when used) are popped
                from the dataset section.
            mode: key of the section to use inside ``config``.
            logger: object exposing ``info(msg)``.
        """
        super(SimpleDataSet, self).__init__()

        global_config = config['Global']
        dataset_config = config[mode]['dataset']
        loader_config = config[mode]['loader']
        batch_size = loader_config['batch_size_per_card']

        self.delimiter = dataset_config.get('delimiter', '\t')
        label_file_list = dataset_config.pop('label_file_list')
        if isinstance(label_file_list, str):
            # A single path given as a bare string counts as one source,
            # not as len(string) sources.
            label_file_list = [label_file_list]
        data_source_num = len(label_file_list)
        if data_source_num == 1:
            ratio_list = [1.0]
        else:
            ratio_list = dataset_config.pop('ratio_list')

        # Tolerant comparison: exact float equality rejects valid ratio
        # lists whose sum is off by a rounding error.
        assert np.isclose(sum(ratio_list),
                          1.0), "The sum of the ratio_list should be 1."
        assert len(
            ratio_list
        ) == data_source_num, "The length of ratio_list should be the same as the file_list."
        self.data_dir = dataset_config['data_dir']
        self.do_shuffle = loader_config['shuffle']

        logger.info("Initialize indexs of datasets:%s" % label_file_list)
        self.data_lines_list, data_num_list = self.get_image_info_list(
            label_file_list)
        self.data_idx_order_list = self.dataset_traversal(
            data_num_list, ratio_list, batch_size)
        self.shuffle_data_random()

        self.ops = create_operators(dataset_config['transforms'], global_config)

    def get_image_info_list(self, file_list):
        """Read every label file into memory.

        Args:
            file_list: a path or a list of paths to label files.

        Returns:
            ``(data_lines_list, data_num_list)``: the raw byte lines of each
            file and the number of lines in each file.
        """
        if isinstance(file_list, str):
            file_list = [file_list]
        data_lines_list = []
        data_num_list = []
        for file in file_list:
            with open(file, "rb") as f:
                lines = f.readlines()
            data_lines_list.append(lines)
            data_num_list.append(len(lines))
        return data_lines_list, data_num_list

    def dataset_traversal(self, data_num_list, ratio_list, batch_size):
        """Interleave the sources into a single list of sample addresses.

        Sources are visited round-robin; on each visit a source contributes
        up to ``max(round(batch_size * ratio), 1)`` consecutive samples until
        it is exhausted.

        Returns:
            List of ``(dataset_idx, line_idx)`` tuples.
        """
        dataset_num = len(data_num_list)
        select_num_list = [
            max(round(batch_size * ratio_list[dno]), 1)
            for dno in range(dataset_num)
        ]

        data_idx_order_list = []
        cur_index_sets = [0] * dataset_num
        while True:
            finish_read_num = 0
            for dataset_idx in range(dataset_num):
                start = cur_index_sets[dataset_idx]
                total = data_num_list[dataset_idx]
                if start >= total:
                    finish_read_num += 1
                    continue
                # Take the next chunk, truncated at the end of the source.
                stop = min(start + select_num_list[dataset_idx], total)
                data_idx_order_list.extend(
                    (dataset_idx, line_idx)
                    for line_idx in range(start, stop))
                cur_index_sets[dataset_idx] = stop
            if finish_read_num == dataset_num:
                break
        return data_idx_order_list

    def shuffle_data_random(self):
        """Shuffle the lines of each source in place when shuffling is on."""
        if self.do_shuffle:
            for lines in self.data_lines_list:
                random.shuffle(lines)
        return

    def __getitem__(self, idx):
        """Load, decode and transform the sample at ``idx``.

        When the transform pipeline returns ``None``, a randomly chosen
        other sample is returned instead.
        """
        dataset_idx, file_idx = self.data_idx_order_list[idx]
        data_line = self.data_lines_list[dataset_idx][file_idx]
        data_line = data_line.decode('utf-8')
        # Strip "\r" as well so CRLF label files do not leave a stray
        # carriage return at the end of the label.
        substr = data_line.strip("\r\n").split(self.delimiter)
        file_name = substr[0]
        label = substr[1]
        img_path = os.path.join(self.data_dir, file_name)
        data = {'img_path': img_path, 'label': label}
        with open(data['img_path'], 'rb') as f:
            img = f.read()
            data['image'] = img
        outs = transform(data, self.ops)
        if outs is None:
            return self.__getitem__(np.random.randint(self.__len__()))
        return outs

    def __len__(self):
        """Number of sample addresses in the traversal order."""
        return len(self.data_idx_order_list)

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save