fix conflicts

dependabot/pip/PPOCRLabel/requirements/lxml-4.6.2
LDOUBLEV 4 years ago
commit c94428a880

@ -106,7 +106,7 @@ class MainWindow(QMainWindow, WindowMixin):
getStr = lambda strId: self.stringBundle.getString(strId)
self.defaultSaveDir = defaultSaveDir
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=True, lang=lang)
self.ocr = PaddleOCR(use_pdserving=False, use_angle_cls=True, det=True, cls=True, use_gpu=False, lang=lang)
if os.path.exists('./data/paddle.png'):
result = self.ocr.ocr('./data/paddle.png', cls=True, det=True)
@ -274,6 +274,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.preButton.setIconSize(QSize(40, 100))
self.preButton.clicked.connect(self.openPrevImg)
self.preButton.setStyleSheet('border: none;')
self.preButton.setShortcut('a')
self.iconlist = QListWidget()
self.iconlist.setViewMode(QListView.IconMode)
self.iconlist.setFlow(QListView.TopToBottom)
@ -289,12 +290,12 @@ class MainWindow(QMainWindow, WindowMixin):
self.nextButton.setIconSize(QSize(40, 100))
self.nextButton.setStyleSheet('border: none;')
self.nextButton.clicked.connect(self.openNextImg)
self.nextButton.setShortcut('d')
hlayout.addWidget(self.preButton)
hlayout.addWidget(self.iconlist)
hlayout.addWidget(self.nextButton)
# self.setLayout(hlayout)
iconListContainer = QWidget()
iconListContainer.setLayout(hlayout)
@ -359,11 +360,6 @@ class MainWindow(QMainWindow, WindowMixin):
opendir = action(getStr('openDir'), self.openDirDialog,
'Ctrl+u', 'open', getStr('openDir'))
openNextImg = action(getStr('nextImg'), self.openNextImg,
'd', 'next', getStr('nextImgDetail'))
openPrevImg = action(getStr('prevImg'), self.openPrevImg,
'a', 'prev', getStr('prevImgDetail'))
save = action(getStr('save'), self.saveFile,
'Ctrl+V', 'verify', getStr('saveDetail'), enabled=False)
@ -371,7 +367,7 @@ class MainWindow(QMainWindow, WindowMixin):
alcm = action(getStr('choosemodel'), self.autolcm,
'Ctrl+M', 'next', getStr('tipchoosemodel'))
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+D', 'close', getStr('deleteImgDetail'),
deleteImg = action(getStr('deleteImg'), self.deleteImg, 'Ctrl+Shift+D', 'close', getStr('deleteImgDetail'),
enabled=True)
resetAll = action(getStr('resetAll'), self.resetAll, None, 'resetall', getStr('resetAllDetail'))
@ -388,7 +384,7 @@ class MainWindow(QMainWindow, WindowMixin):
'w', 'new', getStr('crtBoxDetail'), enabled=False)
delete = action(getStr('delBox'), self.deleteSelectedShape,
'Delete', 'delete', getStr('delBoxDetail'), enabled=False)
'backspace', 'delete', getStr('delBoxDetail'), enabled=False)
copy = action(getStr('dupBox'), self.copySelectedShape,
'Ctrl+C', 'copy', getStr('dupBoxDetail'),
enabled=False)
@ -446,8 +442,11 @@ class MainWindow(QMainWindow, WindowMixin):
reRec = action(getStr('reRecognition'), self.reRecognition,
'Ctrl+Shift+R', 'reRec', getStr('reRecognition'), enabled=False)
singleRere = action(getStr('singleRe'), self.singleRerecognition,
'Ctrl+R', 'reRec', getStr('singleRe'), enabled=False)
createpoly = action(getStr('creatPolygon'), self.createPolygon,
'p', 'new', 'Creat Polygon', enabled=True)
'q', 'new', 'Creat Polygon', enabled=True)
saveRec = action(getStr('saveRec'), self.saveRecResult,
'', 'save', getStr('saveRec'), enabled=False)
@ -491,6 +490,7 @@ class MainWindow(QMainWindow, WindowMixin):
icon='color', tip=getStr('shapeFillColorDetail'),
enabled=False)
# Label list context menu.
labelMenu = QMenu()
addActions(labelMenu, (edit, delete))
@ -501,7 +501,6 @@ class MainWindow(QMainWindow, WindowMixin):
# Draw squares/rectangles
self.drawSquaresOption = QAction(getStr('drawSquares'), self)
self.drawSquaresOption.setShortcut('Ctrl+Shift+R')
self.drawSquaresOption.setCheckable(True)
self.drawSquaresOption.setChecked(settings.get(SETTING_DRAW_SQUARE, False))
self.drawSquaresOption.triggered.connect(self.toogleDrawSquare)
@ -509,7 +508,7 @@ class MainWindow(QMainWindow, WindowMixin):
# Store actions for further handling.
self.actions = struct(save=save, open=open, resetAll=resetAll, deleteImg=deleteImg,
lineColor=color1, create=create, delete=delete, edit=edit, copy=copy,
saveRec=saveRec,
saveRec=saveRec, singleRere=singleRere,AutoRec=AutoRec,reRec=reRec,
createMode=createMode, editMode=editMode,
shapeLineColor=shapeLineColor, shapeFillColor=shapeFillColor,
zoom=zoom, zoomIn=zoomIn, zoomOut=zoomOut, zoomOrg=zoomOrg,
@ -518,9 +517,9 @@ class MainWindow(QMainWindow, WindowMixin):
fileMenuActions=(
open, opendir, saveLabel, resetAll, quit),
beginner=(), advanced=(),
editMenu=(createpoly, edit, copy, delete,
editMenu=(createpoly, edit, copy, delete,singleRere,
None, color1, self.drawSquaresOption),
beginnerContext=(create, edit, copy, delete),
beginnerContext=(create, edit, copy, delete, singleRere),
advancedContext=(createMode, editMode, edit, copy,
delete, shapeLineColor, shapeFillColor),
onLoadActive=(
@ -562,7 +561,7 @@ class MainWindow(QMainWindow, WindowMixin):
zoomIn, zoomOut, zoomOrg, None,
fitWindow, fitWidth))
addActions(self.menus.autolabel, (alcm, None, help)) #
addActions(self.menus.autolabel, (AutoRec, reRec, alcm, None, help)) #
self.menus.file.aboutToShow.connect(self.updateFileMenu)
@ -572,6 +571,7 @@ class MainWindow(QMainWindow, WindowMixin):
action('&Copy here', self.copyShape),
action('&Move here', self.moveShape)))
self.statusBar().showMessage('%s started.' % __appname__)
self.statusBar().show()
@ -919,6 +919,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.actions.edit.setEnabled(selected)
self.actions.shapeLineColor.setEnabled(selected)
self.actions.shapeFillColor.setEnabled(selected)
self.actions.singleRere.setEnabled(selected)
def addLabel(self, shape):
shape.paintLabel = self.displayLabelOption.isChecked()
@ -988,6 +989,19 @@ class MainWindow(QMainWindow, WindowMixin):
self.updateComboBox()
self.canvas.loadShapes(s)
def singleLabel(self, shape):
if shape is None:
# print('rm empty label')
return
item = self.shapesToItems[shape]
item.setText(shape.label)
self.updateComboBox()
# ADD:
item = self.shapesToItemsbox[shape]
item.setText(str([(int(p.x()), int(p.y())) for p in shape.points]))
self.updateComboBox()
def updateComboBox(self):
# Get the unique labels and add them to the Combobox.
itemsTextList = [str(self.labelList.item(i).text()) for i in range(self.labelList.count())]
@ -1441,6 +1455,8 @@ class MainWindow(QMainWindow, WindowMixin):
self.haveAutoReced = False
self.AutoRecognition.setEnabled(True)
self.reRecogButton.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
self.actions.reRec.setEnabled(True)
self.actions.saveLabel.setEnabled(True)
@ -1755,6 +1771,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.loadFile(self.filePath) # ADD
self.haveAutoReced = True
self.AutoRecognition.setEnabled(False)
self.actions.AutoRec.setEnabled(False)
self.setDirty()
self.saveCacheLabel()
@ -1794,6 +1811,27 @@ class MainWindow(QMainWindow, WindowMixin):
else:
QMessageBox.information(self, "Information", "Draw a box!")
def singleRerecognition(self):
img = cv2.imread(self.filePath)
shape = self.canvas.selectedShape
box = [[int(p.x()), int(p.y())] for p in shape.points]
assert len(box) == 4
img_crop = get_rotate_crop_image(img, np.array(box, np.float32))
if img_crop is None:
msg = 'Can not recognise the detection box in ' + self.filePath + '. Please change manually'
QMessageBox.information(self, "Information", msg)
return
result = self.ocr.ocr(img_crop, cls=True, det=False)
if result[0][0] is not '':
result.insert(0, box)
print('result in reRec is ', result)
if result[1][0] == shape.label:
print('label no change')
else:
shape.label = result[1][0]
self.singleLabel(shape)
self.setDirty()
print(box)
def autolcm(self):
vbox = QVBoxLayout()
@ -1825,6 +1863,7 @@ class MainWindow(QMainWindow, WindowMixin):
self.dialog.exec_()
if self.filePath:
self.AutoRecognition.setEnabled(True)
self.actions.AutoRec.setEnabled(True)
def modelChoose(self):

@ -6,6 +6,10 @@ PPOCRLabel is a semi-automatic graphic annotation tool suitable for OCR field. I
<img src="./data/gif/steps_en.gif" width="100%"/>
### Recent Update
- 2020.12.18: Support re-recognition of a single label box (by [ninetailskim](https://github.com/ninetailskim) ), perfect shortcut keys.
## Installation
### 1. Install PaddleOCR
@ -92,11 +96,30 @@ Therefore, if the recognition result has been manually changed before, it may ch
## Explanation
### Shortcut keys
| Shortcut keys | Description |
| ---------------- | ------------------------------------------------ |
| Ctrl + shift + A | Automatically label all unchecked images |
| Ctrl + shift + R | Re-recognize all the labels of the current image |
| W | Create a rect box |
| Q | Create a four-points box |
| Ctrl + E | Edit label of the selected box |
| Ctrl + R | Re-recognize the selected box |
| Backspace | Delete the selected box |
| Ctrl + V | Check image |
| Ctrl + Shift + d | Delete image |
| D | Next image |
| A | Previous image |
| Ctrl++ | Zoom in |
| Ctrl-- | Zoom out |
| ↑→↓← | Move selected box |
### Built-in Model
- Default model: PPOCRLabel uses the Chinese and English ultra-lightweight OCR model in PaddleOCR by default, supports Chinese, English and number recognition, and multiple language detection.
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languagesinclude French, German, Korean, and Japanese.
- Model language switching: Changing the built-in model language is supportable by clicking "PaddleOCR"-"Choose OCR Model" in the menu bar. Currently supported languagesinclude French, German, Korean, and Japanese.
For specific model download links, please refer to [PaddleOCR Model List](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/models_list_en.md#multilingual-recognition-modelupdating)
- Custom model: The model trained by users can be replaced by modifying PPOCRLabel.py in [PaddleOCR class instantiation](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/PPOCRLabel/PPOCRLabel.py#L110) referring [Custom Model Code](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_en/whl_en.md#use-custom-model)

@ -6,6 +6,10 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具使用p
<img src="./data/gif/steps.gif" width="100%"/>
#### 近期更新
- 2020.12.18: 支持对单个标记框进行重新识别by [ninetailskim](https://github.com/ninetailskim) ),完善快捷键。
## 安装
### 1. 安装PaddleOCR
@ -72,6 +76,26 @@ python3 PPOCRLabel.py --lang ch
| crop_img | 识别数据。按照检测框切割后的图片。与rec_gt.txt同时产生。 |
## 说明
### 快捷键
| 快捷键 | 说明 |
| ---------------- | ---------------------------- |
| Ctrl + shift + A | 自动标注所有未确认过的图片 |
| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
| W | 新建矩形框 |
| Q | 新建四点框 |
| Ctrl + E | 编辑所选框标签 |
| Ctrl + R | 重新识别所选标记 |
| Backspace | 删除所选框 |
| Ctrl + V | 确认本张图片标记 |
| Ctrl + Shift + d | 删除本张图片 |
| D | 下一张图片 |
| A | 上一张图片 |
| Ctrl++ | 缩小 |
| Ctrl-- | 放大 |
| ↑→↓← | 移动标记框 |
### 内置模型
- 默认模型PPOCRLabel默认使用PaddleOCR中的中英文超轻量OCR模型支持中英文与数字识别多种语言检测。

@ -46,8 +46,9 @@ class Worker(QThread):
chars = res[1][0]
cond = res[1][1]
posi = res[0]
strs += "Transcription: " + chars + " Probability: " + str(
cond) + " Location: " + json.dumps(posi) + '\n'
strs += "Transcription: " + chars + " Probability: " + str(cond) + \
" Location: " + json.dumps(posi) +'\n'
# Sending large amounts of data repeatedly through pyqtSignal may affect the program efficiency
self.listValue.emit(strs)
self.mainThread.result_dic = self.result_dic
self.mainThread.filePath = Imgpath

File diff suppressed because it is too large Load Diff

@ -94,4 +94,5 @@ ok=确认
autolabeling=自动标注中
hideBox=隐藏所有标注
showBox=显示所有标注
saveLabel=保存标记结果
saveLabel=保存标记结果
singleRe=重识别此区块

@ -1,70 +0,0 @@
saveAsDetail=將標籤保存到其他文件
changeSaveDir=改變存放目錄
openFile=開啟檔案
shapeLineColorDetail=更改線條顏色
resetAll=重置
crtBox=創建區塊
crtBoxDetail=畫一個區塊
dupBoxDetail=複製區塊
verifyImg=驗證圖像
zoominDetail=放大
verifyImgDetail=驗證圖像
saveDetail=將標籤存到
openFileDetail=打開圖像
fitWidthDetail=調整到窗口寬度
tutorial=YouTube教學
editLabel=編輯標籤
openAnnotationDetail=打開標籤文件
quit=結束
shapeFillColorDetail=更改填充顏色
closeCurDetail=關閉目前檔案
closeCur=關閉
deleteImg=刪除圖像
deleteImgDetail=刪除目前圖像
fitWin=調整到跟窗口一樣大小
delBox=刪除選取區塊
boxLineColorDetail=選擇框線顏色
originalsize=原始大小
resetAllDetail=重設所有設定
zoomoutDetail=畫面放大
save=儲存
saveAs=另存為
fitWinDetail=縮放到窗口一樣
openDir=開啟目錄
copyPrevBounding=複製當前圖像中的上一個邊界框
showHide=顯示/隱藏標籤
changeSaveFormat=更改儲存格式
shapeFillColor=填充顏色
quitApp=離開本程式
dupBox=複製區塊
delBoxDetail=刪除區塊
zoomin=放大畫面
info=資訊
openAnnotation=開啟標籤
prevImgDetail=上一個圖像
fitWidth=縮放到跟畫面一樣寬
zoomout=縮小畫面
changeSavedAnnotationDir=更改預設標籤存的目錄
nextImgDetail=下一個圖像
originalsizeDetail=放大到原始大小
prevImg=上一個圖像
tutorialDetail=顯示示範內容
shapeLineColor=形狀線條顏色
boxLineColor=日期分隔線顏色
editLabelDetail=修改所選區塊的標籤
nextImg=下一張圖片
useDefaultLabel=使用預設標籤
useDifficult=有難度的
boxLabelText=區塊的標籤
labels=標籤
autoSaveMode=自動儲存模式
singleClsMode=單一類別模式
displayLabel=顯示類別
fileList=檔案清單
files=檔案
iconList=XX
icon=XX
advancedMode=進階模式
advancedModeDetail=切到進階模式
showAllBoxDetail=顯示所有區塊
hideAllBoxDetail=隱藏所有區塊

@ -94,4 +94,5 @@ ok=OK
autolabeling=Automatic Labeling
hideBox=Hide All Box
showBox=Show All Box
saveLabel=Save Label
saveLabel=Save Label
singleRe=Re-recognition RectBox

@ -122,8 +122,7 @@ For a new language request, please refer to [Guideline for new language_requests
<img src="./doc/ppocr_framework.png" width="800">
</div>
PP-OCR is a practical ultra-lightweight OCR system. It is mainly composed of three parts: DB text detection, detection frame correction and CRNN text recognition. The system adopts 19 effective strategies from 8 aspects including backbone network selection and adjustment, prediction head design, data augmentation, learning rate transformation strategy, regularization parameter selection, pre-training model use, and automatic model tailoring and quantization to optimize and slim down the models of each module. The final results are an ultra-lightweight Chinese and English OCR model with an overall size of 3.5M and a 2.8M English digital OCR model. For more details, please refer to the PP-OCR technical article (https://arxiv.org/abs/2009.09941). Besides, The implementation of the FPGM Pruner and PACT quantization is based on [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim).
PP-OCR is a practical ultra-lightweight OCR system. It is mainly composed of three parts: DB text detection[2], detection frame correction and CRNN text recognition[7]. The system adopts 19 effective strategies from 8 aspects including backbone network selection and adjustment, prediction head design, data augmentation, learning rate transformation strategy, regularization parameter selection, pre-training model use, and automatic model tailoring and quantization to optimize and slim down the models of each module. The final results are an ultra-lightweight Chinese and English OCR model with an overall size of 3.5M and a 2.8M English digital OCR model. For more details, please refer to the PP-OCR technical article (https://arxiv.org/abs/2009.09941). Besides, The implementation of the FPGM Pruner [8] and PACT quantization [9] is based on [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim).
## Visualization [more](./doc/doc_en/visualization_en.md)

@ -115,7 +115,7 @@ PaddleOCR同时支持动态图与静态图两种编程范式
<img src="./doc/ppocr_framework.png" width="800">
</div>
PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测、检测框矫正和CRNN文本识别三部分组成。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面采用19个有效策略对各个模块的模型进行效果调优和瘦身最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 。其中FPGM裁剪器和PACT量化的实现可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
PP-OCR是一个实用的超轻量OCR系统。主要由DB文本检测[2]、检测框矫正和CRNN文本识别三部分组成[7]。该系统从骨干网络选择和调整、预测头部的设计、数据增强、学习率变换策略、正则化参数选择、预训练模型使用以及模型自动裁剪量化8个方面采用19个有效策略对各个模块的模型进行效果调优和瘦身最终得到整体大小为3.5M的超轻量中英文OCR和2.8M的英文数字OCR。更多细节请参考PP-OCR技术方案 https://arxiv.org/abs/2009.09941 。其中FPGM裁剪器[8]和PACT量化[9]的实现可以参考[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)。
<a name="效果展示"></a>
## 效果展示 [more](./doc/doc_ch/visualization.md)

@ -22,7 +22,7 @@ English | [简体中文](README_ch.md)
</div>
The Style-Text data synthesis tool is a tool based on Baidu's self-developed text editing algorithm "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
The Style-Text data synthesis tool is a tool based on Baidu and HUST cooperation research work, "Editing Text in the Wild" [https://arxiv.org/abs/1908.03047](https://arxiv.org/abs/1908.03047).
Different from the commonly used GAN-based data synthesis tools, the main framework of Style-Text includes:
* (1) Text foreground style transfer module.
@ -69,10 +69,15 @@ fusion_generator:
1. You can run `tools/synth_image` and generate the demo image, which is saved in the current folder.
```python
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
```
* Note: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
* Note 1: The language options is correspond to the corpus. Currently, the tool only supports English, Simplified Chinese and Korean.
* Note 2: Synth-Text is mainly used to generate images for OCR recognition models.
So the height of style images should be around 32 pixels. Images in other sizes may behave poorly.
* Note 3: You can modify `use_gpu` in `configs/config.yml` to determine whether to use GPU for prediction.
For example, enter the following image and corpus `PaddleOCR`.
@ -136,9 +141,21 @@ We provide a general dataset containing Chinese, English and Korean (50,000 imag
2. You can run the following command to start synthesis task:
``` bash
python3 -m tools.synth_dataset.py -c configs/dataset_config.yml
python3 tools/synth_dataset.py -c configs/dataset_config.yml
```
We also provide example corpus and images in `examples` folder.
<div align="center">
<img src="examples/style_images/1.jpg" width="300">
<img src="examples/style_images/2.jpg" width="300">
</div>
If you run the code above directly, you will get example output data in `output_data` folder.
You will get synthesis images and labels as below:
<div align="center">
<img src="doc/images/12.png" width="800">
</div>
There will be some cache under the `label` folder. If the program exit unexpectedly, you can find cached labels there.
When the program finish normally, you will find all the labels in `label.txt` which give the final results.
<a name="Applications"></a>
### Applications

@ -21,7 +21,7 @@
</div>
Style-Text数据合成工具是基于百度自研的文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
Style-Text数据合成工具是基于百度和华科合作研发的文本编辑算法《Editing Text in the Wild》https://arxiv.org/abs/1908.03047
不同于常用的基于GAN的数据合成工具Style-Text主要框架包括1.文本前景风格迁移模块 2.背景抽取模块 3.融合模块。经过这样三步,就可以迅速实现图像文本风格迁移。下图是一些该数据合成工具效果图。
@ -61,9 +61,13 @@ fusion_generator:
输入一张风格图和一段文字语料运行tools/synth_image合成单张图片结果图像保存在当前目录下
```python
python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
python3 tools/synth_image.py -c configs/config.yml --style_image examples/style_images/2.jpg --text_corpus PaddleOCR --language en
```
* 注意:语言选项和语料相对应,目前该工具只支持英文、简体中文和韩语。
* 注1语言选项和语料相对应目前该工具只支持英文、简体中文和韩语。
* 注2Style-Text生成的数据主要应用于OCR识别场景。基于当前PaddleOCR识别模型的设计我们主要支持高度在32左右的风格图像。
如果输入图像尺寸相差过多,效果可能不佳。
* 注3可以通过修改配置文件中的`use_gpu`(true或者false)参数来决定是否使用GPU进行预测。
例如,输入如下图片和语料"PaddleOCR":
@ -124,8 +128,21 @@ python3 -m tools.synth_image -c configs/config.yml --style_image examples/style_
2. 运行`tools/synth_dataset`合成数据:
``` bash
python3 -m tools.synth_dataset -c configs/dataset_config.yml
python3 tools/synth_dataset.py -c configs/dataset_config.yml
```
我们在examples目录下提供了样例图片和语料。
<div align="center">
<img src="examples/style_images/1.jpg" width="300">
<img src="examples/style_images/2.jpg" width="300">
</div>
直接运行上述命令可以在output_data中产生样例输出包括图片和用于训练识别模型的标注文件
<div align="center">
<img src="doc/images/12.png" width="800">
</div>
其中label目录下的标注文件为程序运行过程中产生的缓存如果程序在中途异常终止可以使用缓存的标注文件。
如果程序正常运行完毕则会在output_data下生成label.txt为最终的标注结果。
<a name="应用案例"></a>
### 四、应用案例

@ -33,7 +33,7 @@ Predictor:
- 0.5
expand_result: false
bg_generator:
pretrain: models/style_text_rec/bg_generator
pretrain: style_text_models/bg_generator
module_name: bg_generator
generator_type: BgGeneratorWithMask
encode_dim: 64
@ -43,7 +43,7 @@ Predictor:
conv_block_dilation: true
output_factor: 1.05
text_generator:
pretrain: models/style_text_rec/text_generator
pretrain: style_text_models/text_generator
module_name: text_generator
generator_type: TextGenerator
encode_dim: 64
@ -52,7 +52,7 @@ Predictor:
conv_block_dropout: false
conv_block_dilation: true
fusion_generator:
pretrain: models/style_text_rec/fusion_generator
pretrain: style_text_models/fusion_generator
module_name: fusion_generator
generator_type: FusionGeneratorSimple
encode_dim: 64

Binary file not shown.

After

Width:  |  Height:  |  Size: 148 KiB

@ -28,6 +28,7 @@ class StyleTextRecPredictor(object):
], "Generator {} not supported.".format(algorithm)
use_gpu = config["Global"]['use_gpu']
check_gpu(use_gpu)
paddle.set_device('gpu' if use_gpu else 'cpu')
self.logger = get_logger()
self.generator = getattr(style_text_rec, algorithm)(config)
self.height = config["Global"]["image_height"]

@ -1,2 +1,2 @@
PaddleOCR
Paddle
飞桨文字识别

@ -11,6 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from engine.synthesisers import DatasetSynthesiser

@ -16,13 +16,13 @@ import cv2
import sys
import glob
from utils.config import ArgsParser
from engine.synthesisers import ImageSynthesiser
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
from utils.config import ArgsParser
from engine.synthesisers import ImageSynthesiser
def synth_image():
args = ArgsParser().parse_args()

@ -60,7 +60,8 @@ Metric:
Train:
dataset:
name: SimpleDataSet
label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
data_dir: ./train_data/
label_file_list: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
data_ratio_list: [0.5, 0.5]
transforms:
- DecodeImage: # load image

@ -11,7 +11,7 @@ max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 2.0
det_model_dir ./inference/ch__ppocr_mobile_v2.0_det_infer/
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config
use_angle_cls 0

@ -9,9 +9,9 @@
### 1.文本检测算法
PaddleOCR开源的文本检测算法列表
- [x] DB([paper]( https://arxiv.org/abs/1911.08947) )ppocr推荐
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
- [x] DB([paper]( https://arxiv.org/abs/1911.08947)) [2]ppocr推荐
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
在ICDAR2015文本检测公开数据集上算法效果如下
@ -38,13 +38,13 @@ PaddleOCR文本检测算法的训练和使用请参考文档教程中[模型训
### 2.文本识别算法
PaddleOCR基于动态图开源的文本识别算法列表
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717) )ppocr推荐
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1)) coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294)) coming soon
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]ppocr推荐
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
参考[DTRB](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程使用MJSynth和SynthText两个文字识别数据集训练在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估算法效果如下
参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程使用MJSynth和SynthText两个文字识别数据集训练在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估算法效果如下
|模型|骨干网络|Avg Accuracy|模型存储命名|下载链接|
|-|-|-|-|-|

@ -117,7 +117,7 @@ python3 tools/eval.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/
```
# 预测分类结果
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
预测图片:

@ -120,16 +120,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
测试单张图像的检测效果
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy"
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```
测试DB模型时调整后处理阈值
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
```
测试文件夹下所有图像的检测效果
```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy"
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
```

@ -245,7 +245,10 @@ python3 tools/infer/predict_det.py --det_algorithm="SAST" --image_dir="./doc/img
超轻量中文识别模型推理,可以执行如下命令:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/"
# 下载超轻量中文识别模型:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
```
![](../imgs_words/ch/word_4.jpg)
@ -266,7 +269,6 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153)
```
python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model=./rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/rec_crnn
```
CRNN 文本识别模型推理,可以执行如下命令:
@ -327,7 +329,10 @@ Predicts of ./doc/imgs_words/korean/1.jpg:('바탕으로', 0.9948904)
方向分类模型推理,可以执行如下命令:
```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="./inference/cls/"
# 下载超轻量中文方向分类器模型:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
```
![](../imgs_words/ch/word_1.jpg)

@ -324,7 +324,6 @@ Eval:
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
*注意* 评估时必须确保配置文件中 infer_img 字段为空
```
# GPU 评估, Global.checkpoints 为待测权重
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
```
# 预测英文结果
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
```
预测图片:
@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
```
# 预测中文结果
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
```
预测图片:

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save