parent
8a5566c974
commit
021c1132a9
@ -1,8 +1,7 @@
|
|||||||
include LICENSE.txt
|
include LICENSE.txt
|
||||||
include README.md
|
include README.md
|
||||||
|
|
||||||
recursive-include ppocr/utils *.txt utility.py character.py check.py
|
recursive-include ppocr/utils *.txt utility.py logging.py
|
||||||
recursive-include ppocr/data/det *.py
|
recursive-include ppocr/data/ *.py
|
||||||
recursive-include ppocr/postprocess *.py
|
recursive-include ppocr/postprocess *.py
|
||||||
recursive-include ppocr/postprocess/lanms *.*
|
recursive-include tools/infer *.py
|
||||||
recursive-include tools/infer *.py
|
|
@ -0,0 +1,111 @@
|
|||||||
|
Global:
|
||||||
|
use_gpu: true
|
||||||
|
epoch_num: 10000
|
||||||
|
log_smooth_window: 20
|
||||||
|
print_batch_step: 2
|
||||||
|
save_model_dir: ./output/east_mv3/
|
||||||
|
save_epoch_step: 1000
|
||||||
|
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||||
|
eval_batch_step: [4000, 5000]
|
||||||
|
# if pretrained_model was saved in static-graph mode, load_static_weights must be set to True
|
||||||
|
load_static_weights: True
|
||||||
|
cal_metric_during_train: False
|
||||||
|
pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
|
||||||
|
checkpoints:
|
||||||
|
save_inference_dir:
|
||||||
|
use_visualdl: False
|
||||||
|
infer_img:
|
||||||
|
save_res_path: ./output/det_east/predicts_east.txt
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
model_type: det
|
||||||
|
algorithm: EAST
|
||||||
|
Transform:
|
||||||
|
Backbone:
|
||||||
|
name: MobileNetV3
|
||||||
|
scale: 0.5
|
||||||
|
model_name: large
|
||||||
|
Neck:
|
||||||
|
name: EASTFPN
|
||||||
|
model_name: small
|
||||||
|
Head:
|
||||||
|
name: EASTHead
|
||||||
|
model_name: small
|
||||||
|
|
||||||
|
Loss:
|
||||||
|
name: EASTLoss
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: Adam
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
lr:
|
||||||
|
# name: Cosine
|
||||||
|
learning_rate: 0.001
|
||||||
|
# warmup_epoch: 0
|
||||||
|
regularizer:
|
||||||
|
name: 'L2'
|
||||||
|
factor: 0
|
||||||
|
|
||||||
|
PostProcess:
|
||||||
|
name: EASTPostProcess
|
||||||
|
score_thresh: 0.8
|
||||||
|
cover_thresh: 0.1
|
||||||
|
nms_thresh: 0.2
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
name: DetMetric
|
||||||
|
main_indicator: hmean
|
||||||
|
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/icdar2015/text_localization/
|
||||||
|
label_file_list:
|
||||||
|
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
||||||
|
ratio_list: [1.0]
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- EASTProcessTrain:
|
||||||
|
image_shape: [512, 512]
|
||||||
|
background_ratio: 0.125
|
||||||
|
min_crop_side_ratio: 0.1
|
||||||
|
min_text_size: 10
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
|
||||||
|
loader:
|
||||||
|
shuffle: True
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 16
|
||||||
|
num_workers: 8
|
||||||
|
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/icdar2015/text_localization/
|
||||||
|
label_file_list:
|
||||||
|
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- DetResizeForTest:
|
||||||
|
limit_side_len: 2400
|
||||||
|
limit_type: max
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1./255.
|
||||||
|
mean: [0.485, 0.456, 0.406]
|
||||||
|
std: [0.229, 0.224, 0.225]
|
||||||
|
order: 'hwc'
|
||||||
|
- ToCHWImage:
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||||
|
loader:
|
||||||
|
shuffle: False
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 1 # must be 1
|
||||||
|
num_workers: 2
|
@ -0,0 +1,110 @@
|
|||||||
|
Global:
|
||||||
|
use_gpu: true
|
||||||
|
epoch_num: 10000
|
||||||
|
log_smooth_window: 20
|
||||||
|
print_batch_step: 2
|
||||||
|
save_model_dir: ./output/east_r50_vd/
|
||||||
|
save_epoch_step: 1000
|
||||||
|
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||||
|
eval_batch_step: [4000, 5000]
|
||||||
|
# if pretrained_model was saved in static-graph mode, load_static_weights must be set to True
|
||||||
|
load_static_weights: True
|
||||||
|
cal_metric_during_train: False
|
||||||
|
pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
|
||||||
|
checkpoints:
|
||||||
|
save_inference_dir:
|
||||||
|
use_visualdl: False
|
||||||
|
infer_img:
|
||||||
|
save_res_path: ./output/det_east/predicts_east.txt
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
model_type: det
|
||||||
|
algorithm: EAST
|
||||||
|
Transform:
|
||||||
|
Backbone:
|
||||||
|
name: ResNet
|
||||||
|
layers: 50
|
||||||
|
Neck:
|
||||||
|
name: EASTFPN
|
||||||
|
model_name: large
|
||||||
|
Head:
|
||||||
|
name: EASTHead
|
||||||
|
model_name: large
|
||||||
|
|
||||||
|
Loss:
|
||||||
|
name: EASTLoss
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: Adam
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
lr:
|
||||||
|
# name: Cosine
|
||||||
|
learning_rate: 0.001
|
||||||
|
# warmup_epoch: 0
|
||||||
|
regularizer:
|
||||||
|
name: 'L2'
|
||||||
|
factor: 0
|
||||||
|
|
||||||
|
PostProcess:
|
||||||
|
name: EASTPostProcess
|
||||||
|
score_thresh: 0.8
|
||||||
|
cover_thresh: 0.1
|
||||||
|
nms_thresh: 0.2
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
name: DetMetric
|
||||||
|
main_indicator: hmean
|
||||||
|
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/icdar2015/text_localization/
|
||||||
|
label_file_list:
|
||||||
|
- ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
|
||||||
|
ratio_list: [1.0]
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- EASTProcessTrain:
|
||||||
|
image_shape: [512, 512]
|
||||||
|
background_ratio: 0.125
|
||||||
|
min_crop_side_ratio: 0.1
|
||||||
|
min_text_size: 10
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
|
||||||
|
loader:
|
||||||
|
shuffle: True
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 8
|
||||||
|
num_workers: 8
|
||||||
|
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/icdar2015/text_localization/
|
||||||
|
label_file_list:
|
||||||
|
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- DetResizeForTest:
|
||||||
|
limit_side_len: 2400
|
||||||
|
limit_type: max
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1./255.
|
||||||
|
mean: [0.485, 0.456, 0.406]
|
||||||
|
std: [0.229, 0.224, 0.225]
|
||||||
|
order: 'hwc'
|
||||||
|
- ToCHWImage:
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||||
|
loader:
|
||||||
|
shuffle: False
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 1 # must be 1
|
||||||
|
num_workers: 2
|
@ -0,0 +1,110 @@
|
|||||||
|
Global:
|
||||||
|
use_gpu: true
|
||||||
|
epoch_num: 5000
|
||||||
|
log_smooth_window: 20
|
||||||
|
print_batch_step: 2
|
||||||
|
save_model_dir: ./output/sast_r50_vd_ic15/
|
||||||
|
save_epoch_step: 1000
|
||||||
|
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||||
|
eval_batch_step: [4000, 5000]
|
||||||
|
# if pretrained_model was saved in static-graph mode, load_static_weights must be set to True
|
||||||
|
load_static_weights: True
|
||||||
|
cal_metric_during_train: False
|
||||||
|
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||||
|
checkpoints:
|
||||||
|
save_inference_dir:
|
||||||
|
use_visualdl: False
|
||||||
|
infer_img:
|
||||||
|
save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
model_type: det
|
||||||
|
algorithm: SAST
|
||||||
|
Transform:
|
||||||
|
Backbone:
|
||||||
|
name: ResNet_SAST
|
||||||
|
layers: 50
|
||||||
|
Neck:
|
||||||
|
name: SASTFPN
|
||||||
|
with_cab: True
|
||||||
|
Head:
|
||||||
|
name: SASTHead
|
||||||
|
|
||||||
|
Loss:
|
||||||
|
name: SASTLoss
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: Adam
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
lr:
|
||||||
|
# name: Cosine
|
||||||
|
learning_rate: 0.001
|
||||||
|
# warmup_epoch: 0
|
||||||
|
regularizer:
|
||||||
|
name: 'L2'
|
||||||
|
factor: 0
|
||||||
|
|
||||||
|
PostProcess:
|
||||||
|
name: SASTPostProcess
|
||||||
|
score_thresh: 0.5
|
||||||
|
sample_pts_num: 2
|
||||||
|
nms_thresh: 0.2
|
||||||
|
expand_scale: 1.0
|
||||||
|
shrink_ratio_of_width: 0.3
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
name: DetMetric
|
||||||
|
main_indicator: hmean
|
||||||
|
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/
|
||||||
|
label_file_list: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
ratio_list: [0.5, 0.5]
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- SASTProcessTrain:
|
||||||
|
image_shape: [512, 512]
|
||||||
|
min_crop_side_ratio: 0.3
|
||||||
|
min_crop_size: 24
|
||||||
|
min_text_size: 4
|
||||||
|
max_text_size: 512
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
|
||||||
|
loader:
|
||||||
|
shuffle: True
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 4
|
||||||
|
num_workers: 4
|
||||||
|
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/icdar2015/text_localization/
|
||||||
|
label_file_list:
|
||||||
|
- ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- DetResizeForTest:
|
||||||
|
resize_long: 1536
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1./255.
|
||||||
|
mean: [0.485, 0.456, 0.406]
|
||||||
|
std: [0.229, 0.224, 0.225]
|
||||||
|
order: 'hwc'
|
||||||
|
- ToCHWImage:
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||||
|
loader:
|
||||||
|
shuffle: False
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 1 # must be 1
|
||||||
|
num_workers: 2
|
@ -0,0 +1,109 @@
|
|||||||
|
Global:
|
||||||
|
use_gpu: true
|
||||||
|
epoch_num: 5000
|
||||||
|
log_smooth_window: 20
|
||||||
|
print_batch_step: 2
|
||||||
|
save_model_dir: ./output/sast_r50_vd_tt/
|
||||||
|
save_epoch_step: 1000
|
||||||
|
# evaluation is run every 5000 iterations after the 4000th iteration
|
||||||
|
eval_batch_step: [4000, 5000]
|
||||||
|
# if pretrained_model was saved in static-graph mode, load_static_weights must be set to True
|
||||||
|
load_static_weights: True
|
||||||
|
cal_metric_during_train: False
|
||||||
|
pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
|
||||||
|
checkpoints:
|
||||||
|
save_inference_dir:
|
||||||
|
use_visualdl: False
|
||||||
|
infer_img:
|
||||||
|
save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt
|
||||||
|
|
||||||
|
Architecture:
|
||||||
|
model_type: det
|
||||||
|
algorithm: SAST
|
||||||
|
Transform:
|
||||||
|
Backbone:
|
||||||
|
name: ResNet_SAST
|
||||||
|
layers: 50
|
||||||
|
Neck:
|
||||||
|
name: SASTFPN
|
||||||
|
with_cab: True
|
||||||
|
Head:
|
||||||
|
name: SASTHead
|
||||||
|
|
||||||
|
Loss:
|
||||||
|
name: SASTLoss
|
||||||
|
|
||||||
|
Optimizer:
|
||||||
|
name: Adam
|
||||||
|
beta1: 0.9
|
||||||
|
beta2: 0.999
|
||||||
|
lr:
|
||||||
|
# name: Cosine
|
||||||
|
learning_rate: 0.001
|
||||||
|
# warmup_epoch: 0
|
||||||
|
regularizer:
|
||||||
|
name: 'L2'
|
||||||
|
factor: 0
|
||||||
|
|
||||||
|
PostProcess:
|
||||||
|
name: SASTPostProcess
|
||||||
|
score_thresh: 0.5
|
||||||
|
sample_pts_num: 6
|
||||||
|
nms_thresh: 0.2
|
||||||
|
expand_scale: 1.2
|
||||||
|
shrink_ratio_of_width: 0.2
|
||||||
|
|
||||||
|
Metric:
|
||||||
|
name: DetMetric
|
||||||
|
main_indicator: hmean
|
||||||
|
|
||||||
|
Train:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
label_file_list: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
|
||||||
|
ratio_list: [0.1, 0.45, 0.3, 0.15]
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- SASTProcessTrain:
|
||||||
|
image_shape: [512, 512]
|
||||||
|
min_crop_side_ratio: 0.3
|
||||||
|
min_crop_size: 24
|
||||||
|
min_text_size: 4
|
||||||
|
max_text_size: 512
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
|
||||||
|
loader:
|
||||||
|
shuffle: True
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 4
|
||||||
|
num_workers: 4
|
||||||
|
|
||||||
|
Eval:
|
||||||
|
dataset:
|
||||||
|
name: SimpleDataSet
|
||||||
|
data_dir: ./train_data/
|
||||||
|
label_file_list:
|
||||||
|
- ./train_data/total_text_icdar_14pt/test_label_json.txt
|
||||||
|
transforms:
|
||||||
|
- DecodeImage: # load image
|
||||||
|
img_mode: BGR
|
||||||
|
channel_first: False
|
||||||
|
- DetLabelEncode: # Class handling label
|
||||||
|
- DetResizeForTest:
|
||||||
|
resize_long: 768
|
||||||
|
- NormalizeImage:
|
||||||
|
scale: 1./255.
|
||||||
|
mean: [0.485, 0.456, 0.406]
|
||||||
|
std: [0.229, 0.224, 0.225]
|
||||||
|
order: 'hwc'
|
||||||
|
- ToCHWImage:
|
||||||
|
- KeepKeys:
|
||||||
|
keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
|
||||||
|
loader:
|
||||||
|
shuffle: False
|
||||||
|
drop_last: False
|
||||||
|
batch_size_per_card: 1 # must be 1
|
||||||
|
num_workers: 2
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,63 @@
|
|||||||
|
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import paddle
|
||||||
|
from paddle import nn
|
||||||
|
from .det_basic_loss import DiceLoss
|
||||||
|
|
||||||
|
|
||||||
|
class EASTLoss(nn.Layer):
    """Training loss for the EAST detector.

    The total loss is a dice loss on the score map (scaled by 0.01) plus a
    smooth-L1 style regression loss over the eight geometry channels.

    Args:
        eps: forwarded to ``DiceLoss``.
        **kwargs: accepted for configuration compatibility and ignored.
    """

    def __init__(self, eps=1e-6, **kwargs):
        super(EASTLoss, self).__init__()
        self.dice_loss = DiceLoss(eps=eps)

    def forward(self, predicts, labels):
        """Compute the EAST losses.

        Args:
            predicts: mapping with the keys ``'f_score'`` and ``'f_geo'``.
            labels: sequence whose elements after the first unpack into
                exactly (score map, geometry map, training mask).

        Returns:
            dict with the keys ``"loss"``, ``"dice_loss"`` and
            ``"smooth_l1_loss"``.
        """
        l_score, l_geo, l_mask = labels[1:]
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']

        score_term = self.dice_loss(f_score, l_score, l_mask)

        # smooth_l1_loss
        channels = 8
        # The label geometry map carries one channel more than the
        # prediction; that last channel is used as a per-pixel weight below.
        label_parts = paddle.split(
            l_geo, num_or_sections=channels + 1, axis=1)
        pred_parts = paddle.split(f_geo, num_or_sections=channels, axis=1)

        smooth_l1 = 0
        for label_chan, pred_chan in zip(label_parts[:channels], pred_parts):
            abs_diff = paddle.abs(label_chan - pred_chan)
            # The quadratic/linear switch point is the label score itself.
            use_quadratic = paddle.cast(
                paddle.less_than(abs_diff, l_score), dtype='float32')
            # NOTE(review): the quadratic branch is not scaled by 0.5 as in
            # the textbook smooth-L1 -- confirm this is intended.
            quadratic_part = abs_diff * abs_diff * use_quadratic
            linear_part = (abs_diff - 0.5) * (1.0 - use_quadratic)
            in_loss = quadratic_part + linear_part
            out_loss = label_parts[-1] / channels * in_loss * l_score
            smooth_l1 += out_loss
        smooth_l1_loss = paddle.mean(smooth_l1 * l_score)

        dice_loss = score_term * 0.01
        total_loss = dice_loss + smooth_l1_loss
        return {
            "loss": total_loss,
            "dice_loss": dice_loss,
            "smooth_l1_loss": smooth_l1_loss,
        }
|
@ -0,0 +1,121 @@
|
|||||||
|
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import paddle
|
||||||
|
from paddle import nn
|
||||||
|
from .det_basic_loss import DiceLoss
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class SASTLoss(nn.Layer):
    """Multi-task training loss for the SAST detector.

    The total loss is a weighted sum of a dice-style score loss and three
    normalised smooth-L1 regression losses (border, tvo, tco).

    Args:
        eps: forwarded to ``DiceLoss``.
        **kwargs: accepted for configuration compatibility and ignored.
    """

    def __init__(self, eps=1e-6, **kwargs):
        super(SASTLoss, self).__init__()
        # NOTE: forward() computes its score loss inline and does not call
        # this layer; it is kept so the module's attributes stay unchanged.
        self.dice_loss = DiceLoss(eps=eps)

    @staticmethod
    def _normed_smooth_l1(f_map, l_map, channels, l_score, l_mask):
        """Return the masked, normalised smooth-L1 loss for one branch.

        ``l_map`` is split along axis 1 into ``channels`` target channels
        plus one trailing normalisation channel. The normalisation channel,
        ``l_score`` and ``l_mask`` are each expanded to ``channels``
        channels so they can weight the per-channel loss.
        """
        l_target, l_norm = paddle.split(
            l_map, num_or_sections=[channels, 1], axis=1)
        # assumes 4-D label maps (the multiplier has four entries) -- TODO
        # confirm against the data pipeline
        ex_shape = l_norm.shape * np.array([1, channels, 1, 1])
        norm_weight = paddle.expand(x=l_norm, shape=ex_shape)
        score_weight = paddle.expand(x=l_score, shape=ex_shape)
        mask_weight = paddle.expand(x=l_mask, shape=ex_shape)

        abs_diff = paddle.abs(l_target - f_map)
        # 1.0 where the quadratic branch applies, 0.0 where the linear one
        # does; excluded from gradient computation.
        sign = paddle.cast(abs_diff < 1.0, dtype='float32')
        sign.stop_gradient = True
        in_loss = 0.5 * abs_diff * abs_diff * sign + \
            (abs_diff - 0.5) * (1.0 - sign)
        out_loss = norm_weight * in_loss
        # 1e-5 guards against division by zero when nothing is selected.
        return paddle.sum(out_loss * score_weight * mask_weight) / \
            (paddle.sum(score_weight * mask_weight) + 1e-5)

    def forward(self, predicts, labels):
        """Compute the SAST losses.

        Args:
            predicts: mapping with the keys ``'f_score'``, ``'f_border'``,
                ``'f_tvo'`` and ``'f_tco'``.
            labels: sequence whose elements after the first unpack into
                exactly (score, border, mask, tvo, tco) label maps.

        Returns:
            dict with the keys ``'loss'``, ``"score_loss"``,
            ``"border_loss"``, ``'tvo_loss'`` and ``'tco_loss'``.
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]

        # score_loss
        intersection = paddle.sum(f_score * l_score * l_mask)
        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # Regression branches: 4 border channels, 8 tvo channels and
        # 2 tco channels, each followed by one normalisation channel in
        # the corresponding label map.
        border_loss = self._normed_smooth_l1(
            f_border, l_border, 4, l_score, l_mask)
        tvo_loss = self._normed_smooth_l1(f_tvo, l_tvo, 8, l_score, l_mask)
        tco_loss = self._normed_smooth_l1(f_tco, l_tco, 2, l_score, l_mask)

        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {
            'loss': total_loss,
            "score_loss": score_loss,
            "border_loss": border_loss,
            'tvo_loss': tvo_loss,
            'tco_loss': tco_loss,
        }
        return losses
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,121 @@
|
|||||||
|
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import math
|
||||||
|
import paddle
|
||||||
|
from paddle import nn
|
||||||
|
import paddle.nn.functional as F
|
||||||
|
from paddle import ParamAttr
|
||||||
|
|
||||||
|
|
||||||
|
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution followed by batch normalisation.

    Args:
        in_channels: input channel count of the convolution.
        out_channels: output channel count of the convolution and the
            channel count of the batch-norm layer.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: convolution padding.
        groups: convolution groups.
        if_act: stored as ``self.if_act``; it is not consulted when the
            sublayers are built -- the activation is controlled by ``act``.
        act: activation passed to the batch-norm layer (``None`` for none).
        name: prefix used to derive explicit parameter names. When ``None``
            no explicit names are set and the framework generates them.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act

        def _param_name(prefix, suffix):
            # ``name`` defaults to None; concatenating it would raise
            # TypeError, so fall back to framework-generated names instead.
            if name is None:
                return None
            return prefix + name + suffix

        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=_param_name('', '_weights')),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=_param_name("bn_", "_scale")),
            bias_attr=ParamAttr(name=_param_name("bn_", "_offset")),
            moving_mean_name=_param_name("bn_", "_mean"),
            moving_variance_name=_param_name("bn_", "_variance"))

    def forward(self, x):
        """Apply the convolution, then batch norm (with its activation)."""
        x = self.conv(x)
        x = self.bn(x)
        return x
|
||||||
|
|
||||||
|
|
||||||
|
class EASTHead(nn.Layer):
    """Detection head that produces the EAST score and geometry maps.

    Args:
        in_channels: channel count of the incoming feature map.
        model_name: ``"large"`` selects 128/64 intermediate channels; any
            other value selects 64/32.
        **kwargs: accepted for configuration compatibility and ignored.
    """

    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTHead, self).__init__()
        self.model_name = model_name

        # Channel widths: two shared 3x3 convs, then the 1-channel score
        # branch and the 8-channel geometry branch.
        wide = self.model_name == "large"
        first_ch, second_ch = (128, 64) if wide else (64, 32)
        score_ch, geo_ch = 1, 8

        self.det_conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=first_ch,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="det_head1")
        self.det_conv2 = ConvBNLayer(
            in_channels=first_ch,
            out_channels=second_ch,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="det_head2")
        self.score_conv = ConvBNLayer(
            in_channels=second_ch,
            out_channels=score_ch,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name="f_score")
        self.geo_conv = ConvBNLayer(
            in_channels=second_ch,
            out_channels=geo_ch,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name="f_geo")

    def forward(self, x):
        """Return a dict with the keys ``'f_score'`` and ``'f_geo'``."""
        shared = self.det_conv2(self.det_conv1(x))

        score_logits = self.score_conv(shared)
        f_score = F.sigmoid(score_logits)

        geo_logits = self.geo_conv(shared)
        # Rescale the sigmoid output from (0, 1) to (-800, 800).
        f_geo = (F.sigmoid(geo_logits) - 0.5) * 2 * 800

        return {'f_score': f_score, 'f_geo': f_geo}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue