commit
bd7f8f72cc
@@ -0,0 +1,111 @@
Global:
  use_gpu: true
  epoch_num: 10000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/east_mv3/
  save_epoch_step: 1000
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [4000, 5000]
  # if pretrained_model is saved in static mode, load_static_weights must be set to True
  load_static_weights: True
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img:
  save_res_path: ./output/det_east/predicts_east.txt

Architecture:
  model_type: det
  algorithm: EAST
  Transform:
  Backbone:
    name: MobileNetV3
    scale: 0.5
    model_name: large
  Neck:
    name: EASTFPN
    model_name: small
  Head:
    name: EASTHead
    model_name: small

Loss:
  name: EASTLoss

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    # name: Cosine
    learning_rate: 0.001
    # warmup_epoch: 0
  regularizer:
    name: 'L2'
    factor: 0

PostProcess:
  name: EASTPostProcess
  score_thresh: 0.8
  cover_thresh: 0.1
  nms_thresh: 0.2

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - EASTProcessTrain:
          image_shape: [512, 512]
          background_ratio: 0.125
          min_crop_side_ratio: 0.1
          min_text_size: 10
      - KeepKeys:
          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 16
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          limit_side_len: 2400
          limit_type: max
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 2
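The eval_batch_step pair encodes a start iteration and an interval, as the comment in Global notes. A minimal sketch (not part of this commit) that loads the config and reads that schedule, assuming the file is saved at the hypothetical path configs/det/det_east_mv3.yml and PyYAML is installed:

import yaml

with open('configs/det/det_east_mv3.yml') as f:
    cfg = yaml.safe_load(f)

start_iter, interval = cfg['Global']['eval_batch_step']
# evaluation starts at iteration 4000 and then repeats every 5000 iterations
print('first eval around iter %d, then every %d iters' % (start_iter, interval))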
@@ -0,0 +1,110 @@
Global:
  use_gpu: true
  epoch_num: 10000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/east_r50_vd/
  save_epoch_step: 1000
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [4000, 5000]
  # if pretrained_model is saved in static mode, load_static_weights must be set to True
  load_static_weights: True
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img:
  save_res_path: ./output/det_east/predicts_east.txt

Architecture:
  model_type: det
  algorithm: EAST
  Transform:
  Backbone:
    name: ResNet
    layers: 50
  Neck:
    name: EASTFPN
    model_name: large
  Head:
    name: EASTHead
    model_name: large

Loss:
  name: EASTLoss

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    # name: Cosine
    learning_rate: 0.001
    # warmup_epoch: 0
  regularizer:
    name: 'L2'
    factor: 0

PostProcess:
  name: EASTPostProcess
  score_thresh: 0.8
  cover_thresh: 0.1
  nms_thresh: 0.2

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - EASTProcessTrain:
          image_shape: [512, 512]
          background_ratio: 0.125
          min_crop_side_ratio: 0.1
          min_text_size: 10
      - KeepKeys:
          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          limit_side_len: 2400
          limit_type: max
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 2
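The Architecture section is the same Backbone -> Neck -> Head recipe in every config; only the class names and their parameters change. An illustrative sketch (not PaddleOCR's actual factory) of how such a section could drive model assembly; REGISTRY is a hypothetical name-to-class map that a real build system would populate:

REGISTRY = {}  # hypothetical, e.g. {'EASTFPN': EASTFPN, 'EASTHead': EASTHead, ...}

def build_det_model(arch_cfg, registry=REGISTRY):
    """Instantiate Backbone -> Neck -> Head from a parsed config dict."""
    modules = []
    in_channels = 3  # raw image input
    for section in ('Backbone', 'Neck', 'Head'):
        params = dict(arch_cfg[section])
        name = params.pop('name')
        module = registry[name](in_channels=in_channels, **params)
        # each stage reports its output width to the next stage, if it has one
        in_channels = getattr(module, 'out_channels', in_channels)
        modules.append(module)
    return modules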
@@ -0,0 +1,110 @@
Global:
  use_gpu: true
  epoch_num: 5000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/sast_r50_vd_ic15/
  save_epoch_step: 1000
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [4000, 5000]
  # if pretrained_model is saved in static mode, load_static_weights must be set to True
  load_static_weights: True
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img:
  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt

Architecture:
  model_type: det
  algorithm: SAST
  Transform:
  Backbone:
    name: ResNet_SAST
    layers: 50
  Neck:
    name: SASTFPN
    with_cab: True
  Head:
    name: SASTHead

Loss:
  name: SASTLoss

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    # name: Cosine
    learning_rate: 0.001
    # warmup_epoch: 0
  regularizer:
    name: 'L2'
    factor: 0

PostProcess:
  name: SASTPostProcess
  score_thresh: 0.5
  sample_pts_num: 2
  nms_thresh: 0.2
  expand_scale: 1.0
  shrink_ratio_of_width: 0.3

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    label_file_list: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
    ratio_list: [0.5, 0.5]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - SASTProcessTrain:
          image_shape: [512, 512]
          min_crop_side_ratio: 0.3
          min_crop_size: 24
          min_text_size: 4
          max_text_size: 512
      - KeepKeys:
          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 4
    num_workers: 4

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          resize_long: 1536
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 2
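ratio_list (here [0.5, 0.5]) weights how much each label file contributes to an epoch. One plausible reading is per-file subsampling; the sketch below illustrates that reading only and is not the committed SimpleDataSet code, whose exact behavior should be checked in the dataset implementation:

import numpy as np

def apply_ratio_list(label_lines_per_file, ratio_list, seed=0):
    """Keep roughly ratio * len(lines) entries from each label file."""
    rng = np.random.default_rng(seed)
    kept = []
    for lines, ratio in zip(label_lines_per_file, ratio_list):
        n = int(round(len(lines) * ratio))
        kept.extend(rng.choice(lines, size=n, replace=False).tolist())
    return kept

print(len(apply_ratio_list([['a%d' % i for i in range(100)],
                            ['b%d' % i for i in range(200)]], [0.5, 0.5])))  # 150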
@@ -0,0 +1,109 @@
Global:
  use_gpu: true
  epoch_num: 5000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/sast_r50_vd_tt/
  save_epoch_step: 1000
  # evaluation is run every 5000 iterations after the 4000th iteration
  eval_batch_step: [4000, 5000]
  # if pretrained_model is saved in static mode, load_static_weights must be set to True
  load_static_weights: True
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img:
  save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt

Architecture:
  model_type: det
  algorithm: SAST
  Transform:
  Backbone:
    name: ResNet_SAST
    layers: 50
  Neck:
    name: SASTFPN
    with_cab: True
  Head:
    name: SASTHead

Loss:
  name: SASTLoss

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    # name: Cosine
    learning_rate: 0.001
    # warmup_epoch: 0
  regularizer:
    name: 'L2'
    factor: 0

PostProcess:
  name: SASTPostProcess
  score_thresh: 0.5
  sample_pts_num: 6
  nms_thresh: 0.2
  expand_scale: 1.2
  shrink_ratio_of_width: 0.2

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    label_file_list: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
    ratio_list: [0.1, 0.45, 0.3, 0.15]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - SASTProcessTrain:
          image_shape: [512, 512]
          min_crop_side_ratio: 0.3
          min_crop_size: 24
          min_text_size: 4
          max_text_size: 512
      - KeepKeys:
          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 4
    num_workers: 4

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    label_file_list:
      - ./train_data/total_text_icdar_14pt/test_label_json.txt
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          resize_long: 768
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 2
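In every Eval pipeline above, NormalizeImage computes (img * scale - mean) / std per channel in HWC order, and ToCHWImage then transposes to CHW. A standalone numpy equivalent of those two transforms (illustrative only):

import numpy as np

def normalize_image(img_u8, scale=1.0 / 255.0,
                    mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """(img * scale - mean) / std in HWC order, then transpose to CHW."""
    img = img_u8.astype('float32') * scale
    img = (img - np.array(mean, 'float32')) / np.array(std, 'float32')
    return img.transpose((2, 0, 1))  # ToCHWImage

chw = normalize_image(np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8))
print(chw.shape)  # (3, 32, 32)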
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,63 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
from .det_basic_loss import DiceLoss


class EASTLoss(nn.Layer):
    """EAST loss: dice loss on the score map plus a smooth L1 loss on the
    8-channel quad geometry map."""

    def __init__(self, eps=1e-6, **kwargs):
        super(EASTLoss, self).__init__()
        self.dice_loss = DiceLoss(eps=eps)

    def forward(self, predicts, labels):
        l_score, l_geo, l_mask = labels[1:]
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']

        dice_loss = self.dice_loss(f_score, l_score, l_mask)

        # smooth_l1_loss on the geometry map
        channels = 8
        # the label geometry map carries one extra normalization channel
        l_geo_split = paddle.split(
            l_geo, num_or_sections=channels + 1, axis=1)
        f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1)
        smooth_l1 = 0
        for i in range(0, channels):
            geo_diff = l_geo_split[i] - f_geo_split[i]
            abs_geo_diff = paddle.abs(geo_diff)
            # l_score is a 0/1 map, so inside text the quadratic branch is
            # selected where |diff| < 1
            smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score)
            smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32')
            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
            out_loss = l_geo_split[-1] / channels * in_loss * l_score
            smooth_l1 += out_loss
        smooth_l1_loss = paddle.mean(smooth_l1 * l_score)

        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        losses = {"loss": total_loss,
                  "dice_loss": dice_loss,
                  "smooth_l1_loss": smooth_l1_loss}
        return losses
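The sign mask in the loop switches between a quadratic branch |d|^2 where |d| < 1 (inside text, since l_score is 0/1) and a linear branch |d| - 0.5 elsewhere; note this EAST variant omits the conventional 0.5 factor on the quadratic branch that the SAST loss below uses. A tiny numpy rendering of the piecewise form, as a sanity check (illustrative, not the committed code):

import numpy as np

def smooth_l1_piecewise(d, inside):
    """d**2 where |d| < 1 inside text, |d| - 0.5 otherwise; `inside` is the 0/1 score map."""
    a = np.abs(d)
    sign = (a < inside).astype('float32')  # threshold is 1 inside text, 0 outside
    return a * a * sign + (a - 0.5) * (1.0 - sign)

d = np.array([0.3, 2.0], dtype='float32')
print(smooth_l1_piecewise(d, np.ones_like(d)))  # [0.09, 1.5]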
@@ -0,0 +1,121 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import paddle
from paddle import nn
from .det_basic_loss import DiceLoss


class SASTLoss(nn.Layer):
    """SAST loss: a dice-style score loss plus norm-weighted smooth L1
    losses on the border, TVO and TCO maps."""

    def __init__(self, eps=1e-6, **kwargs):
        super(SASTLoss, self).__init__()
        self.dice_loss = DiceLoss(eps=eps)

    def forward(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]

        # score loss (dice-style)
        intersection = paddle.sum(f_score * l_score * l_mask)
        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # border loss
        l_border_split, l_border_norm = paddle.split(
            l_border, num_or_sections=[4, 1], axis=1)
        f_border_split = f_border
        border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
        l_border_norm_split = paddle.expand(
            x=l_border_norm, shape=border_ex_shape)
        l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
        l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)

        border_diff = l_border_split - f_border_split
        abs_border_diff = paddle.abs(border_diff)
        border_sign = abs_border_diff < 1.0
        border_sign = paddle.cast(border_sign, dtype='float32')
        border_sign.stop_gradient = True
        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
            (abs_border_diff - 0.5) * (1.0 - border_sign)
        border_out_loss = l_border_norm_split * border_in_loss
        border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
            (paddle.sum(l_border_score * l_border_mask) + 1e-5)

        # tvo loss
        l_tvo_split, l_tvo_norm = paddle.split(
            l_tvo, num_or_sections=[8, 1], axis=1)
        f_tvo_split = f_tvo
        tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
        l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
        l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
        l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)

        tvo_geo_diff = l_tvo_split - f_tvo_split
        abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
        tvo_sign = abs_tvo_geo_diff < 1.0
        tvo_sign = paddle.cast(tvo_sign, dtype='float32')
        tvo_sign.stop_gradient = True
        tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
            (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
        tvo_out_loss = l_tvo_norm_split * tvo_in_loss
        tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
            (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)

        # tco loss
        l_tco_split, l_tco_norm = paddle.split(
            l_tco, num_or_sections=[2, 1], axis=1)
        f_tco_split = f_tco
        tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
        l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
        l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
        l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)

        tco_geo_diff = l_tco_split - f_tco_split
        abs_tco_geo_diff = paddle.abs(tco_geo_diff)
        tco_sign = abs_tco_geo_diff < 1.0
        tco_sign = paddle.cast(tco_sign, dtype='float32')
        tco_sign.stop_gradient = True
        tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
            (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
        tco_out_loss = l_tco_norm_split * tco_in_loss
        tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
            (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)

        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'loss': total_loss, 'score_loss': score_loss,
                  'border_loss': border_loss, 'tvo_loss': tvo_loss,
                  'tco_loss': tco_loss}
        return losses
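The border, TVO and TCO branches all follow the same recipe: split off a normalization channel, broadcast the score and mask to the branch width, apply 0.5*d^2 / (|d| - 0.5) smooth L1, weight by the norm channel, and divide by the masked score sum. A numpy sketch of that shared helper, as an illustrative refactor rather than the committed code:

import numpy as np

def norm_weighted_smooth_l1(pred, label, norm, score, mask, eps=1e-5):
    """pred/label: (N, C, H, W); norm/score/mask: (N, 1, H, W)."""
    a = np.abs(label - pred)
    sign = (a < 1.0).astype('float32')
    in_loss = 0.5 * a * a * sign + (a - 0.5) * (1.0 - sign)
    out_loss = norm * in_loss   # weight by the per-pixel normalization channel
    w = score * mask            # broadcasts over the C channels
    # denominator matches the paddle code, where score/mask are expanded C-fold
    return float((out_loss * w).sum() / (w.sum() * pred.shape[1] + eps))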
File diff suppressed because it is too large
@@ -0,0 +1,121 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class EASTHead(nn.Layer):
    """EAST detection head: two 3x3 conv-BN blocks followed by 1x1
    convolutions producing a 1-channel score map and an 8-channel quad
    geometry map."""

    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTHead, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]

        self.det_conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=num_outputs[0],
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="det_head1")
        self.det_conv2 = ConvBNLayer(
            in_channels=num_outputs[0],
            out_channels=num_outputs[1],
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="det_head2")
        self.score_conv = ConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[2],
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name="f_score")
        self.geo_conv = ConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[3],
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name="f_geo")

    def forward(self, x):
        f_det = self.det_conv1(x)
        f_det = self.det_conv2(f_det)
        f_score = self.score_conv(f_det)
        f_score = F.sigmoid(f_score)
        f_geo = self.geo_conv(f_det)
        # map the geometry output into the range (-800, 800) pixels
        f_geo = (F.sigmoid(f_geo) - 0.5) * 2 * 800

        pred = {'f_score': f_score, 'f_geo': f_geo}
        return pred
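The expression (F.sigmoid(f_geo) - 0.5) * 2 * 800 squashes each geometry channel into (-800, 800), i.e. pixel offsets of up to 800 in either direction. A quick numpy check of that activation (illustrative only):

import numpy as np

def geo_activation(x):
    return (1.0 / (1.0 + np.exp(-x)) - 0.5) * 2 * 800  # sigmoid, recentred, scaled

print(geo_activation(np.array([-10.0, 0.0, 10.0])))  # ~[-799.9, 0.0, 799.9]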
@@ -0,0 +1,128 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class SAST_Header1(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super(SAST_Header1, self).__init__()
        out_channels = [64, 64, 128]
        self.score_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_score1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_score2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_score3'),
            ConvBNLayer(out_channels[2], 1, 3, 1, act=None, name='f_score4')
        )
        self.border_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_border1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_border2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_border3'),
            ConvBNLayer(out_channels[2], 4, 3, 1, act=None, name='f_border4')
        )

    def forward(self, x):
        f_score = self.score_conv(x)
        f_score = F.sigmoid(f_score)
        f_border = self.border_conv(x)
        return f_score, f_border


class SAST_Header2(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super(SAST_Header2, self).__init__()
        out_channels = [64, 64, 128]
        self.tvo_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tvo1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tvo2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tvo3'),
            ConvBNLayer(out_channels[2], 8, 3, 1, act=None, name='f_tvo4')
        )
        self.tco_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tco1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tco2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tco3'),
            ConvBNLayer(out_channels[2], 2, 3, 1, act=None, name='f_tco4')
        )

    def forward(self, x):
        f_tvo = self.tvo_conv(x)
        f_tco = self.tco_conv(x)
        return f_tvo, f_tco


class SASTHead(nn.Layer):
    """SAST output head: one branch predicts the score and border maps,
    the other the TVO and TCO maps."""

    def __init__(self, in_channels, **kwargs):
        super(SASTHead, self).__init__()

        self.head1 = SAST_Header1(in_channels)
        self.head2 = SAST_Header2(in_channels)

    def forward(self, x):
        f_score, f_border = self.head1(x)
        f_tvo, f_tco = self.head2(x)

        predicts = {}
        predicts['f_score'] = f_score
        predicts['f_border'] = f_border
        predicts['f_tvo'] = f_tvo
        predicts['f_tco'] = f_tco
        return predicts
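All four branch convolutions preserve spatial size (stride 1, padding (k-1)//2), so the head yields 1-, 4-, 8- and 2-channel maps at the input resolution. A quick shape check, assuming the class above is in scope, paddle is installed, and a hypothetical 128-channel neck output:

import paddle

head = SASTHead(in_channels=128)  # 128 is an assumed neck width
preds = head(paddle.rand([1, 128, 32, 32]))
for k in ('f_score', 'f_border', 'f_tvo', 'f_tco'):
    print(k, preds[k].shape)  # [1,1,32,32], [1,4,32,32], [1,8,32,32], [1,2,32,32]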
@@ -0,0 +1,188 @@
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class DeConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class EASTFPN(nn.Layer):
    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTFPN, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            self.out_channels = 128
        else:
            self.out_channels = 64
        self.in_channels = in_channels[::-1]
        self.h1_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[1],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_1")
        self.h2_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[2],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_2")
        self.h3_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[3],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_3")
        self.g0_deconv = DeConvBNLayer(
            in_channels=self.in_channels[0],
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_0")
        self.g1_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_1")
        self.g2_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_2")
        self.g3_conv = ConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_3")

    def forward(self, x):
        f = x[::-1]

        h = f[0]
        g = self.g0_deconv(h)
        h = paddle.concat([g, f[1]], axis=1)
        h = self.h1_conv(h)
        g = self.g1_deconv(h)
        h = paddle.concat([g, f[2]], axis=1)
        h = self.h2_conv(h)
        g = self.g2_deconv(h)
        h = paddle.concat([g, f[3]], axis=1)
        h = self.h3_conv(h)
        g = self.g3_conv(h)

        return g
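The FPN takes four backbone feature maps ordered shallow-to-deep (strides 4, 8, 16, 32), reverses them, and merges top-down with 2x transposed convolutions into a single stride-4 map of out_channels width. A shape walk-through with assumed backbone channel counts (illustrative; the real channels depend on the chosen backbone):

import paddle

fpn = EASTFPN(in_channels=[64, 128, 256, 512], model_name='large')
feats = [paddle.rand([1, 64, 128, 128]),   # stride 4
         paddle.rand([1, 128, 64, 64]),    # stride 8
         paddle.rand([1, 256, 32, 32]),    # stride 16
         paddle.rand([1, 512, 16, 16])]    # stride 32
print(fpn(feats).shape)  # [1, 128, 128, 128]: out_channels at stride 4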
File diff suppressed because it is too large
@@ -0,0 +1,141 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import cv2
import numpy as np

from .locality_aware_nms import nms_locality


class EASTPostProcess(object):
    """
    The post process for EAST.
    """

    def __init__(self,
                 score_thresh=0.8,
                 cover_thresh=0.1,
                 nms_thresh=0.2,
                 **kwargs):
        self.score_thresh = score_thresh
        self.cover_thresh = cover_thresh
        self.nms_thresh = nms_thresh

        # the C++ la-nms is faster, but it only supports Python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore quadrangles from per-pixel corner offsets.
        """
        origin_concat = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        pred_quads = origin_concat - geometry
        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
        return pred_quads

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        Restore text boxes from the score map and geo map.
        """
        score_map = score_map[0]
        # CHW -> HWC
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # filter the score map
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter out some low-score boxes by the average score map;
        # this is different from the original paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > cover_thresh]
        return boxes

    def sort_poly(self, p):
        """
        Sort the four corners so the top-left point comes first.
        """
        min_axis = np.argmin(np.sum(p, axis=1))
        p = p[[min_axis, (min_axis + 1) % 4,
               (min_axis + 2) % 4, (min_axis + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        else:
            return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, shape_list):
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        img_num = len(shape_list)
        dt_boxes_list = []
        for ino in range(img_num):
            score = score_list[ino].numpy()
            geo = geo_list[ino].numpy()
            boxes = self.detect(
                score_map=score,
                geo_map=geo,
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                h, w = score.shape[1:]
                src_h, src_w, ratio_h, ratio_w = shape_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for i_box, box in enumerate(boxes):
                    box = self.sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append({'points': np.array(boxes_norm)})
        return dt_boxes_list
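restore_rectangle_quad tiles each scored pixel coordinate four times and subtracts the predicted per-corner offsets, so each quad is recovered as (x, y) - geometry. A small numpy demo of that decoding for a single pixel (illustrative values):

import numpy as np

origin = np.array([[100.0, 50.0]])  # one scored pixel (x, y)
geometry = np.array([[10., 5., -10., 5., -10., -5., 10., -5.]])  # 4 corner offsets
origin_concat = np.concatenate((origin,) * 4, axis=1)  # (1, 8)
quad = (origin_concat - geometry).reshape((-1, 4, 2))
print(quad[0])  # corners around (100, 50): [[90,45],[110,45],[110,55],[90,55]]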
@@ -0,0 +1,199 @@
"""
Locality-aware NMS.
"""

import numpy as np
from shapely.geometry import Polygon


def intersection(g, p):
    """
    IoU of two quads.
    """
    g = Polygon(g[:8].reshape((4, 2)))
    p = Polygon(p[:8].reshape((4, 2)))
    g = g.buffer(0)
    p = p.buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0
    inter = g.intersection(p).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    else:
        return inter / union


def intersection_iog(g, p):
    """
    Intersection over the area of p.
    """
    g = Polygon(g[:8].reshape((4, 2)))
    p = Polygon(p[:8].reshape((4, 2)))
    if not g.is_valid or not p.is_valid:
        return 0
    inter = g.intersection(p).area
    # union = g.area + p.area - inter
    union = p.area
    if union == 0:
        print("p_area is very small")
        return 0
    else:
        return inter / union


def weighted_merge(g, p):
    """
    Merge two quads, weighting coordinates by score; scores are summed.
    """
    g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    g[8] = (g[8] + p[8])
    return g


def standard_nms(S, thres):
    """
    Standard NMS.
    """
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])

        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]

    return S[keep]


def standard_nms_inds(S, thres):
    """
    Standard NMS; returns the kept indices.
    """
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])

        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]

    return keep


def nms(S, thres):
    """
    NMS; returns the kept indices.
    """
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])

        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]

    return keep


def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
    """
    soft_nms
    :param boxes_in: N x 9 array (8 quad coordinates + score)
    :param Nt_thres: IoU threshold
    :param threshold: boxes whose score falls below this value are discarded
    :param sigma: Gaussian weight
    :param method: 1 linear, 2 Gaussian, otherwise hard thresholding
    """
    boxes = boxes_in.copy()
    N = boxes.shape[0]
    if N < 1:
        return np.array([])
    pos, maxpos = 0, 0
    weight = 0.0
    inds = np.arange(N)
    tbox, sbox = boxes[0].copy(), boxes[0].copy()
    for i in range(N):
        maxscore = boxes[i, 8]
        maxpos = i
        tbox = boxes[i].copy()
        ti = inds[i]
        pos = i + 1
        # find the box with the max score
        while pos < N:
            if maxscore < boxes[pos, 8]:
                maxscore = boxes[pos, 8]
                maxpos = pos
            pos = pos + 1
        # add the max box as a detection
        boxes[i, :] = boxes[maxpos, :]
        inds[i] = inds[maxpos]
        # swap
        boxes[maxpos, :] = tbox
        inds[maxpos] = ti
        tbox = boxes[i].copy()
        pos = i + 1
        # NMS iteration
        while pos < N:
            sbox = boxes[pos].copy()
            ts_iou_val = intersection(tbox, sbox)
            if ts_iou_val > 0:
                if method == 1:
                    if ts_iou_val > Nt_thres:
                        weight = 1 - ts_iou_val
                    else:
                        weight = 1
                elif method == 2:
                    weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
                else:
                    if ts_iou_val > Nt_thres:
                        weight = 0
                    else:
                        weight = 1
                boxes[pos, 8] = weight * boxes[pos, 8]
                # if the box score falls below the threshold, discard it by
                # swapping in the last box and shrinking N
                if boxes[pos, 8] < threshold:
                    boxes[pos, :] = boxes[N - 1, :]
                    inds[pos] = inds[N - 1]
                    N = N - 1
                    pos = pos - 1
            pos = pos + 1

    return boxes[:N]


def nms_locality(polys, thres=0.3):
    """
    Locality-aware NMS of EAST.
    :param polys: an N*9 numpy array: first 8 values are coordinates, then the score
    :return: boxes after NMS
    """
    S = []
    p = None
    for g in polys:
        if p is not None and intersection(g, p) > thres:
            p = weighted_merge(g, p)
        else:
            if p is not None:
                S.append(p)
            p = g
    if p is not None:
        S.append(p)

    if len(S) == 0:
        return np.array([])
    return standard_nms(np.array(S), thres)


if __name__ == '__main__':
    # 343,350,448,135,474,143,369,359
    print(
        Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]]))
        .area)
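nms_locality first merges consecutive, heavily-overlapping quads by score-weighted averaging, then runs standard NMS on the survivors. A quick usage example with two near-identical boxes (requires numpy and shapely, with the functions above in scope):

import numpy as np

a = np.array([0., 0., 10., 0., 10., 10., 0., 10., 0.9])  # quad + score
b = a.copy()
b[:8] += 0.5  # almost the same box
b[8] = 0.6
merged = nms_locality(np.array([a, b]), thres=0.3)
print(merged.shape)  # (1, 9): the two quads are weighted-merged into one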
Some files were not shown because too many files have changed in this diff