commit
229265e6af
@ -0,0 +1,50 @@
|
|||||||
|
# SAST text-detection model config that pairs with the ICDAR15 reader file
# referenced by Global.reader_yml.
Global:
  algorithm: SAST
  use_gpu: true
  epoch_num: 2000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/det_sast/
  save_epoch_step: 20
  eval_batch_step: 5000
  train_batch_size_per_card: 8
  test_batch_size_per_card: 8
  image_shape:
    - 3
    - 512
    - 512
  # Train/Eval/Test readers live in a separate YAML file.
  reader_yml: ./configs/det/det_sast_icdar15_reader.yml
  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
  save_res_path: ./output/det_sast/predicts_sast.txt
  # No value is set for the next two keys in this config.
  checkpoints:
  save_inference_dir:

# Every `function` value below is written as "<dotted module path>,<name>".
Architecture:
  function: ppocr.modeling.architectures.det_model,DetModel

Backbone:
  function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
  layers: 50

Head:
  function: ppocr.modeling.heads.det_sast_head,SASTHead
  # SASTHead.__init__ reads `model_name` and `with_cab` from these params.
  model_name: large
  # NOTE(review): `only_fpn_up` is not read by SASTHead in this commit - confirm it is consumed elsewhere.
  only_fpn_up: False
  # with_cab: False
  with_cab: True

Loss:
  function: ppocr.modeling.losses.det_sast_loss,SASTLoss

Optimizer:
  function: ppocr.optimizer,RMSProp
  base_lr: 0.001
  decay:
    function: piecewise_decay
    # NOTE(review): unit of the boundaries (presumably iterations) is not visible here - confirm.
    boundaries:
      - 30000
      - 50000
      - 80000
      - 100000
      - 150000
    decay_rate: 0.3

PostProcess:
  function: ppocr.postprocess.sast_postprocess,SASTPostProcess
  score_thresh: 0.5
  sample_pts_num: 2
  nms_thresh: 0.2
  expand_scale: 1.0
  shrink_ratio_of_width: 0.3
|
@ -0,0 +1,50 @@
|
|||||||
|
# SAST text-detection model config that pairs with the Total-Text reader file
# referenced by Global.reader_yml.
Global:
  algorithm: SAST
  use_gpu: true
  epoch_num: 2000
  log_smooth_window: 20
  print_batch_step: 2
  save_model_dir: ./output/det_sast/
  save_epoch_step: 20
  eval_batch_step: 5000
  train_batch_size_per_card: 8
  test_batch_size_per_card: 1
  image_shape:
    - 3
    - 512
    - 512
  # Train/Eval/Test readers live in a separate YAML file.
  reader_yml: ./configs/det/det_sast_totaltext_reader.yml
  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
  save_res_path: ./output/det_sast/predicts_sast.txt
  # No value is set for the next two keys in this config.
  checkpoints:
  save_inference_dir:

# Every `function` value below is written as "<dotted module path>,<name>".
Architecture:
  function: ppocr.modeling.architectures.det_model,DetModel

Backbone:
  function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
  layers: 50

Head:
  function: ppocr.modeling.heads.det_sast_head,SASTHead
  # SASTHead.__init__ reads `model_name` and `with_cab` from these params.
  model_name: large
  # NOTE(review): `only_fpn_up` is not read by SASTHead in this commit - confirm it is consumed elsewhere.
  only_fpn_up: False
  # with_cab: False
  with_cab: True

Loss:
  function: ppocr.modeling.losses.det_sast_loss,SASTLoss

Optimizer:
  function: ppocr.optimizer,RMSProp
  base_lr: 0.001
  decay:
    function: piecewise_decay
    # NOTE(review): unit of the boundaries (presumably iterations) is not visible here - confirm.
    boundaries:
      - 30000
      - 50000
      - 80000
      - 100000
      - 150000
    decay_rate: 0.3

PostProcess:
  function: ppocr.postprocess.sast_postprocess,SASTPostProcess
  score_thresh: 0.5
  sample_pts_num: 6
  nms_thresh: 0.2
  expand_scale: 1.2
  shrink_ratio_of_width: 0.2
|
@ -0,0 +1,26 @@
|
|||||||
|
# Reader settings for SAST on ICDAR15: one section per phase (train / eval / test).
TrainReader:
  reader_function: ppocr.data.det.dataset_traversal,TrainReader
  process_function: ppocr.data.det.sast_process,SASTProcessTrain
  num_workers: 8
  img_set_dir: ./train_data/
  # Four label files; data_ratio_list below has one entry per file, in the same order.
  label_file_path:
    - ./train_data/icdar13/train_label_json.txt
    - ./train_data/icdar15/train_label_json.txt
    - ./train_data/icdar17_mlt_latin/train_label_json.txt
    - ./train_data/coco_text_icdar_4pts/train_label_json.txt
  data_ratio_list:
    - 0.1
    - 0.45
    - 0.3
    - 0.15
  min_crop_side_ratio: 0.3
  min_crop_size: 24
  min_text_size: 4
  max_text_size: 512

EvalReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.sast_process,SASTProcessTest
  # NOTE(review): eval/test use an `icdar2015/` directory while training uses `icdar15/` - confirm both layouts exist.
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
  max_side_len: 1536

TestReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.sast_process,SASTProcessTest
  # No value is set for infer_img in this config.
  infer_img:
  img_set_dir: ./train_data/icdar2015/text_localization/
  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
  do_eval: True
|
@ -0,0 +1,24 @@
|
|||||||
|
# Reader settings for SAST on Total-Text: one section per phase (train / eval / test).
TrainReader:
  reader_function: ppocr.data.det.dataset_traversal,TrainReader
  process_function: ppocr.data.det.sast_process,SASTProcessTrain
  num_workers: 8
  img_set_dir: ./train_data/
  # Two label files; data_ratio_list below has one entry per file, in the same order.
  label_file_path:
    - ./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt
    - ./train_data/total_text_icdar_14pt/train/train_label_json.txt
  data_ratio_list:
    - 0.5
    - 0.5
  min_crop_side_ratio: 0.3
  min_crop_size: 24
  min_text_size: 4
  max_text_size: 512

EvalReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.sast_process,SASTProcessTest
  # NOTE(review): the `afs/` directory does not match the training data layout above - confirm this path is intended.
  img_set_dir: ./train_data/afs/
  label_file_path: ./train_data/afs/total_text/test_label_json.txt
  max_side_len: 768

TestReader:
  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
  process_function: ppocr.data.det.sast_process,SASTProcessTest
  # No value is set for infer_img in this config.
  infer_img:
  max_side_len: 768
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,228 @@
|
|||||||
|
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
#you may not use this file except in compliance with the License.
|
||||||
|
#You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
#Unless required by applicable law or agreed to in writing, software
|
||||||
|
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
#See the License for the specific language governing permissions and
|
||||||
|
#limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
from ..common_functions import conv_bn_layer, deconv_bn_layer
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
class SASTHead(object):
    """
    SAST detection head.

    Fuses backbone feature maps through a down-sampling and an up-sampling
    FPN path, optionally refines the fused map with cross attention, and
    predicts four maps: f_score, f_border, f_tvo and f_tco.

    see arxiv: https://arxiv.org/abs/1908.05498
    args:
        params(dict): the super parameters for network build. The keys
            'model_name' and 'with_cab' are required (KeyError otherwise).
    """

    def __init__(self, params):
        # NOTE(review): model_name is stored but not used by any method of this class.
        self.model_name = params['model_name']
        # Whether __call__ applies cross_attention to the fused feature map.
        self.with_cab = params['with_cab']

    def FPN_Up_Fusion(self, blocks):
        """
        Fuse block_6 -> block_2 step by step, from the deepest block to the
        shallowest one.

        blocks{}: must contain block_2, block_3, block_4, block_5 and block_6.
            According to the original note they (and block_7, which is not
            read here) have 1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
        returns: the fused feature map with 128 output filters.
        """
        f = [blocks['block_6'], blocks['block_5'], blocks['block_4'],
             blocks['block_3'], blocks['block_2']]
        num_outputs = [256, 256, 192, 192, 128]
        g = [None, None, None, None, None]
        h = [None, None, None, None, None]

        # 1x1 lateral projection of every input block.
        for i in range(5):
            h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
                                 filter_size=1, stride=1, act=None,
                                 name='fpn_up_h' + str(i))

        for i in range(4):
            if i == 0:
                # Deepest level: nothing to merge with yet.
                g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1],
                                       act=None, name='fpn_up_g0')
            else:
                # Merge the result of the previous level with this level's
                # lateral feature, refine, then hand over to the next level.
                # presumably deconv_bn_layer upsamples to the next level's size - TODO confirm.
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
                                     filter_size=3, stride=1, act='relu',
                                     name='fpn_up_g%d_1' % i)
                g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1],
                                       act=None, name='fpn_up_g%d_2' % i)

        # Final merge with the shallowest block, followed by 3x3 and 1x1 convs.
        g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
        g[4] = fluid.layers.relu(g[4])
        g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
                             filter_size=3, stride=1, act='relu',
                             name='fpn_up_fusion_1')
        g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4],
                             filter_size=1, stride=1, act=None,
                             name='fpn_up_fusion_2')

        return g[4]

    def FPN_Down_Fusion(self, blocks):
        """
        Fuse block_0 -> block_2 step by step using stride-2 convolutions.

        blocks{}: must contain block_0, block_1 and block_2.
        returns: the fused feature map with 128 output filters.
        """
        f = [blocks['block_0'], blocks['block_1'], blocks['block_2']]
        num_outputs = [32, 64, 128]
        g = [None, None, None]
        h = [None, None, None]

        # 3x3 lateral projection of every input block.
        for i in range(3):
            h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i],
                                 filter_size=3, stride=1, act=None,
                                 name='fpn_down_h' + str(i))

        for i in range(2):
            if i == 0:
                # First level: only a stride-2 conv towards the next level.
                g[i] = conv_bn_layer(input=h[i], num_filters=num_outputs[i + 1],
                                     filter_size=3, stride=2, act=None,
                                     name='fpn_down_g0')
            else:
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
                                     filter_size=3, stride=1, act='relu',
                                     name='fpn_down_g%d_1' % i)
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i + 1],
                                     filter_size=3, stride=2, act=None,
                                     name='fpn_down_g%d_2' % i)

        # Final merge with block_2's lateral feature, then 3x3 and 1x1 convs.
        g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2])
        g[2] = fluid.layers.relu(g[2])
        g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
                             filter_size=3, stride=1, act='relu',
                             name='fpn_down_fusion_1')
        g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2],
                             filter_size=1, stride=1, act=None,
                             name='fpn_down_fusion_2')
        return g[2]

    def _head_branch(self, f_common, num_out, prefix):
        """
        Build one 4-layer prediction branch (1x1 -> 3x3 -> 1x1 -> 3x3 convs).

        Layers are named prefix + '1' ... prefix + '4'; the last layer has
        num_out filters and is created without an explicit activation.
        """
        feat = conv_bn_layer(input=f_common, num_filters=64, filter_size=1,
                             stride=1, act='relu', name=prefix + '1')
        feat = conv_bn_layer(input=feat, num_filters=64, filter_size=3,
                             stride=1, act='relu', name=prefix + '2')
        feat = conv_bn_layer(input=feat, num_filters=128, filter_size=1,
                             stride=1, act='relu', name=prefix + '3')
        return conv_bn_layer(input=feat, num_filters=num_out, filter_size=3,
                             stride=1, name=prefix + '4')

    def SAST_Header1(self, f_common):
        """Detector header: returns (f_score, f_border).

        f_score has 1 filter and is passed through a sigmoid;
        f_border has 4 filters.
        """
        f_score = self._head_branch(f_common, 1, 'f_score')
        f_score = fluid.layers.sigmoid(f_score)

        f_border = self._head_branch(f_common, 4, 'f_border')

        return f_score, f_border

    def SAST_Header2(self, f_common):
        """Detector header: returns (f_tvo, f_tco).

        f_tvo has 8 filters and f_tco has 2 filters.
        """
        f_tvo = self._head_branch(f_common, 8, 'f_tvo')
        f_tco = self._head_branch(f_common, 2, 'f_tco')

        return f_tvo, f_tco

    def cross_attention(self, f_common):
        """
        Refine f_common with attention computed separately along axis 3
        ("horizon" branch) and along axis 2 ("vertical" branch).

        Three shared 1x1 projections (theta, phi, g; 128 filters each) are
        reshaped so that every line along the attended axis is one sequence.
        Each branch computes softmax(theta x phi^T / sqrt(128)) x g, projects
        the result with a 1x1 conv, adds a 1x1-conv shortcut of f_common and
        applies relu. The two branch outputs are concatenated on axis 1 and
        reduced back to 128 filters.
        """
        f_shape = fluid.layers.shape(f_common)
        f_theta = conv_bn_layer(input=f_common, num_filters=128, filter_size=1,
                                stride=1, act='relu', name='f_theta')
        f_phi = conv_bn_layer(input=f_common, num_filters=128, filter_size=1,
                              stride=1, act='relu', name='f_phi')
        f_g = conv_bn_layer(input=f_common, num_filters=128, filter_size=1,
                            stride=1, act='relu', name='f_g')

        ### horizon
        fh_theta = f_theta
        fh_phi = f_phi
        fh_g = f_g
        # flatten: move the 128 channels last, then merge axes 0 and 2 so
        # that each sequence runs along the original axis 3.
        fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1])
        fh_theta = fluid.layers.reshape(
            fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
        fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1])
        fh_phi = fluid.layers.reshape(
            fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
        fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1])
        fh_g = fluid.layers.reshape(
            fh_g, [f_shape[0] * f_shape[2], f_shape[3], 128])
        # correlation
        fh_attn = fluid.layers.matmul(
            fh_theta, fluid.layers.transpose(fh_phi, [0, 2, 1]))
        # scale
        fh_attn = fh_attn / (128 ** 0.5)
        fh_attn = fluid.layers.softmax(fh_attn)
        # weighted sum
        fh_weight = fluid.layers.matmul(fh_attn, fh_g)
        fh_weight = fluid.layers.reshape(
            fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
        # back to the input layout (channels on axis 1)
        fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = conv_bn_layer(input=fh_weight, num_filters=128,
                                  filter_size=1, stride=1, name='fh_weight')
        # short cut
        fh_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1,
                              stride=1, name='fh_sc')
        f_h = fluid.layers.relu(fh_weight + fh_sc)

        ### vertical: swap axes 2 and 3 first, then reuse the same recipe.
        fv_theta = fluid.layers.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = fluid.layers.transpose(f_phi, [0, 1, 3, 2])
        fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2])
        # flatten
        fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1])
        fv_theta = fluid.layers.reshape(
            fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
        fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1])
        fv_phi = fluid.layers.reshape(
            fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
        fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1])
        fv_g = fluid.layers.reshape(
            fv_g, [f_shape[0] * f_shape[3], f_shape[2], 128])
        # correlation
        fv_attn = fluid.layers.matmul(
            fv_theta, fluid.layers.transpose(fv_phi, [0, 2, 1]))
        # scale
        fv_attn = fv_attn / (128 ** 0.5)
        fv_attn = fluid.layers.softmax(fv_attn)
        # weighted sum
        fv_weight = fluid.layers.matmul(fv_attn, fv_g)
        fv_weight = fluid.layers.reshape(
            fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
        # back to the input layout; [0, 3, 2, 1] also undoes the axis swap
        fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = conv_bn_layer(input=fv_weight, num_filters=128,
                                  filter_size=1, stride=1, name='fv_weight')
        # short cut
        fv_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1,
                              stride=1, name='fv_sc')
        f_v = fluid.layers.relu(fv_weight + fv_sc)

        ### merge both branches
        f_attn = fluid.layers.concat([f_h, f_v], axis=1)
        f_attn = conv_bn_layer(input=f_attn, num_filters=128, filter_size=1,
                               stride=1, act='relu', name='f_attn')
        return f_attn

    def __call__(self, blocks, with_cab=False):
        """
        Build the head on top of the backbone feature maps.

        args:
            blocks(dict): backbone outputs; block_0 ... block_6 are read.
            with_cab(bool): ignored. It is kept only so existing callers
                keep working; cross attention is controlled by
                params['with_cab'] given to the constructor.
        returns:
            OrderedDict with keys 'f_score', 'f_border', 'f_tvo', 'f_tco'
            (inserted in that order).
        """
        # down fpn
        f_down = self.FPN_Down_Fusion(blocks)
        # up fpn
        f_up = self.FPN_Up_Fusion(blocks)
        # fusion
        f_common = fluid.layers.elementwise_add(x=f_down, y=f_up)
        f_common = fluid.layers.relu(f_common)

        if self.with_cab:
            # enhance f_common with the cross attention block
            f_common = self.cross_attention(f_common)

        f_score, f_border = self.SAST_Header1(f_common)
        f_tvo, f_tco = self.SAST_Header2(f_common)

        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_border'] = f_border
        predicts['f_tvo'] = f_tvo
        predicts['f_tco'] = f_tco
        return predicts
|
@ -0,0 +1,115 @@
|
|||||||
|
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
|
||||||
|
#
|
||||||
|
#Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
#you may not use this file except in compliance with the License.
|
||||||
|
#You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
#Unless required by applicable law or agreed to in writing, software
|
||||||
|
#distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
#See the License for the specific language governing permissions and
|
||||||
|
#limitations under the License.
|
||||||
|
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
|
||||||
|
|
||||||
|
class SASTLoss(object):
    """
    SAST Loss function

    Combines a dice-style loss on the score map with three weighted
    smooth-L1 losses on the border, tvo and tco maps.
    """

    def __init__(self, params=None):
        # params is accepted for interface uniformity but is not used.
        super(SASTLoss, self).__init__()

    def _weighted_smooth_l1(self, f_geo, l_geo, l_score, l_mask, channels):
        """
        Smooth-L1 loss between a prediction and its label, weighted per pixel.

        args:
            f_geo: predicted map with `channels` channels on axis 1.
            l_geo: label map; split on axis 1 into the first `channels`
                target channels and one trailing normalisation channel.
            l_score, l_mask: maps tiled `channels` times along axis 1
                (presumably single-channel - TODO confirm against the reader).
            channels(int): number of regression channels.
        returns:
            sum(norm * smooth_l1 * score * mask) / (sum(score * mask) + 1e-5)
        """
        l_geo_split, l_geo_norm = fluid.layers.split(
            l_geo, num_or_sections=[channels, 1], dim=1)
        tile = [1, channels, 1, 1]
        l_norm_split = fluid.layers.expand(x=l_geo_norm, expand_times=tile)
        l_geo_score = fluid.layers.expand(x=l_score, expand_times=tile)
        l_geo_mask = fluid.layers.expand(x=l_mask, expand_times=tile)

        abs_diff = fluid.layers.abs(l_geo_split - f_geo)
        # 1.0 where |diff| < 1 (quadratic zone), 0.0 elsewhere (linear zone).
        # The selector is a constant with respect to back-propagation.
        sign = fluid.layers.cast(abs_diff < 1.0, dtype='float32')
        sign.stop_gradient = True
        in_loss = 0.5 * abs_diff * abs_diff * sign + \
            (abs_diff - 0.5) * (1.0 - sign)
        out_loss = l_norm_split * in_loss
        return fluid.layers.reduce_sum(out_loss * l_geo_score * l_geo_mask) / \
            (fluid.layers.reduce_sum(l_geo_score * l_geo_mask) + 1e-5)

    def __call__(self, predicts, labels):
        """
        args:
            predicts(dict): reads 'f_score', 'f_border', 'f_tvo', 'f_tco'.
            labels(dict): reads 'input_score', 'input_border', 'input_mask',
                'input_tvo', 'input_tco'. The border/tvo/tco labels carry
                4/8/2 target channels plus one normalisation channel on axis 1.
        returns:
            dict with 'total_loss', 'score_loss', 'border_loss', 'tvo_loss'
            and 'tco_loss'.
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score = labels['input_score']
        l_border = labels['input_border']
        l_mask = labels['input_mask']
        l_tvo = labels['input_tvo']
        l_tco = labels['input_tco']

        # score_loss: 1 - 2 * |pred * gt| / (|pred| + |gt|), inside the mask
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) + \
            fluid.layers.reduce_sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # border / tvo / tco losses share the same weighted smooth-L1 form
        border_loss = self._weighted_smooth_l1(
            f_border, l_border, l_score, l_mask, 4)
        tvo_loss = self._weighted_smooth_l1(f_tvo, l_tvo, l_score, l_mask, 8)
        tco_loss = self._weighted_smooth_l1(f_tco, l_tco, l_score, l_mask, 2)

        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'total_loss': total_loss, "score_loss": score_loss,
                  "border_loss": border_loss, 'tvo_loss': tvo_loss,
                  'tco_loss': tco_loss}
        return losses
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue