update config

release/1.1
tink2123 5 years ago
parent 09d8cb6d98
commit 6832ca029f

@ -17,11 +17,12 @@ Global:
average_window: 0.15
max_average_window: 15625
min_average_window: 10000
reader_yml: ./configs/rec/rec_srn_reader.yml
reader_yml: ./configs/rec/rec_benchmark_reader.yml
pretrain_weights:
checkpoints:
save_inference_dir:
infer_img:
Architecture:
function: ppocr.modeling.architectures.rec_model,RecModel

@ -118,15 +118,14 @@ class LMDBReader(object):
image_file_list = get_image_file_list(self.infer_img)
for single_img in image_file_list:
img = cv2.imread(single_img)
if img.shape[-1]==1 or len(list(img.shape))==2:
if img.shape[-1] == 1 or len(list(img.shape)) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
if self.loss_type == 'srn':
norm_img = process_image_srn(
img=img,
image_shape=self.image_shape,
num_heads=self.num_heads,
max_text_length=self.max_text_length
)
max_text_length=self.max_text_length)
else:
norm_img = process_image(
img=img,
@ -135,20 +134,20 @@ class LMDBReader(object):
tps=self.use_tps,
infer_mode=True)
yield norm_img
elif self.mode == 'test':
image_file_list = get_image_file_list(self.infer_img)
for single_img in image_file_list:
img = cv2.imread(single_img)
if img.shape[-1]==1 or len(list(img.shape))==2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
norm_img = process_image(
img=img,
image_shape=self.image_shape,
char_ops=self.char_ops,
tps=self.use_tps,
infer_mode=True
)
yield norm_img
#elif self.mode == 'eval':
# image_file_list = get_image_file_list(self.infer_img)
# for single_img in image_file_list:
# img = cv2.imread(single_img)
# if img.shape[-1]==1 or len(list(img.shape))==2:
# img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
# norm_img = process_image(
# img=img,
# image_shape=self.image_shape,
# char_ops=self.char_ops,
# tps=self.use_tps,
# infer_mode=True
# )
# yield norm_img
else:
lmdb_sets = self.load_hierarchical_lmdb_dataset()
if process_id == 0:
@ -169,14 +168,15 @@ class LMDBReader(object):
img, label = sample_info
outs = []
if self.loss_type == "srn":
outs = process_image_srn(img, self.image_shape, self.num_heads,
self.max_text_length, label,
self.char_ops, self.loss_type)
outs = process_image_srn(
img, self.image_shape, self.num_heads,
self.max_text_length, label, self.char_ops,
self.loss_type)
else:
outs = process_image(img, self.image_shape, label,
self.char_ops, self.loss_type,
self.max_text_length)
outs = process_image(
img, self.image_shape, label, self.char_ops,
self.loss_type, self.max_text_length)
if outs is None:
continue
yield outs
@ -184,6 +184,7 @@ class LMDBReader(object):
if finish_read_num == len(lmdb_sets):
break
self.close_lmdb_dataset(lmdb_sets)
def batch_iter_reader():
batch_outs = []
for outs in sample_iter_reader():
@ -311,4 +312,4 @@ class SimpleReader(object):
if self.infer_img is None:
return batch_iter_reader
return sample_iter_reader
return sample_iter_reader

@ -79,17 +79,45 @@ class RecModel(object):
feed_list = [image, label_in, label_out]
labels = {'label_in': label_in, 'label_out': label_out}
elif self.loss_type == "srn":
encoder_word_pos = fluid.data(name="encoder_word_pos", shape=[-1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)), 1], dtype="int64")
gsrm_word_pos = fluid.data(name="gsrm_word_pos", shape=[-1, self.max_text_length, 1], dtype="int64")
gsrm_slf_attn_bias1 = fluid.data(name="gsrm_slf_attn_bias1", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
gsrm_slf_attn_bias2 = fluid.data(name="gsrm_slf_attn_bias2", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
lbl_weight = fluid.layers.data(name="lbl_weight", shape=[-1, 1], dtype='int64')
encoder_word_pos = fluid.data(
name="encoder_word_pos",
shape=[
-1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
1
],
dtype="int64")
gsrm_word_pos = fluid.data(
name="gsrm_word_pos",
shape=[-1, self.max_text_length, 1],
dtype="int64")
gsrm_slf_attn_bias1 = fluid.data(
name="gsrm_slf_attn_bias1",
shape=[
-1, self.num_heads, self.max_text_length,
self.max_text_length
])
gsrm_slf_attn_bias2 = fluid.data(
name="gsrm_slf_attn_bias2",
shape=[
-1, self.num_heads, self.max_text_length,
self.max_text_length
])
lbl_weight = fluid.layers.data(
name="lbl_weight", shape=[-1, 1], dtype='int64')
label = fluid.data(
name='label', shape=[-1, 1], dtype='int32', lod_level=1)
feed_list = [image, label, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight]
labels = {'label': label, 'encoder_word_pos': encoder_word_pos,
'gsrm_word_pos': gsrm_word_pos, 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,'lbl_weight':lbl_weight}
feed_list = [
image, label, encoder_word_pos, gsrm_word_pos,
gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight
]
labels = {
'label': label,
'encoder_word_pos': encoder_word_pos,
'gsrm_word_pos': gsrm_word_pos,
'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,
'lbl_weight': lbl_weight
}
else:
label = fluid.data(
name='label', shape=[None, 1], dtype='int32', lod_level=1)
@ -112,15 +140,41 @@ class RecModel(object):
"We set img_shape to be the same , it may affect the inference effect"
)
image_shape = deepcopy(self.image_shape)
image = fluid.data(name='image', shape=image_shape, dtype='float32')
image = fluid.data(name='image', shape=image_shape, dtype='float32')
if self.loss_type == "srn":
encoder_word_pos = fluid.data(name="encoder_word_pos", shape=[-1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)), 1], dtype="int64")
gsrm_word_pos = fluid.data(name="gsrm_word_pos", shape=[-1, self.max_text_length, 1], dtype="int64")
gsrm_slf_attn_bias1 = fluid.data(name="gsrm_slf_attn_bias1", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
gsrm_slf_attn_bias2 = fluid.data(name="gsrm_slf_attn_bias2", shape=[-1, self.num_heads, self.max_text_length, self.max_text_length])
feed_list = [image, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2]
labels = {'encoder_word_pos': encoder_word_pos, 'gsrm_word_pos': gsrm_word_pos,
'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1, 'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2}
encoder_word_pos = fluid.data(
name="encoder_word_pos",
shape=[
-1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
1
],
dtype="int64")
gsrm_word_pos = fluid.data(
name="gsrm_word_pos",
shape=[-1, self.max_text_length, 1],
dtype="int64")
gsrm_slf_attn_bias1 = fluid.data(
name="gsrm_slf_attn_bias1",
shape=[
-1, self.num_heads, self.max_text_length,
self.max_text_length
])
gsrm_slf_attn_bias2 = fluid.data(
name="gsrm_slf_attn_bias2",
shape=[
-1, self.num_heads, self.max_text_length,
self.max_text_length
])
feed_list = [
image, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
gsrm_slf_attn_bias2
]
labels = {
'encoder_word_pos': encoder_word_pos,
'gsrm_word_pos': gsrm_word_pos,
'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2
}
return image, labels, loader
def __call__(self, mode):
@ -140,8 +194,13 @@ class RecModel(object):
label = labels['label']
if self.loss_type == 'srn':
total_loss, img_loss, word_loss = self.loss(predicts, labels)
outputs = {'total_loss':total_loss, 'img_loss':img_loss, 'word_loss':word_loss,
'decoded_out':decoded_out, 'label':label}
outputs = {
'total_loss': total_loss,
'img_loss': img_loss,
'word_loss': word_loss,
'decoded_out': decoded_out,
'label': label
}
else:
outputs = {'total_loss':loss, 'decoded_out':\
decoded_out, 'label':label}
@ -156,4 +215,4 @@ class RecModel(object):
predict = predicts['predict']
if self.loss_type == "ctc":
predict = fluid.layers.softmax(predict)
return loader, {'decoded_out': decoded_out, 'predicts': predict}
return loader, {'decoded_out': decoded_out, 'predicts': predict}

File diff suppressed because it is too large Load Diff

@ -61,7 +61,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
img_list.append(data[ino][0])
label_list.append(data[ino][1])
if config['Global']['loss_type'] != "srn":
if config['Global']['loss_type'] != "srn":
img_list = np.concatenate(img_list, axis=0)
outs = exe.run(eval_info_dict['program'], \
feed={'image': img_list}, \
@ -75,7 +75,8 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
preds_lod = outs[0].lod()[0]
labels, labels_lod = convert_rec_label_to_lod(label_list)
acc, acc_num, sample_num = cal_predicts_accuracy(
char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate)
char_ops, preds, preds_lod, labels, labels_lod,
is_remove_duplicate)
else:
encoder_word_pos_list = []
gsrm_word_pos_list = []
@ -89,15 +90,19 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
img_list = np.concatenate(img_list, axis=0)
label_list = np.concatenate(label_list, axis=0)
encoder_word_pos_list = np.concatenate(encoder_word_pos_list, axis=0).astype(np.int64)
gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list, axis=0).astype(np.int64)
gsrm_slf_attn_bias1_list = np.concatenate(gsrm_slf_attn_bias1_list, axis=0).astype(np.float32)
gsrm_slf_attn_bias2_list = np.concatenate(gsrm_slf_attn_bias2_list, axis=0).astype(np.float32)
encoder_word_pos_list = np.concatenate(
encoder_word_pos_list, axis=0).astype(np.int64)
gsrm_word_pos_list = np.concatenate(
gsrm_word_pos_list, axis=0).astype(np.int64)
gsrm_slf_attn_bias1_list = np.concatenate(
gsrm_slf_attn_bias1_list, axis=0).astype(np.float32)
gsrm_slf_attn_bias2_list = np.concatenate(
gsrm_slf_attn_bias2_list, axis=0).astype(np.float32)
labels = label_list
outs = exe.run(eval_info_dict['program'], \
feed={'image': img_list, 'encoder_word_pos': encoder_word_pos_list,
feed={'image': img_list, 'encoder_word_pos': encoder_word_pos_list,
'gsrm_word_pos': gsrm_word_pos_list, 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1_list,
'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2_list}, \
fetch_list=eval_info_dict['fetch_varname_list'], \
@ -108,7 +113,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
total_acc_num += acc_num
total_sample_num += sample_num
logger.info("eval batch id: {}, acc: {}".format(total_batch_num, acc))
#logger.info("eval batch id: {}, acc: {}".format(total_batch_num, acc))
total_batch_num += 1
avg_acc = total_acc_num * 1.0 / total_sample_num
metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num, \

@ -34,6 +34,7 @@ from ppocr.utils.save_load import save_model
import numpy as np
from ppocr.utils.character import cal_predicts_accuracy, cal_predicts_accuracy_srn, CharacterOps
class ArgsParser(ArgumentParser):
def __init__(self):
super(ArgsParser, self).__init__(
@ -196,10 +197,13 @@ def build(config, main_prog, startup_prog, mode):
if config['Global']["loss_type"] == 'srn':
model_average = fluid.optimizer.ModelAverage(
config['Global']['average_window'],
min_average_window=config['Global']['min_average_window'],
max_average_window=config['Global']['max_average_window'])
min_average_window=config['Global'][
'min_average_window'],
max_average_window=config['Global'][
'max_average_window'])
return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name,model_average)
return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name,
model_average)
def build_export(config, main_prog, startup_prog):
@ -398,6 +402,7 @@ def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
save_model(train_info_dict['train_program'], save_path)
return
def preprocess():
FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config)
@ -409,8 +414,8 @@ def preprocess():
check_gpu(use_gpu)
alg = config['Global']['algorithm']
assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN']
if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN']:
config['Global']['char_ops'] = CharacterOps(config['Global'])
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()

@ -0,0 +1 @@
/workspace/PaddleOCR/train_data/
Loading…
Cancel
Save