@@ -18,7 +18,7 @@ import numpy as np
 import six
 import paddle.fluid as fluid
 from paddle.fluid import core
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm, Embedding, GRUUnit
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear, BatchNorm, Embedding, GRUUnit
 from paddle.fluid.dygraph.base import to_variable
 from test_imperative_base import new_program_scope
 
@@ -27,6 +27,8 @@ class Config(object):
     '''
     config for training
     '''
+    # encoder rnn hidden_size
+    encoder_size = 200
     # decoder size for decoder stage
     decoder_size = 128
     # size for word embedding
@@ -118,8 +120,8 @@ class ConvBNPool(fluid.dygraph.Layer):
 
 
 class OCRConv(fluid.dygraph.Layer):
-    def __init__(self, name_scope, is_test=False, use_cudnn=True):
-        super(OCRConv, self).__init__(name_scope)
+    def __init__(self, is_test=False, use_cudnn=True):
+        super(OCRConv, self).__init__()
         self.conv_bn_pool_1 = ConvBNPool(
             2, [16, 16], [1, 16], is_test=is_test, use_cudnn=use_cudnn)
         self.conv_bn_pool_2 = ConvBNPool(
@@ -143,7 +145,6 @@ class OCRConv(fluid.dygraph.Layer):
 
 class DynamicGRU(fluid.dygraph.Layer):
     def __init__(self,
-                 scope_name,
                  size,
                  param_attr=None,
                  bias_attr=None,
@@ -152,7 +153,7 @@ class DynamicGRU(fluid.dygraph.Layer):
                  candidate_activation='tanh',
                  h_0=None,
                  origin_mode=False):
-        super(DynamicGRU, self).__init__(scope_name)
+        super(DynamicGRU, self).__init__()
 
         self.gru_unit = GRUUnit(
             size * 3,
@@ -164,6 +165,7 @@ class DynamicGRU(fluid.dygraph.Layer):
 
         self.h_0 = h_0
         self.is_reverse = is_reverse
+        self.size = size
 
     def forward(self, inputs):
         hidden = self.h_0
@@ -188,11 +190,10 @@ class DynamicGRU(fluid.dygraph.Layer):
 
 class EncoderNet(fluid.dygraph.Layer):
     def __init__(self,
-                 scope_name,
-                 rnn_hidden_size=200,
+                 rnn_hidden_size=Config.encoder_size,
                  is_test=False,
                  use_cudnn=True):
-        super(EncoderNet, self).__init__(scope_name)
+        super(EncoderNet, self).__init__()
         self.rnn_hidden_size = rnn_hidden_size
         para_attr = fluid.ParamAttr(initializer=fluid.initializer.Normal(0.0,
                                                                          0.02))
@@ -207,28 +208,19 @@ class EncoderNet(fluid.dygraph.Layer):
             shape=[Config.batch_size, rnn_hidden_size],
             dtype='float32',
             value=0)
-        self.ocr_convs = OCRConv(
-            self.full_name(), is_test=is_test, use_cudnn=use_cudnn)
-
-        self.fc_1_layer = FC(self.full_name(),
-                             rnn_hidden_size * 3,
-                             param_attr=para_attr,
-                             bias_attr=False,
-                             num_flatten_dims=2)
-        self.fc_2_layer = FC(self.full_name(),
-                             rnn_hidden_size * 3,
-                             param_attr=para_attr,
-                             bias_attr=False,
-                             num_flatten_dims=2)
+        self.ocr_convs = OCRConv(is_test=is_test, use_cudnn=use_cudnn)
+
+        self.fc_1_layer = Linear(
+            768, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False)
+        self.fc_2_layer = Linear(
+            768, rnn_hidden_size * 3, param_attr=para_attr, bias_attr=False)
         self.gru_forward_layer = DynamicGRU(
-            self.full_name(),
             size=rnn_hidden_size,
             h_0=h_0,
             param_attr=para_attr,
             bias_attr=bias_attr,
             candidate_activation='relu')
         self.gru_backward_layer = DynamicGRU(
-            self.full_name(),
             size=rnn_hidden_size,
             h_0=h_0,
             param_attr=para_attr,
@@ -236,10 +228,8 @@ class EncoderNet(fluid.dygraph.Layer):
             candidate_activation='relu',
             is_reverse=True)
 
-        self.encoded_proj_fc = FC(self.full_name(),
-                                  Config.decoder_size,
-                                  bias_attr=False,
-                                  num_flatten_dims=2)
+        self.encoded_proj_fc = Linear(
+            rnn_hidden_size * 2, Config.decoder_size, bias_attr=False)
 
     def forward(self, inputs):
         conv_features = self.ocr_convs(inputs)
@@ -272,18 +262,12 @@ class EncoderNet(fluid.dygraph.Layer):
 
 
 class SimpleAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name, decoder_size):
-        super(SimpleAttention, self).__init__(scope_name)
-
-        self.fc_1 = FC(self.full_name(),
-                       decoder_size,
-                       act=None,
-                       bias_attr=False)
-        self.fc_2 = FC(self.full_name(),
-                       1,
-                       num_flatten_dims=2,
-                       act=None,
-                       bias_attr=False)
+    def __init__(self, decoder_size):
+        super(SimpleAttention, self).__init__()
+
+        self.fc_1 = Linear(
+            decoder_size, decoder_size, act=None, bias_attr=False)
+        self.fc_2 = Linear(decoder_size, 1, act=None, bias_attr=False)
 
     def forward(self, encoder_vec, encoder_proj, decoder_state):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GRUDecoderWithAttention(fluid.dygraph.Layer):
|
|
|
|
|
def __init__(self, scope_name, decoder_size, num_classes):
|
|
|
|
|
super(GRUDecoderWithAttention, self).__init__(scope_name)
|
|
|
|
|
self.simple_attention = SimpleAttention(self.full_name(), decoder_size)
|
|
|
|
|
|
|
|
|
|
self.fc_1_layer = FC(self.full_name(),
|
|
|
|
|
size=decoder_size * 3,
|
|
|
|
|
bias_attr=False)
|
|
|
|
|
self.fc_2_layer = FC(self.full_name(),
|
|
|
|
|
size=decoder_size * 3,
|
|
|
|
|
bias_attr=False)
|
|
|
|
|
def __init__(self, decoder_size, num_classes):
|
|
|
|
|
super(GRUDecoderWithAttention, self).__init__()
|
|
|
|
|
self.simple_attention = SimpleAttention(decoder_size)
|
|
|
|
|
|
|
|
|
|
self.fc_1_layer = Linear(
|
|
|
|
|
Config.encoder_size * 2, decoder_size * 3, bias_attr=False)
|
|
|
|
|
self.fc_2_layer = Linear(
|
|
|
|
|
decoder_size, decoder_size * 3, bias_attr=False)
|
|
|
|
|
self.gru_unit = GRUUnit(
|
|
|
|
|
size=decoder_size * 3, param_attr=None, bias_attr=None)
|
|
|
|
|
self.out_layer = FC(self.full_name(),
|
|
|
|
|
size=num_classes + 2,
|
|
|
|
|
bias_attr=None,
|
|
|
|
|
act='softmax')
|
|
|
|
|
self.out_layer = Linear(
|
|
|
|
|
decoder_size, num_classes + 2, bias_attr=None, act='softmax')
|
|
|
|
|
|
|
|
|
|
self.decoder_size = decoder_size
|
|
|
|
|
|
|
|
|
@@ -357,17 +337,18 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer):
 
 
 class OCRAttention(fluid.dygraph.Layer):
-    def __init__(self, scope_name):
-        super(OCRAttention, self).__init__(scope_name)
-        self.encoder_net = EncoderNet(self.full_name())
-        self.fc = FC(self.full_name(),
-                     size=Config.decoder_size,
-                     bias_attr=False,
-                     act='relu')
+    def __init__(self):
+        super(OCRAttention, self).__init__()
+        self.encoder_net = EncoderNet()
+        self.fc = Linear(
+            Config.encoder_size,
+            Config.decoder_size,
+            bias_attr=False,
+            act='relu')
         self.embedding = Embedding(
             [Config.num_classes + 2, Config.word_vector_dim], dtype='float32')
         self.gru_decoder_with_attention = GRUDecoderWithAttention(
-            self.full_name(), Config.decoder_size, Config.num_classes)
+            Config.decoder_size, Config.num_classes)
 
     def forward(self, inputs, label_in):
         gru_backward, encoded_vector, encoded_proj = self.encoder_net(inputs)
@@ -425,14 +406,15 @@ class TestDygraphOCRAttention(unittest.TestCase):
             fluid.default_main_program().random_seed = seed
             backward_strategy = fluid.dygraph.BackwardStrategy()
             backward_strategy.sort_sum_gradient = True
-            ocr_attention = OCRAttention("ocr_attention")
+            ocr_attention = OCRAttention()
 
             if Config.learning_rate_decay == "piecewise_decay":
                 learning_rate = fluid.layers.piecewise_decay(
                     [50000], [Config.LR, Config.LR * 0.01])
             else:
                 learning_rate = Config.LR
-            optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            optimizer = fluid.optimizer.SGD(
+                learning_rate=0.001, parameter_list=ocr_attention.parameters())
             dy_param_init_value = {}
             for param in ocr_attention.parameters():
                 dy_param_init_value[param.name] = param.numpy()
@@ -478,7 +460,7 @@ class TestDygraphOCRAttention(unittest.TestCase):
             # print("static start")
             exe = fluid.Executor(fluid.CPUPlace(
             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-            ocr_attention = OCRAttention("ocr_attention")
+            ocr_attention = OCRAttention()
 
             if Config.learning_rate_decay == "piecewise_decay":
                 learning_rate = fluid.layers.piecewise_decay(