@@ -1,59 +1,62 @@
 import numpy as np
 import paddle.v2 as paddle
-import paddle.v2.dataset.conll05 as conll05
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.framework as framework
 import paddle.v2.fluid.layers as layers
-from paddle.v2.fluid.executor import Executor, g_scope
-from paddle.v2.fluid.optimizer import SGDOptimizer
+from paddle.v2.fluid.executor import Executor
 
 dict_size = 30000
 source_dict_dim = target_dict_dim = dict_size
 src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
-hidden_dim = 512
-word_dim = 512
+hidden_dim = 32
+word_dim = 16
 IS_SPARSE = True
-batch_size = 50
+batch_size = 10
 max_length = 50
 topk_size = 50
 trg_dic_size = 10000
-
-src_word_id = layers.data(name="src_word_id", shape=[1], dtype='int64')
-src_embedding = layers.embedding(
-    input=src_word_id,
-    size=[dict_size, word_dim],
-    dtype='float32',
-    is_sparse=IS_SPARSE,
-    param_attr=fluid.ParamAttr(name='vemb'))
-
-
-def encoder():
-    lstm_hidden0, lstm_0 = layers.dynamic_lstm(
-        input=src_embedding,
-        size=hidden_dim,
-        candidate_activation='sigmoid',
-        cell_activation='sigmoid')
-
-    lstm_hidden1, lstm_1 = layers.dynamic_lstm(
-        input=src_embedding,
-        size=hidden_dim,
-        candidate_activation='sigmoid',
-        cell_activation='sigmoid',
-        is_reverse=True)
-
-    bidirect_lstm_out = layers.concat([lstm_hidden0, lstm_hidden1], axis=0)
-    return bidirect_lstm_out
-
-
-def decoder_trainer(context):
-    '''
-    decoder with trainer
-    '''
-    pass
+
+decoder_size = hidden_dim
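+# the decoder state is as wide as the encoder state, so the pooled
+# encoder output below can initialize the decoder memory directly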
+
+
+def encoder_decoder():
+    # encoder
+    src_word_id = layers.data(
+        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
+    src_embedding = layers.embedding(
+        input=src_word_id,
+        size=[dict_size, word_dim],
+        dtype='float32',
+        is_sparse=IS_SPARSE,
+        param_attr=fluid.ParamAttr(name='vemb'))
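+
+    # dynamic_lstm does no input projection of its own: its input width
+    # must be 4 * hidden_dim, one weight block per LSTM gate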
+    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
+    lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
+    encoder_out = layers.sequence_pool(input=lstm_hidden0, pool_type="last")
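+    # pooling the last time step gives one fixed-size vector per source
+    # sentence, used as the decoder's initial state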
+
+    # decoder
+    trg_language_word = layers.data(
+        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
+    trg_embedding = layers.embedding(
+        input=trg_language_word,
+        size=[dict_size, word_dim],
+        dtype='float32',
+        is_sparse=IS_SPARSE,
+        param_attr=fluid.ParamAttr(name='vemb'))
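+
+    # DynamicRNN steps through the target sequence word by word; `mem`
+    # carries the decoder state and starts from the encoder output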
+    rnn = fluid.layers.DynamicRNN()
+    with rnn.block():
+        current_word = rnn.step_input(trg_embedding)
+        mem = rnn.memory(init=encoder_out)
+        fc1 = fluid.layers.fc(input=[current_word, mem],
+                              size=decoder_size,
+                              act='tanh')
+        out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
+        rnn.update_memory(mem, fc1)
+        rnn.output(out)
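+
+    # rnn() returns the per-step softmax outputs, concatenated into one
+    # LoD tensor aligned with the target sequence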
+    return rnn()
 
 
 def to_lodtensor(data, place):
@@ -72,13 +75,18 @@ def to_lodtensor(data, place):
 def main():
-    encoder_out = encoder()
-    # TODO(jacquesqiao) call here
-    decoder_trainer(encoder_out)
+    rnn_out = encoder_decoder()
+    label = layers.data(
+        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
+    cost = layers.cross_entropy(input=rnn_out, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
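+    # training objective: per-word cross entropy between the predicted
+    # distribution and the next target word, averaged over the batch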
+
+    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
+    optimizer.minimize(avg_cost)
 
     train_data = paddle.batch(
         paddle.reader.shuffle(
-            paddle.dataset.wmt14.train(8000), buf_size=1000),
+            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
         batch_size=batch_size)
 
     place = core.CPUPlace()
@@ -88,15 +96,23 @@ def main():
     batch_id = 0
     for pass_id in xrange(2):
-        print 'pass_id', pass_id
         for data in train_data():
-            print 'batch', batch_id
-            batch_id += 1
-            if batch_id > 10: break
             word_data = to_lodtensor(map(lambda x: x[0], data), place)
+            trg_word = to_lodtensor(map(lambda x: x[1], data), place)
+            trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
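+            # x[0]/x[1]/x[2] are source ids, target ids, and the target
+            # shifted one word ahead (the labels for cross entropy)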
             outs = exe.run(framework.default_main_program(),
-                           feed={'src_word_id': word_data, },
-                           fetch_list=[encoder_out])
+                           feed={
+                               'src_word_id': word_data,
+                               'target_language_word': trg_word,
+                               'target_language_next_word': trg_word_next
+                           },
+                           fetch_list=[avg_cost])
+            avg_cost_val = np.array(outs[0])
+            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
+                  " avg_cost=" + str(avg_cost_val))
+            if batch_id > 3:
+                exit(0)
+            batch_id += 1
 
 
 if __name__ == '__main__':
     main()