@@ -1,126 +1,40 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import sys
-from os.path import join as join_path
 import paddle.trainer_config_helpers.attrs as attrs
 from paddle.trainer_config_helpers.poolings import MaxPooling
-import paddle.v2.layer as layer
-import paddle.v2.activation as activation
-import paddle.v2.data_type as data_type
-import paddle.v2.dataset.imdb as imdb
 import paddle.v2 as paddle
 
 
-def sequence_conv_pool(input,
-                       input_size,
-                       context_len,
-                       hidden_size,
-                       name=None,
-                       context_start=None,
-                       pool_type=None,
-                       context_proj_layer_name=None,
-                       context_proj_param_attr=False,
-                       fc_layer_name=None,
-                       fc_param_attr=None,
-                       fc_bias_attr=None,
-                       fc_act=None,
-                       pool_bias_attr=None,
-                       fc_attr=None,
-                       context_attr=None,
-                       pool_attr=None):
-    """
-    Text convolution pooling layers helper.
-
-    Text input => Context Projection => FC Layer => Pooling => Output.
-
-    :param name: name of output layer(pooling layer name)
-    :type name: basestring
-    :param input: name of input layer
-    :type input: LayerOutput
-    :param context_len: context projection length. See
-                        context_projection's document.
-    :type context_len: int
-    :param hidden_size: FC Layer size.
-    :type hidden_size: int
-    :param context_start: context projection length. See
-                          context_projection's context_start.
-    :type context_start: int or None
-    :param pool_type: pooling layer type. See pooling_layer's document.
-    :type pool_type: BasePoolingType.
-    :param context_proj_layer_name: context projection layer name.
-                                    None if user don't care.
-    :type context_proj_layer_name: basestring
-    :param context_proj_param_attr: context projection parameter attribute.
-                                    None if user don't care.
-    :type context_proj_param_attr: ParameterAttribute or None.
-    :param fc_layer_name: fc layer name. None if user don't care.
-    :type fc_layer_name: basestring
-    :param fc_param_attr: fc layer parameter attribute. None if user don't care.
-    :type fc_param_attr: ParameterAttribute or None
-    :param fc_bias_attr: fc bias parameter attribute. False if no bias,
-                         None if user don't care.
-    :type fc_bias_attr: ParameterAttribute or None
-    :param fc_act: fc layer activation type. None means tanh
-    :type fc_act: BaseActivation
-    :param pool_bias_attr: pooling layer bias attr. None if don't care.
-                           False if no bias.
-    :type pool_bias_attr: ParameterAttribute or None.
-    :param fc_attr: fc layer extra attribute.
-    :type fc_attr: ExtraLayerAttribute
-    :param context_attr: context projection layer extra attribute.
-    :type context_attr: ExtraLayerAttribute
-    :param pool_attr: pooling layer extra attribute.
-    :type pool_attr: ExtraLayerAttribute
-    :return: output layer name.
-    :rtype: LayerOutput
-    """
-    # Set Default Value to param
-    context_proj_layer_name = "%s_conv_proj" % name \
-        if context_proj_layer_name is None else context_proj_layer_name
-
-    with layer.mixed(
-            name=context_proj_layer_name,
-            size=input_size * context_len,
-            act=activation.Linear(),
-            layer_attr=context_attr) as m:
-        m += layer.context_projection(
-            input=input,
-            context_len=context_len,
-            context_start=context_start,
-            padding_attr=context_proj_param_attr)
-
-    fc_layer_name = "%s_conv_fc" % name \
-        if fc_layer_name is None else fc_layer_name
-    fl = layer.fc(name=fc_layer_name,
-                  input=m,
-                  size=hidden_size,
-                  act=fc_act,
-                  layer_attr=fc_attr,
-                  param_attr=fc_param_attr,
-                  bias_attr=fc_bias_attr)
-
-    return layer.pooling(
-        name=name,
-        input=fl,
-        pooling_type=pool_type,
-        bias_attr=pool_bias_attr,
-        layer_attr=pool_attr)
-
-
 def convolution_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=128,
                     is_predict=False):
-    data = layer.data("word", data_type.integer_value_sequence(input_dim))
-    emb = layer.embedding(input=data, size=emb_dim)
-    conv_3 = sequence_conv_pool(
-        input=emb, input_size=emb_dim, context_len=3, hidden_size=hid_dim)
-    conv_4 = sequence_conv_pool(
-        input=emb, input_size=emb_dim, context_len=4, hidden_size=hid_dim)
-    output = layer.fc(input=[conv_3, conv_4],
+    data = paddle.layer.data("word",
+                             paddle.data_type.integer_value_sequence(input_dim))
+    emb = paddle.layer.embedding(input=data, size=emb_dim)
+    conv_3 = paddle.networks.sequence_conv_pool(
+        input=emb, context_len=3, hidden_size=hid_dim)
+    conv_4 = paddle.networks.sequence_conv_pool(
+        input=emb, context_len=4, hidden_size=hid_dim)
+    output = paddle.layer.fc(input=[conv_3, conv_4],
                       size=class_dim,
-                      act=activation.Softmax())
-    lbl = layer.data("label", data_type.integer_value(2))
-    cost = layer.classification_cost(input=output, label=lbl)
+                      act=paddle.activation.Softmax())
+    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
+    cost = paddle.layer.classification_cost(input=output, label=lbl)
     return cost
@@ -152,24 +66,28 @@ def stacked_lstm_net(input_dim,
     lstm_para_attr = attrs.ParameterAttribute(initial_std=0., learning_rate=1.)
     para_attr = [fc_para_attr, lstm_para_attr]
     bias_attr = attrs.ParameterAttribute(initial_std=0., l2_rate=0.)
-    relu = activation.Relu()
-    linear = activation.Linear()
+    relu = paddle.activation.Relu()
+    linear = paddle.activation.Linear()
 
-    data = layer.data("word", data_type.integer_value_sequence(input_dim))
-    emb = layer.embedding(input=data, size=emb_dim)
+    data = paddle.layer.data("word",
+                             paddle.data_type.integer_value_sequence(input_dim))
+    emb = paddle.layer.embedding(input=data, size=emb_dim)
 
-    fc1 = layer.fc(input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
-    lstm1 = layer.lstmemory(
+    fc1 = paddle.layer.fc(input=emb,
+                          size=hid_dim,
+                          act=linear,
+                          bias_attr=bias_attr)
+    lstm1 = paddle.layer.lstmemory(
         input=fc1, act=relu, bias_attr=bias_attr, layer_attr=layer_attr)
 
     inputs = [fc1, lstm1]
     for i in range(2, stacked_num + 1):
-        fc = layer.fc(input=inputs,
+        fc = paddle.layer.fc(input=inputs,
                       size=hid_dim,
                       act=linear,
                       param_attr=para_attr,
                       bias_attr=bias_attr)
-        lstm = layer.lstmemory(
+        lstm = paddle.layer.lstmemory(
             input=fc,
             reverse=(i % 2) == 0,
             act=relu,
@@ -177,16 +95,16 @@ def stacked_lstm_net(input_dim,
             layer_attr=layer_attr)
         inputs = [fc, lstm]
 
-    fc_last = layer.pooling(input=inputs[0], pooling_type=MaxPooling())
-    lstm_last = layer.pooling(input=inputs[1], pooling_type=MaxPooling())
-    output = layer.fc(input=[fc_last, lstm_last],
+    fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=MaxPooling())
+    lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=MaxPooling())
+    output = paddle.layer.fc(input=[fc_last, lstm_last],
                       size=class_dim,
-                      act=activation.Softmax(),
+                      act=paddle.activation.Softmax(),
                       bias_attr=bias_attr,
                       param_attr=para_attr)
 
-    lbl = layer.data("label", data_type.integer_value(2))
-    cost = layer.classification_cost(input=output, label=lbl)
+    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
+    cost = paddle.layer.classification_cost(input=output, label=lbl)
     return cost
@@ -196,7 +114,7 @@ if __name__ == '__main__':
 
     # network config
    print 'load dictionary...'
-    word_dict = imdb.word_dict()
+    word_dict = paddle.dataset.imdb.word_dict()
     dict_dim = len(word_dict)
     class_dim = 2
 
@@ -226,7 +144,8 @@ if __name__ == '__main__':
         if isinstance(event, paddle.event.EndPass):
             result = trainer.test(
                 reader=paddle.reader.batched(
-                    lambda: imdb.test(word_dict), batch_size=128),
+                    lambda: paddle.dataset.imdb.test(word_dict),
+                    batch_size=128),
                 reader_dict={'word': 0,
                              'label': 1})
             print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
@@ -239,7 +158,7 @@ if __name__ == '__main__':
     trainer.train(
         reader=paddle.reader.batched(
             paddle.reader.shuffle(
-                lambda: imdb.train(word_dict), buf_size=1000),
+                lambda: paddle.dataset.imdb.train(word_dict), buf_size=1000),
             batch_size=100),
         event_handler=event_handler,
         reader_dict={'word': 0,