You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/doc_cn/ui/data_provider/sentimental_provider.py

46 lines
1.5 KiB

from paddle.trainer.PyDataProvider2 import *
def on_init(settings, dictionary, **kwargs):
    """Initialization hook: declare the input types and keep the dictionary.

    Invoked when the data provider is initialized. Assigning
    ``settings.input_types`` here does the same thing as
    ``@provider(input_types=...)`` would, but it is set dynamically at
    runtime, which lets the vocabulary size be taken from ``dictionary``.

    Args:
        settings: Provider settings object; ``input_types`` and
            ``dictionary`` are set on it.
        dictionary: Dict mapping word string => word id, passed in from the
            trainer config.
        **kwargs: Extra keyword arguments; ignored by this hook.
    """
    settings.input_types = [
        # Text input: a sequence of integer values, each one a word id. The
        # whole sequence is the sentence whose sentiment is to be predicted.
        # The sequence kind must be a member of SequenceType (SEQUENCE), not
        # the SequenceType class itself.
        integer_value(len(dictionary), seq_type=SequenceType.SEQUENCE),
        # Label: positive/negative.
        integer_value(2),
    ]

    # Saved on settings so that process() can map words to ids.
    settings.dictionary = dictionary
@provider(init_hook=on_init)
def process(settings, filename):
    """Yield ``(word_ids, label)`` samples read from one data file.

    Every line is expected to look like ``label<TAB>sentence``, where the
    words of the sentence are separated by single spaces.

    Args:
        settings: Provider settings object; ``settings.dictionary`` (stored
            by ``on_init``) maps word string => word id.
        filename: Path of the text file to read.

    Yields:
        A ``(word_ids, label)`` pair per line: the ids of the words found in
        the dictionary (words not in the dictionary are ignored) and the
        label converted to ``int``.

    Raises:
        ValueError: If a line does not contain exactly one tab, or its label
            is not an integer.
    """
    dictionary = settings.dictionary
    # `with` guarantees the file is closed even if the consumer stops
    # iterating early or a malformed line raises.
    with open(filename, 'r') as f:
        for line in f:
            # Strip the line terminator first; otherwise the last word of
            # each sentence keeps a trailing newline, never matches the
            # dictionary, and is silently dropped.
            label, sentence = line.rstrip('\r\n').split('\t')
            words = sentence.split(' ')
            # Convert word strings to word ids, skipping unknown words.
            word_ids = [dictionary[word] for word in words if word in dictionary]
            # Give the sample to paddle.
            yield word_ids, int(label)