@ -23,7 +23,7 @@ to initialize SRL model.
import tarfile
import gzip
import itertools
import paddle . v2. dataset. common
import paddle . dataset. common
# Public API of this module. Every entry must be the exact name of a
# module-level attribute: `from <module> import *` looks each string up
# verbatim and raises AttributeError on anything that does not match, so
# 'test' and 'get_dict' have to be separate, unpadded entries.
__all__ = ['test', 'get_dict', 'get_embedding', 'convert']
@ -203,14 +203,11 @@ def get_dict():
Get the word , verb and label dictionary of Wikipedia corpus .
"""
word_dict = load_dict (
paddle . v2 . dataset . common . download ( WORDDICT_URL , ' conll05st ' ,
WORDDICT_MD5 ) )
paddle . dataset . common . download ( WORDDICT_URL , ' conll05st ' , WORDDICT_MD5 ) )
verb_dict = load_dict (
paddle . v2 . dataset . common . download ( VERBDICT_URL , ' conll05st ' ,
VERBDICT_MD5 ) )
paddle . dataset . common . download ( VERBDICT_URL , ' conll05st ' , VERBDICT_MD5 ) )
label_dict = load_label_dict (
paddle . v2 . dataset . common . download ( TRGDICT_URL , ' conll05st ' ,
TRGDICT_MD5 ) )
paddle . dataset . common . download ( TRGDICT_URL , ' conll05st ' , TRGDICT_MD5 ) )
return word_dict , verb_dict , label_dict
@ -218,7 +215,7 @@ def get_embedding():
"""
Get the trained word vector based on Wikipedia corpus .
"""
return paddle . v2. dataset. common . download ( EMB_URL , ' conll05st ' , EMB_MD5 )
return paddle . dataset. common . download ( EMB_URL , ' conll05st ' , EMB_MD5 )
def test ( ) :
@ -235,23 +232,23 @@ def test():
"""
word_dict , verb_dict , label_dict = get_dict ( )
reader = corpus_reader (
paddle . v2. dataset. common . download ( DATA_URL , ' conll05st ' , DATA_MD5 ) ,
paddle . dataset. common . download ( DATA_URL , ' conll05st ' , DATA_MD5 ) ,
words_name = ' conll05st-release/test.wsj/words/test.wsj.words.gz ' ,
props_name = ' conll05st-release/test.wsj/props/test.wsj.props.gz ' )
return reader_creator ( reader , word_dict , verb_dict , label_dict )
def fetch():
    """
    Download every resource this module reads.

    Requests, in order, the word, verb and label dictionaries, the
    embedding file and the data archive via
    paddle.dataset.common.download. The function is called for its
    download side effect only and returns nothing.
    """
    resources = (
        (WORDDICT_URL, WORDDICT_MD5),
        (VERBDICT_URL, VERBDICT_MD5),
        (TRGDICT_URL, TRGDICT_MD5),
        (EMB_URL, EMB_MD5),
        (DATA_URL, DATA_MD5),
    )
    # One request per resource is enough; going through both
    # paddle.v2.dataset.common and paddle.dataset.common only repeated
    # the same download with identical arguments.
    for url, md5 in resources:
        paddle.dataset.common.download(url, ' conll05st ', md5)
def convert(path):
    """
    Converts dataset to recordio format

    Calls paddle.dataset.common.convert once per output name, passing
    `path`, a fresh reader from test() and the literal 1000.
    """
    # NOTE(review): both outputs are built from test(); no separate train
    # reader is visible in this part of the file -- confirm this is intended.
    # Each output is written once; repeating the call through
    # paddle.v2.dataset.common targeted the same name a second time.
    paddle.dataset.common.convert(path, test(), 1000, " conl105_train ")
    paddle.dataset.common.convert(path, test(), 1000, " conl105_test ")