Register the mq2007 dataset module in paddle.v2.dataset and read MQ2007
files directly from the downloaded RAR archive instead of extracting the
archive to disk first.

Review notes on this revision of the patch:
  * __init__.py: added the comma that was missing after 'sentiment' in
    __all__; without it Python concatenates the adjacent string literals
    into 'sentimentuci_housing' and neither module name is exported.
  * mq2007.py: __initialize_meta_info__() now returns the RarFile object,
    so load_from_text() must pass the archive member name to fn.open()
    directly; os.path.join(fn, filepath) would be handed an archive object
    rather than a path string.
  * The post-image blob ids on the "index" lines were not recomputed after
    these two corrections.

diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
index 80ff6295c3..26252d5bbd 100644
--- a/python/paddle/v2/dataset/__init__.py
+++ b/python/paddle/v2/dataset/__init__.py
@@ -24,8 +24,9 @@ import conll05
 import uci_housing
 import sentiment
 import wmt14
+import mq2007
 
 __all__ = [
-    'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
-    'uci_housing', 'wmt14'
+    'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment',
+    'uci_housing', 'wmt14', 'mq2007'
 ]
diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py
index fd71b34166..d8c9918d14 100644
--- a/python/paddle/v2/dataset/mq2007.py
+++ b/python/paddle/v2/dataset/mq2007.py
@@ -41,9 +41,7 @@ def __initialize_meta_info__():
     """
     fn = fetch()
     rar = rarfile.RarFile(fn)
-    dirpath = os.path.dirname(fn)
-    rar.extractall(path=dirpath)
-    return dirpath
+    return rar
 
 
 class Query(object):
@@ -273,7 +271,7 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1):
     querylists = []
     querylist = None
     fn = __initialize_meta_info__()
-    with open(os.path.join(fn, filepath)) as f:
+    with fn.open(filepath) as f:
         for line in f:
             query = Query()
             query = query._parse_(line)