From a6f25f3d2a27e2ef434bd399fd9e0c8c1971638d Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Sat, 25 Feb 2017 04:09:20 +0800
Subject: [PATCH 1/8] add sentiment data package

---
 paddle/setup.py.in              |  2 +
 paddle/v2/data_set/config.py    |  8 ++++
 paddle/v2/data_set/sentiment.py | 81 +++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)
 create mode 100644 paddle/v2/data_set/config.py
 create mode 100644 paddle/v2/data_set/sentiment.py

diff --git a/paddle/setup.py.in b/paddle/setup.py.in
index 38621af065..8dc3ff6acd 100644
--- a/paddle/setup.py.in
+++ b/paddle/setup.py.in
@@ -72,6 +72,8 @@ setup(name="py_paddle",
       packages=['py_paddle'],
       include_dirs = include_dirs,
       install_requires = [
+        'h5py',
+        'nltk',
         'numpy>=1.8.0',      # The numpy is required.
         'protobuf>=3.0.0'    # The paddle protobuf version
       ],
diff --git a/paddle/v2/data_set/config.py b/paddle/v2/data_set/config.py
new file mode 100644
index 0000000000..69e96d65ef
--- /dev/null
+++ b/paddle/v2/data_set/config.py
@@ -0,0 +1,8 @@
+import os
+
+__all__ = ['DATA_HOME']
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')
+
+if not os.path.exists(DATA_HOME):
+    os.makedirs(DATA_HOME)
diff --git a/paddle/v2/data_set/sentiment.py b/paddle/v2/data_set/sentiment.py
new file mode 100644
index 0000000000..323fc214dd
--- /dev/null
+++ b/paddle/v2/data_set/sentiment.py
@@ -0,0 +1,81 @@
+import random
+import nltk
+import numpy as np
+from nltk.corpus import movie_reviews
+from config import DATA_HOME
+
+__all__ = ['train', 'test', 'get_label_dict', 'get_word_dict']
+SPLIT_NUM = 800
+TOTAL_DATASET_NUM = 1000
+
+
+def get_label_dict():
+    label_dict = {'neg': 0, 'pos': 1}
+    return label_dict
+
+
+def is_download_data():
+    try:
+        nltk.data.path.append(DATA_HOME)
+        movie_reviews.categories()
+    except LookupError:
+        print "dd"
+        nltk.download('movie_reviews', download_dir=DATA_HOME)
+        nltk.data.path.append(DATA_HOME)
+
+
+def get_word_dict():
+    words_freq_sorted = list()
+    is_download_data()
+    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
+    words_sort_list = words_freq.items()
+    words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
+    print words_sort_list
+    for index, word in enumerate(words_sort_list):
+        words_freq_sorted.append(word[0])
+    return words_freq_sorted
+
+
+def load_sentiment_data():
+    label_dict = get_label_dict()
+    is_download_data()
+    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
+    data_set = [([words_freq[word]
+                  for word in movie_reviews.words(fileid)], label_dict[category])
+                for category in movie_reviews.categories()
+                for fileid in movie_reviews.fileids(category)]
+    random.shuffle(data_set)
+    return data_set
+
+
+data_set = load_sentiment_data()
+
+
+def reader_creator(data_type):
+    if data_type == 'train':
+        for each in data_set[0:SPLIT_NUM]:
+            train_sentences = np.array(each[0], dtype=np.int32)
+            train_label = np.array(each[1], dtype=np.int8)
+            yield train_sentences, train_label
+    else:
+        for each in data_set[SPLIT_NUM:]:
+            test_sentences = np.array(each[0], dtype=np.int32)
+            test_label = np.array(each[1], dtype=np.int8)
+            yield test_sentences, test_label
+
+
+def train():
+    return reader_creator('train')
+
+
+def test():
+    return reader_creator('test')
+
+
+if __name__ == '__main__':
+    for train in train():
+        print "train"
+        print train
+    for test in test():
+        print "test"
+        print test
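
Note: this first patch establishes the reader pattern kept through the rest of
the series: train() and test() return generators that yield one (word-id
vector, label) pair per review, with 800 of the 1000 shuffled reviews going to
the training split. A minimal consumption sketch, assuming the data_set
package is importable from the Python path (the module name as of this patch):

    # Assumption: paddle/v2/data_set is importable as a package.
    import paddle.v2.data_set.sentiment as sentiment

    # Each instance pairs a numpy int32 vector of per-word values with an
    # int8 label (0 = negative, 1 = positive).
    for words, label in sentiment.train():
        print words.shape, label    # Python 2 print, matching the patch
        break
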
From 55d19fc4f029105661ddd30aa838e1100d03ee54 Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Sun, 26 Feb 2017 21:41:42 +0800
Subject: [PATCH 2/8] fix bugs

---
 paddle/setup.py.in                        |   1 -
 paddle/v2/data_set/sentiment.py           |  81 --------------
 paddle/v2/{data_set => dataset}/config.py |   2 +-
 paddle/v2/dataset/sentiment.py            | 127 ++++++++++++++++++++++
 4 files changed, 128 insertions(+), 83 deletions(-)
 delete mode 100644 paddle/v2/data_set/sentiment.py
 rename paddle/v2/{data_set => dataset}/config.py (62%)
 create mode 100644 paddle/v2/dataset/sentiment.py

diff --git a/paddle/setup.py.in b/paddle/setup.py.in
index 8dc3ff6acd..d44f1145df 100644
--- a/paddle/setup.py.in
+++ b/paddle/setup.py.in
@@ -72,7 +72,6 @@ setup(name="py_paddle",
       packages=['py_paddle'],
       include_dirs = include_dirs,
       install_requires = [
-        'h5py',
         'nltk',
         'numpy>=1.8.0',      # The numpy is required.
         'protobuf>=3.0.0'    # The paddle protobuf version
diff --git a/paddle/v2/data_set/sentiment.py b/paddle/v2/data_set/sentiment.py
deleted file mode 100644
index 323fc214dd..0000000000
--- a/paddle/v2/data_set/sentiment.py
+++ /dev/null
@@ -1,81 +0,0 @@
-import random
-import nltk
-import numpy as np
-from nltk.corpus import movie_reviews
-from config import DATA_HOME
-
-__all__ = ['train', 'test', 'get_label_dict', 'get_word_dict']
-SPLIT_NUM = 800
-TOTAL_DATASET_NUM = 1000
-
-
-def get_label_dict():
-    label_dict = {'neg': 0, 'pos': 1}
-    return label_dict
-
-
-def is_download_data():
-    try:
-        nltk.data.path.append(DATA_HOME)
-        movie_reviews.categories()
-    except LookupError:
-        print "dd"
-        nltk.download('movie_reviews', download_dir=DATA_HOME)
-        nltk.data.path.append(DATA_HOME)
-
-
-def get_word_dict():
-    words_freq_sorted = list()
-    is_download_data()
-    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
-    words_sort_list = words_freq.items()
-    words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
-    print words_sort_list
-    for index, word in enumerate(words_sort_list):
-        words_freq_sorted.append(word[0])
-    return words_freq_sorted
-
-
-def load_sentiment_data():
-    label_dict = get_label_dict()
-    is_download_data()
-    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
-    data_set = [([words_freq[word]
-                  for word in movie_reviews.words(fileid)], label_dict[category])
-                for category in movie_reviews.categories()
-                for fileid in movie_reviews.fileids(category)]
-    random.shuffle(data_set)
-    return data_set
-
-
-data_set = load_sentiment_data()
-
-
-def reader_creator(data_type):
-    if data_type == 'train':
-        for each in data_set[0:SPLIT_NUM]:
-            train_sentences = np.array(each[0], dtype=np.int32)
-            train_label = np.array(each[1], dtype=np.int8)
-            yield train_sentences, train_label
-    else:
-        for each in data_set[SPLIT_NUM:]:
-            test_sentences = np.array(each[0], dtype=np.int32)
-            test_label = np.array(each[1], dtype=np.int8)
-            yield test_sentences, test_label
-
-
-def train():
-    return reader_creator('train')
-
-
-def test():
-    return reader_creator('test')
-
-
-if __name__ == '__main__':
-    for train in train():
-        print "train"
-        print train
-    for test in test():
-        print "test"
-        print test
diff --git a/paddle/v2/data_set/config.py b/paddle/v2/dataset/config.py
similarity index 62%
rename from paddle/v2/data_set/config.py
rename to paddle/v2/dataset/config.py
index 69e96d65ef..304c4bc5cd 100644
--- a/paddle/v2/data_set/config.py
+++ b/paddle/v2/dataset/config.py
@@ -2,7 +2,7 @@ import os
 
 __all__ = ['DATA_HOME']
 
-DATA_HOME = os.path.expanduser('~/.cache/paddle_data_set')
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
 
 if not os.path.exists(DATA_HOME):
     os.makedirs(DATA_HOME)
diff --git a/paddle/v2/dataset/sentiment.py b/paddle/v2/dataset/sentiment.py
new file mode 100644
index 0000000000..83581eadf2
--- /dev/null
+++ b/paddle/v2/dataset/sentiment.py
@@ -0,0 +1,127 @@
+# /usr/bin/env python
+# -*- coding:utf-8 -*-
+
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+The script fetch and preprocess movie_reviews data set
+
+that provided by NLTK
+"""
+
+import nltk
+import numpy as np
+from nltk.corpus import movie_reviews
+from config import DATA_HOME
+
+__all__ = ['train', 'test', 'get_label_dict', 'get_word_dict']
+NUM_TRAINING_INSTANCES = 1600
+NUM_TOTAL_INSTANCES = 2000
+
+
+def get_label_dict():
+    """
+    Define the labels dict for dataset
+    """
+    label_dict = {'neg': 0, 'pos': 1}
+    return label_dict
+
+
+def download_data_if_not_yet():
+    """
+    Download the data set, if the data set is not download.
+    """
+    try:
+        # make sure that nltk can find the data
+        nltk.data.path.append(DATA_HOME)
+        movie_reviews.categories()
+    except LookupError:
+        print "Downloading movie_reviews data set, please wait....."
+        nltk.download('movie_reviews', download_dir=DATA_HOME)
+        print "Download data set success......"
+        # make sure that nltk can find the data
+        nltk.data.path.append(DATA_HOME)
+
+
+def get_word_dict():
+    """
+    Sorted the words by the frequency of words which occur in sample
+    :return:
+        words_freq_sorted
+    """
+    words_freq_sorted = list()
+    download_data_if_not_yet()
+    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
+    words_sort_list = words_freq.items()
+    words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
+    for index, word in enumerate(words_sort_list):
+        words_freq_sorted.append(word[0])
+    return words_freq_sorted
+
+
+def load_sentiment_data():
+    """
+    Load the data set
+    :return:
+        data_set
+    """
+    label_dict = get_label_dict()
+    download_data_if_not_yet()
+    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
+    data_set = [([words_freq[word.lower()]
+                  for word in movie_reviews.words(fileid)],
+                 label_dict[category])
+                for category in movie_reviews.categories()
+                for fileid in movie_reviews.fileids(category)]
+    return data_set
+
+
+data_set = load_sentiment_data()
+
+
+def reader_creator(data):
+    """
+    Reader creator, it format data set to numpy
+    :param data:
+        train data set or test data set
+    """
+    for each in data:
+        sentences = np.array(each[0], dtype=np.int32)
+        labels = np.array(each[1], dtype=np.int8)
+        yield sentences, labels
+
+
+def train():
+    """
+    Default train set reader creator
+    """
+    return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
+
+
+def test():
+    """
+    Default test set reader creator
+    """
+    return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
+
+
+def unittest():
+    assert len(data_set) == NUM_TOTAL_INSTANCES
+    assert len(list(train())) == NUM_TRAINING_INSTANCES
+    assert len(list(test())) == NUM_TOTAL_INSTANCES - NUM_TRAINING_INSTANCES
+
+
+if __name__ == '__main__':
+    unittest()
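
Note: get_word_dict() above ranks the vocabulary with list.sort(cmp=...),
which exists only in Python 2 (the series targets Python 2, as the print
statements show). The same frequency ranking can be written with a key
function; a sketch of the equivalent logic, assuming the movie_reviews corpus
is already available locally:

    import nltk
    from nltk.corpus import movie_reviews

    # Assumes the movie_reviews corpus has been downloaded already.
    # Same ranking as get_word_dict(), but with key= instead of cmp=.
    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
    ranked = sorted(words_freq.items(), key=lambda item: item[1], reverse=True)
    words_freq_sorted = [word for word, freq in ranked]

Also worth noting: load_sentiment_data() here encodes each word as its corpus
frequency (words_freq[word.lower()]), so two distinct words with equal counts
map to the same value; the next patch replaces this with rank-based word ids.
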
From 812e21f3c4c14b8cf215fb1221b74814b132f301 Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Mon, 27 Feb 2017 17:43:28 +0800
Subject: [PATCH 3/8] add cross reading sample files and fix bugs

---
 paddle/setup.py.in                  |  2 +-
 paddle/v2/dataset/config.py         |  8 ----
 .../paddle}/v2/dataset/sentiment.py | 42 +++++++++++++------
 3 files changed, 30 insertions(+), 22 deletions(-)
 delete mode 100644 paddle/v2/dataset/config.py
 rename {paddle => python/paddle}/v2/dataset/sentiment.py (73%)

diff --git a/paddle/setup.py.in b/paddle/setup.py.in
index d44f1145df..382d5be6ec 100644
--- a/paddle/setup.py.in
+++ b/paddle/setup.py.in
@@ -72,7 +72,7 @@ setup(name="py_paddle",
       packages=['py_paddle'],
       include_dirs = include_dirs,
       install_requires = [
-        'nltk',
+        'nltk>=3.2.2',
         'numpy>=1.8.0',      # The numpy is required.
         'protobuf>=3.0.0'    # The paddle protobuf version
       ],
diff --git a/paddle/v2/dataset/config.py b/paddle/v2/dataset/config.py
deleted file mode 100644
index 304c4bc5cd..0000000000
--- a/paddle/v2/dataset/config.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import os
-
-__all__ = ['DATA_HOME']
-
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
-
-if not os.path.exists(DATA_HOME):
-    os.makedirs(DATA_HOME)
diff --git a/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
similarity index 73%
rename from paddle/v2/dataset/sentiment.py
rename to python/paddle/v2/dataset/sentiment.py
index 83581eadf2..9825d2ef96 100644
--- a/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -20,9 +20,9 @@ The script fetch and preprocess movie_reviews data set
 
 that provided by NLTK
 """
-
 import nltk
 import numpy as np
+from itertools import chain
 from nltk.corpus import movie_reviews
 from config import DATA_HOME
 
@@ -50,9 +50,10 @@ def download_data_if_not_yet():
     except LookupError:
         print "Downloading movie_reviews data set, please wait....."
         nltk.download('movie_reviews', download_dir=DATA_HOME)
-        print "Download data set success......"
         # make sure that nltk can find the data
         nltk.data.path.append(DATA_HOME)
+        print "Download data set success....."
+        print "Path is " + nltk.data.find('corpora/movie_reviews').path
 
 
 def get_word_dict():
@@ -67,24 +68,39 @@ def get_word_dict():
     words_sort_list = words_freq.items()
     words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
     for index, word in enumerate(words_sort_list):
-        words_freq_sorted.append(word[0])
+        words_freq_sorted.append((word[0], index + 1))
     return words_freq_sorted
 
 
+def sort_files():
+    """
+    Sorted the sample for cross reading the sample
+    :return:
+        files_list
+    """
+    files_list = list()
+    download_data_if_not_yet()
+    neg_file_list = movie_reviews.fileids('neg')
+    pos_file_list = movie_reviews.fileids('pos')
+    files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
+    return files_list
+
+
 def load_sentiment_data():
     """
     Load the data set
     :return:
         data_set
     """
-    label_dict = get_label_dict()
+    data_set = list()
     download_data_if_not_yet()
-    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
-    data_set = [([words_freq[word.lower()]
-                  for word in movie_reviews.words(fileid)],
-                 label_dict[category])
-                for category in movie_reviews.categories()
-                for fileid in movie_reviews.fileids(category)]
+    words_ids = dict(get_word_dict())
+    for sample_file in sort_files():
+        words_list = list()
+        category = 0 if 'neg' in sample_file else 1
+        for word in movie_reviews.words(sample_file):
+            words_list.append(words_ids[word.lower()])
+        data_set.append((words_list, category))
     return data_set
 
 
@@ -98,9 +114,9 @@ def reader_creator(data):
        train data set or test data set
     """
     for each in data:
-        sentences = np.array(each[0], dtype=np.int32)
-        labels = np.array(each[1], dtype=np.int8)
-        yield sentences, labels
+        list_of_int = np.array(each[0], dtype=np.int32)
+        label = each[1]
+        yield list_of_int, label
 
 
 def train():
From 6115fcc5a73497157718eadb3bd596311ea83a55 Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Thu, 2 Mar 2017 04:11:11 +0000
Subject: [PATCH 4/8] format by yapf

---
 python/paddle/v2/dataset/sentiment.py         | 51 +++++-----------
 .../paddle/v2/dataset/tests/test_sentiment.py | 52 ++++++++++++++++
 2 files changed, 69 insertions(+), 34 deletions(-)
 create mode 100644 python/paddle/v2/dataset/tests/test_sentiment.py

diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
index 9825d2ef96..1e7f222f4d 100644
--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -20,38 +20,30 @@ The script fetch and preprocess movie_reviews data set
 
 that provided by NLTK
 """
+import paddle.v2.dataset.common as common
+import collections
 import nltk
 import numpy as np
 from itertools import chain
 from nltk.corpus import movie_reviews
-from config import DATA_HOME
 
-__all__ = ['train', 'test', 'get_label_dict', 'get_word_dict']
+__all__ = ['train', 'test', 'get_word_dict']
 NUM_TRAINING_INSTANCES = 1600
 NUM_TOTAL_INSTANCES = 2000
 
 
-def get_label_dict():
-    """
-    Define the labels dict for dataset
-    """
-    label_dict = {'neg': 0, 'pos': 1}
-    return label_dict
-
-
 def download_data_if_not_yet():
     """
     Download the data set, if the data set is not download.
     """
     try:
         # make sure that nltk can find the data
-        nltk.data.path.append(DATA_HOME)
+        if common.DATA_HOME not in nltk.data.path:
+            nltk.data.path.append(common.DATA_HOME)
         movie_reviews.categories()
     except LookupError:
         print "Downloading movie_reviews data set, please wait....."
-        nltk.download('movie_reviews', download_dir=DATA_HOME)
-        # make sure that nltk can find the data
-        nltk.data.path.append(DATA_HOME)
+        nltk.download('movie_reviews', download_dir=common.DATA_HOME)
         print "Download data set success....."
         print "Path is " + nltk.data.find('corpora/movie_reviews').path
 
@@ -63,12 +55,17 @@ def get_word_dict():
        words_freq_sorted
     """
    words_freq_sorted = list()
+    word_freq_dict = collections.defaultdict(int)
    download_data_if_not_yet()
-    words_freq = nltk.FreqDist(w.lower() for w in movie_reviews.words())
-    words_sort_list = words_freq.items()
+
+    for category in movie_reviews.categories():
+        for field in movie_reviews.fileids(category):
+            for words in movie_reviews.words(field):
+                word_freq_dict[words] += 1
+    words_sort_list = word_freq_dict.items()
     words_sort_list.sort(cmp=lambda a, b: b[1] - a[1])
     for index, word in enumerate(words_sort_list):
-        words_freq_sorted.append((word[0], index + 1))
+        words_freq_sorted.append((word[0], index))
     return words_freq_sorted
 
 
@@ -79,7 +76,6 @@ def sort_files():
        files_list
     """
     files_list = list()
-    download_data_if_not_yet()
     neg_file_list = movie_reviews.fileids('neg')
     pos_file_list = movie_reviews.fileids('pos')
     files_list = list(chain.from_iterable(zip(neg_file_list, pos_file_list)))
@@ -104,9 +100,6 @@ def load_sentiment_data():
     return data_set
 
 
-data_set = load_sentiment_data()
-
-
 def reader_creator(data):
     """
     Reader creator, it format data set to numpy
@@ -114,30 +107,20 @@ def reader_creator(data):
        train data set or test data set
     """
     for each in data:
-        list_of_int = np.array(each[0], dtype=np.int32)
-        label = each[1]
-        yield list_of_int, label
+        yield each[0], each[1]
 
 
 def train():
     """
     Default train set reader creator
     """
+    data_set = load_sentiment_data()
     return reader_creator(data_set[0:NUM_TRAINING_INSTANCES])
 
 
 def test():
     """
     Default test set reader creator
     """
+    data_set = load_sentiment_data()
     return reader_creator(data_set[NUM_TRAINING_INSTANCES:])
-
-
-def unittest():
-    assert len(data_set) == NUM_TOTAL_INSTANCES
-    assert len(list(train())) == NUM_TRAINING_INSTANCES
-    assert len(list(test())) == NUM_TOTAL_INSTANCES - NUM_TRAINING_INSTANCES
-
-
-if __name__ == '__main__':
-    unittest()
diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py
new file mode 100644
index 0000000000..48a14aad2a
--- /dev/null
+++ b/python/paddle/v2/dataset/tests/test_sentiment.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import nltk
+import paddle.v2.dataset.sentiment as st
+from nltk.corpus import movie_reviews
+
+
+class TestSentimentMethods(unittest.TestCase):
+    def test_get_word_dict(self):
+        word_dict = st.get_word_dict()[0:10]
+        test_word_list = [(u',', 0), (u'the', 1), (u'.', 2), (u'a', 3),
+                          (u'and', 4), (u'of', 5), (u'to', 6), (u"'", 7),
+                          (u'is', 8), (u'in', 9)]
+        for idx, each in enumerate(word_dict):
+            self.assertEqual(each, test_word_list[idx])
+        self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path)
+
+    def test_sort_files(self):
+        last_label = ''
+        for sample_file in st.sort_files():
+            current_label = sample_file.split("/")[0]
+            self.assertNotEqual(current_label, last_label)
+            last_label = current_label
+
+    def test_data_set(self):
+        data_set = st.load_sentiment_data()
+        last_label = -1
+        for each in st.test():
+            self.assertNotEqual(each[1], last_label)
+            last_label = each[1]
+        self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES)
+        self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES)
+        self.assertEqual(
+            len(list(st.test())),
+            (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES))
+
+
+if __name__ == '__main__':
+    unittest.main()
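
Note: besides the yapf reformat, this patch replaces nltk.FreqDist with a
plain collections.defaultdict(int) counting loop, drops the module-level
data_set global in favour of loading inside train()/test() (so importing the
module no longer triggers a download, though each call now reloads the
corpus), and shifts word ranks to start at 0, as the new unit test expects.
The counting pattern in isolation, on stand-in tokens:

    import collections

    word_freq_dict = collections.defaultdict(int)
    for token in ['the', 'movie', ',', 'the']:   # stand-in for corpus tokens
        word_freq_dict[token] += 1
    print word_freq_dict['the'], word_freq_dict['movie']
    # 2 1
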
From a846ef664002a91c949f383d273e0539768198e6 Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Thu, 2 Mar 2017 08:13:13 +0000
Subject: [PATCH 5/8] modify doc

---
 python/paddle/v2/dataset/sentiment.py            | 2 +-
 python/paddle/v2/dataset/tests/test_sentiment.py | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
index 1e7f222f4d..137aa6aea7 100644
--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -102,7 +102,7 @@ def load_sentiment_data():
 
 def reader_creator(data):
     """
-    Reader creator, it format data set to numpy
+    Reader creator, generate an iterator for data set
     :param data:
        train data set or test data set
     """
diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py
index 48a14aad2a..4074052907 100644
--- a/python/paddle/v2/dataset/tests/test_sentiment.py
+++ b/python/paddle/v2/dataset/tests/test_sentiment.py
@@ -1,3 +1,6 @@
+# /usr/bin/env python
+# -*- coding:utf-8 -*-
+
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

From 3feebce2095ebe7002e86081de0a76d573319129 Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Fri, 3 Mar 2017 05:45:59 +0000
Subject: [PATCH 6/8] add sentiment in __init__.py

---
 python/paddle/v2/dataset/__init__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
index a1b21bab3b..e823f56316 100644
--- a/python/paddle/v2/dataset/__init__.py
+++ b/python/paddle/v2/dataset/__init__.py
@@ -18,5 +18,8 @@ import imdb
 import cifar
 import movielens
 import conll05
+import sentiment
 
-__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05']
+__all__ = [
+    'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
+]

From c9f379ed805d7459fd5333706c0496dee662112d Mon Sep 17 00:00:00 2001
From: wen-bo-yang
Date: Fri, 3 Mar 2017 07:05:37 +0000
Subject: [PATCH 7/8] modify code

---
 python/paddle/v2/dataset/sentiment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
index 137aa6aea7..cbd08fa736 100644
--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -20,7 +20,7 @@ The script fetch and preprocess movie_reviews data set
 
 that provided by NLTK
 """
-import paddle.v2.dataset.common as common
+import common
 import collections
 import nltk
 import numpy as np

From 61619580355d37c1ef817c98995dfce8a6556fc0 Mon Sep 17 00:00:00 2001
From: wenboyang
Date: Fri, 3 Mar 2017 15:56:43 +0800
Subject: [PATCH 8/8] update __init__.py

I used pre-commit to check all files and the report passed locally,
but it did not pass in Travis, so I have updated __init__.py.

---
 python/paddle/v2/dataset/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py
index fba76b202e..82f11a7c41 100644
--- a/python/paddle/v2/dataset/__init__.py
+++ b/python/paddle/v2/dataset/__init__.py
@@ -24,4 +24,4 @@ import sentiment
 __all__ = [
     'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
     'uci_housing'
-]
\ No newline at end of file
+]
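
Note: in the final hunk's context lines, __all__ still reads
'conll05', 'sentiment' followed by 'uci_housing' with no comma in between.
Python silently concatenates adjacent string literals, so the exported list
actually contains a single entry 'sentimentuci_housing' rather than two
names. A minimal, self-contained demonstration of the pitfall:

    # Adjacent string literals are implicitly concatenated:
    __all__ = [
        'mnist', 'sentiment'
        'uci_housing'
    ]
    print __all__
    # ['mnist', 'sentimentuci_housing'] -- two entries, not three
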