fix conflicts

9 years ago · e749e7b10a
parent 6502614089 d0a908d5f5
commit e749e7b10a
291 changed files with 14536 additions and 9616 deletions
--- a/.clang-format
+++ b/.clang-format
@ -13,8 +13,6 @@
 # The document of clang-format is 
 #   http://clang.llvm.org/docs/ClangFormat.html
 #   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
 ---
 Language:        Cpp
 BasedOnStyle:  Google
@ -22,8 +20,9 @@ IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -2  # The private/protected/public has no indent in class
-PointerAlignment: Left    # int* p/int& p, not int *p/int &p
 Standard:  Cpp11 
 AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
 ...

--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,24 @@
+-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
+    sha: c25201a00e6b0514370501050cf2a8538ac12270
+    hooks:
+    -   id: remove-crlf
+-   repo: https://github.com/reyoung/mirrors-yapf.git
+    sha: v0.13.2
+    hooks:
+    -   id: yapf
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: 4ef03c4223ad322c7adaa6c6c0efb26b57df3b71
+    hooks:
+    -   id: check-added-large-files
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+    -   id: end-of-file-fixer
+# TODO(yuyang): the trailing-whitespace hook currently has some bugs on
+# markdown files; please do not add it to the pre-commit hooks for now.
+#    -   id: trailing-whitespace
+#
+# TODO(yuyang): the debug-statements hook does not fit Paddle, because
+# not all of our Python code is runnable; some of it is only used for
+# documentation.
+#    -   id: debug-statements
--- a/.style.yapf
+++ b/.style.yapf
@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -109,11 +109,9 @@ else()
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")

    if(WITH_AVX)
-        if(AVX_FOUND)
-            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -mavx")
-        endif(AVX_FOUND)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
    else(WITH_AVX)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -msse3")
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
    endif(WITH_AVX)

    if(WITH_DSO)
@ -138,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)

 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)

 if(WITH_PYTHON)
--- a/cmake/FindAVX.cmake
+++ b/cmake/FindAVX.cmake
@ -3,36 +3,55 @@

 INCLUDE(CheckCXXSourceRuns)

-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
 IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  SET(CMAKE_REQUIRED_FLAGS "-mavx2")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)  # reserve for WINDOWS
-  SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+    set(MMX_FLAG "-mmmx")
+    set(SSE2_FLAG "-msse2")
+    set(SSE3_FLAG "-msse3")
+    SET(AVX_FLAG "-mavx")
+    SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+    set(MMX_FLAG "/arch:MMX")
+    set(SSE2_FLAG "/arch:SSE2")
+    set(SSE3_FLAG "/arch:SSE3")
+    SET(AVX_FLAG "/arch:AVX")
+    SET(AVX2_FLAG "/arch:AVX2")
 ENDIF()

+# Check  MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
 int main()
 {
-    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
-    __m256i result = _mm256_abs_epi32 (a);
+    _mm_setzero_si64();
    return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)

-# Check AVX
-SET(CMAKE_REQUIRED_FLAGS)
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    SET(CMAKE_REQUIRED_FLAGS "-mavx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
-    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
-endif()
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+    _mm_setzero_si128();
+    return 0;
+}" SSE2_FOUND)

+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+    __m128d a = _mm_set1_pd(6.28);
+    __m128d b = _mm_set1_pd(3.14);
+    __m128d result = _mm_addsub_pd(a, b);
+    result = _mm_movedup_pd(result);
+    return 0;
+}" SSE3_FOUND)
+
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
@ -41,25 +60,17 @@ int main()
    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
    __m256 result = _mm256_add_ps (a, b);
    return 0;
-}" FIND_AVX_10)
-
-IF(${FIND_AVX_20})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
-    ENDIF()
-ENDIF()
+}" AVX_FOUND)

-IF(${FIND_AVX_10})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
-    ENDIF()
-ENDIF()
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" AVX2_FOUND)

-IF(${FIND_AVX_10})
-    SET(AVX_FOUND TRUE)
-    MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
--- a/demo/image_classification/.gitignore
+++ b/demo/image_classification/.gitignore
@ -5,3 +5,5 @@ plot.png
 train.log
 image_provider_copy_1.py
 *pyc
+train.list
+test.list
--- a/demo/image_classification/data/download_cifar.sh
+++ b/demo/image_classification/data/download_cifar.sh
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@ -16,7 +16,6 @@ import numpy as np
 import sys
 import os
 import PIL.Image as Image
-
 """
  Usage: python process_cifar input_dir output_dir
 """
@ -30,6 +29,7 @@ def mkdir_not_exist(path):
    if not os.path.exists(path):
        os.mkdir(path)

+
 def create_dir_structure(output_dir):
    """
    Create the directory structure for the directory.
@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
    mkdir_not_exist(os.path.join(output_dir, "train"))
    mkdir_not_exist(os.path.join(output_dir, "test"))

-def convert_batch(batch_path, label_set, label_map,
-                  output_dir, data_split):
+
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
    """
    Convert CIFAR batch to the structure of Paddle format.
    batch_path: the batch to be converted.
@ -67,11 +67,23 @@ if __name__ == '__main__':
    output_dir = sys.argv[2]
    num_batch = 5
    create_dir_structure(output_dir)
-    label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
-                 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+    label_map = {
+        0: "airplane",
+        1: "automobile",
+        2: "bird",
+        3: "cat",
+        4: "deer",
+        5: "dog",
+        6: "frog",
+        7: "horse",
+        8: "ship",
+        9: "truck"
+    }
    labels = {}
    for i in range(1, num_batch + 1):
-        convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
-                      label_map, output_dir, "train")
-    convert_batch(os.path.join(input_dir, "test_batch"), {},
-                  label_map, output_dir, "test")
+        convert_batch(
+            os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+            output_dir, "train")
+    convert_batch(
+        os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+        "test")
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@ -46,26 +46,31 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,

    settings.img_mean = image_util.load_meta(settings.meta_path,
                                             settings.mean_img_size,
-                                             settings.img_size,
-                                             settings.color)
+                                             settings.img_size, settings.color)

    settings.logger.info('Image size: %s', settings.img_size)
    settings.logger.info('Meta path: %s', settings.meta_path)
    settings.input_types = [
        dense_vector(settings.img_raw_size),  # image feature
-        integer_value(settings.num_classes)]  # labels
+        integer_value(settings.num_classes)
+    ]  # labels

    settings.logger.info('DataProvider Initialization finished')


-@provider(init_hook=hook)
-def processData(settings, file_name):
+@provider(init_hook=hook, min_pool_size=0)
+def processData(settings, file_list):
    """
    The main function for loading data.
    Load the batch, iterate all the images and labels in this batch.
-    file_name: the batch file name.
+    file_list: the batch file list.
    """
-    data = cPickle.load(io.open(file_name, 'rb'))
+    with open(file_list, 'r') as fdata:
+        lines = [line.strip() for line in fdata]
+        random.shuffle(lines)
+        for file_name in lines:
+            with io.open(file_name.strip(), 'rb') as file:
+                data = cPickle.load(file)
                indexes = list(range(len(data['images'])))
                if settings.is_train:
                    random.shuffle(indexes)
@ -74,8 +79,8 @@ def processData(settings, file_name):
                        img = image_util.decode_jpeg(data['images'][i])
                    else:
                        img = data['images'][i]
-        img_feat = image_util.preprocess_img(img, settings.img_mean,
-                                             settings.img_size, settings.is_train,
-                                             settings.color)
+                    img_feat = image_util.preprocess_img(
+                        img, settings.img_mean, settings.img_size,
+                        settings.is_train, settings.color)
                    label = data['labels'][i]
-        yield img_feat.tolist(), int(label)
+                    yield img_feat.astype('float32'), int(label)
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@ -16,17 +16,20 @@ import numpy as np
 from PIL import Image
 from cStringIO import StringIO

+
 def resize_image(img, target_size):
    """
    Resize an image so that the shorter edge has length target_size.
    img: the input image to be resized.
    target_size: the target resized image size.
    """
-    percent = (target_size/float(min(img.size[0], img.size[1])))
-    resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+    percent = (target_size / float(min(img.size[0], img.size[1])))
+    resized_size = int(round(img.size[0] * percent)), int(
+        round(img.size[1] * percent))
    img = img.resize(resized_size, Image.ANTIALIAS)
    return img

+
 def flip(im):
    """
    Return the flipped image.
@ -38,6 +41,7 @@ def flip(im):
    else:
        return im[:, ::-1]

+
 def crop_img(im, inner_size, color=True, test=True):
    """
    Return cropped image.
@ -50,20 +54,22 @@ def crop_img(im, inner_size, color=True, test=True):
      If True, crop the center of images.
    """
    if color:
-        height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+        height, width = max(inner_size, im.shape[1]), max(inner_size,
+                                                          im.shape[2])
        padded_im = np.zeros((3, height, width))
        startY = (height - im.shape[1]) / 2
        startX = (width - im.shape[2]) / 2
        endY, endX = startY + im.shape[1], startX + im.shape[2]
-        padded_im[:, startY: endY, startX: endX] = im
+        padded_im[:, startY:endY, startX:endX] = im
    else:
        im = im.astype('float32')
-        height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+        height, width = max(inner_size, im.shape[0]), max(inner_size,
+                                                          im.shape[1])
        padded_im = np.zeros((height, width))
        startY = (height - im.shape[0]) / 2
        startX = (width - im.shape[1]) / 2
        endY, endX = startY + im.shape[0], startX + im.shape[1]
-        padded_im[startY: endY, startX: endX] = im
+        padded_im[startY:endY, startX:endX] = im
    if test:
        startY = (height - inner_size) / 2
        startX = (width - inner_size) / 2
@ -72,19 +78,21 @@ def crop_img(im, inner_size, color=True, test=True):
        startX = np.random.randint(0, width - inner_size + 1)
    endY, endX = startY + inner_size, startX + inner_size
    if color:
-        pic = padded_im[:, startY: endY, startX: endX]
+        pic = padded_im[:, startY:endY, startX:endX]
    else:
-        pic = padded_im[startY: endY, startX: endX]
+        pic = padded_im[startY:endY, startX:endX]
    if (not test) and (np.random.randint(2) == 0):
        pic = flip(pic)
    return pic

+
 def decode_jpeg(jpeg_string):
    np_array = np.array(Image.open(StringIO(jpeg_string)))
    if len(np_array.shape) == 3:
        np_array = np.transpose(np_array, (2, 0, 1))
    return np_array

+
 def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    """
    Does data augmentation for images.
@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    pic -= img_mean
    return pic.flatten()

+
 def load_meta(meta_path, mean_img_size, crop_size, color=True):
    """
    Return the loaded meta file.
@ -109,17 +118,18 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
    mean = np.load(meta_path)['data_mean']
    border = (mean_img_size - crop_size) / 2
    if color:
-        assert(mean_img_size * mean_img_size * 3 == mean.shape[0])
+        assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
        mean = mean.reshape(3, mean_img_size, mean_img_size)
-        mean = mean[:, border: border + crop_size,
-                       border: border + crop_size].astype('float32')
+        mean = mean[:, border:border + crop_size, border:border +
+                    crop_size].astype('float32')
    else:
-        assert(mean_img_size * mean_img_size == mean.shape[0])
+        assert (mean_img_size * mean_img_size == mean.shape[0])
        mean = mean.reshape(mean_img_size, mean_img_size)
-        mean = mean[border: border + crop_size,
-                    border: border + crop_size].astype('float32')
+        mean = mean[border:border + crop_size, border:border +
+                    crop_size].astype('float32')
    return mean

+
 def load_image(img_path, is_color=True):
    """
    Load image and return. 
@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
    img.load()
    return img

+
 def oversample(img, crop_dims):
    """
    image : iterable of (H x W x K) ndarrays
@ -152,26 +163,29 @@ def oversample(img, crop_dims):
        for j in w_indices:
            crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
            curr += 1
-    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
-        -crop_dims / 2.0,
-         crop_dims / 2.0
-    ])
+    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
+        [-crop_dims / 2.0, crop_dims / 2.0])
    crops_ix = np.tile(crops_ix, (2, 1))

    # Extract crops
-    crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
-                      im_shape[-1]), dtype=np.float32)
+    crops = np.empty(
+        (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+        dtype=np.float32)
    ix = 0
    for im in img:
        for crop in crops_ix:
            crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
            ix += 1
-        crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :]  # flip for mirrors
+        crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :]  # flip for mirrors
    return crops

+
 class ImageTransformer:
-    def __init__(self, transpose = None,
-                 channel_swap = None, mean = None, is_color = True):
+    def __init__(self,
+                 transpose=None,
+                 channel_swap=None,
+                 mean=None,
+                 is_color=True):
        self.transpose = transpose
        self.channel_swap = None
        self.mean = None
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import os,sys
+import os, sys
 import numpy as np
 import logging
 from PIL import Image
@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config

-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)

+
 class ImageClassifier():
    def __init__(self,
                 train_conf,
@ -58,8 +60,8 @@ class ImageClassifier():
        self.oversample = oversample
        self.is_color = is_color

-        self.transformer = image_util.ImageTransformer(is_color = is_color)
-        self.transformer.set_transpose((2,0,1))
+        self.transformer = image_util.ImageTransformer(is_color=is_color)
+        self.transformer.set_transpose((2, 0, 1))

        self.mean_file = mean_file
        mean = np.load(self.mean_file)['data_mean']
@ -69,7 +71,8 @@ class ImageClassifier():
        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)

@ -90,14 +93,14 @@ class ImageClassifier():
            # image_util.resize_image: short side is self.resize_dim
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
            input[0] = image.astype(np.float32)
            input = image_util.oversample(input, self.crop_dims)
        else:
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
            input[0] = np.array(image).astype(np.float32)

        data_in = []
@ -133,18 +136,20 @@ class ImageClassifier():
        lab = np.argsort(-prob)
        logging.info("Label of %s is: %d", image, lab[0])

+
 if __name__ == '__main__':
-    image_size=32
-    crop_size=32
-    multi_crop=True
-    config="vgg_16_cifar.py"
-    output_layer="__fc_layer_1__"
-    mean_path="data/cifar-out/batches/batches.meta"
-    model_path=sys.argv[1]
-    image=sys.argv[2]
-    use_gpu=bool(int(sys.argv[3]))
-
-    obj = ImageClassifier(train_conf=config,
+    image_size = 32
+    crop_size = 32
+    multi_crop = True
+    config = "vgg_16_cifar.py"
+    output_layer = "__fc_layer_1__"
+    mean_path = "data/cifar-out/batches/batches.meta"
+    model_path = sys.argv[1]
+    image = sys.argv[2]
+    use_gpu = bool(int(sys.argv[3]))
+
+    obj = ImageClassifier(
+        train_conf=config,
        model_dir=model_path,
        resize_dim=image_size,
        crop_dim=crop_size,
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@ -19,22 +19,36 @@ from optparse import OptionParser
 def option_parser():
    parser = OptionParser(usage="usage: python preprcoess.py "\
                          "-i data_dir [options]")
-    parser.add_option("-i", "--input", action="store",
-                      dest="input", help="Input data directory.")
-    parser.add_option("-s", "--size", action="store",
-                      dest="size", help="Processed image size.")
-    parser.add_option("-c", "--color", action="store",
-                      dest="color", help="whether to use color images.")
+    parser.add_option(
+        "-i",
+        "--input",
+        action="store",
+        dest="input",
+        help="Input data directory.")
+    parser.add_option(
+        "-s",
+        "--size",
+        action="store",
+        dest="size",
+        help="Processed image size.")
+    parser.add_option(
+        "-c",
+        "--color",
+        action="store",
+        dest="color",
+        help="whether to use color images.")
    return parser.parse_args()

+
 if __name__ == '__main__':
    options, args = option_parser()
    data_dir = options.input
    processed_image_size = int(options.size)
    color = options.color == "1"
-     data_creator = ImageClassificationDatasetCreater(data_dir,
-                                                      processed_image_size,
-                                                      color)
+    data_creator = ImageClassificationDatasetCreater(
+        data_dir, processed_image_size, color)
+    data_creator.train_list_name = "train.txt"
+    data_creator.test_list_name = "test.txt"
    data_creator.num_per_batch = 1000
    data_creator.overwrite = True
    data_creator.create_batches()
--- a/demo/image_classification/preprocess.sh
+++ b/demo/image_classification/preprocess.sh
@ -17,3 +17,6 @@ set -e
 data_dir=./data/cifar-out

 python preprocess.py -i $data_dir -s 32 -c 1
+
+echo "data/cifar-out/batches/train.txt" > train.list
+echo "data/cifar-out/batches/test.txt" > test.list
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@ -18,36 +18,38 @@ is_predict = get_config_arg("is_predict", bool, False)

 ####################Data Configuration ##################
 if not is_predict:
-  data_dir='data/cifar-out/batches/'
-  meta_path=data_dir+'batches.meta'
-
-  args = {'meta':meta_path,'mean_img_size': 32,
-          'img_size': 32,'num_classes': 10,
-          'use_jpeg': 1,'color': "color"}
-
-  define_py_data_sources2(train_list=data_dir+"train.list",
-                          test_list=data_dir+'test.list',
+    data_dir = 'data/cifar-out/batches/'
+    meta_path = data_dir + 'batches.meta'
+
+    args = {
+        'meta': meta_path,
+        'mean_img_size': 32,
+        'img_size': 32,
+        'num_classes': 10,
+        'use_jpeg': 1,
+        'color': "color"
+    }
+
+    define_py_data_sources2(
+        train_list="train.list",
+        test_list="test.list",  # held-out list written by preprocess.sh
        module='image_provider',
        obj='processData',
        args=args)

 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
-    learning_rate = 0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    batch_size=128,
+    learning_rate=0.1 / 128.0,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * 128))

 #######################Network Configuration #############
-data_size=3*32*32
-label_size=10
-img = data_layer(name='image',
-                 size=data_size)
+data_size = 3 * 32 * 32
+label_size = 10
+img = data_layer(name='image', size=data_size)
 # small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
-                    num_channels=3,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)

 if not is_predict:
    lbl = data_layer(name="label", size=label_size)
--- a/demo/introduction/README.md
+++ b/demo/introduction/README.md
@ -1,4 +1,3 @@
 This folder contains scripts used in PaddlePaddle introduction.
 - use `bash train.sh` to train a simple linear regression model
 - use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
-
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@ -15,10 +15,10 @@
 from paddle.trainer.PyDataProvider2 import *
 import random

+
 # define data types of input: 2 real numbers
-@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
+@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
 def process(settings, input_file):
    for i in xrange(2000):
        x = random.random()
-        yield [x], [2*x+0.3]
-
+        yield [x], [2 * x + 0.3]
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@ -23,14 +23,17 @@ Usage:
 import numpy as np
 import os

+
 def load(file_name):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header for float type.
        return np.fromfile(f, dtype=np.float32)

+
 def main():
    print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
                                           load('output/pass-00029/b'))

+
 if __name__ == '__main__':
    main()
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *

 # 1. read data. Suppose you saved above python code as dataprovider.py
 data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None, 
-        module='dataprovider', obj='process',args={})
+with open(data_file, 'w') as f:
+    f.writelines(' ')
+define_py_data_sources2(
+    train_list=data_file,
+    test_list=None,
+    module='dataprovider',
+    obj='process',
+    args={})

 # 2. learning algorithm
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 # 3. Network configuration
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+    input=x,
+    param_attr=ParamAttr(name='w'),
+    size=1,
+    act=LinearActivation(),
+    bias_attr=ParamAttr(name='b'))
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
-
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@ -13,9 +13,9 @@
 # limitations under the License.

 o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" +"\n")
+o.write("./data/raw_data/train" + "\n")
 o.close()

 o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" +"\n")
+o.write("./data/raw_data/t10k" + "\n")
 o.close()
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@ -19,4 +19,3 @@ done
 cd $DIR
 rm -f *.list
 python generate_list.py
-
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *


 # Define a py data provider
-@provider(input_types={
-    'pixel': dense_vector(28 * 28),
-    'label': integer_value(10)
-})
+@provider(
+    input_types={'pixel': dense_vector(28 * 28),
+                 'label': integer_value(10)})
 def process(settings, filename):  # settings is not used currently.
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@ -18,32 +18,29 @@ is_predict = get_config_arg("is_predict", bool, False)

 ####################Data Configuration ##################

-
 if not is_predict:
-  data_dir='./data/'
-  define_py_data_sources2(train_list= data_dir + 'train.list',
-                        test_list= data_dir + 'test.list',
+    data_dir = './data/'
+    define_py_data_sources2(
+        train_list=data_dir + 'train.list',
+        test_list=data_dir + 'test.list',
        module='mnist_provider',
        obj='process')

 ######################Algorithm Configuration #############
 settings(
-    batch_size = 128,
-    learning_rate = 0.1 / 128.0,
-    learning_method = MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    batch_size=128,
+    learning_rate=0.1 / 128.0,
+    learning_method=MomentumOptimizer(0.9),
+    regularization=L2Regularization(0.0005 * 128))

 #######################Network Configuration #############

-data_size=1*28*28
-label_size=10
+data_size = 1 * 28 * 28
+label_size = 10
 img = data_layer(name='pixel', size=data_size)

 # small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
-                    num_channels=1,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)

 if not is_predict:
    lbl = data_layer(name="label", size=label_size)
--- a/demo/model_zoo/embedding/extract_para.py
+++ b/demo/model_zoo/embedding/extract_para.py
@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Example:
    python extract_para.py --preModel PREMODEL --preDict PREDICT \
@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct

+
 def get_row_index(preDict, usrDict):
    """
    Get the row positions for all words in user dictionary from pre-trained dictionary.
@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
            pos.append(index[word])
    return pos

-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+                                  paraDim):
    """
    Extract desired parameters from a pretrained embedding model based on user dictionary
    """
@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
    print "extract parameters finish, total", len(rowIndex), "lines"
    fi.close()

+
 def main():
    """
    Main entry for running paraconvert.py 
@ -78,19 +81,33 @@ def main():
            "python %prog --preModel PREMODEL --preDict PREDICT" \
            " --usrModel USRMODEL --usrDict USRDICT -d DIM"
    parser = OptionParser(usage)
-    parser.add_option("--preModel", action="store", dest="preModel",
+    parser.add_option(
+        "--preModel",
+        action="store",
+        dest="preModel",
        help="the name of pretrained embedding model")
-    parser.add_option("--preDict", action="store", dest="preDict",
+    parser.add_option(
+        "--preDict",
+        action="store",
+        dest="preDict",
        help="the name of pretrained dictionary")
-    parser.add_option("--usrModel", action="store", dest="usrModel",
+    parser.add_option(
+        "--usrModel",
+        action="store",
+        dest="usrModel",
        help="the name of output usr embedding model")
-    parser.add_option("--usrDict", action="store", dest="usrDict",
+    parser.add_option(
+        "--usrDict",
+        action="store",
+        dest="usrDict",
        help="the name of user specified dictionary")
-    parser.add_option("-d", action="store", dest="dim",
-                      help="dimension of parameter")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
    extract_parameters_by_usrDict(options.preModel, options.preDict,
-                      options.usrModel, options.usrDict, int(options.dim))
+                                  options.usrModel, options.usrDict,
+                                  int(options.dim))
+

 if __name__ == '__main__':
    main()
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Example:
    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct

+
 def binary2text(input, output, paraDim):
    """
    Convert a binary parameter file of embedding model to be a text file.  
@ -76,6 +76,7 @@ def binary2text(input, output, paraDim):
    fo.close()
    print "binary2text finish, total", line, "lines"

+
 def get_para_count(input):
    """
    Compute the total number of embedding parameters in input text file. 
@ -90,6 +91,7 @@ def get_para_count(input):
            numRows += 1
    return numRows * paraDim

+
 def text2binary(input, output, paddle_head=True):
    """
    Convert a text parameter file of embedding model to be a binary file.
@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
    fo.close()
    print "text2binary finish, total", count, "lines"

+
 def main():
    """
    Main entry for running paraconvert.py 
@ -131,21 +134,26 @@ def main():
            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
            "python %prog --t2b -i INPUT -o OUTPUT"
    parser = OptionParser(usage)
-    parser.add_option("--b2t", action="store_true",
+    parser.add_option(
+        "--b2t",
+        action="store_true",
        help="convert parameter file of embedding model from binary to text")
-    parser.add_option("--t2b", action="store_true",
+    parser.add_option(
+        "--t2b",
+        action="store_true",
        help="convert parameter file of embedding model from text to binary")
-    parser.add_option("-i", action="store", dest="input",
-                      help="input parameter file name")
-    parser.add_option("-o", action="store", dest="output",
-                      help="output parameter file name")
-    parser.add_option("-d", action="store", dest="dim",
-                      help="dimension of parameter")
+    parser.add_option(
+        "-i", action="store", dest="input", help="input parameter file name")
+    parser.add_option(
+        "-o", action="store", dest="output", help="output parameter file name")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
    if options.b2t:
        binary2text(options.input, options.output, options.dim)
    if options.t2b:
        text2binary(options.input, options.output)

+
 if __name__ == '__main__':
    main()
--- a/Show More
+++ b/Show More