fix conflicts

9 years ago · e749e7b10a
parent 6502614089 d0a908d5f5
commit e749e7b10a
291 changed files with 14536 additions and 9616 deletions
--- a/.clang-format
+++ b/.clang-format
@ -13,8 +13,6 @@
 # The document of clang-format is 
 #   http://clang.llvm.org/docs/ClangFormat.html
 #   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
-#
-# TODO(yuyang18): Add python and other language code style
 ---
 Language:        Cpp
 BasedOnStyle:  Google
@ -22,8 +20,9 @@ IndentWidth:     2
 TabWidth:        2
 ContinuationIndentWidth: 4
 AccessModifierOffset: -2  # The private/protected/public has no indent in class
-PointerAlignment: Left    # int* p/int& p, not int *p/int &p
 Standard:  Cpp11 
 AllowAllParametersOfDeclarationOnNextLine: true
+BinPackParameters: false
+BinPackArguments: false
 ...

--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,6 @@ build/
 .vscode
 .idea
 .project
+.cproject
 .pydevproject
+Makefile
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -0,0 +1,24 @@
+-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
+    sha: c25201a00e6b0514370501050cf2a8538ac12270
+    hooks:
+    -   id: remove-crlf
+-   repo: https://github.com/reyoung/mirrors-yapf.git
+    sha: v0.13.2
+    hooks:
+    -   id: yapf
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    sha: 4ef03c4223ad322c7adaa6c6c0efb26b57df3b71
+    hooks:
+    -   id: check-added-large-files
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+    -   id: end-of-file-fixer
+# TODO(yuyang): the trailing-whitespace hook currently has bugs on markdown
+# files, so please do not add it to the pre-commit hooks yet.
+#    -   id: trailing-whitespace
+#
+# TODO(yuyang): debug-statements is not a fit for Paddle, because
+# not all of our python code is runnable. Some of it is used only for
+# documentation.
+#    -   id: debug-statements
--- a/.style.yapf
+++ b/.style.yapf
@ -0,0 +1,3 @@
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -109,11 +109,9 @@ else()
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-g -O3 --use_fast_math")

    if(WITH_AVX)
-        if(AVX_FOUND)
-            set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -mavx")
-        endif(AVX_FOUND)
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
    else(WITH_AVX)
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -msse3")
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
    endif(WITH_AVX)

    if(WITH_DSO)
@ -138,11 +136,11 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)

 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
 else(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
 endif(WITH_AVX)

 if(WITH_PYTHON)
--- a/cmake/FindAVX.cmake
+++ b/cmake/FindAVX.cmake
@ -3,36 +3,55 @@

 INCLUDE(CheckCXXSourceRuns)

-SET(FIND_AVX_10)
-SET(FIND_AVX_20)
-SET(AVX_FLAGS)
-SET(AVX_FOUND)
-
-# Check AVX 2
-SET(CMAKE_REQUIRED_FLAGS)
 IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-  SET(CMAKE_REQUIRED_FLAGS "-mavx2")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)  # reserve for WINDOWS
-  SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+    set(MMX_FLAG "-mmmx")
+    set(SSE2_FLAG "-msse2")
+    set(SSE3_FLAG "-msse3")
+    SET(AVX_FLAG "-mavx")
+    SET(AVX2_FLAG "-mavx2")
+ELSEIF(MSVC)
+    set(MMX_FLAG "/arch:MMX")
+    set(SSE2_FLAG "/arch:SSE2")
+    set(SSE3_FLAG "/arch:SSE3")
+    SET(AVX_FLAG "/arch:AVX")
+    SET(AVX2_FLAG "/arch:AVX2")
 ENDIF()

+# Check  MMX
+set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
-#include <immintrin.h>
+#include <mmintrin.h>
 int main()
 {
-    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
-    __m256i result = _mm256_abs_epi32 (a);
+    _mm_setzero_si64();
    return 0;
-}" FIND_AVX_20)
+}" MMX_FOUND)

-# Check AVX
-SET(CMAKE_REQUIRED_FLAGS)
-IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    SET(CMAKE_REQUIRED_FLAGS "-mavx")
-ELSEIF(MSVC AND NOT CMAKE_CL_64)
-    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
-endif()
+# Check SSE2
+set(CMAKE_REQUIRED_FLAGS ${SSE2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <emmintrin.h>
+int main()
+{
+    _mm_setzero_si128();
+    return 0;
+}" SSE2_FOUND)

+# Check SSE3
+set(CMAKE_REQUIRED_FLAGS ${SSE3_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <pmmintrin.h>
+int main()
+{
+    __m128d a = _mm_set1_pd(6.28);
+    __m128d b = _mm_set1_pd(3.14);
+    __m128d result = _mm_addsub_pd(a, b);
+    result = _mm_movedup_pd(result);
+    return 0;
+}" SSE3_FOUND)
+
+# Check AVX
+set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
@ -41,25 +60,17 @@ int main()
    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
    __m256 result = _mm256_add_ps (a, b);
    return 0;
-}" FIND_AVX_10)
+}" AVX_FOUND)

-IF(${FIND_AVX_20})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
-    ENDIF()
-ENDIF()
-
-IF(${FIND_AVX_10})
-    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-        SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
-    ELSEIF(MSVC)
-        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
-    ENDIF()
-ENDIF()
+# Check AVX 2
+set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" AVX2_FOUND)

-IF(${FIND_AVX_10})
-    SET(AVX_FOUND TRUE)
-    MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
-ENDIF()
+mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)
--- a/demo/image_classification/.gitignore
+++ b/demo/image_classification/.gitignore
@ -5,3 +5,5 @@ plot.png
 train.log
 image_provider_copy_1.py
 *pyc
+train.list
+test.list
--- a/demo/image_classification/data/download_cifar.sh
+++ b/demo/image_classification/data/download_cifar.sh
--- a/demo/image_classification/data/process_cifar.py
+++ b/demo/image_classification/data/process_cifar.py
@ -16,7 +16,6 @@ import numpy as np
 import sys
 import os
 import PIL.Image as Image
-
 """
  Usage: python process_cifar input_dir output_dir
 """
@ -30,6 +29,7 @@ def mkdir_not_exist(path):
    if not os.path.exists(path):
        os.mkdir(path)

+
 def create_dir_structure(output_dir):
    """
    Create the directory structure for the directory.
@ -39,8 +39,8 @@ def create_dir_structure(output_dir):
    mkdir_not_exist(os.path.join(output_dir, "train"))
    mkdir_not_exist(os.path.join(output_dir, "test"))

-def convert_batch(batch_path, label_set, label_map,
-                  output_dir, data_split):
+
+def convert_batch(batch_path, label_set, label_map, output_dir, data_split):
    """
    Convert CIFAR batch to the structure of Paddle format.
    batch_path: the batch to be converted.
@ -67,11 +67,23 @@ if __name__ == '__main__':
    output_dir = sys.argv[2]
    num_batch = 5
    create_dir_structure(output_dir)
-    label_map = {0: "airplane", 1: "automobile", 2: "bird", 3: "cat", 4: "deer",
-                 5: "dog", 6: "frog", 7: "horse", 8: "ship", 9: "truck"}
+    label_map = {
+        0: "airplane",
+        1: "automobile",
+        2: "bird",
+        3: "cat",
+        4: "deer",
+        5: "dog",
+        6: "frog",
+        7: "horse",
+        8: "ship",
+        9: "truck"
+    }
    labels = {}
    for i in range(1, num_batch + 1):
-        convert_batch(os.path.join(input_dir, "data_batch_%d" % i), labels,
-                      label_map, output_dir, "train")
-    convert_batch(os.path.join(input_dir, "test_batch"), {},
-                  label_map, output_dir, "test")
+        convert_batch(
+            os.path.join(input_dir, "data_batch_%d" % i), labels, label_map,
+            output_dir, "train")
+    convert_batch(
+        os.path.join(input_dir, "test_batch"), {}, label_map, output_dir,
+        "test")
--- a/demo/image_classification/image_provider.py
+++ b/demo/image_classification/image_provider.py
@ -46,26 +46,31 @@ def hook(settings, img_size, mean_img_size, num_classes, color, meta, use_jpeg,

    settings.img_mean = image_util.load_meta(settings.meta_path,
                                             settings.mean_img_size,
-                                             settings.img_size,
-                                             settings.color)
+                                             settings.img_size, settings.color)

    settings.logger.info('Image size: %s', settings.img_size)
    settings.logger.info('Meta path: %s', settings.meta_path)
    settings.input_types = [
        dense_vector(settings.img_raw_size),  # image feature
-        integer_value(settings.num_classes)]  # labels
+        integer_value(settings.num_classes)
+    ]  # labels

    settings.logger.info('DataProvider Initialization finished')


-@provider(init_hook=hook)
-def processData(settings, file_name):
+@provider(init_hook=hook, min_pool_size=0)
+def processData(settings, file_list):
    """
    The main function for loading data.
    Load the batch, iterate all the images and labels in this batch.
-    file_name: the batch file name.
+    file_list: the batch file list.
    """
-    data = cPickle.load(io.open(file_name, 'rb'))
+    with open(file_list, 'r') as fdata:
+        lines = [line.strip() for line in fdata]
+        random.shuffle(lines)
+        for file_name in lines:
+            with io.open(file_name.strip(), 'rb') as file:
+                data = cPickle.load(file)
                indexes = list(range(len(data['images'])))
                if settings.is_train:
                    random.shuffle(indexes)
@ -74,8 +79,8 @@ def processData(settings, file_name):
                        img = image_util.decode_jpeg(data['images'][i])
                    else:
                        img = data['images'][i]
-        img_feat = image_util.preprocess_img(img, settings.img_mean,
-                                             settings.img_size, settings.is_train,
-                                             settings.color)
+                    img_feat = image_util.preprocess_img(
+                        img, settings.img_mean, settings.img_size,
+                        settings.is_train, settings.color)
                    label = data['labels'][i]
-        yield img_feat.tolist(), int(label)
+                    yield img_feat.astype('float32'), int(label)
--- a/demo/image_classification/image_util.py
+++ b/demo/image_classification/image_util.py
@ -16,6 +16,7 @@ import numpy as np
 from PIL import Image
 from cStringIO import StringIO

+
 def resize_image(img, target_size):
    """
    Resize an image so that the shorter edge has length target_size.
@ -23,10 +24,12 @@ def resize_image(img, target_size):
    target_size: the target resized image size.
    """
    percent = (target_size / float(min(img.size[0], img.size[1])))
-    resized_size = int(round(img.size[0] * percent)), int(round(img.size[1] * percent))
+    resized_size = int(round(img.size[0] * percent)), int(
+        round(img.size[1] * percent))
    img = img.resize(resized_size, Image.ANTIALIAS)
    return img

+
 def flip(im):
    """
    Return the flipped image.
@ -38,6 +41,7 @@ def flip(im):
    else:
        return im[:, ::-1]

+
 def crop_img(im, inner_size, color=True, test=True):
    """
    Return cropped image.
@ -50,7 +54,8 @@ def crop_img(im, inner_size, color=True, test=True):
      If True, crop the center of images.
    """
    if color:
-        height, width = max(inner_size, im.shape[1]), max(inner_size, im.shape[2])
+        height, width = max(inner_size, im.shape[1]), max(inner_size,
+                                                          im.shape[2])
        padded_im = np.zeros((3, height, width))
        startY = (height - im.shape[1]) / 2
        startX = (width - im.shape[2]) / 2
@ -58,7 +63,8 @@ def crop_img(im, inner_size, color=True, test=True):
        padded_im[:, startY:endY, startX:endX] = im
    else:
        im = im.astype('float32')
-        height, width = max(inner_size, im.shape[0]), max(inner_size, im.shape[1])
+        height, width = max(inner_size, im.shape[0]), max(inner_size,
+                                                          im.shape[1])
        padded_im = np.zeros((height, width))
        startY = (height - im.shape[0]) / 2
        startX = (width - im.shape[1]) / 2
@ -79,12 +85,14 @@ def crop_img(im, inner_size, color=True, test=True):
        pic = flip(pic)
    return pic

+
 def decode_jpeg(jpeg_string):
    np_array = np.array(Image.open(StringIO(jpeg_string)))
    if len(np_array.shape) == 3:
        np_array = np.transpose(np_array, (2, 0, 1))
    return np_array

+
 def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    """
    Does data augmentation for images.
@ -99,6 +107,7 @@ def preprocess_img(im, img_mean, crop_size, is_train, color=True):
    pic -= img_mean
    return pic.flatten()

+
 def load_meta(meta_path, mean_img_size, crop_size, color=True):
    """
    Return the loaded meta file.
@ -111,15 +120,16 @@ def load_meta(meta_path, mean_img_size, crop_size, color=True):
    if color:
        assert (mean_img_size * mean_img_size * 3 == mean.shape[0])
        mean = mean.reshape(3, mean_img_size, mean_img_size)
-        mean = mean[:, border: border + crop_size,
-                       border: border + crop_size].astype('float32')
+        mean = mean[:, border:border + crop_size, border:border +
+                    crop_size].astype('float32')
    else:
        assert (mean_img_size * mean_img_size == mean.shape[0])
        mean = mean.reshape(mean_img_size, mean_img_size)
-        mean = mean[border: border + crop_size,
-                    border: border + crop_size].astype('float32')
+        mean = mean[border:border + crop_size, border:border +
+                    crop_size].astype('float32')
    return mean

+
 def load_image(img_path, is_color=True):
    """
    Load image and return. 
@ -130,6 +140,7 @@ def load_image(img_path, is_color=True):
    img.load()
    return img

+
 def oversample(img, crop_dims):
    """
    image : iterable of (H x W x K) ndarrays
@ -152,15 +163,14 @@ def oversample(img, crop_dims):
        for j in w_indices:
            crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
            curr += 1
-    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
-        -crop_dims / 2.0,
-         crop_dims / 2.0
-    ])
+    crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate(
+        [-crop_dims / 2.0, crop_dims / 2.0])
    crops_ix = np.tile(crops_ix, (2, 1))

    # Extract crops
-    crops = np.empty((10 * len(img), crop_dims[0], crop_dims[1],
-                      im_shape[-1]), dtype=np.float32)
+    crops = np.empty(
+        (10 * len(img), crop_dims[0], crop_dims[1], im_shape[-1]),
+        dtype=np.float32)
    ix = 0
    for im in img:
        for crop in crops_ix:
@ -169,9 +179,13 @@ def oversample(img, crop_dims):
        crops[ix - 5:ix] = crops[ix - 5:ix, :, ::-1, :]  # flip for mirrors
    return crops

+
 class ImageTransformer:
-    def __init__(self, transpose = None,
-                 channel_swap = None, mean = None, is_color = True):
+    def __init__(self,
+                 transpose=None,
+                 channel_swap=None,
+                 mean=None,
+                 is_color=True):
        self.transpose = transpose
        self.channel_swap = None
        self.mean = None
--- a/demo/image_classification/prediction.py
+++ b/demo/image_classification/prediction.py
@ -24,9 +24,11 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config

-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)

+
 class ImageClassifier():
    def __init__(self,
                 train_conf,
@ -69,7 +71,8 @@ class ImageClassifier():
        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (gpu)
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (gpu))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)

@ -90,14 +93,14 @@ class ImageClassifier():
            # image_util.resize_image: short side is self.resize_dim
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
            input[0] = image.astype(np.float32)
            input = image_util.oversample(input, self.crop_dims)
        else:
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
            input[0] = np.array(image).astype(np.float32)

        data_in = []
@ -133,6 +136,7 @@ class ImageClassifier():
        lab = np.argsort(-prob)
        logging.info("Label of %s is: %d", image, lab[0])

+
 if __name__ == '__main__':
    image_size = 32
    crop_size = 32
@ -144,7 +148,8 @@ if __name__ == '__main__':
    image = sys.argv[2]
    use_gpu = bool(int(sys.argv[3]))

-    obj = ImageClassifier(train_conf=config,
+    obj = ImageClassifier(
+        train_conf=config,
        model_dir=model_path,
        resize_dim=image_size,
        crop_dim=crop_size,
--- a/demo/image_classification/preprocess.py
+++ b/demo/image_classification/preprocess.py
@ -19,22 +19,36 @@ from optparse import OptionParser
 def option_parser():
    parser = OptionParser(usage="usage: python preprcoess.py "\
                          "-i data_dir [options]")
-    parser.add_option("-i", "--input", action="store",
-                      dest="input", help="Input data directory.")
-    parser.add_option("-s", "--size", action="store",
-                      dest="size", help="Processed image size.")
-    parser.add_option("-c", "--color", action="store",
-                      dest="color", help="whether to use color images.")
+    parser.add_option(
+        "-i",
+        "--input",
+        action="store",
+        dest="input",
+        help="Input data directory.")
+    parser.add_option(
+        "-s",
+        "--size",
+        action="store",
+        dest="size",
+        help="Processed image size.")
+    parser.add_option(
+        "-c",
+        "--color",
+        action="store",
+        dest="color",
+        help="whether to use color images.")
    return parser.parse_args()

+
 if __name__ == '__main__':
    options, args = option_parser()
    data_dir = options.input
    processed_image_size = int(options.size)
    color = options.color == "1"
-     data_creator = ImageClassificationDatasetCreater(data_dir,
-                                                      processed_image_size,
-                                                      color)
+    data_creator = ImageClassificationDatasetCreater(
+        data_dir, processed_image_size, color)
+    data_creator.train_list_name = "train.txt"
+    data_creator.test_list_name = "test.txt"
    data_creator.num_per_batch = 1000
    data_creator.overwrite = True
    data_creator.create_batches()
--- a/demo/image_classification/preprocess.sh
+++ b/demo/image_classification/preprocess.sh
@ -17,3 +17,6 @@ set -e
 data_dir=./data/cifar-out

 python preprocess.py -i $data_dir -s 32 -c 1
+
+echo "data/cifar-out/batches/train.txt" > train.list
+echo "data/cifar-out/batches/test.txt" > test.list
--- a/demo/image_classification/vgg_16_cifar.py
+++ b/demo/image_classification/vgg_16_cifar.py
@ -21,12 +21,18 @@ if not is_predict:
    data_dir = 'data/cifar-out/batches/'
    meta_path = data_dir + 'batches.meta'

-  args = {'meta':meta_path,'mean_img_size': 32,
-          'img_size': 32,'num_classes': 10,
-          'use_jpeg': 1,'color': "color"}
-
-  define_py_data_sources2(train_list=data_dir+"train.list",
-                          test_list=data_dir+'test.list',
+    args = {
+        'meta': meta_path,
+        'mean_img_size': 32,
+        'img_size': 32,
+        'num_classes': 10,
+        'use_jpeg': 1,
+        'color': "color"
+    }
+
+    define_py_data_sources2(
+        train_list="train.list",
+        test_list="test.list",  # preprocess.sh writes test.list next to train.list; was "train.list"
        module='image_provider',
        obj='processData',
        args=args)
@ -36,18 +42,14 @@ settings(
    batch_size=128,
    learning_rate=0.1 / 128.0,
    learning_method=MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    regularization=L2Regularization(0.0005 * 128))

 #######################Network Configuration #############
 data_size = 3 * 32 * 32
 label_size = 10
-img = data_layer(name='image',
-                 size=data_size)
+img = data_layer(name='image', size=data_size)
 # small_vgg is predefined in trainer_config_helpers.networks
-predict = small_vgg(input_image=img,
-                    num_channels=3,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=3, num_classes=label_size)

 if not is_predict:
    lbl = data_layer(name="label", size=label_size)
--- a/demo/introduction/README.md
+++ b/demo/introduction/README.md
@ -1,4 +1,3 @@
 This folder contains scripts used in PaddlePaddle introduction.
 - use `bash train.sh` to train a simple linear regression model
 - use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].
-
--- a/demo/introduction/dataprovider.py
+++ b/demo/introduction/dataprovider.py
@ -15,10 +15,10 @@
 from paddle.trainer.PyDataProvider2 import *
 import random

+
 # define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)], use_seq=False)
 def process(settings, input_file):
    for i in xrange(2000):
        x = random.random()
        yield [x], [2 * x + 0.3]
-
--- a/demo/introduction/evaluate_model.py
+++ b/demo/introduction/evaluate_model.py
@ -23,14 +23,17 @@ Usage:
 import numpy as np
 import os

+
 def load(file_name):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header for float type.
        return np.fromfile(f, dtype=np.float32)

+
 def main():
    print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
                                           load('output/pass-00029/b'))

+
 if __name__ == '__main__':
    main()
--- a/demo/introduction/trainer_config.py
+++ b/demo/introduction/trainer_config.py
@ -16,9 +16,14 @@ from paddle.trainer_config_helpers import *

 # 1. read data. Suppose you saved above python code as dataprovider.py
 data_file = 'empty.list'
-with open(data_file, 'w') as f: f.writelines(' ')
-define_py_data_sources2(train_list=data_file, test_list=None, 
-        module='dataprovider', obj='process',args={})
+with open(data_file, 'w') as f:
+    f.writelines(' ')
+define_py_data_sources2(
+    train_list=data_file,
+    test_list=None,
+    module='dataprovider',
+    obj='process',
+    args={})

 # 2. learning algorithm
 settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
@ -26,7 +31,11 @@ settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
 # 3. Network configuration
 x = data_layer(name='x', size=1)
 y = data_layer(name='y', size=1)
-y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
+y_predict = fc_layer(
+    input=x,
+    param_attr=ParamAttr(name='w'),
+    size=1,
+    act=LinearActivation(),
+    bias_attr=ParamAttr(name='b'))
 cost = regression_cost(input=y_predict, label=y)
 outputs(cost)
-
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@ -19,4 +19,3 @@ done
 cd $DIR
 rm -f *.list
 python generate_list.py
-
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@ -2,10 +2,9 @@ from paddle.trainer.PyDataProvider2 import *


 # Define a py data provider
-@provider(input_types={
-    'pixel': dense_vector(28 * 28),
-    'label': integer_value(10)
-})
+@provider(
+    input_types={'pixel': dense_vector(28 * 28),
+                 'label': integer_value(10)})
 def process(settings, filename):  # settings is not used currently.
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@ -18,10 +18,10 @@ is_predict = get_config_arg("is_predict", bool, False)

 ####################Data Configuration ##################

-
 if not is_predict:
    data_dir = './data/'
-  define_py_data_sources2(train_list= data_dir + 'train.list',
+    define_py_data_sources2(
+        train_list=data_dir + 'train.list',
        test_list=data_dir + 'test.list',
        module='mnist_provider',
        obj='process')
@ -31,8 +31,7 @@ settings(
    batch_size=128,
    learning_rate=0.1 / 128.0,
    learning_method=MomentumOptimizer(0.9),
-    regularization = L2Regularization(0.0005 * 128)
-)
+    regularization=L2Regularization(0.0005 * 128))

 #######################Network Configuration #############

@ -41,9 +40,7 @@ label_size=10
 img = data_layer(name='pixel', size=data_size)

 # small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img,
-                    num_channels=1,
-                    num_classes=label_size)
+predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)

 if not is_predict:
    lbl = data_layer(name="label", size=label_size)
--- a/demo/model_zoo/embedding/extract_para.py
+++ b/demo/model_zoo/embedding/extract_para.py
@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Example:
    python extract_para.py --preModel PREMODEL --preDict PREDICT \
@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct

+
 def get_row_index(preDict, usrDict):
    """
    Get the row positions for all words in user dictionary from pre-trained dictionary.
@ -47,7 +47,9 @@ def get_row_index(preDict, usrDict):
            pos.append(index[word])
    return pos

-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim):
+
+def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
+                                  paraDim):
    """
    Extract desired parameters from a pretrained embedding model based on user dictionary
    """
@ -70,6 +72,7 @@ def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict, paraDim)
    print "extract parameters finish, total", len(rowIndex), "lines"
    fi.close()

+
 def main():
    """
    Main entry for running paraconvert.py 
@ -78,19 +81,33 @@ def main():
            "python %prog --preModel PREMODEL --preDict PREDICT" \
            " --usrModel USRMODEL --usrDict USRDICT -d DIM"
    parser = OptionParser(usage)
-    parser.add_option("--preModel", action="store", dest="preModel",
+    parser.add_option(
+        "--preModel",
+        action="store",
+        dest="preModel",
        help="the name of pretrained embedding model")
-    parser.add_option("--preDict", action="store", dest="preDict",
+    parser.add_option(
+        "--preDict",
+        action="store",
+        dest="preDict",
        help="the name of pretrained dictionary")
-    parser.add_option("--usrModel", action="store", dest="usrModel",
+    parser.add_option(
+        "--usrModel",
+        action="store",
+        dest="usrModel",
        help="the name of output usr embedding model")
-    parser.add_option("--usrDict", action="store", dest="usrDict",
+    parser.add_option(
+        "--usrDict",
+        action="store",
+        dest="usrDict",
        help="the name of user specified dictionary")
-    parser.add_option("-d", action="store", dest="dim",
-                      help="dimension of parameter")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
    extract_parameters_by_usrDict(options.preModel, options.preDict,
-                      options.usrModel, options.usrDict, int(options.dim))
+                                  options.usrModel, options.usrDict,
+                                  int(options.dim))
+

 if __name__ == '__main__':
    main()
--- a/demo/model_zoo/embedding/paraconvert.py
+++ b/demo/model_zoo/embedding/paraconvert.py
@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Example:
    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
@ -29,6 +28,7 @@ Options:
 from optparse import OptionParser
 import struct

+
 def binary2text(input, output, paraDim):
    """
    Convert a binary parameter file of embedding model to be a text file.  
@ -76,6 +76,7 @@ def binary2text(input, output, paraDim):
    fo.close()
    print "binary2text finish, total", line, "lines"

+
 def get_para_count(input):
    """
    Compute the total number of embedding parameters in input text file. 
@ -90,6 +91,7 @@ def get_para_count(input):
            numRows += 1
    return numRows * paraDim

+
 def text2binary(input, output, paddle_head=True):
    """
    Convert a text parameter file of embedding model to be a binary file.
@ -123,6 +125,7 @@ def text2binary(input, output, paddle_head=True):
    fo.close()
    print "text2binary finish, total", count, "lines"

+
 def main():
    """
    Main entry for running paraconvert.py 
@ -131,21 +134,26 @@ def main():
            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
            "python %prog --t2b -i INPUT -o OUTPUT"
    parser = OptionParser(usage)
-    parser.add_option("--b2t", action="store_true",
+    parser.add_option(
+        "--b2t",
+        action="store_true",
        help="convert parameter file of embedding model from binary to text")
-    parser.add_option("--t2b", action="store_true",
+    parser.add_option(
+        "--t2b",
+        action="store_true",
        help="convert parameter file of embedding model from text to binary")
-    parser.add_option("-i", action="store", dest="input",
-                      help="input parameter file name")
-    parser.add_option("-o", action="store", dest="output",
-                      help="output parameter file name")
-    parser.add_option("-d", action="store", dest="dim",
-                      help="dimension of parameter")
+    parser.add_option(
+        "-i", action="store", dest="input", help="input parameter file name")
+    parser.add_option(
+        "-o", action="store", dest="output", help="output parameter file name")
+    parser.add_option(
+        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
    if options.b2t:
        binary2text(options.input, options.output, options.dim)
    if options.t2b:
        text2binary(options.input, options.output)

+
 if __name__ == '__main__':
    main()
--- a/demo/model_zoo/resnet/classify.py
+++ b/demo/model_zoo/resnet/classify.py
@ -26,16 +26,22 @@ from py_paddle import swig_paddle, DataProviderConverter
 from paddle.trainer.PyDataProvider2 import dense_vector
 from paddle.trainer.config_parser import parse_config

-logging.basicConfig(format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
+logging.basicConfig(
+    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
 logging.getLogger().setLevel(logging.INFO)

+
 class ImageClassifier():
-    def __init__(self, train_conf, model_dir=None,
-                 resize_dim=256, crop_dim=224,
+    def __init__(self,
+                 train_conf,
+                 model_dir=None,
+                 resize_dim=256,
+                 crop_dim=224,
                 use_gpu=True,
                 mean_file=None,
                 output_layer=None,
-                 oversample=False, is_color=True):
+                 oversample=False,
+                 is_color=True):
        """
        train_conf: network configure.
        model_dir: string, directory of model.
@ -79,7 +85,8 @@ class ImageClassifier():
        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
        conf = parse_config(train_conf, conf_args)
        swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
+        self.network = swig_paddle.GradientMachine.createFromConfigProto(
+            conf.model_config)
        assert isinstance(self.network, swig_paddle.GradientMachine)
        self.network.loadParameters(self.model_dir)

@ -105,14 +112,14 @@ class ImageClassifier():
            # image_util.resize_image: short side is self.resize_dim
            image = image_util.resize_image(image, self.resize_dim)
            image = np.array(image)
-            input = np.zeros((1, image.shape[0], image.shape[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
            input[0] = image.astype(np.float32)
            input = image_util.oversample(input, self.crop_dims)
        else:
            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros((1, self.crop_dims[0], self.crop_dims[1], 3),
-                             dtype=np.float32)
+            input = np.zeros(
+                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
            input[0] = np.array(image).astype(np.float32)

        data_in = []
@ -215,37 +222,63 @@ class ImageClassifier():
        of = open(file, 'wb')
        cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)

+
 def option_parser():
    """
    Main entry for predciting
    """
    usage = "%prog -c config -i data_list -w model_dir [options]"
    parser = OptionParser(usage="usage: %s" % usage)
-    parser.add_option("-j", "--job",
-                      action="store", dest="job_type",
+    parser.add_option(
+        "-j",
+        "--job",
+        action="store",
+        dest="job_type",
        help="job type: predict, extract\
                            predict: predicting,\
                            extract: extract features")
-    parser.add_option("-c", "--conf",
-                      action="store", dest="train_conf",
+    parser.add_option(
+        "-c",
+        "--conf",
+        action="store",
+        dest="train_conf",
        help="network config")
-    parser.add_option("-i", "--data",
-                      action="store", dest="data_file",
-                      help="image list")
-    parser.add_option("-w", "--model",
-                      action="store", dest="model_path",
-                      default=None, help="model path")
-    parser.add_option("-g", "--use_gpu", action="store",
-                      dest="use_gpu", default=True,
+    parser.add_option(
+        "-i", "--data", action="store", dest="data_file", help="image list")
+    parser.add_option(
+        "-w",
+        "--model",
+        action="store",
+        dest="model_path",
+        default=None,
+        help="model path")
+    parser.add_option(
+        "-g",
+        "--use_gpu",
+        action="store",
+        dest="use_gpu",
+        default=True,
        help="Whether to use gpu mode.")
-    parser.add_option("-o", "--output_dir",
-                      action="store", dest="output_dir",
-                      default="output", help="output path")
-    parser.add_option("-m", "--mean", action="store",
-                      dest="mean", default=None,
+    parser.add_option(
+        "-o",
+        "--output_dir",
+        action="store",
+        dest="output_dir",
+        default="output",
+        help="output path")
+    parser.add_option(
+        "-m",
+        "--mean",
+        action="store",
+        dest="mean",
+        default=None,
        help="mean file.")
-    parser.add_option("-p", "--multi_crop", action="store_true",
-                      dest="multi_crop", default=False,
+    parser.add_option(
+        "-p",
+        "--multi_crop",
+        action="store_true",
+        dest="multi_crop",
+        default=False,
        help="Wether to use multiple crops on image.")
    parser.add_option("-l", "--output_layer", action="store",
                      dest="output_layer", default=None,
@ -254,13 +287,15 @@ def option_parser():
                           "classification probability, output in resnet.py.")
    return parser.parse_args()

+
 def main():
    """
    1. parse input arguments.
    2. predicting or extract features according job type.
    """
    options, args = option_parser()
-    obj = ImageClassifier(options.train_conf,
+    obj = ImageClassifier(
+        options.train_conf,
        options.model_path,
        use_gpu=options.use_gpu,
        mean_file=options.mean,
@ -270,8 +305,8 @@ def main():
        obj.predict(options.data_file)

    elif options.job_type == "extract":
-        obj.extract(options.data_file,
-                    options.output_dir)
+        obj.extract(options.data_file, options.output_dir)
+

 if __name__ == '__main__':
    main()
--- a/Show More
+++ b/Show More