diff --git a/.copyright.hook b/.copyright.hook
new file mode 100644
index 0000000000..2446e27248
--- /dev/null
+++ b/.copyright.hook
@@ -0,0 +1,106 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import io, re
+import sys, os
+import subprocess
+import platform
+
+COPYRIGHT = '''
+  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+LANG_COMMENT_MARK = None
+
+NEW_LINE_MARK = None
+
+COPYRIGHT_HEADER = None
+
+if platform.system() == "Windows":
+    NEW_LINE_MARK = "\r\n"
+else:
+    NEW_LINE_MARK = '\n'
+    COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1]
+    p = re.search('(\d{4})', COPYRIGHT_HEADER).group(0)
+    process = subprocess.Popen(["date", "+%Y"], stdout=subprocess.PIPE)
+    date, err = process.communicate()
+    date = date.decode("utf-8").rstrip("\n")
+    COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(p, date)
+
+
+def generate_copyright(template, lang='C'):
+    if lang == 'Python':
+        LANG_COMMENT_MARK = '#'
+    else:
+        LANG_COMMENT_MARK = "//"
+
+    lines = template.split(NEW_LINE_MARK)
+    ans = LANG_COMMENT_MARK + " " + COPYRIGHT_HEADER + NEW_LINE_MARK
+    for lino, line in enumerate(lines):
+        if lino == 0 or lino == 1 or lino == len(lines) - 1: continue
+        ans += LANG_COMMENT_MARK + " " + line + NEW_LINE_MARK
+
+    return ans + "\n"
+
+
+def lang_type(filename):
+    if filename.endswith(".py"):
+        return "Python"
+    elif filename.endswith(".h"):
+        return "C"
+    elif filename.endswith(".hpp"):
+        return "C"
+    elif filename.endswith(".cc"):
+        return "C"
+    elif filename.endswith(".cpp"):
+        return "C"
+    elif filename.endswith(".cu"):
+        return "C"
+    elif filename.endswith(".cuh"):
+        return "C"
+    elif filename.endswith(".go"):
+        return "C"
+    elif filename.endswith(".proto"):
+        return "C"
+    else:
+        print("Unsupported filetype")
+        exit(0)
+
+
+def main(argv=None):
+    parser = argparse.ArgumentParser(
+        description='Checker for copyright declaration.')
+    parser.add_argument('filenames', nargs='*', help='Filenames to check')
+    args = parser.parse_args(argv)
+
+    retv = 0
+    for filename in args.filenames:
+        first_line = io.open(filename).readline()
+        if "COPYRIGHT" in first_line.upper() : continue
+        original_contents = io.open(filename).read()
+        new_contents = generate_copyright(
+            COPYRIGHT, lang_type(filename)) + original_contents
+        print('Auto Insert Copyright Header {}'.format(filename))
+        retv = 1
+        with io.open(filename, 'w') as output_file:
+            output_file.write(new_contents)
+
+    return retv
+
+
+if __name__ == '__main__':
+    exit(main())
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 59661c9c1d..89c620bb2f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -31,3 +31,11 @@
     -   id: go-fmt
         types:
         - go
+-   repo: local
+    hooks:
+    -   id: copyright_checker
+        name: copyright_checker
+        entry: python ./.copyright.hook
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
+        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000..54131b48ec
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,46 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at paddle-dev@baidu.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/4/
diff --git a/CODE_OF_CONDUCT_cn.md b/CODE_OF_CONDUCT_cn.md
new file mode 100644
index 0000000000..c3a22f29cc
--- /dev/null
+++ b/CODE_OF_CONDUCT_cn.md
@@ -0,0 +1,50 @@
+# 貢獻者公約
+
+## 我們的承諾
+
+為了促進一個開放透明且受歡迎的環境，我們作為貢獻者和維護者保證，無論年齡、種族、民族、性別認同和表達、體型、殘疾、經驗水平、國籍、個人表現、宗教或性別取向，在我們的專案以及社群的參與者都有不被騷擾的體驗。
+
+## 我們的準則
+
+舉例來說有助於創造正面環境的行為包括：
+* 使用歡迎和包容性語言
+* 尊重不同的觀點和經驗
+* 優雅地接受建設性批評
+* 關注在對於社群最好的事情上
+* 對其他社群成員的表現友善
+
+舉例來說身為參與者不能接受的行為包括：
+* 使用與性有關的言語或是圖像，以及不受歡迎的性騷擾
+* 酸民/反串/釣魚行為或進行侮辱/貶損的評論，人身攻擊及政治攻擊
+* 公開或私下的騷擾
+* 未經許可地發布他人的個人資料，例如住址或是電子地址
+* 其他可以被合理地認定為不恰當或者違反職業操守的行為
+
+## 我們的責任
+
+專案維護者有責任為"可接受的行為"準則做出詮釋，以及對已發生的不被接受的行為採取恰當且公平的糾正措施。
+
+專案維護者有權力及責任去刪除、編輯、拒絕與本行為準則有所違背的評論(comments)、提交(commits)、程式碼、wiki 編輯、問題(issues)和其他貢獻，以及專案維護者可暫時或永久性的禁止任何他們認為有不適當、威脅、冒犯、有害行為的貢獻者。
+
+## 使用範圍
+
+當一個人代表該專案或是其社群時，本行為準則適用於其專案平台和公共平台。
+
+代表專案或是社群的情況，舉例來說包括使用官方專案的電子郵件地址、通過官方的社群媒體帳號發布或線上或線下事件中擔任指定代表。
+
+該專案的呈現方式可由其專案維護者進行進一步的定義及解釋。
+
+## 強制執行
+
+可以透過paddle-dev@baidu.com，來聯繫專案團隊來報告濫用、騷擾或其他不被接受的行為。
+
+任何維護團隊認為有必要且適合的所有投訴都將進行審查及調查，並做出相對應的回應。專案小組有對事件回報者有保密的義務。具體執行的方針近一步細節可能會單獨公佈。
+
+沒有真誠的遵守或是執行本行為準則的專案維護人員，可能會因專案領導人或是其他成員的決定，暫時或是永久的取消其身份。
+
+## 來源
+
+本行為準則改編自[貢獻者公約][首頁]，版本 1.4
+可在此觀看https://www.contributor-covenant.org/zh-tw/version/1/4/code-of-conduct.html
+
+[首頁]: https://www.contributor-covenant.org
diff --git a/README.md b/README.md
index 577528e7aa..d06375a444 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddl
 
       - Optimized math operations through SSE/AVX intrinsics, BLAS libraries
       (e.g. MKL, OpenBLAS, cuBLAS) or customized CPU/GPU kernels.
+      - Optimized CNN networks through MKL-DNN library.
       - Highly optimized recurrent networks which can handle **variable-length**
       sequence without padding.
       - Optimized local and distributed training for models with high dimensional
diff --git a/adversarial/README.md b/adversarial/README.md
new file mode 100644
index 0000000000..51da21918a
--- /dev/null
+++ b/adversarial/README.md
@@ -0,0 +1,9 @@
+# Advbox
+
+Advbox is a Python toolbox to create adversarial examples that fool neural networks. It requires Python and paddle.
+
+## How to use
+
+1. train a model and save it's parameters. (like fluid_mnist.py)
+2. load the parameters which is trained in step1, then reconstruct the model.(like mnist_tutorial_fgsm.py)
+3. use advbox to generate the adversarial sample.
diff --git a/v1_api_demo/gan/data/download_cifar.sh b/adversarial/advbox/__init__.py
old mode 100755
new mode 100644
similarity index 73%
rename from v1_api_demo/gan/data/download_cifar.sh
rename to adversarial/advbox/__init__.py
index bbadc7c10c..f56f14f18d
--- a/v1_api_demo/gan/data/download_cifar.sh
+++ b/adversarial/advbox/__init__.py
@@ -1,5 +1,4 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-set -e
-wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
-tar zxf cifar-10-python.tar.gz
-rm cifar-10-python.tar.gz
+"""
+   A set of tools for generating adversarial example on paddle platform 
+"""
diff --git a/adversarial/advbox/attacks/base.py b/adversarial/advbox/attacks/base.py
new file mode 100644
index 0000000000..000baa48f6
--- /dev/null
+++ b/adversarial/advbox/attacks/base.py
@@ -0,0 +1,52 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+"""
+The base model of the model.
+"""
+from abc import ABCMeta, abstractmethod
+
+
+class Attack(object):
+    """
+    Abstract base class for adversarial attacks. `Attack` represent an adversarial attack
+    which search an adversarial example. subclass should implement the _apply() method.
+
+    Args:
+        model(Model): an instance of the class advbox.base.Model.
+
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(self, model):
+        self.model = model
+
+    def __call__(self, image_label):
+        """
+        Generate the adversarial sample.
+
+        Args:
+        image_label(list): The image and label tuple list with one element.
+        """
+        adv_img = self._apply(image_label)
+        return adv_img
+
+    @abstractmethod
+    def _apply(self, image_label):
+        """
+        Search an adversarial example.
+
+        Args:
+        image_batch(list): The image and label tuple list with one element.
+        """
+        raise NotImplementedError
diff --git a/adversarial/advbox/attacks/gradientsign.py b/adversarial/advbox/attacks/gradientsign.py
new file mode 100644
index 0000000000..cc26ffb690
--- /dev/null
+++ b/adversarial/advbox/attacks/gradientsign.py
@@ -0,0 +1,87 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+"""
+This module provide the attack method for FGSM's implement.
+"""
+from __future__ import division
+import numpy as np
+from collections import Iterable
+from .base import Attack
+
+
+class GradientSignAttack(Attack):
+    """
+    This attack was originally implemented by Goodfellow et al. (2015) with the
+    infinity norm (and is known as the "Fast Gradient Sign Method"). This is therefore called
+    the Fast Gradient Method.
+    Paper link: https://arxiv.org/abs/1412.6572
+    """
+
+    def _apply(self, image_label, epsilons=1000):
+        assert len(image_label) == 1
+        pre_label = np.argmax(self.model.predict(image_label))
+
+        min_, max_ = self.model.bounds()
+        gradient = self.model.gradient(image_label)
+        gradient_sign = np.sign(gradient) * (max_ - min_)
+
+        if not isinstance(epsilons, Iterable):
+            epsilons = np.linspace(0, 1, num=epsilons + 1)
+
+        for epsilon in epsilons:
+            adv_img = image_label[0][0].reshape(
+                gradient_sign.shape) + epsilon * gradient_sign
+            adv_img = np.clip(adv_img, min_, max_)
+            adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
+            if pre_label != adv_label:
+                return adv_img
+
+
+FGSM = GradientSignAttack
+
+
+class IteratorGradientSignAttack(Attack):
+    """
+    This attack was originally implemented by Alexey Kurakin(Google Brain).
+    Paper link: https://arxiv.org/pdf/1607.02533.pdf
+    """
+
+    def _apply(self, image_label, epsilons=100, steps=10):
+        """
+        Apply the iterative gradient sign attack.
+        Args:
+            image_label(list): The image and label tuple list of one element.
+            epsilons(list|tuple|int): The epsilon (input variation parameter).
+            steps(int): The number of iterator steps.
+        Return:
+            numpy.ndarray: The adversarail sample generated by the algorithm.
+        """
+        assert len(image_label) == 1
+        pre_label = np.argmax(self.model.predict(image_label))
+        gradient = self.model.gradient(image_label)
+        min_, max_ = self.model.bounds()
+
+        if not isinstance(epsilons, Iterable):
+            epsilons = np.linspace(0, 1, num=epsilons + 1)
+
+        for epsilon in epsilons:
+            adv_img = image_label[0][0].reshape(gradient.shape)
+            for _ in range(steps):
+                gradient = self.model.gradient([(adv_img, image_label[0][1])])
+                gradient_sign = np.sign(gradient) * (max_ - min_)
+                adv_img = adv_img + epsilon * gradient_sign
+                adv_img = np.clip(adv_img, min_, max_)
+                adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
+                if pre_label != adv_label:
+                    return adv_img
diff --git a/v1_api_demo/model_zoo/resnet/example/__init__.py b/adversarial/advbox/models/__init__.py
similarity index 83%
rename from v1_api_demo/model_zoo/resnet/example/__init__.py
rename to adversarial/advbox/models/__init__.py
index f662d68263..eee0f6efd4 100644
--- a/v1_api_demo/model_zoo/resnet/example/__init__.py
+++ b/adversarial/advbox/models/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,3 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Paddle model for target of attack 
+"""
diff --git a/adversarial/advbox/models/base.py b/adversarial/advbox/models/base.py
new file mode 100644
index 0000000000..084e563f7b
--- /dev/null
+++ b/adversarial/advbox/models/base.py
@@ -0,0 +1,103 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+"""
+The base model of the model.
+"""
+from abc import ABCMeta
+import abc
+
+abstractmethod = abc.abstractmethod
+
+
+class Model(object):
+    """
+    Base class of model to provide attack.
+
+
+    Args:
+        bounds(tuple): The lower and upper bound for the image pixel.
+        channel_axis(int): The index of the axis that represents the color channel.
+        preprocess(tuple): Two element tuple used to preprocess the input. First
+            substract the first element, then divide the second element.
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(self, bounds, channel_axis, preprocess=None):
+        assert len(bounds) == 2
+        assert channel_axis in [0, 1, 2, 3]
+
+        if preprocess is None:
+            preprocess = (0, 1)
+        self._bounds = bounds
+        self._channel_axis = channel_axis
+        self._preprocess = preprocess
+
+    def bounds(self):
+        """
+        Return the upper and lower bounds of the model.
+        """
+        return self._bounds
+
+    def channel_axis(self):
+        """
+        Return the channel axis of the model.
+        """
+        return self._channel_axis
+
+    def _process_input(self, input_):
+        res = input_
+        sub, div = self._preprocess
+        if sub != 0:
+            res = input_ - sub
+        assert div != 0
+        if div != 1:
+            res /= div
+        return res
+
+    @abstractmethod
+    def predict(self, image_batch):
+        """
+        Calculate the prediction of the image batch.
+
+        Args:
+            image_batch(numpy.ndarray): image batch of shape (batch_size, height, width, channels).
+
+        Return:
+            numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def num_classes(self):
+        """
+        Determine the number of the classes
+
+        Return:
+            int: the number of the classes
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def gradient(self, image_batch):
+        """
+        Calculate the gradient of the cross-entropy loss w.r.t the image.
+
+        Args:
+            image_batch(list): The image and label tuple list.
+
+        Return:
+            numpy.ndarray: gradient of the cross-entropy loss w.r.t the image with
+                the shape (height, width, channel).
+        """
+        raise NotImplementedError
diff --git a/adversarial/advbox/models/paddle.py b/adversarial/advbox/models/paddle.py
new file mode 100644
index 0000000000..4048b47f89
--- /dev/null
+++ b/adversarial/advbox/models/paddle.py
@@ -0,0 +1,114 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from __future__ import absolute_import
+
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+from paddle.v2.fluid.framework import program_guard
+
+from .base import Model
+
+
+class PaddleModel(Model):
+    """
+    Create a PaddleModel instance.
+    When you need to generate a adversarial sample, you should construct an instance of PaddleModel.
+
+    Args:
+        program(paddle.v2.fluid.framework.Program): The program of the model which generate the adversarial sample.
+        input_name(string): The name of the input.
+        logits_name(string): The name of the logits.
+        predict_name(string): The name of the predict.
+        cost_name(string): The name of the loss in the program.
+    """
+
+    def __init__(self,
+                 program,
+                 input_name,
+                 logits_name,
+                 predict_name,
+                 cost_name,
+                 bounds,
+                 channel_axis=3,
+                 preprocess=None):
+        super(PaddleModel, self).__init__(
+            bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
+
+        if preprocess is None:
+            preprocess = (0, 1)
+
+        self._program = program
+        self._place = fluid.CPUPlace()
+        self._exe = fluid.Executor(self._place)
+
+        self._input_name = input_name
+        self._logits_name = logits_name
+        self._predict_name = predict_name
+        self._cost_name = cost_name
+
+        # gradient
+        loss = self._program.block(0).var(self._cost_name)
+        param_grads = fluid.backward.append_backward(
+            loss, parameter_list=[self._input_name])
+        self._gradient = dict(param_grads)[self._input_name]
+
+    def predict(self, image_batch):
+        """
+            Predict the label of the image_batch.
+
+            Args:
+                image_batch(list): The image and label tuple list.
+            Return:
+                numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
+        """
+        feeder = fluid.DataFeeder(
+            feed_list=[self._input_name, self._logits_name],
+            place=self._place,
+            program=self._program)
+        predict_var = self._program.block(0).var(self._predict_name)
+        predict = self._exe.run(self._program,
+                                feed=feeder.feed(image_batch),
+                                fetch_list=[predict_var])
+        return predict
+
+    def num_classes(self):
+        """
+            Calculate the number of classes of the output label. 
+
+        Return:
+            int: the number of classes
+        """
+        predict_var = self._program.block(0).var(self._predict_name)
+        assert len(predict_var.shape) == 2
+        return predict_var.shape[1]
+
+    def gradient(self, image_batch):
+        """
+        Calculate the gradient of the loss w.r.t the input.
+
+        Args:
+            image_batch(list): The image and label tuple list.
+        Return:
+            list: The list of the gradient of the image.
+        """
+        feeder = fluid.DataFeeder(
+            feed_list=[self._input_name, self._logits_name],
+            place=self._place,
+            program=self._program)
+
+        grad, = self._exe.run(self._program,
+                              feed=feeder.feed(image_batch),
+                              fetch_list=[self._gradient])
+        return grad
diff --git a/adversarial/fluid_mnist.py b/adversarial/fluid_mnist.py
new file mode 100644
index 0000000000..f8c7fe8d0e
--- /dev/null
+++ b/adversarial/fluid_mnist.py
@@ -0,0 +1,99 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+"""
+CNN on mnist data using fluid api of paddlepaddle
+"""
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+
+def mnist_cnn_model(img):
+    """
+    Mnist cnn model
+
+    Args:
+        img(Varaible): the input image to be recognized
+
+    Returns:
+        Variable: the label prediction
+    """
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
+        input=img,
+        num_filters=20,
+        filter_size=5,
+        pool_size=2,
+        pool_stride=2,
+        act='relu')
+
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
+        input=conv_pool_1,
+        num_filters=50,
+        filter_size=5,
+        pool_size=2,
+        pool_stride=2,
+        act='relu')
+
+    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
+    return logits
+
+
+def main():
+    """
+    Train the cnn model on mnist datasets
+    """
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    logits = mnist_cnn_model(img)
+    cost = fluid.layers.cross_entropy(input=logits, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    optimizer = fluid.optimizer.Adam(learning_rate=0.01)
+    optimizer.minimize(avg_cost)
+
+    accuracy = fluid.evaluator.Accuracy(input=logits, label=label)
+
+    BATCH_SIZE = 50
+    PASS_NUM = 3
+    ACC_THRESHOLD = 0.98
+    LOSS_THRESHOLD = 10.0
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=500),
+        batch_size=BATCH_SIZE)
+
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
+    exe.run(fluid.default_startup_program())
+
+    for pass_id in range(PASS_NUM):
+        accuracy.reset(exe)
+        for data in train_reader():
+            loss, acc = exe.run(fluid.default_main_program(),
+                                feed=feeder.feed(data),
+                                fetch_list=[avg_cost] + accuracy.metrics)
+            pass_acc = accuracy.eval(exe)
+            print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc="
+                  + str(pass_acc))
+            if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD:
+                break
+
+        pass_acc = accuracy.eval(exe)
+        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
+    fluid.io.save_params(
+        exe, dirname='./mnist', main_program=fluid.default_main_program())
+    print('train mnist done')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/adversarial/mnist_tutorial_fgsm.py b/adversarial/mnist_tutorial_fgsm.py
new file mode 100644
index 0000000000..c63e030cd8
--- /dev/null
+++ b/adversarial/mnist_tutorial_fgsm.py
@@ -0,0 +1,100 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+"""
+FGSM demos on mnist using advbox tool.
+"""
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import matplotlib.pyplot as plt
+import numpy as np
+
+from advbox.models.paddle import PaddleModel
+from advbox.attacks.gradientsign import GradientSignAttack
+
+
+def cnn_model(img):
+    """
+    Mnist cnn model
+    Args:
+        img(Varaible): the input image to be recognized
+    Returns:
+        Variable: the label prediction
+    """
+    #conv1 = fluid.nets.conv2d()
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
+        input=img,
+        num_filters=20,
+        filter_size=5,
+        pool_size=2,
+        pool_stride=2,
+        act='relu')
+
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
+        input=conv_pool_1,
+        num_filters=50,
+        filter_size=5,
+        pool_size=2,
+        pool_stride=2,
+        act='relu')
+
+    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
+    return logits
+
+
+def main():
+    """
+    Advbox demo which demonstrate how to use advbox.
+    """
+    IMG_NAME = 'img'
+    LABEL_NAME = 'label'
+
+    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
+    # gradient should flow
+    img.stop_gradient = False
+    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
+    logits = cnn_model(img)
+    cost = fluid.layers.cross_entropy(input=logits, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    BATCH_SIZE = 1
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.mnist.train(), buf_size=500),
+        batch_size=BATCH_SIZE)
+    feeder = fluid.DataFeeder(
+        feed_list=[IMG_NAME, LABEL_NAME],
+        place=place,
+        program=fluid.default_main_program())
+
+    fluid.io.load_params(
+        exe, "./mnist/", main_program=fluid.default_main_program())
+
+    # advbox demo
+    m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
+                    logits.name, avg_cost.name, (-1, 1))
+    att = GradientSignAttack(m)
+    for data in train_reader():
+        # fgsm attack
+        adv_img = att(data)
+        plt.imshow(n[0][0], cmap='Greys_r')
+        plt.show()
+        #np.save('adv_img', adv_img)
+        break
+
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/cluster/README.md b/benchmark/cluster/README.md
new file mode 100644
index 0000000000..b619613ea7
--- /dev/null
+++ b/benchmark/cluster/README.md
@@ -0,0 +1,78 @@
+# Cluster Training Benchmark
+
+## Setup
+
+- Platform
+  - Kubernetes: v1.6.2
+  - Linux Kernel: v3.10.0
+
+- Resource
+  - CPU: 10 Cores per Pod
+  - Memory: 5GB per Pod
+
+- Docker Image
+
+  We use different base Docker Image to run the benchmark on Kubernetes:
+  - PaddlePaddle v2: paddlepaddle/paddle:0.11.0
+  - PaddlePaddle Fluid: paddlepaddle/paddle:[commit-id]
+  - TensorFlow: tensorflow/tensorflow:1.5.0-rc0
+
+- Model
+  vgg16 is used in this benchmark.
+
+## Cases
+
+- Variable
+  - Batch Size of training data.
+  - PServer count of the training job.
+  - The number of trainers.
+
+- Invariant
+  - The resource of trainer/pserver Pod.
+
+### Measure the Performance for Different Batch Size
+
+- PServer Count: 40
+- Trainer Count: 100
+- Metrics: mini-batch / sec
+
+| Batch Size | 32 | 64 | 128 | 256 |
+| -- | -- | -- | -- | -- |
+| PaddlePaddle Fluid | - | - | - | - |
+| PaddlePaddle v2 | - | - | - | - |
+| TensorFlow | - | - | - | - |
+
+### Measure the Performance for Different PServer Count
+
+- Trainer Count: 100
+- Batch Size: 64
+- Metrics: mini-batch / sec
+
+| PServer Count | 10 | 20 | 40 | 60 |
+| -- | -- | -- | -- | -- |
+| PaddlePaddle Fluid | - | - | - | - |
+| PaddlePaddle v2 | - | - | - | - |
+| TensorFlow | - | - | - | - |
+
+### Measure Parallel Efficiency By Increasing Trainer Count
+
+- PServer Count: 20
+- Batch Size: 64
+- Metrics:
+
+$S = \div(T1, TN)$
+
+which S is the ratio of T1 over TN, training time of 1 and N trainers.
+The parallel efficiency is:
+
+$E = \div(S, N)$
+
+| Trainer Counter | 1 | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 |
+| -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- |
+| PaddlePaddle Fluid | - | - | - | - | - | - | - | - | - | - | - |
+| PaddlePaddle v2 | - | - | - | - | - | - | - | - | - | - | - | - |
+| TensorFlow | - | - | - | - | - | - | - | - | - | - | - | - | - |
+
+## Reproduce the benchmark
+
+TODO
diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py
index cad6051f14..07f478d8fa 100644
--- a/benchmark/paddle/image/alexnet.py
+++ b/benchmark/paddle/image/alexnet.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 
 from paddle.trainer_config_helpers import *
diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py
index 2a850ccb7f..3241be9c5f 100644
--- a/benchmark/paddle/image/googlenet.py
+++ b/benchmark/paddle/image/googlenet.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 from paddle.trainer_config_helpers import *
 
diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py
index 1018ec9ce1..220c4bee35 100644
--- a/benchmark/paddle/image/provider.py
+++ b/benchmark/paddle/image/provider.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import io, os
 import random
 import numpy as np
diff --git a/benchmark/paddle/image/resnet.py b/benchmark/paddle/image/resnet.py
index 2846e4763f..acc6d31d4b 100644
--- a/benchmark/paddle/image/resnet.py
+++ b/benchmark/paddle/image/resnet.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 from paddle.trainer_config_helpers import *
 
diff --git a/benchmark/paddle/image/smallnet_mnist_cifar.py b/benchmark/paddle/image/smallnet_mnist_cifar.py
index 58879c454f..64a5da3220 100644
--- a/benchmark/paddle/image/smallnet_mnist_cifar.py
+++ b/benchmark/paddle/image/smallnet_mnist_cifar.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 
 from paddle.trainer_config_helpers import *
diff --git a/benchmark/paddle/image/vgg.py b/benchmark/paddle/image/vgg.py
index ca0a6798fb..a357207a62 100644
--- a/benchmark/paddle/image/vgg.py
+++ b/benchmark/paddle/image/vgg.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 from paddle.trainer_config_helpers import *
 
diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py
index 928ca75daf..c03df3a002 100644
--- a/benchmark/paddle/rnn/provider.py
+++ b/benchmark/paddle/rnn/provider.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import io, os
 import random
 import numpy as np
diff --git a/benchmark/paddle/rnn/rnn.py b/benchmark/paddle/rnn/rnn.py
index 83eb3e5654..97005f2c35 100755
--- a/benchmark/paddle/rnn/rnn.py
+++ b/benchmark/paddle/rnn/rnn.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 
 from paddle.trainer_config_helpers import *
diff --git a/benchmark/tensorflow/image/alexnet.py b/benchmark/tensorflow/image/alexnet.py
index f6a39ef778..edf462e6a1 100644
--- a/benchmark/tensorflow/image/alexnet.py
+++ b/benchmark/tensorflow/image/alexnet.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/alexnet_multi_gpu.py b/benchmark/tensorflow/image/alexnet_multi_gpu.py
index 7b5ee78f4d..90b8f16bca 100644
--- a/benchmark/tensorflow/image/alexnet_multi_gpu.py
+++ b/benchmark/tensorflow/image/alexnet_multi_gpu.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/googlenet.py b/benchmark/tensorflow/image/googlenet.py
index decf855b54..55431eceb3 100644
--- a/benchmark/tensorflow/image/googlenet.py
+++ b/benchmark/tensorflow/image/googlenet.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from six.moves import xrange
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/googlenet_multi_gpu.py b/benchmark/tensorflow/image/googlenet_multi_gpu.py
index 31466faa37..44de3800a8 100644
--- a/benchmark/tensorflow/image/googlenet_multi_gpu.py
+++ b/benchmark/tensorflow/image/googlenet_multi_gpu.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/image/smallnet_mnist_cifar.py b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
index 1a625134a6..0858b5f9c9 100644
--- a/benchmark/tensorflow/image/smallnet_mnist_cifar.py
+++ b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from six.moves import xrange  # pylint: disable=redefined-builtin
 from datetime import datetime
 import math
diff --git a/benchmark/tensorflow/rnn/reader.py b/benchmark/tensorflow/rnn/reader.py
index f538329a15..710940c9ae 100755
--- a/benchmark/tensorflow/rnn/reader.py
+++ b/benchmark/tensorflow/rnn/reader.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os.path
 import io
 import numpy as np
diff --git a/benchmark/tensorflow/rnn/rnn.py b/benchmark/tensorflow/rnn/rnn.py
index f288083e13..507481b9cc 100755
--- a/benchmark/tensorflow/rnn/rnn.py
+++ b/benchmark/tensorflow/rnn/rnn.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import math
diff --git a/benchmark/tensorflow/rnn/rnn_multi_gpu.py b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
index eabee4fa8f..f24cbaef62 100755
--- a/benchmark/tensorflow/rnn/rnn_multi_gpu.py
+++ b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 from six.moves import xrange  # pylint: disable=redefined-builtin
 import re
diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake
index abee6698e3..79b2449fe6 100644
--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -33,7 +33,7 @@ ExternalProject_Add(
     extern_grpc
     DEPENDS protobuf zlib
     GIT_REPOSITORY "https://github.com/grpc/grpc.git"
-    GIT_TAG "v1.7.x"
+    GIT_TAG "v1.8.x"
     PREFIX          ${GRPC_SOURCES_DIR}
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
diff --git a/cmake/make_resource.py b/cmake/make_resource.py
index a9241b0e3e..d71e82eca2 100644
--- a/cmake/make_resource.py
+++ b/cmake/make_resource.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os
 import re
 import sys
diff --git a/doc/api/v1/data_provider/src/mnist_config.py b/doc/api/v1/data_provider/src/mnist_config.py
index 429338c57f..427e0465a6 100644
--- a/doc/api/v1/data_provider/src/mnist_config.py
+++ b/doc/api/v1/data_provider/src/mnist_config.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 define_py_data_sources2(
diff --git a/doc/api/v1/data_provider/src/mnist_provider.dict.py b/doc/api/v1/data_provider/src/mnist_provider.dict.py
index 2ba0b126a0..3fbb783e2f 100644
--- a/doc/api/v1/data_provider/src/mnist_provider.dict.py
+++ b/doc/api/v1/data_provider/src/mnist_provider.dict.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer.PyDataProvider2 import *
 
 
diff --git a/doc/api/v1/data_provider/src/sentimental_config.py b/doc/api/v1/data_provider/src/sentimental_config.py
index 7ce71608a2..edbf3cf140 100644
--- a/doc/api/v1/data_provider/src/sentimental_config.py
+++ b/doc/api/v1/data_provider/src/sentimental_config.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 dictionary = dict()
diff --git a/doc/api/v1/data_provider/src/sentimental_provider.py b/doc/api/v1/data_provider/src/sentimental_provider.py
index 14bd0e05a9..03ad1fe7d8 100644
--- a/doc/api/v1/data_provider/src/sentimental_provider.py
+++ b/doc/api/v1/data_provider/src/sentimental_provider.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer.PyDataProvider2 import *
 
 
diff --git a/doc/api/v2/fluid.rst b/doc/api/v2/fluid.rst
index 43fc19dc49..5f15cad2b5 100644
--- a/doc/api/v2/fluid.rst
+++ b/doc/api/v2/fluid.rst
@@ -15,4 +15,4 @@ Fluid
     fluid/param_attr.rst
     fluid/profiler.rst
     fluid/regularizer.rst
-
+    fluid/io.rst
diff --git a/doc/api/v2/fluid/io.rst b/doc/api/v2/fluid/io.rst
new file mode 100644
index 0000000000..67f68c4e9e
--- /dev/null
+++ b/doc/api/v2/fluid/io.rst
@@ -0,0 +1,10 @@
+===========
+IO
+===========
+
+
+
+is_parameter
+-----------
+..  autofunction:: paddle.v2.fluid.io.is_parameter
+    :noindex:
diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst
index a7c8670f66..6e1d6719ac 100644
--- a/doc/api/v2/fluid/layers.rst
+++ b/doc/api/v2/fluid/layers.rst
@@ -38,6 +38,16 @@ elementwise_add
 ..  autofunction:: paddle.v2.fluid.layers.elementwise_add
     :noindex:
 
+elementwise_sub
+---------------
+..  autofunction:: paddle.v2.fluid.layers.elementwise_sub
+    :noindex:
+
+elementwise_mul
+---------------
+..  autofunction:: paddle.v2.fluid.layers.elementwise_mul
+    :noindex:
+
 elementwise_div
 ---------------
 ..  autofunction:: paddle.v2.fluid.layers.elementwise_div
@@ -348,3 +358,144 @@ reduce_min
 ..  autofunction:: paddle.v2.fluid.layers.reduce_min
     :noindex:
 
+
+split
+-----
+..  autofunction:: paddle.v2.fluid.layers.split
+    :noindex:
+
+
+matmul
+------
+..  autofunction:: paddle.v2.fluid.layers.matmul
+    :noindex:
+
+logsigmoid
+----------
+..  autofunction:: paddle.v2.fluid.layers.logsigmoid
+    :noindex:
+
+exp
+---
+..  autofunction:: paddle.v2.fluid.layers.exp
+    :noindex:
+
+relu
+----
+..  autofunction:: paddle.v2.fluid.layers.relu
+    :noindex:
+
+tanh
+----
+..  autofunction:: paddle.v2.fluid.layers.tanh
+    :noindex:
+
+tanh_shrink
+-----------
+..  autofunction:: paddle.v2.fluid.layers.tanh_shrink
+    :noindex:
+
+softshrink
+----------
+..  autofunction:: paddle.v2.fluid.layers.softshrink
+    :noindex:
+
+sqrt
+----
+..  autofunction:: paddle.v2.fluid.layers.sqrt
+    :noindex:
+
+abs
+----
+..  autofunction:: paddle.v2.fluid.layers.abs
+    :noindex:
+
+ceil
+----
+..  autofunction:: paddle.v2.fluid.layers.ceil
+    :noindex:
+
+floor
+-----
+..  autofunction:: paddle.v2.fluid.layers.floor
+    :noindex:
+
+round
+-----
+..  autofunction:: paddle.v2.fluid.layers.round
+    :noindex:
+
+reciprocal
+----------
+..  autofunction:: paddle.v2.fluid.layers.reciprocal
+    :noindex:
+
+log
+---
+..  autofunction:: paddle.v2.fluid.layers.log
+    :noindex:
+
+square
+------
+..  autofunction:: paddle.v2.fluid.layers.square
+    :noindex:
+
+softplus
+--------
+..  autofunction:: paddle.v2.fluid.layers.softplus
+    :noindex:
+
+softsign
+---------
+..  autofunction:: paddle.v2.fluid.layers.softsign
+    :noindex:
+
+brelu
+-----
+..  autofunction:: paddle.v2.fluid.layers.brelu
+    :noindex:
+
+leaky_relu
+----------
+..  autofunction:: paddle.v2.fluid.layers.leaky_relu
+    :noindex:
+
+soft_relu
+---------
+..  autofunction:: paddle.v2.fluid.layers.soft_relu
+    :noindex:
+
+elu
+----
+..  autofunction:: paddle.v2.fluid.layers.elu
+    :noindex:
+
+relu6
+-----
+..  autofunction:: paddle.v2.fluid.layers.relu6
+    :noindex:
+
+pow
+----
+..  autofunction:: paddle.v2.fluid.layers.pow
+    :noindex:
+
+hard_shrink
+-----------
+..  autofunction:: paddle.v2.fluid.layers.hard_shrink
+    :noindex:
+
+thresholded_relu
+----------------
+..  autofunction:: paddle.v2.fluid.layers.thresholded_relu
+    :noindex:
+
+hard_sigmoid
+-------------
+..  autofunction:: paddle.v2.fluid.layers.hard_sigmoid
+    :noindex:
+
+swish
+------
+..  autofunction:: paddle.v2.fluid.layers.swish
+    :noindex:
diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst
index b792efb71f..f6b1cb4ba1 100644
--- a/doc/api/v2/fluid/nets.rst
+++ b/doc/api/v2/fluid/nets.rst
@@ -20,3 +20,14 @@ sequence_conv_pool
     :noindex:
 
 
+glu
+---
+..  autofunction:: paddle.v2.fluid.nets.glu
+    :noindex:
+
+
+dot_product_attention
+---------------------
+..  autofunction:: paddle.v2.fluid.nets.dot_product_attention
+    :noindex:
+
diff --git a/doc/design/block.md b/doc/design/block.md
index fab7f2dc48..907a2def55 100644
--- a/doc/design/block.md
+++ b/doc/design/block.md
@@ -202,8 +202,8 @@ This `OpDesc` value is in the `ops` field of the `BlockDesc` value representing
 
 During the generation of the Protobuf message, the Block should store VarDesc (the Protobuf message which describes Variable) and OpDesc (the Protobuf message which describes Operator).
 
-VarDesc in a block should have its name scope to avoid local variables affect parent block's name scope.
-Child block's name scopes should inherit the parent's so that OpDesc in child block can reference a VarDesc that stored in parent block. For example:
+VarDesc in a block should have its name scope to avoid local variables affecting parent block's name scope.
+Child block's name scopes should inherit the parent's so that OpDesc in child block can reference a VarDesc that is stored in the parent block. For example:
 
 ```python
 a = pd.Variable(shape=[20, 20])
diff --git a/doc/design/memory_optimization.md b/doc/design/memory_optimization.md
index 00f514711a..1f68cef4cc 100644
--- a/doc/design/memory_optimization.md
+++ b/doc/design/memory_optimization.md
@@ -5,28 +5,28 @@
 
 In a lecture from Andrew Ng, he attributes the recent sucess of AI due to a combination of these:
 
-- availability of Big Data
-- supercomputing power to process this Big Data over very large neural networks
-- modern algorithms
+- Availability of Big Data
+- Supercomputing power to process this Big Data over very large neural networks
+- Modern algorithms
 
 Following graph shows the details:
 
 ![](images/deep_learning.png)
 
-Larger model usually brings better performance. However, GPU memory is certain limited. For example, the memory size of a GTX TITAN X is only 12GB. To train complex and large model, we have to take care of memory using. Besides, memory optimization is also necessary in both online/mobile inference. 
+Larger model usually bring better performance. However, GPU memory is limited. For example, the memory size of a GTX TITAN X is only 12GB. To train complex and large models, we have to take care of memory usage. Besides, memory optimization is also necessary in both online/mobile inference. 
 
 ## Solution
 
 ### Basic Strategy
 
-There are some basic strategies to make memory optimization, including in-place operation and memory sharing.
+There are some basic strategies to improve memory usage, including in-place operations and memory sharing.
 
 #### In-place Operation
 In a relu activation operator： 
 
 $y = \max(x, 0)$
 
-If the variable x is not used in any other operator, we can make an in-place operation. In other words, the memory block of variable y and variable x are the same. In-place operation will save 50% memory occupancy immediately.
+If the variable x is not used in any other operator, we can make an in-place operation. In other words, the memory block of variable y and variable x will be the same. In-place operations will save 50% memory occupancy immediately.
 
 #### Memory Sharing
 
@@ -40,18 +40,18 @@ d = op2(a)
 e = op3(d, f)
 ```
 
-In this case, variable a is no longer used, and op2 does not support in-place operation. After op2 finished, we can put the memory of variable a to a memory pool. Then, variable e can share the memory of variable a from the pool.
+In this case, variable a is no longer used, and op2 does not support in-place operation. After op2 finishes, we can put the memory of variable a to a memory pool. Then, variable e can share the memory of variable a from the pool.
 
 
 ### Live Variable Analysis
 
-It's not enough to only have some basic strategies. The prerequisite of memory optimization is to know if a variable is still "live" after an operation.
+It's not enough to only have some basic strategies. The pre-requisite of memory optimization is to know if a variable is still "live" after an operation.
 
 In our design, the neural network topology is defined as a program. Luckily, [live variable analysis](https://en.wikipedia.org/wiki/Live_variable_analysis) is a classic problem in compilers which can be used in many stages, such as register allocation. 
 
-In compilers, the front end of the compilers translates programs into an intermediate language with an unbounded number of temporaries. This program must run on a machine with a bounded number of registers. Two temporaries a and b can fit into the same register, if a and b are never "in use" at the same time. Thus, many temporaries can fit in few registers; if they don't all fit, the excess temporaries can be kept in memory.
+In compilers, the front end of the compiler translates programs into an intermediate language with an unbounded number of temporary variables. This program must run on a machine with a bounded number of registers. Two temporary variables a and b can fit into the same register, if a and b are never "in use" at the same time. Thus, many temporary variables can fit in few registers; if they don't all fit, the excess tempory variables can be kept in memory.
 
-Therefore, the compiler needs to analyze the intermediate-representation program to determine which temporaries are in use at the same time. We say a variable is "live" if it holds a value that may be needed in the future, so this analysis is called liveness analysis. 
+Therefore, the compiler needs to analyze the intermediate-representation program to determine which temporary variables are in use at the same time. We say a variable is "live" if it holds a value that may be needed in the future, so this analysis is called liveness analysis. 
 
 We can leran these techniques from compilers. There are mainly two stages to make live variable analysis:
 
@@ -60,7 +60,7 @@ We can leran these techniques from compilers. There are mainly two stages to mak
 
 
 #### Control Flow Graph
-To preform analyses on a program, it is often useful to make a control flow graph. A [control flow graph](https://en.wikipedia.org/wiki/Control_flow_graph) (CFG) in computer science is a representation, using graph notation, of all paths that might be traversed through a program during its execution. Each statement in the program is a node in the flow graph; if statemment x can be followed by statement y, there is an egde from x to y.
+To perform analysis on a program, it is often useful to make a control flow graph. A [control flow graph](https://en.wikipedia.org/wiki/Control_flow_graph) (CFG) in computer science is a representation, using graph notation, of all paths that might be traversed through a program during its execution. Each statement in the program is a node in the flow graph; if statemment x can be followed by statement y, there is an egde from x to y.
 
 Following is the flow graph for a simple loop.
 
@@ -68,18 +68,18 @@ Following is the flow graph for a simple loop.
 
 #### Dataflow Analysis
 
-liveness of variable "flows" around the edges of the control flow graph; determining the live range of each variable is an example of a dataflow problem. [Dataflow analysis](https://en.wikipedia.org/wiki/Data-flow_analysis) is a technique for gathering information about the possible set of values calculated at various points in a computer program.
+Liveness of variable "flows" around the edges of the control flow graph; determining the live range of each variable is an example of a dataflow problem. [Dataflow analysis](https://en.wikipedia.org/wiki/Data-flow_analysis) is a technique for gathering information about the possible set of values calculated at various points in a computer program.
 
 A simple way to perform data-flow analysis of programs is to set up dataflow equations for each node of the control flow graph and solve them by repeatedly calculating the output from the input locally at each node until the whole system stabilizes.
 
 - Flow Graph Terminology
 
-A flow graph node has out-edges that lead to sucessor nodes, and in-edges that come from presucessor nodes. The set *pred[n]* is all the predecessors of node n, and *succ[n]* is the set of sucessors.
+A flow graph node has out-edges that lead to sucessor nodes, and in-edges that come from predecessor nodes. The set *pred[n]* is all the predecessors of node n, and *succ[n]* is the set of sucessors.
 In former control flow graph, the out-edges of node 5 are 5 --> 6 and 5 --> 2, and *succ[5]* = {2, 6}. The in-edges of 2 are 5 --> 2 and 1 --> 2, and *pred[2]* = {1, 5}.
 
 - Uses and Defs
 
-An assignmemt to a variable or temporary defines that variable. An occurence of a variable on the right-hand side of an assginment(or in other expressions) uses the variable. We can speak the *def* of a variable as the set of graph nodes that define it; or the *def* of a graph node as the set of variables that it defines; and the similarly for the *use* of a variable or graph node. In former control flow graph, *def(3)* = {c}, *use(3)* = {b, c}.
+An assignmemt to a variable or temporary defines that variable. An occurence of a variable on the right-hand side of an assginment(or in other expressions) uses the variable. We can define the *def* of a variable as the set of graph nodes that define it; or the *def* of a graph node as the set of variables that it defines; and the similarly for the *use* of a variable or graph node. In former control flow graph, *def(3)* = {c}, *use(3)* = {b, c}.
 
 - Liveness
 
@@ -168,9 +168,9 @@ class ControlFlowGraph(object):
         return self._program
 ```
 
-#### make dataflow analysis
+#### Make dataflow analysis
 
-We follow guide from compilers and try to solve the dataflow equation to get liveness of every variable. If the live-in of an operator node is different from the live-out, then we can make memory sharing. 
+We follow the guide from compilers and try to solve the dataflow equation to get liveness of every variable. If the live-in of an operator node is different from the live-out, then we can make memory sharing. 
 
 For example:
 
diff --git a/doc/design/operator_kernel_type.md b/doc/design/operator_kernel_type.md
index aa82e96bf7..f86e6b7a56 100644
--- a/doc/design/operator_kernel_type.md
+++ b/doc/design/operator_kernel_type.md
@@ -1,6 +1,6 @@
 # Design Doc: The Keys of Operator Kernel Type
 ## Problem
-An operator can have different kernel implementations, and each operator will have a map to store the related kernels. Fluid uses `OpKernelType` as a key to identify a unique Kernel. Before an operator runs, an certain kernel must be chosen by a key of `OpKernelType`. Currently, `OpKernelType` is defined as follows:
+An operator can have different kernel implementations, and each operator will have a map to store the related kernels. Fluid uses `OpKernelType` as a key to identify a unique kernel. Before an operator runs, a certain type of kernel must be chosen via a key of `OpKernelType`. Currently, `OpKernelType` is defined as follows:
 
 ```cpp
 struct OpKernelType {
@@ -10,13 +10,13 @@ struct OpKernelType {
 ```
 For more details, please refer to [codes](https://github.com/PaddlePaddle/Paddle/blob/2d5ec16bc8a09fb8e0f62c89b116b0cd1d333907/paddle/framework/operator.h#L348-L374) in github.
 
-It contains two keys, `Place` and `DataType`. And these two keys will be hashed to a unique key to represent a certain type of kernel. However, these two keys are not enough. We need a more complete representation of `OpKernelType`. 
+It contains two keys, `Place` and `DataType`. And these two keys will be hashed to a unique key to represent a certain type of kernel. However, these two keys do not provide enough information. We need a more complete representation of `OpKernelType`.
 
-We often implement a kernel of an operator with some computing library in certain device(place). Please remind that computing library and device are not one-to-one corresponding. A device can have a lot of computing libraries and a computing library can also support several devices. 
+We often implement a kernel of an operator with some computing library on certain device(place). Please note that computing library and device do not have a one-to-one correspondence. A device can have a lot of computing libraries and a computing library can also support different devices.
 
-For example, Eigen library can support Nvidia GPU/AMD GPU/CPU. And MKLDNN library can support Intel CPU/Intel FPGA. Both `Place` and `Library` should be a key of `OpKernelType`.
+For example, Eigen library supports Nvidia GPU/AMD GPU/CPU and MKLDNN library supports Intel CPU/Intel FPGA. Both `Place` and `Library` should be a key of `OpKernelType`.
 
-It's obvious that different DataTypes, like fp64/fp32/int8 will have different kernels. But the data layout of a Tensor will also lead to different implementation. Please refer to the batch norm operator [kernels](https://github.com/PaddlePaddle/Paddle/blob/a948fac4d0ad7e0412d373b8aabeb711c2899563/paddle/operators/batch_norm_op.cc#L180-L209). Data Layout should also be taken into consideration.
+Different DataTypes, such as fp64/fp32/int8, will obviously have different kernels. But different data layout of a Tensor will also lead to different implementations. Please refer to the batch norm operator [kernels](https://github.com/PaddlePaddle/Paddle/blob/a948fac4d0ad7e0412d373b8aabeb711c2899563/paddle/operators/batch_norm_op.cc#L180-L209) as an example. Data layout should also be taken into consideration.
 
 ## Solution
 
@@ -31,17 +31,17 @@ struct OpKernelType {
 };
 ```
 
-Following is the details:
+The details are as follows:
 
 ### Place
 
-`Place` is defined as follows:
+`Place` is defined as:
 
 ```cpp
 typedef boost::variant<CUDAPlace, ROCmPlace, FPGAPlace, CPUPlace> Place;
 ```
 
-`Place` is to represent the device memory where data is locating.
+`Place` represents the device memory where data is located.
 
 
 ### Library
@@ -52,10 +52,10 @@ One operator kernel is usually implemented based on one library. `Library` is de
 enum Library { Plain, MKLDNN, CUDNN };
 ```
 
-We use `Plain` enumerator to represent default library. Since most operators in Fluid are implemented based on `Eigen` library, we take `Eigen` library as the `Plain` enumerator.
-A library usually has a corresponding `DeviceContext` which contains some handles needed by computation. Fluid now have two default DeviceContexts in CPU and CUDA, `CPUDeviceContext` and `CUDADeviceContext`. `CPUDeviceContext` contains a Eigen library handle and `CDUADeviceContext` contains a Eigen library handle and cuBLAS handle.
+We use `Plain` enumerator to represent default library. Since most operators in Fluid are implemented based on the `Eigen` library, we take `Eigen` library as the `Plain` enumerator.
+A library usually has a corresponding `DeviceContext` which contains some handles needed for computation. Fluid now has two default DeviceContexts for CPU and CUDA, namely, `CPUDeviceContext` and `CUDADeviceContext`. `CPUDeviceContext` contains an Eigen library handle and `CDUADeviceContext` contains an Eigen library handle and a cuBLAS handle.
 
-If we want to support new Library, a new enumerator need to be added to `Library` and a new corresponding `LibraryDeviceContext` will be created.
+If we want to support new library, a new enumerator need to be added to `Library` and a corresponding new `LibraryDeviceContext` need to be created.
 
 
 ### DataType
@@ -67,15 +67,15 @@ If we want to support new Library, a new enumerator need to be added to `Library
 
 Actually, a Tensor is a view of a block of memory. Besides a pointer to the memory, we also have to get some other descriptions of this block of memory, such as shape(ddim), stride, and layout.
 
-Different layout leads to different implementation of operator kernel. There are mainly 4 principles we have to follow to support layout in our fluid framework.
+Different layout leads to different implementation of the operator kernel. There are mainly 4 principles we have to follow to support layout in our Fluid framework.
 
-- We take layout as a data member of Tensor. Layout is actually a enum variable. If fluid is built with MKLDNN, then, the memory format in MKLDNN will be added into this enum variable too.
+- We take layout as a data member of Tensor. Layout is actually a enum variable. If Fluid is built with MKLDNN, then the memory format in MKLDNN will also be added into this enum variable.
 
-- Users have to set layout for input data. And some operators like fill_constant/random, also have to set layout of generating data. Of course, we can have some default layout, like NCHW.
+- Users have to set layout for input data. And some operators like fill_constant/random, also have to set layout for generating data. Of course, we can have some default layout, like NCHW.
 
-- The inference of Layout is at run-time, not compile-time.
+- The inference of Layout is at run-time, not at compile-time.
 
-- Every operator have to implement different kernels for different layouts. Let's take MKLDNN as an example, if we want to implement a MKLDNN convolution operator, we have to realize all the kernels for different layout, list at [here](http://01org.github.io/mkl-dnn/structmkldnn_1_1memory.html). And we will have a special macro to do registering kernels for MKLDNN operators.
+- Every operator has to implement different kernels for different layouts. Let's take MKLDNN as an example. If we want to implement an MKLDNN convolution operator, we have to implement all the kernels for different layouts, which are listed [here](http://01org.github.io/mkl-dnn/structmkldnn_1_1memory.html). And we will have a special macro to  register kernels for MKLDNN operators.
 
 `Layout` is also defined as a enum variable:
 
diff --git a/doc/design/python_api.md b/doc/design/python_api.md
index cb5fdc765b..73f6d7b90c 100644
--- a/doc/design/python_api.md
+++ b/doc/design/python_api.md
@@ -279,6 +279,26 @@ class LayerHelper(object):
     return tmp
 ```
 
+### Return value of layer functions
+
+The layer will return a Variable, which is also the output of an operator.  However, outputs of a layer function have more attributes than an operator. There are parameter variables, and their gradient variables need to return. To return them is useful. For example,
+
+1. Users can debug the network by printing parameter gradients.
+2. Users can append attributes to a parameter, such as, `param.stop_gradient=True` will make a parameter stop generate the gradient. We can fix the parameter value during training by using this attribute.
+
+However, it is good to return a Variable for layers, since all layers and operators use Variables as their parameters. We can just append a `param` field and a `grad` field for layer function since the Python is dynamic typing.
+
+The sample usage is
+
+```python
+data = fluid.layers.data(...)
+hidden = fluid.layers.fc(data, ...)
+...
+
+executor.run(fetch_list=[hidden.param, hidden.param.grad], ...)
+```
+
+
 ## Optimizer
 
 [Optimizer Design Doc](./optimizer.md)
diff --git a/doc/design/var_desc.md b/doc/design/var_desc.md
index 0b2958c1b1..89fa95326c 100644
--- a/doc/design/var_desc.md
+++ b/doc/design/var_desc.md
@@ -1,12 +1,12 @@
 ## Background
-PaddlePaddle divides the description of neural network computation graph into two stages: compile time and runtime.
+PaddlePaddle divides the description of neural network computation into two stages: compile time and runtime. At compile time, the neural network computation is described as a `ProgramDesc` whereas at runtime an `Executor` interprets the `ProgramDesc` to compute the operations.
 
-PaddlePaddle use proto message to describe compile time graph because
+PaddlePaddle use proto message to describe compile time program because
 
-1. Computation graph should be able to be saved to a file.
-1. In distributed training, the graph will be serialized and send to multiple workers.
+1. The computation program description must be serializable and saved in a file.
+1. During distributed training, the sreialized program will be sent to multiple workers. It should also be possible to break the program into different components, each of which can be executed on different workers.
 
-The computation graph is constructed by Data Node and Operation Node. The concept to represent them is in the table below.
+The computation `Program` consists of nested `Blocks`. Each `Block` will consist of data(i.e. `Variable`)  and  `Operations`. The concept to represent them is in the table below.
 
 | |compile time|runtime|
 |---|---|---|
diff --git a/doc/faq/local/src/reduce_min_pool_size.py b/doc/faq/local/src/reduce_min_pool_size.py
index 5715397cc1..96073633d2 100644
--- a/doc/faq/local/src/reduce_min_pool_size.py
+++ b/doc/faq/local/src/reduce_min_pool_size.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 @provider(min_pool_size=0, ...)
 def process(settings, filename):
     os.system('shuf %s > %s.shuf' % (filename, filename))  # shuffle before.
diff --git a/doc/faq/local/src/word2vec_config.py b/doc/faq/local/src/word2vec_config.py
index 866b40c3d4..03619b2628 100644
--- a/doc/faq/local/src/word2vec_config.py
+++ b/doc/faq/local/src/word2vec_config.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 ...  # the settings and define data provider is omitted.
 DICT_DIM = 3000  # dictionary dimension.
 word_ids = data_layer('word_ids', size=DICT_DIM)
diff --git a/doc/faq/local/src/word2vec_dataprovider.py b/doc/faq/local/src/word2vec_dataprovider.py
index ec2753a7d0..a439a8f52e 100644
--- a/doc/faq/local/src/word2vec_dataprovider.py
+++ b/doc/faq/local/src/word2vec_dataprovider.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 DICT_DIM = 3000
 
 
diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst
index 41ac07ca56..71904dc41e 100644
--- a/doc/getstarted/build_and_install/build_from_source_cn.rst
+++ b/doc/getstarted/build_and_install/build_from_source_cn.rst
@@ -32,6 +32,16 @@ PaddlePaddle主要使用 `CMake <https://cmake.org>`_ 以及GCC, G++作为编译
 
    pip install build/python/dist/*.whl
 
+如果机器中已经安装过PaddlePaddle，有两种方法：
+
+.. code-block:: bash
+
+   1. 先卸载之前的版本，再重新安装
+   pip uninstall paddlepaddle
+   pip install build/python/dist/*.whl
+
+   2. 直接升级到更新的版本
+   pip install build/python/dist/*.whl -U
 
 .. _run_test:
 
diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst
index 92211aee8c..27f73b2e2c 100644
--- a/doc/getstarted/build_and_install/build_from_source_en.rst
+++ b/doc/getstarted/build_and_install/build_from_source_en.rst
@@ -36,6 +36,16 @@ machine or copy it to the target machine.
 
    pip install build/python/dist/*.whl
 
+If the machine has installed PaddlePaddle before, there are two methods:
+
+.. code-block:: bash
+
+   1. uninstall and reinstall
+   pip uninstall paddlepaddle
+   pip install build/python/dist/*.whl
+
+   2. upgrade directly
+   pip install build/python/dist/*.whl -U
 
 .. _run_test:
 
diff --git a/doc/getstarted/concepts/src/infer.py b/doc/getstarted/concepts/src/infer.py
index 4cc58dfee0..ee71cd7a9a 100644
--- a/doc/getstarted/concepts/src/infer.py
+++ b/doc/getstarted/concepts/src/infer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2 as paddle
 import numpy as np
 
diff --git a/doc/getstarted/concepts/src/train.py b/doc/getstarted/concepts/src/train.py
index 4bccbfca3c..d9c0c66b8a 100644
--- a/doc/getstarted/concepts/src/train.py
+++ b/doc/getstarted/concepts/src/train.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2 as paddle
 import numpy as np
 
diff --git a/doc/howto/dev/new_op_cn.md b/doc/howto/dev/new_op_cn.md
index 3109d72001..9299658567 100644
--- a/doc/howto/dev/new_op_cn.md
+++ b/doc/howto/dev/new_op_cn.md
@@ -24,7 +24,7 @@
 - `framework::OperatorWithKernel`：继承自OperatorBase，Op有计算函数，称作有Kernel。
 - `class OpProtoAndCheckerMaker`：描述该Op的输入、输出、属性、注释,主要用于Python API接口生成
 
-依据是否包含kernel，可以将Op分为两种：包含Kernel的Op和不包含kernel的Op，前者Op的定义继承自`OperatorBase`，后者继承自`OperatorWithKernel`。本教程主要介绍带Kernel的Op如何写，简单总结Op需要包含的内容如下：
+依据是否包含kernel，可以将Op分为两种：包含Kernel的Op和不包含kernel的Op，前者Op的定义继承自`OperatorWithKernel`，后者继承自`OperatorBase`。本教程主要介绍带Kernel的Op如何写，简单总结Op需要包含的内容如下：
 
 
  内容            | 定义位置
diff --git a/doc/howto/index_cn.rst b/doc/howto/index_cn.rst
index ccd9097702..e0c69f7a6a 100644
--- a/doc/howto/index_cn.rst
+++ b/doc/howto/index_cn.rst
@@ -9,6 +9,7 @@
 
   usage/cmd_parameter/index_cn.rst
   usage/cluster/cluster_train_cn.md
+  usage/capi/index_cn.rst
 
 开发标准
 --------
diff --git a/doc/howto/read_source.md b/doc/howto/read_source.md
index e4211abb3b..31987920f3 100644
--- a/doc/howto/read_source.md
+++ b/doc/howto/read_source.md
@@ -26,16 +26,16 @@ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
 sgd_optimizer.minimize(avg_cost)
 ```
 
-- Variables: `x`,  `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/framework.py#L93)
-- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/layers.py)
+- Variables: `x`,  `y`, `y_predict`, `cost` and `avg_cost`. [Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/framework.py#)
+- Layers: `fluid.layers.data`, `fluid.layers.fc` and `fluid.layers.mean` are layers. [Python](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/v2/fluid/layers)
   - Every Layer has one or more operators and variables/parameters
     - All the operators are defined at [`paddle/operators/`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators). Other worth-looking files:
       - Base class: [`paddle/framework/operator.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h)
       - Operator Registration: [`paddle/framework/op_registry.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_registry.h) 
       - Operator Lookup: [`paddle/framework/op_info.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/op_info.h)
 - Optimizer: `fluid.optimizer.SGD`. It does the following
-  - Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/backward.py), [C++](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/backward.cc)]
-  - Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/optimizer.py), [C++](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/optimizer)]
+  - Add backward operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/backward.py)]
+  - Add optimizer operators. [[Python](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/optimizer.py)]
 
 # Run Time
 
diff --git a/doc/howto/usage/capi/compile_paddle_lib_cn.md b/doc/howto/usage/capi/compile_paddle_lib_cn.md
new file mode 100644
index 0000000000..ac5ecffe2e
--- /dev/null
+++ b/doc/howto/usage/capi/compile_paddle_lib_cn.md
@@ -0,0 +1,122 @@
+## 编译 PaddlePaddle 预测库
+
+### 概述
+
+使用 C-API 进行预测依赖于将 PaddlePaddle 核心代码编译成链接库，只需在编译时需配制下面这些编译选项：
+
+必须配置选项：
+- `WITH_C_API`，必须配置为`ON`。
+
+推荐配置选项：
+- `WITH_PYTHON`，推荐配置为`OFF`
+- `WITH_SWIG_PY`，推荐配置为`OFF`
+- `WITH_GOLANG`，推荐设置为`OFF`
+
+可选配置选项：
+- `WITH_GPU`，可配置为`ON/OFF`
+- `WITH_MKL`，可配置为`ON/OFF`
+
+对推荐配置中的选项建议按照设置，以避免链接不必要的库。其它可选编译选项按需进行设定。
+
+下面的代码片段从github拉取最新代码，配制编译选项（需要将PADDLE_ROOT替换为PaddlePaddle预测库的安装路径）：
+
+```shell
+PADDLE_ROOT=/path/of/capi
+git clone https://github.com/PaddlePaddle/Paddle.git
+cd Paddle
+mkdir build
+cd build
+cmake -DCMAKE_INSTALL_PREFIX=$PADDLE_ROOT \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DWITH_C_API=ON \
+      -DWITH_SWIG_PY=OFF \
+      -DWITH_GOLANG=OFF \
+      -DWITH_PYTHON=OFF \
+      -DWITH_MKL=OFF \
+      -DWITH_GPU=OFF  \
+      ..
+```
+
+执行上述代码生成Makefile文件后，执行：`make && make install`。成功编译后，使用C-API所需的依赖（包括：（1）编译出的PaddlePaddle预测库和头文件；（2）第三方链接库和头文件）均会存放于`PADDLE_ROOT`目录中。
+
+编译成功后在 `PADDLE_ROOT` 下会看到如下目录结构（包括了编译出的PaddlePaddle头文件和链接库，以及第三方依赖链接库和头文件（如果需要，由链接方式决定））：
+
+```text
+├── include
+│   └── paddle
+│       ├── arguments.h
+│       ├── capi.h
+│       ├── capi_private.h
+│       ├── config.h
+│       ├── error.h
+│       ├── gradient_machine.h
+│       ├── main.h
+│       ├── matrix.h
+│       ├── paddle_capi.map
+│       └── vector.h
+├── lib
+│   ├── libpaddle_capi_engine.a
+│   ├── libpaddle_capi_layers.a
+│   ├── libpaddle_capi_shared.so
+│   └── libpaddle_capi_whole.a
+└── third_party
+    ├── gflags
+    │   ├── include
+    │   │   └── gflags
+    │   │       ├── gflags_completions.h
+    │   │       ├── gflags_declare.h
+    │   │       ...
+    │   └── lib
+    │       └── libgflags.a
+    ├── glog
+    │   ├── include
+    │   │   └── glog
+    │   │       ├── config.h
+    │   │       ...
+    │   └── lib
+    │       └── libglog.a
+    ├── openblas
+    │   ├── include
+    │   │   ├── cblas.h
+    │   │   ...
+    │   └── lib
+    │       ...
+    ├── protobuf
+    │   ├── include
+    │   │   └── google
+    │   │       └── protobuf
+    │   │           ...
+    │   └── lib
+    │       └── libprotobuf-lite.a
+    └── zlib
+        ├── include
+        │   ...
+        └── lib
+            ...
+
+```
+
+### 链接说明
+
+目前提供三种链接方式：
+
+1. 链接`libpaddle_capi_shared.so` 动态库
+    - 使用 PaddlePaddle C-API 开发预测程序链接`libpaddle_capi_shared.so`时，需注意：
+        1. 如果编译时指定编译CPU版本，且使用`OpenBLAS`数学库，在使用C-API开发预测程序时，只需要链接`libpaddle_capi_shared.so`这一个库。
+        1. 如果是用编译时指定CPU版本，且使用`MKL`数学库，由于`MKL`库有自己独立的动态库文件，在使用PaddlePaddle C-API开发预测程序时，需要自己链接MKL链接库。
+        1. 如果编译时指定编译GPU版本，CUDA相关库会在预测程序运行时动态装载，需要将CUDA相关的库设置到`LD_LIBRARY_PATH`环境变量中。
+    - 这种方式最为简便，链接相对容易，**在无特殊需求情况下，推荐使用此方式**。
+
+2. 链接静态库 `libpaddle_capi_whole.a`
+    - 使用PaddlePaddle C-API 开发预测程序链接`libpaddle_capi_whole.a`时，需注意：
+        1. 需要指定`-Wl,--whole-archive`链接选项。
+        1. 需要显式地链接 `gflags`、`glog`、`libz`、`protobuf` 等第三方库，可在`PADDLE_ROOT/third_party`下找到。
+        1. 如果在编译 C-API 时使用OpenBLAS数学库，需要显示地链接`libopenblas.a`。
+        1. 如果在编译 C-API 是使用MKL数学库，需要显示地链接MKL的动态库。
+
+3. 链接静态库 `libpaddle_capi_layers.a`和`libpaddle_capi_engine.a`
+    - 使用PaddlePaddle C-API 开发预测程序链接`libpaddle_capi_whole.a`时，需注意：
+        1. 这种链接方式主要用于移动端预测。
+        1. 为了减少生成链接库的大小把`libpaddle_capi_whole.a`拆成以上两个静态链接库。
+        1. 需指定`-Wl,--whole-archive -lpaddle_capi_layers` 和 `-Wl,--no-whole-archive -lpaddle_capi_engine` 进行链接。
+        1. 第三方依赖库需要按照与方式2同样方法显示地进行链接。
diff --git a/doc/howto/usage/capi/images/csr.png b/doc/howto/usage/capi/images/csr.png
new file mode 100644
index 0000000000..3dc10b8de4
Binary files /dev/null and b/doc/howto/usage/capi/images/csr.png differ
diff --git a/doc/howto/usage/capi/images/sequence_data.png b/doc/howto/usage/capi/images/sequence_data.png
new file mode 100644
index 0000000000..6e47a46b89
Binary files /dev/null and b/doc/howto/usage/capi/images/sequence_data.png differ
diff --git a/doc/howto/usage/capi/images/workflow_of_CAPI.png b/doc/howto/usage/capi/images/workflow_of_CAPI.png
new file mode 100644
index 0000000000..a4399ade04
Binary files /dev/null and b/doc/howto/usage/capi/images/workflow_of_CAPI.png differ
diff --git a/doc/howto/usage/capi/index_cn.rst b/doc/howto/usage/capi/index_cn.rst
new file mode 100644
index 0000000000..fd774fbc74
--- /dev/null
+++ b/doc/howto/usage/capi/index_cn.rst
@@ -0,0 +1,9 @@
+PaddlePaddle C-API
+==================
+
+..  toctree::
+  :maxdepth: 1
+
+  compile_paddle_lib_cn.md
+  organization_of_the_inputs_cn.md
+  workflow_of_capi_cn.md
diff --git a/doc/howto/usage/capi/organization_of_the_inputs_cn.md b/doc/howto/usage/capi/organization_of_the_inputs_cn.md
new file mode 100644
index 0000000000..a889ae4ffa
--- /dev/null
+++ b/doc/howto/usage/capi/organization_of_the_inputs_cn.md
@@ -0,0 +1,285 @@
+## 输入/输出数据组织
+
+这篇文档介绍在使用 PaddlePaddle C-API 时如何组织输入数据，以及如何解析神经网络前向计算的输出结果。
+
+### 输入/输出数据类型
+在C-API中，按照基本数据类型在PaddlePaddle内部的定义和实现，输入数据可分为：
+1. 一维整型数组
+1. 二维浮点型矩阵
+    - 稠密矩阵
+    - 稀疏矩阵
+
+说明：
+1. 一维数组**仅支持整型值**；
+    - 常用于自然语言处理任务，例如：表示词语在词典中的序号；
+    - 分类任务中类别标签；
+1. 逻辑上高于二维的数据（例如含有多个通道的图片，视频等）在程序实现中都会转化为二维矩阵，转化方法在相应的领域都有通用解决方案，需要使用者自己了解并完成转化；
+1. 二维矩阵可以表示行向量和列向量，任何时候如果需要浮点型数组（向量），都应使用C-API中的矩阵来表示，而不是C-API中的一维数组。
+1. 不论是一维整型数组还是二维浮点数矩阵，**为它们附加上序列信息将变成序列输入。PaddlePaddle 会通过判数据是否附带有序列信息来判断一个向量/矩阵是否是一个序列**。当非序列输入时，无需关心和处理序列信息。关于什么是“序列信息”，下文会详细进行介绍。
+
+### 基本使用概念
+
+- 在PaddlePaddle内部，神经网络中一个计算层的输入/输出被组织为一个 `Argument` 结构体，如果神经网络有多个输入或者多个输出，每一个输入/输出都会对应有自己的`Argument`。
+- `Argument` 并不真正“存储”数据，而是将输入/输出信息有机地组织在一起。
+- 在`Argument`内部由`IVector`（对应着上文提到的一维整型数组）和`Matrix`（对应着上文提到的二维浮点型矩阵）来实际存储数据；由 `Sequence Start Positions` (下文详细解释) 来描述输入/输出的序列信息。
+
+- **注**：
+    1. 这篇文档之后部分将会统一使用`argument`来特指PaddlePaddle中神经网络计算层一个输入/输出数据。
+    1. 使用`paddle_ivector`来特指PaddlePaddle中的一维整型数组。
+    1. 使用`paddle_matrix`来特指PaddlePaddle中的二维浮点型矩阵。
+
+### 组织输入数据
+- 一维整型数组
+
+    概念上可以将`paddle_ivector`理解为一个一维的整型数组，通常用于表示离散的类别标签，或是在自然语言处理任务中表示词语在字典中的序号。下面的代码片段创建了含有三个元素`1`、`2`、`3`的`paddle_ivector`。
+    ```c
+    int ids[] = {1, 2, 3};
+     paddle_ivector ids_array =
+         paddle_ivector_create(ids, sizeof(ids) / sizeof(int), false, false);
+     CHECK(paddle_arguments_set_ids(in_args, 0, ids_array));
+    ```
+
+- **稠密矩阵**
+    - 一个`m×n`的稠密矩阵是一个由`m`行`n`列元素排列成的矩形阵列，矩阵里的元素是浮点数。对神经网络来说，矩阵的高度`m`是一次预测接受的样本数目，宽度$n$是神经网络定义时，`paddle.layer.data`的`size`。
+    - 下面的代码片段创建了一个高度为1，宽度为`layer_size`的稠密矩阵，矩阵中每个元素的值随机生成。
+
+    ```c
+    paddle_matrix mat = paddle_matrix_create(
+                            /* height = batch size */ 1,
+                            /* width = dimensionality of the data layer */ layer_size,
+                            /* whether to use GPU */ false);
+
+    paddle_real* array;
+    // Get the pointer pointing to the start address of the first row of the
+    // created matrix.
+    CHECK(paddle_matrix_get_row(mat, 0, &array));
+
+    // Fill the matrix with a randomly generated test sample.
+    srand(time(0));
+    for (int i = 0; i < layer_size; ++i) {
+      array[i] = rand() / ((float)RAND_MAX);
+    }
+
+    // Assign the matrix to the argument.
+    CHECK(paddle_arguments_set_value(in_args, 0, mat));
+    ```
+
+- **稀疏矩阵**
+
+  PaddlePaddle C-API 中 稀疏矩阵使用[CSR（Compressed Sparse Row Format）](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format))格式存储。下图是CSR存储稀疏矩阵的示意图。
+  <p align="center">
+  <img src="https://user-images.githubusercontent.com/5842774/34159369-009fd328-e504-11e7-9e08-36bc6dc5e505.png" width=700><br> 图1. 稀疏矩阵存储示意图
+  </p>
+
+  CSR存储格式通过：（1）非零元素的值（上图中的`values`）；（2）行偏移(上图中的`row offsets`)：每一行元素在`values`中的起始偏移，`row offsets`中元素个数总是等于行数 + 1；（3）非零元素的列号（上图中的`column indices`）来确定稀疏矩阵的内容。
+
+  在PaddlePaddle C-API中，通过调用以下接口创建稀疏矩阵：
+
+  ```c
+  PD_API paddle_matrix paddle_matrix_create_sparse(
+      uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
+  ```
+
+  1. 创建稀疏矩阵时需要显示地指定矩阵的（1）高度（`height`，在神经网络中等于一次预测处理的样本数）（2）宽度（`width`，`paddle.layer.data`的`size`）以及（3）非零元个数（`nnz`）。
+  1. 当上述接口第4个参数`isBinary`指定为`true`时，**只需要设置行偏移（`row_offset`）和列号(`colum indices`)，不需要提供元素值（`values`）**，这时行偏移和列号指定的元素默认其值为1。
+
+  下面的代码片段创建了一个CPU上的二值稀疏矩阵：
+
+  ```c
+  paddle_matrix mat = paddle_matrix_create_sparse(1, layer_size, nnz, true, false);
+  int colIndices[] = {9, 93, 109};  // layer_size here is greater than 109.
+  int rowOffset[] = {0, sizeof(colIndices) / sizeof(int)};
+
+  CHECK(paddle_matrix_sparse_copy_from(mat,
+                                 rowOffset,
+                                 sizeof(rowOffset) / sizeof(int),
+                                 colIndices,
+                                 (colIndices) / sizeof(int),
+                                 NULL /*values array is NULL.*/,
+                                 0 /*size of the value arrary is 0.*/));
+  CHECK(paddle_arguments_set_value(in_args, 0, mat));
+  ```
+  下面的代码片段在创建了一个CPU上的带元素值的稀疏矩阵：
+  ```c
+  paddle_matrix mat = paddle_matrix_create_sparse(1, layer_size, nnz, false, false);
+  int colIndices[] = {9, 93, 109};  // layer_size here is greater than 109.
+  int rowOffset[] = {0, sizeof(colIndices) / sizeof(int)};
+  float values[] = {0.5, 0.5, 0.5};
+
+  CHECK(paddle_matrix_sparse_copy_from(mat,
+                                 rowOffset,
+                                 sizeof(rowOffset) / sizeof(int),
+                                 colIndices,
+                                 sizeof(colIndices) / sizeof(int),
+                                 values,
+                                 sizeof(values) / sizeof(float)));
+  ```
+  注意事项：
+  1. 移动端预测**不支持**稀疏矩阵及相关的接口。
+
+### 组织序列信息
+
+多个排成一列的元素（可以是整型、浮点数、浮点数向量等）构成一个序列，元素之间的顺序是序列所携带的重要信息。不同序列可能会含有不同数目个元素。在 PaddlePaddle 中，序列输入/输出数据是在上文介绍的**数据输入（一维整型数组，二维浮点数矩阵）基础上，附加上序列信息**。下面详细解释什么是“序列信息”。
+
+我们将神经网络一次计算接受的所有输入样本称之为一个`batch`（可以含有一条或多条样本），每一个序列在整个`batch`中的偏移，就是PaddlePaddle中所指的**序列信息**，称之为“sequence start positions”。PaddlePaddle 支持两种序列类型：
+
+1. 单层序列
+    - 序列中的每一个元素是非序列，是进行计算的基本单位，不可再进行拆分。
+    - 例如：自然语言中的句子是一个序列，序列中的元素是词语；
+1. 双层序列
+    - 序列中的每一个元素又是一个序列。
+    - 例如：自然语言中的段落是一个双层序列；段落是由句子构成的序列；句子是由词语构成的序列。
+    - 双层序列在处理长序列的任务或是构建层级模型时会发挥作用。
+
+这篇文档之后部分会统一使用`sequence_start_positions`来特指：PaddlePaddle中神经网络计算层输入/输出所携带的序列信息。
+
+对双层序列来讲，不仅要提供每一个外层序列在整个`batch`中的偏移，每一个外层序列又含有若干个内层序列，需要同时提供每一个内层序列在整个`batch`中的偏移。也就是说：**双层序列需要设置分别为外层序列和内层序列分别设置`sequence_start_positions`信息**。
+
+**注：**
+1. 不论序列中的元素在内存中占用多少实际存储空间，`sequence_start_positions`表示的偏移是以“序列中的一个元素”作为统计的基本单位，而不是相对`batch`起始存储地址以数据的存储大小为单位的偏移。
+1. 非序列输入不携带`sequence_start_positions`，非序列输入无需构造`sequence_start_positions`。
+1. **不论是单层序列还是双层序列的序列信息，都使用`paddle_ivector`（也就是PaddlePaddle中的一维整型数组）来存储。**
+
+图2 是PaddlePaddle中单层序列和双层序列存储示意图。
+<p align="center">
+<img src="https://user-images.githubusercontent.com/5842774/34159714-1f81a9be-e505-11e7-8a8a-4902146ec899.png" width=800><br>图2. 序列输入示意图
+</p>
+
+- 单层序列
+
+    图2 (a) 展示了一个含有4个序列的`batch`输入：
+    1. 4个序列的长度分别为：5、3、2、4；
+    1. 这时的`sequence_start_positions`为：`[0, 5, 8, 10, 14]`；
+    1. 本地训练. 不论数据域是`paddle_ivector`类型还是`paddle_matrix`类型，都可以通过调用下面的接口为原有的数据输入附加上序列信息，使之变为一个单层序列输入，代码片段如下：
+
+    ```c
+    int seq_pos_array[] = {0, 5, 8, 10, 14};
+    paddle_ivector seq_pos = paddle_ivector_create(
+        seq_pos_array, sizeof(seq_pos_array) / sizeof(int), false, false);
+    // Suppose the network only has one input data layer.
+    CHECK(paddle_arguments_set_sequence_start_pos(in_args, 0, 0, seq_pos));
+    ```
+
+- 双层序列
+
+    图2 (b) 展示了一个含有4个序列的`batch`输入；
+    1. 4个序列的长度分别为：5、3、2、4；这四个序列又分别含有3、2、1、2个子序列；
+    1. 这时的需要同时提供：
+        - 外层序列在`batch`中的起始偏移`：[0, 5, 8, 10, 14]`；
+        - 内层序列在`batch`中的起始偏移：`[0, 2, 3, 5, 7， 8， 10， 13， 14]`；
+    1. 不论数据域是`paddle_ivector`类型还是`paddle_matrix`类型，这时需要调用创建序列信息和为`argument`设置序列信息的接口**两次**，分别为数据输入添加外层序列和内层序列的序列信息，使之变为一个双层序列输入，代码片段如下：
+    ```c
+    // set the sequence start positions for the outter sequences.
+    int outter_seq_pos_array[] = {0, 5, 8, 10, 14};
+    paddle_ivector seq_pos =
+        paddle_ivector_create(outter_seq_pos_array,
+                              sizeof(outter_pos_array) / sizeof(int),
+                              false,
+                              false);
+    // The third parameter of this API indicates the sequence level.
+    // 0 for the outter sequence. 1 for the inner sequence.
+    // If the input is a sequence not the nested sequence, the third parameter is
+    // fixed to be 0.
+    CHECK(paddle_arguments_set_sequence_start_pos(in_args, 0, 0, seq_pos));
+
+    // set the sequence start positions for the outter sequences.
+    int inner_seq_pos_array[] = {0, 2, 3, 5, 7， 8， 10， 13， 14};
+    paddle_ivector seq_pos = paddle_ivector_create(
+        inner_pos_array, sizeof(inner_pos_array) / sizeof(int), false, false);
+    // The third parameter of this API indicates the sequence level.
+    // 0 for the outter sequence. 1 for the inner sequence.
+    CHECK(paddle_arguments_set_sequence_start_pos(in_args, 0, 1, seq_pos));
+    ```
+
+注意事项：
+1. 当一个`batch`中含有多个序列，**不支持序列长度为`0`的序列（也就是空输入）** 作为输入。不同计算层对空输入的处理策略有可能不同，潜在会引起未定义行为，或者引起行时错误，请在输入时进行合法性检查。
+
+### Python 端数据类型说明
+
+下表列出了Python端训练接口暴露的数据类型（`paddle.layer.data`函数`type`字段的取值）对应于调用C-API需要创建的数据类型：
+
+<html>
+<table border="2" frame="border">
+<table>
+<thead>
+<tr>
+<th style="text-align:left">Python 端数据类型</th>
+<th style="text-align:left">C-API 输入数据类型</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td style="text-align:left">paddle.data_type.integer_value</td>
+<td style="text-align:left">整型数组，无需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.dense_vector</td>
+<td style="text-align:left">浮点型稠密矩阵，无需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.sparse_binary_vector</td>
+<td style="text-align:left">浮点型稀疏矩阵，无需提供非零元的值，默认为1，无需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.sparse_vector</td>
+<td style="text-align:left">浮点型稀疏矩阵，需提供非零元的值，无需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.integer_value_sequence</td>
+<td style="text-align:left">整型数组，需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.dense_vector_sequence</td>
+<td style="text-align:left">浮点型稠密矩阵，需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.sparse_binary_vector_sequence</td>
+<td style="text-align:left">浮点型稀疏矩阵，无需提供非零元的值，默认为1，需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.sparse_vector_sequence</td>
+<td style="text-align:left">浮点型稀疏矩阵，需提供非零元的值，需附加序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.integer_value_sub_sequence</td>
+<td style="text-align:left">整型数组，需附加双层序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.dense_vector_sub_sequence</td>
+<td style="text-align:left">浮点型稠密矩阵，需附加双层序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.sparse_binary_vector_sub_sequence</td>
+<td style="text-align:left">浮点型稀疏矩阵，无需提供非零元的值，默认为1，需附加双层序列信息</td>
+</tr>
+<tr>
+<td style="text-align:left">paddle.data_type.sparse_vector_sub_sequence</td>
+<td style="text-align:left">浮点型稀疏矩阵，需提供非零元的值，需附加双层序列信息</td>
+</tr>
+</tbody>
+</table>
+</html>
+<br>
+
+
+### 输出数据
+
+PaddlePaddle中一个计算层的输出数据组织方式和输入数据组织方式完全相同。一个输出数据同样被组织为一个`argument`，`argument`通过`paddle_matrix`或`paddle_ivector`存数数据，如果输出是一个序列，那么会携带有`sequence_start_positions`信息。调用C-API相关接口，读取需要的结果即可。
+
+### 总结
+
+- 在PaddlePaddle内部，神经网络中一个计算层的输入/输出被组织为`argument`。
+- `argument`并不真正“存储”数据，而是将输入/输出信息有机地组织在一起。
+- 在`argument`内部由`paddle_ivector`（一维整型数组）和`paddle_matrix`（二维浮点型矩阵）来实际存储数据。
+如果是一个序列输入/输出由 `sequence start positions` 来记录输入/输出的序列信息。
+
+于是，在组织神经网络输入时，需要思考完成以下工作：
+1. 为每一个输入/输出创建`argument`。
+    - C-API 中操作`argument`的接口请查看[argument.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/arguments.h)。
+1. 为每一个`argument`创建`paddle_matrix`或者`paddle_ivector`来存储数据。
+    - C-API 中操作`paddle_ivector`的接口请查看 [vector.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/vector.h)。
+    - C-API 中操作`paddle_matrix`的接口请查看[matrix.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/matrix.h)。
+1. 如果输入是序列数据，需要创建并填写`sequence_start_positions`信息。
+    - 通过调用 [`paddle_arguments_set_sequence_start_pos`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/arguments.h#L137) 来为一个`argument`添加序列信息。
+    - 通过调用 [`paddle_arguments_get_sequence_start_pos`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/arguments.h#L150) 来读取一个`argument`添加序列信息。
+    - 接口说明请查看 [argument.h](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/arguments.h) 文件。
diff --git a/doc/howto/usage/capi/workflow_of_capi_cn.md b/doc/howto/usage/capi/workflow_of_capi_cn.md
new file mode 100644
index 0000000000..e0a42fff12
--- /dev/null
+++ b/doc/howto/usage/capi/workflow_of_capi_cn.md
@@ -0,0 +1,119 @@
+## C-API 使用流程
+
+这篇文档介绍 PaddlePaddle C-API 整体使用流程。
+
+### 使用流程
+
+使用 C-API 的工作流程如图1所示，分为（1）准备预测模型和（2）预测程序开发两大部分。
+
+<p align="center">
+<img src="https://user-images.githubusercontent.com/5842774/34658453-365f73ea-f46a-11e7-9b3f-0fd112b27bae.png" width=500><br> 图1. C-API使用流程示意图
+</p>
+
+- 准备预测模型
+    1. 只将神经网络结构进行序列化。
+        - 只对神经网络结构进行序列化，加载模型需同时指定：网络结构的序列化结果和模型参数存储目录。
+    1. 将网络结构定义和训练结束存储下来的模型参数文件（多个）合并入一个文件。
+        - 神经网络模型结构和训练好的模型将被序列化合并入一个文件。
+        - 预测时只需加载一个文件便于发布。
+    - **注意**：以上两种方式只需选择其一即可。
+- 调用 C-API 开发预测序
+    1. 初始化PaddlePaddle运行环境。
+    1. 加载预测模型。
+    1. 创建神经网络输入，组织输入数据。
+    1. 进行前向计算，获得计算结果。
+    1. 清理和结束。
+
+### 准备预测模型
+
+准备预测模型部分，我们以手写数字识别任务为例进行介绍。手写数字识别任务定义了一个含有[两个隐层的简单全连接网络](https://github.com/PaddlePaddle/book/blob/develop/02.recognize_digits/README.cn.md#softmax回归softmax-regression)，网络接受一幅图片作为输入，将图片分类到 0 ~ 9 类别标签之一。完整代码可以查看[此目录](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense) 中的相关脚本。
+
+调用C-API开发预测程序需要一个训练好的模型，运行[MNIST手写数字识别目录](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)下的[mnist_v2.py](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/examples/model_inference/dense/mnist_v2.py)脚本，在终端执行`python mnist_v2.py`，会使用 PaddlePaddle 内置的 [MNIST 数据集](http://yann.lecun.com/exdb/mnist/)进行训练。训练好的模型默认保存在当前运行目录下的`models`目录中。
+
+下面，我们将训练结束后存储下来的模型转换成预测模型。
+
+1. 序列化神经网络模型配置
+
+    PaddlePaddle 使用 protobuf 来传输网络配置文件中定义的网络结构和相关参数，使用 C-API 进行预测时，需要将网络结构使用 protobuf 进行序列化，写入文件中。
+
+    调用[`paddle.utils.dump_v2_config`](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/utils/dump_v2_config.py)中的`dump_v2_config`函数能够将使用 PaddlePaddle V2 API 定义的神经网络结构 dump 到指定文件中，示例代码如下：
+
+    ```python
+    from paddle.utils.dump_v2_config import dump_v2_config
+    from mnist_v2 import network
+
+    predict = network(is_infer=True)
+    dump_v2_config(predict, "trainer_config.bin", True)
+    ```
+
+    对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例，[`mnist_v2.py`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/mnist_v2.py)脚本集成了序列化神经网络结构的过程，可以直接运行 `python mnist_v2.py --task dump_config` 对神经网络结构进行序列化，结果会写入当前运行目录下的`trainer_config.bin`文件中。
+
+    使用这种方式，需要**在运行时将神经网络的多个可学习参数放在同一个目录中**，C-API可以通过分别指定序列化后的网络结构文件和参数目录来加载训练好的模型。
+
+2. 合并模型文件(可选)
+
+    一些情况为了便于发布，希望能够将序列化后的神经网络结构和训练好的模型参数打包进一个文件。对于这样的需求，可以使用`paddle.utils.merge_model`中的`merge_v2_model`接口对神经网络结构和训练好的参数进行序列化，将序列化结果写入一个文件内。
+
+    代码示例如下：
+
+    ```python
+    from paddle.utils.merge_model import merge_v2_modelss
+    from mnist_v2 import network
+
+    net = network(is_infer=True)
+    param_file = "models/params_pass_4.tar"
+    output_file = "output.paddle.model"
+    merge_v2_model(net, param_file, output_file)
+    ```
+    对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例，可直接运行 `python` [merge_v2_model.py](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/merge_v2_model.py)。序列化结果会写入当前运行目录下的`output.paddle.model`文件中。使用这种方式，运行时C-API可以通过指定`output.paddle.model`文件的路径来加载预测模型。
+
+#### 注意事项
+1. 为使用C-API，在调用`dump_v2_config`序列化神经网络结构时，参数`binary`必须指定为`True`。
+1. **预测使用的网络结构往往不同于训练**，通常需要去掉网络中的：（1）类别标签层；（2）损失函数层；（3）`evaluator`等，只留下核心计算层，请注意是否需要修改网络结构。
+1. 预测时，可以获取网络中定义的任意多个（大于等于一个）层前向计算的结果，需要哪些层的计算结果作为输出，就将这些层加入一个Python list中，作为调用`dump_v2_config`的第一个参数。
+
+### 编写预测代码
+
+预测代码更多详细示例代码请参考[C-API使用示例](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference) 目录下的代码示例。这一节对图1中预测代码编写的5个步骤进行介绍和说明。
+
+#### step 1. 初始化PaddlePaddle运行环境
+第一步需调用[`paddle_init`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/main.h#L27) 初始化PaddlePaddle运行环境，该接口接受两个参数：参数的个数和参数列表。
+
+#### step2. 加载模型
+
+这里介绍C-API使用中的一个重要概念：Gradient Machine。
+
+概念上，在 PaddlePaddle 内部，一个GradientMachine类的对象管理着一组计算层（PaddlePaddle Layers）来完成前向和反向计算，并处理与之相关的所有细节。在调用C-API预测时，只需进行前向计算而无需调用反向计算。这篇文档之后部分会使用`gradient machine`来特指调用PaddlePaddle C-API创建的GradientMachine类的对象。每一个 `gradient machine` 都会管理维护一份训练好的模型，下面是C-API提供的，两种常用的模型加载方式：
+
+1. 调用[`paddle_gradient_machine_load_parameter_from_disk`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/gradient_machine.h#L61)接口，从磁盘加载预测模型。这时`gradient machine`会独立拥有一份训练好的模型；
+1. 调用[`paddle_gradient_machine_create_shared_param`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/gradient_machine.h#L88)接口，与其它`gradient machine`的共享已经加载的预测模型。这种情况多出现在使用多线程预测时，通过多个线程共享同一个模型来减少内存开销。可参考[此示例](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/examples/model_inference/multi_thread/main.c)。
+
+- 注意事项
+    1. 使用PaddlePaddle V2 API训练，模型中所有可学习参数会被存为一个压缩文件，需要手动进行解压，将它们放在同一目录中，C-API不会直接加载 V2 API 存储的压缩文件。
+    1. 如果使用`merge model`方式将神经网络结构和训练好的参数序列化到一个文件，请参考此[示例](https://github.com/PaddlePaddle/Mobile/blob/develop/Demo/linux/paddle_image_recognizer.cpp#L59)。
+    1. 通过灵活使用以上两个接口，加载模型可其它多种方式，例如也可在程序运行过程中再加载另外一个模型。
+
+#### step 3. 创建神经网络输入，组织输入数据
+
+基本使用概念：
+- 在PaddlePaddle内部，神经网络中一个计算层的输入输出被组织为一个 `Argument` 结构体，如果神经网络有多个输入或者多个输出，每一个输入/输出都会对应有自己的`Argument`。
+- `Argument` 并不真正“存储”数据，而是将输入/输出数据有机地组织在一起。
+- 在`Argument`内部由：1. `Matrix`（二维矩阵，存储浮点类型输入/输出）；2. `IVector`（一维数组，**仅用于存储整型值**，多用于自然语言处理任务）来实际存储数据。
+
+C-API支持的所有输入数据类型和他们的组织方式，请参考“输入/输出数据组织”一节。
+
+这篇文档的之后部分会使用`argument`来特指PaddlePaddle C-API中神经网络的一个输入/输出，使用`paddle_matrix`**特指**`argument`中用于存储数据的`Matrix`类的对象。
+
+在组织神经网络输入，获取输出时，需要思考完成以下工作：
+1. 为每一个输入/输出创建`argument`；
+1. 为每一个`argument`创建`paddle_matrix`来存储数据；
+
+与输入不同的是，不需在使用C-API时为输出`argument`的`paddle_matrix`对象分配空间。前向计算之后PaddlePaddle内部已经分配/管理了每个计算层输出的存储空间。
+
+#### step 4. 前向计算
+
+完成上述准备之后，通过调用 [`paddle_gradient_machine_forward`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/gradient_machine.h#L73) 接口完成神经网络的前向计算。
+
+#### step 5. 清理
+
+结束预测之后，对使用的中间变量和资源进行清理和释放。
diff --git a/doc/howto/usage/cluster/fluid_cluster_train_en.md b/doc/howto/usage/cluster/fluid_cluster_train_en.md
new file mode 100644
index 0000000000..419eac51aa
--- /dev/null
+++ b/doc/howto/usage/cluster/fluid_cluster_train_en.md
@@ -0,0 +1,138 @@
+# Fluid Distributed Training
+
+## Introduction
+
+In this article, we'll explain how to config and run distributed training jobs with PaddlePaddle Fluid in a bare metal cluster.
+
+## Preparations
+
+### Get your cluster ready
+
+Prepare your computer nodes in the cluster. Nodes in this cluster can be of any specification that runs PaddlePaddle, and with a unique IP address assigned to it. Make sure they can communicate with each other.
+
+### Have PaddlePaddle installed
+
+PaddlePaddle must be installed on all nodes. If you have GPU cards on your nodes, be sure to properly install drivers and CUDA libraries.
+
+PaddlePaddle build and installation guide can be found from [here](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/index_en.html).
+
+### Update training script
+
+#### Non-cluster training script
+
+Let's take [Deep Learning 101](http://www.paddlepaddle.org/docs/develop/book/01.fit_a_line/index.html)'s first chapter: "fit a line" as an example.
+
+This demo's non-cluster version with fluid API is as follows:
+
+``` python
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+y_predict = fluid.layers.fc(input=x, size=1, act=None)
+y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+
+cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+avg_cost = fluid.layers.mean(x=cost)
+
+sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+sgd_optimizer.minimize(avg_cost)
+
+BATCH_SIZE = 20
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.uci_housing.train(), buf_size=500),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+exe = fluid.Executor(place)
+
+exe.run(fluid.default_startup_program())
+
+PASS_NUM = 100
+for pass_id in range(PASS_NUM):
+    fluid.io.save_persistables(exe, "./fit_a_line.model/")
+    fluid.io.load_persistables(exe, "./fit_a_line.model/")
+    for data in train_reader():
+        avg_loss_value, = exe.run(fluid.default_main_program(),
+                                  feed=feeder.feed(data),
+                                  fetch_list=[avg_cost])
+
+        if avg_loss_value[0] < 10.0:
+            exit(0)  # if avg cost less than 10.0, we think our code is good.
+exit(1)
+```
+
+We created a simple fully connected neural networks training program and handed it to the fluid executor to run for 100 passes.
+
+Now let's try to convert it to a distributed version to run in a cluster.
+
+#### Introducing parameter server
+
+As you see from the non-cluster version of training script, there is only one role in it: the trainer, who does the computing as well as holding parameters. In cluster training, since multi-trainers are working on the same task, they need one centralized place to hold and distribute parameters. This centralized place is called the Parameter Server in PaddlePaddle.
+
+![parameter server architect](src/trainer.png)
+
+Parameter Server in fluid does not only hold parameters but is also assigned with a part of the program. Trainers communicate with parameter servers via send/receive OPs. For more tech detail, please refer to this [document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/dist_refactor/distributed_architecture.md).
+
+Now we need to create program for both trainers and parameter servers, the question is how?
+
+#### Slice the program
+
+Fluid provides a tool called "Distribute Transpiler" to automatically convert the non-cluster program into cluster program.
+
+The idea behind this tool is to find optimize OPs and gradient parameters, slice the program into 2 pieces and connect them with send/receive OP.
+
+Optimize OPs and gradient parameters can be found from the return values of optimizer's minimize function.
+
+To put them together:
+
+``` python
+... #define the program, cost, and create sgd optimizer
+
+optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) #get optimize OPs and gradient parameters
+
+t = fluid.DistributeTranspiler() # create transpiler instance
+# slice the program into 2 pieces with optimizer_ops and gradient parameters list, as well as pserver_endpoints, which is a comma separated list of [IP:PORT] and number of trainers
+t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2) 
+
+... #create executor
+
+# in pserver, run this
+exe.run(fluid.default_startup_program())
+#current_endpoint here means current pserver IP:PORT you wish to run on
+exe.run(t.get_pserver_program(current_endpoint, optimize_ops)) 
+
+# in trainer, run this
+... # define data reader
+exe.run(fluid.default_startup_program())
+for pass_id in range(100):
+    for data in train_reader():
+        exe.run(t.get_trainer_program())
+
+
+```
+
+### E2E demo
+
+Please find the complete demo from [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py). In parameter server node run this in the command line:
+
+``` bash
+PSERVERS=192.168.1.2:6174 SERVER_ENDPOINT=192.168.1.2:6174 TRAINING_ROLE=PSERVER python notest_dist_fit_a_line.py
+```
+
+*please note we assume that your parameter server runs at 192.168.1.2:6174*
+
+Wait until the prompt `Server listening on 192.168.1.2:6174`
+
+Then in 2 of your trainer node run this:
+
+``` bash
+PSERVERS=192.168.1.2:6174 SERVER_ENDPOINT=192.168.1.2:6174 TRAINING_ROLE=TRAINER python notest_dist_fit_a_line.py
+```
+
+*the reason you need to run this command twice in 2 nodes is: in the script we set the trainer count to be 2. You can change this setting on line 50*
+
+Now you have 2 trainers and 1 parameter server up and running.
diff --git a/doc/howto/usage/cluster/src/k8s_train/start_paddle.py b/doc/howto/usage/cluster/src/k8s_train/start_paddle.py
index 935c12bb67..1774f8b640 100755
--- a/doc/howto/usage/cluster/src/k8s_train/start_paddle.py
+++ b/doc/howto/usage/cluster/src/k8s_train/start_paddle.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
diff --git a/doc/howto/usage/cluster/src/word2vec/api_train_v2.py b/doc/howto/usage/cluster/src/word2vec/api_train_v2.py
index c0940f0e56..d449e02023 100644
--- a/doc/howto/usage/cluster/src/word2vec/api_train_v2.py
+++ b/doc/howto/usage/cluster/src/word2vec/api_train_v2.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import gzip
 import math
 
diff --git a/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py b/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py
index 2e6d888712..a5dd347f0b 100644
--- a/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py
+++ b/doc/howto/usage/cluster/src/word2vec/api_train_v2_cluster.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import math
 import os
 import paddle.v2 as paddle
diff --git a/go/pserver/client/c/test/test_mnist.py b/go/pserver/client/c/test/test_mnist.py
index c3a3af55e2..7b50a10afc 100644
--- a/go/pserver/client/c/test/test_mnist.py
+++ b/go/pserver/client/c/test/test_mnist.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2 as paddle
 import gzip
 
diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py
index 8d9c6b9b20..7ef0fca496 100644
--- a/go/pserver/client/c/test/test_train.py
+++ b/go/pserver/client/c/test/test_train.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2 as paddle
 import paddle.v2.dataset.uci_housing as uci_housing
 import paddle.v2.master as master
diff --git a/paddle/api/test/testTrainConfig.py b/paddle/api/test/testTrainConfig.py
index 77e0cd37d5..ab9a83e4a3 100644
--- a/paddle/api/test/testTrainConfig.py
+++ b/paddle/api/test/testTrainConfig.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100, learning_method=AdamOptimizer())
diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/capi/examples/model_inference/common/common.h
index e32f2f9836..9efcbc387e 100644
--- a/paddle/capi/examples/model_inference/common/common.h
+++ b/paddle/capi/examples/model_inference/common/common.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #ifndef __CAPI_EXAMPLE_COMMON_H__
 #define __CAPI_EXAMPLE_COMMON_H__
 #include <stdio.h>
diff --git a/paddle/capi/examples/model_inference/dense/main.c b/paddle/capi/examples/model_inference/dense/main.c
index 5eeaf7e31f..376cd46fb0 100644
--- a/paddle/capi/examples/model_inference/dense/main.c
+++ b/paddle/capi/examples/model_inference/dense/main.c
@@ -3,59 +3,82 @@
 
 #include "../common/common.h"
 
+// Modify this path as needed.
 #define CONFIG_BIN "./trainer_config.bin"
+// Modify this path as needed.
+// This demo assumes that merged model is not used, then this path is the
+// directory storing all the trained parameters.
+// If the model is trained by PaddlePaddle V2 API, the model is saved as
+// a compressed file. You need to uncompress the compressed file first.
+#define MODEL_PATH "models/pass_4"
 
 int main() {
-  // Initalize Paddle
+  // Initalize the PaddlePaddle runtime environment.
   char* argv[] = {"--use_gpu=False"};
   CHECK(paddle_init(1, (char**)argv));
 
-  // Reading config binary file. It is generated by `convert_protobin.sh`
+  // Read the binary configuration file generated by `convert_protobin.sh`
   long size;
   void* buf = read_config(CONFIG_BIN, &size);
 
-  // Create a gradient machine for inference.
+  // Create the gradient machine for inference.
   paddle_gradient_machine machine;
   CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
-  CHECK(paddle_gradient_machine_randomize_param(machine));
 
-  // Loading parameter. Uncomment the following line and change the directory.
-  // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
-  //                                                "./some_where_to_params"));
+  // Load the trained model. Modify the parameter MODEL_PATH to set the correct
+  // path of the trained model.
+  CHECK(paddle_gradient_machine_load_parameter_from_disk(machine, MODEL_PATH));
+
+  // Inputs and outputs of the network are organized as paddle_arguments object
+  // in C-API. In the comments below, "argument" specifically means one input of
+  // the neural network in PaddlePaddle C-API.
   paddle_arguments in_args = paddle_arguments_create_none();
 
-  // There is only one input of this network.
+  // There is only one data layer in this demo MNIST network, invoke this
+  // function to create one argument.
   CHECK(paddle_arguments_resize(in_args, 1));
 
-  // Create input matrix.
-  paddle_matrix mat = paddle_matrix_create(/* sample_num */ 1,
-                                           /* size */ 784,
-                                           /* useGPU */ false);
-  srand(time(0));
+  // Each argument needs one matrix or one ivector (integer vector, for sparse
+  // index input, usually used in NLP task) to holds the real input data.
+  // In the comments below, "matrix" specifically means the object needed by
+  // argument to hold the data. Here we create the matrix for the above created
+  // agument to store the testing samples.
+  paddle_matrix mat =
+      paddle_matrix_create(/* height = batch size */ 1,
+                           /* width = dimensionality of the data layer */ 784,
+                           /* whether to use GPU */ false);
 
   paddle_real* array;
-
-  // Get First row.
+  // Get the pointer pointing to the start address of the first row of the
+  // created matrix.
   CHECK(paddle_matrix_get_row(mat, 0, &array));
 
+  // Fill the matrix with a randomly generated test sample.
+  srand(time(0));
   for (int i = 0; i < 784; ++i) {
     array[i] = rand() / ((float)RAND_MAX);
   }
 
+  // Assign the matrix to the argument.
   CHECK(paddle_arguments_set_value(in_args, 0, mat));
 
+  // Create the output argument.
   paddle_arguments out_args = paddle_arguments_create_none();
+
+  // Invoke the forward computation.
   CHECK(paddle_gradient_machine_forward(machine,
                                         in_args,
                                         out_args,
-                                        /* isTrain */ false));
-  paddle_matrix prob = paddle_matrix_create_none();
+                                        /* is train taks or not */ false));
 
+  // Create the matrix to hold the forward result of the neural network.
+  paddle_matrix prob = paddle_matrix_create_none();
+  // Access the matrix of the output argument, the predicted result is stored in
+  // which.
   CHECK(paddle_arguments_get_value(out_args, 0, prob));
 
   uint64_t height;
   uint64_t width;
-
   CHECK(paddle_matrix_get_shape(prob, &height, &width));
   CHECK(paddle_matrix_get_row(prob, 0, &array));
 
@@ -68,6 +91,7 @@ int main() {
   }
   printf("\n");
 
+  // The cleaning up.
   CHECK(paddle_matrix_destroy(prob));
   CHECK(paddle_arguments_destroy(out_args));
   CHECK(paddle_matrix_destroy(mat));
diff --git a/paddle/capi/examples/model_inference/dense/merge_v2_model.py b/paddle/capi/examples/model_inference/dense/merge_v2_model.py
new file mode 100644
index 0000000000..760a485a53
--- /dev/null
+++ b/paddle/capi/examples/model_inference/dense/merge_v2_model.py
@@ -0,0 +1,21 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from paddle.utils.merge_model import merge_v2_model
+
+from mnist_v2 import network
+
+net = network(is_infer=True)
+param_file = "models/params_pass_4.tar"
+output_file = "output.paddle.model"
+merge_v2_model(net, param_file, output_file)
diff --git a/paddle/capi/examples/model_inference/dense/mnist_v2.py b/paddle/capi/examples/model_inference/dense/mnist_v2.py
new file mode 100644
index 0000000000..174436bd1d
--- /dev/null
+++ b/paddle/capi/examples/model_inference/dense/mnist_v2.py
@@ -0,0 +1,130 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import os
+import sys
+import gzip
+import logging
+import argparse
+from PIL import Image
+import numpy as np
+
+import paddle.v2 as paddle
+from paddle.utils.dump_v2_config import dump_v2_config
+
+logger = logging.getLogger("paddle")
+logger.setLevel(logging.INFO)
+
+
+def multilayer_perceptron(img, layer_size, lbl_dim):
+    for idx, size in enumerate(layer_size):
+        hidden = paddle.layer.fc(input=(img if not idx else hidden),
+                                 size=size,
+                                 act=paddle.activation.Relu())
+    return paddle.layer.fc(input=hidden,
+                           size=lbl_dim,
+                           act=paddle.activation.Softmax())
+
+
+def network(input_dim=784, lbl_dim=10, is_infer=False):
+    images = paddle.layer.data(
+        name='pixel', type=paddle.data_type.dense_vector(input_dim))
+
+    predict = multilayer_perceptron(
+        images, layer_size=[128, 64], lbl_dim=lbl_dim)
+
+    if is_infer:
+        return predict
+    else:
+        label = paddle.layer.data(
+            name='label', type=paddle.data_type.integer_value(lbl_dim))
+        return paddle.layer.classification_cost(input=predict, label=label)
+
+
+def main(task="train", use_gpu=False, trainer_count=1, save_dir="models"):
+    if task == "train":
+        if not os.path.exists(save_dir):
+            os.mkdir(save_dir)
+
+        paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)
+        cost = network()
+        parameters = paddle.parameters.create(cost)
+        optimizer = paddle.optimizer.Momentum(
+            learning_rate=0.1 / 128.0,
+            momentum=0.9,
+            regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
+
+        trainer = paddle.trainer.SGD(cost=cost,
+                                     parameters=parameters,
+                                     update_equation=optimizer)
+
+        def event_handler(event):
+            if isinstance(event, paddle.event.EndIteration):
+                if event.batch_id % 100 == 0:
+                    logger.info("Pass %d, Batch %d, Cost %f, %s" %
+                                (event.pass_id, event.batch_id, event.cost,
+                                 event.metrics))
+            if isinstance(event, paddle.event.EndPass):
+                with gzip.open(
+                        os.path.join(save_dir, "params_pass_%d.tar" %
+                                     event.pass_id), "w") as f:
+                    trainer.save_parameter_to_tar(f)
+
+        trainer.train(
+            reader=paddle.batch(
+                paddle.reader.shuffle(
+                    paddle.dataset.mnist.train(), buf_size=8192),
+                batch_size=128),
+            event_handler=event_handler,
+            num_passes=5)
+    elif task == "dump_config":
+        predict = network(is_infer=True)
+        dump_v2_config(predict, "trainer_config.bin", True)
+    else:
+        raise RuntimeError(("Error value for parameter task. "
+                            "Available options are: train and dump_config."))
+
+
+def parse_cmd():
+    parser = argparse.ArgumentParser(
+        description="PaddlePaddle MNIST demo for CAPI.")
+    parser.add_argument(
+        "--task",
+        type=str,
+        required=False,
+        help=("A string indicating the taks type. "
+              "Available options are: \"train\", \"dump_config\"."),
+        default="train")
+    parser.add_argument(
+        "--use_gpu",
+        type=bool,
+        help=("A bool flag indicating whether to use GPU device or not."),
+        default=False)
+    parser.add_argument(
+        "--trainer_count",
+        type=int,
+        help=("This parameter is only used in training task. It indicates "
+              "how many computing threads are created in training."),
+        default=1)
+    parser.add_argument(
+        "--save_dir",
+        type=str,
+        help=("This parameter is only used in training task. It indicates "
+              "path of the directory to save the trained models."),
+        default="models")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_cmd()
+    main(args.task, args.use_gpu, args.trainer_count, args.save_dir)
diff --git a/paddle/capi/examples/model_inference/dense/trainer_config.py b/paddle/capi/examples/model_inference/dense/trainer_config.py
index 873ec119e7..fbf0890357 100644
--- a/paddle/capi/examples/model_inference/dense/trainer_config.py
+++ b/paddle/capi/examples/model_inference/dense/trainer_config.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 img = data_layer(name='pixel', size=784)
diff --git a/paddle/capi/examples/model_inference/sequence/trainer_config.py b/paddle/capi/examples/model_inference/sequence/trainer_config.py
index 6bbc7a909a..c1326bb955 100644
--- a/paddle/capi/examples/model_inference/sequence/trainer_config.py
+++ b/paddle/capi/examples/model_inference/sequence/trainer_config.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 WORD_DIM = 3000
diff --git a/paddle/capi/examples/model_inference/sparse_binary/main.c b/paddle/capi/examples/model_inference/sparse_binary/main.c
index 8ba67aee56..029b94ee63 100644
--- a/paddle/capi/examples/model_inference/sparse_binary/main.c
+++ b/paddle/capi/examples/model_inference/sparse_binary/main.c
@@ -1,5 +1,6 @@
 #include <paddle/capi.h>
 #include <time.h>
+
 #include "../common/common.h"
 
 #define CONFIG_BIN "./trainer_config.bin"
@@ -9,16 +10,18 @@ int main() {
   char* argv[] = {"--use_gpu=False"};
   CHECK(paddle_init(1, (char**)argv));
 
-  // Reading config binary file. It is generated by `convert_protobin.sh`
+  // Read the binary configuration file which is generated by
+  // `convert_protobin.sh`
   long size;
   void* buf = read_config(CONFIG_BIN, &size);
 
-  // Create a gradient machine for inference.
+  // Create the gradient machine for inference.
   paddle_gradient_machine machine;
   CHECK(paddle_gradient_machine_create_for_inference(&machine, buf, (int)size));
   CHECK(paddle_gradient_machine_randomize_param(machine));
 
-  // Loading parameter. Uncomment the following line and change the directory.
+  // Load the trained parameters. Uncomment the following line and change the
+  // directory as needed.
   // CHECK(paddle_gradient_machine_load_parameter_from_disk(machine,
   //                                                "./some_where_to_params"));
   paddle_arguments in_args = paddle_arguments_create_none();
@@ -26,7 +29,7 @@ int main() {
   // There is only one input of this network.
   CHECK(paddle_arguments_resize(in_args, 1));
 
-  // Create input matrix.
+  // Create the input matrix.
   paddle_matrix mat = paddle_matrix_create_sparse(1, 784, 3, true, false);
   srand(time(0));
   paddle_real* array;
diff --git a/paddle/capi/tests/test_predict_network.py b/paddle/capi/tests/test_predict_network.py
index 82ef5cb1a7..46a985d476 100644
--- a/paddle/capi/tests/test_predict_network.py
+++ b/paddle/capi/tests/test_predict_network.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100)
diff --git a/paddle/cuda/src/avx_mathfun.h b/paddle/cuda/src/avx_mathfun.h
index 2412ed5abc..a0ba71faba 100644
--- a/paddle/cuda/src/avx_mathfun.h
+++ b/paddle/cuda/src/avx_mathfun.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
    AVX implementation of sin, cos, sincos, exp and log
 
diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index ed5f6310f4..597ea959f2 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -47,7 +47,7 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
-    shape_inference data_transform)
+    shape_inference data_transform lod_tensor)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
 
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index 692406b1c3..72743b5fd0 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "paddle/framework/backward.h"
 
diff --git a/paddle/framework/data_device_transform.cc b/paddle/framework/data_device_transform.cc
index b3fd48ae12..d38d87927f 100644
--- a/paddle/framework/data_device_transform.cc
+++ b/paddle/framework/data_device_transform.cc
@@ -31,15 +31,14 @@ static const platform::DeviceContext* GetDeviceContext(
   }
 }
 
-Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place) {
+void DeviceTransform(const Tensor& in, const platform::Place& dst_place,
+                     Tensor* out) {
   VLOG(3) << "DeviceTransform in, src_place " << in.place()
           << " dst_place: " << dst_place;
-  Tensor* out = new Tensor();
   auto* dev_ctx = GetDeviceContext(in.place(), dst_place);
   dev_ctx->Wait();
   Copy(in, dst_place, *dev_ctx, out);
   dev_ctx->Wait();
-  return out;
 }
 
 }  // namespace framework
diff --git a/paddle/framework/data_device_transform.h b/paddle/framework/data_device_transform.h
index bebf0d1b32..b21ed0be34 100644
--- a/paddle/framework/data_device_transform.h
+++ b/paddle/framework/data_device_transform.h
@@ -21,7 +21,8 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DeviceTransform(const Tensor& in, const platform::Place& dst_place);
+void DeviceTransform(const Tensor& in, const platform::Place& dst_place,
+                     Tensor* out);
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/data_layout.h b/paddle/framework/data_layout.h
index 3ab976ecac..31817251ed 100644
--- a/paddle/framework/data_layout.h
+++ b/paddle/framework/data_layout.h
@@ -14,7 +14,9 @@ limitations under the License. */
 
 #pragma once
 
-#include <iostream>
+#include <cctype>
+#include <ostream>
+
 #include "paddle/platform/enforce.h"
 
 namespace paddle {
@@ -27,12 +29,19 @@ enum class DataLayout {
 };
 
 inline DataLayout StringToDataLayout(const std::string& str) {
-  if (str == "NHWC" || str == "nhwc") {
+  std::string s(str);
+  for (size_t i = 0; i < s.size(); ++i) {
+    s[i] = toupper(s[i]);
+  }
+
+  if (s == "NHWC") {
     return DataLayout::kNHWC;
-  } else if (str == "NCHW" || str == "nchw") {
+  } else if (s == "NCHW") {
     return DataLayout::kNCHW;
+  } else if (s == "ANYLAYOUT") {
+    return DataLayout::kAnyLayout;
   } else {
-    PADDLE_THROW("Unknown storage order string: %s", str);
+    PADDLE_THROW("Unknown storage order string: %s", s);
   }
 }
 
@@ -49,7 +58,7 @@ inline std::string DataLayoutToString(const DataLayout& data_layout) {
   }
 }
 
-inline std::ostream& operator<<(std::ostream& out, DataLayout l) {
+inline std::ostream& operator<<(std::ostream& out, const DataLayout& l) {
   out << DataLayoutToString(l);
   return out;
 }
diff --git a/paddle/framework/data_transform.cc b/paddle/framework/data_transform.cc
index e56edb9539..d826f0edac 100644
--- a/paddle/framework/data_transform.cc
+++ b/paddle/framework/data_transform.cc
@@ -19,16 +19,14 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DataTransform(const OpKernelType& expected_kernel_type,
-                      const OpKernelType& kernel_type_for_var,
-                      const Tensor& input_tensor) {
-  Tensor* out = nullptr;
+void DataTransform(const OpKernelType& expected_kernel_type,
+                   const OpKernelType& kernel_type_for_var,
+                   const Tensor& input_tensor, Tensor* out) {
   if (!platform::is_same_place(kernel_type_for_var.place_,
                                expected_kernel_type.place_)) {
-    out = DeviceTransform(input_tensor, expected_kernel_type.place_);
+    DeviceTransform(input_tensor, expected_kernel_type.place_, out);
   }
   PADDLE_ENFORCE_NOT_NULL(out, "out should not be null");
-  return out;
 }
 
 void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
diff --git a/paddle/framework/data_transform.h b/paddle/framework/data_transform.h
index ee95c7e856..a4b7890237 100644
--- a/paddle/framework/data_transform.h
+++ b/paddle/framework/data_transform.h
@@ -30,9 +30,9 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 
-Tensor* DataTransform(const OpKernelType& expected_kernel_type,
-                      const OpKernelType& kernel_type_for_var,
-                      const Tensor& input_tensor);
+void DataTransform(const OpKernelType& expected_kernel_type,
+                   const OpKernelType& kernel_type_for_var,
+                   const Tensor& input_tensor, Tensor* out);
 
 void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor,
                             Variable& out_var);
diff --git a/paddle/framework/dim.h b/paddle/framework/dim.h
index 04d4b0e604..ec17d7c615 100644
--- a/paddle/framework/dim.h
+++ b/paddle/framework/dim.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #include <iostream>
diff --git a/paddle/framework/dim_test.cu b/paddle/framework/dim_test.cu
index 0a6a87669c..2bcab7c5c2 100644
--- a/paddle/framework/dim_test.cu
+++ b/paddle/framework/dim_test.cu
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include <thrust/device_vector.h>
 #include <sstream>
 
diff --git a/paddle/framework/eigen_test.cc b/paddle/framework/eigen_test.cc
index bc4a2db32c..c6f77ecfab 100644
--- a/paddle/framework/eigen_test.cc
+++ b/paddle/framework/eigen_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc
index e12bac1d78..4ef82a541e 100644
--- a/paddle/framework/init.cc
+++ b/paddle/framework/init.cc
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <string.h>  // for strdup
 #include <algorithm>
 #include <string>
 
@@ -60,7 +61,9 @@ void InitDevices() {
 }
 
 void InitGLOG(const std::string &prog_name) {
-  google::InitGoogleLogging(prog_name.c_str());
+  // glog will not hold the ARGV[0] inside.
+  // Use strdup to alloc a new string.
+  google::InitGoogleLogging(strdup(prog_name.c_str()));
   google::InstallFailureSignalHandler();
 }
 
diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc
index b179e270e1..3f2c2e64eb 100644
--- a/paddle/framework/lod_tensor.cc
+++ b/paddle/framework/lod_tensor.cc
@@ -69,6 +69,12 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) {
   return os;
 }
 
+std::string LoDToString(const LoD &lod) {
+  std::ostringstream stream;
+  stream << lod;
+  return stream.str();
+}
+
 LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin,
                  size_t elem_end) {
   PADDLE_ENFORCE_LT(level, in.size());
@@ -129,6 +135,65 @@ bool operator==(const LoD &a, const LoD &b) {
   return true;
 }
 
+bool CheckLoD(const LoD &in, int tensor_height) {
+  if (in.empty()) return true;
+  for (const auto &level : in) {
+    // check: there should be more than 2 offsets existing in each level.
+    if (level.size() < 2) return false;
+    // check: the first offset(the begin offset) of each level should be 0.
+    if (level.front() != 0) return false;
+    // check: all the offsets in a level should be ascending(no same items
+    // allows).
+    if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
+          if (a < b) return true;
+          return false;
+        })) {
+      LOG(INFO) << "ascending error";
+      return false;
+    }
+  }
+  // check: the lowest level's last offset should equals `tensor_height` if
+  //        tensor_height>0.
+  if (tensor_height > 0 && (size_t)tensor_height != in.back().back())
+    return false;
+
+  // check: the higher level's last offset should equals the lower level's
+  // size-1.
+  // NOTE LoD store the levels from top to bottom, so the higher level goes
+  // first.
+  for (size_t level = 0; level < in.size() - 1; level++) {
+    if (in[level].back() != in[level + 1].size() - 1) return false;
+  }
+  return true;
+}
+
+bool CheckAbsLoD(const LoD &in, int tensor_height) {
+  if (in.empty()) return true;
+  for (const auto &level : in) {
+    // check: all the offsets in a level should be ascending(no same items
+    // allows).
+    if (!std::is_sorted(level.begin(), level.begin(), [](size_t a, size_t b) {
+          if (a < b) return true;
+          return false;
+        })) {
+      return false;
+    }
+
+    // check: there should be more than 2 offsets existing in each level.
+    if (level.size() < 2) return false;
+
+    // check: the first offset of each level should be 0, and the last should be
+    // the same(the height of underlying tensor).
+    if (level.front() != 0) return false;
+    if (tensor_height < 0) {
+      tensor_height = level.back();
+    } else if ((size_t)tensor_height != level.back()) {
+      return false;
+    }
+  }
+  return true;
+}
+
 using LoDAndOffset = std::pair<LoD, std::pair<size_t, size_t>>;
 LoDAndOffset GetSubLoDAndAbsoluteOffset(const LoD &lod, size_t start_idx,
                                         size_t end_idx, size_t start_level) {
@@ -224,14 +289,20 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
 std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
     const std::vector<platform::Place> places) const {
   check_memory_size();
-  PADDLE_ENFORCE(dims()[0] % places.size() == 0,
-                 "Batch size should be divided by places size");
-
-  std::vector<LoDTensor> lods;
-  for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-    size_t batch_size = lod().empty() ? dims()[0] : NumElements(0);
-    size_t begin = place_idx * batch_size / places.size();
-    size_t end = (place_idx + 1) * batch_size / places.size();
+  PADDLE_ENFORCE(lod().empty(), "Disable parallel lod for now");
+  size_t result_size = std::min(static_cast<size_t>(dims()[0]), places.size());
+  size_t remainder = dims()[0] % places.size();
+
+  std::vector<LoDTensor> results;
+  results.reserve(result_size);
+
+  int step_width = static_cast<int>(dims()[0] / result_size);
+  for (size_t i = 0; i < result_size; ++i) {
+    int begin = static_cast<int>(i * step_width);
+    int end = static_cast<int>((i + 1) * step_width);
+    if (i + 1 == places.size()) {  // last
+      end += remainder;
+    }
 
     LoDTensor dst;
     if (lod().empty()) {
@@ -259,7 +330,7 @@ std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
     lods.emplace_back(dst);
   }
 
-  return lods;
+  return results;
 }
 
 void LoDTensor::MergeLoDTensor(
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 37753f5f4d..9d1294fdeb 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -60,6 +60,8 @@ using LoD = std::vector<Vector<size_t>>;
 std::ostream& operator<<(std::ostream& os, const LoD& lod);
 std::ostream& operator<<(std::ostream& os, const LoDTensor& t);
 
+std::string LoDToString(const LoD& lod);
+
 LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin,
                  size_t elem_end);
 /*
@@ -69,6 +71,38 @@ LoD ToAbsOffset(const LoD& in);
 
 bool operator==(const LoD& a, const LoD& b);
 
+/*
+ * Check whether this lod's format is valid.
+ *
+ * ATTENTION:
+ *   - Empty lod is treated as valid.
+ *
+ * It will check two things:
+ *
+ *  1. all the offsets in a level should be ascending(no same items allows).
+ *  2. there should be more than 2 offsets existing in each level.
+ *  3. the higher level's last offset should equals the lower level's size-1.
+ *  4. the first offset(the begin offset) of each level should be 0.
+ *  5. the lowest level's last offset should equals `tensor_height` if
+ * tensor_height>0.
+ */
+
+bool CheckLoD(const LoD& in, int tensor_height = -1);
+/*
+ * Check whether this absolute lod's format is valid.
+ *
+ * ATTENTION:
+ *   - Empty lod is treated as valid.
+ *
+ * It will check two things:
+ *  1. all the offsets in a level should be ascending(no same items allows)
+ *  2. there should be more than 2 offsets existing in each level.
+ *  3. the first offset of each level should be 0, and the last should be the
+ *     same(the height of underlying tensor) or `tensor_height` if
+ *     tensor_height>0.
+ */
+bool CheckAbsLoD(const LoD& in, int tensor_height = -1);
+
 /*
  * LoDTensor (Level of details Tensor)
  * see https://en.wikipedia.org/wiki/Level_of_details for reference.
diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc
index ed66a98ebd..2c4d2e10e0 100644
--- a/paddle/framework/lod_tensor_test.cc
+++ b/paddle/framework/lod_tensor_test.cc
@@ -1,15 +1,16 @@
-/*
-  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at
-  http://www.apache.org/licenses/LICENSE-2.0
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
-*/
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 
 #include "paddle/framework/lod_tensor.h"
 
@@ -22,38 +23,6 @@
 namespace paddle {
 namespace framework {
 
-const int kLodTensorSize = 20 * 128;
-
-class LoDTensorTester : public ::testing::Test {
- public:
-  virtual void SetUp() override {
-    // tensor's batch_size: 30
-    // 3 levels
-    // 0 10 20
-    // 0 5 10 15 20
-    // 0 2 5 7 10 12 15 20
-    LoD lod;
-    lod.push_back(std::vector<size_t>{0, 2, 3});
-    lod.push_back(std::vector<size_t>{0, 2, 5, 8});
-    lod.push_back(std::vector<size_t>{0, 2, 5, 7, 10, 12, 15, 17, 20});
-
-    ASSERT_EQ(lod.size(), 3UL);
-
-    lod_tensor_.Resize({20 /*batch size*/, 128 /*dim*/});
-    // malloc memory
-    float* dst_ptr = lod_tensor_.mutable_data<float>(place);
-    for (int i = 0; i < kLodTensorSize; ++i) {
-      dst_ptr[i] = i;
-    }
-
-    lod_tensor_.set_lod(lod);
-  }
-
- protected:
-  platform::CPUPlace place;
-  LoDTensor lod_tensor_;
-};
-
 TEST(LodExpand, test) {
   LoD lod{{0, 2}};
   LoDTensor tensor;
@@ -196,5 +165,53 @@ TEST(LoD, MergeLoDTensor) {
   EXPECT_EQ(lod_tensor.lod(), lod);
 }
 
+TEST(LoD, CheckLoD) {
+  LoD relative_lod;
+  relative_lod.push_back(std::vector<size_t>({0, 2}));
+  relative_lod.push_back(std::vector<size_t>({0, 1, 3}));
+  relative_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
+
+  // check compatible
+  ASSERT_TRUE(CheckLoD(relative_lod));
+  relative_lod[1].back()++;
+  ASSERT_FALSE(CheckLoD(relative_lod));
+  relative_lod[1].back()--;  // recover it
+
+  // check empty
+  LoD empty_lod;
+  ASSERT_TRUE(CheckLoD(empty_lod));
+
+  // check less than 2 offsets in a level
+  LoD some_lod0;
+  some_lod0.push_back(std::vector<size_t>({0}));
+  ASSERT_FALSE(CheckLoD(some_lod0));
+
+  // check with underlying tensor storage.
+  ASSERT_TRUE(CheckLoD(relative_lod, 5));
+  ASSERT_FALSE(CheckLoD(relative_lod, 9));
+}
+
+TEST(LoD, CheckAbsLoD) {
+  LoD relative_lod;
+  relative_lod.push_back(std::vector<size_t>({0, 2}));
+  relative_lod.push_back(std::vector<size_t>({0, 1, 3}));
+  relative_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
+
+  auto abs_lod = ToAbsOffset(relative_lod);
+
+  ASSERT_TRUE(CheckAbsLoD(abs_lod));
+
+  // check less than 2 offsets in a level.
+
+  // check the last item should be compatible with tensor height.
+  abs_lod.back().back()++;
+  ASSERT_FALSE(CheckAbsLoD(abs_lod));
+  abs_lod.back().back()--;  // restore
+
+  // check less than 2 offsets in a lod.
+  LoD abs_lod0;
+  abs_lod0.push_back(std::vector<size_t>({0}));
+  ASSERT_FALSE(CheckAbsLoD(abs_lod0));
+}
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu
index e8508ad265..0f46e9b1e3 100644
--- a/paddle/framework/lod_tensor_test.cu
+++ b/paddle/framework/lod_tensor_test.cu
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/framework/op_kernel_type.h b/paddle/framework/op_kernel_type.h
index 053897784c..312bd5f892 100644
--- a/paddle/framework/op_kernel_type.h
+++ b/paddle/framework/op_kernel_type.h
@@ -85,5 +85,10 @@ inline std::string KernelTypeToString(const OpKernelType& kernel_key) {
   return stream.str();
 }
 
+inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) {
+  return (!platform::places_are_same_class(l.place_, r.place_)) ||
+         (l.data_type_ != r.data_type_) || (l.data_layout_ != r.data_layout_);
+}
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h
index d75c0233e8..5de9ae559c 100644
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
@@ -177,16 +177,16 @@ class OpKernelRegistrar : public Registrar {
 /**
  * Macro to register OperatorKernel.
  */
-#define REGISTER_OP_KERNEL(op_type, DEVICE_TYPE, place_class, ...)        \
-  STATIC_ASSERT_GLOBAL_NAMESPACE(                                         \
-      __reg_op_kernel_##op_type##_##DEVICE_TYPE##__,                      \
-      "REGISTER_OP_KERNEL must be called in global namespace");           \
-  static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \
-      __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type,       \
-                                                          #DEVICE_TYPE);  \
-  int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() {                \
-    __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch();          \
-    return 0;                                                             \
+#define REGISTER_OP_KERNEL(op_type, LIBRARY_TYPE, place_class, ...)        \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                          \
+      __reg_op_kernel_##op_type##_##LIBRARY_TYPE##__,                      \
+      "REGISTER_OP_KERNEL must be called in global namespace");            \
+  static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__>  \
+      __op_kernel_registrar_##op_type##_##LIBRARY_TYPE##__(#op_type,       \
+                                                           #LIBRARY_TYPE); \
+  int TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE() {                \
+    __op_kernel_registrar_##op_type##_##LIBRARY_TYPE##__.Touch();          \
+    return 0;                                                              \
   }
 
 #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
@@ -208,14 +208,14 @@ class OpKernelRegistrar : public Registrar {
   static int use_op_itself_##op_type##_ __attribute__((unused)) = \
       TouchOpRegistrar_##op_type()
 
-#define USE_OP_DEVICE_KERNEL(op_type, DEVICE_TYPE)               \
-  STATIC_ASSERT_GLOBAL_NAMESPACE(                                \
-      __use_op_kernel_##op_type##_##DEVICE_TYPE##__,             \
-      "USE_OP_DEVICE_KERNEL must be in global namespace");       \
-  extern int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE(); \
-  static int use_op_kernel_##op_type##_##DEVICE_TYPE##_          \
-      __attribute__((unused)) =                                  \
-          TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE()
+#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE)               \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                 \
+      __use_op_kernel_##op_type##_##LIBRARY_TYPE##__,             \
+      "USE_OP_DEVICE_KERNEL must be in global namespace");        \
+  extern int TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE(); \
+  static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_          \
+      __attribute__((unused)) =                                   \
+          TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE()
 
 // TODO(fengjiayi): The following macros
 // seems ugly, do we have better method?
diff --git a/paddle/framework/op_registry_test.cc b/paddle/framework/op_registry_test.cc
index 66f07b6757..341da8befd 100644
--- a/paddle/framework/op_registry_test.cc
+++ b/paddle/framework/op_registry_test.cc
@@ -368,24 +368,6 @@ TEST(OperatorRegistrar, OpWithMultiKernel) {
 
   // TODO(qiao) add priority back
   // use all available kernels
-  paddle::framework::UseALL();
   op->Run(scope, cuda_place);
   EXPECT_EQ(op_test_value, -10);
-
-  // remove cuda kernels
-  paddle::framework::UseCPU();
-  op->Run(scope, cpu_place);
-
-  EXPECT_EQ(op_test_value, -9);
-
-  // add cuda kernels
-  paddle::framework::UseCUDA();
-  op->Run(scope, cuda_place);
-
-  EXPECT_EQ(op_test_value, -10);
-
-  // use cudnn kernel
-  paddle::framework::UseCUDNN();
-  op->Run(scope, cuda_place);
-  EXPECT_EQ(op_test_value, -20);
 }
diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index ef2c55cc37..84c010df7c 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <gflags/gflags.h>
 #include <glog/logging.h>
 
 #include <algorithm>
@@ -21,61 +22,27 @@ limitations under the License. */
 #include "paddle/framework/shape_inference.h"
 #include "paddle/framework/var_type.h"
 
+DEFINE_bool(op_sync, false,
+            "Default cuda is asynchronous device, set to True will"
+            "force op run in synchronous mode.");
+
 namespace paddle {
 namespace framework {
 
-std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
-
-void UseCPU() {
-  kKernelPriority.clear();
-  /*Plain CPU*/
-  auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kPlain);
-  kKernelPriority.insert(kKernelPriority.begin(), pair0);
-}
-
-void UseMKLDNN() {
-  UseCPU();
-#if PADDLE_WITH_MKLML
-  {
-    /*MKLDNN Kernel*/
-    auto pair0 = std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN);
-    kKernelPriority.insert(kKernelPriority.begin(), pair0);
-  }
-#endif
-}
-
-void UseCUDA() {
-  UseMKLDNN();
-#if PADDLE_WITH_CUDA
-  /*Plain GPU*/
-  auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain);
-  kKernelPriority.insert(kKernelPriority.begin(), pair0);
-#endif
-}
-
-void UseCUDNN() {
-  UseCUDA();
-#if PADDLE_WITH_CUDA
-  if (platform::dynload::HasCUDNN()) {
-    /*CUDNN Kernel*/
-    auto pair0 = std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN);
-    kKernelPriority.insert(kKernelPriority.begin(), pair0);
-  }
-#endif
-}
-
-void UseALL() {
-  UseCPU();
-  UseMKLDNN();
-  UseCUDA();
-  UseCUDNN();
-}
+std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority = {
+    std::make_tuple(platform::CUDAPlace(0), LibraryType::kCUDNN),
+    std::make_tuple(platform::CUDAPlace(0), LibraryType::kPlain),
+    std::make_tuple(platform::CPUPlace(), LibraryType::kMKLDNN),
+    std::make_tuple(platform::CPUPlace(), LibraryType::kPlain),
+};
 
 static DDim GetDims(const Scope& scope, const std::string& name) {
   Variable* var = scope.FindVar(name);
   if (var == nullptr) {
     return DDim({-1});
-  } else if (var->IsType<LoDTensor>()) {
+  }
+
+  if (var->IsType<LoDTensor>()) {
     return var->Get<LoDTensor>().dims();
   } else if (var->IsType<SelectedRows>()) {
     return var->Get<SelectedRows>().GetCompleteDims();
@@ -84,6 +51,21 @@ static DDim GetDims(const Scope& scope, const std::string& name) {
   }
 }
 
+static LoD GetLoD(const Scope& scope, const std::string& name) {
+  Variable* var = scope.FindVar(name);
+  auto default_lod = LoD({{}});
+
+  if (var == nullptr) {
+    return default_lod;
+  }
+
+  if (var->IsType<LoDTensor>()) {
+    return var->Get<LoDTensor>().lod();
+  } else {
+    return default_lod;
+  }
+}
+
 std::string OperatorBase::Input(const std::string& name) const {
   auto& ins = Inputs(name);
   PADDLE_ENFORCE_LE(ins.size(), 1UL,
@@ -125,7 +107,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
     for (size_t i = 0; i < input.second.size(); ++i) {
       ss << input.second[i];
       if (scope) {
-        ss << "(" << GetDims(*scope, input.second[i]) << ")";
+        ss << "[" << GetDims(*scope, input.second[i]) << "]";
+        ss << "(" << GetLoD(*scope, input.second[i]) << ")";
       }
       if (i != input.second.size() - 1) {
         ss << ", ";
@@ -144,7 +127,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
     for (size_t i = 0; i < output.second.size(); ++i) {
       ss << output.second[i];
       if (scope) {
-        ss << "(" << GetDims(*scope, output.second[i]) << ")";
+        ss << "[" << GetDims(*scope, output.second[i]) << "]";
+        ss << "(" << GetLoD(*scope, output.second[i]) << ")";
       }
       if (i != output.second.size() - 1) {
         ss << ", ";
@@ -247,36 +231,33 @@ static bool VarIsTensor(const Variable* var) {
   return var->IsType<LoDTensor>() || var->IsType<SelectedRows>();
 }
 
-static const Tensor* GetTensorFromVar(const Variable* var) {
-  const Tensor* t = nullptr;
+static const Tensor* GetTensorFromVar(Variable* var) {
   if (var->IsType<LoDTensor>()) {
-    t = &(var->Get<LoDTensor>());
+    return var->GetMutable<LoDTensor>();
   } else if (var->IsType<SelectedRows>()) {
-    t = &(var->Get<SelectedRows>().value());
+    return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
                  var->Type().name());
   }
-  return t;
 }
 
 static Tensor* GetMutableTensorFromVar(Variable* var) {
-  Tensor* t = nullptr;
   if (var->IsType<LoDTensor>()) {
-    t = var->GetMutable<LoDTensor>();
+    return var->GetMutable<LoDTensor>();
   } else if (var->IsType<SelectedRows>()) {
-    t = var->GetMutable<SelectedRows>()->mutable_value();
+    return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
                  var->Type().name());
   }
-  return t;
 }
 
 template <>
 const Tensor* ExecutionContext::Input<Tensor>(const std::string& name) const {
   auto* var = InputVar(name);
-  return var == nullptr ? nullptr : GetTensorFromVar(var);
+  return var == nullptr ? nullptr
+                        : GetTensorFromVar(const_cast<Variable*>(var));
 }
 
 template <>
@@ -319,6 +300,7 @@ bool OpSupportGPU(const std::string& op_type) {
   auto it = all_kernels.find(op_type);
   if (it == all_kernels.end()) {
     // All control operator must support GPU
+
     return true;
   }
   for (auto& kern_pair : it->second) {
@@ -492,21 +474,17 @@ void OperatorWithKernel::Run(const Scope& scope,
   }
 
   ExecutionContext ctx(*this, scope, *dev_ctx);
-  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
 
   OpKernelMap& kernels = kernels_iter->second;
 
-  for (auto& candidate : kKernelPriority) {
-    auto candidate_key =
-        OpKernelType(expected_kernel_key.data_type_, std::get<0>(candidate),
-                     expected_kernel_key.data_layout_, std::get<1>(candidate));
+  // TODO(dzhwinter) : kernel fallback mechanism will be added when all the
+  // transform functions are ready.
 
-    if ((candidate_key == expected_kernel_key) ||
-        (kernels.count(candidate_key))) {
-      expected_kernel_key = candidate_key;
-      break;
-    }
-  }
+  // for (auto& candidate : kKernelPriority) {
+  //   Do selection
+  // }
+
+  auto expected_kernel_key = this->GetExpectedKernelType(ctx);
 
   VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
 
@@ -520,7 +498,7 @@ void OperatorWithKernel::Run(const Scope& scope,
         if (tensor_in->IsInitialized()) {
           auto kernel_type_for_var = this->GetKernelTypeForVar(
               var_name_item.first, *tensor_in, expected_kernel_key);
-          if (kernel_type_for_var != expected_kernel_key) {
+          if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) {
             auto out_var_names = OutputVars(true);
             if (std::find(out_var_names.begin(), out_var_names.end(),
                           var_name) != out_var_names.end()) {
@@ -529,11 +507,13 @@ void OperatorWithKernel::Run(const Scope& scope,
                   "does not support transform",
                   var_name);
             }
-            VLOG(3) << "need to do transform for var " << var_name;
+            VLOG(3) << "Transform Variable " << var_name << " from "
+                    << kernel_type_for_var << " to " << expected_kernel_key;
             auto* trans_var = new_scope.Var(var_name);
-            auto* out = DataTransform(expected_kernel_key, kernel_type_for_var,
-                                      *tensor_in);
-            CopyVariableWithTensor(*var, *out, *trans_var);
+            std::shared_ptr<Tensor> out(new Tensor);
+            DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in,
+                          out.get());
+            CopyVariableWithTensor(*var, *(out.get()), *trans_var);
           }
         }
       }
@@ -542,8 +522,14 @@ void OperatorWithKernel::Run(const Scope& scope,
 
   auto kernel_iter = kernels.find(expected_kernel_key);
 
-  kernel_iter->second->Compute(ExecutionContext(
-      *this, new_scope, *pool.Get(expected_kernel_key.place_)));
+  auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
+  kernel_iter->second->Compute(
+      ExecutionContext(*this, new_scope, *new_dev_ctx));
+
+  /*For profiling/benchmark only*/
+  if (FLAGS_op_sync) {
+    new_dev_ctx->Wait();
+  }
 }
 
 proto::DataType OperatorWithKernel::IndicateDataType(
diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h
index d5feb59864..c9140f304c 100644
--- a/paddle/framework/operator.h
+++ b/paddle/framework/operator.h
@@ -54,33 +54,9 @@ constexpr char kGradVarSuffix[] = "@GRAD";
 constexpr char kZeroVarSuffix[] = "@ZERO";
 
 // define some kernel priority
+/* Define multiple kernel type fallback order*/
 extern std::vector<std::tuple<platform::Place, LibraryType>> kKernelPriority;
 
-/**
- * @brief Use cpu kernel only
- */
-void UseCPU();
-
-/**
- * @brief Perfer MKLDNN kernel than Plain CPU kernel
- */
-void UseMKLDNN();
-
-/**
- * @brief Perfer CUDA kernel than Plain CPU kernel
- */
-void UseCUDA();
-
-/**
- * @brief Perfer cudnn kernel than Plain CUDA kernel
- */
-void UseCUDNN();
-
-/**
- * @brief Use all available kernels
- */
-void UseALL();
-
 inline std::string GradVarName(const std::string& var_name) {
   return var_name + kGradVarSuffix;
 }
diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc
index a1b4a03289..c04cd38697 100644
--- a/paddle/framework/tensor_test.cc
+++ b/paddle/framework/tensor_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/framework/tensor_util.h b/paddle/framework/tensor_util.h
index f541d2ba69..b49c614499 100644
--- a/paddle/framework/tensor_util.h
+++ b/paddle/framework/tensor_util.h
@@ -116,8 +116,8 @@ inline void Copy(const Tensor& src, const platform::Place& dst_place,
  * @param[in] src        The external tensor.
  * @param[in] ctx        The device context contains device resources.
  *
- * * @note    CopyFromVector assumes that the tensor has been resized
- *            before invoking.
+ * * @note    CopyFromVector will resize dst to an 1D tensor with the same
+ *            size as src.
  */
 template <typename T>
 inline void CopyFromVector(const std::vector<T>& src,
@@ -315,9 +315,8 @@ inline void DeserializeFromStream(std::istream& is, Tensor* tensor,
           desc.data_type(),
           DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
       is.read(static_cast<char*>(buf), cpu_tensor.memory_size());
-      auto cpu_place = new platform::CPUPlace();
-      framework::Copy(cpu_tensor, *cpu_place, dev_ctx, tensor);
-      delete cpu_place;
+      auto dst_place = dev_ctx.GetPlace();
+      framework::Copy(cpu_tensor, dst_place, dev_ctx, tensor);
 #else
       PADDLE_THROW("Unexpected branch");
 #endif
diff --git a/paddle/framework/tensor_util_test.cc b/paddle/framework/tensor_util_test.cc
index 3636125f20..f541927c0e 100644
--- a/paddle/framework/tensor_util_test.cc
+++ b/paddle/framework/tensor_util_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h
index 36b76fb196..03992c8608 100644
--- a/paddle/framework/variable.h
+++ b/paddle/framework/variable.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
   Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
index 44bb0883b8..520ccc1a99 100644
--- a/paddle/gserver/layers/MKLDNNConcatLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNConcatLayer.cpp
@@ -43,7 +43,7 @@ void MKLDNNConcatLayer::reshape(
   channels_[0] = ic;
   oc = ic;
   for (size_t i = 1; i < inputLayers_.size(); i++) {
-    int batchsize, height, witdh;
+    int batchsize = 0, height = 0, witdh = 0;
     reshapeInput(batchsize, height, witdh, i);
     CHECK_EQ(bs, batchsize);
     CHECK_EQ(ih, height);
@@ -84,6 +84,7 @@ void MKLDNNConcatLayer::resetFwdBuffers(std::vector<MKLDNNMatrixPtr>& inputs,
   bool has8c = false, has16c = false, hasnc = false;
   for (size_t i = 0; i < inputs.size(); i++) {
     resetInValue(inputs[i], nullptr, i, channels_[i]);
+    inputs[i]->downSpatial();
     CHECK(inputs[i]);
     auto dm = inputs[i]->getDims();
     // inputs format can be different, but ndims must equal
diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/gserver/layers/MKLDNNLayer.cpp
index 6fbf3c7fde..2d0fff608c 100644
--- a/paddle/gserver/layers/MKLDNNLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNLayer.cpp
@@ -132,6 +132,8 @@ void MKLDNNLayer::reshapeInput(int& batchsize,
   if (w != 0) {
     width = w;
   }
+  height = height != 0 ? height : 1;
+  width = width != 0 ? width : 1;
 }
 
 void MKLDNNLayer::reshapeOutput(size_t height, size_t width) {
diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/gserver/layers/MKLDNNLayer.h
index e48b9b5a91..3ba39f18b6 100644
--- a/paddle/gserver/layers/MKLDNNLayer.h
+++ b/paddle/gserver/layers/MKLDNNLayer.h
@@ -98,6 +98,8 @@ protected:
 public:
   explicit MKLDNNLayer(const LayerConfig& config)
       : Layer(config),
+        ih_(0),
+        iw_(0),
         condition_(0),
         needResetBwd_(true),
         outputOnlyMKLDNN_(false),
diff --git a/paddle/gserver/tests/img_conv_cudnn.py b/paddle/gserver/tests/img_conv_cudnn.py
index 3934607fa4..0ea6d6bae6 100644
--- a/paddle/gserver/tests/img_conv_cudnn.py
+++ b/paddle/gserver/tests/img_conv_cudnn.py
@@ -1,17 +1,16 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/img_conv_exconv.py b/paddle/gserver/tests/img_conv_exconv.py
index ad5a8ba2bd..c618cdab27 100644
--- a/paddle/gserver/tests/img_conv_exconv.py
+++ b/paddle/gserver/tests/img_conv_exconv.py
@@ -1,17 +1,16 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/pyDataProvider.py b/paddle/gserver/tests/pyDataProvider.py
index 7235a23943..d2ad5888b5 100644
--- a/paddle/gserver/tests/pyDataProvider.py
+++ b/paddle/gserver/tests/pyDataProvider.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy
 import struct
 import traceback
diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/gserver/tests/rnn_data_provider.py
index 913365a5a4..063a4127e5 100644
--- a/paddle/gserver/tests/rnn_data_provider.py
+++ b/paddle/gserver/tests/rnn_data_provider.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer.PyDataProvider2 import *
 
 # Note that each config should has an independent provider
diff --git a/paddle/gserver/tests/sequenceGen.py b/paddle/gserver/tests/sequenceGen.py
index fd725727c0..04a1732d61 100644
--- a/paddle/gserver/tests/sequenceGen.py
+++ b/paddle/gserver/tests/sequenceGen.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os
 import sys
 
diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
index 7303d08804..aeaaa221f9 100644
--- a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
+++ b/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py
@@ -1,18 +1,16 @@
-# edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 ######################## data source ################################
diff --git a/paddle/gserver/tests/sequence_recurrent.py b/paddle/gserver/tests/sequence_recurrent.py
index 4895df186b..8786a5465d 100644
--- a/paddle/gserver/tests/sequence_recurrent.py
+++ b/paddle/gserver/tests/sequence_recurrent.py
@@ -1,17 +1,16 @@
-#!/usr/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py
index a1d54542e3..da182942c9 100644
--- a/paddle/gserver/tests/sequence_recurrent_group.py
+++ b/paddle/gserver/tests/sequence_recurrent_group.py
@@ -1,18 +1,16 @@
-#!/usr/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 ######################## data source ################################
diff --git a/paddle/gserver/tests/sequence_rnn_matched_inputs.py b/paddle/gserver/tests/sequence_rnn_matched_inputs.py
index 59e8c91733..0c55f2cf9d 100644
--- a/paddle/gserver/tests/sequence_rnn_matched_inputs.py
+++ b/paddle/gserver/tests/sequence_rnn_matched_inputs.py
@@ -1,17 +1,16 @@
-# edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/sequence_rnn_mixed_inputs.py b/paddle/gserver/tests/sequence_rnn_mixed_inputs.py
index 6fe9dca6e2..22b376b91a 100644
--- a/paddle/gserver/tests/sequence_rnn_mixed_inputs.py
+++ b/paddle/gserver/tests/sequence_rnn_mixed_inputs.py
@@ -1,17 +1,16 @@
-# edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
index 786a0c6d78..3ce87490bb 100644
--- a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
+++ b/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py
@@ -1,17 +1,16 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 
 from paddle.trainer_config_helpers import *
 
diff --git a/paddle/gserver/tests/test_PyDataProvider2.py b/paddle/gserver/tests/test_PyDataProvider2.py
index 0d0fe476ff..044aede98e 100644
--- a/paddle/gserver/tests/test_PyDataProvider2.py
+++ b/paddle/gserver/tests/test_PyDataProvider2.py
@@ -1,17 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import random
 
 from paddle.trainer.PyDataProvider2 import *
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt
index 5889a50db0..2412ebd82a 100644
--- a/paddle/operators/CMakeLists.txt
+++ b/paddle/operators/CMakeLists.txt
@@ -135,9 +135,8 @@ op_library(detection_output_op DEPS softmax)
 op_library(sequence_softmax_op DEPS softmax)
 op_library(sum_op DEPS selected_rows_functor)
 op_library(sgd_op DEPS selected_rows_functor)
+op_library(print_op DEPS lod_tensor)
 op_library(adagrad_op DEPS selected_rows_functor)
-op_library(conv_op DEPS vol2col)
-op_library(pool_op DEPS pooling)
 op_library(maxout_op DEPS maxouting)
 op_library(unpool_op DEPS unpooling)
 op_library(pool_with_index_op DEPS pooling)
@@ -148,12 +147,27 @@ op_library(max_sequence_len_op DEPS lod_rank_table)
 op_library(sequence_conv_op DEPS context_project)
 op_library(sequence_pool_op DEPS sequence_pooling)
 op_library(lstm_op DEPS sequence2batch lstm_compute)
-op_library(conv_transpose_op DEPS vol2col)
 op_library(gru_op DEPS sequence2batch gru_compute)
 op_library(recurrent_op DEPS executor)
-op_library(warpctc_op DEPS dynload_warpctc sequence_padding math_function)
+op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function)
 op_library(cos_sim_op DEPS cos_sim_functor)
 op_library(parallel_do_op DEPS executor)
+
+# Regist multiple Kernel to pybind
+if (WITH_GPU)
+op_library(conv_op SRCS conv_op.cc conv_op.cu.cc conv_cudnn_op.cu.cc DEPS vol2col)
+op_library(pool_op SRCS pool_op.cc pool_op.cu.cc pool_cudnn_op.cu.cc DEPS pooling)
+op_library(conv_transpose_op SRCS conv_transpose_op.cc conv_transpose_op.cu.cc
+  conv_transpose_cudnn_op.cu.cc DEPS vol2col)
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d, CUDNN);\n")
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(pool2d, CUDNN);\n")
+file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(conv2d_transpose, CUDNN);\n")
+else()
+op_library(conv_op SRCS conv_op.cc DEPS vol2col)
+op_library(pool_op SRCS pool_op.cc DEPS pooling)
+op_library(conv_transpose_op SRCS conv_transpose_op.cc DEPS vol2col)
+endif()
+
 # FIXME(typhoonzero): save/load depends lodtensor serialization functions
 op_library(save_op DEPS lod_tensor)
 op_library(load_op DEPS lod_tensor)
diff --git a/paddle/operators/assign_value_op.cc b/paddle/operators/assign_value_op.cc
new file mode 100644
index 0000000000..d5671c1183
--- /dev/null
+++ b/paddle/operators/assign_value_op.cc
@@ -0,0 +1,73 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/assign_value_op.h"
+
+namespace paddle {
+namespace operators {
+
+class AssignValueOp : public framework::OperatorWithKernel {
+ public:
+  AssignValueOp(const std::string &type,
+                const framework::VariableNameMap &inputs,
+                const framework::VariableNameMap &outputs,
+                const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of AssignValueOp should not be null.");
+    auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
+    ctx->SetOutputDim("Out", framework::make_ddim(shape));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::proto::DataType(ctx.Attr<int>("dtype")), ctx.GetPlace());
+  }
+};
+
+class AssignValueOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  AssignValueOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddOutput("Out", "(Tensor) Output tensor of assign_value operator.");
+    AddAttr<std::vector<int>>("shape",
+                              "(vector<int>) "
+                              "Shape of values.");
+    AddAttr<int>("dtype", "data type of values")
+        .InEnum({framework::proto::DataType::INT32,
+                 framework::proto::DataType::FP32});
+    AddAttr<std::vector<float>>("fp32_values", "store the float values")
+        .SetDefault({});
+    AddAttr<std::vector<int>>("int32_values", "store the int values")
+        .SetDefault({});
+    AddComment(R"DOC(
+AssignValue operator
+
+$$Out = values$$
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker);
+REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel<int>,
+                       ops::AssignValueKernel<float>);
diff --git a/paddle/operators/pool_cudnn_op.h b/paddle/operators/assign_value_op.cu.cc
similarity index 68%
rename from paddle/operators/pool_cudnn_op.h
rename to paddle/operators/assign_value_op.cu.cc
index 5adf27f5bc..b17e201500 100644
--- a/paddle/operators/pool_cudnn_op.h
+++ b/paddle/operators/assign_value_op.cu.cc
@@ -1,19 +1,19 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
+Indicesou may obtain a copy of the License at
+
     http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#pragma once
-
-#include "paddle/framework/op_registry.h"
-#include "paddle/operators/pool_op.h"
+#include "paddle/operators/assign_value_op.h"
 
-namespace paddle {
-namespace operators {}  // namespace operators
-}  // namespace paddle
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(assign_value, ops::AssignValueKernel<int>,
+                        ops::AssignValueKernel<float>);
diff --git a/paddle/operators/assign_value_op.h b/paddle/operators/assign_value_op.h
new file mode 100644
index 0000000000..db2e430779
--- /dev/null
+++ b/paddle/operators/assign_value_op.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/platform/enforce.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>
+class AssignValueKernel : public framework::OpKernel<T> {
+ public:
+  virtual void Compute(const framework::ExecutionContext& ctx) const {
+    auto shape = ctx.Attr<std::vector<int>>("shape");
+    auto* out = ctx.Output<framework::Tensor>("Out");
+    int dtype = ctx.Attr<int>("dtype");
+    const char* value_name = nullptr;
+    switch (dtype) {
+      case framework::proto::DataType::INT32:
+        value_name = "int32_values";
+        break;
+      case framework::proto::DataType::FP32:
+        value_name = "fp32_values";
+        break;
+      default:
+        PADDLE_THROW("Unsupported dtype for assign_value_op: %d", dtype);
+        break;
+    }
+    auto values = ctx.Attr<std::vector<T>>(value_name);
+    framework::CopyFromVector(values, ctx.device_context(), out);
+    out->Resize(framework::make_ddim(shape));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/conv_cudnn_op.cc b/paddle/operators/conv_cudnn_op.cc
deleted file mode 100644
index 84d9ce1973..0000000000
--- a/paddle/operators/conv_cudnn_op.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/conv_op.h"
-
-namespace paddle {
-namespace operators {
-
-class CudnnConv2DOpMaker : public Conv2DOpMaker {
- public:
-  CudnnConv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv2DOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardware. This size should be chosen carefully.")
-        .SetDefault(4096);
-  }
-};
-
-class CudnnConv3DOpMaker : public Conv3DOpMaker {
- public:
-  CudnnConv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv3DOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardware. This size should be chosen carefully.")
-        .SetDefault(4096);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP(conv2d_cudnn, ops::ConvOp, ops::CudnnConv2DOpMaker,
-            conv2d_cudnn_grad, ops::ConvOpGrad);
-
-REGISTER_OP(conv3d_cudnn, ops::ConvOp, ops::CudnnConv3DOpMaker,
-            conv3d_cudnn_grad, ops::ConvOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    conv2d_cudnn,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv2d_cudnn_grad,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
-
-REGISTER_OP_CPU_KERNEL(
-    conv3d_cudnn,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv3d_cudnn_grad,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvGradKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/operators/conv_cudnn_op.cu.cc b/paddle/operators/conv_cudnn_op.cu.cc
index 0c5ed3e4e8..3a5409a7e3 100644
--- a/paddle/operators/conv_cudnn_op.cu.cc
+++ b/paddle/operators/conv_cudnn_op.cu.cc
@@ -32,7 +32,7 @@ static constexpr size_t kCONV_CUDNN_WORKSPACE_LIMIT_BYTES =
     static_cast<size_t>(1024) * 1024 * 1024;
 
 template <typename T>
-class CudnnConvOpKernel : public framework::OpKernel<T> {
+class CUDNNConvOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -147,7 +147,7 @@ class CudnnConvOpKernel : public framework::OpKernel<T> {
 };
 
 template <typename T>
-class CudnnConvGradOpKernel : public framework::OpKernel<T> {
+class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -315,17 +315,16 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-// TODO(dzhwinter) : below register should be removed
-REGISTER_OP_CUDA_KERNEL(conv2d_cudnn,
-                        paddle::operators::CudnnConvOpKernel<float>,
-                        paddle::operators::CudnnConvOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv2d_cudnn_grad,
-                        paddle::operators::CudnnConvGradOpKernel<float>,
-                        paddle::operators::CudnnConvGradOpKernel<double>);
-
-REGISTER_OP_CUDA_KERNEL(conv3d_cudnn,
-                        paddle::operators::CudnnConvOpKernel<float>,
-                        paddle::operators::CudnnConvOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv3d_cudnn_grad,
-                        paddle::operators::CudnnConvGradOpKernel<float>,
-                        paddle::operators::CudnnConvGradOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvOpKernel<float>,
+                   paddle::operators::CUDNNConvOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvGradOpKernel<float>,
+                   paddle::operators::CUDNNConvGradOpKernel<double>);
+
+REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvOpKernel<float>,
+                   paddle::operators::CUDNNConvOpKernel<double>);
+REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   paddle::operators::CUDNNConvGradOpKernel<float>,
+                   paddle::operators::CUDNNConvGradOpKernel<double>);
diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc
index 1468e3eb96..424eccdb7d 100644
--- a/paddle/operators/conv_op.cc
+++ b/paddle/operators/conv_op.cc
@@ -67,6 +67,23 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->ShareLoD("Input", "Output");
 }
 
+framework::OpKernelType ConvOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
@@ -108,6 +125,26 @@ Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
                             "dilations(h_dilation, w_dilation) of "
                             "convolution operator.")
       .SetDefault({1, 1});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Only used in cudnn kernel. Need set use_cudnn to true."
+               "workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardware. This size should be chosen carefully.")
+      .SetDefault(4096);
   AddComment(R"DOC(
 Convolution Operator.
 
@@ -181,6 +218,25 @@ Conv3DOpMaker::Conv3DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
                             "dilations(d_dilation, h_dilation, w_dilation) of "
                             "convolution operator.")
       .SetDefault({1, 1, 1});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Only used in cudnn kernel. workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardware. This size should be chosen carefully.")
+      .SetDefault(4096);
 
   AddComment(R"DOC(
 Convolution3D Operator.
@@ -224,6 +280,23 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   }
 }
 
+framework::OpKernelType ConvOpGrad::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 }  // namespace operators
 }  // namespace paddle
 
diff --git a/paddle/operators/conv_op.h b/paddle/operators/conv_op.h
index 83786e2329..5a8933e791 100644
--- a/paddle/operators/conv_op.h
+++ b/paddle/operators/conv_op.h
@@ -62,12 +62,20 @@ class ConvOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class ConvOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc
deleted file mode 100644
index 2e5333a265..0000000000
--- a/paddle/operators/conv_transpose_cudnn_op.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/conv_transpose_op.h"
-
-namespace paddle {
-namespace operators {
-
-class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
- public:
-  CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv2DTransposeOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardward. This size should be carefully setted.")
-        .SetDefault(4096);
-  }
-};
-
-class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker {
- public:
-  CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
-      : Conv3DTransposeOpMaker(proto, op_checker) {
-    AddAttr<int>("workspace_size_MB",
-                 "workspace size for cudnn, in MB, "
-                 "workspace is a section of GPU memory which will be "
-                 "allocated/freed each time the operator runs, larger "
-                 "workspace size can increase performance but also requires "
-                 "better hardward. This size should be carefully setted.")
-        .SetDefault(4096);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP(conv2d_transpose_cudnn, ops::ConvTransposeOp,
-            ops::CudnnConv2DTransposeOpMaker, conv2d_transpose_cudnn_grad,
-            ops::ConvTransposeOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    conv2d_transpose_cudnn,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv2d_transpose_cudnn_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
-
-REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp,
-            ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad,
-            ops::ConvTransposeOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    conv3d_transpose_cudnn,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    conv3d_transpose_cudnn_grad,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
-                                     double>);
diff --git a/paddle/operators/conv_transpose_cudnn_op.cu.cc b/paddle/operators/conv_transpose_cudnn_op.cu.cc
index fc37776ba1..23bc97e13c 100644
--- a/paddle/operators/conv_transpose_cudnn_op.cu.cc
+++ b/paddle/operators/conv_transpose_cudnn_op.cu.cc
@@ -28,10 +28,10 @@ using ScopedFilterDescriptor = platform::ScopedFilterDescriptor;
 using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor;
 using DataLayout = platform::DataLayout;
 
-static constexpr size_t kConvCudnnWorkspaceLimitBytes = 1024 * 1024 * 1024;
+static constexpr size_t kConvCUDNNWorkspaceLimitBytes = 1024 * 1024 * 1024;
 
 template <typename T>
-class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
+class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -77,7 +77,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
     // ------------------- cudnn conv workspace ---------------------
     void* cudnn_workspace = nullptr;
     size_t workspace_size_in_bytes;  // final workspace to allocate.
-    size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes;
+    size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes;
     if (user_workspace_size > 0) {
       workspace_size_limit = user_workspace_size * 1024 * 1024;
     }
@@ -116,7 +116,7 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel<T> {
 };
 
 template <typename T>
-class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
+class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -161,7 +161,7 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
     cudnnConvolutionBwdFilterAlgo_t filter_algo;
     size_t bwd_filter_ws_size, fwd_ws_size;
     size_t workspace_size_in_bytes = 0;
-    size_t workspace_size_limit = kConvCudnnWorkspaceLimitBytes;
+    size_t workspace_size_limit = kConvCUDNNWorkspaceLimitBytes;
     if (user_workspace_size > 0) {
       workspace_size_limit = user_workspace_size * 1024 * 1024;
     }
@@ -236,16 +236,16 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn,
-                        ops::CudnnConvTransposeOpKernel<float>,
-                        ops::CudnnConvTransposeOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv2d_transpose_cudnn_grad,
-                        ops::CudnnConvTransposeGradOpKernel<float>,
-                        ops::CudnnConvTransposeGradOpKernel<double>);
-
-REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn,
-                        ops::CudnnConvTransposeOpKernel<float>,
-                        ops::CudnnConvTransposeOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(conv3d_transpose_cudnn_grad,
-                        ops::CudnnConvTransposeGradOpKernel<float>,
-                        ops::CudnnConvTransposeGradOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeOpKernel<float>,
+                   ops::CUDNNConvTransposeOpKernel<double>);
+REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeGradOpKernel<float>,
+                   ops::CUDNNConvTransposeGradOpKernel<double>);
+
+REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeOpKernel<float>,
+                   ops::CUDNNConvTransposeOpKernel<double>);
+REGISTER_OP_KERNEL(conv3d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::CUDNNConvTransposeGradOpKernel<float>,
+                   ops::CUDNNConvTransposeGradOpKernel<double>);
diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc
index 74636d138f..cf4e8c0a30 100644
--- a/paddle/operators/conv_transpose_op.cc
+++ b/paddle/operators/conv_transpose_op.cc
@@ -58,6 +58,23 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
   ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
 }
 
+framework::OpKernelType ConvTransposeOp::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
                                                OpAttrChecker* op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
@@ -94,6 +111,25 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
       "(vector<int> default:{0, 0}), the paddings(h_pad, w_pad) of convolution "
       "transpose operator.")
       .SetDefault({0, 0});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Used in cudnn kernel only. workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardward. This size should be carefully setted.")
+      .SetDefault(4096);
   AddComment(R"DOC(
 Convolution2D Transpose Operator.
 
@@ -163,6 +199,25 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
                             "(vector<int> default:{0, 0, 0}), paddings(d_pad, "
                             "h_pad, w_pad) of convolution transpose operator.")
       .SetDefault({0, 0, 0});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+  AddAttr<int>("workspace_size_MB",
+               "Used in cudnn kernel only. workspace size for cudnn, in MB, "
+               "workspace is a section of GPU memory which will be "
+               "allocated/freed each time the operator runs, larger "
+               "workspace size can increase performance but also requires "
+               "better hardward. This size should be carefully setted.")
+      .SetDefault(4096);
   AddComment(R"DOC(
 Convolution3D Transpose Operator.
 
@@ -205,6 +260,23 @@ void ConvTransposeOpGrad::InferShape(framework::InferShapeContext* ctx) const {
   }
 }
 
+framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType(
+    const framework::ExecutionContext& ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 }  // namespace operators
 }  // namespace paddle
 
diff --git a/paddle/operators/conv_transpose_op.h b/paddle/operators/conv_transpose_op.h
index 4c8f8a8067..a42ade41b1 100644
--- a/paddle/operators/conv_transpose_op.h
+++ b/paddle/operators/conv_transpose_op.h
@@ -42,12 +42,20 @@ class ConvTransposeOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class ConvTransposeOpGrad : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/operators/detail/CMakeLists.txt b/paddle/operators/detail/CMakeLists.txt
index f6bdc63cc2..571a75c9dc 100644
--- a/paddle/operators/detail/CMakeLists.txt
+++ b/paddle/operators/detail/CMakeLists.txt
@@ -1 +1 @@
-grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto DEPS lod_tensor selected_rows)
+grpc_library(sendrecvop_grpc SRCS sendrecvop_utils.cc grpc_client.cc grpc_server.cc PROTO send_recv.proto DEPS lod_tensor selected_rows)
diff --git a/paddle/operators/detail/grpc_client.cc b/paddle/operators/detail/grpc_client.cc
new file mode 100644
index 0000000000..aee56ffe01
--- /dev/null
+++ b/paddle/operators/detail/grpc_client.cc
@@ -0,0 +1,153 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "grpc_client.h"
+namespace paddle {
+namespace operators {
+namespace detail {
+
+bool RPCClient::AsyncSendVariable(const std::string& ep,
+                                  const platform::DeviceContext& ctx,
+                                  const framework::Scope& scope,
+                                  const std::string& var_name,
+                                  int64_t time_out) {
+  sendrecv::VariableMessage req;
+  auto* var = scope.FindVar(var_name);
+  SerializeToMessage(var_name, var, ctx, &req);
+
+  // varhandle
+  VarHandle var_h;
+  var_h.ep = ep;
+  var_h.scope = &scope;
+  var_h.name = var_name;
+  var_h.ctx = &ctx;
+
+  // stub context
+  auto ch = GetChannel(ep);
+  SendProcessor* s = new SendProcessor(ch);
+  s->Prepare(var_h, time_out);
+  s->response_call_back_ = NULL;
+
+  auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_);
+  rpc->Finish(&s->reply_, &s->status_, (void*)s);
+
+  req_count_++;
+
+  return true;
+}
+
+void ProcGetResponse(const VarHandle& var_h,
+                     const sendrecv::VariableMessage& ret_msg) {
+  auto* outvar = var_h.scope->FindVar(var_h.name);
+
+  std::istringstream iss(ret_msg.serialized());
+  DeserializeFromMessage(ret_msg, *var_h.ctx, outvar);
+}
+
+bool RPCClient::AsyncGetVariable(const std::string& ep,
+                                 const platform::DeviceContext& ctx,
+                                 const framework::Scope& scope,
+                                 const std::string& var_name,
+                                 int64_t time_out) {
+  sendrecv::VariableMessage req;
+  req.set_varname(var_name);
+
+  auto* var = scope.FindVar(var_name);
+  SerializeToMessage(var_name, var, ctx, &req);
+
+  // varhandle
+  VarHandle var_h;
+  var_h.ep = ep;
+  var_h.scope = &scope;
+  var_h.name = var_name;
+  var_h.ctx = &ctx;
+
+  // stub context
+  auto ch = GetChannel(ep);
+  GetProcessor* s = new GetProcessor(ch);
+  s->Prepare(var_h, time_out);
+  s->response_call_back_ = ProcGetResponse;
+
+  auto rpc = s->stub_->AsyncGetVariable(s->context_.get(), req, &cq_);
+  rpc->Finish(&s->reply_, &s->status_, (void*)s);
+
+  req_count_++;
+
+  return true;
+}
+
+bool RPCClient::Wait() {
+  bool ok = true;
+
+  while (true) {
+    if (req_count_ <= 0) {
+      break;
+    }
+
+    if (!Proceed()) {
+      return false;
+    }
+  }
+
+  return ok;
+}
+
+bool RPCClient::Proceed() {
+  void* tag = NULL;
+  bool ok = false;
+
+  // request counts.
+  if (!cq_.Next(&tag, &ok)) {
+    LOG(ERROR) << "Get meets CompletionQueue error";
+    return false;
+  }
+
+  GPR_ASSERT(ok);
+  PADDLE_ENFORCE(tag);
+
+  // TODO(gongwb): add more retries.
+  ClientBase* c = static_cast<ClientBase*>(tag);
+  if (!c->status_.ok()) {
+    LOG(ERROR) << "proc param error:" << c->var_h_.String()
+               << " grpc error:" << c->status_.error_message();
+    delete c;
+    return false;
+  }
+
+  c->Process();
+  delete c;
+  req_count_--;
+  return true;
+}
+
+std::shared_ptr<grpc::Channel> RPCClient::GetChannel(const std::string& ep) {
+  auto it = channels_.find(ep);
+  if (it != channels_.end()) {
+    return it->second;
+  }
+
+  grpc::ChannelArguments args;
+  args.SetMaxSendMessageSize(std::numeric_limits<int>::max());
+  args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
+
+  auto ch = std::shared_ptr<grpc::Channel>(
+      grpc::CreateCustomChannel(ep, grpc::InsecureChannelCredentials(), args));
+
+  channels_[ep] = ch;
+  return ch;
+}
+
+}  // namespace detail
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/detail/grpc_client.h b/paddle/operators/detail/grpc_client.h
new file mode 100644
index 0000000000..a62e70a253
--- /dev/null
+++ b/paddle/operators/detail/grpc_client.h
@@ -0,0 +1,147 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <grpc++/grpc++.h>
+#include <grpc/support/log.h>
+#include <time.h>
+#include <chrono>
+#include <ctime>
+#include <functional>
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "paddle/framework/data_type.h"
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/framework/scope.h"
+#include "paddle/framework/selected_rows.h"
+#include "paddle/operators/detail/sendrecvop_utils.h"
+#include "paddle/operators/detail/simple_block_queue.h"
+
+namespace paddle {
+namespace operators {
+namespace detail {
+
+struct VarHandle {
+  std::string ep;
+  const platform::DeviceContext* ctx;
+  const framework::Scope* scope;
+  std::string name;
+
+  std::string String() const {
+    std::ostringstream s;
+    s << "name:[" << name << "] ep:[" << ep << "]";
+    return s.str();
+  }
+};
+
+void ProcGetResponse(const VarHandle& var_h,
+                     const sendrecv::VariableMessage& msg);
+
+class ClientBase {
+ public:
+  explicit ClientBase(std::shared_ptr<grpc::Channel> ch) {
+    stub_ = sendrecv::SendRecvService::NewStub(ch);
+    context_ = NULL;
+  }
+
+  virtual ~ClientBase() {}
+
+  virtual void Prepare(const VarHandle& var_info, int64_t time_out) {
+    context_.reset(new grpc::ClientContext());
+    var_h_ = var_info;
+
+    std::chrono::system_clock::time_point deadline =
+        std::chrono::system_clock::now() + std::chrono::milliseconds(time_out);
+
+    context_->set_deadline(deadline);
+  }
+
+  virtual void Process() = 0;
+
+  std::unique_ptr<sendrecv::SendRecvService::Stub> stub_;
+  std::unique_ptr<grpc::ClientContext> context_;
+  grpc::Status status_;
+  VarHandle var_h_;
+};
+
+typedef std::function<void(const VarHandle&, const sendrecv::VoidMessage&)>
+    RequestSendCallBack;
+
+class SendProcessor : public ClientBase {
+ public:
+  explicit SendProcessor(std::shared_ptr<grpc::Channel> ch) : ClientBase(ch) {}
+
+  virtual ~SendProcessor() {}
+
+  virtual void Process() {
+    if (response_call_back_) {
+      response_call_back_(var_h_, reply_);
+    }
+  }
+
+  sendrecv::VoidMessage reply_;
+  RequestSendCallBack response_call_back_ = NULL;
+};
+
+typedef std::function<void(const VarHandle&, const sendrecv::VariableMessage&)>
+    RequestGetCallBack;
+
+class GetProcessor : public ClientBase {
+ public:
+  explicit GetProcessor(std::shared_ptr<grpc::Channel> ch) : ClientBase(ch) {}
+
+  virtual ~GetProcessor() {}
+
+  virtual void Process() {
+    if (response_call_back_) {
+      response_call_back_(var_h_, reply_);
+    }
+  }
+
+  sendrecv::VariableMessage reply_;
+  RequestGetCallBack response_call_back_ = ProcGetResponse;
+};
+
+class RPCClient {
+ public:
+  bool AsyncSendVariable(const std::string& ep,
+                         const platform::DeviceContext& ctx,
+                         const framework::Scope& scope,
+                         const std::string& var_name,
+                         int64_t time_out = 600 * 1000);
+
+  bool AsyncGetVariable(const std::string& ep,
+                        const platform::DeviceContext& ctx,
+                        const framework::Scope& scope,
+                        const std::string& var_name,
+                        int64_t time_out = 600 * 1000);
+  bool Wait();
+
+ private:
+  bool Proceed();
+  std::shared_ptr<grpc::Channel> GetChannel(const std::string& ep);
+
+ private:
+  grpc::CompletionQueue cq_;
+  std::map<std::string, std::shared_ptr<grpc::Channel>> channels_;
+  int64_t req_count_ = 0;
+};
+
+}  // namespace detail
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/detail/grpc_server.cc b/paddle/operators/detail/grpc_server.cc
new file mode 100644
index 0000000000..c0b94746a0
--- /dev/null
+++ b/paddle/operators/detail/grpc_server.cc
@@ -0,0 +1,245 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/detail/grpc_server.h"
+
+using grpc::ServerAsyncResponseWriter;
+
+namespace paddle {
+namespace operators {
+namespace detail {
+
+enum CallStatus { PROCESS = 0, FINISH };
+
+// reference:
+// https://stackoverflow.com/questions/41732884/grpc-multiple-services-in-cpp-async-server
+class RequestBase {
+ public:
+  explicit RequestBase(sendrecv::SendRecvService::AsyncService* service,
+                       grpc::ServerCompletionQueue* cq)
+      : service_(service), cq_(cq), status_(PROCESS) {
+    PADDLE_ENFORCE(cq_);
+  }
+  virtual ~RequestBase() {}
+  virtual void Process() { assert(false); }
+
+  CallStatus Status() { return status_; }
+  void SetStatus(CallStatus status) { status_ = status; }
+  virtual std::string GetReqName() { assert(false); }
+
+ protected:
+  grpc::ServerContext ctx_;
+  sendrecv::SendRecvService::AsyncService* service_;
+  grpc::ServerCompletionQueue* cq_;
+  CallStatus status_;
+};
+
+typedef std::pair<std::string, sendrecv::VariableMessage> MessageWithName;
+
+class RequestSend final : public RequestBase {
+ public:
+  explicit RequestSend(sendrecv::SendRecvService::AsyncService* service,
+                       grpc::ServerCompletionQueue* cq,
+                       SimpleBlockQueue<MessageWithName>* queue)
+      : RequestBase(service, cq), queue_(queue), responder_(&ctx_) {
+    service_->RequestSendVariable(&ctx_, &request_, &responder_, cq_, cq_,
+                                  this);
+  }
+
+  virtual ~RequestSend() {}
+
+  virtual std::string GetReqName() { return request_.varname(); }
+
+  virtual void Process() {
+    MessageWithName msg_with_name =
+        std::make_pair(request_.varname(), std::move(request_));
+    queue_->Push(std::move(msg_with_name));
+    responder_.Finish(reply_, grpc::Status::OK, this);
+    status_ = FINISH;
+  }
+
+ protected:
+  sendrecv::VariableMessage request_;
+  sendrecv::VoidMessage reply_;
+  SimpleBlockQueue<MessageWithName>* queue_;
+  ServerAsyncResponseWriter<sendrecv::VoidMessage> responder_;
+};
+
+class RequestGet final : public RequestBase {
+ public:
+  explicit RequestGet(sendrecv::SendRecvService::AsyncService* service,
+                      grpc::ServerCompletionQueue* cq, framework::Scope* scope,
+                      const platform::DeviceContext* dev_ctx)
+      : RequestBase(service, cq),
+        responder_(&ctx_),
+        scope_(scope),
+        dev_ctx_(dev_ctx) {
+    service_->RequestGetVariable(&ctx_, &request_, &responder_, cq_, cq_, this);
+  }
+
+  virtual ~RequestGet() {}
+
+  virtual std::string GetReqName() { return request_.varname(); }
+
+  virtual void Process() {
+    // proc request.
+    std::string var_name = request_.varname();
+    auto* var = scope_->FindVar(var_name);
+    SerializeToMessage(var_name, var, *dev_ctx_, &reply_);
+    // TODO(gongwb): check var's info.
+    responder_.Finish(reply_, grpc::Status::OK, this);
+    status_ = FINISH;
+  }
+
+ protected:
+  sendrecv::VariableMessage request_;
+  sendrecv::VariableMessage reply_;
+  ServerAsyncResponseWriter<sendrecv::VariableMessage> responder_;
+  framework::Scope* scope_;
+  const platform::DeviceContext* dev_ctx_;
+};
+
+void AsyncGRPCServer::RunSyncUpdate() {
+  grpc::ServerBuilder builder;
+  builder.AddListeningPort(address_, grpc::InsecureServerCredentials());
+  builder.SetMaxSendMessageSize(std::numeric_limits<int>::max());
+  builder.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
+  builder.RegisterService(&service_);
+
+  cq_send_ = builder.AddCompletionQueue();
+  cq_get_ = builder.AddCompletionQueue();
+  server_ = builder.BuildAndStart();
+  LOG(INFO) << "Server listening on " << address_ << std::endl;
+
+  std::function<void()> send_register =
+      std::bind(&AsyncGRPCServer::TryToRegisterNewSendOne, this);
+  std::function<void()> get_register =
+      std::bind(&AsyncGRPCServer::TryToRegisterNewGetOne, this);
+
+  t_send_.reset(
+      new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, false,
+                                cq_send_.get(), "cq_send", send_register)));
+
+  t_get_.reset(
+      new std::thread(std::bind(&AsyncGRPCServer::HandleRequest, this, true,
+                                cq_get_.get(), "cq_get", get_register)));
+
+  // wait server
+  server_->Wait();
+  t_send_->join();
+  t_get_->join();
+}
+
+void AsyncGRPCServer::ShutdownQueue() {
+  std::unique_lock<std::mutex> lock(cq_mutex_);
+  cq_send_->Shutdown();
+  cq_get_->Shutdown();
+  is_shut_down_ = true;
+}
+
+// This URL explains why shutdown is complicate:
+// https://stackoverflow.com/questions/35708348/grpc-what-is-the-recommended-way-to-shut-down-an-asynchronous-server-in-c
+void AsyncGRPCServer::ShutDown() {
+  server_->Shutdown();
+  ShutdownQueue();
+}
+
+void AsyncGRPCServer::TryToRegisterNewSendOne() {
+  std::unique_lock<std::mutex> lock(cq_mutex_);
+  if (is_shut_down_) {
+    return;
+  }
+  RequestSend* send =
+      new RequestSend(&service_, cq_send_.get(), &var_recv_queue_);
+  VLOG(4) << "create RequestSend status:" << send->Status();
+}
+
+void AsyncGRPCServer::TryToRegisterNewGetOne() {
+  std::unique_lock<std::mutex> lock(cq_mutex_);
+  if (is_shut_down_) {
+    return;
+  }
+  RequestGet* get = new RequestGet(&service_, cq_get_.get(), scope_, dev_ctx_);
+  VLOG(4) << "create Requestget status:" << get->Status();
+}
+
+void AsyncGRPCServer::HandleRequest(bool wait, grpc::ServerCompletionQueue* cq,
+                                    std::string cq_name,
+                                    std::function<void()> TryToRegisterNewOne) {
+  TryToRegisterNewOne();
+
+  void* tag = NULL;
+  bool ok = false;
+  while (true) {
+    if (!cq->Next(&tag, &ok)) {
+      LOG(INFO) << cq_name << " get CompletionQueue shutdown!";
+      break;
+    }
+
+    PADDLE_ENFORCE(tag);
+    if (wait && !done_) {
+      Wait();
+    }
+
+    RequestBase* base = (RequestBase*)tag;
+    // reference:
+    // https://github.com/tensorflow/tensorflow/issues/5596
+    // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM
+    // https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I
+    if (!ok) {
+      LOG(WARNING) << cq_name << " recv no regular event:argument name"
+                   << base->GetReqName();
+      TryToRegisterNewOne();
+      delete base;
+      continue;
+    }
+
+    switch (base->Status()) {
+      case PROCESS: {
+        VLOG(4) << cq_name << " status:" << base->Status();
+        TryToRegisterNewOne();
+        base->Process();
+        break;
+      }
+      case FINISH: {
+        VLOG(4) << cq_name << " status:" << base->Status();
+        delete base;
+        break;
+      }
+      default: { assert(false); }
+    }
+  }
+}
+
+void AsyncGRPCServer::Wait() {
+  std::unique_lock<std::mutex> lock(this->mutex_);
+  condition_.wait(lock, [=] { return this->done_ == true; });
+}
+
+void AsyncGRPCServer::Reset() {
+  std::lock_guard<std::mutex> lock(this->mutex_);
+  done_ = false;
+}
+
+void AsyncGRPCServer::Done() {
+  {
+    std::lock_guard<std::mutex> lock(this->mutex_);
+    done_ = true;
+  }
+  condition_.notify_all();
+}
+
+}  // namespace detail
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/detail/grpc_server.h b/paddle/operators/detail/grpc_server.h
new file mode 100644
index 0000000000..2c078b7777
--- /dev/null
+++ b/paddle/operators/detail/grpc_server.h
@@ -0,0 +1,93 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/framework/scope.h"
+#include "paddle/framework/selected_rows.h"
+#include "paddle/framework/var_type.h"
+#include "paddle/operators/detail/simple_block_queue.h"
+
+#include "paddle/operators/detail/send_recv.grpc.pb.h"
+#include "paddle/operators/detail/send_recv.pb.h"
+
+#include <grpc++/grpc++.h>
+#include <grpc/support/log.h>
+#include <thread>
+#include "paddle/operators/detail/sendrecvop_utils.h"
+
+namespace paddle {
+namespace operators {
+namespace detail {
+
+typedef std::pair<std::string, sendrecv::VariableMessage> MessageWithName;
+class RequestBase;
+
+class AsyncGRPCServer final : public sendrecv::SendRecvService::Service {
+ public:
+  explicit AsyncGRPCServer(const std::string &address) : address_(address) {}
+
+  void RunSyncUpdate();
+
+  void Reset();
+
+  void Done();
+
+  void SetScope(framework::Scope *scope) { scope_ = scope; }
+
+  void SetDevCtx(const platform::DeviceContext *dev_ctx) { dev_ctx_ = dev_ctx; }
+
+  const MessageWithName Get() { return this->var_recv_queue_.Pop(); }
+
+  void Push(const MessageWithName &msg) { this->var_recv_queue_.Push(msg); }
+
+  void ShutDown();
+
+ protected:
+  void Wait();
+  void HandleRequest(bool wait, grpc::ServerCompletionQueue *cq,
+                     std::string cq_name,
+                     std::function<void()> TryToRegisterNewOne);
+  void TryToRegisterNewSendOne();
+  void TryToRegisterNewGetOne();
+  void ShutdownQueue();
+
+ private:
+  std::mutex cq_mutex_;
+  volatile bool is_shut_down_ = false;
+  std::unique_ptr<grpc::ServerCompletionQueue> cq_send_;
+  std::unique_ptr<grpc::ServerCompletionQueue> cq_get_;
+
+  sendrecv::SendRecvService::AsyncService service_;
+  std::unique_ptr<grpc::Server> server_;
+
+  std::string address_;
+  framework::Scope *scope_;
+  const platform::DeviceContext *dev_ctx_;
+  // received variable from RPC, operators fetch variable from this queue.
+  SimpleBlockQueue<MessageWithName> var_recv_queue_;
+
+  // condition of the sub program
+  std::mutex mutex_;
+  volatile mutable bool done_;
+  std::condition_variable condition_;
+
+  std::unique_ptr<std::thread> t_send_;
+  std::unique_ptr<std::thread> t_get_;
+};
+
+};  // namespace detail
+};  // namespace operators
+};  // namespace paddle
diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc
deleted file mode 100644
index 319404e56a..0000000000
--- a/paddle/operators/detail/recv_impl.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "send_recv_impl.h"
-
-namespace paddle {
-namespace operators {
-namespace detail {
-
-Status SendRecvServerImpl::SendVariable(ServerContext *context,
-                                        const VariableMessage *in_var,
-                                        VoidMessage *out_var) {
-  MessageWithName msg_with_name =
-      std::make_pair(in_var->varname(), std::move(*in_var));
-  var_recv_queue_.Push(std::move(msg_with_name));
-  return Status::OK;
-}
-
-Status SendRecvServerImpl::GetVariable(ServerContext *context,
-                                       const VariableMessage *in_var,
-                                       VariableMessage *out_var) {
-  std::string get_var_name = in_var->varname();
-  auto *var = scope_->FindVar(get_var_name);
-
-  SerializeToMessage(get_var_name, var, platform::CPUDeviceContext(), out_var);
-  return Status::OK;
-}
-
-Status SendRecvServerImpl::Wait(ServerContext *context,
-                                const VoidMessage *in_var,
-                                VoidMessage *out_var) {
-  {
-    std::unique_lock<std::mutex> lock(this->mutex_);
-    condition_.wait(lock, [=] { return this->done_ == true; });
-  }
-  return Status::OK;
-}
-
-void SendRecvServerImpl::Reset() {
-  std::lock_guard<std::mutex> lock(this->mutex_);
-  done_ = false;
-}
-
-void SendRecvServerImpl::Done() {
-  {
-    std::lock_guard<std::mutex> lock(this->mutex_);
-    done_ = true;
-  }
-  condition_.notify_all();
-}
-
-}  // namespace detail
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc
deleted file mode 100644
index ae85cf2cec..0000000000
--- a/paddle/operators/detail/send_impl.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "send_recv_impl.h"
-
-namespace paddle {
-namespace operators {
-namespace detail {
-
-bool RPCClient::SendVariable(const framework::Scope& scope,
-                             const std::string& inname) {
-  ClientContext context;
-  VariableMessage msg;
-  VoidMessage out_msg;
-  // FIXME(typhoonzero): pass device context to here.
-  auto ctx = platform::CPUDeviceContext();
-  auto* var = scope.FindVar(inname);
-  PADDLE_ENFORCE(var);
-  SerializeToMessage(inname, var, ctx, &msg);
-
-  Status status = stub_->SendVariable(&context, msg, &out_msg);
-  if (!status.ok()) {
-    LOG(ERROR) << "gRPC error: " << status.error_message();
-    return false;
-  }
-  return true;
-}
-
-bool RPCClient::GetVariable(const framework::Scope& scope,
-                            const std::string& outname) {
-  ClientContext context;
-  VariableMessage call_msg, ret_msg;
-  call_msg.set_varname(outname);
-  auto ctx = platform::CPUDeviceContext();
-  Status status = stub_->GetVariable(&context, call_msg, &ret_msg);
-  auto* outvar = scope.FindVar(outname);
-  if (!status.ok()) {
-    LOG(ERROR) << "gRPC error: " << status.error_message();
-    return false;
-  }
-
-  std::istringstream iss(ret_msg.serialized());
-  DeserializeFromMessage(ret_msg, ctx, outvar);
-
-  return true;
-}
-
-void RPCClient::Wait() {
-  ClientContext context;
-  VoidMessage call_msg, ret_msg;
-  stub_->Wait(&context, call_msg, &ret_msg);
-}
-
-}  // namespace detail
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/operators/detail/send_recv.proto b/paddle/operators/detail/send_recv.proto
index f141c755ce..8f962b4c69 100644
--- a/paddle/operators/detail/send_recv.proto
+++ b/paddle/operators/detail/send_recv.proto
@@ -21,8 +21,6 @@ service SendRecvService {
   rpc SendVariable(VariableMessage) returns (VoidMessage) {}
   // Argument VariableMessage for GetVariable should only contain varname.
   rpc GetVariable(VariableMessage) returns (VariableMessage) {}
-  // wait for one execution of the program
-  rpc Wait(VoidMessage) returns (VoidMessage) {}
 }
 
 // VariableMessage is serialized paddle variable message.
diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h
deleted file mode 100644
index 1fe54f1f05..0000000000
--- a/paddle/operators/detail/send_recv_impl.h
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/framework/lod_tensor.h"
-#include "paddle/framework/scope.h"
-#include "paddle/framework/selected_rows.h"
-#include "paddle/framework/var_type.h"
-#include "paddle/operators/detail/simple_block_queue.h"
-
-#include "paddle/operators/detail/send_recv.grpc.pb.h"
-#include "paddle/operators/detail/send_recv.pb.h"
-
-#include <grpc++/grpc++.h>
-
-using grpc::Channel;
-using grpc::Server;
-using grpc::ServerContext;
-using grpc::ServerReader;
-using grpc::ServerBuilder;
-
-using grpc::ClientContext;
-using grpc::ClientReader;
-using grpc::ClientReaderWriter;
-using grpc::ClientWriter;
-using grpc::Status;
-using sendrecv::SendRecvService;
-using sendrecv::VariableMessage;
-using sendrecv::VoidMessage;
-
-namespace paddle {
-namespace operators {
-namespace detail {
-
-typedef std::pair<std::string, sendrecv::VariableMessage> MessageWithName;
-
-class SendRecvServerImpl final : public SendRecvService::Service {
- public:
-  explicit SendRecvServerImpl() {}
-
-  Status SendVariable(ServerContext *context, const VariableMessage *in_var,
-                      VoidMessage *out_var) override;
-  Status GetVariable(ServerContext *context, const VariableMessage *in_var,
-                     VariableMessage *out_var) override;
-  Status Wait(ServerContext *context, const VoidMessage *in_var,
-              VoidMessage *out_var) override;
-  void Reset();
-  void Done();
-  void SetScope(framework::Scope *scope) { scope_ = scope; };
-
-  const MessageWithName Get() { return this->var_recv_queue_.Pop(); }
-
-  void Push(const MessageWithName &msg) { this->var_recv_queue_.Push(msg); }
-
- private:
-  // received variable from RPC, operators fetch variable from this queue.
-  SimpleBlockQueue<MessageWithName> var_recv_queue_;
-  framework::Scope *scope_;
-  // condition of the sub program
-  std::mutex mutex_;
-  bool done_;
-  std::condition_variable condition_;
-};
-
-// RPCClient is a class to send tensors to pserver sub-network
-// using different hashing methods.
-class RPCClient {
- public:
-  RPCClient(std::shared_ptr<Channel> channel)
-      : stub_(SendRecvService::NewStub(channel)) {}
-
-  bool SendVariable(const framework::Scope &scope, const std::string &inname);
-  bool GetVariable(const framework::Scope &scope, const std::string &outname);
-  void Wait();
-
- private:
-  std::unique_ptr<SendRecvService::Stub> stub_;
-};
-
-inline void SerializeToMessage(const std::string &name,
-                               const framework::Variable *var,
-                               const platform::DeviceContext &ctx,
-                               VariableMessage *msg) {
-  msg->set_varname(name);
-  std::ostringstream oss;
-  switch (framework::ToVarType(var->Type())) {
-    case framework::proto::VarDesc_VarType_LOD_TENSOR:
-      msg->set_type(sendrecv::VarType::LOD_TENSOR);
-      framework::SerializeToStream(oss, var->Get<framework::LoDTensor>(), ctx);
-      break;
-    case framework::proto::VarDesc_VarType_SELECTED_ROWS:
-      msg->set_type(sendrecv::VarType::SELECTED_ROWS);
-      framework::SerializeToStream(oss, var->Get<framework::SelectedRows>(),
-                                   ctx);
-      break;
-    default: {
-      PADDLE_THROW("Serialize does not support type: %s",
-                   typeid(var->Type()).name());
-      break;
-    }
-  }
-  msg->set_serialized(oss.str());
-}
-
-inline void DeserializeFromMessage(const VariableMessage &msg,
-                                   const platform::DeviceContext &ctx,
-                                   framework::Variable *var) {
-  using namespace paddle::framework::proto;
-  std::istringstream iss(msg.serialized());
-  switch (msg.type()) {
-    case sendrecv::VarType::LOD_TENSOR:
-      DeserializeFromStream(iss, var->GetMutable<framework::LoDTensor>(), ctx);
-      break;
-    case sendrecv::VarType::SELECTED_ROWS: {
-      DeserializeFromStream(iss, var->GetMutable<framework::SelectedRows>(),
-                            ctx);
-      break;
-    }
-    default: {
-      PADDLE_THROW("Deserialize does not support type: %s",
-                   typeid(var->Type()).name());
-      break;
-    }
-  }
-}
-
-}  // namespace detail
-}  // namespace operators
-}  // namespace paddle
diff --git a/paddle/operators/detail/sendrecvop_utils.cc b/paddle/operators/detail/sendrecvop_utils.cc
new file mode 100644
index 0000000000..7635b9e8db
--- /dev/null
+++ b/paddle/operators/detail/sendrecvop_utils.cc
@@ -0,0 +1,68 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/detail/sendrecvop_utils.h"
+
+namespace paddle {
+namespace operators {
+namespace detail {
+
+void SerializeToMessage(const std::string& name, const framework::Variable* var,
+                        const platform::DeviceContext& ctx,
+                        sendrecv::VariableMessage* msg) {
+  msg->set_varname(name);
+  std::ostringstream oss;
+  switch (framework::ToVarType(var->Type())) {
+    case framework::proto::VarDesc_VarType_LOD_TENSOR:
+      msg->set_type(sendrecv::VarType::LOD_TENSOR);
+      framework::SerializeToStream(oss, var->Get<framework::LoDTensor>(), ctx);
+      break;
+    case framework::proto::VarDesc_VarType_SELECTED_ROWS:
+      msg->set_type(sendrecv::VarType::SELECTED_ROWS);
+      framework::SerializeToStream(oss, var->Get<framework::SelectedRows>(),
+                                   ctx);
+      break;
+    default: {
+      PADDLE_THROW("Serialize does not support type: %s",
+                   typeid(var->Type()).name());
+      break;
+    }
+  }
+  msg->set_serialized(oss.str());
+}
+
+void DeserializeFromMessage(const sendrecv::VariableMessage& msg,
+                            const platform::DeviceContext& ctx,
+                            framework::Variable* var) {
+  std::istringstream iss(msg.serialized());
+  switch (msg.type()) {
+    case sendrecv::VarType::LOD_TENSOR:
+      DeserializeFromStream(iss, var->GetMutable<framework::LoDTensor>(), ctx);
+      break;
+    case sendrecv::VarType::SELECTED_ROWS: {
+      DeserializeFromStream(iss, var->GetMutable<framework::SelectedRows>(),
+                            ctx);
+      break;
+    }
+    default: {
+      PADDLE_THROW("Deserialize does not support type: %s",
+                   typeid(var->Type()).name());
+      break;
+    }
+  }
+}
+
+}  // namespace detail
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/detail/sendrecvop_utils.h b/paddle/operators/detail/sendrecvop_utils.h
new file mode 100644
index 0000000000..bc6581afab
--- /dev/null
+++ b/paddle/operators/detail/sendrecvop_utils.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "paddle/framework/data_type.h"
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/framework/scope.h"
+#include "paddle/framework/selected_rows.h"
+#include "paddle/framework/var_type.h"
+
+#include "paddle/operators/detail/send_recv.grpc.pb.h"
+#include "paddle/operators/detail/send_recv.pb.h"
+
+namespace paddle {
+namespace operators {
+namespace detail {
+
+void SerializeToMessage(const std::string& name, const framework::Variable* var,
+                        const platform::DeviceContext& ctx,
+                        sendrecv::VariableMessage* msg);
+
+void DeserializeFromMessage(const sendrecv::VariableMessage& msg,
+                            const platform::DeviceContext& ctx,
+                            framework::Variable* var);
+}  // namespace detail
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/edit_distance_op.cc b/paddle/operators/edit_distance_op.cc
new file mode 100644
index 0000000000..e383f07fa9
--- /dev/null
+++ b/paddle/operators/edit_distance_op.cc
@@ -0,0 +1,98 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/edit_distance_op.h"
+
+namespace paddle {
+namespace operators {
+
+class EditDistanceOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Hyps"), "Input(Hyps) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Refs"), "Input(Refs) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) shouldn't be null.");
+    auto hyp_dims = ctx->GetInputDim("Hyps");
+    auto ref_dims = ctx->GetInputDim("Refs");
+    PADDLE_ENFORCE(hyp_dims.size() == 2 && hyp_dims[1] == 1,
+                   "Input(Hyps) must be a 2-D LoDTensor with the 2nd dimension "
+                   "equal to 1.");
+    PADDLE_ENFORCE(ref_dims.size() == 2 && ref_dims[1] == 1,
+                   "Input(Refs) must be a 2-D LoDTensor with the 2nd dimension "
+                   "equal to 1.");
+    ctx->SetOutputDim("Out", ctx->GetInputDim("Refs"));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(framework::proto::DataType::FP32,
+                                   ctx.device_context());
+  }
+};
+
+class EditDistanceOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  EditDistanceOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Hyps",
+             "(2-D LoDTensor<int>, 2nd dim. equal to 1) "
+             "The indices for hypothesis strings.");
+    AddInput("Refs",
+             "(2-D LoDTensor<int>, 2nd dim. equal to 1) "
+             "The indices for reference strings.");
+    AddAttr<bool>("normalized",
+                  "(bool, default false) Indicated whether to normalize "
+                  "the edit distance by the length of reference string.")
+        .SetDefault(false);
+    AddOutput("Out",
+              "(2-D Tensor with shape [`batch_size` x 1]) "
+              "The output edit distances of EditDistance operator.");
+    AddComment(R"DOC(
+
+EditDistance operator computes the edit distances between a batch of hypothesis
+strings and their references.
+
+Edit distance, also called Levenshtein distance, measures how dissimilar two strings 
+are by counting the minimum number of operations to transform one string into anthor. 
+Here the operations include insertion, deletion, and substitution. For example, 
+given hypothesis string A = "kitten" and reference B = "sitting", the edit distance 
+is 3 for A will be transformed into B at least after two substitutions and one 
+insertion:
+  
+   "kitten" -> "sitten" -> "sittin" -> "sitting"
+
+Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with the total 
+number denoted by `batch_size`, and the separation is specified by the LoD information. 
+And the `batch_size` reference strings are arranged in order in the same way in the 
+LoDTensor Input(Refs).
+
+Output(Out) contains the `batch_size` results and each stands for the edit stance 
+for a pair of strings respectively. If Attr(normalized) is true, the edit distance 
+will be divided by the length of reference string.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(edit_distance, ops::EditDistanceOp, ops::EditDistanceOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    edit_distance, ops::EditDistanceKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/edit_distance_op.cu b/paddle/operators/edit_distance_op.cu
new file mode 100644
index 0000000000..cf5ebc5c38
--- /dev/null
+++ b/paddle/operators/edit_distance_op.cu
@@ -0,0 +1,149 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include "paddle/framework/op_registry.h"
+#include "paddle/platform/cuda_helper.h"
+#include "paddle/platform/gpu_info.h"
+
+namespace paddle {
+namespace operators {
+
+using platform::PADDLE_CUDA_NUM_THREADS;
+
+template <typename T>
+__global__ void FillFirstRow(T* dist, const int N) {
+  int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  if (idx < N + 1) {
+    dist[idx] = idx;
+  }
+}
+
+template <typename T>
+__global__ void FillFirstColumn(T* dist, const int M, const int N) {
+  int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  if (idx < M + 1) {
+    dist[idx * (N + 1)] = idx;
+  }
+}
+
+template <typename T>
+__global__ void Levenshtein(T* dist, const int* x1, const int* x2, const int M,
+                            const int N, const int start) {
+  int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  int offset = N;
+  int index = start + idx * offset;
+  int row = index / (N + 1);
+  int col = index % (N + 1);
+  if (row > 0 && col > 0 && row < M + 1 && col < N + 1) {
+    int cost = x1[row - 1] == x2[col - 1] ? 0 : 1;
+    int dels = dist[(row - 1) * (N + 1) + col] + 1;
+    int ins = dist[row * (N + 1) + col - 1] + 1;
+    int subs = dist[(row - 1) * (N + 1) + (col - 1)] + cost;
+    dist[index] = min(dels, min(ins, subs));
+  }
+}
+
+template <typename T>
+__global__ void SetOutput(T* out, const T* dist, const int M, const int N,
+                          bool normalized) {
+  int idx = blockDim.x * blockIdx.x + threadIdx.x;
+  if (idx == 0) {
+    out[0] = normalized ? dist[M * (N + 1) + N] / N : dist[M * (N + 1) + N];
+  }
+}
+
+template <typename Place, typename T>
+class EditDistanceGPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const {
+    auto* out_t = ctx.Output<framework::Tensor>("Out");
+
+    auto* x1_t = ctx.Input<framework::LoDTensor>("Hyps");
+    auto* x2_t = ctx.Input<framework::LoDTensor>("Refs");
+
+    auto normalized = ctx.Attr<bool>("normalized");
+    auto stream = reinterpret_cast<const platform::CUDADeviceContext&>(
+                      ctx.device_context())
+                      .stream();
+
+    auto hyp_lod = x1_t->lod()[0];
+    auto ref_lod = x2_t->lod()[0];
+    PADDLE_ENFORCE(
+        hyp_lod.size() == ref_lod.size(),
+        "Input(Hyps) and Input(Refs) must have the same batch size.");
+    for (size_t i = 1; i < ref_lod.size(); ++i) {
+      PADDLE_ENFORCE(ref_lod[i] > ref_lod[i - 1],
+                     "Reference string %d is empty.", i);
+    }
+
+    auto num_strs = hyp_lod.size() - 1;
+    out_t->Resize({static_cast<int64_t>(num_strs), 1});
+    out_t->mutable_data<T>(ctx.GetPlace());
+    auto out = out_t->data<T>();
+
+    T distance = 0.0;
+    for (size_t num = 0; num < num_strs; num++) {
+      auto m = static_cast<int64_t>(hyp_lod[num + 1] - hyp_lod[num]);
+      auto n = static_cast<int64_t>(ref_lod[num + 1] - ref_lod[num]);
+      if (m == 0 || n == 0) {
+        distance = std::max(m, n);
+        if (normalized) {
+          PADDLE_ENFORCE(n > 0,
+                         "The reference string (#%d) cannot be empty "
+                         "when Attr(normalized) is enabled.",
+                         n);
+          distance = distance / n;
+        }
+        memory::Copy(boost::get<Place>(ctx.GetPlace()), out + num,
+                     platform::CPUPlace(), &distance, sizeof(T), stream);
+      } else {
+        framework::Tensor dist_t;
+        dist_t.Resize({m + 1, n + 1});
+        dist_t.mutable_data<T>(ctx.GetPlace());
+        auto dist = dist_t.data<T>();
+        auto x1 = x1_t->data<int>() + hyp_lod[num];
+        auto x2 = x2_t->data<int>() + ref_lod[num];
+
+        FillFirstColumn<T><<<1 + m / PADDLE_CUDA_NUM_THREADS,
+                             PADDLE_CUDA_NUM_THREADS, 0, stream>>>(dist, m, n);
+
+        FillFirstRow<T><<<1 + n / PADDLE_CUDA_NUM_THREADS,
+                          PADDLE_CUDA_NUM_THREADS, 0, stream>>>(dist, n);
+        // Compute the elements of distance matrix in the anti-diagonal diretion
+        for (int64_t slice = 2; slice < m + n + 1; ++slice) {
+          int z_m = slice < m + 1 ? 0 : slice - m;
+          int z_n = slice < n + 1 ? 0 : slice - n;
+          int size = slice - (z_m + z_n) + 1;  // number of elments in the same
+                                               // anti-diagonal line to update
+          // the start index at which computes from
+          int start = slice < n + 1 ? slice : (z_n + 1) * (n + 1) - 1;
+          Levenshtein<T><<<1 + (size - 1) / PADDLE_CUDA_NUM_THREADS,
+                           PADDLE_CUDA_NUM_THREADS, 0, stream>>>(dist, x1, x2,
+                                                                 m, n, start);
+        }
+        SetOutput<T><<<1, 1, 0, stream>>>(out + num, dist, m, n, normalized);
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OP_CUDA_KERNEL(
+    edit_distance,
+    ops::EditDistanceGPUKernel<paddle::platform::CUDAPlace, float>);
diff --git a/paddle/operators/edit_distance_op.h b/paddle/operators/edit_distance_op.h
new file mode 100644
index 0000000000..537e70281a
--- /dev/null
+++ b/paddle/operators/edit_distance_op.h
@@ -0,0 +1,96 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <algorithm>
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename Place, typename T>
+class EditDistanceKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const {
+    auto* out_t = ctx.Output<framework::Tensor>("Out");
+
+    auto* x1_t = ctx.Input<framework::LoDTensor>("Hyps");
+    auto* x2_t = ctx.Input<framework::LoDTensor>("Refs");
+
+    auto normalized = ctx.Attr<bool>("normalized");
+
+    auto hyp_lod = x1_t->lod()[0];
+    auto ref_lod = x2_t->lod()[0];
+    PADDLE_ENFORCE(
+        hyp_lod.size() == ref_lod.size(),
+        "Input(Hyps) and Input(Refs) must have the same batch size.");
+    for (size_t i = 1; i < ref_lod.size(); ++i) {
+      PADDLE_ENFORCE(ref_lod[i] > ref_lod[i - 1],
+                     "Reference string %d is empty.", i);
+    }
+    auto num_strs = hyp_lod.size() - 1;
+
+    out_t->Resize({static_cast<int64_t>(num_strs), 1});
+    out_t->mutable_data<float>(ctx.GetPlace());
+    auto out = out_t->data<T>();
+
+    T distance = 0.0;
+    for (size_t num = 0; num < num_strs; ++num) {
+      auto m = static_cast<int64_t>(hyp_lod[num + 1] - hyp_lod[num]);
+      auto n = static_cast<int64_t>(ref_lod[num + 1] - ref_lod[num]);
+
+      if (m == 0) {
+        distance = n;
+      } else if (n == 0) {
+        distance = m;
+      } else {
+        framework::Tensor dist_t;
+        dist_t.Resize({m + 1, n + 1});
+        dist_t.mutable_data<T>(ctx.GetPlace());
+        auto dist = dist_t.data<T>();
+        auto x1 = x1_t->data<int>() + hyp_lod[num];
+        auto x2 = x2_t->data<int>() + ref_lod[num];
+        for (int64_t i = 0; i < m + 1; ++i) {
+          dist[i * (n + 1)] = i;
+        }
+        for (int64_t j = 0; j < n + 1; ++j) {
+          dist[j] = j;
+        }
+        for (int64_t i = 1; i < m + 1; ++i) {
+          for (int64_t j = 1; j < n + 1; ++j) {
+            int cost = x1[i - 1] == x2[j - 1] ? 0 : 1;
+            int dels = dist[(i - 1) * (n + 1) + j] + 1;
+            int ins = dist[i * (n + 1) + (j - 1)] + 1;
+            int subs = dist[(i - 1) * (n + 1) + (j - 1)] + cost;
+            dist[i * (n + 1) + j] = std::min(dels, std::min(ins, subs));
+          }
+        }
+        distance = dist[m * (n + 1) + n];
+      }
+
+      if (normalized) {
+        PADDLE_ENFORCE(n > 0,
+                       "The reference string (#%d) cannot be empty "
+                       "when Attr(normalized) is enabled.",
+                       n);
+        distance = distance / n;
+      }
+      out[num] = distance;
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/elementwise_add_op.cc b/paddle/operators/elementwise_add_op.cc
index 70b7c9f2ec..37951fa758 100644
--- a/paddle/operators/elementwise_add_op.cc
+++ b/paddle/operators/elementwise_add_op.cc
@@ -21,7 +21,7 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
  public:
   ElementwiseAddOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Add", "$Out = X + Y$");
+    SetComment("Add", "Out = X + Y");
     AddComment(comment_);
   }
 };
diff --git a/paddle/operators/elementwise_add_op.h b/paddle/operators/elementwise_add_op.h
index 59abbb57d1..6478e1e0c2 100644
--- a/paddle/operators/elementwise_add_op.h
+++ b/paddle/operators/elementwise_add_op.h
@@ -81,23 +81,6 @@ struct ElementwiseAddGradFunctor {
   }
 };
 
-template <typename T>
-struct ElementwiseAddOneGradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = dz_e.sum();
-    }
-  }
-};
-
 template <typename T>
 struct ElementwiseAddBroadCastGradFunctor {
   template <typename Device, typename X, typename Y, typename Z, typename dX,
@@ -142,7 +125,6 @@ class ElementwiseAddGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseAddGradFunctor<T>,
-                           ElementwiseAddOneGradFunctor<T>,
                            ElementwiseAddBroadCastGradFunctor<T>,
                            ElementwiseAddBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/elementwise_div_op.cc b/paddle/operators/elementwise_div_op.cc
index 1fa960866f..6ebd58b1b3 100644
--- a/paddle/operators/elementwise_div_op.cc
+++ b/paddle/operators/elementwise_div_op.cc
@@ -21,7 +21,7 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
  public:
   ElementwiseDivOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Div", "$Out = X / Y$");
+    SetComment("Div", "Out = X / Y");
     AddComment(comment_);
   }
 };
diff --git a/paddle/operators/elementwise_div_op.h b/paddle/operators/elementwise_div_op.h
index 875abd313f..7783875e24 100644
--- a/paddle/operators/elementwise_div_op.h
+++ b/paddle/operators/elementwise_div_op.h
@@ -107,7 +107,6 @@ class ElementwiseDivGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseDivGradFunctor<T>,
-                           ElementwiseDivGradFunctor<T>,
                            ElementwiseDivBroadCastGradFunctor<T>,
                            ElementwiseDivBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc
index a6d1173619..450dd05c79 100644
--- a/paddle/operators/elementwise_mul_op.cc
+++ b/paddle/operators/elementwise_mul_op.cc
@@ -22,7 +22,7 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
  public:
   ElementwiseMulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Mul", "$Out = X \\odot\\ Y$");
+    SetComment("Mul", "Out = X \\odot\\ Y");
     AddComment(comment_);
   }
 };
diff --git a/paddle/operators/elementwise_mul_op.h b/paddle/operators/elementwise_mul_op.h
index 3ee50207c0..0e6559eacc 100644
--- a/paddle/operators/elementwise_mul_op.h
+++ b/paddle/operators/elementwise_mul_op.h
@@ -106,7 +106,6 @@ class ElementwiseMulGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseMulGradFunctor<T>,
-                           ElementwiseMulGradFunctor<T>,
                            ElementwiseMulBroadCastGradFunctor<T>,
                            ElementwiseMulBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/elementwise_op.h b/paddle/operators/elementwise_op.h
index f308ee05e1..a342595b54 100644
--- a/paddle/operators/elementwise_op.h
+++ b/paddle/operators/elementwise_op.h
@@ -58,7 +58,8 @@ Limited Elementwise {name} Operator.
 
 The equation is:
 
-{equation}
+.. math::
+  {equation}
 
 X is a tensor of any dimension and the dimensions of tensor Y must be smaller than
 or equal to the dimensions of X. 
@@ -71,15 +72,16 @@ For case 2:
 Y will be broadcasted to match the shape of X and axis should be 
 the starting dimension index for broadcasting Y onto X.
 
-example:
-  shape(X) = (2, 3, 4, 5), shape(Y) = (,)
-  shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
-  shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
-  shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
-  shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
+For example
+  .. code-block:: python
 
-Both the input X and Y can carry the LoD (Level of Details) information,
-or not. But the output only shares the LoD information with input X.
+    shape(X) = (2, 3, 4, 5), shape(Y) = (,)
+    shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
+    shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5)
+    shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
+    shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
+
+Either of the inputs X and Y or none can carry the LoD (Level of Details) information. However, the output only shares the LoD information with input X.
 
 )DOC";
     AddComment(comment_);
diff --git a/paddle/operators/elementwise_op_function.h b/paddle/operators/elementwise_op_function.h
index 560247cb10..0c75276b03 100644
--- a/paddle/operators/elementwise_op_function.h
+++ b/paddle/operators/elementwise_op_function.h
@@ -311,8 +311,7 @@ EIGEN_FUNCTOR(Mul, EIGEN_MUL);
 EIGEN_FUNCTOR(Div, EIGEN_DIV);
 
 template <typename DeviceContext, typename T, typename functor,
-          typename functor1, typename broadcastfunctor,
-          typename broadcast2functor>
+          typename broadcastfunctor, typename broadcast2functor>
 void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
   using Tensor = framework::Tensor;
 
diff --git a/paddle/operators/elementwise_sub_op.cc b/paddle/operators/elementwise_sub_op.cc
index 2a8d0845b1..d3c51f0a69 100644
--- a/paddle/operators/elementwise_sub_op.cc
+++ b/paddle/operators/elementwise_sub_op.cc
@@ -21,7 +21,7 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
  public:
   ElementwiseSubOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : ElementwiseOpMaker(proto, op_checker) {
-    SetComment("Sub", "$Out = X - Y$");
+    SetComment("Sub", "Out = X - Y");
     AddComment(comment_);
   }
 };
diff --git a/paddle/operators/elementwise_sub_op.h b/paddle/operators/elementwise_sub_op.h
index 66edf8672d..347e92f87c 100644
--- a/paddle/operators/elementwise_sub_op.h
+++ b/paddle/operators/elementwise_sub_op.h
@@ -43,23 +43,6 @@ struct ElementwiseSubGradFunctor {
   }
 };
 
-template <typename T>
-struct ElementwiseSubOneGradFunctor {
-  template <typename Device, typename X, typename Y, typename Z, typename dX,
-            typename dY, typename dZ>
-  void operator()(Device d, X x, Y y, Z z, dX dx, dY dy, dZ dz) {
-    auto dz_e = framework::EigenVector<T>::Flatten(*dz);
-    if (dx) {
-      auto dx_e = framework::EigenVector<T>::Flatten(*dx);
-      dx_e.device(d) = dz_e;
-    }
-    if (dy) {
-      auto dy_e = framework::EigenVector<T>::Flatten(*dy);
-      dy_e.device(d) = (-1.0) * dz_e.sum();
-    }
-  }
-};
-
 template <typename T>
 struct ElementwiseSubBroadCastGradFunctor {
   template <typename Device, typename X, typename Y, typename Z, typename dX,
@@ -106,7 +89,6 @@ class ElementwiseSubGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     ElementwiseGradCompute<DeviceContext, T, ElementwiseSubGradFunctor<T>,
-                           ElementwiseSubOneGradFunctor<T>,
                            ElementwiseSubBroadCastGradFunctor<T>,
                            ElementwiseSubBroadCast2GradFunctor<T>>(ctx);
   }
diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index fd59eef7d6..c607704efa 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -13,6 +13,7 @@ if(WITH_GPU)
     nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function)
     nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor)
     nv_library(sequence_padding SRCS sequence_padding.cc sequence_padding.cu DEPS lod_tensor device_context)
+    nv_library(sequence_scale SRCS sequence_scale.cc sequence_scale.cu DEPS lod_tensor device_context)
     nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
     nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context)
     nv_library(unpooling SRCS unpooling.cc unpooling.cu DEPS device_context)
@@ -29,6 +30,7 @@ else()
     cc_library(context_project SRCS context_project.cc DEPS device_context math_function)
     cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor)
     cc_library(sequence_padding SRCS sequence_padding.cc DEPS lod_tensor device_context)
+    cc_library(sequence_scale SRCS sequence_scale.cc DEPS lod_tensor device_context)
     cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
     cc_library(maxouting SRCS maxouting.cc DEPS device_context)
     cc_library(unpooling SRCS unpooling.cc DEPS device_context)
diff --git a/paddle/operators/math/math_function_test.cc b/paddle/operators/math/math_function_test.cc
index 7c6f098ca9..c9f322b92e 100644
--- a/paddle/operators/math/math_function_test.cc
+++ b/paddle/operators/math/math_function_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/operators/math/math_function.h"
 #include "gtest/gtest.h"
 
diff --git a/paddle/operators/math/math_function_test.cu b/paddle/operators/math/math_function_test.cu
index d1139ac988..6f16d66792 100644
--- a/paddle/operators/math/math_function_test.cu
+++ b/paddle/operators/math/math_function_test.cu
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "gtest/gtest.h"
 #include "paddle/operators/math/math_function.h"
 
diff --git a/paddle/operators/math/sequence2batch.cc b/paddle/operators/math/sequence2batch.cc
index 88977be1f8..e459a42ca2 100644
--- a/paddle/operators/math/sequence2batch.cc
+++ b/paddle/operators/math/sequence2batch.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/operators/math/sequence2batch.h"
+#include "paddle/operators/math/math_function.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/operators/math/sequence_scale.cc b/paddle/operators/math/sequence_scale.cc
new file mode 100644
index 0000000000..7e439e9a2c
--- /dev/null
+++ b/paddle/operators/math/sequence_scale.cc
@@ -0,0 +1,46 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/math/sequence_scale.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+template <typename T>
+class ScaleLoDTensorFunctor<platform::CPUDeviceContext, T> {
+ public:
+  void operator()(const platform::CPUDeviceContext& context,
+                  framework::LoDTensor& seq, const T* scales) {
+    const size_t level = 0;
+    auto lod = seq.lod();
+    const size_t num_seq = lod[level].size() - 1;
+    size_t seq_width = seq.dims()[1];
+    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
+
+    T* seq_data = seq.mutable_data<T>(context.GetPlace());
+    for (size_t i = 0; i < num_seq; ++i) {
+      for (size_t j = lod[level][i] * seq_width;
+           j < lod[level][i + 1] * seq_width; ++j) {
+        seq_data[j] *= scales[i];
+      }
+    }
+  }
+};
+
+template class ScaleLoDTensorFunctor<platform::CPUDeviceContext, float>;
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/math/sequence_scale.cu b/paddle/operators/math/sequence_scale.cu
new file mode 100644
index 0000000000..ceaabd8e0f
--- /dev/null
+++ b/paddle/operators/math/sequence_scale.cu
@@ -0,0 +1,57 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/math/sequence_scale.h"
+#include "paddle/platform/cuda_helper.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+using platform::PADDLE_CUDA_NUM_THREADS;
+
+template <typename T, int BlockSize>
+__global__ void SequenceScaleKernel(T* seq, size_t* lod, const T* scales,
+                                    const size_t seq_width) {
+  for (int i = threadIdx.x;
+       i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * seq_width;
+       i += BlockSize) {
+    int idx = lod[blockIdx.x] * seq_width + i;
+    seq[idx] *= scales[blockIdx.x];
+  }
+}
+
+template <typename T>
+class ScaleLoDTensorFunctor<platform::CUDADeviceContext, T> {
+ public:
+  void operator()(const platform::CUDADeviceContext& context,
+                  framework::LoDTensor& seq, const T* scales) {
+    const size_t level = 0;
+    auto lod = seq.lod();
+    const size_t num_seq = lod[level].size() - 1;
+    const size_t seq_width = seq.numel() / seq.dims()[0];
+    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
+    T* seq_data = seq.mutable_data<T>(context.GetPlace());
+
+    SequenceScaleKernel<T, PADDLE_CUDA_NUM_THREADS><<<
+        num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>(
+        seq_data, abs_offset_lod[level].data(), scales, seq_width);
+  }
+};
+
+template class ScaleLoDTensorFunctor<platform::CUDADeviceContext, float>;
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/math/sequence_scale.h b/paddle/operators/math/sequence_scale.h
new file mode 100644
index 0000000000..ecd9a57c3f
--- /dev/null
+++ b/paddle/operators/math/sequence_scale.h
@@ -0,0 +1,55 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/platform/device_context.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+/*
+ * \brief   Scale a sequence.
+ *
+ *  All sequences will be padded to the same length and stored in a transposed
+ * shape.
+ *  Example:
+ *    Given:
+ *      seq = (s0, s0, s0, s0; s1, s1; s2, s2, s2; s3)
+ *      scales = (2, 3, 4, 5)
+ *    then:
+ *      result = (2*s0, 2*s0, 2*s0, 2*s0; 3*s1, 3*s1; 4*s2, 4*s2, 4*s2; 5*s3)
+
+ *
+ * \param context       Device context of this functor.
+ * \param seq           LoDTensor which is stored in sequence format, the shape
+ *                      is [total_sequence_length, sequence_width] where
+ *                      total_sequence_length is the sum of all sequences'
+ *                      length.
+ * \param scales        Array<T>. The i-th sequence will be scaled by scales[i].
+ * \param num_seq       Number of sequence
+ *
+ */
+template <typename DeviceContext, typename T>
+class ScaleLoDTensorFunctor {
+ public:
+  void operator()(const DeviceContext& context, framework::LoDTensor& seq,
+                  const T* scales);
+};
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/net_op.cc b/paddle/operators/net_op.cc
index 03302f5cbf..f12074a5f2 100644
--- a/paddle/operators/net_op.cc
+++ b/paddle/operators/net_op.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/operators/net_op_test.cc b/paddle/operators/net_op_test.cc
index dfd86546e8..9358f29f62 100644
--- a/paddle/operators/net_op_test.cc
+++ b/paddle/operators/net_op_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/operators/net_op.h"
 
 #include <gtest/gtest.h>
diff --git a/paddle/operators/op_documentation/batch_norm_op.md b/paddle/operators/op_documentation/batch_norm_op.md
index 80948adf2b..d1392619c4 100644
--- a/paddle/operators/op_documentation/batch_norm_op.md
+++ b/paddle/operators/op_documentation/batch_norm_op.md
@@ -66,7 +66,7 @@ As most C++ operators do, `batch_norm_op` is defined by inputs, outputs, attribu
 
 The following graph showes the training computational process of `batch_norm_op`:
 
-<img src="./images/batch_norm_op_kernel.png" width="800"/>
+<img src="../images/batch_norm_op_kernel.png" width="800"/>
 
 cudnn provides APIs to finish the whole series of computation, we can use them in our GPU kernel.
 
@@ -124,7 +124,7 @@ for pass_id in range(PASS_NUM):
 `is_infer` is an attribute. Once an operator is created, its attributes can not be changed. It suggests us that we shall maintain two `batch_norm_op` in the model, one's `is_infer` is `True`(we call it `infer_batch_norm_op`) and the other one's is `False`(we call it `train_batch_norm_op`). They share all parameters and variables, but be placed in two different branches. That is to say, if a network contains a `batch_norm_op`, it will fork into two branches, one go through `train_batch_norm_op` and the other one go through `infer_batch_norm_op`:
 
 <div align=center>
-<img src="./images/batch_norm_fork.png" width="500"/>
+<img src="../images/batch_norm_fork.png" width="500"/>
 </div>
 
 Just like what is shown in the above graph, the net forks before `batch_norm_op` and will never merge again. All the operators after `batch_norm_op` will duplicate. 
diff --git a/paddle/operators/parallel_do_op.cc b/paddle/operators/parallel_do_op.cc
index e1bec0421e..c2561fa2bf 100644
--- a/paddle/operators/parallel_do_op.cc
+++ b/paddle/operators/parallel_do_op.cc
@@ -30,16 +30,13 @@ static constexpr char kParallelScopes[] = "parallel_scopes";
 
 static constexpr char kParallelBlock[] = "sub_block";
 
-// using ParallelScopeVar = std::vector<framework::Scope *>;
 using LoDTensor = framework::LoDTensor;
-using OperatorBase = framework::OperatorBase;
 
-void SplitTensorAndMoveTensorToScopes(
-    const framework::Scope &scope,
-    const std::vector<framework::Scope *> &sub_scopes,
+static void SplitTensorAndMoveTensorToScopes(
+    const framework::Scope &scope, std::vector<framework::Scope *> *sub_scopes,
     const std::vector<platform::Place> &places,
     const std::vector<std::string> &names) {
-  PADDLE_ENFORCE_EQ(sub_scopes.size(), places.size());
+  size_t num_sub_scopes = 0;
   for (auto &argu : names) {
     auto *var = scope.FindVar(argu);
     const auto &tensor = var->Get<LoDTensor>();
@@ -48,9 +45,21 @@ void SplitTensorAndMoveTensorToScopes(
     for (auto &lod : lod_tensors) {
       VLOG(3) << lod.dims();
     }
+    if (num_sub_scopes == 0) {
+      num_sub_scopes = lod_tensors.size();
+    } else {
+      PADDLE_ENFORCE_EQ(num_sub_scopes, lod_tensors.size());
+    }
+    PADDLE_ENFORCE_NE(num_sub_scopes, 0);
+    if (sub_scopes->size() == 0) {
+      sub_scopes->reserve(num_sub_scopes);
+      for (size_t i = 0; i < num_sub_scopes; ++i) {
+        sub_scopes->emplace_back(&scope.NewScope());
+      }
+    }
 
-    for (size_t i = 0; i < sub_scopes.size(); ++i) {
-      *sub_scopes[i]->Var(argu)->GetMutable<LoDTensor>() = lod_tensors[i];
+    for (size_t i = 0; i < lod_tensors.size(); ++i) {
+      *(*sub_scopes)[i]->Var(argu)->GetMutable<LoDTensor>() = lod_tensors[i];
     }
   }
 }
@@ -70,7 +79,7 @@ class ParallelDoOp : public framework::OperatorBase {
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
+      : framework::OperatorBase(type, inputs, outputs, attrs) {}
 
   void Run(const framework::Scope &scope,
            const platform::Place &place) const override {
@@ -85,19 +94,17 @@ class ParallelDoOp : public framework::OperatorBase {
 
     auto &sub_scopes = *scope.FindVar(Output(kParallelScopes))
                             ->GetMutable<std::vector<framework::Scope *>>();
-    for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-      sub_scopes.push_back(&scope.NewScope());
-    }
 
     // split input
-    SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places,
+    SplitTensorAndMoveTensorToScopes(scope, &sub_scopes, places,
                                      Inputs(kInputs));
+
     // copy parameter
     for (auto &param : Inputs(kParameters)) {
       PADDLE_ENFORCE(scope.FindVar(param)->IsType<LoDTensor>(),
                      "Only support parameter type as LoDTensor");
       auto &src = scope.FindVar(param)->Get<LoDTensor>();
-      for (size_t i = 0; i < places.size(); ++i) {
+      for (size_t i = 0; i < sub_scopes.size(); ++i) {
         auto &place = places[i];
         auto *sub_scope = sub_scopes[i];
         auto *dst = sub_scope->Var(param)->GetMutable<LoDTensor>();
@@ -108,9 +115,7 @@ class ParallelDoOp : public framework::OperatorBase {
 
     std::vector<std::future<void>> workers;
     workers.reserve(places.size());
-    for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-      VLOG(3) << "Run " << place_idx;
-
+    for (size_t place_idx = 0; place_idx < sub_scopes.size(); ++place_idx) {
       auto &place = places[place_idx];
       auto *cur_scope = sub_scopes[place_idx];
 
@@ -157,21 +162,16 @@ ParallelDo Operator.
   }
 };
 
-class ParallelDoGradOp : public OperatorBase {
+class ParallelDoGradOp : public framework::OperatorBase {
  public:
   ParallelDoGradOp(const std::string &type,
                    const framework::VariableNameMap &inputs,
                    const framework::VariableNameMap &outputs,
                    const framework::AttributeMap &attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {}
+      : framework::OperatorBase(type, inputs, outputs, attrs) {}
 
   void Run(const framework::Scope &scope,
            const platform::Place &place) const override {
-    // // get device context from pool
-    // platform::DeviceContextPool &pool =
-    //        platform::DeviceContextPool::Instance();
-    // auto &dev_ctx = *pool.Get(place);
-
     auto *block = Attr<framework::BlockDesc *>(kParallelBlock);
     auto *program = block->Program();
 
@@ -181,26 +181,16 @@ class ParallelDoGradOp : public OperatorBase {
     auto &places = scope.FindVar(Input(kPlaces))->Get<platform::PlaceList>();
 
     // feed output@grad
-    SplitTensorAndMoveTensorToScopes(scope, sub_scopes, places,
-                                     Inputs(framework::GradVarName(kOutputs)));
+    SplitTensorAndMoveTensorToScopes(
+        scope, const_cast<std::vector<framework::Scope *> *>(&sub_scopes),
+        places, Inputs(framework::GradVarName(kOutputs)));
     WaitOnPlaces(places);
 
-    // for debugging
-    for (auto &s : Inputs(framework::GradVarName(kOutputs))) {
-      VLOG(3) << s;
-      VLOG(3) << scope.FindVar(s)->Get<LoDTensor>();
-      for (auto *sub_scope : sub_scopes) {
-        VLOG(3) << sub_scope->FindVar(s)->Get<LoDTensor>();
-      }
-    }
-
     // exe run
     std::vector<std::future<void>> workers;
-    for (size_t place_idx = 0; place_idx < places.size(); ++place_idx) {
-      VLOG(3) << "Run " << place_idx;
-
-      auto &place = places[place_idx];
-      auto *cur_scope = sub_scopes[place_idx];
+    for (size_t i = 0; i < sub_scopes.size(); ++i) {
+      auto &place = places[i];
+      auto *cur_scope = sub_scopes[i];
 
       // execute
       workers.emplace_back(framework::Async([program, cur_scope, place, block] {
@@ -216,33 +206,38 @@ class ParallelDoGradOp : public OperatorBase {
 
     // merge grad
     for (auto &s : Outputs(framework::GradVarName(kParameters))) {
-      VLOG(3) << "merge grad " << s;
-
-      auto &t = sub_scopes[0]->FindVar(s)->Get<LoDTensor>();
-      VLOG(3) << t;
-
-      std::string s_buf = s + "@BUF";
-      auto *t_buf = sub_scopes[0]->Var(s_buf)->GetMutable<LoDTensor>();
-
-      for (size_t place_idx = 1; place_idx < places.size(); ++place_idx) {
-        auto &tt = sub_scopes[place_idx]->FindVar(s)->Get<LoDTensor>();
-        VLOG(3) << place_idx;
-        VLOG(3) << tt;
-        framework::Copy(tt, places[0], t_buf);
+      auto &result = sub_scopes[0]->FindVar(s)->Get<LoDTensor>();
+      std::string tmp_name;
+      auto *tmp = sub_scopes[0]->Var(&tmp_name)->GetMutable<LoDTensor>();
+
+      for (size_t i = 1; i < sub_scopes.size(); ++i) {
+        auto &tensor_to_merge = sub_scopes[i]->FindVar(s)->Get<LoDTensor>();
+        if (!(places[i] == places[0])) {
+          framework::Copy(tensor_to_merge, places[0], tmp);
+        } else {
+          tmp->ShareDataWith(tensor_to_merge);
+        }
 
         auto sum_op = framework::OpRegistry::CreateOp(
-            "sum", {{"X", {s, s_buf}}}, {{"Out", {s}}},
+            "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
             framework::AttributeMap{});
         sum_op->Run(*sub_scopes[0], places[0]);
         WaitOnPlaces(places);
       }
 
-      VLOG(3) << t;
-      framework::Copy(t, place, scope.FindVar(s)->GetMutable<LoDTensor>());
+      VLOG(3) << result;
+      framework::Copy(result, place, scope.FindVar(s)->GetMutable<LoDTensor>());
     }
   }
 };
 
+std::ostream &operator<<(std::ostream &sout,
+                         const std::vector<std::string> &strs) {
+  std::copy(strs.begin(), strs.end(),
+            std::ostream_iterator<std::string>(sout, ","));
+  return sout;
+}
+
 class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
  public:
   using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
@@ -283,18 +278,30 @@ class ParallelDoGradOpShapeInference : public framework::InferShapeBase {
   void operator()(framework::InferShapeContext *ctx) const override {
     std::vector<std::string> input{kParameters, kInputs};
     std::vector<std::string> output{kOutputs};
-    for (auto &s : input) {
-      PADDLE_ENFORCE(ctx->HasInputs(s));
-      PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(s)),
-                     "Cannot find the gradient variable %s",
-                     framework::GradVarName(s));
-    }
+
+    PADDLE_ENFORCE(ctx->HasInputs(kParameters));
+    PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters)));
+    PADDLE_ENFORCE(ctx->HasInput(kInputs));
+
     for (auto &s : output) {
       PADDLE_ENFORCE(ctx->HasInputs(s));
     }
-    for (auto &s : input) {
-      ctx->SetOutputsDim(framework::GradVarName(s), ctx->GetInputsDim(s));
+
+    ctx->SetOutputsDim(framework::GradVarName(kParameters),
+                       ctx->GetInputsDim(kParameters));
+
+    auto i_dims = ctx->GetInputsDim(kInputs);
+    auto ig_names = ctx->Outputs(framework::GradVarName(kInputs));
+
+    for (size_t i = 0; i < ig_names.size(); ++i) {
+      auto &ig_name = ig_names[i];
+      if (ig_name == framework::kEmptyVarName) {
+        continue;
+      }
+
+      ctx->SetDims({ig_name}, {i_dims[i]});
     }
+
     if (ctx->HasInputs(kParameters)) {
       PADDLE_ENFORCE(ctx->HasOutputs(framework::GradVarName(kParameters)));
       ctx->SetOutputsDim(framework::GradVarName(kParameters),
diff --git a/paddle/operators/pool_cudnn_op.cc b/paddle/operators/pool_cudnn_op.cc
deleted file mode 100644
index 77407f5cdf..0000000000
--- a/paddle/operators/pool_cudnn_op.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/operators/pool_cudnn_op.h"
-
-namespace ops = paddle::operators;
-
-REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
-            ops::PoolOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    pool2d_cudnn, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    pool2d_cudnn_grad,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
-
-REGISTER_OP(pool3d_cudnn, ops::PoolOp, ops::Pool3dOpMaker, pool3d_cudnn_grad,
-            ops::PoolOpGrad);
-
-REGISTER_OP_CPU_KERNEL(
-    pool3d_cudnn, ops::PoolKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolKernel<paddle::platform::CPUDeviceContext, double>);
-REGISTER_OP_CPU_KERNEL(
-    pool3d_cudnn_grad,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::PoolGradKernel<paddle::platform::CPUDeviceContext, double>)
diff --git a/paddle/operators/pool_cudnn_op.cu.cc b/paddle/operators/pool_cudnn_op.cu.cc
index 2d0001ba11..446fb0819d 100644
--- a/paddle/operators/pool_cudnn_op.cu.cc
+++ b/paddle/operators/pool_cudnn_op.cu.cc
@@ -12,7 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/operators/pool_cudnn_op.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/pool_op.h"
 #include "paddle/platform/cudnn_helper.h"
 
 namespace paddle {
@@ -25,7 +26,7 @@ using DataLayout = platform::DataLayout;
 using PoolingMode = platform::PoolingMode;
 
 template <typename T>
-class PoolCudnnOpKernel : public framework::OpKernel<T> {
+class PoolCUDNNOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -86,7 +87,7 @@ class PoolCudnnOpKernel : public framework::OpKernel<T> {
 };
 
 template <typename T>
-class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
+class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
@@ -162,12 +163,16 @@ class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_CUDA_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>,
-                        ops::PoolCudnnOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
-                        ops::PoolCudnnGradOpKernel<double>);
-
-REGISTER_OP_CUDA_KERNEL(pool3d_cudnn, ops::PoolCudnnOpKernel<float>,
-                        ops::PoolCudnnOpKernel<double>);
-REGISTER_OP_CUDA_KERNEL(pool3d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>,
-                        ops::PoolCudnnGradOpKernel<double>);
+REGISTER_OP_KERNEL(pool2d, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNOpKernel<float>,
+                   ops::PoolCUDNNOpKernel<double>);
+REGISTER_OP_KERNEL(pool2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNGradOpKernel<float>,
+                   ops::PoolCUDNNGradOpKernel<double>);
+
+REGISTER_OP_KERNEL(pool3d, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNOpKernel<float>,
+                   ops::PoolCUDNNOpKernel<double>);
+REGISTER_OP_KERNEL(pool3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+                   ops::PoolCUDNNGradOpKernel<float>,
+                   ops::PoolCUDNNGradOpKernel<double>);
diff --git a/paddle/operators/pool_op.cc b/paddle/operators/pool_op.cc
index d3cf5fa638..3e567efd08 100644
--- a/paddle/operators/pool_op.cc
+++ b/paddle/operators/pool_op.cc
@@ -61,6 +61,23 @@ void PoolOp::InferShape(framework::InferShapeContext *ctx) const {
   ctx->ShareLoD("X", "Out");
 }
 
+framework::OpKernelType PoolOp::GetExpectedKernelType(
+    const framework::ExecutionContext &ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
   PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must not be null.");
   PADDLE_ENFORCE(ctx->HasOutput(framework::GradVarName("X")),
@@ -68,6 +85,23 @@ void PoolOpGrad::InferShape(framework::InferShapeContext *ctx) const {
   ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
 }
 
+framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
+    const framework::ExecutionContext &ctx) const {
+  bool use_cudnn = ctx.Attr<bool>("use_cudnn");
+  framework::LibraryType library_;
+  if (use_cudnn) {
+    library_ = framework::LibraryType::kCUDNN;
+  } else {
+    library_ = framework::LibraryType::kPlain;
+  }
+
+  std::string data_format = ctx.Attr<std::string>("data_format");
+  framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+  return framework::OpKernelType(
+      framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
+      layout_, library_);
+}
+
 Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
     : OpProtoAndCheckerMaker(proto, op_checker) {
   AddInput(
@@ -101,15 +135,27 @@ Pool2dOpMaker::Pool2dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
   AddAttr<std::vector<int>>("strides",
                             "(vector<int>, default {1, 1}), strides(height, "
                             "width) of pooling operator.")
-      .SetDefault({1, 1});  // TODO(Chengduo): Add checker. (Currently,
+      .SetDefault({1, 1});
+  // TODO(Chengduo): Add checker. (Currently,
   // TypedAttrChecker don't support vector type.)
   AddAttr<std::vector<int>>(
       "paddings",
       "(vector<int>, default {0,0}), paddings(height, width) of pooling "
       "operator."
       "If global_pooling = true, paddings and ksize will be ignored.")
-      .SetDefault({0, 0});  // TODO(Chengduo): Add checker. (Currently,
-  // TypedAttrChecker don't support vector type.)
+      .SetDefault({0, 0});
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
 
   AddComment(R"DOC(
 Pool2d Operator.
@@ -182,6 +228,19 @@ Pool3dOpMaker::Pool3dOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       .SetDefault({0, 0, 0});  // TODO(Chengduo): Add checker. (Currently,
                                // TypedAttrChecker don't support vector type.)
 
+  AddAttr<bool>(
+      "use_cudnn",
+      "(bool, default false) Only used in cudnn kernel, need install cudnn")
+      .SetDefault(false);
+  AddAttr<std::string>(
+      "data_format",
+      "(string, default NCHW) Only used in "
+      "An optional string from: \"NHWC\", \"NCHW\". "
+      "Defaults to \"NHWC\". Specify the data format of the output data, "
+      "the input will be transformed automatically. ")
+      .SetDefault("AnyLayout");
+  // TODO(dzhwinter): need to registered layout transform function
+
   AddComment(R"DOC(
 Pool3d Operator.
 
diff --git a/paddle/operators/pool_op.h b/paddle/operators/pool_op.h
index 3860e295f4..c3d82ecbde 100644
--- a/paddle/operators/pool_op.h
+++ b/paddle/operators/pool_op.h
@@ -29,6 +29,10 @@ class PoolOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class PoolOpGrad : public framework::OperatorWithKernel {
@@ -36,6 +40,10 @@ class PoolOpGrad : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override;
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override;
 };
 
 class Pool2dOpMaker : public framework::OpProtoAndCheckerMaker {
diff --git a/paddle/operators/print_op.cc b/paddle/operators/print_op.cc
new file mode 100644
index 0000000000..8b233d64c9
--- /dev/null
+++ b/paddle/operators/print_op.cc
@@ -0,0 +1,283 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include <algorithm>
+#include <ctime>
+
+#include "paddle/framework/op_registry.h"
+#include "paddle/framework/variable.h"
+
+namespace paddle {
+namespace operators {
+
+#define CLOG std::cout
+
+const std::string kForward = "FORWARD";
+const std::string kBackward = "BACKWARD";
+const std::string kBoth = "BOTH";
+
+struct Formater {
+  std::string message;
+  std::string name;
+  std::vector<int> dims;
+  std::type_index dtype{typeid(char)};
+  framework::LoD lod;
+  int summarize;
+  void* data{nullptr};
+
+  void operator()(size_t size) {
+    PrintMessage();
+    PrintName();
+    PrintDims();
+    PrintDtype();
+    PrintLod();
+    PrintData(size);
+  }
+
+ private:
+  void PrintMessage() { CLOG << std::time(nullptr) << "\t" << message; }
+  void PrintName() {
+    if (!name.empty()) {
+      CLOG << "Tensor[" << name << "]" << std::endl;
+    }
+  }
+  void PrintDims() {
+    if (!dims.empty()) {
+      CLOG << "\tshape: [";
+      for (auto i : dims) {
+        CLOG << i << ",";
+      }
+      CLOG << "]" << std::endl;
+    }
+  }
+  void PrintDtype() {
+    if (dtype.hash_code() != typeid(char).hash_code()) {
+      CLOG << "\tdtype: " << dtype.name() << std::endl;
+    }
+  }
+  void PrintLod() {
+    if (!lod.empty()) {
+      CLOG << "\tLoD: [";
+      for (auto level : lod) {
+        CLOG << "[ ";
+        for (auto i : level) {
+          CLOG << i << ",";
+        }
+        CLOG << " ]";
+      }
+      CLOG << "]" << std::endl;
+    }
+  }
+
+  void PrintData(size_t size) {
+    PADDLE_ENFORCE_NOT_NULL(data);
+    // print float
+    if (dtype.hash_code() == typeid(float).hash_code()) {
+      Display<float>(size);
+    }
+    if (dtype.hash_code() == typeid(double).hash_code()) {
+      Display<double>(size);
+    }
+    if (dtype.hash_code() == typeid(int).hash_code()) {
+      Display<int>(size);
+    }
+    if (dtype.hash_code() == typeid(int64_t).hash_code()) {
+      Display<int64_t>(size);
+    }
+  }
+
+  template <typename T>
+  void Display(size_t size) {
+    auto* d = (T*)data;
+    CLOG << "\tdata: ";
+    if (summarize != -1) {
+      summarize = std::min(size, (size_t)summarize);
+      for (int i = 0; i < summarize; i++) {
+        CLOG << d[i] << ",";
+      }
+    } else {
+      for (size_t i = 0; i < size; i++) {
+        CLOG << d[i] << ",";
+      }
+    }
+    CLOG << std::endl;
+  }
+};
+
+// TODO(ChunweiYan) there should be some other printers for TensorArray
+class TensorPrintOp : public framework::OperatorBase {
+ public:
+  TensorPrintOp(const std::string& type,
+                const framework::VariableNameMap& inputs,
+                const framework::VariableNameMap& outputs,
+                const framework::AttributeMap& attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
+
+  TensorPrintOp(const TensorPrintOp& o)
+      : framework::OperatorBase(
+            static_cast<const framework::OperatorBase&>(o)) {
+    PADDLE_THROW("Not implemented.");
+  }
+
+  void Run(const framework::Scope& scope,
+           const platform::Place& place) const override {
+    const framework::Variable* in_var_ptr = nullptr;
+    std::string phase = kForward;
+    std::string printed_var_name = "";
+
+    auto& inputs = Inputs();
+    if (inputs.find("In") != inputs.end() && !Inputs("In").empty()) {
+      in_var_ptr = scope.FindVar(Input("In"));
+      printed_var_name = Inputs("In").front();
+    } else if (inputs.find("In@GRAD") != inputs.end() &&
+               !Inputs("In@GRAD").empty()) {
+      in_var_ptr = scope.FindVar(Input("In@GRAD"));
+      printed_var_name = Inputs("In@GRAD").front();
+      phase = kBackward;
+    } else {
+      PADDLE_THROW("Unknown phase, should be forward or backward.");
+    }
+
+    PADDLE_ENFORCE_NOT_NULL(in_var_ptr);
+
+    auto& in_tensor = in_var_ptr->Get<framework::LoDTensor>();
+    auto* out_var_ptr = scope.FindVar(Output("Out"));
+    auto& out_tensor = *out_var_ptr->GetMutable<framework::LoDTensor>();
+
+    // Just copy data from input tensor to output tensor
+    // output tensor share same memory with input tensor
+    out_tensor.ShareDataWith(in_tensor);
+    out_tensor.set_lod(in_tensor.lod());
+
+    std::string print_phase = Attr<std::string>("print_phase");
+    if (print_phase != phase && print_phase != kBoth) {
+      return;
+    }
+
+    int first_n = Attr<int>("first_n");
+    if (first_n > 0 && ++times_ > first_n) return;
+
+    framework::LoDTensor printed_tensor;
+    printed_tensor.set_lod(in_tensor.lod());
+    printed_tensor.Resize(in_tensor.dims());
+
+    if (platform::is_cpu_place(in_tensor.place())) {
+      printed_tensor.ShareDataWith(in_tensor);
+    } else {
+      // copy data to cpu to print
+      platform::CPUPlace place;
+      framework::Copy(in_tensor, place, &printed_tensor);
+    }
+
+    Formater formater;
+    if (Attr<bool>("print_tensor_name")) {
+      formater.name = printed_var_name;
+    }
+    if (Attr<bool>("print_tensor_type")) {
+      formater.dtype = printed_tensor.type();
+    }
+    if (Attr<bool>("print_tensor_shape")) {
+      auto& dims = printed_tensor.dims();
+      formater.dims.resize(dims.size());
+      for (int i = 0; i < dims.size(); ++i) formater.dims[i] = dims[i];
+    }
+    if (Attr<bool>("print_tensor_lod")) {
+      formater.lod = printed_tensor.lod();
+    }
+    formater.summarize = Attr<int>("summarize");
+    formater.data = (void*)printed_tensor.data<void>();
+    formater(printed_tensor.numel());
+  }
+
+ private:
+  mutable int times_{0};
+};
+
+class PrintOpProtoAndCheckMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  PrintOpProtoAndCheckMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("In", "Input tensor to be displayed.");
+    AddAttr<int>("first_n", "Only log `first_n` number of times.");
+    AddAttr<std::string>("message", "A string message to print as a prefix.");
+    AddAttr<int>("summarize", "Number of elements printed.");
+    AddAttr<bool>("print_tensor_name", "Whether to print the tensor name.");
+    AddAttr<bool>("print_tensor_type", "Whether to print the tensor's dtype.");
+    AddAttr<bool>("print_tensor_shape", "Whether to print the tensor's shape.");
+    AddAttr<bool>("print_tensor_lod", "Whether to print the tensor's lod.");
+    AddAttr<std::string>(
+        "print_phase",
+        "(string, default 'BOTH') Which phase to display including 'FORWARD' "
+        "'BACKWARD' and 'BOTH'.")
+        .SetDefault(kBoth)
+        .InEnum({kForward, kBackward, kBoth});
+    AddOutput("Out", "Output tensor with same data as input tensor.");
+    AddComment(R"DOC(
+Creates a print op that will print when a tensor is accessed.
+
+Wraps the tensor passed in so that whenever that a tensor is accessed,
+the message `message` is printed, along with the current value of the
+tensor `t`.)DOC");
+  }
+};
+
+class InferShapeForward : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext* context) const override {
+    PADDLE_ENFORCE(context->HasInput("In"), "Input(In) should not be null.");
+    context->ShareLoD("In", /*->*/ "Out");
+    context->SetOutputDim("Out", context->GetInputDim("In"));
+  }
+};
+
+class InferShapeBackward : public framework::InferShapeBase {
+ public:
+  void operator()(framework::InferShapeContext* context) const override {
+    PADDLE_ENFORCE(context->HasInput("In@GRAD"),
+                   "Input(In@GRAD) should not be null.");
+    context->ShareLoD("In@GRAD", /*->*/ "Out");
+    context->SetOutputDim("Out", context->GetInputDim("In@GRAD"));
+  }
+};
+
+class InferVarType : public framework::VarTypeInference {
+ public:
+  void operator()(const framework::OpDesc& op_desc,
+                  framework::BlockDesc* block) const override {}
+};
+
+class PrintOpProtoAndCheckGradOpMaker
+    : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* op_desc_ptr = new framework::OpDesc();
+    op_desc_ptr->SetType("print_grad");
+    op_desc_ptr->SetInput("In@GRAD", OutputGrad("Out"));
+    op_desc_ptr->SetOutput("Out", InputGrad("In"));
+    op_desc_ptr->SetAttrMap(Attrs());
+    return std::unique_ptr<framework::OpDesc>(op_desc_ptr);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(print, ops::TensorPrintOp, ops::PrintOpProtoAndCheckMaker,
+                  ops::PrintOpProtoAndCheckGradOpMaker, ops::InferShapeForward,
+                  ops::InferVarType);
+REGISTER_OPERATOR(print_grad, ops::TensorPrintOp, ops::InferShapeBackward);
diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc
index 9331c7b563..f9ed751682 100644
--- a/paddle/operators/recv_op.cc
+++ b/paddle/operators/recv_op.cc
@@ -24,7 +24,8 @@ limitations under the License. */
 #include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/framework/proto_desc.h"
-#include "paddle/operators/detail/send_recv_impl.h"
+#include "paddle/operators/detail/grpc_server.h"
+#include "paddle/operators/detail/sendrecvop_utils.h"
 #include "paddle/operators/detail/simple_block_queue.h"
 
 #define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
@@ -32,6 +33,11 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+void RunServer(std::shared_ptr<detail::AsyncGRPCServer> service) {
+  service->RunSyncUpdate();
+  VLOG(4) << "RunServer thread end";
+}
+
 static void CreateTensorFromMessageType(framework::Variable *var,
                                         sendrecv::VarType var_type) {
   if (var_type == sendrecv::VarType::LOD_TENSOR) {
@@ -46,18 +52,6 @@ static void CreateTensorFromMessageType(framework::Variable *var,
   }
 }
 
-void RunServer(Server **rpc_server,
-               std::shared_ptr<detail::SendRecvServerImpl> service,
-               const std::string &server_address) {
-  ServerBuilder builder;
-  builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());
-  builder.RegisterService(service.get());
-  std::unique_ptr<Server> server(builder.BuildAndStart());
-  *rpc_server = server.get();
-  LOG(INFO) << "Server listening on " << server_address;
-  server->Wait();
-}
-
 class RecvOp : public framework::OperatorBase {
  public:
   RecvOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -65,10 +59,9 @@ class RecvOp : public framework::OperatorBase {
          const framework::AttributeMap &attrs)
       : OperatorBase(type, inputs, outputs, attrs) {
     if (!rpc_service_) {
-      rpc_service_.reset(new detail::SendRecvServerImpl());
       std::string endpoint = Attr<std::string>("endpoint");
-      server_thread_.reset(
-          new std::thread(RunServer, &rpc_server_, rpc_service_, endpoint));
+      rpc_service_.reset(new detail::AsyncGRPCServer(endpoint));
+      server_thread_.reset(new std::thread(RunServer, rpc_service_));
     }
   }
 
@@ -76,7 +69,7 @@ class RecvOp : public framework::OperatorBase {
     detail::MessageWithName term_msg;
     term_msg.first = LISTEN_TERMINATE_MESSAGE;
     rpc_service_->Push(term_msg);
-    rpc_server_->Shutdown();
+    rpc_service_->ShutDown();
     server_thread_->join();
   }
 
@@ -94,15 +87,24 @@ class RecvOp : public framework::OperatorBase {
            const platform::Place &dev_place) const override {
     // FIXME(typhoonzero): no new scopes for every run.
     framework::Scope &recv_scope = scope.NewScope();
+    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
+    auto &dev_ctx = *pool.Get(dev_place);
+
+    // FIXME(Yancey1989): initialize rpc server with laze mode.
     rpc_service_->SetScope(&recv_scope);
+    rpc_service_->SetDevCtx(&dev_ctx);
     auto param_list = Attr<std::vector<std::string>>("ParamList");
     auto grad_list = Attr<std::vector<std::string>>("GradList");
     auto trainer_count = Attr<int>("Trainers");
     size_t param_count = param_list.size();
+
     rpc_service_->Reset();
     // TODO(typhoonzero): change this to a while_op for every cluster-batch.
     bool exit_flag = false;
+    VLOG(4) << "param_count:" << param_count
+            << " trainer_count:" << trainer_count;
     while (!exit_flag) {
+      // TODO(gognwb): simply this loop.
       // Get from multiple trainers, we don't care about order in which
       // the gradient arrives, just add suffix 0~n then average the gradient.
       for (size_t i = 0; i < param_count * trainer_count; ++i) {
@@ -110,6 +112,7 @@ class RecvOp : public framework::OperatorBase {
         const detail::MessageWithName &v = rpc_service_->Get();
         auto grad_var_name = v.first;
         if (grad_var_name == LISTEN_TERMINATE_MESSAGE) {
+          VLOG(4) << "received LISTEN_TERMINATE_MESSAGE and RunOp.Run() exit";
           exit_flag = true;
           break;
         }
@@ -118,10 +121,12 @@ class RecvOp : public framework::OperatorBase {
         if (it != grad_list.end()) {
           param_var_name = param_list[it - grad_list.begin()];
         } else {
-          LOG(ERROR) << "grad have no paired param found!";
+          LOG(ERROR) << "grad have no paired param found!\"" << grad_var_name
+                     << "\"";
         }
         VLOG(3) << "recved grad: " << grad_var_name
                 << " updating param: " << param_var_name;
+
         auto *merged_grad = recv_scope.FindVar(grad_var_name);
         if (merged_grad == nullptr) {
           auto *ptr = recv_scope.Var(grad_var_name);
@@ -136,14 +141,13 @@ class RecvOp : public framework::OperatorBase {
         }
 
         auto *var = recv_scope.Var(grad_var_name);
-        platform::DeviceContextPool &pool =
-            platform::DeviceContextPool::Instance();
-        auto &dev_ctx = *pool.Get(dev_place);
         detail::DeserializeFromMessage(v.second, dev_ctx, var);
       }
+
       if (exit_flag) {
         break;
       }
+
       rpc_service_->Reset();
 
       std::string program_str = Attr<std::string>("OptimizeProgram");
@@ -158,17 +162,14 @@ class RecvOp : public framework::OperatorBase {
       } catch (std::exception &e) {
         LOG(ERROR) << "run sub program error " << e.what();
       }
+
       rpc_service_->Done();
       grads_counter_.clear();
     }  // while(true)
   }
 
  protected:
-  // grpc server instance to track status and gracefully shutdown.
-  // borrow an pointer from server thread.
-  Server *rpc_server_{nullptr};
-  // grpc send/recv service implement to register.
-  std::shared_ptr<detail::SendRecvServerImpl> rpc_service_;
+  std::shared_ptr<detail::AsyncGRPCServer> rpc_service_;
   std::shared_ptr<std::thread> server_thread_;
   mutable std::unordered_map<std::string, int> grads_counter_;
 };
diff --git a/paddle/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/operators/reorder_lod_tensor_by_rank_op.cc
index a055cdf7e8..3c30447949 100644
--- a/paddle/operators/reorder_lod_tensor_by_rank_op.cc
+++ b/paddle/operators/reorder_lod_tensor_by_rank_op.cc
@@ -26,22 +26,44 @@ class ReorderLoDTensorByRankTableOpProtoMaker
   ReorderLoDTensorByRankTableOpProtoMaker(OpProto *proto,
                                           OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "(LoDTensor) the input lod tensor need to be reordered.");
+    AddInput("X",
+             "(LoDTensor), the input lod tensor to be reordered according to "
+             "Input(RankTable).");
     AddInput("RankTable",
-             "(LoDRankTable) the rank table that input need follow");
-    AddOutput("Out", "(LoDTensor) reordered lod tensor");
-    AddComment(R"DOC(ReorderLoDTensorByRankTable
+             "(LoDRankTable), the rank table according to which Input(X) is "
+             "reordered.");
+    AddOutput("Out", "(LoDTensor), the reordered lod tensor.");
+    AddComment(R"DOC(ReorderLoDTensorByRankTable operator.
 
-Reorder the input X by the rank of `RankTable`. If `RankTable` is ordered by
-index [3, 0, 2, 1]. Input X will reorder its sequence, the third sequence of
-X will be the first sequence of Output.
-
-NOTE: The RankTable does not need to be calculated by X.
+Input(X) is a batch of sequences. Input(RankTable) stores new orders of the
+input sequence batch. The reorder_lod_tensor_by_rank operator reorders the
+Input(X) according to the information provided by Input(RankTable).
 
 For example:
-The X = [Seq0, Seq1, Seq2, Seq3]. The indices of RankTable are [3, 0, 2, 1].
 
-The Out =  [Seq3, Seq0, Seq2, Seq1] with correct LoD information.
+If the indices stored in the Input(RankTable) are [3, 0, 2, 1], the
+Input(X) will be reordered that the fourth sequence in Input(X) will become the
+first one, and then followed by the original first, third, and the second one.
+
+This is:
+X = [Seq0, Seq1, Seq2, Seq3]. The indices in RankTable are [3, 0, 2, 1].
+Out =  [Seq3, Seq0, Seq2, Seq1] with a new LoD information.
+
+If the LoD information of Input(X) is empty, this means Input(X) is not sequence
+data. This is also identical to a batch of sequences where each sequence has a
+fixed length 1. In this case, the reorder_lod_tensor_by_rank operator reorders
+each slice of Input(X) along the first axis according to Input(RankTable).
+
+This is:
+X = [Slice0, Slice1, Slice2, Slice3] and its LoD information is empty. The
+indices in RankTable are [3, 0, 2, 1].
+Out = [Slice3, Slice0, Slice2, Slice1] with no LoD information is appended.
+
+NOTE: This operator sorts Input(X) according to a given LoDRankTable which does
+not need to be calculated according to Input(X). It can be calculated according
+to another different sequence, and then this operator sorts Input(X) according
+to the given LoDRankTable.
+
 )DOC");
   }
 };
diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc
index 95c207221a..7c81a9524d 100644
--- a/paddle/operators/send_op.cc
+++ b/paddle/operators/send_op.cc
@@ -19,59 +19,45 @@ limitations under the License. */
 #include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/op_registry.h"
 
-#include "paddle/operators/detail/send_recv_impl.h"
-#include "paddle/operators/detail/simple_block_queue.h"
+#include <future>
+#include "paddle/operators/detail/grpc_client.h"
 
 namespace paddle {
 namespace operators {
 
-// TODO(typhoonzero): this is a simple implementation which only send
-// one tensor
 class SendOp : public framework::OperatorBase {
  public:
-  SendOp(const std::string &type, const framework::VariableNameMap &inputs,
-         const framework::VariableNameMap &outputs,
-         const framework::AttributeMap &attrs)
-      : OperatorBase(type, inputs, outputs, attrs) {
-    // init client when the operator is created at runtime.
-    std::vector<std::string> endpoints =
-        Attr<std::vector<std::string>>("endpoints");
-    for (auto ep : endpoints) {
-      client_map_[ep].reset(new detail::RPCClient(
-          grpc::CreateChannel(ep, grpc::InsecureChannelCredentials())));
-    }
-  }
+  SendOp(const std::string& type, const framework::VariableNameMap& inputs,
+         const framework::VariableNameMap& outputs,
+         const framework::AttributeMap& attrs)
+      : OperatorBase(type, inputs, outputs, attrs) {}
 
-  void Run(const framework::Scope &scope,
-           const platform::Place &dev_place) const override {
+  void Run(const framework::Scope& scope,
+           const platform::Place& place) const override {
     auto ins = Inputs("X");
     auto outs = Outputs("Out");
     std::vector<std::string> epmap = Attr<std::vector<std::string>>("epmap");
-    // TODO(typhoonzero): use async calls to send multiple variable asyncly.
-    for (size_t i = 0; i < ins.size(); ++i) {
-      bool ret = client_map_[epmap[i]]->SendVariable(scope, ins[i]);
-      if (!ret) {
-        LOG(ERROR) << "send variable error: " << ins[i];
-      }
+
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto& ctx = *pool.Get(place);
+    for (size_t i = 0; i < ins.size(); i++) {
+      client_.AsyncSendVariable(epmap[i], ctx, scope, ins[i]);
     }
-    // TODO(typhoonzero): support async optimization
-    client_map_[epmap[0]]->Wait();
-    for (size_t i = 0; i < outs.size(); ++i) {
-      bool ret = client_map_[epmap[i]]->GetVariable(scope, outs[i]);
-      if (!ret) {
-        LOG(ERROR) << "GetVariable error: " << outs[i];
-      }
+
+    for (size_t i = 0; i < outs.size(); i++) {
+      client_.AsyncGetVariable(epmap[i], ctx, scope, outs[i]);
     }
+
+    PADDLE_ENFORCE(client_.Wait());
   }
 
- protected:
-  mutable std::unordered_map<std::string, std::shared_ptr<detail::RPCClient>>
-      client_map_;
+ private:
+  mutable detail::RPCClient client_;
 };
 
 class SendOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
-  SendOpMaker(OpProto *proto, OpAttrChecker *op_checker)
+  SendOpMaker(OpProto* proto, OpAttrChecker* op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
     AddInput("X", "(Tensor) Input tensor to be send").AsDuplicable();
     AddOutput("Out", "(Tensor) Output tensor to get from server")
diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc
index fa94424bf9..ea09169479 100644
--- a/paddle/operators/send_recv_op_test.cc
+++ b/paddle/operators/send_recv_op_test.cc
@@ -140,7 +140,7 @@ void StartServerNet(bool is_sparse) {
 
 TEST(SendRecvOp, CPUDense) {
   std::thread server_thread(StartServerNet, false);
-  sleep(3);  // wait server to start
+  sleep(10);  // wait server to start
   // local net
   f::Scope scope;
   p::CPUPlace place;
diff --git a/paddle/operators/sequence_erase_op.cc b/paddle/operators/sequence_erase_op.cc
new file mode 100644
index 0000000000..d17b268623
--- /dev/null
+++ b/paddle/operators/sequence_erase_op.cc
@@ -0,0 +1,89 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/sequence_erase_op.h"
+
+namespace paddle {
+namespace operators {
+
+class SequenceEraseOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of SequenceEraseOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of SequenceEraseOp should not be null.");
+    auto x_dims = ctx->GetInputDim("X");
+    PADDLE_ENFORCE(x_dims.size() == 2 && x_dims[1] == 1,
+                   "Input(X) of SequenceEraseOp should be a 2-D LoDTensor "
+                   "with the 2nd dimension equal to 1.");
+    ctx->SetOutputDim("Out", x_dims);
+  }
+};
+
+class SequenceEraseOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  SequenceEraseOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("X",
+             "(2-D LoDTensor with the 2nd dim. equal to 1) "
+             "Input LoDTensor of SequenceEraseOp.");
+    AddOutput("Out",
+              "(2-D LoDTensor with the 2nd dim. equal to 1) "
+              "Output LoDTensor of SequenceEraseOp.");
+    AddAttr<std::vector<int>>("tokens",
+                              "(vector<int>) Tokens need to be erased from "
+                              "input sequences.");
+    AddComment(R"DOC(
+Sequence Erase Operator.
+
+Sequence erase operator erases tokens specified by Attr(tokens) from the input 
+sequences Input(X), and outputs the remaining data and modifies the LoD 
+information at the same time. For example, given a 2-D LoDTensor
+
+    X = [[2, 2, 6, 1, 3, 9, 6, 1, 0, 1]]^T
+
+with lod = [[0, 3, 6, 10]], there are three sequences in the input:
+   
+     X1 = [[2, 2, 6]]^T, X2 = [[1, 3, 9]]^T and X3 = [[6, 1, 0, 1]]^T.
+
+If the tokens to be erased are Attr(tokens) = [2, 3, 5], after the erasing 
+operation, the three sequences become
+
+    X1' = [[6]]^T, X2' = [[1, 9]]^T and X3' = [[6, 1, 0, 1]]^T.
+
+Hence the LoDTensor Output(Out) should be
+
+    Out = [[6, 1, 9, 6, 1, 0, 1]]^T,
+
+with lod = [[0, 1, 3, 7]].
+
+An example usage for this operator is to remove the special tokens when 
+computing the edit distance between two strings, such as blank, start token, 
+and end token.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(sequence_erase, ops::SequenceEraseOp,
+                             ops::SequenceEraseOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    sequence_erase,
+    ops::SequenceEraseKernel<paddle::platform::CPUDeviceContext, int32_t>);
diff --git a/paddle/operators/sequence_erase_op.cu b/paddle/operators/sequence_erase_op.cu
new file mode 100644
index 0000000000..5da8eba3e1
--- /dev/null
+++ b/paddle/operators/sequence_erase_op.cu
@@ -0,0 +1,133 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <thrust/device_vector.h>
+#include <thrust/host_vector.h>
+#include "paddle/operators/sequence_erase_op.h"
+#include "paddle/platform/cuda_helper.h"
+
+namespace paddle {
+namespace operators {
+using platform::PADDLE_CUDA_NUM_THREADS;
+using LoDTensor = framework::LoDTensor;
+
+template <typename T>
+__global__ void LabelErasedIdx(const T* in_dat, const int in_len,
+                               const T* tokens, const int tokens_len,
+                               int* num_erased) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < in_len) {
+    int erased = 0;
+    for (int i = 0; i < tokens_len; ++i) {
+      if (in_dat[index] == tokens[i]) {
+        erased = 1;
+      }
+    }
+    num_erased[index + 1] = erased;
+    if (index == 0) {
+      num_erased[0] = 0;
+    }
+  }
+}
+
+template <typename T>
+__global__ void GetOutLod(const T* num_erased, const int* in_lod,
+                          const int lod_len, int* out_lod0) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < lod_len) {
+    out_lod0[index] = in_lod[index] - num_erased[in_lod[index]];
+  }
+}
+
+template <typename T>
+__global__ void SetOutput(const T* in_dat, const int in_len,
+                          const int* num_erased, T* out_dat) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  if (index < in_len) {
+    if (in_dat[index] != in_dat[index + 1]) {
+      out_dat[index - num_erased[index]] = in_dat[index];
+    }
+  }
+}
+
+template <typename T>
+class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in = ctx.Input<LoDTensor>("X");
+    auto* out = ctx.Output<LoDTensor>("Out");
+
+    auto lod = in->lod();
+    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(),
+                      "The actual size mismatches with the LoD information.");
+    auto tokens = ctx.Attr<std::vector<T>>("tokens");
+    auto tokens_len = tokens.size();
+    auto in_len = in->numel();
+    auto in_dat = in->data<T>();
+    auto lod0 = lod[0];
+
+    thrust::host_vector<T> host_tokens(tokens_len);
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      host_tokens[i] = tokens[i];
+    }
+    thrust::device_vector<T> dev_tokens = host_tokens;
+    thrust::device_vector<int> num_erased(in_len + 1);
+
+    T* dev_tokens_ptr = thrust::raw_pointer_cast(dev_tokens.data());
+    int* num_erased_ptr = thrust::raw_pointer_cast(num_erased.data());
+
+    auto stream = ctx.cuda_device_context().stream();
+    LabelErasedIdx<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
+                     PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
+        in_dat, in_len, dev_tokens_ptr, tokens_len, num_erased_ptr);
+    thrust::inclusive_scan(num_erased.begin() + 1, num_erased.end(),
+                           num_erased.begin() + 1);
+
+    // Calc LoD
+    auto lod_len = lod0.size();
+    thrust::host_vector<int> host_lod(lod_len);
+    for (size_t i = 0; i < lod_len; ++i) {
+      host_lod[i] = lod0[i];
+    }
+    thrust::device_vector<int> dev_in_lod = host_lod;
+    thrust::device_vector<int> dev_out_lod(lod_len);
+    int* dev_in_lod_ptr = thrust::raw_pointer_cast(dev_in_lod.data());
+    int* dev_out_lod_ptr = thrust::raw_pointer_cast(dev_out_lod.data());
+    GetOutLod<<<(lod_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
+                PADDLE_CUDA_NUM_THREADS, 0, stream>>>(
+        num_erased_ptr, dev_in_lod_ptr, lod_len, dev_out_lod_ptr);
+    thrust::host_vector<int> host_out_lod = dev_out_lod;
+    std::vector<int> out_lod0(lod_len, 0);
+    for (size_t i = 0; i < lod_len; i++) {
+      out_lod0[i] = host_out_lod[i];
+    }
+    framework::LoD out_lod;
+    out_lod.push_back(out_lod0);
+    out->set_lod(out_lod);
+
+    // Set output
+    out->Resize({out_lod0.back(), 1});
+    auto out_dat = out->mutable_data<T>(ctx.GetPlace());
+    SetOutput<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,
+                PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_dat, in_len,
+                                                      num_erased_ptr, out_dat);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+REGISTER_OP_CUDA_KERNEL(sequence_erase,
+                        paddle::operators::SequenceEraseOpCUDAKernel<int32_t>);
diff --git a/paddle/operators/sequence_erase_op.h b/paddle/operators/sequence_erase_op.h
new file mode 100644
index 0000000000..cb2d7be009
--- /dev/null
+++ b/paddle/operators/sequence_erase_op.h
@@ -0,0 +1,70 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename DeviceContext, typename T>
+class SequenceEraseKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* in = ctx.Input<framework::LoDTensor>("X");
+    auto* out = ctx.Output<framework::LoDTensor>("Out");
+
+    auto lod = in->lod();
+    PADDLE_ENFORCE_EQ(lod.size(), 1UL, "Only support one level sequence now.");
+    PADDLE_ENFORCE_EQ(lod[0].back(), (size_t)in->numel(),
+                      "The actual size mismatches with the LoD information.");
+    auto tokens = ctx.Attr<std::vector<int>>("tokens");
+    auto in_len = in->numel();
+    auto in_dat = in->data<T>();
+    auto lod0 = lod[0];
+
+    std::vector<size_t> num_erased(in_len + 1, 0);
+    std::vector<size_t> out_lod0(1, 0);
+    for (size_t i = 0; i < lod0.size() - 1; ++i) {
+      size_t num_out = 0;
+      for (auto j = lod0[i] + 1; j <= lod0[i + 1]; ++j) {
+        num_erased[j] = num_erased[j - 1];
+        if (std::find(tokens.begin(), tokens.end(), in_dat[j - 1]) !=
+            tokens.end()) {
+          num_erased[j] += 1;
+        } else {
+          num_out += 1;
+        }
+      }
+      out_lod0.push_back(out_lod0.back() + num_out);
+    }
+
+    auto out_len = in_len - num_erased[in_len];
+    out->Resize({static_cast<int64_t>(out_len), 1});
+    auto out_dat = out->mutable_data<T>(ctx.GetPlace());
+
+    for (int64_t i = 0; i < in_len; ++i) {
+      if (num_erased[i] == num_erased[i + 1]) {
+        out_dat[i - num_erased[i]] = in_dat[i];
+      }
+    }
+    framework::LoD out_lod;
+    out_lod.push_back(out_lod0);
+    out->set_lod(out_lod);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc
index 3f5b2a9b84..bf870115a4 100644
--- a/paddle/operators/shrink_rnn_memory_op.cc
+++ b/paddle/operators/shrink_rnn_memory_op.cc
@@ -45,7 +45,7 @@ class ShrinkRNNMemoryOp : public ArrayOp {
         rank_items.begin();
 
     auto *out_var = scope.FindVar(Output("Out"));
-    PADDLE_ENFORCE(out_var != nullptr, "Output Out must be set");
+    PADDLE_ENFORCE(out_var != nullptr, "Output(Out) must be set.");
     auto &out_tensor = *out_var->GetMutable<framework::LoDTensor>();
 
     size_t height = dst_num_rows;
@@ -76,15 +76,17 @@ class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
              "(LoDTensor) The step index. The RNN step memory 'X' will be "
              "shrinked to match the size of the input of the index'th step.");
     AddOutput("Out", "(LoDTensor) The shrinked RNN step memory.");
-    AddComment(
-        R"DOC(
-        In dynamic RNN, we are able to handle sequences of different lengths.
-        Because of the multiple lengths, the size of each step input can be
-        different, which may lead to a mismatching between the input of
-        the current step and the memory generated by the previous one. This
-        operator shrinks memory according to the size of the next step input,
-        to make sure that they can match each other.
-        )DOC");
+    AddComment(R"DOC(
+This operator is used to shrink output batch of memory defined in dynamic RNN.
+
+Dynamic RNN is able to handle variable-length sequences, in which, sequences in
+a mini-batch are sorted by their lengths first. After that, the longest sequence
+becomes the first one in the sorted batch, followed by the second longest, the
+third longest, and so on. Dynamic RNN then slices a batch input timestep by
+timestep from the sorted input. Once any sequence in the input batch reaches its
+end, memory defined in dynamicRNN has to shrink its outputs to adapt to the input
+batch size for the next time step.
+)DOC");
   }
 };
 
@@ -136,6 +138,7 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
         math::set_constant(dev_ctx, &rest_tensor, 0.0f);
       }
     }
+    dx_tensor.set_lod(x_tensor.lod());
   }
 };
 
diff --git a/paddle/operators/split_op.cc b/paddle/operators/split_op.cc
index 4dfae043cb..8d55ae5dd7 100644
--- a/paddle/operators/split_op.cc
+++ b/paddle/operators/split_op.cc
@@ -60,6 +60,12 @@ class SplitOp : public framework::OperatorWithKernel {
       }
     }
     ctx->SetOutputsDim("Out", outs_dims);
+    if (axis != 0) {
+      // Only pass LoD when not spliting along the first dim.
+      for (size_t i = 0; i < outs_number; ++i) {
+        ctx->ShareLoD("X", "Out", 0, i);
+      }
+    }
   }
 };
 
diff --git a/paddle/operators/warpctc_op.h b/paddle/operators/warpctc_op.h
index 41899c7fe0..8aea061c00 100644
--- a/paddle/operators/warpctc_op.h
+++ b/paddle/operators/warpctc_op.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/math/math_function.h"
 #include "paddle/operators/math/sequence_padding.h"
+#include "paddle/operators/math/sequence_scale.h"
 #include "paddle/platform/dynload/warpctc.h"
 
 namespace paddle {
@@ -178,11 +179,14 @@ class WarpCTCKernel : public framework::OpKernel<T> {
     T* warpctc_grad_data =
         warpctc_grad->mutable_data<T>(warpctc_logits.dims(), ctx.GetPlace());
 
+    math::SetConstant<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), warpctc_grad,
+        static_cast<T>(0));
+
     // warpctc accesses labels in CPU memory
     Tensor warpctc_label;
     Copy(*label, platform::CPUPlace(), ctx.device_context(), &warpctc_label);
     const int* warpctc_label_data = warpctc_label.data<int>();
-
     // warpctc stores loss in CPU memory
     Tensor warpctc_loss;
     T* warpctc_loss_data =
@@ -206,11 +210,18 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* warpctc_grad = ctx.Input<Tensor>("WarpCTCGrad");
     auto* logits_grad = ctx.Output<LoDTensor>(framework::GradVarName("Logits"));
+    const Tensor* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
 
+    logits_grad->mutable_data<T>(ctx.GetPlace());
     bool norm_by_times = ctx.Attr<bool>("norm_by_times");
     math::UnpaddingLoDTensorFunctor<DeviceContext, T>()(
         ctx.template device_context<DeviceContext>(), *logits_grad,
         *warpctc_grad, norm_by_times);
+
+    const T* loss_grad_data = loss_grad->data<T>();
+    math::ScaleLoDTensorFunctor<DeviceContext, T>()(
+        ctx.template device_context<DeviceContext>(), *logits_grad,
+        loss_grad_data);
   }
 };
 
diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc
index 7a3400919e..2fdd25dbbe 100644
--- a/paddle/operators/while_op.cc
+++ b/paddle/operators/while_op.cc
@@ -121,8 +121,8 @@ class WhileGradOp : public framework::OperatorBase {
       for (size_t i = 0; i < outside_og_names.size(); ++i) {
         auto outside_og_name = outside_og_names[i];
         auto inside_og_name = inside_og_names[i];
-        VLOG(10) << "Linking outside " << outside_og_name << " --> inside "
-                 << inside_og_name;
+        VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
+                << inside_og_name;
         auto &og_outside =
             detail::Ref(scope.FindVar(outside_og_name),
                         "Cannot find Outside Gradient %s", outside_og_name);
@@ -141,11 +141,11 @@ class WhileGradOp : public framework::OperatorBase {
           auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
           auto &inside_array =
               detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-          VLOG(10) << outside_og_name << " size = " << outside_array.size();
+          VLOG(8) << outside_og_name << " size = " << outside_array.size();
           inside_array.resize(outside_array.size());
 
           for (size_t j = 0; j < inside_array.size(); ++j) {
-            VLOG(10) << j << " " << outside_array[j].numel();
+            VLOG(8) << j << " " << outside_array[j].numel();
             if (outside_array[j].numel() != 0) {
               inside_array[j].set_lod(outside_array[j].lod());
               inside_array[j].ShareDataWith(outside_array[j]);
@@ -187,10 +187,14 @@ class WhileGradOp : public framework::OperatorBase {
             attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
             attrs["value"] = 0.0f;
 
+            auto var_name = pg_names[param_id];
             auto zero_op = framework::OpRegistry::CreateOp(
                 "fill_constant", framework::VariableNameMap{},
-                {{"Out", {pg_names[param_id]}}}, attrs);
+                {{"Out", {var_name}}}, attrs);
             zero_op->Run(scope, dev_place);
+            scope.FindVar(var_name)
+                ->GetMutable<framework::LoDTensor>()
+                ->set_lod(inside_tensor.lod());
           }
         }
 
@@ -231,7 +235,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
     auto igs = InputGrad(kX, /*do not drop empty gradient*/ false);
     for (auto &each_ig : igs) {
       if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) {
-        VLOG(10) << "Ignore " << each_ig;
+        VLOG(8) << "Ignore " << each_ig;
         each_ig = framework::kEmptyVarName;
       }
     }
diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h
index bbb1ee4821..9a44a776f2 100644
--- a/paddle/optimizer/lr_policy.h
+++ b/paddle/optimizer/lr_policy.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #include <algorithm>
diff --git a/paddle/optimizer/parameter_optimizer_test.cc b/paddle/optimizer/parameter_optimizer_test.cc
index 83757a3917..795d2de1d6 100644
--- a/paddle/optimizer/parameter_optimizer_test.cc
+++ b/paddle/optimizer/parameter_optimizer_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/optimizer/serialization_test.cc b/paddle/optimizer/serialization_test.cc
index 940e941e90..0f1b14eec1 100644
--- a/paddle/optimizer/serialization_test.cc
+++ b/paddle/optimizer/serialization_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/optimizer/tensor.h b/paddle/optimizer/tensor.h
index 86fa625e01..e999e9bda1 100644
--- a/paddle/optimizer/tensor.h
+++ b/paddle/optimizer/tensor.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 /**
  * @brief tensor used by optimizer
diff --git a/paddle/platform/cpu_info_test.cc b/paddle/platform/cpu_info_test.cc
index 8fb195aa7c..1bfe62c1fb 100644
--- a/paddle/platform/cpu_info_test.cc
+++ b/paddle/platform/cpu_info_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/platform/cpu_info.h"
 #include "paddle/string/printf.h"
 
diff --git a/paddle/platform/dynload/cudnn.cc b/paddle/platform/dynload/cudnn.cc
index 76ec82e108..701f6240fe 100644
--- a/paddle/platform/dynload/cudnn.cc
+++ b/paddle/platform/dynload/cudnn.cc
@@ -44,7 +44,7 @@ CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
 
 #ifdef PADDLE_USE_DSO
 bool HasCUDNN() {
-  std::call_once(cudnn_dso_flag, GetCudnnDsoHandle, &cudnn_dso_handle);
+  std::call_once(cudnn_dso_flag, GetCUDNNDsoHandle, &cudnn_dso_handle);
   return cudnn_dso_handle != nullptr;
 }
 
diff --git a/paddle/platform/dynload/cudnn.h b/paddle/platform/dynload/cudnn.h
index 8c937b37d7..b926347949 100644
--- a/paddle/platform/dynload/cudnn.h
+++ b/paddle/platform/dynload/cudnn.h
@@ -36,7 +36,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
     auto operator()(Args... args) -> decltype(__name(args...)) {   \
       using cudnn_func = decltype(__name(args...)) (*)(Args...);   \
       std::call_once(cudnn_dso_flag,                               \
-                     paddle::platform::dynload::GetCudnnDsoHandle, \
+                     paddle::platform::dynload::GetCUDNNDsoHandle, \
                      &cudnn_dso_handle);                           \
       EnforceCUDNNLoaded(#__name);                                 \
       void* p_##__name = dlsym(cudnn_dso_handle, #__name);         \
diff --git a/paddle/platform/dynload/dynamic_loader.cc b/paddle/platform/dynload/dynamic_loader.cc
index 7a82d06a0a..c8c09ae608 100644
--- a/paddle/platform/dynload/dynamic_loader.cc
+++ b/paddle/platform/dynload/dynamic_loader.cc
@@ -134,7 +134,7 @@ void GetCublasDsoHandle(void** dso_handle) {
 #endif
 }
 
-void GetCudnnDsoHandle(void** dso_handle) {
+void GetCUDNNDsoHandle(void** dso_handle) {
 #if defined(__APPLE__) || defined(__OSX__)
   GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", dso_handle,
                              false);
diff --git a/paddle/platform/dynload/dynamic_loader.h b/paddle/platform/dynload/dynamic_loader.h
index c0e5452e5a..7b0c8c16d7 100644
--- a/paddle/platform/dynload/dynamic_loader.h
+++ b/paddle/platform/dynload/dynamic_loader.h
@@ -32,7 +32,7 @@ void GetCublasDsoHandle(void** dso_handle);
  * @param    **dso_handle   dso handler
  *
  */
-void GetCudnnDsoHandle(void** dso_handle);
+void GetCUDNNDsoHandle(void** dso_handle);
 
 /**
  * @brief    load the DSO of CURAND
diff --git a/paddle/platform/hostdevice.h b/paddle/platform/hostdevice.h
index eb2df291cc..fa4659ed29 100644
--- a/paddle/platform/hostdevice.h
+++ b/paddle/platform/hostdevice.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #pragma once
 
 #ifdef __CUDACC__
diff --git a/paddle/platform/place_test.cc b/paddle/platform/place_test.cc
index 4f1eba01df..150b2d3b1f 100644
--- a/paddle/platform/place_test.cc
+++ b/paddle/platform/place_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/platform/place.h"
 #include <sstream>
 #include "gtest/gtest.h"
diff --git a/paddle/pybind/print_operators_doc.cc b/paddle/pybind/print_operators_doc.cc
index f4f281229e..99694fa592 100644
--- a/paddle/pybind/print_operators_doc.cc
+++ b/paddle/pybind/print_operators_doc.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include <iostream>
 #include <sstream>  // std::stringstream
 #include <string>
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 5d170c66e9..c5d70bc9f9 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -430,13 +430,8 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("init_glog", framework::InitGLOG);
   m.def("init_devices", &framework::InitDevices);
 
-  m.def("use_cpu", framework::UseCPU);
-  m.def("use_mkldnn", framework::UseMKLDNN);
-  m.def("use_cuda", framework::UseCUDA);
-  m.def("use_cudnn", framework::UseCUDNN);
-  m.def("use_all", framework::UseALL);
-
   m.def("is_compile_gpu", IsCompileGPU);
+
   m.def("set_feed_variable", framework::SetFeedVariable);
   m.def("get_fetch_variable", framework::GetFetchVariable);
 
diff --git a/paddle/pybind/tensor_py.h b/paddle/pybind/tensor_py.h
index 6b4290972b..3b5210e2b9 100644
--- a/paddle/pybind/tensor_py.h
+++ b/paddle/pybind/tensor_py.h
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #pragma once
 #include <string>
-#include "paddle/framework/tensor.h"
+#include "paddle/framework/lod_tensor.h"
 #include "paddle/memory/memcpy.h"
 #include "paddle/platform/device_context.h"
 #include "pybind11/numpy.h"
@@ -97,14 +97,27 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
 
 template <typename T>
 T TensorGetElement(framework::Tensor &self, size_t offset) {
-  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
-  return self.data<T>()[offset];
+  if (platform::is_cpu_place(self.place())) {
+    return self.data<T>()[offset];
+  } else {
+    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
+    framework::Copy(self, platform::CPUPlace(), dst.get());
+    return dst->data<T>()[offset];
+  }
 }
 
+// TODO(dzhwinter) : fix the redundent Tensor allocate and free
 template <typename T>
 void TensorSetElement(framework::Tensor &self, size_t offset, T elem) {
-  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
-  self.data<T>()[offset] = elem;
+  if (platform::is_gpu_place(self.place())) {
+    std::shared_ptr<framework::Tensor> dst(new framework::Tensor);
+    framework::Copy(self, platform::CPUPlace(), dst.get());
+    dst->data<T>()[offset] = elem;
+    framework::Copy(*dst.get(), self.place(), &self);
+
+  } else if (platform::is_cpu_place(self.place())) {
+    self.data<T>()[offset] = elem;
+  }
 }
 
 template <typename T>
diff --git a/paddle/scripts/cluster_train/paddle.py b/paddle/scripts/cluster_train/paddle.py
index 9b03ed1d8f..e44bb4505b 100644
--- a/paddle/scripts/cluster_train/paddle.py
+++ b/paddle/scripts/cluster_train/paddle.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
diff --git a/paddle/scripts/cpplint.py b/paddle/scripts/cpplint.py
index dff4339ea3..d0cbb070c4 100644
--- a/paddle/scripts/cpplint.py
+++ b/paddle/scripts/cpplint.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 #
 # Copyright (c) 2009 Google Inc. All rights reserved.
diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 8a352b0078..80fa0c72af 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -49,7 +49,18 @@ function cpu_config() {
   if [ "@WITH_MKL@" == "OFF" ]; then
     return 0
   fi
-  ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+  platform="`uname -s`"
+  ht=0
+  if [ $platform == "Linux" ]; then
+    ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
+  elif [ $platform == "Darwin" ]; then
+    if [`sysctl -n hw.physicalcpu` -eq `sysctl -n hw.logicalcpu`]; then
+      # HT is OFF
+      ht=1
+    fi
+  else
+    return 0
+  fi
   if [ $ht -eq 1 ]; then # HT is OFF
     if [ -z "$KMP_AFFINITY" ]; then
       export KMP_AFFINITY="granularity=fine,compact,0,0"
@@ -72,7 +83,15 @@ function threads_config() {
   # according to trainer_count and total processors
   # only when MKL enabled
   # auto set OPENBLAS_NUM_THREADS when do not use MKL
-  processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l`
+  platform="`uname -s`"
+  processors=0
+  if [ $platform == "Linux" ]; then
+    processors=`grep "processor" /proc/cpuinfo|sort -u|wc -l`
+  elif [ $platform == "Darwin" ]; then
+    processors=`sysctl -n hw.logicalcpu`
+  else
+    return 0
+  fi
   trainers=`grep -Eo 'trainer_count.[0-9]+' <<< "$@" |grep -Eo '[0-9]+'|xargs`
   if [ -z $trainers ]; then
     trainers=1
@@ -92,6 +111,9 @@ function threads_config() {
     if [ -z "$OPENBLAS_NUM_THREADS" ]; then
       export OPENBLAS_NUM_THREADS=$threads
     fi
+    if [ $threads -gt 1 ] && [ -z "$OPENBLAS_MAIN_FREE" ]; then
+      export OPENBLAS_MAIN_FREE=1
+    fi
   fi
   
 }
@@ -145,11 +167,7 @@ else:
   sys.exit(0)
 EOF
 
-if [ "`uname -s`" == "Linux" ]; then
-  # only support on linux yet, with mac can use v2
-  cpu_config
-fi
-
+cpu_config
 # echo $KMP_AFFINITY $OMP_DYNAMIC
 
 case "$1" in
diff --git a/paddle/string/piece.cc b/paddle/string/piece.cc
index b80afdec82..2a553e2832 100644
--- a/paddle/string/piece.cc
+++ b/paddle/string/piece.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/string/piece.h b/paddle/string/piece.h
index 7362ce02c7..fc95263379 100644
--- a/paddle/string/piece.h
+++ b/paddle/string/piece.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/string/piece_test.cc b/paddle/string/piece_test.cc
index cf5152ff5a..fb8b972988 100644
--- a/paddle/string/piece_test.cc
+++ b/paddle/string/piece_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/string/printf.h b/paddle/string/printf.h
index 8b5ce63a8e..70d2511531 100644
--- a/paddle/string/printf.h
+++ b/paddle/string/printf.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 /*
   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
 
diff --git a/paddle/string/printf_test.cc b/paddle/string/printf_test.cc
index 2586264046..b5ad35513b 100644
--- a/paddle/string/printf_test.cc
+++ b/paddle/string/printf_test.cc
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 #include "paddle/string/printf.h"
 
 #include <string>
diff --git a/paddle/string/tinyformat/tinyformat.h b/paddle/string/tinyformat/tinyformat.h
index 3516777d9f..092c04c315 100644
--- a/paddle/string/tinyformat/tinyformat.h
+++ b/paddle/string/tinyformat/tinyformat.h
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 // tinyformat.h
 // Copyright (C) 2011, Chris Foster [chris42f (at) gmail (d0t) com]
 //
diff --git a/paddle/string/to_string.h b/paddle/string/to_string.h
index 3b3bcc69a4..178edc1895 100644
--- a/paddle/string/to_string.h
+++ b/paddle/string/to_string.h
@@ -15,9 +15,15 @@ limitations under the License. */
 #pragma once
 #include <sstream>
 #include <string>
+#include <typeindex>
 
 namespace paddle {
 namespace string {
+inline std::ostream& operator<<(std::ostream& s, const std::type_index& t) {
+  s << t.name();
+  return s;
+}
+
 template <typename T>
 inline std::string to_string(T v) {
   std::ostringstream sout;
@@ -25,6 +31,11 @@ inline std::string to_string(T v) {
   return sout.str();
 }
 
+template <>
+inline std::string to_string(std::type_index t) {
+  return t.name();
+}
+
 // Faster std::string/const char* type
 template <>
 inline std::string to_string(std::string v) {
diff --git a/paddle/trainer/tests/simple_sparse_neural_network.py b/paddle/trainer/tests/simple_sparse_neural_network.py
index 30346ef299..ba554d5872 100644
--- a/paddle/trainer/tests/simple_sparse_neural_network.py
+++ b/paddle/trainer/tests/simple_sparse_neural_network.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4)
diff --git a/paddle/trainer/tests/simple_sparse_neural_network_dp.py b/paddle/trainer/tests/simple_sparse_neural_network_dp.py
index 86b272edfe..44e96873f0 100644
--- a/paddle/trainer/tests/simple_sparse_neural_network_dp.py
+++ b/paddle/trainer/tests/simple_sparse_neural_network_dp.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer.PyDataProvider2 import provider, integer_sequence, integer_value
 import random
 
diff --git a/paddle/utils/enable_virtualenv.py b/paddle/utils/enable_virtualenv.py
index ccfaa7c147..29f8deb324 100644
--- a/paddle/utils/enable_virtualenv.py
+++ b/paddle/utils/enable_virtualenv.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os
 
 
diff --git a/proto/OptimizerConfig.proto b/proto/OptimizerConfig.proto
index d27b1bcf80..b341d78d19 100644
--- a/proto/OptimizerConfig.proto
+++ b/proto/OptimizerConfig.proto
@@ -1,3 +1,16 @@
+//  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 syntax = "proto2";
 
 option optimize_for = LITE_RUNTIME;
diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
index 01d31ef3fa..c944a96042 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-3, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
index 91849b40a0..27b11ffdfc 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-3, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
index f87237f9b5..6a90051827 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
index 7012dbf6a0..06115d62e7 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/layer_activations.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 '''
 Test all activations.
 '''
diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
index a607a62c99..f5e90fdd89 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/projections.py b/python/paddle/trainer_config_helpers/tests/configs/projections.py
index dc8975cb31..c683d378ca 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/projections.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/projections.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 '''
 Test mixed layer, projections and operators.
 '''
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
index 7c848ef3fc..bf90d1762c 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_fc.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
index c19bb9685a..7cfab83855 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_gru.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
index 565e281a6e..8a425c7062 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/shared_lstm.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
index a5b5bb30b1..8ee213a493 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/simple_rnn_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
index a991b22252..cbd3c3e97f 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_BatchNorm3D.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
index cd7f609638..bed9154fe3 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_bi_grumemory.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
index be83f4f83c..7e1da753f5 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_bilinear_interp.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
index f066fe1fb3..0a719b0735 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_clip_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py b/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py
index 9b791a0222..7003872700 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_config_parser_for_non_file_config.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 #
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
index aa0a2c0d5f..fb2cacd443 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_conv3d_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
index 7ce375c708..a8b5c860ef 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
index caa6aaa943..eba2e1e483 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cost_layers_with_weight.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
index 8314a7e9a5..870388faf7 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_crop.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
index 4a5bdf1181..253244dcd4 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_cross_entropy_over_beam.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 #coding=utf-8
 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
index a113279fc1..db950093b3 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_deconv3d_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
index 3572a2cb07..d304a29859 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_detection_output_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
index e52d48dde0..2e5dde2da2 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_dot_prod_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 vec1 = data_layer(name='vector1', size=10)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
index c53f10e0a4..345fb2b6ab 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
index b249de0fee..3a489a39da 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_factorization_machine.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='data', size=1024)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
index 2842d3429c..90b0e37270 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_fc.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
index 9dab45519c..2bd4ab2da4 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=256)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
index 474e4f36ba..451909ee18 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_grumemory_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-4)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
index dff1c535b3..3ebe40aadc 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_hsigmoid.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
index 171da10f75..c762467feb 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_kmax_seq_socre_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 #coding=utf-8
 from paddle.trainer_config_helpers import *
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py
index b36a5c6d12..58bf3de104 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_l2_distance_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 outputs(
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
index 7ca1cc2db3..8d570706df 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_lstmemory_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
index eb14270baa..3b6117d297 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_maxout.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
index c3376c47bd..083d064367 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_multibox_loss_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py
index d250001932..9c14455841 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_multiplex_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
index b7a15666f0..046698fb4e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_ntm_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
index 491e8c8caa..1046db2f09 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_pad.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py
index 0dbb921d41..37805d4376 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_pooling3D_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
index 45b02fbf32..10d759f6d9 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300, height=10, width=10)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
index 8da26ff44b..22e0ce3e5a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_print_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py b/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py
index 1a693f8dff..d1d97f1c5e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_recursive_topology.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py
index 004e2a5dd4..6818b91f96 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_repeat_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py
index 09a6f50733..ce8a22ebb1 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_resize_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
index 91010759e4..79dad5e250 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_rnn_group.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py
index b739a81b85..264341f899 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py
index ab33c496b0..342a5029a8 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_row_conv.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
index ac8badb26a..9521fa6c47 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_row_l2_norm_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
index dd589116fa..698d19d037 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_scale_shift_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='data', size=100)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py
index 8d4bf28bf1..22fb25d0f2 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_scale_sub_region_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
index 5c161ba805..1883ed9d4e 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_concat_reshape.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=1000, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
index 510ad32208..12d7f1f33b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_slice_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 #coding=utf-8
 from paddle.trainer_config_helpers import *
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
index 3c205eabd8..8cf5fd70e3 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
index 66629662dd..7188d82a53 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_smooth_l1.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 data = data_layer(name='input', size=300)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
index 318b4459ba..a628272196 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_split_datasource.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 define_py_data_sources2(
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
index e0b0d0d3be..58c1675e6b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_spp_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(batch_size=100, learning_rate=1e-5)
diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py
index 6d1c3175ba..64d1d7b6ee 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_sub_nested_seq_select_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/env python
 #coding=utf-8
 from paddle.trainer_config_helpers import *
diff --git a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
index ebb39219bd..6294cb04ef 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/unused_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 settings(batch_size=1000, learning_rate=1e-4)
 
diff --git a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
index 27f1c8e993..89b881b361 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/util_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.trainer_config_helpers import *
 
 settings(learning_rate=1e-4, batch_size=1000)
diff --git a/python/paddle/utils/dump_v2_config.py b/python/paddle/utils/dump_v2_config.py
new file mode 100644
index 0000000000..5dc2111e37
--- /dev/null
+++ b/python/paddle/utils/dump_v2_config.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import collections
+
+from paddle.trainer_config_helpers.layers import LayerOutput
+from paddle.v2.layer import parse_network
+from paddle.proto import TrainerConfig_pb2
+
+__all__ = ["dump_v2_config"]
+
+
+def dump_v2_config(topology, save_path, binary=False):
+    """ Dump the network topology to a specified file.
+
+    This function is only used to dump network defined by using PaddlePaddle V2
+    APIs. This function will NOT dump configurations related to PaddlePaddle
+    optimizer.
+
+    :param topology: The output layers (can be more than one layers given in a
+                     Python List or Tuple) of the entire network. Using the
+                     specified layers (if more than one layer is given) as root,
+                     traversing back to the data layer(s), all the layers
+                     connected to the specified output layers will be dumped.
+                     Layers not connceted to the specified will not be dumped.
+    :type topology: LayerOutput|List|Tuple
+    :param save_path: The path to save the dumped network topology.
+    :type save_path: str
+    :param binary: Whether to dump the serialized network topology or not.
+                   The default value is false. NOTE that, if you call this
+                   function to generate network topology for PaddlePaddle C-API,
+                   a serialized version of network topology is required. When
+                   using PaddlePaddle C-API, this flag MUST be set to True.
+    :type binary: bool
+    """
+
+    if isinstance(topology, LayerOutput):
+        topology = [topology]
+    elif isinstance(topology, collections.Sequence):
+        for out_layer in topology:
+            assert isinstance(out_layer, LayerOutput), (
+                "The type of each element in the parameter topology "
+                "should be LayerOutput.")
+    else:
+        raise RuntimeError("Error input type for parameter topology.")
+
+    model_str = parse_network(topology)
+    with open(save_path, "w") as fout:
+        if binary:
+            fout.write(model_str.SerializeToString())
+        else:
+            fout.write(str(model_str))
diff --git a/python/paddle/utils/image_multiproc.py b/python/paddle/utils/image_multiproc.py
index e8db525ff5..1acf40df58 100644
--- a/python/paddle/utils/image_multiproc.py
+++ b/python/paddle/utils/image_multiproc.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os, sys
 import numpy as np
 from PIL import Image
diff --git a/python/paddle/utils/merge_model.py b/python/paddle/utils/merge_model.py
index 421e953d27..2b10020772 100644
--- a/python/paddle/utils/merge_model.py
+++ b/python/paddle/utils/merge_model.py
@@ -30,7 +30,8 @@ def merge_v2_model(net, param_file, output_file):
     which ends with .tar.gz.
 
     @param  net            The output layer of the network for inference.
-    @param  param_file     Path of the parameters (.tar.gz) which is stored by v2 api.
+    @param  param_file     Path of the parameters (.tar.gz) which is stored by
+                           v2 api.
     @param  output_file    Path of the merged file which will be generated.
 
     Usage:
diff --git a/python/paddle/utils/plotcurve.py b/python/paddle/utils/plotcurve.py
index 27bd8157d3..27a69b6a5c 100644
--- a/python/paddle/utils/plotcurve.py
+++ b/python/paddle/utils/plotcurve.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 #!/usr/bin/python
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
diff --git a/python/paddle/v2/__init__.py b/python/paddle/v2/__init__.py
index 0de417df2c..df710c33d0 100644
--- a/python/paddle/v2/__init__.py
+++ b/python/paddle/v2/__init__.py
@@ -62,12 +62,15 @@ __all__ = [
 cp.begin_parse()
 
 
-def set_omp_mkl_env_vars(trainer_count):
+def set_env_vars(trainer_count):
     '''Auto set CPU environment if have not set before.
-       export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status.
-       export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count.
+       For MKL:
+         export KMP_AFFINITY, OMP_DYNAMIC according to the Hyper Threading status.
+         export OMP_NUM_THREADS, MKL_NUM_THREADS according to trainer_count.
+       For OpenBLAS:
+         export OPENBLAS_NUM_THREADS, OPENBLAS_MAIN_FREE according to trainer_count. 
     '''
-    import platform
+    import platform, paddle
     if not platform.system() in ['Linux', 'Darwin']:
         return
 
@@ -103,16 +106,22 @@ def set_omp_mkl_env_vars(trainer_count):
 
     num_cores = num_physical_cores()
     num_processors = num_logical_processors()
-    if num_processors > num_cores:  # Hyper Threading is enabled
-        set_env("OMP_DYNAMIC", "true")
-        set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
-    else:
-        set_env("OMP_DYNAMIC", "false")
-        set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
+    if paddle.version.mkl() == 'ON':
+        if num_processors > num_cores:  # Hyper Threading is enabled
+            set_env("OMP_DYNAMIC", "true")
+            set_env("KMP_AFFINITY", "granularity=fine,compact,1,0")
+        else:
+            set_env("OMP_DYNAMIC", "false")
+            set_env("KMP_AFFINITY", "granularity=fine,compact,0,0")
     threads = num_processors / trainer_count
     threads = '1' if threads < 1 else str(threads)
-    set_env("OMP_NUM_THREADS", threads)
-    set_env("MKL_NUM_THREADS", threads)
+    if paddle.version.mkl() == 'ON':
+        set_env("OMP_NUM_THREADS", threads)
+        set_env("MKL_NUM_THREADS", threads)
+    else:
+        set_env("OPENBLAS_NUM_THREADS", threads)
+        if threads > 1:
+            set_env("OPENBLAS_MAIN_FREE", '1')
 
 
 def init(**kwargs):
@@ -129,7 +138,7 @@ def init(**kwargs):
     for key in args_dict.keys():
         args.append('--%s=%s' % (key, str(args_dict[key])))
 
-    set_omp_mkl_env_vars(kwargs.get('trainer_count', 1))
+    set_env_vars(kwargs.get('trainer_count', 1))
 
     if 'use_gpu' in kwargs:
         cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py
index 191d9ecfb1..fab8a68b0b 100644
--- a/python/paddle/v2/dataset/common.py
+++ b/python/paddle/v2/dataset/common.py
@@ -23,7 +23,6 @@ import paddle.v2.dataset
 import cPickle
 import glob
 import cPickle as pickle
-import random
 
 __all__ = [
     'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
@@ -206,7 +205,6 @@ def convert(output_path, reader, line_count, name_prefix):
     indx_f = 0
 
     def write_data(indx_f, lines):
-        random.shuffle(lines)
         filename = "%s/%s-%05d" % (output_path, name_prefix, indx_f)
         writer = recordio.writer(filename)
         for l in lines:
diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py
index 21ed7f7a5c..37c4296f9b 100644
--- a/python/paddle/v2/dataset/imdb.py
+++ b/python/paddle/v2/dataset/imdb.py
@@ -25,7 +25,6 @@ import collections
 import tarfile
 import re
 import string
-import random
 
 __all__ = ['build_dict', 'train', 'test', 'convert']
 
@@ -83,7 +82,6 @@ def reader_creator(pos_pattern, neg_pattern, word_idx):
 
     load(pos_pattern, INS, 0)
     load(neg_pattern, INS, 1)
-    random.shuffle(INS)
 
     def reader():
         for doc, label in INS:
diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py
index b705c9109b..d3b3dd524c 100644
--- a/python/paddle/v2/dataset/mq2007.py
+++ b/python/paddle/v2/dataset/mq2007.py
@@ -24,7 +24,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20
 """
 
 import os
-import random
 import functools
 import rarfile
 from common import download
@@ -265,7 +264,7 @@ def query_filter(querylists):
     return filter_query
 
 
-def load_from_text(filepath, shuffle=True, fill_missing=-1):
+def load_from_text(filepath, shuffle=False, fill_missing=-1):
     """
   parse data file into querys
   """
@@ -287,17 +286,14 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1):
             querylist._add_query(query)
     if querylist is not None:
         querylists.append(querylist)
-    if shuffle == True:
-        random.shuffle(querylists)
     return querylists
 
 
-def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1):
+def __reader__(filepath, format="pairwise", shuffle=False, fill_missing=-1):
     """
   Parameters
   --------
   filename : string
-  shuffle : shuffle query-doc pair under the same query
   fill_missing : fill the missing value. default in MQ2007 is -1
   
   Returns
diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py
index b0b9757c1a..7174413018 100644
--- a/python/paddle/v2/dataset/sentiment.py
+++ b/python/paddle/v2/dataset/sentiment.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 # /usr/bin/env python
 # -*- coding:utf-8 -*-
 
diff --git a/python/paddle/v2/dataset/tests/imikolov_test.py b/python/paddle/v2/dataset/tests/imikolov_test.py
index 4e52810e6b..9b3ab72feb 100644
--- a/python/paddle/v2/dataset/tests/imikolov_test.py
+++ b/python/paddle/v2/dataset/tests/imikolov_test.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.dataset.imikolov
 import unittest
 
diff --git a/python/paddle/v2/dataset/tests/test_sentiment.py b/python/paddle/v2/dataset/tests/test_sentiment.py
index 4074052907..f107948801 100644
--- a/python/paddle/v2/dataset/tests/test_sentiment.py
+++ b/python/paddle/v2/dataset/tests/test_sentiment.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 # /usr/bin/env python
 # -*- coding:utf-8 -*-
 
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index a0ffd31c54..f322bffe13 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 """
 Testing and training events.
 
diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py
index ccd5998e35..8c29ee741c 100644
--- a/python/paddle/v2/fluid/__init__.py
+++ b/python/paddle/v2/fluid/__init__.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 # import all class inside framework into fluid module
 import framework
@@ -18,14 +31,29 @@ from param_attr import ParamAttr
 from data_feeder import DataFeeder
 from core import LoDTensor, CPUPlace, CUDAPlace
 from distribute_transpiler import DistributeTranspiler
+from distribute_transpiler_simple import SimpleDistributeTranspiler
 import clip
 from memory_optimization_transpiler import memory_optimize
 
 Tensor = LoDTensor
 __all__ = framework.__all__ + executor.__all__ + [
-    'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
-    'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr'
-    'DataFeeder', 'clip', 'DistributeTranspiler', 'memory_optimize'
+    'io',
+    'initializer',
+    'layers',
+    'nets',
+    'optimizer',
+    'backward',
+    'regularizer',
+    'LoDTensor',
+    'CPUPlace',
+    'CUDAPlace',
+    'Tensor',
+    'ParamAttr'
+    'DataFeeder',
+    'clip',
+    'SimpleDistributeTranspiler',
+    'DistributeTranspiler',
+    'memory_optimize',
 ]
 
 
@@ -58,7 +86,7 @@ def __bootstrap__():
 
     read_env_flags = ['use_pinned_memory', 'check_nan_inf']
     if core.is_compile_gpu():
-        read_env_flags.append('fraction_of_gpu_memory_to_use')
+        read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync']
     core.init_gflags([sys.argv[0]] +
                      ["--tryfromenv=" + ",".join(read_env_flags)])
     core.init_glog(sys.argv[0])
diff --git a/python/paddle/v2/fluid/backward.py b/python/paddle/v2/fluid/backward.py
index cea2d1e090..27cf637c48 100644
--- a/python/paddle/v2/fluid/backward.py
+++ b/python/paddle/v2/fluid/backward.py
@@ -1,9 +1,25 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.v2.fluid import framework as framework
 from . import core
 import collections
 import copy
 
-__all__ = ['append_backward', 'calc_gradient']
+__all__ = [
+    'append_backward',
+    'calc_gradient',
+]
 
 
 def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None):
diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py
index b1fd1c2b65..e4d9ed599e 100644
--- a/python/paddle/v2/fluid/clip.py
+++ b/python/paddle/v2/fluid/clip.py
@@ -1,9 +1,25 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import functools
 import layers
 from . import core
 
 __all__ = [
-    'GradientClipByValue', 'append_gradient_clip_ops', 'error_clip_callback'
+    'GradientClipByValue',
+    'ErrorClipByValue',
+    'append_gradient_clip_ops',
+    'error_clip_callback',
 ]
 
 
@@ -23,12 +39,12 @@ class ErrorClipByValue(BaseErrorClipAttr):
         self.min = min
 
     def append_clip_op(self, block, grad_name):
-        block.append_op(
-            type="clip",
-            inputs={"X": grad_name},
-            outputs={"Out": grad_name},
-            attrs={"min": self.min,
-                   "max": self.max})
+        clip_op_desc = block.desc.append_op()
+        clip_op_desc.set_type("clip")
+        clip_op_desc.set_input("X", [grad_name])
+        clip_op_desc.set_output("Out", [grad_name])
+        clip_op_desc.set_attr("min", self.min)
+        clip_op_desc.set_attr("max", self.max)
 
 
 def error_clip_callback(block, context):
@@ -39,6 +55,11 @@ def error_clip_callback(block, context):
                          op_desc.output_arg_names()):
         fwd_var = block.var_recursive(grad_to_var[grad_n])
         error_clip = getattr(fwd_var, "error_clip", None)
+        if not (error_clip is None or isinstance(error_clip,
+                                                 BaseErrorClipAttr)):
+            raise TypeError(
+                "Variable's error_clip should be an instance of BaseErrorClipAttr or None."
+            )
         if error_clip is not None:
             error_clip.append_clip_op(block, grad_n)
 
diff --git a/python/paddle/v2/fluid/data_feeder.py b/python/paddle/v2/fluid/data_feeder.py
index 24036c3e75..bfdd00e3ef 100644
--- a/python/paddle/v2/fluid/data_feeder.py
+++ b/python/paddle/v2/fluid/data_feeder.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import core
 import numpy
diff --git a/python/paddle/v2/fluid/default_scope_funcs.py b/python/paddle/v2/fluid/default_scope_funcs.py
index 60c6165b6b..2218bb140a 100644
--- a/python/paddle/v2/fluid/default_scope_funcs.py
+++ b/python/paddle/v2/fluid/default_scope_funcs.py
@@ -1,16 +1,29 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 """
 Default scope function.
 
-`Paddle` manages Scope as programming language's scope.  It just a 
-thread-local stack of Scope. Top of that stack is current scope, the bottom 
-of that stack is all scopes' parent. 
+`Paddle` manages Scope as programming language's scope.  It just a
+thread-local stack of Scope. Top of that stack is current scope, the bottom
+of that stack is all scopes' parent.
 
-Invoking `var/find_var`  can `new/find` variable in current scope. 
-Invoking `enter_local_scope/leave_local_scope` can create or destroy local 
-scope. 
+Invoking `var/find_var`  can `new/find` variable in current scope.
+Invoking `enter_local_scope/leave_local_scope` can create or destroy local
+scope.
 
-A `scoped_function` will take a `function` as input. That function will be 
-invoked in a new local scope. 
+A `scoped_function` will take a `function` as input. That function will be
+invoked in a new local scope.
 """
 
 import paddle.v2.fluid.core
@@ -19,8 +32,12 @@ import threading
 __tl_scope__ = threading.local()
 
 __all__ = [
-    'get_cur_scope', 'enter_local_scope', 'leave_local_scope', 'var',
-    'find_var', 'scoped_function'
+    'get_cur_scope',
+    'enter_local_scope',
+    'leave_local_scope',
+    'var',
+    'find_var',
+    'scoped_function',
 ]
 
 
@@ -71,7 +88,7 @@ def find_var(name):
 def scoped_function(func):
     """
     invoke `func` in new scope.
-    
+
     :param func: a callable function that will be run in new scope.
     :type func: callable
     """
diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py
index 49ece7b725..06a7b6fb02 100644
--- a/python/paddle/v2/fluid/distribute_transpiler.py
+++ b/python/paddle/v2/fluid/distribute_transpiler.py
@@ -1,51 +1,75 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from __future__ import print_function
 import framework
 from framework import Program, default_main_program, Parameter, Variable
 import optimizer
 from layer_helper import LayerHelper
+from distributed_spliter import *
+import math
 
 
-def hash_name_to_server(params_grads, pserver_endpoints):
-    """
-    :param param_grads:
-    :return: a map of pserver endpoint -> 
-                    params -> [param list]
-                    grads  -> [grad list]
-    """
-
-    def _hash_param(param_name, total):
-        return hash(param_name) % total
-
-    param_grad_map = dict()
-    for param, grad in params_grads:
-        if param.trainable is True and grad is not None:
-            server_id = _hash_param(param.name, len(pserver_endpoints))
-            server_for_param = pserver_endpoints[server_id]
-            if not param_grad_map.has_key(server_for_param):
-                param_grad_map[server_for_param] = {"params": [], "grads": []}
-            param_grad_map[server_for_param]["params"].append(param)
-            param_grad_map[server_for_param]["grads"].append(grad)
-
-    return param_grad_map
+class VarBlock:
+    def __init__(self, varname, offset, size):
+        self.varname = varname
+        # NOTE: real offset is offset * size
+        self.offset = offset
+        self.size = size
 
+    def __str__(self):
+        return "%s:%d:%d" % (self.varname, self.offset, self.size)
 
-def round_robin(params_grads, pserver_endpoints):
-    assert (len(params_grads) > len(pserver_endpoints))
 
-    param_grad_map = dict()
-    pserver_idx = 0
-    for param, grad in params_grads:
-        if param.trainable is True:
-            server_for_param = pserver_endpoints[pserver_idx]
-            if not param_grad_map.has_key(server_for_param):
-                param_grad_map[server_for_param] = {"params": [], "grads": []}
-
-            param_grad_map[server_for_param]["params"].append(param)
-            param_grad_map[server_for_param]["grads"].append(grad)
+def split_dense_variable(var_list,
+                         pserver_count,
+                         min_block_size=1024,
+                         max_block_size=1048576):
+    """
+        We may need to split dense tensor to one or several blocks and put
+        them equally onto parameter server. One block is a sub-tensor
+        aligned by dim[0] of the tensor.
+        
+        We need to have a minimal block size so that the calculations in
+        the parameter server side can gain better performance. By default
+        mininum block size is 1024. The max block size is used to prevent
+        too large block that may causing send error.
+    """
+    blocks = []
+    for var in var_list:
+        split_count = pserver_count
+        var_numel = reduce(lambda x, y: x * y, var.shape)
+        max_pserver_count = int(math.floor(var_numel / float(min_block_size)))
+        if max_pserver_count == 0:
+            max_pserver_count = 1
+        if max_pserver_count < pserver_count:
+            split_count = max_pserver_count
+        block_size = int(math.ceil(var_numel / float(split_count)))
 
-            pserver_idx += 1
-            if pserver_idx >= len(pserver_endpoints):
-                pserver_idx = 0
-    return param_grad_map
+        if len(var.shape) >= 2:
+            # align by dim1(width)
+            dim1 = reduce(lambda x, y: x * y, var.shape[1:])
+            remains = block_size % dim1
+            if remains != 0:
+                block_size += dim1 - remains
+        # update split_count after align
+        split_count = int(math.ceil(var_numel / float(block_size)))
+        for block_id in xrange(split_count):
+            curr_block_size = min(block_size, var_numel - (
+                (block_id) * block_size))
+            block = VarBlock(var.name, block_id, curr_block_size)
+            blocks.append(str(block))
+    return blocks
 
 
 class DistributeTranspiler:
@@ -58,7 +82,6 @@ class DistributeTranspiler:
                   split_method=round_robin):
         """
             Transpile the program to a distributed data-parallelism programs.
-
             The main_program will be transform to use a remote parameter server
             to do parameter optimization. And the optimization graph will be put
             in to a parameter server program.
@@ -66,60 +89,113 @@ class DistributeTranspiler:
             Use different methods to split trainable varialbles to different
             parameter servers.
 
-            Example to run:
-
-            exe = fluid.Executor(place)
-            t = fluid.DistributeTranspiler()
-            t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
-
-            pserver_endpoint = os.getenv("PSERVER")
-            if pserver_endpoint:
-                pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
-                exe.run(fluid.default_startup_program())
-                exe.run(pserver_prog)
-            else:
-                feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
-                exe.run(fluid.default_startup_program())
-
-                for pass_id in range(PASS_NUM):
-                    ...
-
             :param optimize_ops: op list of optimization, should be the
                                  return value of Optimizer.minimize
             :type optimize_ops: list
             :param program: program to optimize, default default_main_program
             :param pservers: parameter server endpoints like "m1:6174,m2:6174"
             :type pservers: string
-
             :return: return a list of programs
         """
+        assert (callable(split_method))
         if program is None:
             program = default_main_program()
         self.program = program
         self.trainers = trainers
         self.optimize_ops = optimize_ops
-        self._optimize_distributed(
-            optimize_ops,
-            program,
-            params_grads,
-            pservers=pservers,
-            trainers=trainers,
-            split_method=split_method)
-
-    def _clone_param(self, block, v):
-        assert isinstance(v, Parameter)
-        new_p = Parameter(
-            block=block,
-            shape=v.shape,
-            dtype=v.dtype,
-            type=v.type,
-            lod_level=v.lod_level,
-            stop_gradient=v.stop_gradient,
-            trainable=v.trainable,
-            optimize_attr=v.optimize_attr,
-            regularizer=v.regularizer,
-            name=v.name)
-        block.vars[new_p.name] = new_p
+        # steps to transpile:
+        # 1. split variable to multiple blocks, align by product(dim[1:]) (width).
+        # 2. modify trainer program add split_op to each Grad.
+        # 3. append send_op to trainer.
+        # 4. append concat_op to trainer to update local weights.
+        # 5. create new program as parameter server.
+        # 6. create parameter server program by split_method generated endpoint->VarBlock
+
+        pserver_endpoints = pservers.split(",")
+
+        # step1
+        param_list = [pg[0] for pg in params_grads]
+        grad_list = [pg[1] for pg in params_grads]
+        # TODO: add split selected rows support
+        grad_blocks = split_dense_variable(grad_list, len(pserver_endpoints))
+        param_blocks = split_dense_variable(param_list, len(pserver_endpoints))
+        # step2
+        grad_var_mapping = self._append_split_op(program, grad_blocks)
+
+        # step3
+        send_inputs = []
+        send_outputs = []
+        for b in grad_blocks:  # append by order
+            varname, block_id, _ = b.split(":")
+            send_inputs.append(grad_var_mapping[varname][int(block_id)])
+
+        param_var_mapping = self._create_vars_from_blocklist(program,
+                                                             param_blocks)
+        for b in param_blocks:
+            varname, block_id, _ = b.split(":")
+            send_outputs.append(param_var_mapping[varname][int(block_id)])
+        # let send_op know which endpoint to send which var, eplist is of the same
+        # order of send_inputs.
+        eplist = split_method(send_inputs, pserver_endpoints)
+        # create mapping of endpoint -> splited var to create pserver side program
+        self.param_grad_ep_mapping = dict()
+        for i, ep in enumerate(eplist):
+            param = send_outputs[i]
+            grad = send_inputs[i]
+            if not self.param_grad_ep_mapping.has_key(ep):
+                self.param_grad_ep_mapping[ep] = {"params": [], "grads": []}
+            self.param_grad_ep_mapping[ep]["params"].append(param)
+            self.param_grad_ep_mapping[ep]["grads"].append(grad)
+
+        send_op = program.global_block().append_op(
+            type="send",
+            inputs={"X": send_inputs},
+            outputs={"Out": send_outputs},
+            attrs={"endpoints": pserver_endpoints,
+                   "epmap": eplist})
+        # step4
+        for varname, splited_var in param_var_mapping.iteritems():
+            if len(splited_var) <= 1:
+                continue
+            orig_param = program.global_block().vars[varname]
+            concat = program.global_block().append_op(
+                type="concat",
+                inputs={"X": splited_var},
+                outputs={"Out": [orig_param]},
+                attrs={"axis": 0})
+
+    def _create_vars_from_blocklist(self, program, block_list):
+        block_map = dict()
+        var_mapping = dict()
+        for block_str in block_list:
+            varname, offset, size = block_str.split(":")
+            if not block_map.has_key(varname):
+                block_map[varname] = []
+            block_map[varname].append((long(offset), long(size)))
+        for varname, splited in block_map.iteritems():
+            orig_var = program.global_block().vars[varname]
+            var_mapping[varname] = []
+            if len(splited) == 1:
+                var_mapping[varname] = [orig_var]
+                continue
+            orig_shape = orig_var.shape
+            orig_dim1_flatten = 1
+            if len(orig_shape) >= 2:
+                orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:])
+
+            for i, block in enumerate(splited):
+                size = block[1]
+                rows = size / orig_dim1_flatten
+                splited_shape = [rows]
+                if len(orig_shape) >= 2:
+                    splited_shape.extend(orig_shape[1:])
+                var = program.global_block().create_var(
+                    name="%s.block%d" % (varname, i),
+                    psersistable=False,
+                    dtype=orig_var.dtype,
+                    shape=splited_shape)  # flattend splited var
+                var_mapping[varname].append(var)
+        return var_mapping
 
     def _clone_var(self, block, var):
         assert isinstance(var, Variable)
@@ -129,34 +205,27 @@ class DistributeTranspiler:
             dtype=var.dtype,
             type=var.type,
             lod_level=var.lod_level,
-            persistable=var.persistable)
+            # HACK: let all param in pserver persistable so child
+            # program in recv can get them
+            persistable=True)
 
-    def _optimize_distributed(self, optimize_ops, program, params_and_grads,
-                              **kwargs):
-        if kwargs.has_key("split_method"):
-            split_method = kwargs["split_method"]
-        else:
-            split_method = round_robin
-
-        assert (callable(split_method))
-        pserver_endpoints = kwargs["pservers"].split(",")
-        self.param_grad_map = split_method(params_and_grads, pserver_endpoints)
-
-        send_op_ordered_inputs = []
-        send_op_ordered_outputs = []
-        epmap = []
-        for ep, v in self.param_grad_map.iteritems():
-            send_op_ordered_inputs.extend(v["grads"])
-            send_op_ordered_outputs.extend(v["params"])
-            for i in v["grads"]:
-                epmap.append(ep)
-        send_op = program.global_block().append_op(
-            type="send",
-            inputs={"X": send_op_ordered_inputs
-                    },  # inputs is a list of tensors to be send
-            outputs={"Out": send_op_ordered_outputs},
-            attrs={"endpoints": pserver_endpoints,
-                   "epmap": epmap})
+    def _append_split_op(self, program, gradblocks):
+        var_mapping = self._create_vars_from_blocklist(program, gradblocks)
+        for varname, splited_vars in var_mapping.iteritems():
+            # variable that don't need to split have empty splited_vars
+            if len(splited_vars) <= 1:
+                continue
+            orig_var = program.global_block().vars[varname]
+            sections = []
+            for v in splited_vars:
+                sections.append(v.shape[0])
+            program.global_block().append_op(
+                type="split",
+                inputs={"X": orig_var},
+                outputs={"Out": splited_vars},
+                attrs={"sections": sections}  # assume split evenly
+            )
+        return var_mapping
 
     def get_trainer_program(self):
         # remove optimize ops and add a send op to main_program
@@ -174,69 +243,267 @@ class DistributeTranspiler:
             var_list.append(var_each)
         return var_list
 
-    def get_pserver_program(self, endpoint, optimize_ops):
-        pserver_program = Program()
-        for v in self.param_grad_map[endpoint]["params"]:
-            self._clone_param(pserver_program.global_block(), v)
+    def _get_optimizer_input_shape(self, op_type, varkey, orig_shape,
+                                   param_shape):
+        """
+        Returns the shape for optimizer inputs that need to be reshaped when
+        Param and Grad is splited to multiple servers.
+        """
+        # HACK(typhoonzero): Should use functions of corresponding optimizer in
+        # optimizer.py to get the shape, do not  bind this in the transpiler.
+        if op_type == "adam":
+            if varkey in ["Moment1", "Moment2"]:
+                return param_shape
+        elif op_type == "adagrad":
+            if varkey == "Moment":
+                return param_shape
+        elif op_type == "adamax":
+            if varkey in ["Moment", "InfNorm"]:
+                return param_shape
+        elif op_type == "momentum":
+            if varkey == "Velocity":
+                return param_shape
+        elif op_type == "":
+            if varkey == "Moment":
+                return param_shape
+        elif op_type == "sgd":
+            pass
+        return orig_shape
 
-        optimize_sub_program = Program()
-        grad_var_names = [
-            var.name for var in self.param_grad_map[endpoint]["grads"]
+    def _is_op_on_pserver(self, endpoint, all_ops, idx):
+        """
+        Recursively check if the op need to run on current server.
+        Assume that ops are in the execution order.
+        """
+        param_names = [
+            p.name for p in self.param_grad_ep_mapping[endpoint]["params"]
         ]
-        for opt_op in optimize_ops:
-            for _, var in opt_op.inputs.iteritems():
-                # NOTE: append operators to merge gradients from multiple
-                # trainers. If trainers == 1, this is not needed.
-                if self.trainers > 1 and var.name in grad_var_names:
+        op = all_ops[idx]
+        if op.inputs.has_key("Param"):
+            if op.inputs["Param"].name in param_names:
+                return True
+            else:
+                for n in param_names:
+                    if n.startswith(op.inputs["Param"].name+".block") and \
+                        n != op.inputs["Param"].name:
+                        return True
+                return False
+        else:
+            j = idx - 1
+            while j >= 0:
+                prev_op = all_ops[j]
+                prev_output_names = [o.name for o in prev_op.outputs.values()]
+                prev_input_names = [o.name for o in prev_op.inputs.values()]
+                found1 = False
+                found2 = False
+                for _, v in op.inputs.iteritems():
+                    if v.name in prev_output_names:
+                        found1 = self._is_op_on_pserver(endpoint, all_ops, j)
+                # later ops may produce output for prev op's next batch use.
+                for _, v in op.outputs.iteritems():
+                    if v.name in prev_input_names:
+                        found2 = self._is_op_on_pserver(endpoint, all_ops, j)
+                if found1 or found2:
+                    return True
+                j -= 1
+            return False
+
+    def _append_pserver_ops(self, program, pserver_program, opt_op, endpoint):
+        new_inputs = dict()
+        # update param/grad shape first, then other inputs like
+        # moment can use the updated shape
+        for key, var in opt_op.inputs.iteritems():
+            if key == "Grad":
+                grad_block = None
+                for g in self.param_grad_ep_mapping[endpoint]["grads"]:
+                    if g.name.startswith(var.name):
+                        grad_block = g
+                        break
+                if not grad_block:
+                    # do not append this op if current endpoint
+                    # is not dealing with this grad block
+                    return
+                merged_var = program.global_block().create_var(
+                    name=grad_block.name,
+                    persistable=grad_block.persistable,
+                    dtype=grad_block.dtype,
+                    shape=grad_block.shape)
+                # append merging ops if trainers > 1
+                if self.trainers > 1:
                     vars2merge = self._create_var_for_trainers(
-                        optimize_sub_program.global_block(), var, self.trainers)
-                    merged_var = optimize_sub_program.global_block().create_var(
-                        name=var.name,
-                        persistable=var.persistable,
-                        dtype=var.dtype,
-                        shape=var.shape)
-                    optimize_sub_program.global_block().append_op(
+                        program.global_block(), grad_block, self.trainers)
+                    program.global_block().append_op(
                         type="sum",
                         inputs={"X": vars2merge},
                         outputs={"Out": merged_var})
-                    optimize_sub_program.global_block().append_op(
+                    program.global_block().append_op(
                         type="scale",
                         inputs={"X": merged_var},
                         outputs={"Out": merged_var},
                         attrs={"scale": 1.0 / float(self.trainers)})
-                else:
-                    optimize_sub_program.global_block().create_var(
-                        name=var.name,
-                        persistable=var.persistable,
-                        dtype=var.dtype,
-                        shape=var.shape)
+                new_inputs[key] = merged_var
+            elif key == "Param":
+                # param is already created on global program
+                param_block = None
+                for p in self.param_grad_ep_mapping[endpoint]["params"]:
+                    if p.name.startswith(var.name):
+                        param_block = p
+                        break
+                if not param_block:
+                    return
+                tmpvar = program.global_block().create_var(
+                    name=param_block.name,
+                    persistable=True,
+                    dtype=param_block.dtype,
+                    shape=param_block.shape)
+
+                new_inputs[key] = tmpvar
 
+        for key, var in opt_op.inputs.iteritems():
+            if key in ["Param", "Grad"]:
+                continue
+            # update accumulator variable shape
+            param_shape = new_inputs["Param"].shape
+            new_shape = self._get_optimizer_input_shape(opt_op.type, key,
+                                                        var.shape, param_shape)
+            tmpvar = program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=new_shape)
+            new_inputs[key] = tmpvar
+            # create var in pserver program global block.
+            # TODO(typhoonzero): put blocks in one program to avoid create two
+            # variables.
+            pserver_program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=new_shape)
+
+        # change outputs ParamOut variable
+        opt_op.outputs["ParamOut"] = new_inputs["Param"]
+        program.global_block().append_op(
+            type=opt_op.type,
+            inputs=new_inputs,
+            outputs=opt_op.outputs,
+            attrs=opt_op.attrs)
+
+    def _append_pserver_non_opt_ops(self, program, pserver_program, opt_op):
+        for _, var in opt_op.inputs.iteritems():
+            program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+            pserver_program.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+        program.global_block().append_op(
+            type=opt_op.type,
+            inputs=opt_op.inputs,
+            outputs=opt_op.outputs,
+            attrs=opt_op.attrs)
+
+    def get_pserver_program(self, endpoint, optimize_ops):
+        """
+        get pserver side program by endpoint
+
+        NOTE: assume blocks of the same variable is not distributed
+        on the same pserver, only change param/grad varnames for
+        trainers to fetch. For each pserver endpoint, server side
+        program must be a sub-set of the original optimization program.
+        """
+        # step5
+        pserver_program = Program()
+        for v in self.param_grad_ep_mapping[endpoint]["params"]:
+            self._clone_var(pserver_program.global_block(), v)
+        # step6
+        optimize_sub_program = Program()
+        for idx, opt_op in enumerate(optimize_ops):
+            is_op_on_pserver = self._is_op_on_pserver(endpoint, optimize_ops,
+                                                      idx)
+            if not is_op_on_pserver:
+                continue
             if opt_op.inputs.has_key("Grad"):
-                if opt_op.inputs["Grad"].name in grad_var_names:
-                    optimize_sub_program.global_block().append_op(
-                        type=opt_op.type,
-                        inputs=opt_op.inputs,
-                        outputs=opt_op.outputs,
-                        attrs=opt_op.attrs)
+                self._append_pserver_ops(optimize_sub_program, pserver_program,
+                                         opt_op, endpoint)
             else:
-                optimize_sub_program.global_block().append_op(
-                    type=opt_op.type,
-                    inputs=opt_op.inputs,
-                    outputs=opt_op.outputs,
-                    attrs=opt_op.attrs)
+                self._append_pserver_non_opt_ops(optimize_sub_program,
+                                                 pserver_program, opt_op)
         pserver_program.global_block().append_op(
             type="recv",
-            inputs={"RX":
-                    self.param_grad_map[endpoint]["grads"]},  # grads to recv
+            inputs={"RX": self.param_grad_ep_mapping[endpoint]["grads"]
+                    },  # grads to recv
             outputs={},
             attrs={
                 "OptimizeProgram": optimize_sub_program.desc,
                 "endpoint": endpoint,
-                "ParamList":
-                [p.name for p in self.param_grad_map[endpoint]["params"]],
-                "GradList":
-                [p.name for p in self.param_grad_map[endpoint]["grads"]],
+                "ParamList": [
+                    p.name
+                    for p in self.param_grad_ep_mapping[endpoint]["params"]
+                ],
+                "GradList": [
+                    p.name
+                    for p in self.param_grad_ep_mapping[endpoint]["grads"]
+                ],
                 "Trainers": self.trainers
             })
         pserver_program.sync_with_cpp()
         return pserver_program
+
+    def get_startup_program(self, endpoint, pserver_program):
+        """
+        Get startup program for current parameter server.
+        Modify operator input variables if there are variables that
+        was splited to several blocks.
+        """
+        s_prog = Program()
+        orig_s_prog = framework.default_startup_program()
+        params = self.param_grad_ep_mapping[endpoint]["params"]
+
+        def _get_splited_name_and_shape(varname):
+            for idx, splited_param in enumerate(params):
+                pname = splited_param.name
+                if pname.startswith(varname) and varname != pname:
+                    return pname, splited_param.shape
+            return "", []
+
+        # 1. create vars in pserver program to startup program
+        pserver_vars = pserver_program.global_block().vars
+        created_var_map = dict()
+        for _, var in pserver_vars.iteritems():
+            tmpvar = s_prog.global_block().create_var(
+                name=var.name,
+                persistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+            created_var_map[var.name] = tmpvar
+
+        # 2. rename op outputs
+        for op in orig_s_prog.global_block().ops:
+            new_outputs = dict()
+            # do not append startup op if var is not on this pserver
+            op_on_pserver = False
+            for key, var in op.outputs.iteritems():
+                newname, _ = _get_splited_name_and_shape(var.name)
+                if newname:
+                    op_on_pserver = True
+                    new_outputs[key] = created_var_map[newname]
+                elif var.name in pserver_vars:
+                    op_on_pserver = True
+                    new_outputs[key] = pserver_vars[var.name]
+
+            if op_on_pserver:
+                if op.type in [
+                        "gaussian_random", "fill_constant", "uniform_random"
+                ]:
+                    op.attrs["shape"] = new_outputs["Out"].shape
+                s_prog.global_block().append_op(
+                    type=op.type,
+                    inputs=op.inputs,
+                    outputs=new_outputs,
+                    attrs=op.attrs)
+        return s_prog
diff --git a/python/paddle/v2/fluid/distribute_transpiler_simple.py b/python/paddle/v2/fluid/distribute_transpiler_simple.py
new file mode 100644
index 0000000000..bd88f02bde
--- /dev/null
+++ b/python/paddle/v2/fluid/distribute_transpiler_simple.py
@@ -0,0 +1,255 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import framework
+from framework import Program, default_main_program, Parameter, Variable
+import optimizer
+from layer_helper import LayerHelper
+
+
+def hash_name_to_server(params_grads, pserver_endpoints):
+    """
+    :param param_grads:
+    :return: a map of pserver endpoint -> 
+                    params -> [param list]
+                    grads  -> [grad list]
+    """
+
+    def _hash_param(param_name, total):
+        return hash(param_name) % total
+
+    param_grad_map = dict()
+    for param, grad in params_grads:
+        if param.trainable is True and grad is not None:
+            server_id = _hash_param(param.name, len(pserver_endpoints))
+            server_for_param = pserver_endpoints[server_id]
+            if not param_grad_map.has_key(server_for_param):
+                param_grad_map[server_for_param] = {"params": [], "grads": []}
+            param_grad_map[server_for_param]["params"].append(param)
+            param_grad_map[server_for_param]["grads"].append(grad)
+
+    return param_grad_map
+
+
+def round_robin(params_grads, pserver_endpoints):
+    assert (len(params_grads) > len(pserver_endpoints))
+
+    param_grad_map = dict()
+    pserver_idx = 0
+    for param, grad in params_grads:
+        if param.trainable is True:
+            server_for_param = pserver_endpoints[pserver_idx]
+            if not param_grad_map.has_key(server_for_param):
+                param_grad_map[server_for_param] = {"params": [], "grads": []}
+
+            param_grad_map[server_for_param]["params"].append(param)
+            param_grad_map[server_for_param]["grads"].append(grad)
+
+            pserver_idx += 1
+            if pserver_idx >= len(pserver_endpoints):
+                pserver_idx = 0
+    return param_grad_map
+
+
+class SimpleDistributeTranspiler:
+    def transpile(self,
+                  optimize_ops,
+                  params_grads,
+                  program=None,
+                  pservers="127.0.0.1:6174",
+                  trainers=1,
+                  split_method=round_robin):
+        """
+            Transpile the program to a distributed data-parallelism programs.
+
+            The main_program will be transform to use a remote parameter server
+            to do parameter optimization. And the optimization graph will be put
+            in to a parameter server program.
+
+            Use different methods to split trainable varialbles to different
+            parameter servers.
+
+            Example to run:
+
+            exe = fluid.Executor(place)
+            t = fluid.DistributeTranspiler()
+            t.transpile(optimize_ops, params_grads, pservers="127.0.0.1:6174", trainers=1)
+
+            pserver_endpoint = os.getenv("PSERVER")
+            if pserver_endpoint:
+                pserver_prog = t.get_pserver_program(pserver_endpoint, optimize_ops)
+                exe.run(fluid.default_startup_program())
+                exe.run(pserver_prog)
+            else:
+                feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
+                exe.run(fluid.default_startup_program())
+
+                for pass_id in range(PASS_NUM):
+                    ...
+
+            :param optimize_ops: op list of optimization, should be the
+                                 return value of Optimizer.minimize
+            :type optimize_ops: list
+            :param program: program to optimize, default default_main_program
+            :param pservers: parameter server endpoints like "m1:6174,m2:6174"
+            :type pservers: string
+
+            :return: return a list of programs
+        """
+        if program is None:
+            program = default_main_program()
+        self.program = program
+        self.trainers = trainers
+        self.optimize_ops = optimize_ops
+        self._optimize_distributed(
+            optimize_ops,
+            program,
+            params_grads,
+            pservers=pservers,
+            trainers=trainers,
+            split_method=split_method)
+
+    def _clone_param(self, block, v):
+        assert isinstance(v, Parameter)
+        new_p = Parameter(
+            block=block,
+            shape=v.shape,
+            dtype=v.dtype,
+            type=v.type,
+            lod_level=v.lod_level,
+            stop_gradient=v.stop_gradient,
+            trainable=v.trainable,
+            optimize_attr=v.optimize_attr,
+            regularizer=v.regularizer,
+            name=v.name)
+        block.vars[new_p.name] = new_p
+
+    def _clone_var(self, block, var):
+        assert isinstance(var, Variable)
+        return block.create_var(
+            name=var.name,
+            shape=var.shape,
+            dtype=var.dtype,
+            type=var.type,
+            lod_level=var.lod_level,
+            persistable=var.persistable)
+
+    def _optimize_distributed(self, optimize_ops, program, params_and_grads,
+                              **kwargs):
+        if kwargs.has_key("split_method"):
+            split_method = kwargs["split_method"]
+        else:
+            split_method = round_robin
+
+        assert (callable(split_method))
+        pserver_endpoints = kwargs["pservers"].split(",")
+        self.param_grad_map = split_method(params_and_grads, pserver_endpoints)
+
+        send_op_ordered_inputs = []
+        send_op_ordered_outputs = []
+        epmap = []
+        for ep, v in self.param_grad_map.iteritems():
+            send_op_ordered_inputs.extend(v["grads"])
+            send_op_ordered_outputs.extend(v["params"])
+            for i in v["grads"]:
+                epmap.append(ep)
+        send_op = program.global_block().append_op(
+            type="send",
+            inputs={"X": send_op_ordered_inputs
+                    },  # inputs is a list of tensors to be send
+            outputs={"Out": send_op_ordered_outputs},
+            attrs={"endpoints": pserver_endpoints,
+                   "epmap": epmap})
+
+    def get_trainer_program(self):
+        # remove optimize ops and add a send op to main_program
+        self.program.global_block().delete_ops(self.optimize_ops)
+        return self.program
+
+    def _create_var_for_trainers(self, block, var, trainers):
+        var_list = []
+        for i in xrange(trainers):
+            var_each = block.create_var(
+                name="%s.trainer_%d" % (var.name, i),
+                psersistable=var.persistable,
+                dtype=var.dtype,
+                shape=var.shape)
+            var_list.append(var_each)
+        return var_list
+
+    def get_pserver_program(self, endpoint, optimize_ops):
+        pserver_program = Program()
+        for v in self.param_grad_map[endpoint]["params"]:
+            self._clone_param(pserver_program.global_block(), v)
+
+        optimize_sub_program = Program()
+        grad_var_names = [
+            var.name for var in self.param_grad_map[endpoint]["grads"]
+        ]
+        for opt_op in optimize_ops:
+            for _, var in opt_op.inputs.iteritems():
+                # NOTE: append operators to merge gradients from multiple
+                # trainers. If trainers == 1, this is not needed.
+                if self.trainers > 1 and var.name in grad_var_names:
+                    vars2merge = self._create_var_for_trainers(
+                        optimize_sub_program.global_block(), var, self.trainers)
+                    merged_var = optimize_sub_program.global_block().create_var(
+                        name=var.name,
+                        persistable=var.persistable,
+                        dtype=var.dtype,
+                        shape=var.shape)
+                    optimize_sub_program.global_block().append_op(
+                        type="sum",
+                        inputs={"X": vars2merge},
+                        outputs={"Out": merged_var})
+                    optimize_sub_program.global_block().append_op(
+                        type="scale",
+                        inputs={"X": merged_var},
+                        outputs={"Out": merged_var},
+                        attrs={"scale": 1.0 / float(self.trainers)})
+                else:
+                    optimize_sub_program.global_block().create_var(
+                        name=var.name,
+                        persistable=var.persistable,
+                        dtype=var.dtype,
+                        shape=var.shape)
+
+            if opt_op.inputs.has_key("Grad"):
+                if opt_op.inputs["Grad"].name in grad_var_names:
+                    optimize_sub_program.global_block().append_op(
+                        type=opt_op.type,
+                        inputs=opt_op.inputs,
+                        outputs=opt_op.outputs,
+                        attrs=opt_op.attrs)
+            else:
+                optimize_sub_program.global_block().append_op(
+                    type=opt_op.type,
+                    inputs=opt_op.inputs,
+                    outputs=opt_op.outputs,
+                    attrs=opt_op.attrs)
+        pserver_program.global_block().append_op(
+            type="recv",
+            inputs={"RX":
+                    self.param_grad_map[endpoint]["grads"]},  # grads to recv
+            outputs={},
+            attrs={
+                "OptimizeProgram": optimize_sub_program.desc,
+                "endpoint": endpoint,
+                "ParamList":
+                [p.name for p in self.param_grad_map[endpoint]["params"]],
+                "GradList":
+                [p.name for p in self.param_grad_map[endpoint]["grads"]],
+                "Trainers": self.trainers
+            })
+        pserver_program.sync_with_cpp()
+        return pserver_program
diff --git a/python/paddle/v2/fluid/distributed_spliter.py b/python/paddle/v2/fluid/distributed_spliter.py
new file mode 100644
index 0000000000..e647f760e9
--- /dev/null
+++ b/python/paddle/v2/fluid/distributed_spliter.py
@@ -0,0 +1,48 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+def hash_name(varlist, pserver_endpoints):
+    """
+    hash variable names to several endpoints.
+
+    :param varlist: a list of Variables
+    :return: a map of pserver endpoint -> varname
+    """
+
+    def _hash_block(block_str, total):
+        return hash(block_str) % total
+
+    eplist = []
+    for var in varlist:
+        server_id = _hash_block(var.name(), len(pserver_endpoints))
+        server_for_param = pserver_endpoints[server_id]
+        eplist.append(server_for_param)
+    return eplist
+
+
+def round_robin(varlist, pserver_endpoints):
+    """
+    distribute variables to several endpoints.
+    """
+    assert (len(varlist) > len(pserver_endpoints))
+
+    eplist = []
+    pserver_idx = 0
+    for var in varlist:
+        server_for_param = pserver_endpoints[pserver_idx]
+        eplist.append(server_for_param)
+
+        pserver_idx += 1
+        if pserver_idx >= len(pserver_endpoints):
+            pserver_idx = 0
+    return eplist
diff --git a/python/paddle/v2/fluid/evaluator.py b/python/paddle/v2/fluid/evaluator.py
index e186ee96c3..adf174a07d 100644
--- a/python/paddle/v2/fluid/evaluator.py
+++ b/python/paddle/v2/fluid/evaluator.py
@@ -1,10 +1,26 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 
 import layers
 from framework import Program, unique_name, Variable, program_guard
 from layer_helper import LayerHelper
 
-__all__ = ['Accuracy', 'ChunkEvaluator']
+__all__ = [
+    'Accuracy',
+    'ChunkEvaluator',
+]
 
 
 def _clone_var_(block, var):
@@ -21,19 +37,19 @@ def _clone_var_(block, var):
 class Evaluator(object):
     """
     Base Class for all evaluators
-    
+
     Args:
-        name(str): The name of evaluator. such as, "accuracy". Used for generate 
+        name(str): The name of evaluator. such as, "accuracy". Used for generate
             temporary variable name.
-        main_program(Program, optional): The evaluator should be added to this 
+        main_program(Program, optional): The evaluator should be added to this
             main_program. Default default_main_program()
-        startup_program(Program, optional):The parameter should be added to this 
+        startup_program(Program, optional):The parameter should be added to this
             startup_program. Default default_startup_program()
-            
+
     Attributes:
-        states(list): The list of state variables. states will be reset to zero 
+        states(list): The list of state variables. states will be reset to zero
             when `reset` is invoked.
-        metrics(list): The list of metrics variables. They will be calculate 
+        metrics(list): The list of metrics variables. They will be calculate
             every mini-batch
     """
 
@@ -66,14 +82,14 @@ class Evaluator(object):
 
     def create_state(self, suffix, dtype, shape):
         """
-        Create state variable. 
-        
+        Create state variable.
+
         NOTE: It is not a public API.
-        
+
         Args:
-            suffix(str): the state suffix. 
-            dtype(str|core.DataType): the state data type 
-            shape(tuple|list): the shape of state 
+            suffix(str): the state suffix.
+            dtype(str|core.DataType): the state data type
+            shape(tuple|list): the shape of state
 
         Returns: State variable
 
@@ -127,8 +143,8 @@ class Accuracy(Evaluator):
 
 class ChunkEvaluator(Evaluator):
     """
-    Accumulate counter numbers output by chunk_eval from mini-batches and 
-    compute the precision recall and F1-score using the accumulated counter 
+    Accumulate counter numbers output by chunk_eval from mini-batches and
+    compute the precision recall and F1-score using the accumulated counter
     numbers.
     """
 
diff --git a/python/paddle/v2/fluid/executor.py b/python/paddle/v2/fluid/executor.py
index 1b2075dcd5..a99c5157b2 100644
--- a/python/paddle/v2/fluid/executor.py
+++ b/python/paddle/v2/fluid/executor.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import contextlib
 from framework import Program, default_main_program
diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py
index 2fb388acfc..8042febfed 100644
--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import collections
 import contextlib
 
@@ -7,9 +20,15 @@ import proto.framework_pb2 as framework_pb2
 from . import core
 
 __all__ = [
-    'Block', 'Variable', 'Program', 'Operator', 'default_startup_program',
-    'default_main_program', 'program_guard', 'switch_startup_program',
-    'switch_main_program'
+    'Block',
+    'Variable',
+    'Program',
+    'Operator',
+    'default_startup_program',
+    'default_main_program',
+    'program_guard',
+    'switch_startup_program',
+    'switch_main_program',
 ]
 
 EMPTY_VAR_NAME = core.kEmptyVarName()
@@ -236,6 +255,9 @@ class Variable(object):
 
     __repr__ = __str__
 
+    def set_desc(self, input):
+        self.desc = input
+
     @property
     def persistable(self):
         return self.desc.persistable()
@@ -271,6 +293,9 @@ class Variable(object):
         uid = core.unique_integer(prefix)  # unique during whole process.
         return "_".join([prefix, str(uid)])
 
+    def set_error_clip(self, error_clip):
+        self.error_clip = error_clip
+
 
 def get_all_op_protos():
     """
diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py
index c0839caaf2..2e8cfa3177 100644
--- a/python/paddle/v2/fluid/initializer.py
+++ b/python/paddle/v2/fluid/initializer.py
@@ -1,7 +1,25 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import framework
 import numpy as np
 
-__all__ = ['Constant', 'Uniform', 'Normal', 'Xavier']
+__all__ = [
+    'Constant',
+    'Uniform',
+    'Normal',
+    'Xavier',
+]
 
 
 class Initializer(object):
diff --git a/python/paddle/v2/fluid/io.py b/python/paddle/v2/fluid/io.py
index c63567601a..499df05e59 100644
--- a/python/paddle/v2/fluid/io.py
+++ b/python/paddle/v2/fluid/io.py
@@ -1,16 +1,45 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import os
 import cPickle as pickle
 
 from paddle.v2.fluid.framework import Program, Parameter, default_main_program, Variable
 
 __all__ = [
-    'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
-    'load_persistables', "save_inference_model", "load_inference_model",
-    "get_inference_program"
+    'save_vars',
+    'save_params',
+    'save_persistables',
+    'load_vars',
+    'load_params',
+    'load_persistables',
+    'save_inference_model',
+    'load_inference_model',
+    'get_inference_program',
 ]
 
 
 def is_parameter(var):
+    """Check whether the variable is a Parameter.
+
+    This function checks whether the input variable is a Parameter.
+
+    Args:
+        var : The input variable.
+
+    Returns:
+        boolean result whether the variable is a Parameter.
+    """
     return isinstance(var, Parameter)
 
 
diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py
index 325735e679..191d2349b5 100644
--- a/python/paddle/v2/fluid/layer_helper.py
+++ b/python/paddle/v2/fluid/layer_helper.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import copy
 import itertools
 
diff --git a/python/paddle/v2/fluid/layers/__init__.py b/python/paddle/v2/fluid/layers/__init__.py
index 50ac0aba01..c190af3329 100644
--- a/python/paddle/v2/fluid/layers/__init__.py
+++ b/python/paddle/v2/fluid/layers/__init__.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import ops
 from ops import *
 import nn
diff --git a/python/paddle/v2/fluid/layers/control_flow.py b/python/paddle/v2/fluid/layers/control_flow.py
index 9ad021fa99..e72b22c83f 100644
--- a/python/paddle/v2/fluid/layers/control_flow.py
+++ b/python/paddle/v2/fluid/layers/control_flow.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from ..layer_helper import LayerHelper, unique_name
 from ..framework import Program, Variable, Operator
 from .. import core
@@ -12,7 +25,7 @@ __all__ = [
     'array_to_lod_tensor', 'increment', 'array_write', 'create_array',
     'less_than', 'array_read', 'shrink_memory', 'array_length', 'IfElse',
     'DynamicRNN', 'ConditionalBlock', 'StaticRNN', 'reorder_lod_tensor_by_rank',
-    'ParallelDo'
+    'ParallelDo', 'Print'
 ]
 
 
@@ -110,6 +123,67 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0):
     return out
 
 
+def Print(input,
+          first_n=-1,
+          message=None,
+          summarize=-1,
+          print_tensor_name=True,
+          print_tensor_type=True,
+          print_tensor_shape=True,
+          print_tensor_lod=True,
+          print_phase='both'):
+    '''
+    **Print operator**
+
+    This creates a print op that will print when a tensor is accessed.
+
+    Wraps the tensor passed in so that whenever that a tensor is accessed,
+    the message `message` is printed, along with the current value of the
+    tensor `t`.
+
+    Args:
+        input (Variable): A Tensor to print.
+        summarize (int): Print this number of elements in the tensor, will print
+                all if left is negative.
+        message (str): A string message to print as a prefix.
+        first_n (int): Only log `first_n` number of times.
+        print_tensor_name (bool): Print the tensor name.
+        print_tensor_type (bool): Print the tensor type.
+        print_tensor_shape (bool): Print the tensor shape.
+        print_tensor_lod (bool): Print the tensor lod.
+        print_phase (bool): Which phase to displace, including 'forward',
+                'backward' and 'both'. If set to 'backward' or 'both', will
+                print the gradients of input tensor.
+
+    Returns:
+        Variable: Output tensor, same data with input tensor.
+
+    Examples:
+        .. code-block:: python
+
+        value = some_layer(...)
+        Print(value, summarize=10,
+              message="The content of some_layer: ")
+    '''
+    helper = LayerHelper('print', **locals())
+    out = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='print',
+        inputs={'In': input},
+        attrs={
+            'first_n': first_n,
+            'summarize': summarize,
+            'message': message or "",
+            'print_tensor_name': print_tensor_name,
+            'print_tensor_type': print_tensor_type,
+            'print_tensor_shape': print_tensor_shape,
+            'print_tensor_lod': print_tensor_lod,
+            'print_phase': print_phase.upper()
+        },
+        outputs={'Out': out})
+    return out
+
+
 class BlockGuard(object):
     """
     BlockGuard class.
@@ -687,11 +761,10 @@ def topk(input, k):
 
 
 def lod_tensor_to_array(x, table):
-    """This function performs the operation that converts an LOD_Tensor to
-       an array.
+    """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
 
     Args:
-        x (Variable|list): The tensor that needs to be converted to an array.
+        x (Variable|list): The LOD tensor to be converted to a LOD tensor array.
         table (ParamAttr|list): The variable that stores the level of lod
                                 which is ordered by sequence length in
                                 descending order.
@@ -721,11 +794,10 @@ def lod_tensor_to_array(x, table):
 
 
 def array_to_lod_tensor(x, table):
-    """This function performs the operations that converts an array to
-       an LOD_Tensor.
+    """Convert a LoD_Tensor_Aarry to an LoDTensor.
 
     Args:
-        x (Variable|list): The array that needs to be converted to a tensor.
+        x (Variable|list): The lod tensor array to be converted to a tensor.
         table (ParamAttr|list): The variable that stores the level of lod
                                 which is ordered by sequence length in
                                 descending order.
@@ -753,7 +825,8 @@ def array_to_lod_tensor(x, table):
 
 
 def increment(x, value=1.0, in_place=True):
-    """This function performs an operation that increments each value in the
+    """
+    This function performs an operation that increments each value in the
     input :math:`x` by an amount: :math:`value` as mentioned in the input
     parameter. This operation is performed in-place by default.
 
@@ -786,17 +859,24 @@ def increment(x, value=1.0, in_place=True):
 
 
 def array_write(x, i, array=None):
-    """This function performs the operation to write the data out as an
-    LOD_TENSOR_ARRAY.
+    """
+    This function writes the given input variable to the specified position
+    indicating by the arrary index to an output LOD_TENSOR_ARRAY. If the
+    output LOD_TENSOR_ARRAY is not given(None), a new one will be created and
+    returned.
 
     Args:
         x (Variable|list): The input tensor from which the data will be read.
-        i (Variable|list): The subscript index in tensor array, that points the
-                           place from which data will be read.
-        array (Variable|list): The data can be read into this variable if
-                               this is assigned.
+        i (Variable|list): The index of the output LOD_TENSOR_ARRAY, pointing to
+                           the position to which the input tensor will be
+                           written.
+        array (Variable|list): The output LOD_TENSOR_ARRAY to which the input
+                               tensor will be written. If this parameter is
+                               NONE, a new LOD_TENSOR_ARRAY will be created and
+                               returned.
+
     Returns:
-        Variable: The tensor type variable that has the data written to it.
+        Variable: The output LOD_TENSOR_ARRAY where the input tensor is written.
 
     Examples:
         .. code-block::python
@@ -1159,7 +1239,8 @@ class DynamicRNN(object):
         self.lod_rank_table = None
         self.max_seq_len = None
         self.step_idx = None
-        self.zero_idx = fill_constant(shape=[1], value=0, dtype='int64')
+        self.zero_idx = fill_constant(
+            shape=[1], value=0, dtype='int64', force_cpu=True)
         self.mem_dict = dict()
         self.output_array = []
         self.outputs = []
@@ -1173,7 +1254,7 @@ class DynamicRNN(object):
         self._assert_in_rnn_block_("step_input")
         if not isinstance(x, Variable):
             raise TypeError(
-                "step_input() can only take a Variable as its input")
+                "step_input() can only take a Variable as its input.")
         parent_block = self._parent_block_()
         if self.lod_rank_table is None:
             self.lod_rank_table = parent_block.create_var(
@@ -1210,11 +1291,32 @@ class DynamicRNN(object):
             outputs={'Out': input_array})
         return array_read(array=input_array, i=self.step_idx)
 
+    def static_input(self, x):
+        self._assert_in_rnn_block_("static_input")
+        if not isinstance(x, Variable):
+            raise TypeError(
+                "static_input() can only take a Variable as its input")
+        if self.lod_rank_table is None:
+            raise RuntimeError(
+                "static_input() must be called after step_input().")
+        parent_block = self._parent_block_()
+        x_reordered = parent_block.create_var(
+            name=unique_name("dynamic_rnn_static_input_reordered"),
+            type=core.VarDesc.VarType.LOD_TENSOR,
+            dtype=x.dtype)
+        parent_block.append_op(
+            type='reorder_lod_tensor_by_rank',
+            inputs={'X': [x],
+                    'RankTable': [self.lod_rank_table]},
+            outputs={'Out': [x_reordered]})
+        return shrink_memory(x_reordered, self.step_idx, self.lod_rank_table)
+
     @contextlib.contextmanager
     def block(self):
         if self.status != DynamicRNN.BEFORE_RNN:
             raise ValueError("rnn.block() can only be invoke once")
-        self.step_idx = fill_constant(shape=[1], dtype='int64', value=0)
+        self.step_idx = fill_constant(
+            shape=[1], dtype='int64', value=0, force_cpu=True)
         self.step_idx.stop_gradient = False
         self.status = DynamicRNN.IN_RNN
         with self.while_op.block():
@@ -1234,27 +1336,51 @@ class DynamicRNN(object):
 
     def __call__(self, *args, **kwargs):
         if self.status != DynamicRNN.AFTER_RNN:
-            raise ValueError(
-                "Dynamic RNN outputs can only be retrieved after rnn block")
+            raise ValueError(("Output of the dynamic RNN can only be visited "
+                              "outside the rnn block."))
         if len(self.outputs) == 1:
             return self.outputs[0]
         else:
             return self.outputs
 
-    def memory(self, init=None, shape=None, value=0.0, dtype='float32'):
+    def memory(self,
+               init=None,
+               shape=None,
+               value=0.0,
+               need_reorder=False,
+               dtype='float32'):
         self._assert_in_rnn_block_('memory')
         if init is not None:
             if not isinstance(init, Variable):
                 raise TypeError(
                     "The input arg `init` of memory() must be a Variable")
             parent_block = self._parent_block_()
+            init_tensor = init
+            if need_reorder == True:
+                if self.lod_rank_table is None:
+                    raise ValueError(
+                        'If set need_reorder to True, make sure step_input be '
+                        'invoked before '
+                        'memory(init=init, need_reordered=True, ...).')
+                init_reordered = parent_block.create_var(
+                    name=unique_name('dynamic_rnn_mem_init_reordered'),
+                    type=core.VarDesc.VarType.LOD_TENSOR,
+                    dtype=init.dtype)
+                parent_block.append_op(
+                    type='reorder_lod_tensor_by_rank',
+                    inputs={
+                        'X': [init_tensor],
+                        'RankTable': [self.lod_rank_table]
+                    },
+                    outputs={'Out': [init_reordered]})
+                init_tensor = init_reordered
             mem_array = parent_block.create_var(
                 name=unique_name('dynamic_rnn_mem_array'),
                 type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                 dtype=init.dtype)
             parent_block.append_op(
                 type='write_to_array',
-                inputs={'X': init,
+                inputs={'X': init_tensor,
                         'I': self.zero_idx},
                 outputs={'Out': mem_array})
             retv = array_read(array=mem_array, i=self.step_idx)
diff --git a/python/paddle/v2/fluid/layers/device.py b/python/paddle/v2/fluid/layers/device.py
index 775d40e5b5..ef74b2b2f0 100644
--- a/python/paddle/v2/fluid/layers/device.py
+++ b/python/paddle/v2/fluid/layers/device.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 """
 All util layers.
 """
diff --git a/python/paddle/v2/fluid/layers/io.py b/python/paddle/v2/fluid/layers/io.py
index 56c3f7b7b7..6177f0b4d7 100644
--- a/python/paddle/v2/fluid/layers/io.py
+++ b/python/paddle/v2/fluid/layers/io.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from .. import core
 from ..layer_helper import LayerHelper
 
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index b1534c5a88..808f25d6be 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 """
 All layers just related to the neural network.
 """
@@ -9,12 +22,35 @@ from ..param_attr import ParamAttr
 from tensor import concat
 
 __all__ = [
-    'fc', 'embedding', 'dynamic_lstm', 'gru_unit', 'linear_chain_crf',
-    'crf_decoding', 'cos_sim', 'cross_entropy', 'square_error_cost', 'accuracy',
-    'chunk_eval', 'sequence_conv', 'conv2d', 'sequence_pool', 'pool2d',
-    'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
-    'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
-    'sequence_first_step', 'sequence_last_step'
+    'fc',
+    'embedding',
+    'dynamic_lstm',
+    'gru_unit',
+    'linear_chain_crf',
+    'crf_decoding',
+    'cos_sim',
+    'cross_entropy',
+    'square_error_cost',
+    'accuracy',
+    'chunk_eval',
+    'sequence_conv',
+    'conv2d',
+    'sequence_pool',
+    'pool2d',
+    'batch_norm',
+    'beam_search_decode',
+    'conv2d_transpose',
+    'sequence_expand',
+    'lstm_unit',
+    'reduce_sum',
+    'reduce_mean',
+    'reduce_max',
+    'reduce_min',
+    'sequence_first_step',
+    'sequence_last_step',
+    'dropout',
+    'split',
+    'matmul',
 ]
 
 
@@ -248,13 +284,13 @@ def gru_unit(input,
             h_t & = dot((1-u_t), m_t) + dot(u_t, h_{t-1})
 
     The inputs of gru unit includes :math:`z_t`, :math:`h_{t-1}`. In terms
-    of the equation above, the :math:`z_t` is split into 3 parts - 
-    :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to 
-    implement a full GRU unit operator for an input, a fully 
+    of the equation above, the :math:`z_t` is split into 3 parts -
+    :math:`xu_t`, :math:`xr_t` and :math:`xm_t`. This means that in order to
+    implement a full GRU unit operator for an input, a fully
     connected layer has to be applied, such that :math:`z_t = W_{fc}x_t`.
 
-    The terms :math:`u_t` and :math:`r_t` represent the update and reset gates 
-    of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is 
+    The terms :math:`u_t` and :math:`r_t` represent the update and reset gates
+    of the GRU cell. Unlike LSTM, GRU has one lesser gate. However, there is
     an intermediate candidate hidden output, which is denoted by :math:`m_t`.
     This layer has three outputs :math:`h_t`, :math:`dot(r_t, h_{t-1})`
     and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.
@@ -276,7 +312,7 @@ def gru_unit(input,
         .. code-block:: python
 
              # assuming we have x_t_data and prev_hidden of size=10
-             x_t = fluid.layers.fc(input=x_t_data, size=30) 
+             x_t = fluid.layers.fc(input=x_t_data, size=30)
              hidden_val, r_h_val, gate_val = fluid.layers.gru_unit(input=x_t,
                                                     hidden = prev_hidden)
 
@@ -386,6 +422,21 @@ def cos_sim(X, Y, **kwargs):
     return out
 
 
+def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs):
+    helper = LayerHelper('dropout', **kwargs)
+    out = helper.create_tmp_variable(dtype=x.dtype)
+    mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True)
+    helper.append_op(
+        type='dropout',
+        inputs={'X': [x]},
+        outputs={'Out': [out],
+                 'Mask': [mask]},
+        attrs={'dropout_prob': dropout_prob,
+               'is_test': is_test,
+               'seed': seed})
+    return out
+
+
 def cross_entropy(input, label, **kwargs):
     """
     **Cross Entropy Layer**
@@ -739,7 +790,7 @@ def conv2d(input,
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
-        type='conv2d_cudnn',
+        type='conv2d',
         inputs={
             'Input': input,
             'Filter': filter_param,
@@ -968,7 +1019,7 @@ def batch_norm(input,
         default_initializer=Constant(1.0))
 
     bias = helper.create_parameter(
-        attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=True)
+        attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
 
     mean = helper.create_global_variable(
         dtype=input.dtype,
@@ -1489,3 +1540,131 @@ def reduce_min(input, dim=None, keep_dim=False):
             'reduce_all': True if dim == None else False
         })
     return out
+
+
+def split(input, num_or_sections, dim=-1):
+    """
+    Splits the tensor into multiple sub-tensors.
+
+    Args:
+        input (Variable): The input variable which is a Tensor or LoDTensor.
+        num_or_sections (int|list): If :attr:`num_or_sections` is an integer, 
+            then the integer indicates the number of equal sized sub-tensors 
+            that the tensor will be divided into. If :attr:`num_or_sections` 
+            is a list of integers, the length of list indicates the number of 
+            sub-tensors and the integers indicate the sizes of sub-tensors' 
+            :attr:`dim` dimension orderly.
+        dim (int): The dimension along which to split. If :math:`dim < 0`, the 
+            dimension to split along is :math:`rank(input) + dim`.
+
+    Returns:
+        List: The list of segmented tensor variables.
+
+    Examples:
+        .. code-block:: python
+
+            # x is a Tensor variable with shape [3, 9, 5]:
+            x0, x1, x2 = fluid.layers.split(x, num_or_sections=3, dim=1)
+            x0.shape  # [3, 3, 5]
+            x1.shape  # [3, 3, 5]
+            x2.shape  # [3, 3, 5]
+            x0, x1, x2 = fluid.layers.split(x, num_or_sections=[2, 3, 4], dim=1)
+            x0.shape  # [3, 2, 5]
+            x1.shape  # [3, 3, 5]
+            x2.shape  # [3, 4, 5]
+    """
+    helper = LayerHelper('split', **locals())
+    input_shape = input.shape
+    dim = (len(input_shape) + dim) if dim < 0 else dim
+    if isinstance(num_or_sections, int):
+        assert num_or_sections > 1, 'num_or_sections must be more than 1.'
+        num = num_or_sections
+    else:
+        assert len(num_or_sections) < input_shape[
+            dim], 'len(num_or_sections) must not be more than input.shape[dim].'
+        num = len(num_or_sections)
+    outs = [
+        helper.create_tmp_variable(dtype=helper.input_dtype())
+        for i in range(num)
+    ]
+    helper.append_op(
+        type='split',
+        inputs={'X': input},
+        outputs={'Out': outs},
+        attrs={
+            'num': num_or_sections if isinstance(num_or_sections, int) else 0,
+            'sections': num_or_sections
+            if isinstance(num_or_sections, list) else [],
+            'axis': dim
+        })
+    return outs
+
+
+def matmul(x, y, transpose_x=False, transpose_y=False, name=None):
+    """
+    Applies matrix multipication to two tensors. Currently only rank 1 to rank 
+    3 input tensors are supported.
+
+    The actual behavior depends on the shapes of :math:`x`, :math:`y` and the 
+    flag values of :attr:`transpose_x`, :attr:`transpose_y`. Specifically:
+
+    - If a transpose flag is specified, the last two dimensions of the tensor 
+      are transposed. If the tensor is rank-1 of shape :math:`[D]`, then for 
+      :math:`x` it is treated as :math:`[1, D]` in nontransposed form and as 
+      :math:`[D, 1]` in transposed form, whereas for :math:`y` it is the 
+      opposite: It is treated as :math:`[D, 1]` in nontransposed form and as 
+      :math:`[1, D]` in transposed form.
+
+    - After transpose, the two tensors are 2-D or 3-D and matrix multipication 
+      performs in the following way.
+
+      - If both are 2-D, they are multiplied like conventional matrices.
+      - If either is 3-D, it is treated as a stack of matrices residing in the 
+        last two dimensions and a batched matrix multiply supporting broadcast 
+        applies on the two tensors.
+
+    Also note that if the raw tensor :math:`x` or :math:`y` is rank-1 and 
+    nontransposed, the prepended or appended dimension :math:`1` will be 
+    removed after matrix multipication.
+
+    Args:
+        x (Variable): The input variable which is a Tensor or LoDTensor.
+        y (Variable): The input variable which is a Tensor or LoDTensor.
+        transpose_x (bool): Whether to transpose :math:`x` before multiplication.
+        transpose_y (bool): Whether to transpose :math:`y` before multiplication.
+        name(str|None): A name for this layer(optional). If set None, the layer 
+            will be named automatically.
+
+    Returns:
+        Variable: The product Tensor variable.
+
+    Examples:
+        .. code-block:: python
+
+            # Examples to clarify shapes of the inputs and output
+            # x: [B, M, K], y: [B, K, N]
+            fluid.layers.matmul(x, y)  # out: [B, M, N]
+            # x: [B, M, K], y: [K, N]
+            fluid.layers.matmul(x, y)  # out: [B, M, N]
+            # x: [B, M, K], y: [K]
+            fluid.layers.matmul(x, y)  # out: [B, M]
+            # x: [M, K], y: [K, N]
+            fluid.layers.matmul(x, y)  # out: [M, N]
+            # x: [K], y: [K]
+            fluid.layers.matmul(x, y)  # out: [1]
+            # x: [M], y: [N]
+            fluid.layers.matmul(x, y, True, True)  # out: [M, N]
+    """
+    helper = LayerHelper('matmul', **locals())
+    assert max(
+        len(x.shape), len(y.shape)
+    ) <= 3, 'Currently only rank 1 to rank 3 input tensors are supported.'
+    out = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='matmul',
+        inputs={'X': x,
+                'Y': y},
+        outputs={'Out': out},
+        attrs={'transpose_X': transpose_x,
+               'transpose_Y': transpose_y})
+    return out
diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py
index 544623c4bc..73d7c89580 100644
--- a/python/paddle/v2/fluid/layers/ops.py
+++ b/python/paddle/v2/fluid/layers/ops.py
@@ -1,23 +1,52 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from ..registry import register_layer
 
 __activations__ = [
+    'sigmoid',
+    'logsigmoid',
+    'exp',
+    'relu',
+    'tanh',
+    'tanh_shrink',
+    'softshrink',
+    'sqrt',
     'abs',
     'ceil',
-    'exp',
     'floor',
-    'log',
-    'relu',
     'round',
-    'sigmoid',
-    'sqrt',
+    'reciprocal',
+    'log',
     'square',
-    'tanh',
+    'softplus',
+    'softsign',
+    'brelu',
+    'leaky_relu',
+    'soft_relu',
+    'elu',
+    'relu6',
+    'pow',
+    'stanh',
+    'hard_shrink',
+    'thresholded_relu',
+    'hard_sigmoid',
+    'swish',
 ]
 
 __all__ = [
     'mean',
     'mul',
-    'dropout',
     'reshape',
     'scale',
     'transpose',
diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py
index 5f12ecfc14..255b9d4678 100644
--- a/python/paddle/v2/fluid/layers/tensor.py
+++ b/python/paddle/v2/fluid/layers/tensor.py
@@ -1,9 +1,34 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from ..layer_helper import LayerHelper
 from ..param_attr import ParamAttr
+from ..framework import convert_np_dtype_to_dtype_
+from ..framework import Variable
+from ..core import DataType
+import numpy
 
 __all__ = [
-    'create_tensor', 'create_parameter', 'cast', 'concat', 'sums', 'assign',
-    'fill_constant_batch_size_like', 'fill_constant', 'ones', 'zeros'
+    'create_tensor',
+    'create_parameter',
+    'cast',
+    'concat',
+    'sums',
+    'assign',
+    'fill_constant_batch_size_like',
+    'fill_constant',
+    'ones',
+    'zeros',
 ]
 
 
@@ -121,7 +146,7 @@ def assign(input, output):
     This function copies the *input* Variable to the *output* Variable.
 
     Args:
-        input(Variable): The source variable
+        input(Variable|numpy.ndarray): The source variable
         output(Variable): The destination variable
 
     Returns:
@@ -134,37 +159,64 @@ def assign(input, output):
           fluid.layers.assign(hidden, out)
     """
     helper = LayerHelper('assign', **locals())
-    helper.append_op(
-        type='scale',
-        inputs={'X': [input]},
-        outputs={'Out': [output]},
-        attrs={'scale': 1.0})
+    if isinstance(input, Variable):
+        helper.append_op(
+            type='scale',
+            inputs={'X': [input]},
+            outputs={'Out': [output]},
+            attrs={'scale': 1.0})
+    elif isinstance(input, numpy.ndarray):
+        dtype = convert_np_dtype_to_dtype_(input.dtype)
+        if dtype == DataType.FP32:
+            value_name = "fp32_values"
+            values = [float(v) for v in input.flat]
+        elif dtype == DataType.INT32:
+            value_name = "int32_values"
+            values = [int(v) for v in input.flat]
+        else:
+            raise ValueError("Unsupported dtype %s", input.dtype)
+        if input.size > 1024 * 1024:
+            raise ValueError("The size of input is too big. Please consider "
+                             "saving it to file and 'load_op' to load it")
+
+        helper.append_op(
+            type='assign_value',
+            outputs={'Out': [output]},
+            attrs={
+                'dtype': dtype,
+                'shape': list(input.shape),
+                value_name: values
+            })
+    else:
+        raise ValueError("Wrong type for assign input: %s" % type(input))
+
     return output
 
 
-def fill_constant(shape, dtype, value, out=None):
+def fill_constant(shape, dtype, value, force_cpu=False, out=None):
     """
     **fill_constant**
 
-    This function creates a tensor of specified *shape* and
-    *dtype*, and initializes this with a constant supplied in *value*.
+    This function creates a tensor with specified `shape` and `dtype`, and
+    initializes it with a constant specifed by `value`.
 
-    It also sets *stop_gradient* to True.
+    The attribute `stop_gradient` of the created tensor is set to True.
 
     Args:
-        shape(tuple|list|None): Shape of output tensor
-        dtype(np.dtype|core.DataType|str): Data type of output tensor
-        value(float): Constant value to initialize the output tensor
-        out(Variable): Output Variable to initialize
+        shape(tuple|list|None): Shape of the output tensor.
+        dtype(np.dtype|core.DataType|str): Data type of the output tensor.
+        value(float): The constant value used to initialize the output tensor.
+        out(Variable): The output tensor.
 
     Returns:
-        Variable: The tensor variable storing the output
+        Variable: The tensor variable storing the output.
 
     Examples:
         .. code-block:: python
 
           data = fluid.layers.fill_constant(shape=[1], value=0, dtype='int64')
     """
+
     helper = LayerHelper("fill_constant", **locals())
     if out is None:
         out = helper.create_tmp_variable(dtype=dtype)
@@ -172,9 +224,12 @@ def fill_constant(shape, dtype, value, out=None):
         type='fill_constant',
         inputs={},
         outputs={'Out': [out]},
-        attrs={'shape': shape,
-               'dtype': out.dtype,
-               'value': float(value)})
+        attrs={
+            'shape': shape,
+            'dtype': out.dtype,
+            'value': float(value),
+            'force_cpu': force_cpu
+        })
     out.stop_gradient = True
     return out
 
diff --git a/python/paddle/v2/fluid/memory_optimization_transpiler.py b/python/paddle/v2/fluid/memory_optimization_transpiler.py
index 571fce7fac..89ffe26ed1 100644
--- a/python/paddle/v2/fluid/memory_optimization_transpiler.py
+++ b/python/paddle/v2/fluid/memory_optimization_transpiler.py
@@ -1,8 +1,32 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from collections import defaultdict
 import framework
 from framework import Program, default_main_program, Parameter, Variable
 import backward
 from backward import _rename_arg_
+from . import core
+
+dtype_to_size = {
+    core.DataType.FP16: 2,
+    core.DataType.FP32: 4,
+    core.DataType.FP64: 8,
+    core.DataType.INT16: 2,
+    core.DataType.INT32: 4,
+    core.DataType.INT64: 8,
+    core.DataType.BOOL: 1
+}
 
 
 class ControlFlowGraph(object):
@@ -28,18 +52,33 @@ class ControlFlowGraph(object):
         block_size = program_desc.num_blocks()
 
         # TODO(qijun) handle Program with if/while operators
-        self.global_block = program_desc.block(0)
-        self.op_size = self.global_block.op_size()
+        self.global_block_desc = program_desc.block(0)
+        self.op_size = self.global_block_desc.op_size()
 
         op_node_connections = [(i, i + 1) for i in range(self.op_size - 1)]
         self._add_connections(op_node_connections)
 
-        self.ops = [self.global_block.op(i) for i in range(self.op_size)]
+        self.ops = [self.global_block_desc.op(i) for i in range(self.op_size)]
 
         for i in range(self.op_size):
             self._uses[i].update(self.ops[i].input_arg_names())
             self._defs[i].update(self.ops[i].output_arg_names())
 
+    def _update_graph(self, old_name, new_name, begin_idx=0):
+        for i in range(begin_idx, self.op_size):
+            if old_name in self._uses[i]:
+                self._uses[i].remove(old_name)
+                self._uses[i].add(new_name)
+            if old_name in self._defs[i]:
+                self._defs[i].remove(old_name)
+                self._defs[i].add(new_name)
+            if old_name in self._live_in[i]:
+                self._live_in[i].remove(old_name)
+                self._live_out[i].add(new_name)
+            if old_name in self._live_out[i]:
+                self._live_out[i].remove(old_name)
+                self._live_out[i].add(new_name)
+
     def _reach_fixed_point(self, live_in, live_out):
         if len(live_in) != len(self._live_in):
             return False
@@ -79,30 +118,47 @@ class ControlFlowGraph(object):
         self.pool = []
         for i in range(self.op_size):
             if self.pool:
-                out_pair = [(x, self.global_block.var(str(x)).shape())
+                out_pair = [(x, self.global_block_desc.var(str(x)).shape())
                             for x in self._defs[i]]
                 for x, x_shape in out_pair:
-                    for index, cache_pair in enumerate(self.pool):
-                        cache_var = cache_pair[0]
-                        cache_shape = cache_pair[1]
-                        if x_shape == cache_shape:
-                            print(
-                                "Hit Cache !!!! cache pool index is %d, var name is %s, cached var name is %s, var shape is %s "
-                                % (index, x, cache_var, str(cache_shape)))
-                            self.pool.pop(index)
-                            _rename_arg_(self.ops, x, cache_var, begin_idx=i)
-                            self._dataflow_analyze()
-                            break
+                    if not self.global_block_desc.var(str(x)).persistable():
+                        for index, cache_pair in enumerate(self.pool):
+                            cache_var = cache_pair[0]
+                            cache_shape = cache_pair[1]
+                            if x_shape == cache_shape:
+                                x_dtype = self.global_block_desc.var(str(
+                                    x)).dtype()
+                                cache_dtype = self.global_block_desc.var(
+                                    str(cache_var)).dtype()
+                                # TODO(qijun): actually, we should compare dtype_to_size[x_dtype]
+                                # and dtype_to_size[cache_dtype]
+                                if x_dtype == cache_dtype:
+                                    print(
+                                        ("Hit Cache !!!! cache pool index "
+                                         "is %d, var name is %s, "
+                                         "cached var name is %s, "
+                                         "var shape is %s ") %
+                                        (index, x, cache_var, str(cache_shape)))
+                                    self.pool.pop(index)
+                                    _rename_arg_(
+                                        self.ops, x, cache_var, begin_idx=i)
+                                    self._program.current_block().var(str(
+                                        x)).desc = self.global_block_desc.var(
+                                            str(cache_var))
+                                    self._update_graph(
+                                        x, cache_var, begin_idx=i)
+                                    break
 
             in_diff, out_diff = self._get_diff(self._live_in[i],
                                                self._live_out[i])
             can_optimize = filter(
-                lambda x: not self.global_block.var(str(x)).persistable(),
+                lambda x: not self.global_block_desc.var(str(x)).persistable(),
                 in_diff)
             if can_optimize:
                 for var_name in can_optimize:
-                    self.pool.append((
-                        var_name, self.global_block.var(str(var_name)).shape()))
+                    self.pool.append(
+                        (var_name,
+                         self.global_block_desc.var(str(var_name)).shape()))
 
     def get_program(self):
         return self._program
diff --git a/python/paddle/v2/fluid/net_drawer.py b/python/paddle/v2/fluid/net_drawer.py
index 94fdd5e389..7448975b59 100644
--- a/python/paddle/v2/fluid/net_drawer.py
+++ b/python/paddle/v2/fluid/net_drawer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import argparse
 import json
 import logging
diff --git a/python/paddle/v2/fluid/nets.py b/python/paddle/v2/fluid/nets.py
index 54886a8f2c..ee6f70b899 100644
--- a/python/paddle/v2/fluid/nets.py
+++ b/python/paddle/v2/fluid/nets.py
@@ -1,6 +1,24 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import layers
 
-__all__ = ["simple_img_conv_pool", "sequence_conv_pool"]
+__all__ = [
+    "simple_img_conv_pool",
+    "sequence_conv_pool",
+    "glu",
+    "dot_product_attention",
+]
 
 
 def simple_img_conv_pool(input,
@@ -98,3 +116,90 @@ def sequence_conv_pool(input,
 
     pool_out = layers.sequence_pool(input=conv_out, pool_type=pool_type)
     return pool_out
+
+
+def glu(input, dim=-1):
+    """
+    The gated linear unit composed by split, sigmoid activation and elementwise 
+    multiplication. Specifically, Split the input into two equal sized parts 
+    :math:`a` and :math:`b` along the given dimension and then compute as 
+    following:
+
+        .. math::
+
+            {GLU}(a, b)= a \otimes \sigma(b)
+
+    Refer to `Language Modeling with Gated Convolutional Networks 
+    <https://arxiv.org/pdf/1612.08083.pdf>`_.
+    
+    Args:
+        input (Variable): The input variable which is a Tensor or LoDTensor.
+        dim (int): The dimension along which to split. If :math:`dim < 0`, the 
+            dimension to split along is :math:`rank(input) + dim`.
+
+    Returns:
+        Variable: The Tensor variable with half the size of input.
+
+    Examples:
+        .. code-block:: python
+
+            # x is a Tensor variable with shape [3, 6, 9]
+            fluid.nets.glu(input=x, dim=1)  # shape of output: [3, 3, 9]
+    """
+
+    a, b = layers.split(input, num_or_sections=2, dim=dim)
+    act_b = layers.sigmoid(x=b)
+    out = layers.elementwise_mul(x=a, y=act_b)
+    return out
+
+
+def dot_product_attention(querys, keys, values):
+    """
+    The dot-product attention.
+
+    Attention mechanism can be seen as mapping a query and a set of key-value 
+    pairs to an output. The output is computed as a weighted sum of the values, 
+    where the weight assigned to each value is computed by a compatibility 
+    function (dot-product here) of the query with the corresponding key.
+    
+    The dot-product attention can be implemented through (batch) matrix 
+    multipication as follows:
+
+        .. math::
+
+            Attention(Q, K, V)= softmax(QK^\mathrm{T})V
+
+    Refer to `Attention Is All You Need 
+    <https://arxiv.org/pdf/1706.03762.pdf>`_.
+
+    Note that batch data containing sequences with different lengths is not 
+    supported by this because of the (batch) matrix multipication.
+    
+    Args:
+        query (Variable): The input variable which is a Tensor or LoDTensor.
+        key (Variable): The input variable which is a Tensor or LoDTensor.
+        value (Variable): The input variable which is a Tensor or LoDTensor.
+
+    Returns:
+        tuple: The Tensor variables representing the output and attention scores.
+
+    Examples:
+        .. code-block:: python
+
+            # Suppose q, k, v are tensor variables with the following shape:
+            # q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10]
+            out, attn_scores = fluid.nets.dot_product_attention(q, k, v)
+            out.shape  # [3, 5, 10]
+            attn_scores.shape  # [3, 5, 6]
+    """
+    assert keys.shape[-2] == values.shape[
+        -2], 'The shapes of keys and values mismatch.'
+    assert querys.shape[-1] == keys.shape[
+        -1], 'The shapes of querys and keys mismatch.'
+    product = layers.matmul(x=querys, y=keys, transpose_y=True)
+    attn_scores = layers.reshape(
+        x=layers.reshape(
+            x=product, shape=[-1, product.shape[-1]], act='softmax'),
+        shape=product.shape)
+    out = layers.matmul(attn_scores, values)
+    return out, attn_scores
diff --git a/python/paddle/v2/fluid/op.py b/python/paddle/v2/fluid/op.py
index 5828803497..4bc0f79c64 100644
--- a/python/paddle/v2/fluid/op.py
+++ b/python/paddle/v2/fluid/op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
 
diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py
index 40721b5e97..8bd62ef0c0 100644
--- a/python/paddle/v2/fluid/optimizer.py
+++ b/python/paddle/v2/fluid/optimizer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from collections import defaultdict
 
 import framework
diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py
index ab4561b042..3af0190590 100644
--- a/python/paddle/v2/fluid/param_attr.py
+++ b/python/paddle/v2/fluid/param_attr.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from initializer import Initializer, Xavier, Constant
 from regularizer import WeightDecayRegularizer
 
diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py
index dcecd76224..f049498b9f 100644
--- a/python/paddle/v2/fluid/profiler.py
+++ b/python/paddle/v2/fluid/profiler.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 from contextlib import contextmanager
 import os
diff --git a/python/paddle/v2/fluid/registry.py b/python/paddle/v2/fluid/registry.py
index 7aa8290611..6c0c3a3518 100644
--- a/python/paddle/v2/fluid/registry.py
+++ b/python/paddle/v2/fluid/registry.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import re
 import cStringIO
 import warnings
@@ -8,7 +21,11 @@ import proto.framework_pb2 as framework_pb2
 from framework import OpProtoHolder, Variable, Program, Operator
 from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
 
-__all__ = ['deprecated', 'register_layer', 'autodoc']
+__all__ = [
+    'deprecated',
+    'register_layer',
+    'autodoc',
+]
 
 
 def _convert_(name):
@@ -80,11 +97,10 @@ def _generate_doc_string_(op_proto):
 
 
 def register_layer(op_type):
-    """
-    Register an Python layer for an Operator
+    """Register the Python layer for an Operator.
 
     Args:
-       op_type: The name of the operator to be created
+       op_type: The name of the operator to be created.
 
     This function takes in the operator type (sigmoid, mean , average etc) and
     creates the operator functionality.
@@ -98,16 +114,16 @@ def register_layer(op_type):
 
     if len(not_intermediate_outputs) != 1:
         raise ValueError("Only one non intermediate output operator can be",
-                         "automatically generated")
+                         "automatically generated.")
 
     if not_intermediate_outputs[0].duplicable:
         raise ValueError(
-            "Only non duplicable op can be automatically generated")
+            "Only non duplicable op can be automatically generated.")
 
     for output in intermediate_outputs:
         if output.duplicable:
             raise ValueError("The op can be automatically generated only when ",
-                             "all intermediate ops are not duplicable")
+                             "all intermediate ops are not duplicable.")
 
     o_name = not_intermediate_outputs[0].name
     intermediate_output_names = [output.name for output in intermediate_outputs]
diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py
index d1955b0047..e53dee98fd 100644
--- a/python/paddle/v2/fluid/regularizer.py
+++ b/python/paddle/v2/fluid/regularizer.py
@@ -1,6 +1,23 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import framework
 
-__all__ = ['append_regularization_ops', 'L1Decay', 'L2Decay']
+__all__ = [
+    'append_regularization_ops',
+    'L1Decay',
+    'L2Decay',
+]
 
 
 def append_regularization_ops(parameters_and_grads, regularization=None):
diff --git a/python/paddle/v2/fluid/tests/CMakeLists.txt b/python/paddle/v2/fluid/tests/CMakeLists.txt
index e795627bfe..9a0240cbf6 100644
--- a/python/paddle/v2/fluid/tests/CMakeLists.txt
+++ b/python/paddle/v2/fluid/tests/CMakeLists.txt
@@ -5,3 +5,4 @@ foreach(src ${TEST_OPS})
 endforeach()
 
 add_subdirectory(book)
+add_subdirectory(book_distribute)
diff --git a/python/paddle/v2/fluid/tests/__init__.py b/python/paddle/v2/fluid/tests/__init__.py
index e69de29bb2..2619c1c0e9 100644
--- a/python/paddle/v2/fluid/tests/__init__.py
+++ b/python/paddle/v2/fluid/tests/__init__.py
@@ -0,0 +1,13 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
diff --git a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
index fbf46ac6cb..904df66dc1 100644
--- a/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
+++ b/python/paddle/v2/fluid/tests/book/test_fit_a_line.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
index 3d336ffe95..a06486aa08 100644
--- a/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
+++ b/python/paddle/v2/fluid/tests/book/test_image_classification_train.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 
 import sys
diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
index 74ca56182c..42971da0f0 100644
--- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import math
 
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/book/test_machine_translation.py b/python/paddle/v2/fluid/tests/book/test_machine_translation.py
index e79864b397..deeb6b1bad 100644
--- a/python/paddle/v2/fluid/tests/book/test_machine_translation.py
+++ b/python/paddle/v2/fluid/tests/book/test_machine_translation.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
index 35bf8da924..1d5defbed3 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_conv.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
index 51bfe2973d..02da2fcc85 100644
--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits_mlp.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
index e3cc2a8937..47e2afcd83 100644
--- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py
+++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
index f103358edc..b44d2b41e3 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
index cd28f04b85..5a139c1dcd 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
index 633de66bea..fab8a82f85 100644
--- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
+++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py
index 8b928ff9ee..3d4bbccd33 100644
--- a/python/paddle/v2/fluid/tests/book/test_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt b/python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
new file mode 100644
index 0000000000..4d7664469e
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/CMakeLists.txt
@@ -0,0 +1,5 @@
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+foreach(src ${TEST_OPS})
+    py_test(${src} SRCS ${src}.py)
+endforeach()
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
new file mode 100644
index 0000000000..b886071f94
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py
@@ -0,0 +1,75 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+import os
+
+x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+
+y_predict = fluid.layers.fc(input=x, size=1, act=None)
+
+y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+
+cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+avg_cost = fluid.layers.mean(x=cost)
+
+sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+
+BATCH_SIZE = 20
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.uci_housing.train(), buf_size=500),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
+exe = fluid.Executor(place)
+
+t = fluid.DistributeTranspiler()
+# all parameter server endpoints list for spliting parameters
+pserver_endpoints = os.getenv("PSERVERS")
+# server endpoint for current node
+current_endpoint = os.getenv("SERVER_ENDPOINT")
+# run as trainer or parameter server
+training_role = os.getenv("TRAINING_ROLE",
+                          "TRAINER")  # get the training role: trainer/pserver
+t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
+
+if training_role == "PSERVER":
+    if not current_endpoint:
+        print("need env SERVER_ENDPOINT")
+        exit(1)
+    pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
+    exe.run(fluid.default_startup_program())
+    exe.run(pserver_prog)
+else:
+    trainer_prog = t.get_trainer_program()
+
+    exe.run(fluid.default_startup_program())
+
+    PASS_NUM = 100
+    for pass_id in range(PASS_NUM):
+        fluid.io.save_persistables(exe, "./fit_a_line.model/")
+        fluid.io.load_persistables(exe, "./fit_a_line.model/")
+        for data in train_reader():
+            avg_loss_value, = exe.run(trainer_prog,
+                                      feed=feeder.feed(data),
+                                      fetch_list=[avg_cost])
+
+            if avg_loss_value[0] < 10.0:
+                exit(0)
+exit(1)
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
new file mode 100644
index 0000000000..2b5a098ff2
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_label_semantic_roles.py
@@ -0,0 +1,238 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import math
+
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.dataset.conll05 as conll05
+import paddle.v2.fluid as fluid
+import time
+import os
+
+word_dict, verb_dict, label_dict = conll05.get_dict()
+word_dict_len = len(word_dict)
+label_dict_len = len(label_dict)
+pred_len = len(verb_dict)
+
+mark_dict_len = 2
+word_dim = 32
+mark_dim = 5
+hidden_dim = 512
+depth = 8
+mix_hidden_lr = 1e-3
+
+IS_SPARSE = True
+PASS_NUM = 10
+BATCH_SIZE = 20
+
+embedding_name = 'emb'
+
+
+def load_parameter(file_name, h, w):
+    with open(file_name, 'rb') as f:
+        f.read(16)  # skip header.
+        return np.fromfile(f, dtype=np.float32).reshape(h, w)
+
+
+def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
+            **ignored):
+    # 8 features
+    predicate_embedding = fluid.layers.embedding(
+        input=predicate,
+        size=[pred_len, word_dim],
+        dtype='float32',
+        is_sparse=IS_SPARSE,
+        param_attr='vemb')
+
+    mark_embedding = fluid.layers.embedding(
+        input=mark,
+        size=[mark_dict_len, mark_dim],
+        dtype='float32',
+        is_sparse=IS_SPARSE)
+
+    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
+    emb_layers = [
+        fluid.layers.embedding(
+            size=[word_dict_len, word_dim],
+            input=x,
+            param_attr=fluid.ParamAttr(
+                name=embedding_name, trainable=False)) for x in word_input
+    ]
+    emb_layers.append(predicate_embedding)
+    emb_layers.append(mark_embedding)
+
+    hidden_0_layers = [
+        fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
+    ]
+
+    hidden_0 = fluid.layers.sums(input=hidden_0_layers)
+
+    lstm_0 = fluid.layers.dynamic_lstm(
+        input=hidden_0,
+        size=hidden_dim,
+        candidate_activation='relu',
+        gate_activation='sigmoid',
+        cell_activation='sigmoid')
+
+    # stack L-LSTM and R-LSTM with direct edges
+    input_tmp = [hidden_0, lstm_0]
+
+    for i in range(1, depth):
+        mix_hidden = fluid.layers.sums(input=[
+            fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
+            fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
+        ])
+
+        lstm = fluid.layers.dynamic_lstm(
+            input=mix_hidden,
+            size=hidden_dim,
+            candidate_activation='relu',
+            gate_activation='sigmoid',
+            cell_activation='sigmoid',
+            is_reverse=((i % 2) == 1))
+
+        input_tmp = [mix_hidden, lstm]
+
+    feature_out = fluid.layers.sums(input=[
+        fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
+        fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
+    ])
+
+    return feature_out
+
+
+def to_lodtensor(data, place):
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def main():
+    # define network topology
+    word = fluid.layers.data(
+        name='word_data', shape=[1], dtype='int64', lod_level=1)
+    predicate = fluid.layers.data(
+        name='verb_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_n2 = fluid.layers.data(
+        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_n1 = fluid.layers.data(
+        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_0 = fluid.layers.data(
+        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_p1 = fluid.layers.data(
+        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
+    ctx_p2 = fluid.layers.data(
+        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
+    mark = fluid.layers.data(
+        name='mark_data', shape=[1], dtype='int64', lod_level=1)
+    feature_out = db_lstm(**locals())
+    target = fluid.layers.data(
+        name='target', shape=[1], dtype='int64', lod_level=1)
+    crf_cost = fluid.layers.linear_chain_crf(
+        input=feature_out,
+        label=target,
+        param_attr=fluid.ParamAttr(
+            name='crfw', learning_rate=mix_hidden_lr))
+    avg_cost = fluid.layers.mean(x=crf_cost)
+
+    # TODO(qiao)
+    # check other optimizers and check why out will be NAN
+    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
+    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
+
+    # TODO(qiao)
+    # add dependency track and move this config before optimizer
+    crf_decode = fluid.layers.crf_decoding(
+        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
+
+    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
+        input=crf_decode,
+        label=target,
+        chunk_scheme="IOB",
+        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
+
+    train_data = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.conll05.test(), buf_size=8192),
+        batch_size=BATCH_SIZE)
+    place = fluid.CPUPlace()
+    feeder = fluid.DataFeeder(
+        feed_list=[
+            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
+        ],
+        place=place)
+    exe = fluid.Executor(place)
+
+    t = fluid.DistributeTranspiler()
+    pserver_endpoints = os.getenv("PSERVERS")
+    # server endpoint for current node
+    current_endpoint = os.getenv("SERVER_ENDPOINT")
+    # run as trainer or parameter server
+    training_role = os.getenv(
+        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
+    t.transpile(
+        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
+
+    if training_role == "PSERVER":
+        if not current_endpoint:
+            print("need env SERVER_ENDPOINT")
+            exit(1)
+        pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
+        exe.run(fluid.default_startup_program())
+        exe.run(pserver_prog)
+    elif training_role == "TRAINER":
+        trainer_prog = t.get_trainer_program()
+        start_time = time.time()
+        batch_id = 0
+        exe.run(fluid.default_startup_program())
+        embedding_param = fluid.global_scope().find_var(
+            embedding_name).get_tensor()
+        embedding_param.set(
+            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
+            place)
+        for pass_id in xrange(PASS_NUM):
+            chunk_evaluator.reset(exe)
+            for data in train_data():
+                cost, precision, recall, f1_score = exe.run(
+                    trainer_prog,
+                    feed=feeder.feed(data),
+                    fetch_list=[avg_cost] + chunk_evaluator.metrics)
+                pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(
+                    exe)
+
+                if batch_id % 10 == 0:
+                    print("avg_cost:" + str(cost) + " precision:" + str(
+                        precision) + " recall:" + str(recall) + " f1_score:" +
+                          str(f1_score) + " pass_precision:" + str(
+                              pass_precision) + " pass_recall:" + str(
+                                  pass_recall) + " pass_f1_score:" + str(
+                                      pass_f1_score))
+                    if batch_id != 0:
+                        print("second per batch: " + str((time.time(
+                        ) - start_time) / batch_id))
+
+                batch_id = batch_id + 1
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
similarity index 85%
rename from python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py
rename to python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
index b41853784d..dc04af5b7b 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/test_dist_word2vec.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_dist_word2vec.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
index 20b4a8b34c..27512c4f78 100644
--- a/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_recognize_digits_conv_dist.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import numpy as np
 import paddle.v2 as paddle
diff --git a/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
new file mode 100644
index 0000000000..74f20f3f4c
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/notest_understand_sentiment_conv_dist.py
@@ -0,0 +1,123 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from __future__ import print_function
+import os
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+
+def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32,
+                    hid_dim=32):
+    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim])
+    conv_3 = fluid.nets.sequence_conv_pool(
+        input=emb,
+        num_filters=hid_dim,
+        filter_size=3,
+        act="tanh",
+        pool_type="sqrt")
+    conv_4 = fluid.nets.sequence_conv_pool(
+        input=emb,
+        num_filters=hid_dim,
+        filter_size=4,
+        act="tanh",
+        pool_type="sqrt")
+    prediction = fluid.layers.fc(input=[conv_3, conv_4],
+                                 size=class_dim,
+                                 act="softmax")
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002)
+    optimize_ops, params_grads = adam_optimizer.minimize(avg_cost)
+    accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
+    return avg_cost, accuracy, accuracy.metrics[0], optimize_ops, params_grads
+
+
+def to_lodtensor(data, place):
+    seq_lens = [len(seq) for seq in data]
+    cur_len = 0
+    lod = [cur_len]
+    for l in seq_lens:
+        cur_len += l
+        lod.append(cur_len)
+    flattened_data = np.concatenate(data, axis=0).astype("int64")
+    flattened_data = flattened_data.reshape([len(flattened_data), 1])
+    res = fluid.LoDTensor()
+    res.set(flattened_data, place)
+    res.set_lod([lod])
+    return res
+
+
+def main():
+    BATCH_SIZE = 100
+    PASS_NUM = 5
+
+    word_dict = paddle.dataset.imdb.word_dict()
+    dict_dim = len(word_dict)
+    class_dim = 2
+
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    cost, accuracy, acc_out, optimize_ops, params_grads = convolution_net(
+        data, label, input_dim=dict_dim, class_dim=class_dim)
+
+    train_data = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.imdb.train(word_dict), buf_size=1000),
+        batch_size=BATCH_SIZE)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    t = fluid.DistributeTranspiler()
+
+    # all parameter server endpoints list for spliting parameters
+    pserver_endpoints = os.getenv("PSERVERS")
+    # server endpoint for current node
+    current_endpoint = os.getenv("SERVER_ENDPOINT")
+    # run as trainer or parameter server
+    training_role = os.getenv(
+        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
+    t.transpile(
+        optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
+
+    exe.run(fluid.default_startup_program())
+
+    if training_role == "PSERVER":
+        if not current_endpoint:
+            print("need env SERVER_ENDPOINT")
+            exit(1)
+        pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
+        exe.run(pserver_prog)
+    elif training_role == "TRAINER":
+        trainer_prog = t.get_trainer_program()
+        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
+
+        for pass_id in xrange(PASS_NUM):
+            accuracy.reset(exe)
+            for data in train_data():
+                cost_val, acc_val = exe.run(trainer_prog,
+                                            feed=feeder.feed(data),
+                                            fetch_list=[cost, acc_out])
+                pass_acc = accuracy.eval(exe)
+                print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
+                      " pass_acc=" + str(pass_acc))
+                if cost_val < 1.0 and pass_acc > 0.8:
+                    exit(0)
+    else:
+        print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/python/paddle/v2/fluid/tests/book_distribute/test_split_var.py b/python/paddle/v2/fluid/tests/book_distribute/test_split_var.py
new file mode 100644
index 0000000000..f979f642d8
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/book_distribute/test_split_var.py
@@ -0,0 +1,52 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import math
+import unittest
+from paddle.v2.fluid.distribute_transpiler import split_dense_variable
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.core as core
+import random
+
+
+class TestSplitVar(unittest.TestCase):
+    def test_check_output(self):
+        # split below shapes to 10 servers
+        shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10]]
+        expected_sizes = [
+            [15], [1024],
+            [2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 2352, 784],
+            [2040, 2040, 2040, 2040],
+            [1150, 1150, 1150, 1150, 1150, 1150, 1100]
+        ]
+        var_list = []
+        program = fluid.Program()
+        for shape in shapes:
+            var = program.global_block().create_var(
+                name=str(random.randint(10000, 99999)),
+                persistable=True,
+                # dtype=core.VarDesc.VarType.LOD_TENSOR,
+                shape=shape)
+            var_list.append(var)
+        blocks = split_dense_variable(var_list, 10)
+        all_sizes = []
+        for s in expected_sizes:
+            for s2 in s:
+                all_sizes.append(s2)
+        for i, block_str in enumerate(blocks):
+            varname, block_id, size = block_str.split(":")
+            self.assertEqual(int(size), all_sizes[i])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/decorators.py b/python/paddle/v2/fluid/tests/decorators.py
index 154619b0e9..3b314a15e1 100644
--- a/python/paddle/v2/fluid/tests/decorators.py
+++ b/python/paddle/v2/fluid/tests/decorators.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid as fluid
 
 __all__ = ['many_times', 'prog_scope']
diff --git a/python/paddle/v2/fluid/tests/demo/fc_gan.py b/python/paddle/v2/fluid/tests/demo/fc_gan.py
index cae959593e..5f9e8f9507 100644
--- a/python/paddle/v2/fluid/tests/demo/fc_gan.py
+++ b/python/paddle/v2/fluid/tests/demo/fc_gan.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import errno
 import math
 import os
diff --git a/python/paddle/v2/fluid/tests/op_test.py b/python/paddle/v2/fluid/tests/op_test.py
index b77d2b1268..c3b2220e6e 100644
--- a/python/paddle/v2/fluid/tests/op_test.py
+++ b/python/paddle/v2/fluid/tests/op_test.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import random
@@ -31,7 +44,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
             kwargs[in_name] = []
             if in_dup:
                 sub_in = inputs[in_name]
-                for sub_in_name, _ in sub_in:
+                for item in sub_in:
+                    sub_in_name, _ = item[0], item[1]
                     __create_var__(in_name, sub_in_name)
             else:
                 __create_var__(in_name, in_name)
@@ -41,7 +55,8 @@ def create_op(scope, op_type, inputs, outputs, attrs):
             kwargs[out_name] = []
             if out_dup:
                 sub_out = outputs[out_name]
-                for sub_out_name, _ in sub_out:
+                for item in sub_out:
+                    sub_out_name, _ = item[0], item[1]
                     __create_var__(out_name, sub_out_name)
             else:
                 __create_var__(out_name, out_name)
@@ -71,13 +86,15 @@ def set_input(scope, op, inputs, place):
         if in_name in inputs:
             if in_dup:
                 sub_in = inputs[in_name]
-                for sub_in_name, sub_in_val in sub_in:
+                for item in sub_in:
+                    sub_in_name, sub_in_val = item[0], item[1]
                     __set_input__(sub_in_name, sub_in_val)
             else:
                 __set_input__(in_name, inputs[in_name])
 
 
-def get_numeric_gradient(scope,
+def get_numeric_gradient(place,
+                         scope,
                          op,
                          inputs,
                          input_to_check,
@@ -85,7 +102,7 @@ def get_numeric_gradient(scope,
                          delta=0.005,
                          in_place=False):
     # FIXME: change this method by compile time concepts
-    set_input(scope, op, inputs, core.CPUPlace())
+    set_input(scope, op, inputs, place)
 
     def product(dim):
         return reduce(lambda a, b: a * b, dim, 1)
@@ -93,7 +110,7 @@ def get_numeric_gradient(scope,
     def get_output():
         sum = []
         for output_name in output_names:
-            op.run(scope, core.CPUPlace())
+            op.run(scope, place)
             sum.append(
                 np.array(scope.find_var(output_name).get_tensor()).mean())
         return np.array(sum).mean()
@@ -127,7 +144,7 @@ def get_numeric_gradient(scope,
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
         if in_place:
-            set_input(scope, op, inputs, core.CPUPlace())
+            set_input(scope, op, inputs, place)
 
         # get one input element throw it's index i.
         origin = __get_elem__(tensor_to_check, i)
@@ -137,7 +154,7 @@ def get_numeric_gradient(scope,
         y_pos = get_output()
 
         if in_place:
-            set_input(scope, op, inputs, core.CPUPlace())
+            set_input(scope, op, inputs, place)
 
         x_neg = origin - delta
         __set_elem__(tensor_to_check, i, x_neg)
@@ -283,7 +300,8 @@ class OpTest(unittest.TestCase):
                 if not isinstance(sub_out, list):
                     raise AssertionError("sub_out type %s is not list",
                                          type(sub_out))
-                for sub_out_name, expect in sub_out:
+                for item in sub_out:
+                    sub_out_name, expect = item[0], item[1]
                     idx = find_actual(sub_out_name, fetch_list)
                     actual = outs[idx]
                     actual_t = np.array(actual)
@@ -347,6 +365,24 @@ class OpTest(unittest.TestCase):
                    in_place=False,
                    max_relative_error=0.005,
                    user_defined_grads=None):
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu() and core.op_support_gpu(self.op_type):
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            self.check_grad_with_place(place, inputs_to_check, output_names,
+                                       no_grad_set, numeric_grad_delta,
+                                       in_place, max_relative_error,
+                                       user_defined_grads)
+
+    def check_grad_with_place(self,
+                              place,
+                              inputs_to_check,
+                              output_names,
+                              no_grad_set=None,
+                              numeric_grad_delta=0.005,
+                              in_place=False,
+                              max_relative_error=0.005,
+                              user_defined_grads=None):
         self.scope = core.Scope()
         op_inputs = self.inputs if hasattr(self, "inputs") else dict()
         op_outputs = self.outputs if hasattr(self, "outputs") else dict()
@@ -362,6 +398,7 @@ class OpTest(unittest.TestCase):
 
         numeric_grads = user_defined_grads or [
             get_numeric_gradient(
+                place,
                 self.scope,
                 self.op,
                 self.inputs,
@@ -370,22 +407,12 @@ class OpTest(unittest.TestCase):
                 delta=numeric_grad_delta,
                 in_place=in_place) for input_to_check in inputs_to_check
         ]
-        cpu_place = core.CPUPlace()
-        cpu_analytic_grads = self._get_gradient(inputs_to_check, cpu_place,
-                                                output_names, no_grad_set)
-
-        self.__assert_is_close(numeric_grads, cpu_analytic_grads,
-                               inputs_to_check, max_relative_error,
-                               "Gradient Check On %s" % str(cpu_place))
-
-        if core.is_compile_gpu() and self.op.support_gpu():
-            gpu_place = core.CUDAPlace(0)
-            gpu_analytic_grads = self._get_gradient(inputs_to_check, gpu_place,
-                                                    output_names, no_grad_set)
-
-            self.__assert_is_close(numeric_grads, gpu_analytic_grads,
-                                   inputs_to_check, max_relative_error,
-                                   "Gradient Check On %s" % str(gpu_place))
+        analytic_grads = self._get_gradient(inputs_to_check, place,
+                                            output_names, no_grad_set)
+
+        self.__assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
+                               max_relative_error,
+                               "Gradient Check On %s" % str(place))
 
     @staticmethod
     def _create_var_descs_(block, var_dict):
diff --git a/python/paddle/v2/fluid/tests/test_accuracy_op.py b/python/paddle/v2/fluid/tests/test_accuracy_op.py
index 6f72918b71..a20abac8a0 100644
--- a/python/paddle/v2/fluid/tests/test_accuracy_op.py
+++ b/python/paddle/v2/fluid/tests/test_accuracy_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_activation_op.py b/python/paddle/v2/fluid/tests/test_activation_op.py
index 03eb7deb9a..a6a6eb9d63 100644
--- a/python/paddle/v2/fluid/tests/test_activation_op.py
+++ b/python/paddle/v2/fluid/tests/test_activation_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_adadelta_op.py b/python/paddle/v2/fluid/tests/test_adadelta_op.py
index 7105593a98..8de6a1f9a9 100644
--- a/python/paddle/v2/fluid/tests/test_adadelta_op.py
+++ b/python/paddle/v2/fluid/tests/test_adadelta_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_adagrad_op.py b/python/paddle/v2/fluid/tests/test_adagrad_op.py
index 7b2d02fbf4..30ed092d48 100644
--- a/python/paddle/v2/fluid/tests/test_adagrad_op.py
+++ b/python/paddle/v2/fluid/tests/test_adagrad_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_adam_op.py b/python/paddle/v2/fluid/tests/test_adam_op.py
index 7dbc2fa085..32d00cf702 100644
--- a/python/paddle/v2/fluid/tests/test_adam_op.py
+++ b/python/paddle/v2/fluid/tests/test_adam_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_adamax_op.py b/python/paddle/v2/fluid/tests/test_adamax_op.py
index 8e5a15aa3d..35b2bc47ed 100644
--- a/python/paddle/v2/fluid/tests/test_adamax_op.py
+++ b/python/paddle/v2/fluid/tests/test_adamax_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_array_read_write_op.py b/python/paddle/v2/fluid/tests/test_array_read_write_op.py
index 01321de8ea..8775cd4f9f 100644
--- a/python/paddle/v2/fluid/tests/test_array_read_write_op.py
+++ b/python/paddle/v2/fluid/tests/test_array_read_write_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.layers as layers
diff --git a/python/paddle/v2/fluid/tests/test_assign_op.py b/python/paddle/v2/fluid/tests/test_assign_op.py
index 1b0c145f1a..4ac173c96b 100644
--- a/python/paddle/v2/fluid/tests/test_assign_op.py
+++ b/python/paddle/v2/fluid/tests/test_assign_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import op_test
 import numpy
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_assign_value_op.py b/python/paddle/v2/fluid/tests/test_assign_value_op.py
new file mode 100644
index 0000000000..f4e2ff9bde
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_assign_value_op.py
@@ -0,0 +1,53 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.layers as layers
+import op_test
+import numpy
+import unittest
+import paddle.v2.fluid.framework as framework
+
+
+class TestAssignValueOp(op_test.OpTest):
+    def setUp(self):
+        self.op_type = "assign_value"
+        x = numpy.random.random(size=(2, 5)).astype(numpy.float32)
+        self.inputs = {}
+        self.outputs = {'Out': x}
+        self.attrs = {
+            'shape': x.shape,
+            'dtype': framework.convert_np_dtype_to_dtype_(x.dtype),
+            'fp32_values': [float(v) for v in x.flat]
+        }
+
+    def test_forward(self):
+        self.check_output()
+
+    def test_assign(self):
+        val = (
+            -100 + 200 * numpy.random.random(size=(2, 5))).astype(numpy.int32)
+        x = layers.create_tensor(dtype="float32")
+        layers.assign(input=val, output=x)
+        exe = fluid.Executor(fluid.CPUPlace())
+        fetched_x = exe.run(fluid.default_main_program(),
+                            feed={},
+                            fetch_list=[x])[0]
+        self.assertTrue(
+            numpy.array_equal(fetched_x, val),
+            "fetch_x=%s val=%s" % (fetched_x, val))
+        self.assertEqual(fetched_x.dtype, val.dtype)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_auc_op.py b/python/paddle/v2/fluid/tests/test_auc_op.py
index 26ea905d88..aa74d224d5 100644
--- a/python/paddle/v2/fluid/tests/test_auc_op.py
+++ b/python/paddle/v2/fluid/tests/test_auc_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_batch_norm_op.py b/python/paddle/v2/fluid/tests/test_batch_norm_op.py
index ac9418549f..fe82b7d7f3 100644
--- a/python/paddle/v2/fluid/tests/test_batch_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_batch_norm_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py
index f329214dce..9ef6e08cc1 100644
--- a/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py
+++ b/python/paddle/v2/fluid/tests/test_beam_search_decode_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_beam_search_op.py b/python/paddle/v2/fluid/tests/test_beam_search_op.py
index 319a7e49e3..f31c737ba6 100644
--- a/python/paddle/v2/fluid/tests/test_beam_search_op.py
+++ b/python/paddle/v2/fluid/tests/test_beam_search_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import logging
 from paddle.v2.fluid.op import Operator, DynamicRecurrentOp
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py b/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py
index 080ca43b82..aed1bf4d3a 100644
--- a/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py
+++ b/python/paddle/v2/fluid/tests/test_bilinear_tensor_product_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_calc_gradient.py b/python/paddle/v2/fluid/tests/test_calc_gradient.py
index c34c8ff6d1..b99eeb09cd 100644
--- a/python/paddle/v2/fluid/tests/test_calc_gradient.py
+++ b/python/paddle/v2/fluid/tests/test_calc_gradient.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/test_cast_op.py b/python/paddle/v2/fluid/tests/test_cast_op.py
index 4e431bb88d..3795b96dbf 100644
--- a/python/paddle/v2/fluid/tests/test_cast_op.py
+++ b/python/paddle/v2/fluid/tests/test_cast_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import op_test
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_chunk_eval_op.py b/python/paddle/v2/fluid/tests/test_chunk_eval_op.py
index 53bf6f815b..59ef2bbb2f 100644
--- a/python/paddle/v2/fluid/tests/test_chunk_eval_op.py
+++ b/python/paddle/v2/fluid/tests/test_chunk_eval_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_clip.py b/python/paddle/v2/fluid/tests/test_clip.py
new file mode 100644
index 0000000000..63353a1096
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_clip.py
@@ -0,0 +1,80 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from __future__ import print_function
+import numpy as np
+import paddle.v2 as paddle
+import paddle.v2.fluid as fluid
+
+BATCH_SIZE = 128
+CLIP_MAX = 2e-6
+CLIP_MIN = -1e-6
+
+prog = fluid.framework.Program()
+
+with fluid.program_guard(main_program=prog):
+    image = fluid.layers.data(name='x', shape=[784], dtype='float32')
+
+    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
+    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+
+    label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+prog_clip = prog.clone()
+prog_clip.block(0).var(hidden1.name).set_error_clip(
+    fluid.clip.ErrorClipByValue(
+        max=CLIP_MAX, min=CLIP_MIN))
+
+avg_cost_clip = prog_clip.block(0).var(avg_cost.name)
+fluid.backward.append_backward(loss=avg_cost)
+fluid.backward.append_backward(
+    loss=avg_cost_clip, callback=fluid.clip.error_clip_callback)
+
+hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD")
+hidden1_grad_clip = prog_clip.block(0).var(hidden1.name + "@GRAD")
+
+hidden2_grad = prog.block(0).var(hidden2.name + "@GRAD")
+hidden2_grad_clip = prog_clip.block(0).var(hidden2.name + "@GRAD")
+
+train_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.mnist.train(), buf_size=8192),
+    batch_size=BATCH_SIZE)
+
+place = fluid.CPUPlace()
+exe = fluid.Executor(place)
+feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
+exe.run(fluid.default_startup_program())
+
+count = 0
+for data in train_reader():
+    count += 1
+    if count > 5:
+        break
+    out1, out2 = exe.run(prog,
+                         feed=feeder.feed(data),
+                         fetch_list=[hidden1_grad, hidden2_grad])
+    out1_clip, out2_clip = exe.run(
+        prog_clip,
+        feed=feeder.feed(data),
+        fetch_list=[hidden1_grad_clip, hidden2_grad_clip])
+    if not ((out1.clip(
+            min=CLIP_MIN, max=CLIP_MAX) == out1_clip).all() and
+            (out2 == out2_clip).all()):
+        exit(1)
+
+exit(0)
diff --git a/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py b/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py
index 02f6108a3a..5147e75046 100644
--- a/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_clip_by_norm_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_clip_op.py b/python/paddle/v2/fluid/tests/test_clip_op.py
index a7e1bf1744..3338dc61b3 100644
--- a/python/paddle/v2/fluid/tests/test_clip_op.py
+++ b/python/paddle/v2/fluid/tests/test_clip_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_compare_op.py b/python/paddle/v2/fluid/tests/test_compare_op.py
index 5d0dfab6ff..fbf8921e40 100644
--- a/python/paddle/v2/fluid/tests/test_compare_op.py
+++ b/python/paddle/v2/fluid/tests/test_compare_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import op_test
 import unittest
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_concat_op.py b/python/paddle/v2/fluid/tests/test_concat_op.py
index a792d1c106..3e413e1540 100644
--- a/python/paddle/v2/fluid/tests/test_concat_op.py
+++ b/python/paddle/v2/fluid/tests/test_concat_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_cond_op.py b/python/paddle/v2/fluid/tests/test_cond_op.py
index 32e54084e4..5312fa51a2 100644
--- a/python/paddle/v2/fluid/tests/test_cond_op.py
+++ b/python/paddle/v2/fluid/tests/test_cond_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import logging
 import paddle.v2.fluid.core as core
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_conditional_block.py b/python/paddle/v2/fluid/tests/test_conditional_block.py
index 7d815123f3..965e7d39c8 100644
--- a/python/paddle/v2/fluid/tests/test_conditional_block.py
+++ b/python/paddle/v2/fluid/tests/test_conditional_block.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_const_value.py b/python/paddle/v2/fluid/tests/test_const_value.py
index f8c17c2c98..190bfa779b 100644
--- a/python/paddle/v2/fluid/tests/test_const_value.py
+++ b/python/paddle/v2/fluid/tests/test_const_value.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.framework as framework
 
diff --git a/python/paddle/v2/fluid/tests/test_conv2d_op.py b/python/paddle/v2/fluid/tests/test_conv2d_op.py
index 958300e655..8b03a3ae16 100644
--- a/python/paddle/v2/fluid/tests/test_conv2d_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv2d_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 
@@ -49,7 +62,7 @@ def conv2d_forward_naive(input, filter, group, conv_param):
 
 class TestConv2dOp(OpTest):
     def setUp(self):
-        core.use_cuda()
+        self.use_cudnn = False
         self.init_op_type()
         self.init_group()
         self.init_dilation()
@@ -70,30 +83,59 @@ class TestConv2dOp(OpTest):
             'strides': self.stride,
             'paddings': self.pad,
             'groups': self.groups,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn
         }
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.02)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -167,39 +209,39 @@ class TestWithDilation(TestConv2dOp):
         self.groups = 3
 
 
-#----------------Conv2dCudnn----------------
-class TestCudnn(TestConv2dOp):
+#----------------Conv2dCUDNN----------------
+class TestCUDNN(TestConv2dOp):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWithPad(TestWithPad):
+class TestCUDNNWithPad(TestWithPad):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWithStride(TestWithStride):
+class TestCUDNNWithStride(TestWithStride):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWithGroup(TestWithGroup):
+class TestCUDNNWithGroup(TestWithGroup):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
-class TestCudnnWith1x1(TestWith1x1):
+class TestCUDNNWith1x1(TestWith1x1):
     def init_op_type(self):
-        core.use_cudnn()
-        self.op_type = "conv2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d"
 
 
 #  cudnn v5 does not support dilation conv.
-# class TestCudnnWithDilation(TestWithDilation):
+# class TestCUDNNWithDilation(TestWithDilation):
 #     def init_op_type(self):
 #         self.op_type = "conv_cudnn"
 
diff --git a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
index d59537b924..b7b86c58fb 100644
--- a/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv2d_transpose_op.py
@@ -1,5 +1,20 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -37,6 +52,7 @@ def conv2dtranspose_forward_naive(input_, filter_, attrs):
 class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
+        self.use_cudnn = False
         self.init_op_type()
         self.init_test_case()
 
@@ -47,7 +63,9 @@ class TestConv2dTransposeOp(OpTest):
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         output = conv2dtranspose_forward_naive(input_, filter_,
@@ -56,25 +74,53 @@ class TestConv2dTransposeOp(OpTest):
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Input']))
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.02,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.02)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -119,12 +165,13 @@ class TestWithDilation(TestConv2dTransposeOp):
 
 
 # ------------ test_cudnn ------------
-class TestCudnn(TestConv2dTransposeOp):
+class TestCUDNN(TestConv2dTransposeOp):
     def init_op_type(self):
-        self.op_type = "conv2d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
 
 
-class TestCudnnWithPad(TestWithPad):
+class TestCUDNNWithPad(TestWithPad):
     def init_test_case(self):
         self.pad = [1, 1]
         self.stride = [1, 1]
@@ -134,10 +181,11 @@ class TestCudnnWithPad(TestWithPad):
         self.filter_size = [f_c, 6, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv2d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
 
 
-class TestCudnnWithStride(TestWithStride):
+class TestCUDNNWithStride(TestWithStride):
     def init_test_case(self):
         self.pad = [1, 1]
         self.stride = [2, 2]
@@ -147,11 +195,12 @@ class TestCudnnWithStride(TestWithStride):
         self.filter_size = [f_c, 6, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv2d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
 
 
 # #cudnn v5 does not support dilation conv.
-# class TestCudnnWithDilation(TestWithDilation):
+# class TestCUDNNWithDilation(TestWithDilation):
 #     def init_test_case(self):
 #         self.pad = [1, 1]
 #         self.stride = [2, 2]
@@ -161,7 +210,7 @@ class TestCudnnWithStride(TestWithStride):
 #         self.filter_size = [f_c, 6, 3, 3]
 #
 #     def init_op_type(self):
-#         self.op_type = "conv2d_transpose_cudnn"
+#         self.op_type = "conv2d_transpose"
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_conv3d_op.py b/python/paddle/v2/fluid/tests/test_conv3d_op.py
index 8593dff20b..5b0397cc69 100644
--- a/python/paddle/v2/fluid/tests/test_conv3d_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv3d_op.py
@@ -1,5 +1,20 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -54,6 +69,7 @@ def conv3d_forward_naive(input, filter, group, conv_param):
 
 class TestConv3dOp(OpTest):
     def setUp(self):
+        self.use_cudnn = False
         self.init_group()
         self.init_op_type()
         self.init_dilation()
@@ -62,7 +78,9 @@ class TestConv3dOp(OpTest):
         conv3d_param = {
             'stride': self.stride,
             'pad': self.pad,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
         input = np.random.random(self.input_size).astype("float32")
         filter = np.random.random(self.filter_size).astype("float32")
@@ -79,25 +97,53 @@ class TestConv3dOp(OpTest):
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.03)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.03,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.03,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
 
     def init_test_case(self):
         self.pad = [0, 0, 0]
@@ -169,31 +215,35 @@ class TestWithDilation(TestConv3dOp):
         self.groups = 3
 
 
-class TestCudnn(TestConv3dOp):
+class TestCUDNN(TestConv3dOp):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
-class TestWithGroup1Cudnn(TestWithGroup1):
+class TestWithGroup1CUDNN(TestWithGroup1):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
-class TestWithGroup2Cudnn(TestWithGroup2):
+class TestWithGroup2CUDNN(TestWithGroup2):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
-class TestWith1x1Cudnn(TestWith1x1):
+class TestWith1x1CUDNN(TestWith1x1):
     def init_op_type(self):
-        self.op_type = "conv3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d"
 
 
 # FIXME(typhoonzero): find a way to determine if
 # using cudnn > 6 in python
-# class TestWithDilationCudnn(TestWithDilation):
+# class TestWithDilationCUDNN(TestWithDilation):
 #     def init_op_type(self):
-#         self.op_type = "conv3d_cudnn"
+#         self.op_type = "conv3d"
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
index a353f9b4d4..b08969062a 100644
--- a/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv3d_transpose_op.py
@@ -1,5 +1,20 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -44,6 +59,7 @@ def conv3dtranspose_forward_naive(input_, filter_, attrs):
 class TestConv3dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
+        self.use_cudnn = False
         self.init_op_type()
         self.init_test_case()
 
@@ -54,7 +70,9 @@ class TestConv3dTransposeOp(OpTest):
         self.attrs = {
             'strides': self.stride,
             'paddings': self.pad,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         output = conv3dtranspose_forward_naive(input_, filter_,
@@ -63,25 +81,53 @@ class TestConv3dTransposeOp(OpTest):
         self.outputs = {'Output': output}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(
-            set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place,
+                set(['Input', 'Filter']),
+                'Output',
+                max_relative_error=0.03)
+        else:
+            self.check_grad(
+                set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
 
     def test_check_grad_no_filter(self):
-        self.check_grad(
-            ['Input'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Filter']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
+        else:
+            self.check_grad(
+                ['Input'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Filter']))
 
     def test_check_grad_no_input(self):
-        self.check_grad(
-            ['Filter'],
-            'Output',
-            max_relative_error=0.02,
-            no_grad_set=set(['Input']))
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
+        else:
+            self.check_grad(
+                ['Filter'],
+                'Output',
+                max_relative_error=0.03,
+                no_grad_set=set(['Input']))
 
     def init_test_case(self):
         self.pad = [0, 0, 0]
@@ -126,12 +172,13 @@ class TestWithDilation(TestConv3dTransposeOp):
 
 
 # ------------ test_cudnn ------------
-class TestCudnn(TestConv3dTransposeOp):
+class TestCUDNN(TestConv3dTransposeOp):
     def init_op_type(self):
-        self.op_type = "conv3d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d_transpose"
 
 
-class TestCudnnWithPad(TestWithPad):
+class TestCUDNNWithPad(TestWithPad):
     def init_test_case(self):
         self.pad = [1, 1, 1]
         self.stride = [1, 1, 1]
@@ -141,10 +188,11 @@ class TestCudnnWithPad(TestWithPad):
         self.filter_size = [f_c, 6, 3, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv3d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d_transpose"
 
 
-class TestCudnnWithStride(TestWithStride):
+class TestCUDNNWithStride(TestWithStride):
     def init_test_case(self):
         self.pad = [1, 1, 1]
         self.stride = [2, 2, 2]
@@ -154,11 +202,12 @@ class TestCudnnWithStride(TestWithStride):
         self.filter_size = [f_c, 6, 3, 3, 3]
 
     def init_op_type(self):
-        self.op_type = "conv3d_transpose_cudnn"
+        self.use_cudnn = True
+        self.op_type = "conv3d_transpose"
 
 
 # #cudnn v5 does not support dilation conv.
-# class TestCudnnWithDilation(TestWithDilation):
+# class TestCUDNNWithDilation(TestWithDilation):
 #     def init_test_case(self):
 #         self.pad = [1, 1, 1]
 #         self.stride = [2, 2, 2]
@@ -168,7 +217,7 @@ class TestCudnnWithStride(TestWithStride):
 #         self.filter_size = [f_c, 6, 3, 3, 3]
 #
 #     def init_op_type(self):
-#         self.op_type = "conv3d_transpose_cudnn"
+#         self.op_type = "conv3d_transpose"
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_conv_shift_op.py b/python/paddle/v2/fluid/tests/test_conv_shift_op.py
index b9ab21a06a..14b2640e24 100644
--- a/python/paddle/v2/fluid/tests/test_conv_shift_op.py
+++ b/python/paddle/v2/fluid/tests/test_conv_shift_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_cos_sim_op.py b/python/paddle/v2/fluid/tests/test_cos_sim_op.py
index 47557ccb41..f6e5e2cbe9 100644
--- a/python/paddle/v2/fluid/tests/test_cos_sim_op.py
+++ b/python/paddle/v2/fluid/tests/test_cos_sim_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_create_op_doc_string.py b/python/paddle/v2/fluid/tests/test_create_op_doc_string.py
index 42b6f7a361..6c92264221 100644
--- a/python/paddle/v2/fluid/tests/test_create_op_doc_string.py
+++ b/python/paddle/v2/fluid/tests/test_create_op_doc_string.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.layers as layers
 
diff --git a/python/paddle/v2/fluid/tests/test_crf_decoding_op.py b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py
index ab573da31d..40e80a824a 100644
--- a/python/paddle/v2/fluid/tests/test_crf_decoding_op.py
+++ b/python/paddle/v2/fluid/tests/test_crf_decoding_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import random
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_crop_op.py b/python/paddle/v2/fluid/tests/test_crop_op.py
index 62c883bdc1..a0b2fc954d 100644
--- a/python/paddle/v2/fluid/tests/test_crop_op.py
+++ b/python/paddle/v2/fluid/tests/test_crop_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_cross_entropy_op.py b/python/paddle/v2/fluid/tests/test_cross_entropy_op.py
index b81af9364d..f05e6b2356 100644
--- a/python/paddle/v2/fluid/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/fluid/tests/test_cross_entropy_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest, randomize_probability
diff --git a/python/paddle/v2/fluid/tests/test_data_feeder.py b/python/paddle/v2/fluid/tests/test_data_feeder.py
index 4549693203..5574766f8f 100644
--- a/python/paddle/v2/fluid/tests/test_data_feeder.py
+++ b/python/paddle/v2/fluid/tests/test_data_feeder.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid as fluid
 
 
diff --git a/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py b/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py
index 674c3fda5c..5e745a2843 100644
--- a/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py
+++ b/python/paddle/v2/fluid/tests/test_decayed_adagrad_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_default_scope_funcs.py b/python/paddle/v2/fluid/tests/test_default_scope_funcs.py
index 738e69529e..7a62168be9 100644
--- a/python/paddle/v2/fluid/tests/test_default_scope_funcs.py
+++ b/python/paddle/v2/fluid/tests/test_default_scope_funcs.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.v2.fluid.default_scope_funcs import *
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_detection_output_op.py b/python/paddle/v2/fluid/tests/test_detection_output_op.py
index 080a9743b0..147a43628c 100644
--- a/python/paddle/v2/fluid/tests/test_detection_output_op.py
+++ b/python/paddle/v2/fluid/tests/test_detection_output_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py
index 2483200212..f401050dcc 100644
--- a/python/paddle/v2/fluid/tests/test_dropout_op.py
+++ b/python/paddle/v2/fluid/tests/test_dropout_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_dyn_rnn.py b/python/paddle/v2/fluid/tests/test_dyn_rnn.py
index 8090c5f478..a946fea58d 100644
--- a/python/paddle/v2/fluid/tests/test_dyn_rnn.py
+++ b/python/paddle/v2/fluid/tests/test_dyn_rnn.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid as fluid
 import paddle.v2 as paddle
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
index c02c59284e..95cc80739d 100644
--- a/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
+++ b/python/paddle/v2/fluid/tests/test_dynrnn_gradient_check.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy
 import random
 import collections
@@ -197,7 +210,24 @@ class BaseRNN(object):
         return numpy.array([o.mean() for o in outs.itervalues()]).mean()
 
 
-class TestSimpleMul(unittest.TestCase):
+class SeedFixedTestCase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """Fix random seeds to remove randomness from tests"""
+        cls._np_rand_state = numpy.random.get_state()
+        cls._py_rand_state = random.getstate()
+
+        numpy.random.seed(123)
+        random.seed(124)
+
+    @classmethod
+    def tearDownClass(cls):
+        """Restore random seeds"""
+        numpy.random.set_state(cls._np_rand_state)
+        random.setstate(cls._py_rand_state)
+
+
+class TestSimpleMul(SeedFixedTestCase):
     DATA_NAME = 'X'
     DATA_WIDTH = 32
     PARAM_NAME = 'W'
@@ -263,7 +293,7 @@ class TestSimpleMul(unittest.TestCase):
         self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05))
 
 
-class TestSimpleMulWithMemory(unittest.TestCase):
+class TestSimpleMulWithMemory(SeedFixedTestCase):
     DATA_WIDTH = 32
     HIDDEN_WIDTH = 20
     DATA_NAME = 'X'
diff --git a/python/paddle/v2/fluid/tests/test_dynrnn_static_input.py b/python/paddle/v2/fluid/tests/test_dynrnn_static_input.py
new file mode 100644
index 0000000000..d6878f0b6d
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_dynrnn_static_input.py
@@ -0,0 +1,205 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import unittest
+import paddle.v2 as paddle
+import paddle.v2.fluid.core as core
+import paddle.v2.fluid as fluid
+from paddle.v2.fluid.backward import append_backward
+import paddle.v2.fluid.framework as framework
+from paddle.v2.fluid.framework import Program, switch_main_program
+import bisect
+import numpy as np
+
+fluid.default_startup_program().random_seed = 1
+
+
+class TestDyRnnStaticInput(unittest.TestCase):
+    def setUp(self):
+        self._delta = 0.005
+        self._max_sequence_len = 3
+        self._program = Program()
+        switch_main_program(self._program)
+        self.output_dim = 10
+        self.place = core.CPUPlace()
+        self.prepare_x_tensor()
+        self.prepare_static_input_tensor()
+        self.exe = fluid.Executor(self.place)
+
+    def prepare_x_tensor(self):
+        self.x_tensor_dim = 10
+        lod = [[0, 2, 3, 6]]
+        shape = [lod[0][-1], self.x_tensor_dim]
+        self.x_tensor_data = np.random.random(shape).astype('float32')
+        self.x_tensor = core.LoDTensor()
+        self.x_tensor.set_lod(lod)
+        self.x_tensor.set(self.x_tensor_data, self.place)
+
+    def prepare_static_input_tensor(self):
+        self.static_input_tensor_dim = 4
+        lod = [[0, 1, 3, 6]]
+        shape = [lod[0][-1], self.static_input_tensor_dim]
+        self.static_input_data = np.random.random(shape).astype('float32')
+        self.static_input_tensor = core.LoDTensor()
+        self.static_input_tensor.set_lod(lod)
+        self.static_input_tensor.set(self.static_input_data, self.place)
+
+    def fetch_value(self, var):
+        fetch_outs = self.exe.run(feed={
+            'x_tensor': self.x_tensor,
+            'static_input_tensor': self.static_input_tensor
+        },
+                                  fetch_list=[var],
+                                  return_numpy=False)
+        return self._lodtensor_to_ndarray(fetch_outs[0])
+
+    def _lodtensor_to_ndarray(self, lod_tensor):
+        dims = lod_tensor.get_dims()
+        ndarray = np.zeros(shape=dims).astype('float32')
+        for i in xrange(np.product(dims)):
+            ndarray.ravel()[i] = lod_tensor.get_float_element(i)
+        return ndarray, lod_tensor.lod()
+
+    def build_graph(self, only_forward=False):
+        x_tensor = fluid.layers.data(
+            name='x_tensor',
+            shape=[self.x_tensor_dim],
+            dtype='float32',
+            lod_level=1)
+        x_tensor.stop_gradient = False
+
+        static_input_tensor = fluid.layers.data(
+            name='static_input_tensor',
+            shape=[self.static_input_tensor_dim],
+            dtype='float32',
+            lod_level=1)
+        static_input_tensor.stop_gradient = False
+
+        if only_forward:
+            static_input_out_array = self._program.global_block().create_var(
+                name='static_input_out_array',
+                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
+                dtype='float32')
+            static_input_out_array.stop_gradient = True
+
+        rnn = fluid.layers.DynamicRNN()
+        with rnn.block():
+            step_x = rnn.step_input(x_tensor)
+            step_static_input = rnn.static_input(static_input_tensor)
+            if only_forward:
+                fluid.layers.array_write(
+                    x=step_static_input,
+                    i=rnn.step_idx,
+                    array=static_input_out_array)
+            last = fluid.layers.sequence_pool(
+                input=step_static_input, pool_type='last')
+            projected = fluid.layers.fc(input=[step_x, last],
+                                        size=self.output_dim)
+            rnn.output(projected)
+
+        if only_forward:
+            static_input_step_outs = []
+            step_idx = fluid.layers.fill_constant(
+                shape=[1], dtype='int64', value=0)
+            step_idx.stop_gradient = True
+
+            for i in xrange(self._max_sequence_len):
+                step_out = fluid.layers.array_read(static_input_out_array,
+                                                   step_idx)
+                step_out.stop_gradient = True
+                static_input_step_outs.append(step_out)
+                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)
+
+        if only_forward:
+            return static_input_step_outs
+
+        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
+        loss = fluid.layers.mean(x=last)
+        append_backward(loss)
+        static_input_grad = self._program.global_block().var(
+            framework.grad_var_name('static_input_tensor'))
+        return static_input_grad, loss
+
+    def get_seq_len_from_lod(self, lod):
+        return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)]
+
+    def get_expected_static_step_outs(self):
+        x_lod = self.x_tensor.lod()
+        x_seq_len = self.get_seq_len_from_lod(x_lod)
+        x_seq_len_sorted = sorted(x_seq_len)
+        x_sorted_indices = np.argsort(x_seq_len)[::-1]
+
+        static_lod = self.static_input_tensor.lod()
+        static_sliced = [
+            self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]]
+            for i in xrange(len(static_lod[0]) - 1)
+        ]
+        static_seq_len = self.get_seq_len_from_lod(static_lod)
+        static_reordered = []
+        for i in xrange(len(x_sorted_indices)):
+            static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
+        static_seq_len_reordered = [
+            static_seq_len[x_sorted_indices[i]]
+            for i in xrange(len(x_sorted_indices))
+        ]
+
+        static_step_outs = []
+        static_step_lods = []
+
+        for i in xrange(self._max_sequence_len):
+            end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
+            lod = [0]
+            for i in xrange(end):
+                lod.append(static_seq_len_reordered[i] + lod[-1])
+            static_step_lods.append([lod])
+            end = lod[-1]
+            static_step_outs.append(
+                np.array(static_reordered[:end]).astype('float32'))
+
+        return static_step_outs, static_step_lods
+
+    def test_step_out(self):
+        static_step_outs = self.build_graph(only_forward=True)
+        self.exe.run(framework.default_startup_program())
+        expected_outs, expected_lods = self.get_expected_static_step_outs()
+        for i in xrange(self._max_sequence_len):
+            step_out, lod = self.fetch_value(static_step_outs[i])
+            self.assertTrue(np.allclose(step_out, expected_outs[i]))
+            self.assertTrue(np.allclose(lod, expected_lods[i]))
+
+    def test_network_gradient(self):
+        static_input_grad, loss = self.build_graph()
+        self.exe.run(framework.default_startup_program())
+
+        actual_gradients, actual_lod = self.fetch_value(static_input_grad)
+
+        static_input_shape = self.static_input_tensor.get_dims()
+        numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
+        # calculate numeric gradients
+        tensor_size = np.product(static_input_shape)
+        for i in xrange(tensor_size):
+            origin = self.static_input_tensor.get_float_element(i)
+            x_pos = origin + self._delta
+            self.static_input_tensor.set_float_element(i, x_pos)
+            y_pos = self.fetch_value(loss)[0][0]
+            x_neg = origin - self._delta
+            self.static_input_tensor.set_float_element(i, x_neg)
+            y_neg = self.fetch_value(loss)[0][0]
+            self.static_input_tensor.set_float_element(i, origin)
+            numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
+        self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
+        self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod()))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_edit_distance_op.py b/python/paddle/v2/fluid/tests/test_edit_distance_op.py
new file mode 100644
index 0000000000..38e87728b3
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_edit_distance_op.py
@@ -0,0 +1,94 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+def Levenshtein(hyp, ref):
+    """ Compute the Levenshtein distance between two strings.
+
+    :param hyp: hypothesis string in index
+    :type hyp: list
+    :param ref: reference string in index
+    :type ref: list
+    """
+    m = len(hyp)
+    n = len(ref)
+    if m == 0:
+        return n
+    if n == 0:
+        return m
+
+    dist = np.zeros((m + 1, n + 1)).astype("float32")
+    for i in range(0, m + 1):
+        dist[i][0] = i
+    for j in range(0, n + 1):
+        dist[0][j] = j
+
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            cost = 0 if hyp[i - 1] == ref[j - 1] else 1
+            deletion = dist[i - 1][j] + 1
+            insertion = dist[i][j - 1] + 1
+            substitution = dist[i - 1][j - 1] + cost
+            dist[i][j] = min(deletion, insertion, substitution)
+    return dist[m][n]
+
+
+class TestEditDistanceOp(OpTest):
+    def setUp(self):
+        self.op_type = "edit_distance"
+        normalized = False
+        x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int32")
+        x2 = np.array([[0, 12, 4, 7, 8]]).astype("int32")
+        x1 = np.transpose(x1)
+        x2 = np.transpose(x2)
+        x1_lod = [0, 1, 5]
+        x2_lod = [0, 3, 4]
+
+        num_strs = len(x1_lod) - 1
+        distance = np.zeros((num_strs, 1)).astype("float32")
+        for i in range(0, num_strs):
+            distance[i] = Levenshtein(
+                hyp=x1[x1_lod[i]:x1_lod[i + 1]],
+                ref=x2[x2_lod[i]:x2_lod[i + 1]])
+            if normalized is True:
+                len_ref = x2_lod[i + 1] - x2_lod[i]
+                distance[i] = distance[i] / len_ref
+        self.attrs = {'normalized': normalized}
+        self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
+        self.outputs = {'Out': distance}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestEditDistanceOpNormalized(OpTest):
+    def setUp(self):
+        self.op_type = "edit_distance"
+        normalized = True
+        x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int32")
+        x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int32")
+        x1 = np.transpose(x1)
+        x2 = np.transpose(x2)
+        x1_lod = [0, 1, 3, 6]
+        x2_lod = [0, 2, 3, 5]
+
+        num_strs = len(x1_lod) - 1
+        distance = np.zeros((num_strs, 1)).astype("float32")
+        for i in range(0, num_strs):
+            distance[i] = Levenshtein(
+                hyp=x1[x1_lod[i]:x1_lod[i + 1]],
+                ref=x2[x2_lod[i]:x2_lod[i + 1]])
+            if normalized is True:
+                len_ref = x2_lod[i + 1] - x2_lod[i]
+                distance[i] = distance[i] / len_ref
+        self.attrs = {'normalized': normalized}
+        self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
+        self.outputs = {'Out': distance}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_add_op.py b/python/paddle/v2/fluid/tests/test_elementwise_add_op.py
index 57daddd569..1e88231877 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_add_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_add_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_div_op.py b/python/paddle/v2/fluid/tests/test_elementwise_div_op.py
index 41cb2b7767..fbabc79be2 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_div_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_div_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py b/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py
index 261ca9cb3d..ef3a829abc 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_mul_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py b/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py
index be982e8c57..db24db7b30 100644
--- a/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py
+++ b/python/paddle/v2/fluid/tests/test_elementwise_sub_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_exception.py b/python/paddle/v2/fluid/tests/test_exception.py
index b871f40c4a..98c4cbe3f2 100644
--- a/python/paddle/v2/fluid/tests/test_exception.py
+++ b/python/paddle/v2/fluid/tests/test_exception.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_executor_and_mul.py b/python/paddle/v2/fluid/tests/test_executor_and_mul.py
index b1ef87c5cb..e8baf631e5 100644
--- a/python/paddle/v2/fluid/tests/test_executor_and_mul.py
+++ b/python/paddle/v2/fluid/tests/test_executor_and_mul.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_expand_op.py b/python/paddle/v2/fluid/tests/test_expand_op.py
index 0440f7a2bb..0524f2041f 100644
--- a/python/paddle/v2/fluid/tests/test_expand_op.py
+++ b/python/paddle/v2/fluid/tests/test_expand_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_feed_fetch_method.py b/python/paddle/v2/fluid/tests/test_feed_fetch_method.py
index 178c85b0dd..718311517d 100644
--- a/python/paddle/v2/fluid/tests/test_feed_fetch_method.py
+++ b/python/paddle/v2/fluid/tests/test_feed_fetch_method.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py b/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py
index 99de6b5d05..0adc487c04 100644
--- a/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_constant_batch_size_like_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_fill_constant_op.py b/python/paddle/v2/fluid/tests/test_fill_constant_op.py
index dff7b615aa..50d4ccb3bd 100644
--- a/python/paddle/v2/fluid/tests/test_fill_constant_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_constant_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_fill_op.py b/python/paddle/v2/fluid/tests/test_fill_op.py
index 88337598c8..42b06ec87c 100644
--- a/python/paddle/v2/fluid/tests/test_fill_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py
index cd91769a22..a28bed9697 100644
--- a/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py
+++ b/python/paddle/v2/fluid/tests/test_fill_zeros_like_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_framework_debug_str.py b/python/paddle/v2/fluid/tests/test_framework_debug_str.py
index a4cbabdb36..6c82e67220 100644
--- a/python/paddle/v2/fluid/tests/test_framework_debug_str.py
+++ b/python/paddle/v2/fluid/tests/test_framework_debug_str.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 from paddle.v2.fluid.framework import Program
 
diff --git a/python/paddle/v2/fluid/tests/test_ftrl_op.py b/python/paddle/v2/fluid/tests/test_ftrl_op.py
index f77ac4659a..599233efd9 100644
--- a/python/paddle/v2/fluid/tests/test_ftrl_op.py
+++ b/python/paddle/v2/fluid/tests/test_ftrl_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_gather_op.py b/python/paddle/v2/fluid/tests/test_gather_op.py
index b0ab429ef1..95093f9b84 100644
--- a/python/paddle/v2/fluid/tests/test_gather_op.py
+++ b/python/paddle/v2/fluid/tests/test_gather_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py
index 6f6a60ccb3..bf4785211e 100644
--- a/python/paddle/v2/fluid/tests/test_gaussian_random_op.py
+++ b/python/paddle/v2/fluid/tests/test_gaussian_random_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy
 
diff --git a/python/paddle/v2/fluid/tests/test_get_places_op.py b/python/paddle/v2/fluid/tests/test_get_places_op.py
index c4346f6786..b44011fb76 100644
--- a/python/paddle/v2/fluid/tests/test_get_places_op.py
+++ b/python/paddle/v2/fluid/tests/test_get_places_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid as fluid
 import decorators
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_gru_op.py b/python/paddle/v2/fluid/tests/test_gru_op.py
index fa2c5a53ec..a6647d1bf2 100644
--- a/python/paddle/v2/fluid/tests/test_gru_op.py
+++ b/python/paddle/v2/fluid/tests/test_gru_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import math
diff --git a/python/paddle/v2/fluid/tests/test_gru_unit_op.py b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
index 501d5aa579..53f10c32c7 100644
--- a/python/paddle/v2/fluid/tests/test_gru_unit_op.py
+++ b/python/paddle/v2/fluid/tests/test_gru_unit_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import math
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_hinge_loss_op.py b/python/paddle/v2/fluid/tests/test_hinge_loss_op.py
index a8757a891f..dc7774d01c 100644
--- a/python/paddle/v2/fluid/tests/test_hinge_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_hinge_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_huber_loss_op.py b/python/paddle/v2/fluid/tests/test_huber_loss_op.py
index a24fcbec6c..18a48bb18c 100644
--- a/python/paddle/v2/fluid/tests/test_huber_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_huber_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_image_classification_layer.py b/python/paddle/v2/fluid/tests/test_image_classification_layer.py
index b621d1525e..9d676e8759 100644
--- a/python/paddle/v2/fluid/tests/test_image_classification_layer.py
+++ b/python/paddle/v2/fluid/tests/test_image_classification_layer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/test_infer_shape.py b/python/paddle/v2/fluid/tests/test_infer_shape.py
index 9f6695ce02..0c2a6f1423 100644
--- a/python/paddle/v2/fluid/tests/test_infer_shape.py
+++ b/python/paddle/v2/fluid/tests/test_infer_shape.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_inference_model_io.py b/python/paddle/v2/fluid/tests/test_inference_model_io.py
index 71ca3e6c10..c5cad2166b 100644
--- a/python/paddle/v2/fluid/tests/test_inference_model_io.py
+++ b/python/paddle/v2/fluid/tests/test_inference_model_io.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_initializer.py b/python/paddle/v2/fluid/tests/test_initializer.py
index 3175010f48..fa3c2afeed 100644
--- a/python/paddle/v2/fluid/tests/test_initializer.py
+++ b/python/paddle/v2/fluid/tests/test_initializer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_is_empty_op.py b/python/paddle/v2/fluid/tests/test_is_empty_op.py
index 0a4dd0f4fa..d6876a885f 100644
--- a/python/paddle/v2/fluid/tests/test_is_empty_op.py
+++ b/python/paddle/v2/fluid/tests/test_is_empty_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from paddle.v2.fluid.op import Operator
diff --git a/python/paddle/v2/fluid/tests/test_l1_norm_op.py b/python/paddle/v2/fluid/tests/test_l1_norm_op.py
index 3a1d1689fe..92484c49f0 100644
--- a/python/paddle/v2/fluid/tests/test_l1_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_l1_norm_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import unittest
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index a56277d216..a4e155b534 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
index c26634ff20..cd917dff7f 100644
--- a/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
+++ b/python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import random
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_lod_array_length_op.py b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py
index 8a4be545ed..f80136cb0d 100644
--- a/python/paddle/v2/fluid/tests/test_lod_array_length_op.py
+++ b/python/paddle/v2/fluid/tests/test_lod_array_length_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/fluid/tests/test_lod_rank_table.py b/python/paddle/v2/fluid/tests/test_lod_rank_table.py
index 30d619fe31..673605d79c 100644
--- a/python/paddle/v2/fluid/tests/test_lod_rank_table.py
+++ b/python/paddle/v2/fluid/tests/test_lod_rank_table.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from paddle.v2.fluid.layers import lod_rank_table, data
 from paddle.v2.fluid.executor import Executor
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_lod_reset_op.py b/python/paddle/v2/fluid/tests/test_lod_reset_op.py
index 652ccecfa4..d799dbfa21 100644
--- a/python/paddle/v2/fluid/tests/test_lod_reset_op.py
+++ b/python/paddle/v2/fluid/tests/test_lod_reset_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array.py
index d6d3e23fd8..c593b1e061 100644
--- a/python/paddle/v2/fluid/tests/test_lod_tensor_array.py
+++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py
index c552cb033f..5887f9799a 100644
--- a/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py
+++ b/python/paddle/v2/fluid/tests/test_lod_tensor_array_ops.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_log_loss_op.py b/python/paddle/v2/fluid/tests/test_log_loss_op.py
index 2eeaa90758..fde99bfaa1 100644
--- a/python/paddle/v2/fluid/tests/test_log_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_log_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_logical_op.py b/python/paddle/v2/fluid/tests/test_logical_op.py
index ac90bf839c..8c9e8de739 100644
--- a/python/paddle/v2/fluid/tests/test_logical_op.py
+++ b/python/paddle/v2/fluid/tests/test_logical_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import op_test
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_lookup_table_op.py b/python/paddle/v2/fluid/tests/test_lookup_table_op.py
index a56a549e69..1ff6b305bc 100644
--- a/python/paddle/v2/fluid/tests/test_lookup_table_op.py
+++ b/python/paddle/v2/fluid/tests/test_lookup_table_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lrn_op.py b/python/paddle/v2/fluid/tests/test_lrn_op.py
index 9abb09e53a..051704617e 100644
--- a/python/paddle/v2/fluid/tests/test_lrn_op.py
+++ b/python/paddle/v2/fluid/tests/test_lrn_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lstm_op.py b/python/paddle/v2/fluid/tests/test_lstm_op.py
index 77f062e8c8..76ea8def7c 100644
--- a/python/paddle/v2/fluid/tests/test_lstm_op.py
+++ b/python/paddle/v2/fluid/tests/test_lstm_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_lstm_unit_op.py b/python/paddle/v2/fluid/tests/test_lstm_unit_op.py
index 6bad2e1f7c..c97c1e72aa 100644
--- a/python/paddle/v2/fluid/tests/test_lstm_unit_op.py
+++ b/python/paddle/v2/fluid/tests/test_lstm_unit_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py b/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py
index 63378cbc4e..3d8c1d19f9 100644
--- a/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_margin_rank_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_matmul_op.py b/python/paddle/v2/fluid/tests/test_matmul_op.py
index d51572c8ab..f7dc4e0532 100644
--- a/python/paddle/v2/fluid/tests/test_matmul_op.py
+++ b/python/paddle/v2/fluid/tests/test_matmul_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
@@ -83,18 +96,18 @@ class Generator(object):
         self.outputs = {'Out': Out}
 
     def test_check_output(self):
-        self.check_output(atol=1e-2)
+        self.check_output(atol=1e-3)
 
     def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.5)
+        self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3)
 
     def test_check_grad_ignore_x(self):
         self.check_grad(
-            ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X"))
+            ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X"))
 
     def test_check_grad_ignore_y(self):
         self.check_grad(
-            ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y'))
+            ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y'))
 
 
 # Generate test cases for all possibilities
diff --git a/python/paddle/v2/fluid/tests/test_maxout_op.py b/python/paddle/v2/fluid/tests/test_maxout_op.py
index 5fbed43e25..ed8c0d2b67 100644
--- a/python/paddle/v2/fluid/tests/test_maxout_op.py
+++ b/python/paddle/v2/fluid/tests/test_maxout_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_mean_op.py b/python/paddle/v2/fluid/tests/test_mean_op.py
index 7823abd8f8..f9d7d6921e 100644
--- a/python/paddle/v2/fluid/tests/test_mean_op.py
+++ b/python/paddle/v2/fluid/tests/test_mean_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py b/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py
index 5cce75ddb8..76f3c4eb64 100644
--- a/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py
+++ b/python/paddle/v2/fluid/tests/test_memory_optimization_transpiler.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_minus_op.py b/python/paddle/v2/fluid/tests/test_minus_op.py
index c56d7cb548..99c0d9056a 100644
--- a/python/paddle/v2/fluid/tests/test_minus_op.py
+++ b/python/paddle/v2/fluid/tests/test_minus_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
index 33558c6105..18e3991b94 100644
--- a/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
+++ b/python/paddle/v2/fluid/tests/test_mnist_if_else_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.framework import Program, program_guard, default_main_program, default_startup_program
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py b/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py
index 33de8ff721..40955283e6 100644
--- a/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_modified_huber_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_momentum_op.py b/python/paddle/v2/fluid/tests/test_momentum_op.py
index 638095f756..8008a5586f 100644
--- a/python/paddle/v2/fluid/tests/test_momentum_op.py
+++ b/python/paddle/v2/fluid/tests/test_momentum_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_mul_op.py b/python/paddle/v2/fluid/tests/test_mul_op.py
index 57d6d7e7e0..3033b8ef70 100644
--- a/python/paddle/v2/fluid/tests/test_mul_op.py
+++ b/python/paddle/v2/fluid/tests/test_mul_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_multiplex_op.py b/python/paddle/v2/fluid/tests/test_multiplex_op.py
index 5937eb5aa4..5746ab391e 100644
--- a/python/paddle/v2/fluid/tests/test_multiplex_op.py
+++ b/python/paddle/v2/fluid/tests/test_multiplex_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_nce.py b/python/paddle/v2/fluid/tests/test_nce.py
index 8aeba69769..ce66a7c6b3 100644
--- a/python/paddle/v2/fluid/tests/test_nce.py
+++ b/python/paddle/v2/fluid/tests/test_nce.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_net.py b/python/paddle/v2/fluid/tests/test_net.py
index d9fe55a8af..cc78cb4a56 100644
--- a/python/paddle/v2/fluid/tests/test_net.py
+++ b/python/paddle/v2/fluid/tests/test_net.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 from paddle.v2.fluid.op import Operator
 import unittest
diff --git a/python/paddle/v2/fluid/tests/test_norm_op.py b/python/paddle/v2/fluid/tests/test_norm_op.py
index 7d56320489..b053522d72 100644
--- a/python/paddle/v2/fluid/tests/test_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_norm_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_op_support_gpu.py b/python/paddle/v2/fluid/tests/test_op_support_gpu.py
index a0eb4bd5fd..741686a874 100644
--- a/python/paddle/v2/fluid/tests/test_op_support_gpu.py
+++ b/python/paddle/v2/fluid/tests/test_op_support_gpu.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 
diff --git a/python/paddle/v2/fluid/tests/test_operator.py b/python/paddle/v2/fluid/tests/test_operator.py
index c059a2b88b..e75ee41149 100644
--- a/python/paddle/v2/fluid/tests/test_operator.py
+++ b/python/paddle/v2/fluid/tests/test_operator.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid.op as op
diff --git a/python/paddle/v2/fluid/tests/test_operator_desc.py b/python/paddle/v2/fluid/tests/test_operator_desc.py
index ce34d95ac8..ed18fafe33 100644
--- a/python/paddle/v2/fluid/tests/test_operator_desc.py
+++ b/python/paddle/v2/fluid/tests/test_operator_desc.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py
index 1eadb7d912..dbec3a5944 100644
--- a/python/paddle/v2/fluid/tests/test_optimizer.py
+++ b/python/paddle/v2/fluid/tests/test_optimizer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid.framework as framework
diff --git a/python/paddle/v2/fluid/tests/test_pad_op.py b/python/paddle/v2/fluid/tests/test_pad_op.py
index 55f1774e57..1036b6bcad 100644
--- a/python/paddle/v2/fluid/tests/test_pad_op.py
+++ b/python/paddle/v2/fluid/tests/test_pad_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_parallel_op.py b/python/paddle/v2/fluid/tests/test_parallel_op.py
index 59ed041e7f..45196ef6fe 100644
--- a/python/paddle/v2/fluid/tests/test_parallel_op.py
+++ b/python/paddle/v2/fluid/tests/test_parallel_op.py
@@ -1,45 +1,177 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
-import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid as fluid
-from paddle.v2.fluid.framework import Program
-from paddle.v2.fluid.executor import Executor
-from paddle.v2.fluid.backward import append_backward
-import numpy as np
-import paddle.v2.fluid.core as core
-
-
-class ParallelOpTest(unittest.TestCase):
-    def setUp(self):
-        x = layers.data(
-            shape=[-1, 30, 40],
-            dtype='float32',
-            name='x',
-            append_batch_size=False,
-            stop_gradient=False)
-
-        places = layers.get_places(device_count=4)
-        pd = layers.ParallelDo(places=places)
-
-        with pd.do():
-            data = pd.read_input(x)
-            hidden = layers.fc(input=data, size=7)
-            pd.write_output(hidden)
-        data = pd()
-        loss = layers.mean(x=data)
-        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
-        sgd_optimizer.minimize(loss)
-
-        exe = fluid.Executor(fluid.CPUPlace())
-        exe.run(fluid.default_startup_program())
-        exe.run(fluid.default_main_program(),
-                feed={
-                    x.name: np.random.uniform(0.1, 0.6,
-                                              (20, 30, 40)).astype("float32")
-                })
-
-    def test_forward(self):
-        pass
+import numpy
+import sys
+# TODO(dzhwinter): get places op check need to be enhanced.
+sys.exit(0)
+
+
+class BaseParallelForTest(unittest.TestCase):
+    def run_test(self, callback, feed, fetch):
+        """
+        Run the unittest for parallel.for
+        Args:
+            callback(callable): A callable function returns a generator. There 
+                are two yields in the generator function. The first yield 
+                returns the data layers, and the second yield returns the loss. 
+                The modified data variables will be sent back during the first 
+                yield.
+
+            feed(dict): The executor feeding dictionary.
+            fetch(list|basestr): The fetch name lists. 
+
+        Returns:
+            None
+
+        Raises:
+            AssertionError when the computation of cpu, parallel.for in cpu, 
+                gpu, parallel.for in gpu are different.
+
+        """
+        cpu = fluid.CPUPlace()
+        result_cpu = self._run_test_impl_(
+            callback=callback,
+            feed=feed,
+            fetch=fetch,
+            place=cpu,
+            use_parallel=False)
+        result_cpu_parallel = self._run_test_impl_(
+            callback=callback,
+            feed=feed,
+            fetch=fetch,
+            place=cpu,
+            use_parallel=True)
+        if fluid.core.is_compile_gpu():
+            gpu = fluid.CUDAPlace(0)
+            result_gpu = self._run_test_impl_(
+                callback=callback,
+                feed=feed,
+                fetch=fetch,
+                place=gpu,
+                use_parallel=False)
+            result_gpu_parallel = self._run_test_impl_(
+                callback=callback,
+                feed=feed,
+                fetch=fetch,
+                place=gpu,
+                use_parallel=True)
+            self._assert_same_(fetch, result_cpu, result_cpu_parallel,
+                               result_gpu, result_gpu_parallel)
+        else:
+            self._assert_same_(fetch, result_cpu, result_cpu_parallel)
+
+    def _run_test_impl_(self, callback, feed, fetch, place, use_parallel=False):
+        """
+        Run a single test, returns the fetch values
+        Args:
+            place(Place): the computation place. 
+            use_parallel(bool): Whether use parallel.for or not. 
+
+        Returns:
+            Fetched numpy arrays.
+
+        """
+        if isinstance(fetch, basestring):
+            fetch = [fetch]
+        main = fluid.Program()
+        startup = fluid.Program()
+        # Fix seed
+        main.random_seed = 10
+        startup.random_seed = 10
+
+        with fluid.program_guard(main, startup):
+            generator = callback()
+            # Automatically insert parallel do if use_parallel = True
+            if use_parallel:
+                places = fluid.layers.get_places()
+                pd = fluid.layers.ParallelDo(places)
+                data = next(generator)
+
+                if isinstance(data, fluid.Variable):
+                    data = [data]
+
+                with pd.do():
+                    ins = map(pd.read_input, data)
+                    if len(ins) == 1:
+                        ins = ins[0]
+                    loss = generator.send(ins)  # patch input
+                    pd.write_output(loss)
+
+                loss = pd()
+            else:
+                data = next(generator)
+                loss = generator.send(data)
+            self.assertIsNotNone(loss)
+            avg_loss = fluid.layers.mean(x=loss)
+            fluid.backward.append_backward(loss=avg_loss)
+
+        exe = fluid.Executor(place)
+        exe.run(startup)
+        return exe.run(main, feed=feed, fetch_list=fetch)
+
+    def _assert_same_(self, fetch, *args):
+        """
+        Assert the return values of `run_test` are same.
+        Args:
+            fetch: Fetch list. Used for print error message
+            *args: The fetch result lists of each situations.
+
+        Returns:
+            None
+            
+        Raises:
+            AssertionError
+
+        """
+
+        def _impl_(a, b, fetch_id, item_id):
+            item_str = ['CPU', 'ParallelCPU', 'GPU', 'ParallelGPU']
+            flag = numpy.allclose(a, b, rtol=0.1)
+            self.assertTrue(flag, "The {0} are different in {1}".format(
+                fetch[fetch_id], item_str[item_id]))
+
+        for i, items in enumerate(zip(*args)):
+            self.assertGreater(len(items), 0)
+            for j in range(1, len(items)):
+                _impl_(items[0], items[j], fetch_id=i, item_id=j)
+
+
+class ParallelOpTest(BaseParallelForTest):
+    @staticmethod
+    def __network__():
+        x = fluid.layers.data(shape=[784], dtype='float32', name='img')
+        x = yield x
+        hidden = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
+        loss = fluid.layers.mean(x=hidden)
+        yield loss
+
+    def test_simple_fc(self):
+        self.run_test(
+            callback=ParallelOpTest.__network__,
+            feed={
+                'img': numpy.random.random(size=(51, 784)).astype('float32')
+            },
+            fetch='fc1.w@GRAD')
+
+    def test_fc_with_tiny_data(self):
+        self.run_test(
+            callback=ParallelOpTest.__network__,
+            feed={'img': numpy.random.random(size=(1, 784)).astype('float32')},
+            fetch='fc1.w@GRAD')
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_parameter.py b/python/paddle/v2/fluid/tests/test_parameter.py
index 694344acbb..e0db318345 100644
--- a/python/paddle/v2/fluid/tests/test_parameter.py
+++ b/python/paddle/v2/fluid/tests/test_parameter.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 from paddle.v2.fluid.framework import default_main_program
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_pool2d_op.py b/python/paddle/v2/fluid/tests/test_pool2d_op.py
index 5dff6270f4..ac8b24e7ad 100644
--- a/python/paddle/v2/fluid/tests/test_pool2d_op.py
+++ b/python/paddle/v2/fluid/tests/test_pool2d_op.py
@@ -1,5 +1,20 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -44,6 +59,7 @@ def avg_pool2D_forward_naive(x, ksize, strides, paddings, global_pool=0):
 
 class TestPool2d_Op(OpTest):
     def setUp(self):
+        self.use_cudnn = False
         self.init_test_case()
         self.init_global_pool()
         self.init_op_type()
@@ -62,15 +78,25 @@ class TestPool2d_Op(OpTest):
             'ksize': self.ksize,
             'pooling_type': self.pool_type,
             'global_pooling': self.global_pool,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         self.outputs = {'Out': output.astype('float32')}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        if self.pool_type != "max":
+        if self.use_cudnn and self.pool_type != "max":
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, set(['X']), 'Out', max_relative_error=0.07)
+        elif self.pool_type != "max":
             self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
 
     def init_test_case(self):
@@ -153,35 +179,41 @@ class TestCase5(TestCase2):
         self.pool2D_forward_naive = max_pool2D_forward_naive
 
 
-#--------------------test pool2d_cudnn--------------------
-class TestCudnnCase1(TestPool2d_Op):
+#--------------------test pool2d--------------------
+class TestCUDNNCase1(TestPool2d_Op):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase2(TestCase1):
+class TestCUDNNCase2(TestCase1):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase3(TestCase2):
+class TestCUDNNCase3(TestCase2):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase4(TestCase3):
+class TestCUDNNCase4(TestCase3):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase5(TestCase4):
+class TestCUDNNCase5(TestCase4):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
-class TestCudnnCase6(TestCase5):
+class TestCUDNNCase6(TestCase5):
     def init_op_type(self):
-        self.op_type = "pool2d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool2d"
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_pool3d_op.py b/python/paddle/v2/fluid/tests/test_pool3d_op.py
index 2ba86665a7..54b8df8465 100644
--- a/python/paddle/v2/fluid/tests/test_pool3d_op.py
+++ b/python/paddle/v2/fluid/tests/test_pool3d_op.py
@@ -1,5 +1,20 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
+
+import paddle.v2.fluid.core as core
 from op_test import OpTest
 
 
@@ -52,6 +67,7 @@ def avg_pool3D_forward_naive(x, ksize, strides, paddings, global_pool=0):
 
 class TestPool3d_Op(OpTest):
     def setUp(self):
+        self.use_cudnn = False
         self.init_test_case()
         self.init_global_pool()
         self.init_op_type()
@@ -71,15 +87,25 @@ class TestPool3d_Op(OpTest):
             'ksize': self.ksize,
             'pooling_type': self.pool_type,
             'global_pooling': self.global_pool,
+            'use_cudnn': self.use_cudnn,
+            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
         }
 
         self.outputs = {'Out': output.astype('float32')}
 
     def test_check_output(self):
-        self.check_output()
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-5)
+        else:
+            self.check_output()
 
     def test_check_grad(self):
-        if self.pool_type != "max":
+        if self.use_cudnn and self.pool_type != "max":
+            place = core.CUDAPlace(0)
+            self.check_grad_with_place(
+                place, set(['X']), 'Out', max_relative_error=0.07)
+        elif self.pool_type != "max":
             self.check_grad(set(['X']), 'Out', max_relative_error=0.07)
 
     def init_test_case(self):
@@ -163,35 +189,41 @@ class TestCase5(TestCase2):
         self.pool3D_forward_naive = max_pool3D_forward_naive
 
 
-#--------------------test pool3d_cudnn--------------------
-class TestCudnnCase1(TestPool3d_Op):
+#--------------------test pool3d--------------------
+class TestCUDNNCase1(TestPool3d_Op):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase2(TestCase1):
+class TestCUDNNCase2(TestCase1):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase3(TestCase2):
+class TestCUDNNCase3(TestCase2):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase4(TestCase3):
+class TestCUDNNCase4(TestCase3):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase5(TestCase4):
+class TestCUDNNCase5(TestCase4):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
-class TestCudnnCase6(TestCase5):
+class TestCUDNNCase6(TestCase5):
     def init_op_type(self):
-        self.op_type = "pool3d_cudnn"
+        self.use_cudnn = True
+        self.op_type = "pool3d"
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/v2/fluid/tests/test_pool_max_op.py b/python/paddle/v2/fluid/tests/test_pool_max_op.py
index 9d2d61c438..c4ec0e50cc 100644
--- a/python/paddle/v2/fluid/tests/test_pool_max_op.py
+++ b/python/paddle/v2/fluid/tests/test_pool_max_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py b/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py
index f6a6c428a2..b75f7152ef 100644
--- a/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py
+++ b/python/paddle/v2/fluid/tests/test_positive_negative_pair_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import itertools
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_precision_recall_op.py b/python/paddle/v2/fluid/tests/test_precision_recall_op.py
index d3dbdb6e2a..87c7fcb4b5 100644
--- a/python/paddle/v2/fluid/tests/test_precision_recall_op.py
+++ b/python/paddle/v2/fluid/tests/test_precision_recall_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_prelu_op.py b/python/paddle/v2/fluid/tests/test_prelu_op.py
index 7be932ac8f..38bd260bc9 100644
--- a/python/paddle/v2/fluid/tests/test_prelu_op.py
+++ b/python/paddle/v2/fluid/tests/test_prelu_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_print_op.py b/python/paddle/v2/fluid/tests/test_print_op.py
new file mode 100644
index 0000000000..4e42863af4
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_print_op.py
@@ -0,0 +1,68 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import unittest
+import paddle.v2.fluid.core as core
+from paddle.v2.fluid.executor import Executor
+import paddle.v2.fluid.layers as layers
+from paddle.v2.fluid.backward import append_backward
+from paddle.v2.fluid.framework import switch_main_program
+from paddle.v2.fluid.framework import Program
+import numpy as np
+
+
+class TestPrintOpCPU(unittest.TestCase):
+    def setUp(self):
+        self.place = core.CPUPlace()
+        self.x_tensor = core.LoDTensor()
+        tensor_np = np.random.random(size=(2, 3)).astype('float32')
+        self.x_tensor.set(tensor_np, self.place)
+        self.x_tensor.set_lod([[0, 1, 1]])
+
+    def build_network(self, only_forward, **kargs):
+        x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
+        x.stop_gradient = False
+        printed = layers.Print(input=x, **kargs)
+        if only_forward: return printed
+        loss = layers.mean(x=printed)
+        append_backward(loss=loss)
+        return loss
+
+    def test_forward(self):
+        switch_main_program(Program())
+        printed = self.build_network(True, print_phase='forward')
+        exe = Executor(self.place)
+        outs = exe.run(feed={'x': self.x_tensor},
+                       fetch_list=[printed],
+                       return_numpy=False)
+
+    def test_backward(self):
+        switch_main_program(Program())
+        loss = self.build_network(False, print_phase='backward')
+        exe = Executor(self.place)
+        outs = exe.run(feed={'x': self.x_tensor},
+                       fetch_list=[loss],
+                       return_numpy=False)
+
+
+class TestPrintOpGPU(TestPrintOpCPU):
+    def setUp(self):
+        self.place = core.CUDAPlace(0)
+        self.x_tensor = core.LoDTensor()
+        tensor_np = np.random.random(size=(2, 3)).astype('float32')
+        self.x_tensor.set(tensor_np, self.place)
+        self.x_tensor.set_lod([[0, 1, 1]])
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_profiler.py b/python/paddle/v2/fluid/tests/test_profiler.py
index e3f3ac58ef..4b439a16aa 100644
--- a/python/paddle/v2/fluid/tests/test_profiler.py
+++ b/python/paddle/v2/fluid/tests/test_profiler.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import paddle.v2.fluid as fluid
diff --git a/python/paddle/v2/fluid/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py
index 447c746aac..bcaeede93e 100644
--- a/python/paddle/v2/fluid/tests/test_program.py
+++ b/python/paddle/v2/fluid/tests/test_program.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from __future__ import print_function
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_protobuf.py b/python/paddle/v2/fluid/tests/test_protobuf.py
index e064374176..5f0646d036 100644
--- a/python/paddle/v2/fluid/tests/test_protobuf.py
+++ b/python/paddle/v2/fluid/tests/test_protobuf.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.proto.framework_pb2 as framework_pb2
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_protobuf_descs.py b/python/paddle/v2/fluid/tests/test_protobuf_descs.py
index d8abe17606..24638dc0e8 100644
--- a/python/paddle/v2/fluid/tests/test_protobuf_descs.py
+++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 
diff --git a/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py b/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py
index f89a493ab7..c197d850f9 100644
--- a/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py
+++ b/python/paddle/v2/fluid/tests/test_proximal_adagrad_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_proximal_gd_op.py b/python/paddle/v2/fluid/tests/test_proximal_gd_op.py
index 9ca79ce6b3..1545255825 100644
--- a/python/paddle/v2/fluid/tests/test_proximal_gd_op.py
+++ b/python/paddle/v2/fluid/tests/test_proximal_gd_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_rank_loss_op.py b/python/paddle/v2/fluid/tests/test_rank_loss_op.py
index 0e41ab1b3f..b4ba7920cd 100644
--- a/python/paddle/v2/fluid/tests/test_rank_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_rank_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py
index 84f4e36fa7..bcc3457aa3 100644
--- a/python/paddle/v2/fluid/tests/test_recurrent_op.py
+++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid.layers as layers
diff --git a/python/paddle/v2/fluid/tests/test_reduce_op.py b/python/paddle/v2/fluid/tests/test_reduce_op.py
index a021d4dd91..57ee307ba6 100644
--- a/python/paddle/v2/fluid/tests/test_reduce_op.py
+++ b/python/paddle/v2/fluid/tests/test_reduce_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_registry.py b/python/paddle/v2/fluid/tests/test_registry.py
index f8328f31cf..dba1189630 100644
--- a/python/paddle/v2/fluid/tests/test_registry.py
+++ b/python/paddle/v2/fluid/tests/test_registry.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import warnings
 
diff --git a/python/paddle/v2/fluid/tests/test_regularizer.py b/python/paddle/v2/fluid/tests/test_regularizer.py
index 890c881a12..9eaae1904a 100644
--- a/python/paddle/v2/fluid/tests/test_regularizer.py
+++ b/python/paddle/v2/fluid/tests/test_regularizer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 import paddle.v2.fluid.framework as framework
diff --git a/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py b/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py
index 8b79d448e2..0bcdfafcf4 100644
--- a/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py
+++ b/python/paddle/v2/fluid/tests/test_reorder_lod_tensor.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.core as core
@@ -6,8 +19,8 @@ import numpy
 
 class TestReorderLoDTensor(unittest.TestCase):
     num_seq = 5
-    # [name, dim, lod_level] pair indicating data info of source and target
-    data_desc = (['input', 9, 0], ['ref', 5, 1])
+    # [name, shape, lod_level] pair indicating data info of source and target
+    data_desc = (['input', [9], 0], ['ref', [5], 1])
 
     @classmethod
     def setUpClass(cls):
@@ -16,10 +29,10 @@ class TestReorderLoDTensor(unittest.TestCase):
     @classmethod
     def set_program(cls):
         dat = fluid.layers.data(
-            name=cls.data_desc[0][0], shape=[cls.data_desc[0][1]])
+            name=cls.data_desc[0][0], shape=cls.data_desc[0][1])
         dat.stop_gradient = False
         rank_dat = fluid.layers.data(
-            name=cls.data_desc[1][0], shape=[cls.data_desc[1][1]])
+            name=cls.data_desc[1][0], shape=cls.data_desc[1][1])
         table = fluid.layers.lod_rank_table(rank_dat)
         new_dat = fluid.layers.reorder_lod_tensor_by_rank(
             x=dat, rank_table=table)
@@ -49,7 +62,7 @@ class TestReorderLoDTensor(unittest.TestCase):
         self.data = {}
         for desc in self.data_desc:
             data_name = desc[0]
-            data_dim = desc[1]
+            data_shape = desc[1]
             data_lod_level = desc[2]
             data_lod = []
             for i in range(data_lod_level):
@@ -59,9 +72,9 @@ class TestReorderLoDTensor(unittest.TestCase):
                     size=self.num_seq if i == 0 else lod_level_i[-1])
                 lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist()
                 data_lod.append(lod_level_i)
-            data_value = numpy.random.random(size=[
-                data_lod[-1][-1] if data_lod else self.num_seq, data_dim
-            ]).astype('float32')
+            data_value = numpy.random.random(
+                size=[data_lod[-1][-1] if data_lod else self.num_seq
+                      ] + data_shape).astype('float32')
             self.data[data_name] = (data_value, data_lod)
 
     def set_inputs(self, place):
@@ -163,8 +176,6 @@ class TestReorderLoDTensor(unittest.TestCase):
                 numpy.allclose(
                     numpy.array(actual_grad), expect_grad, atol=0.001))
             self.assertEqual(expect_grad_lod, actual_grad.lod())
-        global outputs_from_tensor_implicit_lod
-        outputs_from_tensor_implicit_lod = self.actual_outputs
 
         # compare outputs between LodTensors with explicit and implicit lod
         # use the same data but set the input lod explicitly
diff --git a/python/paddle/v2/fluid/tests/test_reshape_op.py b/python/paddle/v2/fluid/tests/test_reshape_op.py
index 18ee3aece6..d6e6797043 100644
--- a/python/paddle/v2/fluid/tests/test_reshape_op.py
+++ b/python/paddle/v2/fluid/tests/test_reshape_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_rmsprop_op.py b/python/paddle/v2/fluid/tests/test_rmsprop_op.py
index 237bcfccce..27a1ea2137 100644
--- a/python/paddle/v2/fluid/tests/test_rmsprop_op.py
+++ b/python/paddle/v2/fluid/tests/test_rmsprop_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py
index d1bb20f37a..378d7f8523 100644
--- a/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py
+++ b/python/paddle/v2/fluid/tests/test_rnn_memory_helper_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 
 from paddle.v2.fluid.framework import Program
diff --git a/python/paddle/v2/fluid/tests/test_roi_pool_op.py b/python/paddle/v2/fluid/tests/test_roi_pool_op.py
index a28d9c7f82..6d7a698b09 100644
--- a/python/paddle/v2/fluid/tests/test_roi_pool_op.py
+++ b/python/paddle/v2/fluid/tests/test_roi_pool_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import math
diff --git a/python/paddle/v2/fluid/tests/test_row_conv_op.py b/python/paddle/v2/fluid/tests/test_row_conv_op.py
index 1ed86e23ac..1234d289cb 100644
--- a/python/paddle/v2/fluid/tests/test_row_conv_op.py
+++ b/python/paddle/v2/fluid/tests/test_row_conv_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_scale_op.py b/python/paddle/v2/fluid/tests/test_scale_op.py
index 2ea1e18547..9847d3d361 100644
--- a/python/paddle/v2/fluid/tests/test_scale_op.py
+++ b/python/paddle/v2/fluid/tests/test_scale_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_scatter_op.py b/python/paddle/v2/fluid/tests/test_scatter_op.py
index 1032269d5d..b6c4162f6f 100644
--- a/python/paddle/v2/fluid/tests/test_scatter_op.py
+++ b/python/paddle/v2/fluid/tests/test_scatter_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_scope.py b/python/paddle/v2/fluid/tests/test_scope.py
index e4857b590a..adaaf16906 100644
--- a/python/paddle/v2/fluid/tests/test_scope.py
+++ b/python/paddle/v2/fluid/tests/test_scope.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core
 import unittest
 
diff --git a/python/paddle/v2/fluid/tests/test_selected_rows.py b/python/paddle/v2/fluid/tests/test_selected_rows.py
index 93daf37aa2..3179a3caae 100644
--- a/python/paddle/v2/fluid/tests/test_selected_rows.py
+++ b/python/paddle/v2/fluid/tests/test_selected_rows.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 import unittest
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_seq_concat_op.py b/python/paddle/v2/fluid/tests/test_seq_concat_op.py
index dccc6ed8af..1f026fd76e 100644
--- a/python/paddle/v2/fluid/tests/test_seq_concat_op.py
+++ b/python/paddle/v2/fluid/tests/test_seq_concat_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import sys
diff --git a/python/paddle/v2/fluid/tests/test_seq_conv.py b/python/paddle/v2/fluid/tests/test_seq_conv.py
index 14edc5f953..c7e5085194 100644
--- a/python/paddle/v2/fluid/tests/test_seq_conv.py
+++ b/python/paddle/v2/fluid/tests/test_seq_conv.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import random
diff --git a/python/paddle/v2/fluid/tests/test_seq_pool.py b/python/paddle/v2/fluid/tests/test_seq_pool.py
index 512d8b315f..bb15495373 100644
--- a/python/paddle/v2/fluid/tests/test_seq_pool.py
+++ b/python/paddle/v2/fluid/tests/test_seq_pool.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_sequence_erase_op.py b/python/paddle/v2/fluid/tests/test_sequence_erase_op.py
new file mode 100644
index 0000000000..650984009a
--- /dev/null
+++ b/python/paddle/v2/fluid/tests/test_sequence_erase_op.py
@@ -0,0 +1,48 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+def sequence_erase(in_seq, lod0, tokens):
+    new_lod0 = [0]
+    out_seq = []
+    for i in range(0, len(lod0) - 1):
+        num_out = 0
+        for dat in in_seq[lod0[i]:lod0[i + 1]]:
+            if dat not in tokens:
+                out_seq.append(dat)
+                num_out += 1
+        new_lod0.append(new_lod0[-1] + num_out)
+    return np.array(out_seq).astype("int32"), new_lod0
+
+
+class TestSequenceEraseOp(OpTest):
+    def setUp(self):
+        self.op_type = "sequence_erase"
+        in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
+        lod = [[0, 9, 13, 24, 30]]
+        tokens = [2, 3, 5]
+        out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
+        self.attrs = {'tokens': tokens}
+        self.inputs = {'X': (in_seq, lod)}
+        self.outputs = {'Out': (out_seq, [new_lod0])}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_sequence_expand.py b/python/paddle/v2/fluid/tests/test_sequence_expand.py
index 0f22612d3d..aacdabf295 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_expand.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_expand.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
index ccd9a05343..94062431f0 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_slice_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import sys
diff --git a/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py b/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py
index 8bffdd5856..8170e4d7f1 100644
--- a/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py
+++ b/python/paddle/v2/fluid/tests/test_sequence_softmax_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_sgd_op.py b/python/paddle/v2/fluid/tests/test_sgd_op.py
index 14d41e172a..4a71fb30a9 100644
--- a/python/paddle/v2/fluid/tests/test_sgd_op.py
+++ b/python/paddle/v2/fluid/tests/test_sgd_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
index a14721b9aa..1825a5258f 100644
--- a/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
+++ b/python/paddle/v2/fluid/tests/test_shrink_rnn_memory.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py b/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py
index c42f578f72..132502c9cb 100644
--- a/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py
+++ b/python/paddle/v2/fluid/tests/test_sigmoid_cross_entropy_with_logits_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 from op_test import OpTest
 from scipy.special import logit
diff --git a/python/paddle/v2/fluid/tests/test_sign_op.py b/python/paddle/v2/fluid/tests/test_sign_op.py
index c6b59bcfd8..f649cb9e7c 100644
--- a/python/paddle/v2/fluid/tests/test_sign_op.py
+++ b/python/paddle/v2/fluid/tests/test_sign_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py b/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py
index b7f13c5699..1052eaa8b0 100644
--- a/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py
+++ b/python/paddle/v2/fluid/tests/test_smooth_l1_loss_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_softmax_op.py b/python/paddle/v2/fluid/tests/test_softmax_op.py
index 136fc0283a..d03e50b2f1 100644
--- a/python/paddle/v2/fluid/tests/test_softmax_op.py
+++ b/python/paddle/v2/fluid/tests/test_softmax_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py b/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py
index c2f07f9096..330467081b 100644
--- a/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py
+++ b/python/paddle/v2/fluid/tests/test_softmax_with_cross_entropy_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 
diff --git a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py
index 2e4defd55d..4e90404eca 100644
--- a/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py
+++ b/python/paddle/v2/fluid/tests/test_split_and_merge_lod_tensor_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.core as core
 import numpy as np
diff --git a/python/paddle/v2/fluid/tests/test_split_op.py b/python/paddle/v2/fluid/tests/test_split_op.py
index 37c6ebb89d..000c300446 100644
--- a/python/paddle/v2/fluid/tests/test_split_op.py
+++ b/python/paddle/v2/fluid/tests/test_split_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_spp_op.py b/python/paddle/v2/fluid/tests/test_spp_op.py
index 007723f0e3..f09bb94b44 100644
--- a/python/paddle/v2/fluid/tests/test_spp_op.py
+++ b/python/paddle/v2/fluid/tests/test_spp_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py b/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py
index dc6ebf5d30..7b80d81d72 100644
--- a/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py
+++ b/python/paddle/v2/fluid/tests/test_squared_l2_distance_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py b/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py
index 5a52c6a66c..80994f5937 100644
--- a/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py
+++ b/python/paddle/v2/fluid/tests/test_squared_l2_norm_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy as np
 import unittest
 from numpy import linalg as LA
diff --git a/python/paddle/v2/fluid/tests/test_sum_op.py b/python/paddle/v2/fluid/tests/test_sum_op.py
index 60254291e2..366708ac83 100644
--- a/python/paddle/v2/fluid/tests/test_sum_op.py
+++ b/python/paddle/v2/fluid/tests/test_sum_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_tensor.py b/python/paddle/v2/fluid/tests/test_tensor.py
index 9f870d9eb3..62a48b206c 100644
--- a/python/paddle/v2/fluid/tests/test_tensor.py
+++ b/python/paddle/v2/fluid/tests/test_tensor.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import paddle.v2.fluid.core as core
 import unittest
 import numpy
diff --git a/python/paddle/v2/fluid/tests/test_top_k_op.py b/python/paddle/v2/fluid/tests/test_top_k_op.py
index 6e8fbefa6e..86968dba14 100644
--- a/python/paddle/v2/fluid/tests/test_top_k_op.py
+++ b/python/paddle/v2/fluid/tests/test_top_k_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_transpose_op.py b/python/paddle/v2/fluid/tests/test_transpose_op.py
index 9409cbaa00..ff2541f450 100644
--- a/python/paddle/v2/fluid/tests/test_transpose_op.py
+++ b/python/paddle/v2/fluid/tests/test_transpose_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_uniform_random_op.py b/python/paddle/v2/fluid/tests/test_uniform_random_op.py
index dbe4d6bcd0..332ac4f07f 100644
--- a/python/paddle/v2/fluid/tests/test_uniform_random_op.py
+++ b/python/paddle/v2/fluid/tests/test_uniform_random_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy
 
diff --git a/python/paddle/v2/fluid/tests/test_unpool_op.py b/python/paddle/v2/fluid/tests/test_unpool_op.py
index e87f283042..988c0c7506 100644
--- a/python/paddle/v2/fluid/tests/test_unpool_op.py
+++ b/python/paddle/v2/fluid/tests/test_unpool_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import numpy as np
 from op_test import OpTest
diff --git a/python/paddle/v2/fluid/tests/test_variable.py b/python/paddle/v2/fluid/tests/test_variable.py
index f1e4c0ba21..199fd4a8c2 100644
--- a/python/paddle/v2/fluid/tests/test_variable.py
+++ b/python/paddle/v2/fluid/tests/test_variable.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 from paddle.v2.fluid.framework import default_main_program, Program, convert_np_dtype_to_dtype_
 import paddle.v2.fluid.core as core
diff --git a/python/paddle/v2/fluid/tests/test_warpctc_op.py b/python/paddle/v2/fluid/tests/test_warpctc_op.py
index 59390d5303..9f565676c5 100644
--- a/python/paddle/v2/fluid/tests/test_warpctc_op.py
+++ b/python/paddle/v2/fluid/tests/test_warpctc_op.py
@@ -1,9 +1,24 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import sys
 import unittest
 import numpy as np
 from op_test import OpTest
 from test_softmax_op import stable_softmax
 
+CUDA_BLOCK_SIZE = 512
+
 
 class CTCForward(object):
     def __init__(self, softmax, softmax_lod, labels, labels_lod, blank,
@@ -154,47 +169,63 @@ class CTCForward(object):
 
 
 class TestWarpCTCOp(OpTest):
+    def config(self):
+        self.batch_size = 4
+        self.num_classes = 8
+        self.logits_lod = [[0, 4, 5, 8, 11]]
+        self.labels_lod = [[0, 3, 4, 8, 12]]
+        self.blank = self.num_classes - 1
+        self.norm_by_times = False
+
     def setUp(self):
         self.op_type = "warpctc"
+        self.config()
 
-        batch_size = 4
-        num_classes = 8
-        logits_lod = [[0, 4, 5, 8, 11]]
-        logits = np.random.uniform(0.1, 1.0,
-                                   [11, num_classes]).astype("float32")
+        logits = np.random.uniform(
+            0.1, 1.0,
+            [self.logits_lod[0][-1], self.num_classes]).astype("float32")
         softmax = np.apply_along_axis(stable_softmax, 1, logits)
-        labels_lod = [[0, 3, 4, 8, 12]]
         # labels should not be blank
-        labels = np.random.randint(0, num_classes - 1, [12, 1], dtype="int32")
-
-        blank = num_classes - 1
-        norm_by_times = False
+        labels = np.random.randint(
+            0, self.num_classes - 1, [self.labels_lod[0][-1], 1], dtype="int32")
 
-        ctc = CTCForward(softmax, logits_lod, labels, labels_lod, blank,
-                         norm_by_times)
+        ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
+                         self.blank, self.norm_by_times)
         loss = ctc.forward()
 
         max_sequence_length = 0
-        for i in range(batch_size):
-            max_sequence_length = max(max_sequence_length,
-                                      logits_lod[0][i + 1] - logits_lod[0][i])
-        gradient = np.zeros(
-            [max_sequence_length, batch_size, num_classes], dtype="float32")
+        for i in range(self.batch_size):
+            max_sequence_length = max(
+                max_sequence_length,
+                self.logits_lod[0][i + 1] - self.logits_lod[0][i])
+        self.gradient = np.zeros(
+            [max_sequence_length, self.batch_size, self.num_classes],
+            dtype="float32")
 
         self.inputs = {
-            "Logits": (logits, logits_lod),
-            "Label": (labels, labels_lod)
+            "Logits": (logits, self.logits_lod),
+            "Label": (labels, self.labels_lod)
         }
         self.outputs = {"Loss": loss}
-        self.attrs = {"blank": blank, "norm_by_times": norm_by_times}
+        self.attrs = {"blank": self.blank, "norm_by_times": self.norm_by_times}
 
     def test_check_output(self):
         self.check_output()
 
+    def test_check_grad(self):
+        self.outputs['WarpCTCGrad'] = self.gradient
+        self.check_grad(["Logits"], "Loss", max_relative_error=0.007)
+
+
+class TestWarpCTCOpCase1(TestWarpCTCOp):
+    def config(self):
+        self.batch_size = 4
+        self.num_classes = CUDA_BLOCK_SIZE + 2
+        self.logits_lod = [[0, 4, 5, 8, 11]]
+        self.labels_lod = [[0, 3, 4, 8, 12]]
+        self.blank = 0
+        self.norm_by_times = False
 
-#    def test_check_grad(self):
-#        self.outputs["WarpCTCGrad"] = None
-#        self.check_grad(["Logits"], "Loss", max_relative_error=0.01)
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/v2/fluid/tests/test_while_op.py b/python/paddle/v2/fluid/tests/test_while_op.py
index 7c5593cc5e..72de0a0361 100644
--- a/python/paddle/v2/fluid/tests/test_while_op.py
+++ b/python/paddle/v2/fluid/tests/test_while_op.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import paddle.v2.fluid.layers as layers
 from paddle.v2.fluid.executor import Executor
diff --git a/python/paddle/v2/image.py b/python/paddle/v2/image.py
index 7408ea8ef6..a6fa0cecb8 100644
--- a/python/paddle/v2/image.py
+++ b/python/paddle/v2/image.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 """
 This file contains some common interfaces for image preprocess.
 Many users are confused about the image layout. We introduce
diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py
index 9148cb56cf..39d1bfff0c 100644
--- a/python/paddle/v2/inference.py
+++ b/python/paddle/v2/inference.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import numpy
 import collections
 import topology
diff --git a/python/paddle/v2/master/__init__.py b/python/paddle/v2/master/__init__.py
index c8975b5d4a..09daaaa75e 100644
--- a/python/paddle/v2/master/__init__.py
+++ b/python/paddle/v2/master/__init__.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 from client import *
 
 __all__ = ['client']
diff --git a/python/paddle/v2/master/client.py b/python/paddle/v2/master/client.py
index fc718f031e..b874c2f349 100644
--- a/python/paddle/v2/master/client.py
+++ b/python/paddle/v2/master/client.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import ctypes
 import os
 
diff --git a/python/paddle/v2/reader/tests/__init__.py b/python/paddle/v2/reader/tests/__init__.py
index e69de29bb2..2619c1c0e9 100644
--- a/python/paddle/v2/reader/tests/__init__.py
+++ b/python/paddle/v2/reader/tests/__init__.py
@@ -0,0 +1,13 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
diff --git a/python/paddle/v2/tests/test_parameters.py b/python/paddle/v2/tests/test_parameters.py
index 7ba8a939fb..ab6863620f 100644
--- a/python/paddle/v2/tests/test_parameters.py
+++ b/python/paddle/v2/tests/test_parameters.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 import unittest
 import sys
 
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index db01ab7374..1a70a7203b 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -1,3 +1,16 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
 """
 Module Trainer
 """
diff --git a/python/setup.py.in b/python/setup.py.in
index 66ccfe8087..65ec58ecf9 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -31,6 +31,7 @@ patch           = '%(patch)d'
 rc              = '%(rc)d'
 istaged         = %(istaged)s
 commit          = '%(commit)s'
+with_mkl        = '%(with_mkl)s'
 
 def show():
     if istaged:
@@ -41,6 +42,9 @@ def show():
         print 'rc:', rc
     else:
         print 'commit:', commit
+
+def mkl():
+    return with_mkl
 '''
     commit = git_commit()
     with open(filename, 'w') as f:
@@ -51,7 +55,8 @@ def show():
             'rc': RC,
             'version': '${PADDLE_VERSION}',
             'commit': commit,
-            'istaged': ISTAGED})
+            'istaged': ISTAGED,
+            'with_mkl': '@WITH_MKL@'})
 
 write_version_py(filename='@PADDLE_SOURCE_DIR@/python/paddle/version.py')
 
diff --git a/tools/manylinux1/Dockerfile.android b/tools/manylinux1/Dockerfile.android
new file mode 100644
index 0000000000..b6cae228a0
--- /dev/null
+++ b/tools/manylinux1/Dockerfile.android
@@ -0,0 +1,55 @@
+FROM ubuntu:16.04
+MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
+
+ARG UBUNTU_MIRROR
+RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
+
+# ENV variables
+ARG ANDROID_ABI
+ARG ANDROID_API
+
+ENV ANDROID_ABI=${ANDROID_ABI:-"armeabi-v7a"}
+ENV ANDROID_API=${ANDROID_API:-21}
+
+ENV HOME=/root \
+    ANDROID_NDK_HOME=/opt/android-ndk-linux \
+    ANDROID_TOOLCHAINS_DIR=/opt/toolchains
+
+RUN apt-get update && \
+    apt-get install -y \
+    git python-dev python-pip python-numpy \
+    wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \
+    apt-get clean -y
+
+# Install Go and glide
+RUN wget -qO- go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
+    tar -xz -C /usr/local && \
+    mkdir /root/gopath && \
+    mkdir /root/gopath/bin && \
+    mkdir /root/gopath/src
+ENV GOROOT=/usr/local/go GOPATH=/root/gopath
+# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT.
+ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
+
+# git credential to skip password typing
+RUN git config --global credential.helper store
+
+# Fix locales to en_US.UTF-8
+RUN localedef -i en_US -f UTF-8 en_US.UTF-8
+
+RUN pip install --upgrade pip && \
+    pip install -U 'protobuf==3.1.0' && \
+    pip install -U wheel sphinx && \
+    pip install pre-commit
+
+# Android NDK
+RUN mkdir -p ${ANDROID_TOOLCHAINS_DIR} && \
+    mkdir -p /opt/android-ndk-tmp && \
+    cd /opt/android-ndk-tmp && \
+    wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \
+    unzip -q android-ndk-r14b-linux-x86_64.zip && \
+    mv android-ndk-r14b ${ANDROID_NDK_HOME} && \
+    rm -rf /opt/android-ndk-tmp
+
+CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"]
+
diff --git a/tools/manylinux1/Dockerfile.x64 b/tools/manylinux1/Dockerfile.x64
new file mode 100644
index 0000000000..2c6ba650a5
--- /dev/null
+++ b/tools/manylinux1/Dockerfile.x64
@@ -0,0 +1,54 @@
+# NOTE The manylinux1 policy mandates CentOS-5. We replace it with CentOS-6 in
+# order to satisfy the build of capnproto library (a nupic.core dependency),
+# which requires some headers and symbols not present on CentOS-5 (e.g.,
+# signalfd.h, pipe2, O_NONBLOCK, SOCK_NONBLOCK, etc.). See
+# https://github.com/sandstorm-io/capnproto/issues/350.
+FROM nvidia/cuda:<baseimg>
+MAINTAINER Numenta, based on the ManyLinux project
+
+ENV LC_ALL en_US.UTF-8
+ENV LANG en_US.UTF-8
+ENV LANGUAGE en_US.UTF-8
+ENV PATH /opt/rh/devtoolset-2/root/usr/bin:$PATH
+ENV LD_LIBRARY_PATH /opt/rh/devtoolset-2/root/usr/lib64:/opt/rh/devtoolset-2/root/usr/lib:/usr/local/lib64:/usr/local/lib:${LD_LIBRARY_PATH}
+ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
+
+COPY build_scripts /build_scripts
+RUN bash build_scripts/build.sh && rm -r build_scripts
+
+ENV SSL_CERT_FILE=/opt/_internal/certs.pem
+
+# for paddle
+RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
+    tar -xz -C /usr/local && \
+    mkdir /root/gopath && \
+    mkdir /root/gopath/bin && \
+    mkdir /root/gopath/src
+
+
+ENV GOROOT=/usr/local/go GOPATH=/root/gopath
+ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH}
+
+# protobuf 3.1.0
+RUN cd /opt && wget -q --no-check-certificate https://github.com/google/protobuf/releases/download/v3.1.0/protobuf-cpp-3.1.0.tar.gz && \
+    tar xzf protobuf-cpp-3.1.0.tar.gz && \
+    cd protobuf-3.1.0 && ./configure && make -j4 && make install && cd .. && rm -f protobuf-cpp-3.1.0.tar.gz
+
+
+RUN yum install -y sqlite-devel zlib-devel openssl-devel boost boost-devel pcre-devel vim tk-devel tkinter libtool
+
+RUN wget -O /root/requirements.txt https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt
+
+RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install -r /root/requirements.txt && \
+    LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install -r /root/requirements.txt && \
+    go get github.com/Masterminds/glide && \
+    rm -rf /root/requirements.txt
+
+RUN LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27mu/bin/pip install pre-commit 'ipython==5.3.0' opencv-python && \
+    LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH} /opt/python/cp27-cp27m/bin/pip install pre-commit 'ipython==5.3.0' opencv-python
+
+RUN wget -O /opt/swig-2.0.12.tar.gz https://sourceforge.net/projects/swig/files/swig/swig-2.0.12/swig-2.0.12.tar.gz/download && \
+    cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz
+
+RUN mkdir -p /src && cd /src && git clone https://github.com/NVIDIA/nccl.git nccl && cd nccl &&\
+    make -j `nproc` install <NCCL_MAKE_OPTS>  && cd .. && rm -rf nccl
diff --git a/tools/manylinux1/README.md b/tools/manylinux1/README.md
new file mode 100644
index 0000000000..cb0a9ac22c
--- /dev/null
+++ b/tools/manylinux1/README.md
@@ -0,0 +1,30 @@
+# buildtools
+
+We release PaddlePaddle and PaddlePaddle Fluid as shared libraries,
+which, we hope could be released as wheel packages on PyPI, so we need
+to make sure that the build follows the
+[manulinux1](https://www.python.org/dev/peps/pep-0513/) standard.
+
+The manylinux standard suggests building Python modules on an old
+system, because that a module would anyway depend on some shared
+libraries, and Linux's shared library standard states that those built
+with newer version compilers cannot work with those with older
+versions.  The suggested building environment is as old as CentOS 5.
+However, PaddlePaddle relies on CUDA, and the earlies version of
+[CentOS works with CUDA is 6](https://hub.docker.com/r/nvidia/cuda/).
+So, here we provide a Docker image basing on CentOS 6 and CUDA for
+building PaddlePaddle and making the release supports "as-manylinux as
+possible."  or "sufficiently many Linux" according to [this
+discussion](https://mail.python.org/pipermail/wheel-builders/2016-July/000175.html).
+
+The build output of our Docker image includes multiple wheel files --
+some contain the CPU-only binary, some others support CUDA; some are
+compatible with the cp27m Python ABI, some others with cp27.
+
+To build these wheels, please run the following commands:
+
+```bash
+git clone https://github.com/paddlepaddle/paddle
+cd paddle/tools/manylinux1
+REPO=[yourrepo] ./build_all.sh
+```
diff --git a/tools/manylinux1/build_all.sh b/tools/manylinux1/build_all.sh
new file mode 100755
index 0000000000..097bedb526
--- /dev/null
+++ b/tools/manylinux1/build_all.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+set -xe
+
+REPO="${REPO:-typhoon1986}"
+
+# NOTE: version matches are determined!
+sed 's/<baseimg>/7.5-cudnn5-devel-centos6/g' Dockerfile.x64 | \
+sed 's/<NCCL_MAKE_OPTS>/NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52"/g'> Dockerfile.tmp
+docker build -t ${REPO}/paddle_manylinux_devel:cuda7.5_cudnn5 -f Dockerfile.tmp .
+docker push ${REPO}/paddle_manylinux_devel:cuda7.5_cudnn5
+
+sed 's/<baseimg>/8.0-cudnn5-devel-centos6/g' Dockerfile.x64 | \
+sed 's/<NCCL_MAKE_OPTS>/NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_62,code=sm_62"/g'> Dockerfile.tmp
+docker build -t ${REPO}/paddle_manylinux_devel:cuda8.0_cudnn5 -f Dockerfile.tmp .
+docker push ${REPO}/paddle_manylinux_devel:cuda8.0_cudnn5
+
+sed 's/<baseimg>/8.0-cudnn7-devel-centos6/g' Dockerfile.x64 | \
+sed 's/<NCCL_MAKE_OPTS>/NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_62,code=sm_62"/g'> Dockerfile.tmp
+
+docker build -t ${REPO}/paddle_manylinux_devel:cuda8.0_cudnn7 -f Dockerfile.tmp .
+docker push ${REPO}/paddle_manylinux_devel:cuda8.0_cudnn7
+
+sed 's/<baseimg>/9.0-cudnn7-devel-centos6/g' Dockerfile.x64 | \
+sed 's/<NCCL_MAKE_OPTS>/NVCC_GENCODE="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_62,code=sm_62 -gencode=arch=compute_70,code=sm_70"/g'> Dockerfile.tmp
+docker build -t ${REPO}/paddle_manylinux_devel:cuda9.0_cudnn7 -f Dockerfile.tmp .
+docker push ${REPO}/paddle_manylinux_devel:cuda9.0_cudnn7
diff --git a/tools/manylinux1/build_scripts/build.sh b/tools/manylinux1/build_scripts/build.sh
new file mode 100644
index 0000000000..93591fa9dd
--- /dev/null
+++ b/tools/manylinux1/build_scripts/build.sh
@@ -0,0 +1,152 @@
+#!/bin/bash
+# Top-level build script called from Dockerfile
+
+# Stop at any error, show all commands
+set -ex
+
+# Python versions to be installed in /opt/$VERSION_NO
+# NOTE Only need python 2.7.11 for nupic.core/nupic.bindings at this time, so
+# remove others to expedite build and reduce docker image size. The original
+# manylinux docker image project builds many python versions.
+# NOTE We added back 3.5.1, since auditwheel requires python 3.3+
+CPYTHON_VERSIONS="2.7.11 3.5.1"
+
+# openssl version to build, with expected sha256 hash of .tar.gz
+# archive
+OPENSSL_ROOT=openssl-1.0.2l
+OPENSSL_HASH=ce07195b659e75f4e1db43552860070061f156a98bb37b672b101ba6e3ddf30c
+EPEL_RPM_HASH=e5ed9ecf22d0c4279e92075a64c757ad2b38049bcf5c16c4f2b75d5f6860dc0d
+DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
+PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
+CURL_ROOT=curl-7.49.1
+CURL_HASH=eb63cec4bef692eab9db459033f409533e6d10e20942f4b060b32819e81885f1
+AUTOCONF_ROOT=autoconf-2.69
+AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
+
+# Dependencies for compiling Python that we want to remove from
+# the final image after compiling Python
+PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel"
+
+# Libraries that are allowed as part of the manylinux1 profile
+MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"
+
+# Get build utilities
+MY_DIR=$(dirname "${BASH_SOURCE[0]}")
+source $MY_DIR/build_utils.sh
+
+# EPEL support
+yum -y install wget curl
+curl -sLO https://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
+check_sha256sum epel-release-6-8.noarch.rpm $EPEL_RPM_HASH
+
+# Dev toolset (for LLVM and other projects requiring C++11 support)
+curl -sLO http://people.centos.org/tru/devtools-2/devtools-2.repo
+check_sha256sum devtools-2.repo $DEVTOOLS_HASH
+mv devtools-2.repo /etc/yum.repos.d/devtools-2.repo
+rpm -Uvh --replacepkgs epel-release-6*.rpm
+rm -f epel-release-6*.rpm
+
+# Development tools and libraries
+yum -y install bzip2 make git patch unzip bison yasm diffutils \
+    automake which file \
+    kernel-devel-`uname -r` \
+    devtoolset-2-binutils devtoolset-2-gcc \
+    devtoolset-2-gcc-c++ devtoolset-2-gcc-gfortran \
+    ${PYTHON_COMPILE_DEPS}
+
+# Install more recent version of cmake
+# curl -O https://cmake.org/files/v3.8/cmake-3.8.1-Linux-x86_64.sh
+# /bin/sh cmake-3.8.1-Linux-x86_64.sh --prefix=/usr/local --skip-license
+# rm cmake-3.8.1-Linux-x86_64.sh
+
+wget -q https://cmake.org/files/v3.5/cmake-3.5.2.tar.gz && tar xzf cmake-3.5.2.tar.gz && \
+cd cmake-3.5.2 && ./bootstrap && \
+make -j4 && make install && cd .. && rm cmake-3.5.2.tar.gz
+
+
+# Install newest autoconf
+build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
+autoconf --version
+
+# Compile the latest Python releases.
+# (In order to have a proper SSL module, Python is compiled
+# against a recent openssl [see env vars above], which is linked
+# statically. We delete openssl afterwards.)
+build_openssl $OPENSSL_ROOT $OPENSSL_HASH
+mkdir -p /opt/python
+build_cpythons $CPYTHON_VERSIONS
+
+PY35_BIN=/opt/python/cp35-cp35m/bin
+# NOTE Since our custom manylinux image builds pythons with shared
+# libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running
+# python.
+ORIGINAL_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
+LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib"
+
+# Our openssl doesn't know how to find the system CA trust store
+#   (https://github.com/pypa/manylinux/issues/53)
+# And it's not clear how up-to-date that is anyway
+# So let's just use the same one pip and everyone uses
+LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install certifi
+ln -s $($PY35_BIN/python -c 'import certifi; print(certifi.where())') \
+      /opt/_internal/certs.pem
+# If you modify this line you also have to modify the versions in the
+# Dockerfiles:
+export SSL_CERT_FILE=/opt/_internal/certs.pem
+
+# Install newest curl
+build_curl $CURL_ROOT $CURL_HASH
+rm -rf /usr/local/include/curl /usr/local/lib/libcurl* /usr/local/lib/pkgconfig/libcurl.pc
+hash -r
+curl --version
+curl-config --features
+
+# Now we can delete our built SSL
+rm -rf /usr/local/ssl
+
+# Install patchelf (latest with unreleased bug fixes)
+curl -sLO https://nipy.bic.berkeley.edu/manylinux/patchelf-0.9njs2.tar.gz
+check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH
+tar -xzf patchelf-0.9njs2.tar.gz
+(cd patchelf-0.9njs2 && ./configure && make && make install)
+rm -rf patchelf-0.9njs2.tar.gz patchelf-0.9njs2
+
+# Install latest pypi release of auditwheel
+LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY35_BIN})/lib" $PY35_BIN/pip install auditwheel
+ln -s $PY35_BIN/auditwheel /usr/local/bin/auditwheel
+
+# Clean up development headers and other unnecessary stuff for
+# final image
+yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
+    avahi freetype bitstream-vera-fonts \
+    ${PYTHON_COMPILE_DEPS}  > /dev/null 2>&1
+yum -y install ${MANYLINUX1_DEPS}
+yum -y clean all > /dev/null 2>&1
+yum list installed
+# we don't need libpython*.a, and they're many megabytes
+find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
+# Strip what we can -- and ignore errors, because this just attempts to strip
+# *everything*, including non-ELF files:
+find /opt/_internal -type f -print0 \
+    | xargs -0 -n1 strip --strip-unneeded 2>/dev/null || true
+# We do not need the Python test suites, or indeed the precompiled .pyc and
+# .pyo files. Partially cribbed from:
+#    https://github.com/docker-library/python/blob/master/3.4/slim/Dockerfile
+find /opt/_internal \
+     \( -type d -a -name test -o -name tests \) \
+  -o \( -type f -a -name '*.pyc' -o -name '*.pyo' \) \
+  -print0 | xargs -0 rm -f
+
+for PYTHON in /opt/python/*/bin/python; do
+    # Add matching directory of libpython shared library to library lookup path
+    LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib"
+
+    # Smoke test to make sure that our Pythons work, and do indeed detect as
+    # being manylinux compatible:
+    LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" $PYTHON $MY_DIR/manylinux1-check.py
+    # Make sure that SSL cert checking works
+    LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" $PYTHON $MY_DIR/ssl-check.py
+done
+
+# Restore LD_LIBRARY_PATH
+LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}"
diff --git a/tools/manylinux1/build_scripts/build_utils.sh b/tools/manylinux1/build_scripts/build_utils.sh
new file mode 100755
index 0000000000..10422ae3bd
--- /dev/null
+++ b/tools/manylinux1/build_scripts/build_utils.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+# Helper utilities for build
+
+PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
+# XXX: the official https server at www.openssl.org cannot be reached
+# with the old versions of openssl and curl in Centos 5.11 hence the fallback
+# to the ftp mirror:
+# OPENSSL_DOWNLOAD_URL=ftp://ftp.openssl.org/source
+OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source
+# Ditto the curl sources
+CURL_DOWNLOAD_URL=http://curl.askapache.com/download
+
+GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
+
+AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf
+
+
+function check_var {
+    if [ -z "$1" ]; then
+        echo "required variable not defined"
+        exit 1
+    fi
+}
+
+
+function lex_pyver {
+    # Echoes Python version string padded with zeros
+    # Thus:
+    # 3.2.1 -> 003002001
+    # 3     -> 003000000
+    echo $1 | awk -F "." '{printf "%03d%03d%03d", $1, $2, $3}'
+}
+
+
+function do_cpython_build {
+    local py_ver=$1
+    check_var $py_ver
+    local ucs_setting=$2
+    check_var $ucs_setting
+    tar -xzf Python-$py_ver.tgz
+    pushd Python-$py_ver
+    if [ "$ucs_setting" = "none" ]; then
+        unicode_flags=""
+        dir_suffix=""
+    else
+        local unicode_flags="--enable-unicode=$ucs_setting"
+        local dir_suffix="-$ucs_setting"
+    fi
+    local prefix="/opt/_internal/cpython-${py_ver}${dir_suffix}"
+    mkdir -p ${prefix}/lib
+    # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
+
+    # NOTE --enable-shared for generating libpython shared library needed for
+    # linking of some of the nupic.core test executables.
+    CFLAGS="-Wformat" ./configure --prefix=${prefix} --enable-shared $unicode_flags > /dev/null
+    make -j2 > /dev/null
+    make install > /dev/null
+    popd
+    echo "ZZZ looking for libpython"
+    find / -name 'libpython*.so*'
+    rm -rf Python-$py_ver
+    # Some python's install as bin/python3. Make them available as
+    # bin/python.
+    if [ -e ${prefix}/bin/python3 ]; then
+        ln -s python3 ${prefix}/bin/python
+    fi
+    # NOTE Make libpython shared library visible to python calls below
+    LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python get-pip.py
+    LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/pip install wheel
+    local abi_tag=$(LD_LIBRARY_PATH="${prefix}/lib" ${prefix}/bin/python ${MY_DIR}/python-tag-abi-tag.py)
+    ln -s ${prefix} /opt/python/${abi_tag}
+}
+
+
+function build_cpython {
+    local py_ver=$1
+    check_var $py_ver
+    check_var $PYTHON_DOWNLOAD_URL
+    wget -q $PYTHON_DOWNLOAD_URL/$py_ver/Python-$py_ver.tgz
+    if [ $(lex_pyver $py_ver) -lt $(lex_pyver 3.3) ]; then
+        # NOTE We only need wide unicode for nupic.bindings wheel
+        do_cpython_build $py_ver ucs2
+        do_cpython_build $py_ver ucs4
+    else
+        do_cpython_build $py_ver none
+    fi
+    rm -f Python-$py_ver.tgz
+}
+
+
+function build_cpythons {
+    check_var $GET_PIP_URL
+    curl -sLO $GET_PIP_URL
+    for py_ver in $@; do
+        build_cpython $py_ver
+    done
+    rm get-pip.py
+}
+
+
+function do_openssl_build {
+    ./config no-ssl2 no-shared -fPIC --prefix=/usr/local/ssl > /dev/null
+    make > /dev/null
+    make install > /dev/null
+}
+
+
+function check_sha256sum {
+    local fname=$1
+    check_var ${fname}
+    local sha256=$2
+    check_var ${sha256}
+
+    echo "${sha256}  ${fname}" > ${fname}.sha256
+    sha256sum -c ${fname}.sha256
+    rm ${fname}.sha256
+}
+
+
+function build_openssl {
+    local openssl_fname=$1
+    check_var ${openssl_fname}
+    local openssl_sha256=$2
+    check_var ${openssl_sha256}
+    check_var ${OPENSSL_DOWNLOAD_URL}
+    curl -sLO ${OPENSSL_DOWNLOAD_URL}/${openssl_fname}.tar.gz
+    check_sha256sum ${openssl_fname}.tar.gz ${openssl_sha256}
+    tar -xzf ${openssl_fname}.tar.gz
+    (cd ${openssl_fname} && do_openssl_build)
+    rm -rf ${openssl_fname} ${openssl_fname}.tar.gz
+}
+
+
+function do_curl_build {
+    LIBS=-ldl ./configure --with-ssl --disable-shared > /dev/null
+    make > /dev/null
+    make install > /dev/null
+}
+
+
+function build_curl {
+    local curl_fname=$1
+    check_var ${curl_fname}
+    local curl_sha256=$2
+    check_var ${curl_sha256}
+    check_var ${CURL_DOWNLOAD_URL}
+    curl -sLO ${CURL_DOWNLOAD_URL}/${curl_fname}.tar.bz2
+    check_sha256sum ${curl_fname}.tar.bz2 ${curl_sha256}
+    tar -jxf ${curl_fname}.tar.bz2
+    (cd ${curl_fname} && do_curl_build)
+    rm -rf ${curl_fname} ${curl_fname}.tar.bz2
+}
+
+
+function do_standard_install {
+    ./configure > /dev/null
+    make > /dev/null
+    make install > /dev/null
+}
+
+
+function build_autoconf {
+    local autoconf_fname=$1
+    check_var ${autoconf_fname}
+    local autoconf_sha256=$2
+    check_var ${autoconf_sha256}
+    check_var ${AUTOCONF_DOWNLOAD_URL}
+    curl -sLO ${AUTOCONF_DOWNLOAD_URL}/${autoconf_fname}.tar.gz
+    check_sha256sum ${autoconf_fname}.tar.gz ${autoconf_sha256}
+    tar -zxf ${autoconf_fname}.tar.gz
+    (cd ${autoconf_fname} && do_standard_install)
+    rm -rf ${autoconf_fname} ${autoconf_fname}.tar.gz
+}
diff --git a/tools/manylinux1/build_scripts/manylinux1-check.py b/tools/manylinux1/build_scripts/manylinux1-check.py
new file mode 100644
index 0000000000..e4bde065a2
--- /dev/null
+++ b/tools/manylinux1/build_scripts/manylinux1-check.py
@@ -0,0 +1,69 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+# Logic copied from PEP 513
+
+
+def is_manylinux1_compatible():
+    # Only Linux, and only x86-64 / i686
+    from distutils.util import get_platform
+    if get_platform() not in ["linux-x86_64", "linux-i686"]:
+        return False
+
+    # Check for presence of _manylinux module
+    try:
+        import _manylinux
+        return bool(_manylinux.manylinux1_compatible)
+    except (ImportError, AttributeError):
+        # Fall through to heuristic check below
+        pass
+
+    # Check glibc version. CentOS 5 uses glibc 2.5.
+    return have_compatible_glibc(2, 5)
+
+
+def have_compatible_glibc(major, minimum_minor):
+    import ctypes
+
+    process_namespace = ctypes.CDLL(None)
+    try:
+        gnu_get_libc_version = process_namespace.gnu_get_libc_version
+    except AttributeError:
+        # Symbol doesn't exist -> therefore, we are not linked to
+        # glibc.
+        return False
+
+    # Call gnu_get_libc_version, which returns a string like "2.5".
+    gnu_get_libc_version.restype = ctypes.c_char_p
+    version_str = gnu_get_libc_version()
+    # py2 / py3 compatibility:
+    if not isinstance(version_str, str):
+        version_str = version_str.decode("ascii")
+
+    # Parse string and check against requested version.
+    version = [int(piece) for piece in version_str.split(".")]
+    assert len(version) == 2
+    if major != version[0]:
+        return False
+    if minimum_minor > version[1]:
+        return False
+    return True
+
+
+import sys
+if is_manylinux1_compatible():
+    print("%s is manylinux1 compatible" % (sys.executable, ))
+    sys.exit(0)
+else:
+    print("%s is NOT manylinux1 compatible" % (sys.executable, ))
+    sys.exit(1)
diff --git a/tools/manylinux1/build_scripts/python-tag-abi-tag.py b/tools/manylinux1/build_scripts/python-tag-abi-tag.py
new file mode 100644
index 0000000000..301fbf07a4
--- /dev/null
+++ b/tools/manylinux1/build_scripts/python-tag-abi-tag.py
@@ -0,0 +1,7 @@
+# Utility script to print the python tag + the abi tag for a Python
+# See PEP 425 for exactly what these are, but an example would be:
+#   cp27-cp27mu
+
+from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
+
+print("{0}{1}-{2}".format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))
diff --git a/tools/manylinux1/build_scripts/ssl-check.py b/tools/manylinux1/build_scripts/ssl-check.py
new file mode 100644
index 0000000000..900185cef1
--- /dev/null
+++ b/tools/manylinux1/build_scripts/ssl-check.py
@@ -0,0 +1,45 @@
+#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+# cf. https://github.com/pypa/manylinux/issues/53
+
+GOOD_SSL = "https://google.com"
+BAD_SSL = "https://self-signed.badssl.com"
+
+import sys
+
+print("Testing SSL certificate checking for Python:", sys.version)
+
+if (sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4)):
+    print("This version never checks SSL certs; skipping tests")
+    sys.exit(0)
+
+if sys.version_info[0] >= 3:
+    from urllib.request import urlopen
+    EXC = OSError
+else:
+    from urllib import urlopen
+    EXC = IOError
+
+print("Connecting to %s should work" % (GOOD_SSL, ))
+urlopen(GOOD_SSL)
+print("...it did, yay.")
+
+print("Connecting to %s should fail" % (BAD_SSL, ))
+try:
+    urlopen(BAD_SSL)
+    # If we get here then we failed:
+    print("...it DIDN'T!!!!!11!!1one!")
+    sys.exit(1)
+except EXC:
+    print("...it did, yay.")
diff --git a/v1_api_demo/README.md b/v1_api_demo/README.md
deleted file mode 100644
index 0460a85fae..0000000000
--- a/v1_api_demo/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-The examples in v1_api_demo are using v1_api currently, and will be upgraded to v2_api later.
-Thus, v1_api_demo is a temporary directory. We decide not to maintain it and will delete it in future.
-
-Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and 
-[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
diff --git a/v1_api_demo/gan/.gitignore b/v1_api_demo/gan/.gitignore
deleted file mode 100644
index 93a6f5080a..0000000000
--- a/v1_api_demo/gan/.gitignore
+++ /dev/null
@@ -1,11 +0,0 @@
-output/
-uniform_params/
-cifar_params/
-mnist_params/
-*.png
-.pydevproject
-.project
-*.log
-*.pyc
-data/mnist_data/
-data/cifar-10-batches-py/
diff --git a/v1_api_demo/gan/README.md b/v1_api_demo/gan/README.md
deleted file mode 100644
index 1908b534b0..0000000000
--- a/v1_api_demo/gan/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Generative Adversarial Networks (GAN) 
-
-This demo implements GAN training described in the original GAN paper (https://arxiv.org/abs/1406.2661) and DCGAN (https://arxiv.org/abs/1511.06434).
-
-The general training procedures are implemented in gan_trainer.py. The neural network configurations are specified in gan_conf.py (for synthetic data) and gan_conf_image.py (for image data).
-
-In order to run the model, first download the corresponding data by running the shell script in ./data.
-Then you can run the command below. The flag -d specifies the training data (cifar, mnist or uniform) and flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).  
-
-$python gan_trainer.py -d cifar --use_gpu 1
-
-The generated images will be stored in ./cifar_samples/
-The corresponding models will be stored in ./cifar_params/
diff --git a/v1_api_demo/gan/data/get_mnist_data.sh b/v1_api_demo/gan/data/get_mnist_data.sh
deleted file mode 100755
index a77c81bf5a..0000000000
--- a/v1_api_demo/gan/data/get_mnist_data.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env sh
-# This script downloads the mnist data and unzips it.
-set -e
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-rm -rf "$DIR/mnist_data"
-mkdir "$DIR/mnist_data"
-cd "$DIR/mnist_data"
-
-echo "Downloading..."
-
-for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
-do
-    if [ ! -e $fname ]; then
-        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
-        gunzip ${fname}.gz
-    fi
-done
diff --git a/v1_api_demo/gan/gan_conf.py b/v1_api_demo/gan/gan_conf.py
deleted file mode 100644
index 86ac2dffe5..0000000000
--- a/v1_api_demo/gan/gan_conf.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-
-mode = get_config_arg("mode", str, "generator")
-assert mode in set([
-    "generator", "discriminator", "generator_training", "discriminator_training"
-])
-
-is_generator_training = mode == "generator_training"
-is_discriminator_training = mode == "discriminator_training"
-is_generator = mode == "generator"
-is_discriminator = mode == "discriminator"
-
-# The network structure below follows the ref https://arxiv.org/abs/1406.2661
-# Here we used two hidden layers and batch_norm
-
-print('mode=%s' % mode)
-# the dim of the noise (z) as the input of the generator network
-noise_dim = 10
-# the dim of the hidden layer
-hidden_dim = 10
-# the dim of the generated sample
-sample_dim = 2
-
-settings(
-    batch_size=128,
-    learning_rate=1e-4,
-    learning_method=AdamOptimizer(beta1=0.5))
-
-
-def discriminator(sample):
-    """
-    discriminator ouputs the probablity of a sample is from generator
-    or real data.
-    The output has two dimenstional: dimension 0 is the probablity
-    of the sample is from generator and dimension 1 is the probabblity
-    of the sample is from real data.
-    """
-    param_attr = ParamAttr(is_static=is_generator_training)
-    bias_attr = ParamAttr(
-        is_static=is_generator_training, initial_mean=1.0, initial_std=0)
-
-    hidden = fc_layer(
-        input=sample,
-        name="dis_hidden",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=ReluActivation())
-
-    hidden2 = fc_layer(
-        input=hidden,
-        name="dis_hidden2",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-    hidden_bn = batch_norm_layer(
-        hidden2,
-        act=ReluActivation(),
-        name="dis_hidden_bn",
-        bias_attr=bias_attr,
-        param_attr=ParamAttr(
-            is_static=is_generator_training, initial_mean=1.0,
-            initial_std=0.02),
-        use_global_stats=False)
-
-    return fc_layer(
-        input=hidden_bn,
-        name="dis_prob",
-        size=2,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=SoftmaxActivation())
-
-
-def generator(noise):
-    """
-    generator generates a sample given noise
-    """
-    param_attr = ParamAttr(is_static=is_discriminator_training)
-    bias_attr = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
-
-    hidden = fc_layer(
-        input=noise,
-        name="gen_layer_hidden",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=ReluActivation())
-
-    hidden2 = fc_layer(
-        input=hidden,
-        name="gen_hidden2",
-        size=hidden_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-    hidden_bn = batch_norm_layer(
-        hidden2,
-        act=ReluActivation(),
-        name="gen_layer_hidden_bn",
-        bias_attr=bias_attr,
-        param_attr=ParamAttr(
-            is_static=is_discriminator_training,
-            initial_mean=1.0,
-            initial_std=0.02),
-        use_global_stats=False)
-
-    return fc_layer(
-        input=hidden_bn,
-        name="gen_layer1",
-        size=sample_dim,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-
-if is_generator_training:
-    noise = data_layer(name="noise", size=noise_dim)
-    sample = generator(noise)
-
-if is_discriminator_training:
-    sample = data_layer(name="sample", size=sample_dim)
-
-if is_generator_training or is_discriminator_training:
-    label = data_layer(name="label", size=1)
-    prob = discriminator(sample)
-    cost = cross_entropy(input=prob, label=label)
-    classification_error_evaluator(
-        input=prob, label=label, name=mode + '_error')
-    outputs(cost)
-
-if is_generator:
-    noise = data_layer(name="noise", size=noise_dim)
-    outputs(generator(noise))
diff --git a/v1_api_demo/gan/gan_conf_image.py b/v1_api_demo/gan/gan_conf_image.py
deleted file mode 100644
index c469227994..0000000000
--- a/v1_api_demo/gan/gan_conf_image.py
+++ /dev/null
@@ -1,298 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-
-mode = get_config_arg("mode", str, "generator")
-dataSource = get_config_arg("data", str, "mnist")
-assert mode in set([
-    "generator", "discriminator", "generator_training", "discriminator_training"
-])
-
-is_generator_training = mode == "generator_training"
-is_discriminator_training = mode == "discriminator_training"
-is_generator = mode == "generator"
-is_discriminator = mode == "discriminator"
-
-# The network structure below follows the dcgan paper 
-# (https://arxiv.org/abs/1511.06434)
-
-print('mode=%s' % mode)
-# the dim of the noise (z) as the input of the generator network
-noise_dim = 100
-# the number of filters in the layer in generator/discriminator that is 
-# closet to the image
-gf_dim = 64
-df_dim = 64
-if dataSource == "mnist":
-    sample_dim = 28  # image dim
-    c_dim = 1  # image color
-else:
-    sample_dim = 32
-    c_dim = 3
-s2, s4 = int(sample_dim / 2), int(sample_dim / 4),
-s8, s16 = int(sample_dim / 8), int(sample_dim / 16)
-
-settings(
-    batch_size=128,
-    learning_rate=2e-4,
-    learning_method=AdamOptimizer(beta1=0.5))
-
-
-def conv_bn(input,
-            channels,
-            imgSize,
-            num_filters,
-            output_x,
-            stride,
-            name,
-            param_attr,
-            bias_attr,
-            param_attr_bn,
-            bn,
-            trans=False,
-            act=ReluActivation()):
-    """
-    conv_bn is a utility function that constructs a convolution/deconv layer 
-    with an optional batch_norm layer
-
-    :param bn: whether to use batch_norm_layer
-    :type bn: bool
-    :param trans: whether to use conv (False) or deconv (True)
-    :type trans: bool
-    """
-
-    # calculate the filter_size and padding size based on the given
-    # imgSize and ouput size
-    tmp = imgSize - (output_x - 1) * stride
-    if tmp <= 1 or tmp > 5:
-        raise ValueError("conv input-output dimension does not fit")
-    elif tmp <= 3:
-        filter_size = tmp + 2
-        padding = 1
-    else:
-        filter_size = tmp
-        padding = 0
-
-    print(imgSize, output_x, stride, filter_size, padding)
-
-    if trans:
-        nameApx = "_convt"
-    else:
-        nameApx = "_conv"
-
-    if bn:
-        conv = img_conv_layer(
-            input,
-            filter_size=filter_size,
-            num_filters=num_filters,
-            name=name + nameApx,
-            num_channels=channels,
-            act=LinearActivation(),
-            groups=1,
-            stride=stride,
-            padding=padding,
-            bias_attr=bias_attr,
-            param_attr=param_attr,
-            shared_biases=True,
-            layer_attr=None,
-            filter_size_y=None,
-            stride_y=None,
-            padding_y=None,
-            trans=trans)
-
-        conv_bn = batch_norm_layer(
-            conv,
-            act=act,
-            name=name + nameApx + "_bn",
-            bias_attr=bias_attr,
-            param_attr=param_attr_bn,
-            use_global_stats=False)
-
-        return conv_bn
-    else:
-        conv = img_conv_layer(
-            input,
-            filter_size=filter_size,
-            num_filters=num_filters,
-            name=name + nameApx,
-            num_channels=channels,
-            act=act,
-            groups=1,
-            stride=stride,
-            padding=padding,
-            bias_attr=bias_attr,
-            param_attr=param_attr,
-            shared_biases=True,
-            layer_attr=None,
-            filter_size_y=None,
-            stride_y=None,
-            padding_y=None,
-            trans=trans)
-        return conv
-
-
-def generator(noise):
-    """
-    generator generates a sample given noise
-    """
-    param_attr = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.02)
-    bias_attr = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.0)
-
-    param_attr_bn = ParamAttr(
-        is_static=is_discriminator_training, initial_mean=1.0, initial_std=0.02)
-
-    h1 = fc_layer(
-        input=noise,
-        name="gen_layer_h1",
-        size=s8 * s8 * gf_dim * 4,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=LinearActivation())
-
-    h1_bn = batch_norm_layer(
-        h1,
-        act=ReluActivation(),
-        name="gen_layer_h1_bn",
-        bias_attr=bias_attr,
-        param_attr=param_attr_bn,
-        use_global_stats=False)
-
-    h2_bn = conv_bn(
-        h1_bn,
-        channels=gf_dim * 4,
-        output_x=s8,
-        num_filters=gf_dim * 2,
-        imgSize=s4,
-        stride=2,
-        name="gen_layer_h2",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True,
-        trans=True)
-
-    h3_bn = conv_bn(
-        h2_bn,
-        channels=gf_dim * 2,
-        output_x=s4,
-        num_filters=gf_dim,
-        imgSize=s2,
-        stride=2,
-        name="gen_layer_h3",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True,
-        trans=True)
-
-    return conv_bn(
-        h3_bn,
-        channels=gf_dim,
-        output_x=s2,
-        num_filters=c_dim,
-        imgSize=sample_dim,
-        stride=2,
-        name="gen_layer_h4",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=False,
-        trans=True,
-        act=TanhActivation())
-
-
-def discriminator(sample):
-    """
-    discriminator ouputs the probablity of a sample is from generator
-    or real data.
-    The output has two dimenstional: dimension 0 is the probablity
-    of the sample is from generator and dimension 1 is the probabblity
-    of the sample is from real data.
-    """
-    param_attr = ParamAttr(
-        is_static=is_generator_training, initial_mean=0.0, initial_std=0.02)
-    bias_attr = ParamAttr(
-        is_static=is_generator_training, initial_mean=0.0, initial_std=0.0)
-
-    param_attr_bn = ParamAttr(
-        is_static=is_generator_training, initial_mean=1.0, initial_std=0.02)
-
-    h0 = conv_bn(
-        sample,
-        channels=c_dim,
-        imgSize=sample_dim,
-        num_filters=df_dim,
-        output_x=s2,
-        stride=2,
-        name="dis_h0",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=False)
-
-    h1_bn = conv_bn(
-        h0,
-        channels=df_dim,
-        imgSize=s2,
-        num_filters=df_dim * 2,
-        output_x=s4,
-        stride=2,
-        name="dis_h1",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True)
-
-    h2_bn = conv_bn(
-        h1_bn,
-        channels=df_dim * 2,
-        imgSize=s4,
-        num_filters=df_dim * 4,
-        output_x=s8,
-        stride=2,
-        name="dis_h2",
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        param_attr_bn=param_attr_bn,
-        bn=True)
-
-    return fc_layer(
-        input=h2_bn,
-        name="dis_prob",
-        size=2,
-        bias_attr=bias_attr,
-        param_attr=param_attr,
-        act=SoftmaxActivation())
-
-
-if is_generator_training:
-    noise = data_layer(name="noise", size=noise_dim)
-    sample = generator(noise)
-
-if is_discriminator_training:
-    sample = data_layer(name="sample", size=sample_dim * sample_dim * c_dim)
-
-if is_generator_training or is_discriminator_training:
-    label = data_layer(name="label", size=1)
-    prob = discriminator(sample)
-    cost = cross_entropy(input=prob, label=label)
-    classification_error_evaluator(
-        input=prob, label=label, name=mode + '_error')
-    outputs(cost)
-
-if is_generator:
-    noise = data_layer(name="noise", size=noise_dim)
-    outputs(generator(noise))
diff --git a/v1_api_demo/gan/gan_trainer.py b/v1_api_demo/gan/gan_trainer.py
deleted file mode 100644
index 4a26c230f7..0000000000
--- a/v1_api_demo/gan/gan_trainer.py
+++ /dev/null
@@ -1,349 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import random
-import numpy
-import cPickle
-import sys, os
-from PIL import Image
-
-from paddle.trainer.config_parser import parse_config
-from paddle.trainer.config_parser import logger
-import py_paddle.swig_paddle as api
-import matplotlib.pyplot as plt
-
-
-def plot2DScatter(data, outputfile):
-    '''
-    Plot the data as a 2D scatter plot and save to outputfile
-    data needs to be two dimensinoal
-    '''
-    x = data[:, 0]
-    y = data[:, 1]
-    logger.info("The mean vector is %s" % numpy.mean(data, 0))
-    logger.info("The std vector is %s" % numpy.std(data, 0))
-
-    heatmap, xedges, yedges = numpy.histogram2d(x, y, bins=50)
-    extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
-
-    plt.clf()
-    plt.scatter(x, y)
-    plt.savefig(outputfile, bbox_inches='tight')
-
-
-def CHECK_EQ(a, b):
-    assert a == b, "a=%s, b=%s" % (a, b)
-
-
-def copy_shared_parameters(src, dst):
-    '''
-    copy the parameters from src to dst
-    :param src: the source of the parameters
-    :type src: GradientMachine
-    :param dst: the destination of the parameters
-    :type dst: GradientMachine
-    '''
-    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
-    src_params = dict([(p.getName(), p) for p in src_params])
-
-    for i in xrange(dst.getParameterSize()):
-        dst_param = dst.getParameter(i)
-        src_param = src_params.get(dst_param.getName(), None)
-        if src_param is None:
-            continue
-        src_value = src_param.getBuf(api.PARAMETER_VALUE)
-        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
-        CHECK_EQ(len(src_value), len(dst_value))
-        dst_value.copyFrom(src_value)
-        dst_param.setValueUpdated()
-
-
-def print_parameters(src):
-    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
-
-    print "***************"
-    for p in src_params:
-        print "Name is %s" % p.getName()
-        print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray(
-        )
-
-
-def load_mnist_data(imageFile):
-    f = open(imageFile, "rb")
-    f.read(16)
-
-    # Define number of samples for train/test
-    if "train" in imageFile:
-        n = 60000
-    else:
-        n = 10000
-
-    data = numpy.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
-    data = data / 255.0 * 2.0 - 1.0
-
-    f.close()
-    return data.astype('float32')
-
-
-def load_cifar_data(cifar_path):
-    batch_size = 10000
-    data = numpy.zeros((5 * batch_size, 32 * 32 * 3), dtype="float32")
-    for i in range(1, 6):
-        file = cifar_path + "/data_batch_" + str(i)
-        fo = open(file, 'rb')
-        dict = cPickle.load(fo)
-        fo.close()
-        data[(i - 1) * batch_size:(i * batch_size), :] = dict["data"]
-
-    data = data / 255.0 * 2.0 - 1.0
-    return data
-
-
-# synthesize 2-D uniform data
-def load_uniform_data():
-    data = numpy.random.rand(1000000, 2).astype('float32')
-    return data
-
-
-def merge(images, size):
-    if images.shape[1] == 28 * 28:
-        h, w, c = 28, 28, 1
-    else:
-        h, w, c = 32, 32, 3
-    img = numpy.zeros((h * size[0], w * size[1], c))
-    for idx in xrange(size[0] * size[1]):
-        i = idx % size[1]
-        j = idx // size[1]
-        img[j*h:j*h+h, i*w:i*w+w, :] = \
-          ((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
-    return img.astype('uint8')
-
-
-def save_images(images, path):
-    merged_img = merge(images, [8, 8])
-    if merged_img.shape[2] == 1:
-        im = Image.fromarray(numpy.squeeze(merged_img)).convert('RGB')
-    else:
-        im = Image.fromarray(merged_img, mode="RGB")
-    im.save(path)
-
-
-def get_real_samples(batch_size, data_np):
-    return data_np[numpy.random.choice(
-        data_np.shape[0], batch_size, replace=False), :]
-
-
-def get_noise(batch_size, noise_dim):
-    return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
-
-
-def get_fake_samples(generator_machine, batch_size, noise):
-    gen_inputs = api.Arguments.createArguments(1)
-    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
-    gen_outputs = api.Arguments.createArguments(0)
-    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
-    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
-    return fake_samples
-
-
-def get_training_loss(training_machine, inputs):
-    outputs = api.Arguments.createArguments(0)
-    training_machine.forward(inputs, outputs, api.PASS_TEST)
-    loss = outputs.getSlotValue(0).copyToNumpyMat()
-    return numpy.mean(loss)
-
-
-def prepare_discriminator_data_batch_pos(batch_size, data_np):
-    real_samples = get_real_samples(batch_size, data_np)
-    labels = numpy.ones(batch_size, dtype='int32')
-    inputs = api.Arguments.createArguments(2)
-    inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(real_samples))
-    inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
-    return inputs
-
-
-def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
-    fake_samples = get_fake_samples(generator_machine, batch_size, noise)
-    labels = numpy.zeros(batch_size, dtype='int32')
-    inputs = api.Arguments.createArguments(2)
-    inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(fake_samples))
-    inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
-    return inputs
-
-
-def prepare_generator_data_batch(batch_size, noise):
-    label = numpy.ones(batch_size, dtype='int32')
-    inputs = api.Arguments.createArguments(2)
-    inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
-    inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(label))
-    return inputs
-
-
-def find(iterable, cond):
-    for item in iterable:
-        if cond(item):
-            return item
-    return None
-
-
-def get_layer_size(model_conf, layer_name):
-    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
-    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
-    return layer_conf.size
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
-    parser.add_argument(
-        "--use_gpu", default="1", help="1 means use gpu for training")
-    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
-    args = parser.parse_args()
-    data_source = args.data_source
-    use_gpu = args.use_gpu
-    assert data_source in ["mnist", "cifar", "uniform"]
-    assert use_gpu in ["0", "1"]
-
-    if not os.path.exists("./%s_samples/" % data_source):
-        os.makedirs("./%s_samples/" % data_source)
-
-    if not os.path.exists("./%s_params/" % data_source):
-        os.makedirs("./%s_params/" % data_source)
-
-    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
-                   '--log_period=100', '--gpu_id=' + args.gpu_id,
-                   '--save_dir=' + "./%s_params/" % data_source)
-
-    if data_source == "uniform":
-        conf = "gan_conf.py"
-        num_iter = 10000
-    else:
-        conf = "gan_conf_image.py"
-        num_iter = 1000
-
-    gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
-    dis_conf = parse_config(conf,
-                            "mode=discriminator_training,data=" + data_source)
-    generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
-    batch_size = dis_conf.opt_config.batch_size
-    noise_dim = get_layer_size(gen_conf.model_config, "noise")
-
-    if data_source == "mnist":
-        data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
-    elif data_source == "cifar":
-        data_np = load_cifar_data("./data/cifar-10-batches-py/")
-    else:
-        data_np = load_uniform_data()
-
-    # this creates a gradient machine for discriminator
-    dis_training_machine = api.GradientMachine.createFromConfigProto(
-        dis_conf.model_config)
-    # this create a gradient machine for generator    
-    gen_training_machine = api.GradientMachine.createFromConfigProto(
-        gen_conf.model_config)
-
-    # generator_machine is used to generate data only, which is used for
-    # training discriminator
-    logger.info(str(generator_conf.model_config))
-    generator_machine = api.GradientMachine.createFromConfigProto(
-        generator_conf.model_config)
-
-    dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
-
-    gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
-
-    dis_trainer.startTrain()
-    gen_trainer.startTrain()
-
-    # Sync parameters between networks (GradientMachine) at the beginning
-    copy_shared_parameters(gen_training_machine, dis_training_machine)
-    copy_shared_parameters(gen_training_machine, generator_machine)
-
-    # constrain that either discriminator or generator can not be trained
-    # consecutively more than MAX_strike times
-    curr_train = "dis"
-    curr_strike = 0
-    MAX_strike = 5
-
-    for train_pass in xrange(100):
-        dis_trainer.startTrainPass()
-        gen_trainer.startTrainPass()
-        for i in xrange(num_iter):
-            # Do forward pass in discriminator to get the dis_loss
-            noise = get_noise(batch_size, noise_dim)
-            data_batch_dis_pos = prepare_discriminator_data_batch_pos(
-                batch_size, data_np)
-            dis_loss_pos = get_training_loss(dis_training_machine,
-                                             data_batch_dis_pos)
-
-            data_batch_dis_neg = prepare_discriminator_data_batch_neg(
-                generator_machine, batch_size, noise)
-            dis_loss_neg = get_training_loss(dis_training_machine,
-                                             data_batch_dis_neg)
-
-            dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
-
-            # Do forward pass in generator to get the gen_loss
-            data_batch_gen = prepare_generator_data_batch(batch_size, noise)
-            gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
-
-            if i % 100 == 0:
-                print "d_pos_loss is %s     d_neg_loss is %s" % (dis_loss_pos,
-                                                                 dis_loss_neg)
-                print "d_loss is %s    g_loss is %s" % (dis_loss, gen_loss)
-
-            # Decide which network to train based on the training history
-            # And the relative size of the loss        
-            if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
-               ((curr_train == "gen" and curr_strike == MAX_strike) or dis_loss > gen_loss):
-                if curr_train == "dis":
-                    curr_strike += 1
-                else:
-                    curr_train = "dis"
-                    curr_strike = 1
-                dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
-                dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
-                copy_shared_parameters(dis_training_machine,
-                                       gen_training_machine)
-
-            else:
-                if curr_train == "gen":
-                    curr_strike += 1
-                else:
-                    curr_train = "gen"
-                    curr_strike = 1
-                gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
-                # TODO: add API for paddle to allow true parameter sharing between different GradientMachines 
-                # so that we do not need to copy shared parameters. 
-                copy_shared_parameters(gen_training_machine,
-                                       dis_training_machine)
-                copy_shared_parameters(gen_training_machine, generator_machine)
-
-        dis_trainer.finishTrainPass()
-        gen_trainer.finishTrainPass()
-        # At the end of each pass, save the generated samples/images
-        fake_samples = get_fake_samples(generator_machine, batch_size, noise)
-        if data_source == "uniform":
-            plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" %
-                          (data_source, train_pass))
-        else:
-            save_images(fake_samples, "./%s_samples/train_pass%s.png" %
-                        (data_source, train_pass))
-    dis_trainer.finishTrain()
-    gen_trainer.finishTrain()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/mnist/.gitignore b/v1_api_demo/mnist/.gitignore
deleted file mode 100644
index 7e61d5e3a0..0000000000
--- a/v1_api_demo/mnist/.gitignore
+++ /dev/null
@@ -1,10 +0,0 @@
-data/raw_data
-data/*.list
-mnist_vgg_model
-plot.png
-train.log
-*pyc
-.ipynb_checkpoints
-params.pkl
-params.tar
-params.tar.gz
diff --git a/v1_api_demo/mnist/api_train.py b/v1_api_demo/mnist/api_train.py
deleted file mode 100644
index ea1caa7dd9..0000000000
--- a/v1_api_demo/mnist/api_train.py
+++ /dev/null
@@ -1,196 +0,0 @@
-"""
-A very basic example for how to use current Raw SWIG API to train mnist network.
-
-Current implementation uses Raw SWIG, which means the API call is directly \
-passed to C++ side of Paddle.
-
-The user api could be simpler and carefully designed.
-"""
-import random
-
-import numpy as np
-import paddle.v2 as paddle_v2
-import py_paddle.swig_paddle as api
-from paddle.trainer_config_helpers import *
-from py_paddle import DataProviderConverter
-
-from mnist_util import read_from_mnist
-
-
-def init_parameter(network):
-    assert isinstance(network, api.GradientMachine)
-    for each_param in network.getParameters():
-        assert isinstance(each_param, api.Parameter)
-        array_size = len(each_param)
-        array = np.random.uniform(-1.0, 1.0, array_size).astype('float32')
-        each_param.getBuf(api.PARAMETER_VALUE).copyFromNumpyArray(array)
-
-
-def generator_to_batch(generator, batch_size):
-    ret_val = list()
-    for each_item in generator:
-        ret_val.append(each_item)
-        if len(ret_val) == batch_size:
-            yield ret_val
-            ret_val = list()
-    if len(ret_val) != 0:
-        yield ret_val
-
-
-class BatchPool(object):
-    def __init__(self, generator, batch_size):
-        self.data = list(generator)
-        self.batch_size = batch_size
-
-    def __call__(self):
-        random.shuffle(self.data)
-        for offset in xrange(0, len(self.data), self.batch_size):
-            limit = min(offset + self.batch_size, len(self.data))
-            yield self.data[offset:limit]
-
-
-def input_order_converter(generator):
-    for each_item in generator:
-        yield each_item['pixel'], each_item['label']
-
-
-def main():
-    api.initPaddle("-use_gpu=false", "-trainer_count=4")  # use 4 cpu cores
-
-    optimizer = paddle_v2.optimizer.Adam(
-        learning_rate=1e-4,
-        batch_size=1000,
-        model_average=ModelAverage(average_window=0.5),
-        regularization=L2Regularization(rate=0.5))
-
-    # Create Local Updater. Local means not run in cluster.
-    # For a cluster training, here we can change to createRemoteUpdater
-    # in future.
-    updater = optimizer.create_local_updater()
-    assert isinstance(updater, api.ParameterUpdater)
-
-    # define network
-    images = paddle_v2.layer.data(
-        name='pixel', type=paddle_v2.data_type.dense_vector(784))
-    label = paddle_v2.layer.data(
-        name='label', type=paddle_v2.data_type.integer_value(10))
-    hidden1 = paddle_v2.layer.fc(input=images, size=200)
-    hidden2 = paddle_v2.layer.fc(input=hidden1, size=200)
-    inference = paddle_v2.layer.fc(input=hidden2,
-                                   size=10,
-                                   act=paddle_v2.activation.Softmax())
-    cost = paddle_v2.layer.classification_cost(input=inference, label=label)
-
-    # Create Simple Gradient Machine.
-    model_config = paddle_v2.layer.parse_network(cost)
-    m = api.GradientMachine.createFromConfigProto(model_config,
-                                                  api.CREATE_MODE_NORMAL,
-                                                  optimizer.enable_types())
-
-    # This type check is not useful. Only enable type hint in IDE.
-    # Such as PyCharm
-    assert isinstance(m, api.GradientMachine)
-
-    # Initialize Parameter by numpy.
-    init_parameter(network=m)
-
-    # Initialize ParameterUpdater.
-    updater.init(m)
-
-    # DataProvider Converter is a utility convert Python Object to Paddle C++
-    # Input. The input format is as same as Paddle's DataProvider.
-    converter = DataProviderConverter(input_types=[images.type, label.type])
-
-    train_file = './data/raw_data/train'
-    test_file = './data/raw_data/t10k'
-
-    # start gradient machine.
-    # the gradient machine must be started before invoke forward/backward.
-    # not just for training, but also for inference.
-    m.start()
-
-    # evaluator can print error rate, etc. It is a C++ class.
-    batch_evaluator = m.makeEvaluator()
-    test_evaluator = m.makeEvaluator()
-
-    # Get Train Data.
-    # TrainData will stored in a data pool. Currently implementation is not care
-    # about memory, speed. Just a very naive implementation.
-    train_data_generator = input_order_converter(read_from_mnist(train_file))
-    train_data = BatchPool(train_data_generator, 512)
-
-    # outArgs is Neural Network forward result. Here is not useful, just passed
-    # to gradient_machine.forward
-    outArgs = api.Arguments.createArguments(0)
-
-    for pass_id in xrange(2):  # we train 2 passes.
-        updater.startPass()
-
-        for batch_id, data_batch in enumerate(train_data()):
-            # data_batch is input images.
-            # here, for online learning, we could get data_batch from network.
-
-            # Start update one batch.
-            pass_type = updater.startBatch(len(data_batch))
-
-            # Start BatchEvaluator.
-            # batch_evaluator can be used between start/finish.
-            batch_evaluator.start()
-
-            # forwardBackward is a shortcut for forward and backward.
-            # It is sometimes faster than invoke forward/backward separately,
-            # because in GradientMachine, it may be async.
-            m.forwardBackward(converter(data_batch), outArgs, pass_type)
-
-            for each_param in m.getParameters():
-                updater.update(each_param)
-
-            # Get cost. We use numpy to calculate total cost for this batch.
-            cost_vec = outArgs.getSlotValue(0)
-            cost_vec = cost_vec.copyToNumpyMat()
-            cost = cost_vec.sum() / len(data_batch)
-
-            # Make evaluator works.
-            m.eval(batch_evaluator)
-
-            # Print logs.
-            print 'Pass id', pass_id, 'Batch id', batch_id, 'with cost=', \
-                cost, batch_evaluator
-
-            batch_evaluator.finish()
-            # Finish batch.
-            #  * will clear gradient.
-            #  * ensure all values should be updated.
-            updater.finishBatch(cost)
-
-        # testing stage. use test data set to test current network.
-        updater.apply()
-        test_evaluator.start()
-        test_data_generator = input_order_converter(read_from_mnist(test_file))
-        for data_batch in generator_to_batch(test_data_generator, 512):
-            # in testing stage, only forward is needed.
-            m.forward(converter(data_batch), outArgs, api.PASS_TEST)
-            m.eval(test_evaluator)
-
-        # print error rate for test data set
-        print 'Pass', pass_id, ' test evaluator: ', test_evaluator
-        test_evaluator.finish()
-        updater.restore()
-
-        updater.catchUpWith()
-        params = m.getParameters()
-        for each_param in params:
-            assert isinstance(each_param, api.Parameter)
-            value = each_param.getBuf(api.PARAMETER_VALUE)
-            value = value.copyToNumpyArray()
-
-            # Here, we could save parameter to every where you want
-            print each_param.getName(), value
-
-        updater.finishPass()
-
-    m.finish()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/mnist/data/generate_list.py b/v1_api_demo/mnist/data/generate_list.py
deleted file mode 100644
index 49981cc7a9..0000000000
--- a/v1_api_demo/mnist/data/generate_list.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-o = open("./" + "train.list", "w")
-o.write("./data/raw_data/train" + "\n")
-o.close()
-
-o = open("./" + "test.list", "w")
-o.write("./data/raw_data/t10k" + "\n")
-o.close()
diff --git a/v1_api_demo/mnist/data/get_mnist_data.sh b/v1_api_demo/mnist/data/get_mnist_data.sh
deleted file mode 100755
index 5a2e34026d..0000000000
--- a/v1_api_demo/mnist/data/get_mnist_data.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/usr/bin/env sh
-# This scripts downloads the mnist data and unzips it.
-set -e
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-rm -rf "$DIR/raw_data"
-mkdir "$DIR/raw_data"
-cd "$DIR/raw_data"
-
-echo "Downloading..."
-
-for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
-do
-    if [ ! -e $fname ]; then
-        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
-        gunzip ${fname}.gz
-    fi
-done
-
-cd $DIR
-rm -f *.list
-python generate_list.py
diff --git a/v1_api_demo/mnist/light_mnist.py b/v1_api_demo/mnist/light_mnist.py
deleted file mode 100644
index 3340905435..0000000000
--- a/v1_api_demo/mnist/light_mnist.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-is_predict = get_config_arg("is_predict", bool, False)
-
-####################Data Configuration ##################
-
-if not is_predict:
-    data_dir = './data/'
-    define_py_data_sources2(
-        train_list=data_dir + 'train.list',
-        test_list=data_dir + 'test.list',
-        module='mnist_provider',
-        obj='process')
-
-######################Algorithm Configuration #############
-settings(batch_size=50, learning_rate=0.001, learning_method=AdamOptimizer())
-
-#######################Network Configuration #############
-
-data_size = 1 * 28 * 28
-label_size = 10
-img = data_layer(name='pixel', size=data_size)
-
-
-# light cnn
-# A shallower cnn model: [CNN, BN, ReLU, Max-Pooling] x4 + FC x1
-# Easier to train for mnist dataset and quite efficient
-# Final performance is close to deeper ones on tasks such as digital and character classification 
-def light_cnn(input_image, num_channels, num_classes):
-    def __light__(ipt,
-                  num_filter=128,
-                  times=1,
-                  conv_filter_size=3,
-                  dropouts=0,
-                  num_channels_=None):
-        return img_conv_group(
-            input=ipt,
-            num_channels=num_channels_,
-            pool_size=2,
-            pool_stride=2,
-            conv_padding=0,
-            conv_num_filter=[num_filter] * times,
-            conv_filter_size=conv_filter_size,
-            conv_act=ReluActivation(),
-            conv_with_batchnorm=True,
-            conv_batchnorm_drop_rate=dropouts,
-            pool_type=MaxPooling())
-
-    tmp = __light__(input_image, num_filter=128, num_channels_=num_channels)
-    tmp = __light__(tmp, num_filter=128)
-    tmp = __light__(tmp, num_filter=128)
-    tmp = __light__(tmp, num_filter=128, conv_filter_size=1)
-
-    tmp = fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
-    return tmp
-
-
-predict = light_cnn(input_image=img, num_channels=1, num_classes=label_size)
-
-if not is_predict:
-    lbl = data_layer(name="label", size=label_size)
-    inputs(img, lbl)
-    outputs(classification_cost(input=predict, label=lbl))
-else:
-    outputs(predict)
diff --git a/v1_api_demo/mnist/mnist_provider.py b/v1_api_demo/mnist/mnist_provider.py
deleted file mode 100644
index 888cfef1e7..0000000000
--- a/v1_api_demo/mnist/mnist_provider.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from paddle.trainer.PyDataProvider2 import *
-from mnist_util import read_from_mnist
-
-
-# Define a py data provider
-@provider(
-    input_types={'pixel': dense_vector(28 * 28),
-                 'label': integer_value(10)},
-    cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, filename):  # settings is not used currently.
-    for each in read_from_mnist(filename):
-        yield each
diff --git a/v1_api_demo/mnist/mnist_util.py b/v1_api_demo/mnist/mnist_util.py
deleted file mode 100644
index 3fd88ae7ed..0000000000
--- a/v1_api_demo/mnist/mnist_util.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import numpy
-
-__all__ = ['read_from_mnist']
-
-
-def read_from_mnist(filename):
-    imgf = filename + "-images-idx3-ubyte"
-    labelf = filename + "-labels-idx1-ubyte"
-    f = open(imgf, "rb")
-    l = open(labelf, "rb")
-
-    f.read(16)
-    l.read(8)
-
-    # Define number of samples for train/test
-    if "train" in filename:
-        n = 60000
-    else:
-        n = 10000
-
-    images = numpy.fromfile(
-        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
-    images = images / 255.0 * 2.0 - 1.0
-    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
-
-    for i in xrange(n):
-        yield {"pixel": images[i, :], 'label': labels[i]}
-
-    f.close()
-    l.close()
diff --git a/v1_api_demo/mnist/train.sh b/v1_api_demo/mnist/train.sh
deleted file mode 100755
index ca2b1ad9eb..0000000000
--- a/v1_api_demo/mnist/train.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-config=vgg_16_mnist.py
-output=./mnist_vgg_model
-log=train.log
-
-paddle train \
---config=$config \
---dot_period=10 \
---log_period=100 \
---test_all_data_in_one_period=1 \
---use_gpu=0 \
---trainer_count=1 \
---num_passes=100 \
---save_dir=$output \
-2>&1 | tee $log
-paddle usage -l $log -e $? -n "mnist_train" >/dev/null 2>&1
-
-python -m paddle.utils.plotcurve -i $log > plot.png
diff --git a/v1_api_demo/mnist/vgg_16_mnist.py b/v1_api_demo/mnist/vgg_16_mnist.py
deleted file mode 100644
index a819b391c6..0000000000
--- a/v1_api_demo/mnist/vgg_16_mnist.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-is_predict = get_config_arg("is_predict", bool, False)
-
-####################Data Configuration ##################
-
-if not is_predict:
-    data_dir = './data/'
-    define_py_data_sources2(
-        train_list=data_dir + 'train.list',
-        test_list=data_dir + 'test.list',
-        module='mnist_provider',
-        obj='process')
-
-######################Algorithm Configuration #############
-settings(
-    batch_size=128,
-    learning_rate=0.1 / 128.0,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * 128))
-
-#######################Network Configuration #############
-
-data_size = 1 * 28 * 28
-label_size = 10
-img = data_layer(name='pixel', size=data_size)
-
-# small_vgg is predined in trainer_config_helpers.network
-predict = small_vgg(input_image=img, num_channels=1, num_classes=label_size)
-
-if not is_predict:
-    lbl = data_layer(name="label", size=label_size)
-    inputs(img, lbl)
-    outputs(classification_cost(input=predict, label=lbl))
-else:
-    outputs(predict)
diff --git a/v1_api_demo/model_zoo/embedding/.gitignore b/v1_api_demo/model_zoo/embedding/.gitignore
deleted file mode 100644
index 908f5a3fb2..0000000000
--- a/v1_api_demo/model_zoo/embedding/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-baidu.dict
-model_*.emb
diff --git a/v1_api_demo/model_zoo/embedding/extract_para.py b/v1_api_demo/model_zoo/embedding/extract_para.py
deleted file mode 100755
index 570b90c1f7..0000000000
--- a/v1_api_demo/model_zoo/embedding/extract_para.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python extract_para.py --preModel PREMODEL --preDict PREDICT \
-                            --usrModel USRMODEL --usrDict USRDICT -d DIM
-
-Options:
-    -h, --help          show this help message and exit
-    --preModel PREMODEL the name of pretrained embedding model
-    --preDict PREDICT   the name of pretrained dictionary
-    --usrModel usrModel the name of output usr embedding model
-    --usrDict usrDict   the name of user specified dictionary
-    -d DIM              dimension of parameter
-"""
-from optparse import OptionParser
-import struct
-
-
-def get_row_index(preDict, usrDict):
-    """
-    Get the row positions for all words in user dictionary from pre-trained dictionary.
-    return: a list of row positions
-    Example: preDict='a\nb\nc\n', usrDict='a\nc\n', then return [0,2]
-    """
-    pos = []
-    index = dict()
-    with open(preDict, "r") as f:
-        for line_index, line in enumerate(f):
-            word = line.strip().split()[0]
-            index[word] = line_index
-    with open(usrDict, "r") as f:
-        for line in f:
-            word = line.strip().split()[0]
-            pos.append(index[word])
-    return pos
-
-
-def extract_parameters_by_usrDict(preModel, preDict, usrModel, usrDict,
-                                  paraDim):
-    """
-    Extract desired parameters from a pretrained embedding model based on user dictionary
-    """
-    if paraDim not in [32, 64, 128, 256]:
-        raise RuntimeError("We only support 32, 64, 128, 256 dimensions now")
-
-    fi = open(preModel, "rb")
-    fo = open(usrModel, "wb")
-
-    # write filehead
-    rowIndex = get_row_index(preDict, usrDict)
-    newHead = struct.pack("iil", 0, 4, len(rowIndex) * paraDim)
-    fo.write(newHead)
-    bytes = 4 * paraDim
-    for i in range(0, len(rowIndex)):
-        # find the absolute position of input file
-        fi.seek(rowIndex[i] * bytes + 16, 0)
-        fo.write(fi.read(bytes))
-
-    print "extract parameters finish, total", len(rowIndex), "lines"
-    fi.close()
-
-
-def main():
-    """
-    Main entry for running paraconvert.py 
-    """
-    usage = "usage: \n" \
-            "python %prog --preModel PREMODEL --preDict PREDICT" \
-            " --usrModel USRMODEL --usrDict USRDICT -d DIM"
-    parser = OptionParser(usage)
-    parser.add_option(
-        "--preModel",
-        action="store",
-        dest="preModel",
-        help="the name of pretrained embedding model")
-    parser.add_option(
-        "--preDict",
-        action="store",
-        dest="preDict",
-        help="the name of pretrained dictionary")
-    parser.add_option(
-        "--usrModel",
-        action="store",
-        dest="usrModel",
-        help="the name of output usr embedding model")
-    parser.add_option(
-        "--usrDict",
-        action="store",
-        dest="usrDict",
-        help="the name of user specified dictionary")
-    parser.add_option(
-        "-d", action="store", dest="dim", help="dimension of parameter")
-    (options, args) = parser.parse_args()
-    extract_parameters_by_usrDict(options.preModel, options.preDict,
-                                  options.usrModel, options.usrDict,
-                                  int(options.dim))
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/model_zoo/embedding/paraconvert.py b/v1_api_demo/model_zoo/embedding/paraconvert.py
deleted file mode 100755
index ce7a70efc4..0000000000
--- a/v1_api_demo/model_zoo/embedding/paraconvert.py
+++ /dev/null
@@ -1,159 +0,0 @@
-#!/bin/env python
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Example:
-    python paraconvert.py --b2t -i INPUT -o OUTPUT -d DIM
-    python paraconvert.py --t2b -i INPUT -o OUTPUT
-
-Options:
-    -h, --help  show this help message and exit
-    --b2t       convert parameter file of embedding model from binary to text
-    --t2b       convert parameter file of embedding model from text to binary
-    -i INPUT    input parameter file name
-    -o OUTPUT   output parameter file name
-    -d DIM      dimension of parameter
-"""
-from optparse import OptionParser
-import struct
-
-
-def binary2text(input, output, paraDim):
-    """
-    Convert a binary parameter file of embedding model to be a text file.  
-    input: the name of input binary parameter file, the format is:
-           1) the first 16 bytes is filehead:
-                version(4 bytes): version of paddle, default = 0
-                floatSize(4 bytes): sizeof(float) = 4
-                paraCount(8 bytes): total number of parameter
-           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes 
-    output: the name of output text parameter file, for example:
-           0,4,32156096
-           -0.7845433,1.1937413,-0.1704215,...
-           0.0000909,0.0009465,-0.0008813,...
-           ...
-           the format is:
-           1) the first line is filehead: 
-              version=0, floatSize=4, paraCount=32156096
-           2) other lines print the paramters
-              a) each line prints paraDim paramters splitted by ','
-              b) there is paraCount/paraDim lines (embedding words)
-    paraDim: dimension of parameters 
-    """
-    fi = open(input, "rb")
-    fo = open(output, "w")
-    """
-    """
-    version, floatSize, paraCount = struct.unpack("iil", fi.read(16))
-    newHead = ','.join([str(version), str(floatSize), str(paraCount)])
-    print >> fo, newHead
-
-    bytes = 4 * int(paraDim)
-    format = "%df" % int(paraDim)
-    context = fi.read(bytes)
-    line = 0
-
-    while context:
-        numbers = struct.unpack(format, context)
-        lst = []
-        for i in numbers:
-            lst.append('%8.7f' % i)
-        print >> fo, ','.join(lst)
-        context = fi.read(bytes)
-        line += 1
-    fi.close()
-    fo.close()
-    print "binary2text finish, total", line, "lines"
-
-
-def get_para_count(input):
-    """
-    Compute the total number of embedding parameters in input text file. 
-    input: the name of input text file
-    """
-    numRows = 1
-    paraDim = 0
-    with open(input) as f:
-        line = f.readline()
-        paraDim = len(line.split(","))
-        for line in f:
-            numRows += 1
-    return numRows * paraDim
-
-
-def text2binary(input, output, paddle_head=True):
-    """
-    Convert a text parameter file of embedding model to be a binary file.
-    input: the name of input text parameter file, for example:
-           -0.7845433,1.1937413,-0.1704215,...
-           0.0000909,0.0009465,-0.0008813,... 
-           ...
-           the format is:
-           1) it doesn't have filehead
-           2) each line stores the same dimension of parameters, 
-              the separator is commas ','
-    output: the name of output binary parameter file, the format is:
-           1) the first 16 bytes is filehead: 
-             version(4 bytes), floatSize(4 bytes), paraCount(8 bytes)
-           2) the next (paraCount * 4) bytes is parameters, each has 4 bytes
-    """
-    fi = open(input, "r")
-    fo = open(output, "wb")
-
-    newHead = struct.pack("iil", 0, 4, get_para_count(input))
-    fo.write(newHead)
-
-    count = 0
-    for line in fi:
-        line = line.strip().split(",")
-        for i in range(0, len(line)):
-            binary_data = struct.pack("f", float(line[i]))
-            fo.write(binary_data)
-        count += 1
-    fi.close()
-    fo.close()
-    print "text2binary finish, total", count, "lines"
-
-
-def main():
-    """
-    Main entry for running paraconvert.py 
-    """
-    usage = "usage: \n" \
-            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
-            "python %prog --t2b -i INPUT -o OUTPUT"
-    parser = OptionParser(usage)
-    parser.add_option(
-        "--b2t",
-        action="store_true",
-        help="convert parameter file of embedding model from binary to text")
-    parser.add_option(
-        "--t2b",
-        action="store_true",
-        help="convert parameter file of embedding model from text to binary")
-    parser.add_option(
-        "-i", action="store", dest="input", help="input parameter file name")
-    parser.add_option(
-        "-o", action="store", dest="output", help="output parameter file name")
-    parser.add_option(
-        "-d", action="store", dest="dim", help="dimension of parameter")
-    (options, args) = parser.parse_args()
-    if options.b2t:
-        binary2text(options.input, options.output, options.dim)
-    if options.t2b:
-        text2binary(options.input, options.output)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh b/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh
deleted file mode 100755
index f61c65a935..0000000000
--- a/v1_api_demo/model_zoo/embedding/pre_DictAndModel.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-set -x
-BASE_URL='http://paddlepaddle.cdn.bcebos.com/model_zoo/embedding'
-
-DOWNLOAD_ITEMS=(baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb)
-ITEM_MD5=(fa03a12321eaab6c30a8fcc9442eaea3
-          f88c8325ee6da6187f1080e8fe66c1cd
-          927cf70f27f860aff1a5703ebf7f1584
-	  a52e43655cd25d279777ed509a1ae27b
-	  b92c67fe9ff70fea53596080e351ac80)
-
-for ((i=0; i<${#ITEM_MD5[@]}; i++))
-do
-  FILENAME=${DOWNLOAD_ITEMS[${i}]}
-  REAL_MD5=`wget ${BASE_URL}/${FILENAME} -O - | tee ${FILENAME} | md5sum | cut -d ' ' -f 1`
-  EXPECTED_MD5=${ITEM_MD5[${i}]}
-  [ "${EXPECTED_MD5}" = "${REAL_MD5}" ]
-done
diff --git a/v1_api_demo/model_zoo/resnet/.gitignore b/v1_api_demo/model_zoo/resnet/.gitignore
deleted file mode 100644
index 7a64209b62..0000000000
--- a/v1_api_demo/model_zoo/resnet/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-fea_output/
-features/
-model.list
-ResNet_50.dot
-ResNet_50.png
diff --git a/v1_api_demo/model_zoo/resnet/classify.py b/v1_api_demo/model_zoo/resnet/classify.py
deleted file mode 100755
index 6074cc1d3a..0000000000
--- a/v1_api_demo/model_zoo/resnet/classify.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import cPickle
-import logging
-from PIL import Image
-import numpy as np
-from optparse import OptionParser
-
-import paddle.utils.image_util as image_util
-
-from py_paddle import swig_paddle, DataProviderConverter
-from paddle.trainer.PyDataProvider2 import dense_vector
-from paddle.trainer.config_parser import parse_config
-
-logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
-logging.getLogger().setLevel(logging.INFO)
-
-
-class ImageClassifier():
-    def __init__(self,
-                 train_conf,
-                 model_dir=None,
-                 resize_dim=256,
-                 crop_dim=224,
-                 use_gpu=True,
-                 mean_file=None,
-                 output_layer=None,
-                 oversample=False,
-                 is_color=True):
-        """
-        train_conf: network configure.
-        model_dir: string, directory of model.
-        resize_dim: int, resized image size.
-        crop_dim: int, crop size.
-        mean_file: string, image mean file.
-        oversample: bool, oversample means multiple crops, namely five
-                    patches (the four corner patches and the center
-                    patch) as well as their horizontal reflections,
-                    ten crops in all.
-        """
-        self.train_conf = train_conf
-        self.model_dir = model_dir
-        if model_dir is None:
-            self.model_dir = os.path.dirname(train_conf)
-
-        self.resize_dim = resize_dim
-        self.crop_dims = [crop_dim, crop_dim]
-        self.oversample = oversample
-        self.is_color = is_color
-
-        self.output_layer = output_layer
-        if self.output_layer:
-            assert isinstance(self.output_layer, basestring)
-            self.output_layer = self.output_layer.split(",")
-
-        self.transformer = image_util.ImageTransformer(is_color=is_color)
-        self.transformer.set_transpose((2, 0, 1))
-        self.transformer.set_channel_swap((2, 1, 0))
-
-        self.mean_file = mean_file
-        if self.mean_file is not None:
-            mean = np.load(self.mean_file)['data_mean']
-            mean = mean.reshape(3, self.crop_dims[0], self.crop_dims[1])
-            self.transformer.set_mean(mean)  # mean pixel
-        else:
-            # if you use three mean value, set like:
-            # this three mean value is calculated from ImageNet.
-            self.transformer.set_mean(np.array([103.939, 116.779, 123.68]))
-
-        conf_args = "is_test=1,use_gpu=%d,is_predict=1" % (int(use_gpu))
-        conf = parse_config(train_conf, conf_args)
-        swig_paddle.initPaddle("--use_gpu=%d" % (int(use_gpu)))
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(
-            conf.model_config)
-        assert isinstance(self.network, swig_paddle.GradientMachine)
-        self.network.loadParameters(self.model_dir)
-
-        data_size = 3 * self.crop_dims[0] * self.crop_dims[1]
-        slots = [dense_vector(data_size)]
-        self.converter = DataProviderConverter(slots)
-
-    def get_data(self, img_path):
-        """
-        1. load image from img_path.
-        2. resize or oversampling.
-        3. transformer data: transpose, channel swap, sub mean.
-        return K x H x W ndarray.
-
-        img_path: image path.
-        """
-        image = image_util.load_image(img_path, self.is_color)
-        # Another way to extract oversampled features is that
-        # cropping and averaging from large feature map which is
-        # calculated by large size of image.
-        # This way reduces the computation.
-        if self.oversample:
-            # image_util.resize_image: short side is self.resize_dim
-            image = image_util.resize_image(image, self.resize_dim)
-            image = np.array(image)
-            input = np.zeros(
-                (1, image.shape[0], image.shape[1], 3), dtype=np.float32)
-            input[0] = image.astype(np.float32)
-            input = image_util.oversample(input, self.crop_dims)
-        else:
-            image = image.resize(self.crop_dims, Image.ANTIALIAS)
-            input = np.zeros(
-                (1, self.crop_dims[0], self.crop_dims[1], 3), dtype=np.float32)
-            input[0] = np.array(image).astype(np.float32)
-
-        data_in = []
-        for img in input:
-            img = self.transformer.transformer(img).flatten()
-            data_in.append([img.tolist()])
-        # paddle input: [[[]],[[]],...], [[]] is one sample.
-        return data_in
-
-    def forward(self, input_data):
-        """
-        return output arguments which are the Outputs() in network configure.
-
-        input_data: py_paddle input data.
-        call forward.
-        """
-        in_arg = self.converter(input_data)
-        return self.network.forwardTest(in_arg)
-
-    def forward(self, data, output_layer):
-        """
-        return output arguments which are the Outputs() in network configure.
-
-        input_data: py_paddle input data.
-        call forward.
-        """
-        input = self.converter(data)
-        self.network.forwardTest(input)
-        output = self.network.getLayerOutputs(output_layer)
-        res = {}
-        if isinstance(output_layer, basestring):
-            output_layer = [output_layer]
-        for name in output_layer:
-            # For oversampling, average predictions across crops.
-            # If not, the shape of output[name]: (1, class_number),
-            # the mean is also applicable.
-            res[name] = output[name]['value'].mean(0)
-
-        return res
-
-    def predict(self, data_file):
-        """
-        call forward and predicting.
-
-        data_file: input image list.
-        """
-        image_files = open(data_file, 'rb').readlines()
-        results = {}
-        if self.output_layer is None:
-            self.output_layer = ["output"]
-        for line in image_files:
-            image = line.split()[0]
-            data = self.get_data(image)
-            prob = self.forward(data, self.output_layer)
-            lab = np.argsort(-prob[self.output_layer[0]])
-            results[image] = lab[0]
-            logging.info("Label of %s is: %d", image, lab[0])
-        return results
-
-    def extract(self, data_file, output_dir, batch_size=10000):
-        """
-        extract and save features of output layers, which are
-        specify in Outputs() in network configure.
-
-        data_file: file name of input data.
-        output_dir: saved directory of extracted features.
-        batch_size: sample number of one batch file.
-        """
-        if not os.path.exists(output_dir):
-            os.mkdir(output_dir)
-
-        sample_num = 0
-        batch_num = 0
-        image_feature = {}
-        image_files = open(data_file, 'rb').readlines()
-        for idx, line in enumerate(image_files):
-            image = line.split()[0]
-            data = self.get_data(image)
-            feature = self.forward(data, self.output_layer)
-            # save extracted features
-            file_name = image.split("/")[-1]
-            image_feature[file_name] = feature
-            sample_num += 1
-            if sample_num == batch_size:
-                batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
-                self.save_file(image_feature, batch_name)
-                logging.info('Finish batch %d', batch_num)
-                batch_num += 1
-                sample_num = 0
-                image_feature = {}
-            if idx % 1000 == 0:
-                logging.info('%d/%d, %s', idx, len(image_files), file_name)
-        if sample_num > 0:
-            batch_name = os.path.join(output_dir, 'batch_%d' % (batch_num))
-            self.save_file(image_feature, batch_name)
-            logging.info('Finish batch %d', batch_num)
-        logging.info('Done: make image feature batch')
-
-    def save_file(self, data, file):
-        of = open(file, 'wb')
-        cPickle.dump(data, of, protocol=cPickle.HIGHEST_PROTOCOL)
-
-
-def option_parser():
-    """
-    Main entry for predciting
-    """
-    usage = "%prog -c config -i data_list -w model_dir [options]"
-    parser = OptionParser(usage="usage: %s" % usage)
-    parser.add_option(
-        "-j",
-        "--job",
-        action="store",
-        dest="job_type",
-        help="job type: predict, extract\
-                            predict: predicting,\
-                            extract: extract features")
-    parser.add_option(
-        "-c",
-        "--conf",
-        action="store",
-        dest="train_conf",
-        help="network config")
-    parser.add_option(
-        "-i", "--data", action="store", dest="data_file", help="image list")
-    parser.add_option(
-        "-w",
-        "--model",
-        action="store",
-        dest="model_path",
-        default=None,
-        help="model path")
-    parser.add_option(
-        "-g",
-        "--use_gpu",
-        action="store",
-        dest="use_gpu",
-        default=True,
-        help="Whether to use gpu mode.")
-    parser.add_option(
-        "-o",
-        "--output_dir",
-        action="store",
-        dest="output_dir",
-        default="output",
-        help="output path")
-    parser.add_option(
-        "-m",
-        "--mean",
-        action="store",
-        dest="mean",
-        default=None,
-        help="mean file.")
-    parser.add_option(
-        "-p",
-        "--multi_crop",
-        action="store_true",
-        dest="multi_crop",
-        default=False,
-        help="Wether to use multiple crops on image.")
-    parser.add_option("-l", "--output_layer", action="store",
-                      dest="output_layer", default=None,
-                      help="--job=extract, specify layers to extract "\
-                           "features, --job=predict, specify layer of "
-                           "classification probability, output in resnet.py.")
-    return parser.parse_args()
-
-
-def main():
-    """
-    1. parse input arguments.
-    2. predicting or extract features according job type.
-    """
-    options, args = option_parser()
-    obj = ImageClassifier(
-        options.train_conf,
-        options.model_path,
-        use_gpu=options.use_gpu,
-        mean_file=options.mean,
-        output_layer=options.output_layer,
-        oversample=options.multi_crop)
-    if options.job_type == "predict":
-        obj.predict(options.data_file)
-
-    elif options.job_type == "extract":
-        obj.extract(options.data_file, options.output_dir)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/model_zoo/resnet/example/.gitignore b/v1_api_demo/model_zoo/resnet/example/.gitignore
deleted file mode 100644
index 4a2b5962a6..0000000000
--- a/v1_api_demo/model_zoo/resnet/example/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*image_list_provider_copy_1.py
diff --git a/v1_api_demo/model_zoo/resnet/example/cat.jpg b/v1_api_demo/model_zoo/resnet/example/cat.jpg
deleted file mode 100644
index 47b01db90e..0000000000
Binary files a/v1_api_demo/model_zoo/resnet/example/cat.jpg and /dev/null differ
diff --git a/v1_api_demo/model_zoo/resnet/example/dog.jpg b/v1_api_demo/model_zoo/resnet/example/dog.jpg
deleted file mode 100644
index b9cc33cf06..0000000000
Binary files a/v1_api_demo/model_zoo/resnet/example/dog.jpg and /dev/null differ
diff --git a/v1_api_demo/model_zoo/resnet/example/image_list_provider.py b/v1_api_demo/model_zoo/resnet/example/image_list_provider.py
deleted file mode 100644
index 2cd8eb8bf8..0000000000
--- a/v1_api_demo/model_zoo/resnet/example/image_list_provider.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.utils.image_util import *
-from paddle.trainer.PyDataProvider2 import *
-
-
-def hook(settings, image_size, crop_size, color, file_list, is_train, **kwargs):
-    """
-    Description: Init with a list of data file
-    file_list is the name list of input files.
-    kwargs["load_data_args"] is the value of 'load_data_args'
-    which can be set in config.
-    Each args is separated by a column.
-    image_size: the crop image size.
-    mean_meta: the path of the meta file to store the mean image.
-    mean_value: can be mean value, not a file.
-                can not set mean_meta and mean_value at the same time.
-    color: 'color' means a color image. Otherwise, it means a gray image.
-    is_train: whether the data provider is used for training.
-              Data argumentation might be different for training and testing.
-    """
-    settings.img_size = image_size
-    settings.crop_size = crop_size
-    settings.mean_img_size = settings.crop_size
-    settings.color = color  # default is color
-    settings.is_train = is_train
-
-    settings.is_swap_channel = kwargs.get('swap_channel', None)
-    if settings.is_swap_channel is not None:
-        settings.swap_channel = settings.is_swap_channel
-        settings.is_swap_channel = True
-
-    if settings.color:
-        settings.img_input_size = settings.crop_size * settings.crop_size * 3
-    else:
-        settings.img_input_size = settings.crop_size * settings.crop_size
-
-    settings.file_list = file_list
-    settings.mean_meta = kwargs.get('mean_meta', None)
-    settings.mean_value = kwargs.get('mean_value', None)
-    # can not specify both mean_meta and mean_value.
-    assert not (settings.mean_meta and settings.mean_value)
-    if not settings.mean_meta:
-        settings.mean_value = kwargs.get('mean_value')
-        sz = settings.crop_size * settings.crop_size
-        settings.img_mean = np.zeros(sz * 3, dtype=np.single)
-        for idx, value in enumerate(settings.mean_value):
-            settings.img_mean[idx * sz:(idx + 1) * sz] = value
-        settings.img_mean = settings.img_mean.reshape(3, settings.crop_size,
-                                                      settings.crop_size)
-
-    else:
-        settings.img_mean = load_meta(settings.mean_meta,
-                                      settings.mean_img_size,
-                                      settings.crop_size, settings.color)
-
-    settings.input_types = [
-        dense_vector(settings.img_input_size),  # image feature
-        integer_value(1)
-    ]  # labels
-
-    settings.logger.info('Image short side: %s', settings.img_size)
-    settings.logger.info('Crop size: %s', settings.crop_size)
-    settings.logger.info('Meta path: %s', settings.mean_meta)
-    if settings.is_swap_channel:
-        settings.logger.info('swap channel: %s', settings.swap_channel)
-    settings.logger.info('DataProvider Initialization finished')
-
-
-@provider(init_hook=hook, should_shuffle=False)
-def processData(settings, file_list):
-    """
-    The main function for loading data.
-    Load the batch, iterate all the images and labels in this batch.
-    file_name: the batch file name.
-    """
-    img_path, lab = file_list.strip().split(' ')
-    img = Image.open(img_path)
-    img.load()
-    img = img.resize((settings.img_size, settings.img_size), Image.ANTIALIAS)
-    img = np.array(img).astype(np.float32)
-    if len(img.shape) == 3:
-        img = np.swapaxes(img, 1, 2)
-        img = np.swapaxes(img, 1, 0)
-    # swap channel
-    if settings.is_swap_channel:
-        img = img[settings.swap_channel, :, :]
-    img_feat = preprocess_img(img, settings.img_mean, settings.crop_size,
-                              settings.is_train, settings.color)
-    yield img_feat.tolist(), int(lab.strip())
diff --git a/v1_api_demo/model_zoo/resnet/example/test.list b/v1_api_demo/model_zoo/resnet/example/test.list
deleted file mode 100644
index 30bbf630b6..0000000000
--- a/v1_api_demo/model_zoo/resnet/example/test.list
+++ /dev/null
@@ -1,2 +0,0 @@
-example/dog.jpg 0
-example/cat.jpg 0
diff --git a/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh b/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh
deleted file mode 100755
index 5447aa92df..0000000000
--- a/v1_api_demo/model_zoo/resnet/extract_fea_c++.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-#set names of layer which you want to extract feature
-#in Outputs() of resnet.py 
-#like: Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
-layer_num=50
-configure=./resnet.py
-model_path=./model/resnet_$layer_num
-fea_dir=fea_output
-#Output is text file.
-#Each line is one sample's features.
-#If you set N layer names in Outputs()
-#each line contains N features sperated by ";". 
-
-# create model list file.
-model_list=./model.list
-touch $model_list | echo $model_path > $model_list
-
-paddle train \
-  --local=true \
-  --job=test \
-  --config=$configure \
-  --model_list=$model_list \
-  --use_gpu=1 \
-  --predict_output_dir=$fea_dir \
-  --config_args=is_test=1,layer_num=$layer_num
diff --git a/v1_api_demo/model_zoo/resnet/extract_fea_py.sh b/v1_api_demo/model_zoo/resnet/extract_fea_py.sh
deleted file mode 100755
index 2e87152f7f..0000000000
--- a/v1_api_demo/model_zoo/resnet/extract_fea_py.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-#Note if you use CPU mode, you need to set use_gpu=0 in classify.py. like this:
-#conf_args = "is_test=0,use_gpu=1,is_predict=1"
-#conf = parse_config(train_conf, conf_args)
-#swig_paddle.initPaddle("--use_gpu=0")
-python classify.py \
-     --job=extract \
-     --conf=resnet.py \
-     --use_gpu=1 \
-     --mean=model/mean_meta_224/mean.meta \
-     --model=model/resnet_50 \
-     --data=./example/test.list \
-     --output_layer="res5_3_branch2c_conv,res5_3_branch2c_bn" \
-     --output_dir=features
diff --git a/v1_api_demo/model_zoo/resnet/get_model.sh b/v1_api_demo/model_zoo/resnet/get_model.sh
deleted file mode 100755
index b33d8178ab..0000000000
--- a/v1_api_demo/model_zoo/resnet/get_model.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-mkdir model
-cd model
-
-echo "Downloading ResNet models..."
-
-for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz 
-do 
-  wget http://paddlepaddle.bj.bcebos.com/model_zoo/imagenet/$file
-  tar -xvf $file 
-  rm $file
-done
-
-echo "Done."
diff --git a/v1_api_demo/model_zoo/resnet/load_feature.py b/v1_api_demo/model_zoo/resnet/load_feature.py
deleted file mode 100644
index 5d3d0c0d30..0000000000
--- a/v1_api_demo/model_zoo/resnet/load_feature.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-import cPickle
-import logging
-
-logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s')
-logging.getLogger().setLevel(logging.INFO)
-
-
-def load_feature_c(file):
-    """
-    Load feature extracted by C++ interface.
-    Return a list.
-    file: feature file.
-    """
-    features = []
-    f = open(file, 'r')
-    for line in f:
-        sample = []
-        for slot in line.strip().split(";"):
-            fea = [float(val) for val in slot.strip().split()]
-            if fea:
-                sample.append(fea)
-        features.append(sample)
-    f.close()
-    return features
-
-
-def load_feature_py(feature_dir):
-    """
-    Load feature extracted by python interface.
-    Return a dictionary.
-    feature_dir: directory of feature file.
-    """
-    file_list = os.listdir(feature_dir)
-    file_list = [os.path.join(feature_dir, f) for f in file_list]
-    features = {}
-    for file_name in file_list:
-        with open(file_name, 'rb') as f:
-            feature = cPickle.load(f)
-            features.update(feature)
-            logging.info('Load feature file %s', file_name)
-    return features
-
-
-if __name__ == '__main__':
-    print load_feature_py(sys.argv[1])
-    #print load_feature_c(sys.argv[1]) 
diff --git a/v1_api_demo/model_zoo/resnet/net_diagram.sh b/v1_api_demo/model_zoo/resnet/net_diagram.sh
deleted file mode 100755
index 1b06ffa44e..0000000000
--- a/v1_api_demo/model_zoo/resnet/net_diagram.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-:'
-Visual deep residual network
-1. Using make_model_diagram.py to generate dot file.
-2. Using graphviz to convert dot file.
-
-Usage:
-./net_diagram.sh
-'
-
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-img_type=png
-img_fileprefix=ResNet_50
-conf_filename=resnet.py
-dot_filename=ResNet_50.dot
-config_str="layer_num=50,data_provider=0"
-
-python -m paddle.utils.make_model_diagram $conf_filename $dot_filename $config_str
-
-# If you have installed graphviz, running like this:
-# dot -Tpng -o ResNet.png ResNet.dot
diff --git a/v1_api_demo/model_zoo/resnet/predict.sh b/v1_api_demo/model_zoo/resnet/predict.sh
deleted file mode 100755
index 2b67b17c48..0000000000
--- a/v1_api_demo/model_zoo/resnet/predict.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-python classify.py \
-     --job=predict \
-     --conf=resnet.py\
-     --model=model/resnet_50 \
-     --multi_crop \
-     --use_gpu=1 \
-     --data=./example/test.list
diff --git a/v1_api_demo/model_zoo/resnet/resnet.py b/v1_api_demo/model_zoo/resnet/resnet.py
deleted file mode 100644
index 6fdd97fefc..0000000000
--- a/v1_api_demo/model_zoo/resnet/resnet.py
+++ /dev/null
@@ -1,271 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-"""
-paper: https://arxiv.org/abs/1512.03385
-"""
-is_test = get_config_arg("is_test", bool, False)
-is_predict = get_config_arg("is_predict", bool, False)
-data_provider = get_config_arg("data_provider", bool, True)
-layer_num = get_config_arg("layer_num", int, 50)
-
-if not is_predict and data_provider:
-    train_list = 'train.list' if not is_test else None
-    # mean.meta is mean file of ImageNet dataset.
-    # mean.meta size : 3 x 224 x 224.
-    # If you use three mean value, set like:
-    # "mean_value:103.939,116.779,123.68;"
-    args = {
-        'mean_meta': "model/mean_meta_224/mean.meta",
-        'image_size': 224,
-        'crop_size': 224,
-        'color': True,
-        'swap_channel:': [2, 1, 0]
-    }
-    define_py_data_sources2(
-        train_list,
-        'example/test.list',
-        module="example.image_list_provider",
-        obj="processData",
-        args=args)
-
-batch_size = 1
-learning_rate = 0.1 / batch_size
-momentum = 0.9
-weight_decay = 0.0001 * batch_size
-default_momentum(momentum)
-default_decay_rate(weight_decay)
-
-Settings(
-    algorithm='sgd',
-    batch_size=batch_size,
-    learning_rate=learning_rate,
-
-    # set the appropriate parameters according your schedule
-    learning_method='momentum',
-    learning_rate_decay_a=0.5,
-    learning_rate_decay_b=1200000 * 10,
-    learning_rate_schedule="discexp", )
-
-
-def conv_bn_layer(name,
-                  input,
-                  filter_size,
-                  num_filters,
-                  stride,
-                  padding,
-                  channels=None,
-                  active_type=ReluActivation()):
-    """
-    A wrapper for conv layer with batch normalization layers.
-    Note:
-    conv layer has no activation.
-    """
-
-    tmp = img_conv_layer(
-        name=name + "_conv",
-        input=input,
-        filter_size=filter_size,
-        num_channels=channels,
-        num_filters=num_filters,
-        stride=stride,
-        padding=padding,
-        act=LinearActivation(),
-        bias_attr=False)
-    return batch_norm_layer(
-        name=name + "_bn", input=tmp, act=active_type, use_global_stats=is_test)
-
-
-def bottleneck_block(name, input, num_filters1, num_filters2):
-    """
-    A wrapper for bottlenect building block in ResNet.
-    Last conv_bn_layer has no activation.
-    Addto layer has activation of relu.
-    """
-    last_name = conv_bn_layer(
-        name=name + '_branch2a',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters1,
-        stride=1,
-        padding=0)
-    last_name = conv_bn_layer(
-        name=name + '_branch2b',
-        input=last_name,
-        filter_size=3,
-        num_filters=num_filters1,
-        stride=1,
-        padding=1)
-    last_name = conv_bn_layer(
-        name=name + '_branch2c',
-        input=last_name,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=1,
-        padding=0,
-        active_type=LinearActivation())
-
-    return addto_layer(
-        name=name + "_addto", input=[input, last_name], act=ReluActivation())
-
-
-def mid_projection(name, input, num_filters1, num_filters2, stride=2):
-    """
-    A wrapper for middile projection in ResNet.
-    projection shortcuts are used for increasing dimensions,
-    and other shortcuts are identity
-    branch1: projection shortcuts are used for increasing
-    dimensions, has no activation.
-    branch2x: bottleneck building block, shortcuts are identity.
-    """
-    # stride = 2
-    branch1 = conv_bn_layer(
-        name=name + '_branch1',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=stride,
-        padding=0,
-        active_type=LinearActivation())
-
-    last_name = conv_bn_layer(
-        name=name + '_branch2a',
-        input=input,
-        filter_size=1,
-        num_filters=num_filters1,
-        stride=stride,
-        padding=0)
-    last_name = conv_bn_layer(
-        name=name + '_branch2b',
-        input=last_name,
-        filter_size=3,
-        num_filters=num_filters1,
-        stride=1,
-        padding=1)
-
-    last_name = conv_bn_layer(
-        name=name + '_branch2c',
-        input=last_name,
-        filter_size=1,
-        num_filters=num_filters2,
-        stride=1,
-        padding=0,
-        active_type=LinearActivation())
-
-    return addto_layer(
-        name=name + "_addto", input=[branch1, last_name], act=ReluActivation())
-
-
-def deep_res_net(res2_num=3, res3_num=4, res4_num=6, res5_num=3):
-    """
-    A wrapper for 50,101,152 layers of ResNet.
-    res2_num: number of blocks stacked in conv2_x
-    res3_num: number of blocks stacked in conv3_x
-    res4_num: number of blocks stacked in conv4_x
-    res5_num: number of blocks stacked in conv5_x
-    """
-    # For ImageNet
-    # conv1: 112x112
-    img = data_layer(name='input', size=224 * 224 * 3)
-    tmp = conv_bn_layer(
-        "conv1",
-        img,
-        filter_size=7,
-        channels=3,
-        num_filters=64,
-        stride=2,
-        padding=3)
-    tmp = img_pool_layer(name="pool1", input=tmp, pool_size=3, stride=2)
-
-    # conv2_x: 56x56
-    tmp = mid_projection(
-        name="res2_1", input=tmp, num_filters1=64, num_filters2=256, stride=1)
-    for i in xrange(2, res2_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res2_" + str(i), input=tmp, num_filters1=64, num_filters2=256)
-
-    # conv3_x: 28x28
-    tmp = mid_projection(
-        name="res3_1", input=tmp, num_filters1=128, num_filters2=512)
-    for i in xrange(2, res3_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res3_" + str(i),
-            input=tmp,
-            num_filters1=128,
-            num_filters2=512)
-
-    # conv4_x: 14x14
-    tmp = mid_projection(
-        name="res4_1", input=tmp, num_filters1=256, num_filters2=1024)
-    for i in xrange(2, res4_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res4_" + str(i),
-            input=tmp,
-            num_filters1=256,
-            num_filters2=1024)
-
-    # conv5_x: 7x7
-    tmp = mid_projection(
-        name="res5_1", input=tmp, num_filters1=512, num_filters2=2048)
-    for i in xrange(2, res5_num + 1, 1):
-        tmp = bottleneck_block(
-            name="res5_" + str(i),
-            input=tmp,
-            num_filters1=512,
-            num_filters2=2048)
-
-    tmp = img_pool_layer(
-        name='avgpool',
-        input=tmp,
-        pool_size=7,
-        stride=1,
-        pool_type=AvgPooling())
-
-    output = fc_layer(
-        name='output', input=tmp, size=1000, act=SoftmaxActivation())
-
-    if not is_predict:
-        classification_cost(
-            input=output, label=data_layer(
-                name='label', size=1))
-
-
-def res_net_50():
-    deep_res_net(3, 4, 6, 3)
-
-
-def res_net_101():
-    deep_res_net(3, 4, 23, 3)
-
-
-def res_net_152():
-    deep_res_net(3, 8, 36, 3)
-
-
-if not is_predict:
-    Inputs("input", "label")
-else:
-    Inputs("input")
-# Outputs("cost-softmax" if not is_predict else "output")
-Outputs("res5_3_branch2c_conv", "res5_3_branch2c_bn")
-
-if layer_num == 50:
-    res_net_50()
-elif layer_num == 101:
-    res_net_101()
-elif layer_num == 152:
-    res_net_152()
-else:
-    print("Wrong layer number.")
diff --git a/v1_api_demo/quick_start/.gitignore b/v1_api_demo/quick_start/.gitignore
deleted file mode 100644
index f71662563f..0000000000
--- a/v1_api_demo/quick_start/.gitignore
+++ /dev/null
@@ -1,15 +0,0 @@
-*.pyc
-data/dict.txt
-data/dict_all.txt
-data/labels.list
-data/mosesdecoder-master/
-data/reviews_Electronics_5.json.gz
-data/test.list
-data/test.txt
-data/train.list
-data/train.txt
-data/pred.list
-data/pred.txt
-dataprovider_copy_1.py
-train.log
-output
diff --git a/v1_api_demo/quick_start/api_predict.py b/v1_api_demo/quick_start/api_predict.py
deleted file mode 100755
index 9bdffe1006..0000000000
--- a/v1_api_demo/quick_start/api_predict.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os, sys
-import numpy as np
-from optparse import OptionParser
-from py_paddle import swig_paddle, DataProviderConverter
-from paddle.trainer.PyDataProvider2 import sparse_binary_vector
-from paddle.trainer.config_parser import parse_config
-"""
-Usage: run following command to show help message.
-  python api_predict.py -h
-"""
-
-
-class QuickStartPrediction():
-    def __init__(self, train_conf, dict_file, model_dir=None, label_file=None):
-        """
-        train_conf: trainer configure.
-        dict_file: word dictionary file name.
-        model_dir: directory of model.
-        """
-        self.train_conf = train_conf
-        self.dict_file = dict_file
-        self.word_dict = {}
-        self.dict_dim = self.load_dict()
-        self.model_dir = model_dir
-        if model_dir is None:
-            self.model_dir = os.path.dirname(train_conf)
-
-        self.label = None
-        if label_file is not None:
-            self.load_label(label_file)
-
-        conf = parse_config(train_conf, "is_predict=1")
-        self.network = swig_paddle.GradientMachine.createFromConfigProto(
-            conf.model_config)
-        self.network.loadParameters(self.model_dir)
-        input_types = [sparse_binary_vector(self.dict_dim)]
-        self.converter = DataProviderConverter(input_types)
-
-    def load_dict(self):
-        """
-        Load dictionary from self.dict_file.
-        """
-        for line_count, line in enumerate(open(self.dict_file, 'r')):
-            self.word_dict[line.strip().split('\t')[0]] = line_count
-        return len(self.word_dict)
-
-    def load_label(self, label_file):
-        """
-        Load label.
-        """
-        self.label = {}
-        for v in open(label_file, 'r'):
-            self.label[int(v.split('\t')[1])] = v.split('\t')[0]
-
-    def get_index(self, data):
-        """
-        transform word into integer index according to the dictionary.
-        """
-        words = data.strip().split()
-        word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
-        return word_slot
-
-    def batch_predict(self, data_batch):
-        input = self.converter(data_batch)
-        output = self.network.forwardTest(input)
-        prob = output[0]["id"].tolist()
-        print("predicting labels is:")
-        print prob
-
-
-def option_parser():
-    usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
-    parser = OptionParser(usage="usage: %s [options]" % usage)
-    parser.add_option(
-        "-n",
-        "--tconf",
-        action="store",
-        dest="train_conf",
-        help="network config")
-    parser.add_option(
-        "-d",
-        "--dict",
-        action="store",
-        dest="dict_file",
-        help="dictionary file")
-    parser.add_option(
-        "-b",
-        "--label",
-        action="store",
-        dest="label",
-        default=None,
-        help="dictionary file")
-    parser.add_option(
-        "-c",
-        "--batch_size",
-        type="int",
-        action="store",
-        dest="batch_size",
-        default=1,
-        help="the batch size for prediction")
-    parser.add_option(
-        "-w",
-        "--model",
-        action="store",
-        dest="model_path",
-        default=None,
-        help="model path")
-    return parser.parse_args()
-
-
-def main():
-    options, args = option_parser()
-    train_conf = options.train_conf
-    batch_size = options.batch_size
-    dict_file = options.dict_file
-    model_path = options.model_path
-    label = options.label
-    swig_paddle.initPaddle("--use_gpu=0")
-    predict = QuickStartPrediction(train_conf, dict_file, model_path, label)
-
-    batch = []
-    labels = []
-    for line in sys.stdin:
-        [label, text] = line.split("\t")
-        labels.append(int(label))
-        batch.append([predict.get_index(text)])
-    print("labels is:")
-    print labels
-    predict.batch_predict(batch)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/quick_start/api_predict.sh b/v1_api_demo/quick_start/api_predict.sh
deleted file mode 100755
index 4d9aa9e885..0000000000
--- a/v1_api_demo/quick_start/api_predict.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-#Note the default model is pass-00002, you shold make sure the model path
-#exists or change the mode path.
-#only test on trainer_config.lr.py
-model=output/model/pass-00001/
-config=trainer_config.lr.py
-label=data/labels.list
-dict=data/dict.txt
-batch_size=20
-head -n$batch_size data/test.txt | python api_predict.py \
-     --tconf=$config\
-     --model=$model \
-     --label=$label \
-     --dict=$dict \
-     --batch_size=$batch_size
diff --git a/v1_api_demo/quick_start/api_train.py b/v1_api_demo/quick_start/api_train.py
deleted file mode 100644
index 5699789daa..0000000000
--- a/v1_api_demo/quick_start/api_train.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import itertools
-import random
-
-from paddle.trainer.config_parser import parse_config
-from py_paddle import swig_paddle as api
-from py_paddle import DataProviderConverter
-from paddle.trainer.PyDataProvider2 \
-    import integer_value, integer_value_sequence, sparse_binary_vector
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--train_data", type=str, required=False, help="train data file")
-    parser.add_argument("--test_data", type=str, help="test data file")
-    parser.add_argument(
-        "--config", type=str, required=True, help="config file name")
-    parser.add_argument("--dict_file", required=True, help="dictionary file")
-    parser.add_argument(
-        "--seq", default=1, type=int, help="whether use sequence training")
-    parser.add_argument(
-        "--use_gpu", default=0, type=int, help="whether use GPU for training")
-    parser.add_argument(
-        "--trainer_count",
-        default=1,
-        type=int,
-        help="Number of threads for training")
-    parser.add_argument(
-        "--num_passes", default=5, type=int, help="Number of training passes")
-    return parser.parse_args()
-
-
-UNK_IDX = 0
-
-
-def load_data(file_name, word_dict):
-    with open(file_name, 'r') as f:
-        for line in f:
-            label, comment = line.strip().split('\t')
-            words = comment.split()
-            word_slot = [word_dict.get(w, UNK_IDX) for w in words]
-            yield word_slot, int(label)
-
-
-def load_dict(dict_file):
-    word_dict = dict()
-    with open(dict_file, 'r') as f:
-        for i, line in enumerate(f):
-            w = line.strip().split()[0]
-            word_dict[w] = i
-    return word_dict
-
-
-def main():
-    options = parse_arguments()
-    api.initPaddle("--use_gpu=%s" % options.use_gpu,
-                   "--trainer_count=%s" % options.trainer_count)
-
-    word_dict = load_dict(options.dict_file)
-    train_dataset = list(load_data(options.train_data, word_dict))
-    if options.test_data:
-        test_dataset = list(load_data(options.test_data, word_dict))
-    else:
-        test_dataset = None
-
-    trainer_config = parse_config(options.config,
-                                  "dict_file=%s" % options.dict_file)
-    # No need to have data provider for trainer
-    trainer_config.ClearField('data_config')
-    trainer_config.ClearField('test_data_config')
-
-    # create a GradientMachine from the model configuratin
-    model = api.GradientMachine.createFromConfigProto(
-        trainer_config.model_config)
-    # create a trainer for the gradient machine
-    trainer = api.Trainer.create(trainer_config, model)
-
-    # create a data converter which converts data to PaddlePaddle
-    # internal format
-    input_types = [
-        integer_value_sequence(len(word_dict)) if options.seq else
-        sparse_binary_vector(len(word_dict)), integer_value(2)
-    ]
-    converter = DataProviderConverter(input_types)
-
-    batch_size = trainer_config.opt_config.batch_size
-    trainer.startTrain()
-    for train_pass in xrange(options.num_passes):
-        trainer.startTrainPass()
-        random.shuffle(train_dataset)
-        for pos in xrange(0, len(train_dataset), batch_size):
-            batch = itertools.islice(train_dataset, pos, pos + batch_size)
-            size = min(batch_size, len(train_dataset) - pos)
-            trainer.trainOneDataBatch(size, converter(batch))
-        trainer.finishTrainPass()
-        if test_dataset:
-            trainer.startTestPeriod()
-            for pos in xrange(0, len(test_dataset), batch_size):
-                batch = itertools.islice(test_dataset, pos, pos + batch_size)
-                size = min(batch_size, len(test_dataset) - pos)
-                trainer.testOneDataBatch(size, converter(batch))
-            trainer.finishTestPeriod()
-    trainer.finishTrain()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/quick_start/api_train.sh b/v1_api_demo/quick_start/api_train.sh
deleted file mode 100755
index 9b2a4e2f22..0000000000
--- a/v1_api_demo/quick_start/api_train.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-# Note: if using trainer_config.emb.py, trainer_config.cnn.py
-# or trainer_config.lstm.py, you need to change --seq to --seq=1
-# because they are sequence models.
-python api_train.py \
-  --config=trainer_config.lr.py \
-  --trainer_count=2 \
-  --num_passes=15 \
-  --use_gpu=0 \
-  --seq=0 \
-  --train_data=data/train.txt \
-  --test_data=data/test.txt \
-  --dict_file=data/dict.txt \
-  2>&1 | tee 'train.log'
diff --git a/v1_api_demo/quick_start/cluster/cluster_train.sh b/v1_api_demo/quick_start/cluster/cluster_train.sh
deleted file mode 100755
index a7b1f01064..0000000000
--- a/v1_api_demo/quick_start/cluster/cluster_train.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-# Should run pserver.sh before run this script.
-bin_dir=$(cd `dirname $0`; pwd)
-home_dir=$(cd "${bin_dir}/.."; pwd)
-source "$bin_dir/env.sh"
-
-model_dir="$bin_dir/output"
-log_file="$bin_dir/train.log"
-
-pushd "$home_dir"
-cfg=trainer_config.lr.py
-paddle train \
-  --start_pserver=false \
-  --config=$cfg \
-  --save_dir=${model_dir} \
-  --trainer_count=4 \
-  --local=0 \
-  --log_period=100 \
-  --num_passes=15 \
-  --use_gpu=false \
-  --show_parameter_stats_period=100 \
-  --test_all_data_in_one_period=1 \
-  --num_gradient_servers=1 \
-  --nics=`get_nics` \
-  --port=7164 \
-  --ports_num=1 \
-  --pservers="127.0.0.1" \
-  --comment="paddle_trainer" \
-  2>&1 | tee "$log_file"
-popd
diff --git a/v1_api_demo/quick_start/cluster/env.sh b/v1_api_demo/quick_start/cluster/env.sh
deleted file mode 100644
index a404993835..0000000000
--- a/v1_api_demo/quick_start/cluster/env.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-function get_nics() {
-  machine=`uname -s`
-  local nics=""
-  if [ "$machine" == "Linux" ]; then
-    nics="lo"
-  elif [ "$machine" == "Darwin" ]; then
-    nics="lo0"
-  else
-    nics="unsupport"
-  fi
-  echo $nics
-}
diff --git a/v1_api_demo/quick_start/cluster/pserver.sh b/v1_api_demo/quick_start/cluster/pserver.sh
deleted file mode 100755
index b187c1d9b9..0000000000
--- a/v1_api_demo/quick_start/cluster/pserver.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-bin_dir=$(cd `dirname $0`; pwd)
-source "$bin_dir/env.sh"
-
-paddle pserver \
-  --nics=`get_nics` \
-  --port=7164 \
-  --ports_num=1 \
-  --ports_num_for_sparse=1 \
-  --num_gradient_servers=1 \
-  --comment="paddle_pserver" \
-  2>&1 | tee 'pserver.log'
diff --git a/v1_api_demo/quick_start/data/README.md b/v1_api_demo/quick_start/data/README.md
deleted file mode 100644
index 63abcf7ebf..0000000000
--- a/v1_api_demo/quick_start/data/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-This dataset consists of electronics product reviews associated with
-binary labels (positive/negative) for sentiment classification.
-
-The preprocessed data can be downloaded by script `get_data.sh`.
-The data was derived from reviews_Electronics_5.json.gz at
-
-http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
-
-If you want to process the raw data, you can use the script `proc_from_raw_data/get_data.sh`.
diff --git a/v1_api_demo/quick_start/data/get_data.sh b/v1_api_demo/quick_start/data/get_data.sh
deleted file mode 100755
index a09a18f919..0000000000
--- a/v1_api_demo/quick_start/data/get_data.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-# Download the preprocessed data
-wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
-
-# Extract package
-tar zxvf preprocessed_data.tar.gz
-
-# Remove compressed package
-rm preprocessed_data.tar.gz
diff --git a/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh b/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh
deleted file mode 100755
index d976eaebfa..0000000000
--- a/v1_api_demo/quick_start/data/proc_from_raw_data/get_data.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# 1. size of pos : neg = 1:1.
-# 2. size of testing set = min(25k, len(all_data) * 0.1), others is traning set.
-# 3. distinct train set and test set.
-
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-# Download data
-echo "Downloading Amazon Electronics reviews data..."
-# http://jmcauley.ucsd.edu/data/amazon/
-wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
-echo "Downloading mosesdecoder..."
-# https://github.com/moses-smt/mosesdecoder
-wget https://github.com/moses-smt/mosesdecoder/archive/master.zip
-
-unzip master.zip
-rm master.zip
-
-##################
-# Preprocess data 
-echo "Preprocess data..."
-export LC_ALL=C
-UNAME_STR=`uname`
-
-if [ ${UNAME_STR} == 'Linux' ]; then
-  SHUF_PROG='shuf'
-else
-  SHUF_PROG='gshuf'
-fi
-
-mkdir -p tmp
-python preprocess.py -i reviews_Electronics_5.json.gz
-# uniq and shuffle
-cd tmp
-echo 'Uniq and shuffle...'
-cat pos_*|sort|uniq|${SHUF_PROG}> pos.shuffed
-cat neg_*|sort|uniq|${SHUF_PROG}> neg.shuffed
-
-min_len=`sed -n '$=' neg.shuffed`
-test_num=$((min_len/10))
-if [ $test_num -gt 12500 ];then
- test_num=12500
-fi
-train_num=$((min_len-test_num))
-
-head -n$train_num pos.shuffed >train.pos
-head -n$train_num neg.shuffed >train.neg
-tail -n$test_num pos.shuffed >test.pos
-tail -n$test_num neg.shuffed >test.neg
-
-cat train.pos train.neg | ${SHUF_PROG} >../train.txt
-cat test.pos test.neg | ${SHUF_PROG} >../test.txt
-
-cd -
-echo 'train.txt' > train.list
-echo 'test.txt' > test.list
-
-# use 30k dict
-rm -rf tmp
-mv dict.txt dict_all.txt
-cat dict_all.txt | head -n 30001 > dict.txt
-echo 'Done.'
diff --git a/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py b/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py
deleted file mode 100755
index 72bd95f21d..0000000000
--- a/v1_api_demo/quick_start/data/proc_from_raw_data/preprocess.py
+++ /dev/null
@@ -1,223 +0,0 @@
-# -*- coding: UTF-8 -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-1. Tokenize the words and punctuation 
-2. pos sample : rating score 5; neg sample: rating score 1-2.
-
-Usage:
-    python preprocess.py -i data_file [random seed]
-"""
-
-import sys
-import os
-import operator
-import gzip
-from subprocess import Popen, PIPE
-from optparse import OptionParser
-import json
-from multiprocessing import Queue
-from multiprocessing import Pool
-import multiprocessing
-
-batch_size = 5000
-word_count = {}
-num_tokenize = max(1,
-                   multiprocessing.cpu_count() - 2)  # parse + tokenize + save
-max_queue_size = 8
-parse_queue = Queue(maxsize=max_queue_size + num_tokenize)
-tokenize_queue = Queue(maxsize=max_queue_size + num_tokenize)
-
-
-def create_dict(data):
-    """
-    Create dictionary based on data, and saved in data_dir/dict.txt.
-    The first line is unk \t -1.
-    data: list, input data by batch.
-    """
-    for seq in data:
-        try:
-            for w in seq.lower().split():
-                if w not in word_count:
-                    word_count[w] = 1
-                else:
-                    word_count[w] += 1
-        except:
-            sys.stderr.write(seq + "\tERROR\n")
-
-
-def parse(path):
-    """
-    Open .gz file.
-    """
-    sys.stderr.write(path)
-    g = gzip.open(path, 'r')
-    for l in g:
-        yield json.loads(l)
-    g.close()
-
-
-def tokenize(sentences):
-    """
-    Use tokenizer.perl to tokenize input sentences.
-    tokenizer.perl is tool of Moses.
-    sentences : a list of input sentences.
-    return: a list of processed text.
-    """
-    dir = './mosesdecoder-master/scripts/tokenizer/tokenizer.perl'
-    if not os.path.exists(dir):
-        sys.exit(
-            "The ./mosesdecoder-master/scripts/tokenizer/tokenizer.perl does not exists."
-        )
-    tokenizer_cmd = [dir, '-l', 'en', '-q', '-']
-    assert isinstance(sentences, list)
-    text = "\n".join(sentences)
-    tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
-    tok_text, _ = tokenizer.communicate(text)
-    toks = tok_text.split('\n')[:-1]
-    return toks
-
-
-def save_data(instance, data_dir, pre_fix, batch_num):
-    """
-    save data by batch
-    """
-    label = ['1' if pre_fix == 'pos' else '0' for i in range(len(instance))]
-    lines = ['%s\t%s' % (label[i], instance[i]) for i in range(len(label))]
-    file_name = os.path.join(data_dir, "%s_%s.txt" % (pre_fix, batch_num))
-    file(file_name, 'w').write('\n'.join(lines) + '\n')
-
-
-def tokenize_batch(id):
-    """
-    tokenize data by batch
-    """
-    while True:
-        num_batch, instance, pre_fix = parse_queue.get()
-        if num_batch == -1:  ### parse_queue finished
-            tokenize_queue.put((-1, None, None))
-            sys.stderr.write("Thread %s finish\n" % (id))
-            break
-        tokenize_instance = tokenize(instance)
-        tokenize_queue.put((num_batch, tokenize_instance, pre_fix))
-        sys.stderr.write('.')
-
-
-def save_batch(data_dir, num_tokenize, data_dir_dict):
-    """
-        save data by batch
-        build dict.txt
-    """
-    token_count = 0
-    while True:
-        num_batch, instance, pre_fix = tokenize_queue.get()
-        if num_batch == -1:
-            token_count += 1
-            if token_count == num_tokenize:  #### tokenize finished.
-                break
-            else:
-                continue
-        save_data(instance, data_dir, pre_fix, num_batch)
-        create_dict(instance)  ## update dict
-
-    sys.stderr.write("save file finish\n")
-    f = open(data_dir_dict, 'w')
-    f.write('%s\t%s\n' % ('unk', '-1'))
-    for k, v in sorted(word_count.items(), key=operator.itemgetter(1), \
-                       reverse=True):
-        f.write('%s\t%s\n' % (k, v))
-    f.close()
-    sys.stderr.write("build dict finish\n")
-
-
-def parse_batch(data, num_tokenize):
-    """
-    parse data by batch
-    parse -> tokenize -> save
-    """
-    raw_txt = parse(data)
-    neg, pos = [], []
-    count = 0
-    sys.stderr.write("extract raw data\n")
-    for l in raw_txt:
-        rating = l["overall"]
-        text = l["reviewText"].lower()  # # convert words to lower case
-        if rating == 5.0 and text:
-            pos.append(text)
-        if rating < 3.0 and text:
-            neg.append(text)
-        if len(pos) == batch_size or len(neg) == batch_size:
-            if len(pos) == batch_size:
-                batch = pos
-                pre_fix = 'pos'
-            else:
-                batch = neg
-                pre_fix = 'neg'
-
-            parse_queue.put((count, batch, pre_fix))
-            count += 1
-            if pre_fix == 'pos':
-                pos = []
-            else:
-                neg = []
-
-    if len(pos) > 0:
-        parse_queue.put((count, pos, 'pos'))
-        count += 1
-    if len(neg) > 0:
-        parse_queue.put((count, neg, 'neg'))
-        count += 1
-    for i in range(num_tokenize):
-        parse_queue.put((-1, None, None))  #### for tokenize's input finished
-    sys.stderr.write("parsing finish\n")
-
-
-def option_parser():
-    parser = OptionParser(usage="usage: python preprcoess.py "\
-                                "-i data_path [options]")
-    parser.add_option(
-        "-i", "--data", action="store", dest="input", help="Input data path.")
-    parser.add_option(
-        "-s",
-        "--seed",
-        action="store",
-        dest="seed",
-        default=1024,
-        help="Set random seed.")
-    return parser.parse_args()
-
-
-def main():
-    reload(sys)
-    sys.setdefaultencoding('utf-8')
-    options, args = option_parser()
-    data = options.input
-    seed = options.seed
-    data_dir_dict = os.path.join(os.path.dirname(data), 'dict.txt')
-    data_dir = os.path.join(os.path.dirname(data), 'tmp')
-    pool = Pool(processes=num_tokenize + 2)
-    pool.apply_async(parse_batch, args=(data, num_tokenize))
-    for i in range(num_tokenize):
-        pool.apply_async(tokenize_batch, args=(str(i), ))
-    pool.apply_async(save_batch, args=(data_dir, num_tokenize, data_dir_dict))
-    pool.close()
-    pool.join()
-
-    file(os.path.join(os.path.dirname(data), 'labels.list'),
-         'w').write('neg\t0\npos\t1\n')
-
-
-if __name__ == '__main__':
-    main()
diff --git a/v1_api_demo/quick_start/dataprovider_bow.py b/v1_api_demo/quick_start/dataprovider_bow.py
deleted file mode 100644
index 2745495586..0000000000
--- a/v1_api_demo/quick_start/dataprovider_bow.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-
-# id of the word not in dictionary
-UNK_IDX = 0
-
-
-# initializer is called by the framework during initialization.
-# It allows the user to describe the data types and setup the
-# necessary data structure for later use.
-# `settings` is an object. initializer need to properly fill settings.input_types.
-# initializer can also store other data structures needed to be used at process().
-# In this example, dictionary is stored in settings.
-# `dictionay` and `kwargs` are arguments passed from trainer_config.lr.py
-def initializer(settings, dictionary, **kwargs):
-    # Put the word dictionary into settings
-    settings.word_dict = dictionary
-
-    # setting.input_types specifies what the data types the data provider
-    # generates.
-    settings.input_types = {
-        # The first input is a sparse_binary_vector,
-        # which means each dimension of the vector is either 0 or 1. It is the
-        # bag-of-words (BOW) representation of the texts.
-        'word': sparse_binary_vector(len(dictionary)),
-        # The second input is an integer. It represents the category id of the
-        # sample. 2 means there are two labels in the dataset.
-        # (1 for positive and 0 for negative)
-        'label': integer_value(2)
-    }
-
-
-# Delaring a data provider. It has an initializer 'data_initialzer'.
-# It will cache the generated data of the first pass in memory, so that
-# during later pass, no on-the-fly data generation will be needed.
-# `setting` is the same object used by initializer()
-# `file_name` is the name of a file listed train_list or test_list file given
-# to define_py_data_sources2(). See trainer_config.lr.py.
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_name):
-    # Open the input data file.
-    with open(file_name, 'r') as f:
-        # Read each line.
-        for line in f:
-            # Each line contains the label and text of the comment, separated by \t.
-            label, comment = line.strip().split('\t')
-
-            # Split the words into a list.
-            words = comment.split()
-
-            # convert the words into a list of ids by looking them up in word_dict.
-            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in words]
-
-            # Return the features for the current comment. The first is a list
-            # of ids representing a 0-1 binary sparse vector of the text,
-            # the second is the integer id of the label.
-            yield {'word': word_vector, 'label': int(label)}
-
-
-def predict_initializer(settings, dictionary, **kwargs):
-    settings.word_dict = dictionary
-    settings.input_types = {'word': sparse_binary_vector(len(dictionary))}
-
-
-# Declaring a data provider for prediction. The difference with process
-# is that label is not generated.
-@provider(init_hook=predict_initializer, should_shuffle=False)
-def process_predict(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line in f:
-            comment = line.strip().split()
-            word_vector = [settings.word_dict.get(w, UNK_IDX) for w in comment]
-            yield {'word': word_vector}
diff --git a/v1_api_demo/quick_start/dataprovider_emb.py b/v1_api_demo/quick_start/dataprovider_emb.py
deleted file mode 100755
index ddfa3ce9b7..0000000000
--- a/v1_api_demo/quick_start/dataprovider_emb.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-
-UNK_IDX = 0
-
-
-def initializer(settings, dictionary, **kwargs):
-    settings.word_dict = dictionary
-    settings.input_types = {
-        # Define the type of the first input as sequence of integer.
-        # The value of the integers range from 0 to len(dictrionary)-1
-        'word': integer_value_sequence(len(dictionary)),
-        # Define the second input for label id
-        'label': integer_value(2)
-    }
-
-
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line in f:
-            label, comment = line.strip().split('\t')
-            words = comment.split()
-            word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
-            yield {'word': word_slot, 'label': int(label)}
-
-
-def predict_initializer(settings, dictionary, **kwargs):
-    settings.word_dict = dictionary
-    settings.input_types = {'word': integer_value_sequence(len(dictionary))}
-
-
-@provider(init_hook=predict_initializer, should_shuffle=False)
-def process_predict(settings, file_name):
-    with open(file_name, 'r') as f:
-        for line in f:
-            comment = line.strip().split()
-            word_slot = [settings.word_dict.get(w, UNK_IDX) for w in comment]
-            yield {'word': word_slot}
diff --git a/v1_api_demo/quick_start/predict.sh b/v1_api_demo/quick_start/predict.sh
deleted file mode 100755
index e47c2dd01f..0000000000
--- a/v1_api_demo/quick_start/predict.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.lr.py
-#cfg=trainer_config.emb.py
-#cfg=trainer_config.cnn.py
-#cfg=trainer_config.lstm.py
-model="output/pass-00003"
-paddle train \
-    --config=$cfg \
-    --use_gpu=false \
-    --job=test \
-    --init_model_path=$model \
-    --config_args=is_predict=1 \
-    --predict_output_dir=. \
-2>&1 | tee 'predict.log'
-paddle usage -l 'predict.log' -e $? -n "quick_start_predict_${cfg}" >/dev/null 2>&1
-
-mv rank-00000 result.txt
diff --git a/v1_api_demo/quick_start/train.sh b/v1_api_demo/quick_start/train.sh
deleted file mode 100755
index 01697fed48..0000000000
--- a/v1_api_demo/quick_start/train.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.lr.py
-#cfg=trainer_config.emb.py
-#cfg=trainer_config.cnn.py
-#cfg=trainer_config.lstm.py
-#cfg=trainer_config.bidi-lstm.py
-#cfg=trainer_config.db-lstm.py
-#cfg=trainer_config.resnet-lstm.py
-paddle train \
-  --config=$cfg \
-  --save_dir=./output \
-  --trainer_count=4 \
-  --log_period=100 \
-  --num_passes=15 \
-  --use_gpu=false \
-  --show_parameter_stats_period=100 \
-  --test_all_data_in_one_period=1 \
-  2>&1 | tee 'train.log'
-paddle usage -l "train.log" -e $? -n "quick_start_${cfg}" >/dev/null 2>&1
diff --git a/v1_api_demo/quick_start/trainer_config.bidi-lstm.py b/v1_api_demo/quick_start/trainer_config.bidi-lstm.py
deleted file mode 100644
index ca1d1f8d09..0000000000
--- a/v1_api_demo/quick_start/trainer_config.bidi-lstm.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-
-bi_lstm = bidirectional_lstm(input=emb, size=128)
-dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
-
-output = fc_layer(
-    input=dropout, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.cnn.py b/v1_api_demo/quick_start/trainer_config.cnn.py
deleted file mode 100644
index f8c3d511f3..0000000000
--- a/v1_api_demo/quick_start/trainer_config.cnn.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-data = data_layer(name="word", size=len(word_dict))
-embedding = embedding_layer(input=data, size=128)
-conv = sequence_conv_pool(input=embedding, context_len=3, hidden_size=512)
-output = fc_layer(input=conv, size=2, act=SoftmaxActivation())
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.db-lstm.py b/v1_api_demo/quick_start/trainer_config.db-lstm.py
deleted file mode 100644
index fba802b460..0000000000
--- a/v1_api_demo/quick_start/trainer_config.db-lstm.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-
-hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
-lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
-
-input_layers = [hidden_0, lstm_0]
-
-for i in range(1, 8):
-    fc = fc_layer(input=input_layers, size=128)
-    lstm = lstmemory(
-        input=fc,
-        layer_attr=ExtraAttr(drop_rate=0.1),
-        reverse=(i % 2) == 1, )
-    input_layers = [fc, lstm]
-
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-
-output = fc_layer(
-    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.emb.py b/v1_api_demo/quick_start/trainer_config.emb.py
deleted file mode 100644
index 7410397ef6..0000000000
--- a/v1_api_demo/quick_start/trainer_config.emb.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size, learning_rate=2e-3, learning_method=AdamOptimizer())
-
-data = data_layer(name="word", size=len(word_dict))
-embedding = embedding_layer(input=data, size=128)
-avg = pooling_layer(input=embedding, pooling_type=AvgPooling())
-output = fc_layer(input=avg, size=2, act=SoftmaxActivation())
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.lr.py b/v1_api_demo/quick_start/trainer_config.lr.py
deleted file mode 100644
index e5105aa895..0000000000
--- a/v1_api_demo/quick_start/trainer_config.lr.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = get_config_arg('dict_file', str, "./data/dict.txt")
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-
-# define the data sources for the model.
-# We need to use different process for training and prediction.
-# For training, the input data includes both word IDs and labels.
-# For prediction, the input data only includs word Ids.
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_bow",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-# Define the data for text features. The size of the data layer is the number
-# of words in the dictionary.
-data = data_layer(name="word", size=len(word_dict))
-
-# Define a fully connected layer with logistic activation.
-# (also called softmax activation).
-output = fc_layer(input=data, size=2, act=SoftmaxActivation())
-
-if not is_predict:
-    # For training, we need label and cost
-
-    # define the category id for each example.
-    # The size of the data layer is the number of labels.
-    label = data_layer(name="label", size=2)
-
-    # Define cross-entropy classification loss and error.
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
-else:
-    # For prediction, no label is needed. We need to output
-    # We need to output classification result, and class probabilities.
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
diff --git a/v1_api_demo/quick_start/trainer_config.lstm.py b/v1_api_demo/quick_start/trainer_config.lstm.py
deleted file mode 100644
index 43b4ddac2d..0000000000
--- a/v1_api_demo/quick_start/trainer_config.lstm.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(
-    input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.25))
-lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(input=lstm_max, size=2, act=SoftmaxActivation())
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/quick_start/trainer_config.resnet-lstm.py b/v1_api_demo/quick_start/trainer_config.resnet-lstm.py
deleted file mode 100644
index 89a837abb7..0000000000
--- a/v1_api_demo/quick_start/trainer_config.resnet-lstm.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# edit-mode: -*- python -*-
-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-This configuration is a demonstration of how to implement the stacked LSTM
-with residual connections, i.e. an LSTM layer takes the sum of the hidden states
-and inputs of the previous LSTM layer instead of only the hidden states.
-This architecture is from:
-Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc V. Le, Mohammad Norouzi,
-Wolfgang Macherey, Maxim Krikun, Yuan Cao, Qin Gao, Klaus Macherey,
-Jeff Klingner, Apurva Shah, Melvin Johnson, Xiaobing Liu, Lukasz Kaiser,
-Stephan Gouws, Yoshikiyo Kato, Taku Kudo, Hideto Kazawa, Keith Stevens,
-George Kurian, Nishant Patil, Wei Wang, Cliff Young, Jason Smith, Jason Riesa,
-Alex Rudnick, Oriol Vinyals, Greg Corrado, Macduff Hughes, Jeffrey Dean. 2016.
-Google's Neural Machine Translation System: Bridging the Gap between Human and
-Machine Translation. In arXiv https://arxiv.org/pdf/1609.08144v2.pdf
-Different from the architecture described in the paper, we use a stack single
-direction LSTM layers as the first layer instead of bi-directional LSTM. Also,
-since this is a demo code, to reduce computation time, we stacked 4 layers
-instead of 8 layers.
-"""
-
-from paddle.trainer_config_helpers import *
-
-dict_file = "./data/dict.txt"
-word_dict = dict()
-with open(dict_file, 'r') as f:
-    for i, line in enumerate(f):
-        w = line.strip().split()[0]
-        word_dict[w] = i
-
-is_predict = get_config_arg('is_predict', bool, False)
-trn = 'data/train.list' if not is_predict else None
-tst = 'data/test.list' if not is_predict else 'data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn,
-    test_list=tst,
-    module="dataprovider_emb",
-    obj=process,
-    args={"dictionary": word_dict})
-
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=2e-3,
-    learning_method=AdamOptimizer(),
-    regularization=L2Regularization(8e-4),
-    gradient_clipping_threshold=25)
-
-bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
-
-data = data_layer(name="word", size=len(word_dict))
-emb = embedding_layer(input=data, size=128)
-lstm = simple_lstm(input=emb, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
-
-previous_input, previous_hidden_state = emb, lstm
-
-for i in range(3):
-    # The input to the current layer is the sum of the hidden state
-    # and input of the previous layer.
-    current_input = addto_layer(input=[previous_input, previous_hidden_state])
-    hidden_state = simple_lstm(
-        input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
-    previous_input, previous_hidden_state = current_input, hidden_state
-
-lstm = previous_hidden_state
-
-lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
-output = fc_layer(
-    input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
-
-if is_predict:
-    maxid = maxid_layer(output)
-    outputs([maxid, output])
-else:
-    label = data_layer(name="label", size=2)
-    cls = classification_cost(input=output, label=label)
-    outputs(cls)
diff --git a/v1_api_demo/sequence_tagging/data/get_data.sh b/v1_api_demo/sequence_tagging/data/get_data.sh
deleted file mode 100755
index 0cdb394035..0000000000
--- a/v1_api_demo/sequence_tagging/data/get_data.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-wget http://www.cnts.ua.ac.be/conll2000/chunking/train.txt.gz
-wget http://www.cnts.ua.ac.be/conll2000/chunking/test.txt.gz
diff --git a/v1_api_demo/sequence_tagging/data/test.list b/v1_api_demo/sequence_tagging/data/test.list
deleted file mode 100644
index 073c0a0c90..0000000000
--- a/v1_api_demo/sequence_tagging/data/test.list
+++ /dev/null
@@ -1 +0,0 @@
-data/test.txt.gz
diff --git a/v1_api_demo/sequence_tagging/data/train.list b/v1_api_demo/sequence_tagging/data/train.list
deleted file mode 100644
index 43c24d5f64..0000000000
--- a/v1_api_demo/sequence_tagging/data/train.list
+++ /dev/null
@@ -1 +0,0 @@
-data/train.txt.gz
diff --git a/v1_api_demo/sequence_tagging/dataprovider.py b/v1_api_demo/sequence_tagging/dataprovider.py
deleted file mode 100644
index bb4b4465bc..0000000000
--- a/v1_api_demo/sequence_tagging/dataprovider.py
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-import gzip
-import logging
-
-logging.basicConfig(
-    format='[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s', )
-logger = logging.getLogger('paddle')
-logger.setLevel(logging.INFO)
-
-OOV_POLICY_IGNORE = 0
-OOV_POLICY_USE = 1
-OOV_POLICY_ERROR = 2
-
-num_original_columns = 3
-
-# Feature combination patterns.
-# [[-1,0], [0,0]]  means previous token at column 0 and current token at 
-# column 0 are combined as one feature.
-patterns = [
-    [[-2, 0]],
-    [[-1, 0]],
-    [[0, 0]],
-    [[1, 0]],
-    [[2, 0]],
-    [[-1, 0], [0, 0]],
-    [[0, 0], [1, 0]],
-    [[-2, 1]],
-    [[-1, 1]],
-    [[0, 1]],
-    [[1, 1]],
-    [[2, 1]],
-    [[-2, 1], [-1, 1]],
-    [[-1, 1], [0, 1]],
-    [[0, 1], [1, 1]],
-    [[1, 1], [2, 1]],
-    [[-2, 1], [-1, 1], [0, 1]],
-    [[-1, 1], [0, 1], [1, 1]],
-    [[0, 1], [1, 1], [2, 1]],
-]
-
-dict_label = {
-    'B-ADJP': 0,
-    'I-ADJP': 1,
-    'B-ADVP': 2,
-    'I-ADVP': 3,
-    'B-CONJP': 4,
-    'I-CONJP': 5,
-    'B-INTJ': 6,
-    'I-INTJ': 7,
-    'B-LST': 8,
-    'I-LST': 9,
-    'B-NP': 10,
-    'I-NP': 11,
-    'B-PP': 12,
-    'I-PP': 13,
-    'B-PRT': 14,
-    'I-PRT': 15,
-    'B-SBAR': 16,
-    'I-SBAR': 17,
-    'B-UCP': 18,
-    'I-UCP': 19,
-    'B-VP': 20,
-    'I-VP': 21,
-    'O': 22
-}
-
-
-def make_features(sequence):
-    length = len(sequence)
-    num_features = len(sequence[0])
-
-    def get_features(pos):
-        if pos < 0:
-            return ['#B%s' % -pos] * num_features
-        if pos >= length:
-            return ['#E%s' % (pos - length + 1)] * num_features
-        return sequence[pos]
-
-    for i in xrange(length):
-        for pattern in patterns:
-            fname = '/'.join([get_features(i + pos)[f] for pos, f in pattern])
-            sequence[i].append(fname)
-
-
-'''
-Source file format:
-Each line is for one timestep. The features are separated by space.
-An empty line indicates end of a sequence.
-
-cutoff: a list of numbers. If count of a feature is smaller than this,
- it will be ignored.
-if oov_policy[i] is OOV_POLICY_USE, id 0 is reserved for OOV features of
-i-th column.
-
-return a list of dict for each column
-'''
-
-
-def create_dictionaries(filename, cutoff, oov_policy):
-    def add_to_dict(sequence, dicts):
-        num_features = len(dicts)
-        for features in sequence:
-            l = len(features)
-            assert l == num_features, "Wrong number of features " + line
-            for i in xrange(l):
-                if features[i] in dicts[i]:
-                    dicts[i][features[i]] += 1
-                else:
-                    dicts[i][features[i]] = 1
-
-    num_features = len(cutoff)
-    dicts = []
-    for i in xrange(num_features):
-        dicts.append(dict())
-
-    f = gzip.open(filename, 'rb')
-
-    sequence = []
-
-    for line in f:
-        line = line.strip()
-        if not line:
-            make_features(sequence)
-            add_to_dict(sequence, dicts)
-            sequence = []
-            continue
-        features = line.split(' ')
-        sequence.append(features)
-
-    for i in xrange(num_features):
-        dct = dicts[i]
-        n = 1 if oov_policy[i] == OOV_POLICY_USE else 0
-        todo = []
-        for k, v in dct.iteritems():
-            if v < cutoff[i]:
-                todo.append(k)
-            else:
-                dct[k] = n
-                n += 1
-
-        if oov_policy[i] == OOV_POLICY_USE:
-            # placeholder so that len(dct) will be the number of features
-            # including OOV
-            dct['#OOV#'] = 0
-
-        logger.info('column %d dict size=%d, ignored %d' % (i, n, len(todo)))
-        for k in todo:
-            del dct[k]
-
-    f.close()
-    return dicts
-
-
-def initializer(settings, **xargs):
-    cutoff = [3, 1, 0]
-    cutoff += [3] * len(patterns)
-    oov_policy = [OOV_POLICY_IGNORE, OOV_POLICY_ERROR, OOV_POLICY_ERROR]
-    oov_policy += [OOV_POLICY_IGNORE] * len(patterns)
-    dicts = create_dictionaries('data/train.txt.gz', cutoff, oov_policy)
-    dicts[2] = dict_label
-    settings.dicts = dicts
-    settings.oov_policy = oov_policy
-    input_types = []
-    num_features = len(dicts)
-    for i in xrange(num_original_columns):
-        input_types.append(integer_sequence(len(dicts[i])))
-        logger.info("slot %s size=%s" % (i, len(dicts[i])))
-    if patterns:
-        dim = 0
-        for i in xrange(num_original_columns, num_features):
-            dim += len(dicts[i])
-        input_types.append(sparse_binary_vector_sequence(dim))
-        logger.info("feature size=%s" % dim)
-    settings.input_types = input_types
-
-
-'''
-if oov_policy[i] == OOV_POLICY_USE, features in i-th column which are not
-existed in dicts[i] will be assigned to id 0.
-if oov_policy[i] == OOV_POLICY_ERROR, all features in i-th column MUST exist
-in dicts[i].
-'''
-
-
-@provider(init_hook=initializer, cache=CacheType.CACHE_PASS_IN_MEM)
-def process(settings, filename):
-    input_file = filename
-    dicts = settings.dicts
-    oov_policy = settings.oov_policy
-
-    def gen_sample(sequence):
-        num_features = len(dicts)
-        sample = [list() for i in xrange(num_original_columns)]
-        if patterns:
-            sample.append([])
-        for features in sequence:
-            assert len(features) == num_features, \
-                "Wrong number of features: " + line
-            for i in xrange(num_original_columns):
-                id = dicts[i].get(features[i], -1)
-                if id != -1:
-                    sample[i].append(id)
-                elif oov_policy[i] == OOV_POLICY_IGNORE:
-                    sample[i].append(0xffffffff)
-                elif oov_policy[i] == OOV_POLICY_ERROR:
-                    logger.fatal("Unknown token: %s" % features[i])
-                else:
-                    sample[i].append(0)
-
-            if patterns:
-                dim = 0
-                vec = []
-                for i in xrange(num_original_columns, num_features):
-                    id = dicts[i].get(features[i], -1)
-                    if id != -1:
-                        vec.append(dim + id)
-                    elif oov_policy[i] == OOV_POLICY_IGNORE:
-                        pass
-                    elif oov_policy[i] == OOV_POLICY_ERROR:
-                        logger.fatal("Unknown token: %s" % features[i])
-                    else:
-                        vec.ids.append(dim + 0)
-
-                    dim += len(dicts[i])
-                sample[-1].append(vec)
-        return sample
-
-    num_features = len(dicts)
-    f = gzip.open(input_file, 'rb')
-
-    num_sequences = 0
-    sequence = []
-    for line in f:
-        line = line.strip()
-        if not line:
-            make_features(sequence)
-            yield gen_sample(sequence)
-            sequence = []
-            num_sequences += 1
-            continue
-        features = line.split(' ')
-        sequence.append(features)
-
-    f.close()
-
-    logger.info("num_sequences=%s" % num_sequences)
diff --git a/v1_api_demo/sequence_tagging/linear_crf.py b/v1_api_demo/sequence_tagging/linear_crf.py
deleted file mode 100644
index ea012ba1ae..0000000000
--- a/v1_api_demo/sequence_tagging/linear_crf.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-import math
-
-define_py_data_sources2(
-    train_list="data/train.list",
-    test_list="data/test.list",
-    module="dataprovider",
-    obj="process")
-
-batch_size = 1
-settings(
-    learning_method=MomentumOptimizer(),
-    batch_size=batch_size,
-    regularization=L2Regularization(batch_size * 1e-4),
-    model_average=ModelAverage(0.5),
-    learning_rate=1e-1,
-    learning_rate_decay_a=1e-5,
-    learning_rate_decay_b=0.25, )
-
-num_label_types = 23
-
-
-def get_simd_size(size):
-    return int(math.ceil(float(size) / 8)) * 8
-
-
-# Currently, in order to use sparse_update=True,
-# the size has to be aligned.
-num_label_types = get_simd_size(num_label_types)
-
-features = data_layer(name="features", size=76328)
-word = data_layer(name="word", size=6778)
-pos = data_layer(name="pos", size=44)
-chunk = data_layer(name="chunk", size=num_label_types)
-
-crf_input = fc_layer(
-    input=features,
-    size=num_label_types,
-    act=LinearActivation(),
-    bias_attr=False,
-    param_attr=ParamAttr(
-        initial_std=0, sparse_update=True))
-
-crf = crf_layer(
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(
-        name="crfw", initial_std=0), )
-
-crf_decoding = crf_decoding_layer(
-    size=num_label_types,
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(name="crfw"), )
-
-sum_evaluator(
-    name="error",
-    input=crf_decoding, )
-
-chunk_evaluator(
-    name="chunk_f1",
-    input=crf_decoding,
-    label=chunk,
-    chunk_scheme="IOB",
-    num_chunk_types=11, )
-
-inputs(word, pos, chunk, features)
-outputs(crf)
diff --git a/v1_api_demo/sequence_tagging/readme.md b/v1_api_demo/sequence_tagging/readme.md
deleted file mode 100644
index 2e17fffb83..0000000000
--- a/v1_api_demo/sequence_tagging/readme.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Sequence Tagging
-
-This demo is a sequence model for assigning tags to each token in a sentence. The task is described at <a href = "http://www.cnts.ua.ac.be/conll2000/chunking">CONLL2000 Text Chunking</a> task.
-
-## Download data
-```bash
-cd demo/sequence_tagging
-./data/get_data.sh
-```
-
-## Train model
-```bash
-cd demo/sequence_tagging
-./train.sh
-```
-
-## Model description
-
-We provide two models. One is a linear CRF model (linear_crf.py) with is equivalent to the one at <a href="http://leon.bottou.org/projects/sgd#stochastic_gradient_crfs">leon.bottou.org/projects/sgd</a>. The second one is a stacked bidirectional RNN and CRF model (rnn_crf.py).
-<center>
-<table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
-
-<thead>
-<th scope="col" class="left">Model name</th>
-<th scope="col" class="left">Number of parameters</th>
-<th scope="col" class="left">F1 score</th>
-</thead>
-
-<tbody>
-<tr>
-<td class="left">linear_crf</td>
-<td class="left"> 1.8M </td>
-<td class="left"> 0.937</td>
-</tr>
-
-<tr>
-<td class="left">rnn_crf</td>
-<td class="left"> 960K </td>
-<td class="left">0.941</td>
-</tr>
-
-</tbody>
-</table>
-</center>
-<br>
diff --git a/v1_api_demo/sequence_tagging/rnn_crf.py b/v1_api_demo/sequence_tagging/rnn_crf.py
deleted file mode 100644
index 937a34df10..0000000000
--- a/v1_api_demo/sequence_tagging/rnn_crf.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-
-import math
-
-define_py_data_sources2(
-    train_list="data/train.list",
-    test_list="data/test.list",
-    module="dataprovider",
-    obj="process")
-
-batch_size = 16
-settings(
-    learning_method=MomentumOptimizer(),
-    batch_size=batch_size,
-    regularization=L2Regularization(batch_size * 1e-5),
-    model_average=ModelAverage(0.5),
-    learning_rate=2e-3,
-    learning_rate_decay_a=5e-7,
-    learning_rate_decay_b=0.5, )
-
-word_dim = 128
-hidden_dim = 128
-with_rnn = True
-
-initial_std = 1 / math.sqrt(hidden_dim)
-param_attr = ParamAttr(initial_std=initial_std)
-cpu_layer_attr = ExtraLayerAttribute(device=-1)
-
-default_device(0)
-
-num_label_types = 23
-
-features = data_layer(name="features", size=76328)
-word = data_layer(name="word", size=6778)
-pos = data_layer(name="pos", size=44)
-chunk = data_layer(
-    name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)
-
-emb = embedding_layer(
-    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
-
-hidden1 = mixed_layer(
-    size=hidden_dim,
-    act=STanhActivation(),
-    bias_attr=True,
-    input=[
-        full_matrix_projection(emb), table_projection(
-            pos, param_attr=param_attr)
-    ])
-
-if with_rnn:
-    rnn1 = recurrent_layer(
-        act=ReluActivation(),
-        bias_attr=True,
-        input=hidden1,
-        param_attr=ParamAttr(initial_std=0), )
-
-hidden2 = mixed_layer(
-    size=hidden_dim,
-    act=STanhActivation(),
-    bias_attr=True,
-    input=[full_matrix_projection(hidden1)] +
-    ([full_matrix_projection(
-        rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
-
-if with_rnn:
-    rnn2 = recurrent_layer(
-        reverse=True,
-        act=ReluActivation(),
-        bias_attr=True,
-        input=hidden2,
-        param_attr=ParamAttr(initial_std=0), )
-
-crf_input = mixed_layer(
-    size=num_label_types,
-    bias_attr=False,
-    input=[full_matrix_projection(hidden2), ] +
-    ([full_matrix_projection(
-        rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )
-
-crf = crf_layer(
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(
-        name="crfw", initial_std=0),
-    layer_attr=cpu_layer_attr, )
-
-crf_decoding = crf_decoding_layer(
-    size=num_label_types,
-    input=crf_input,
-    label=chunk,
-    param_attr=ParamAttr(name="crfw"),
-    layer_attr=cpu_layer_attr, )
-
-sum_evaluator(
-    name="error",
-    input=crf_decoding, )
-
-chunk_evaluator(
-    name="chunk_f1",
-    input=crf_decoding,
-    label=chunk,
-    chunk_scheme="IOB",
-    num_chunk_types=11, )
-
-inputs(word, pos, chunk, features)
-outputs(crf)
diff --git a/v1_api_demo/sequence_tagging/train.sh b/v1_api_demo/sequence_tagging/train.sh
deleted file mode 100755
index 37e196c842..0000000000
--- a/v1_api_demo/sequence_tagging/train.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-paddle train \
-       --config rnn_crf.py \
-       --parallel_nn=1 \
-       --use_gpu=1 \
-       --dot_period=10 \
-       --log_period=1000 \
-       --test_period=0 \
-       --num_passes=10 \
-2>&1 | tee 'train.log'
-paddle usage -l 'train.log' -e $? -n "sequence_tagging_train" >/dev/null 2>&1
diff --git a/v1_api_demo/sequence_tagging/train_linear.sh b/v1_api_demo/sequence_tagging/train_linear.sh
deleted file mode 100755
index ad6e2d8ee7..0000000000
--- a/v1_api_demo/sequence_tagging/train_linear.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash
-
-paddle train \
-       --config linear_crf.py \
-       --use_gpu=0 \
-       --dot_period=100 \
-       --log_period=10000 \
-       --test_period=0 \
-       --num_passes=10
-2>&1 | tee 'train_linear.log'
-paddle usage -l 'train_linear.log' -e $? -n "sequence_tagging_train_linear" >/dev/null 2>&1
diff --git a/v1_api_demo/traffic_prediction/README b/v1_api_demo/traffic_prediction/README
deleted file mode 100644
index 4c95188583..0000000000
--- a/v1_api_demo/traffic_prediction/README
+++ /dev/null
@@ -1,7 +0,0 @@
-run by:
-cd ./data
-sh get_data.sh
-cd ..
-sh train.sh
-sh predict.sh
-
diff --git a/v1_api_demo/traffic_prediction/data/get_data.sh b/v1_api_demo/traffic_prediction/data/get_data.sh
deleted file mode 100755
index f2fa548d47..0000000000
--- a/v1_api_demo/traffic_prediction/data/get_data.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-set -x
-
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd $DIR
-
-#download the dataset
-echo "Downloading traffic data..."
-wget http://paddlepaddle.cdn.bcebos.com/demo/traffic/traffic_data.tar.gz
-
-#extract package
-echo "Unzipping..."
-tar -zxvf traffic_data.tar.gz
-
-echo "data/speeds.csv" > train.list
-echo "data/speeds.csv" > test.list
-echo "data/speeds.csv" > pred.list
-
-echo "Done."
diff --git a/v1_api_demo/traffic_prediction/dataprovider.py b/v1_api_demo/traffic_prediction/dataprovider.py
deleted file mode 100644
index c7883b6950..0000000000
--- a/v1_api_demo/traffic_prediction/dataprovider.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer.PyDataProvider2 import *
-import sys
-import numpy as np
-TERM_NUM = 24
-FORECASTING_NUM = 24
-LABEL_VALUE_NUM = 4
-
-
-def initHook(settings, file_list, **kwargs):
-    """
-    Init hook is invoked before process data. It will set obj.slots and store data meta.
-
-    :param settings: global object. It will passed to process routine.
-    :type obj: object
-    :param file_list: the meta file object, which passed from trainer_config.py,but unused in this function.
-    :param kwargs: unused other arguments.
-    """
-    del kwargs  #unused 
-
-    settings.pool_size = sys.maxint
-    #Use a time seires of the past as feature.
-    #Dense_vector's expression form is [float,float,...,float]
-    settings.input_types = [dense_vector(TERM_NUM)]
-    #There are next FORECASTING_NUM fragments you need predict.
-    #Every predicted condition at time point has four states.
-    for i in range(FORECASTING_NUM):
-        settings.input_types.append(integer_value(LABEL_VALUE_NUM))
-
-
-@provider(
-    init_hook=initHook, cache=CacheType.CACHE_PASS_IN_MEM, should_shuffle=True)
-def process(settings, file_name):
-    with open(file_name) as f:
-        #abandon fields name
-        f.next()
-        for row_num, line in enumerate(f):
-            speeds = map(int, line.rstrip('\r\n').split(",")[1:])
-            # Get the max index.
-            end_time = len(speeds)
-            # Scanning and generating samples
-            for i in range(TERM_NUM, end_time - FORECASTING_NUM):
-                # For dense slot
-                pre_spd = map(float, speeds[i - TERM_NUM:i])
-
-                # Integer value need predicting, values start from 0, so every one minus 1.
-                fol_spd = [j - 1 for j in speeds[i:i + FORECASTING_NUM]]
-
-                # Predicting label is missing, abandon the sample.
-                if -1 in fol_spd:
-                    continue
-                yield [pre_spd] + fol_spd
-
-
-def predict_initHook(settings, file_list, **kwargs):
-    settings.pool_size = sys.maxint
-    settings.input_types = [dense_vector(TERM_NUM)]
-
-
-@provider(init_hook=predict_initHook, should_shuffle=False)
-def process_predict(settings, file_name):
-    with open(file_name) as f:
-        #abandon fields name
-        f.next()
-        for row_num, line in enumerate(f):
-            speeds = map(int, line.rstrip('\r\n').split(","))
-            end_time = len(speeds)
-            pre_spd = map(float, speeds[end_time - TERM_NUM:end_time])
-            yield pre_spd
diff --git a/v1_api_demo/traffic_prediction/gen_result.py b/v1_api_demo/traffic_prediction/gen_result.py
deleted file mode 100644
index 3da70b3031..0000000000
--- a/v1_api_demo/traffic_prediction/gen_result.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-res = []
-with open('./rank-00000') as f:
-    for line in f:
-        pred = map(int, line.strip('\r\n;').split(";"))
-        #raw prediction range from 0 to 3
-        res.append([i + 1 for i in pred])
-
-file_name = open('./data/pred.list').read().strip('\r\n')
-
-FORECASTING_NUM = 24
-header = [
-    'id',
-    '201604200805',
-    '201604200810',
-    '201604200815',
-    '201604200820',
-    '201604200825',
-    '201604200830',
-    '201604200835',
-    '201604200840',
-    '201604200845',
-    '201604200850',
-    '201604200855',
-    '201604200900',
-    '201604200905',
-    '201604200910',
-    '201604200915',
-    '201604200920',
-    '201604200925',
-    '201604200930',
-    '201604200935',
-    '201604200940',
-    '201604200945',
-    '201604200950',
-    '201604200955',
-    '201604201000',
-]
-###################
-## To CSV format ##
-###################
-with open(file_name) as f:
-    f.next()
-    print ','.join(header)
-    for row_num, line in enumerate(f):
-        fields = line.rstrip('\r\n').split(',')
-        linkid = fields[0]
-        print linkid + ',' + ','.join(map(str, res[row_num]))
diff --git a/v1_api_demo/traffic_prediction/predict.sh b/v1_api_demo/traffic_prediction/predict.sh
deleted file mode 100755
index 2dbd5e8805..0000000000
--- a/v1_api_demo/traffic_prediction/predict.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.py
-# pass choice 
-model="output/pass-00000"
-paddle train \
-    --config=$cfg \
-    --use_gpu=false \
-    --job=test \
-    --init_model_path=$model \
-    --config_args=is_predict=1 \
-    --predict_output_dir=. 
-
-python gen_result.py > result.csv
-
-rm -rf rank-00000
diff --git a/v1_api_demo/traffic_prediction/train.sh b/v1_api_demo/traffic_prediction/train.sh
deleted file mode 100755
index 48dfc5604f..0000000000
--- a/v1_api_demo/traffic_prediction/train.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-set -e
-
-cfg=trainer_config.py
-paddle train \
-  --config=$cfg \
-  --save_dir=./output \
-  --trainer_count=4 \
-  --log_period=1000 \
-  --dot_period=10 \
-  --num_passes=10 \
-  --use_gpu=false \
-  --show_parameter_stats_period=3000 \
-  2>&1 | tee 'train.log'
diff --git a/v1_api_demo/traffic_prediction/trainer_config.py b/v1_api_demo/traffic_prediction/trainer_config.py
deleted file mode 100755
index 52d678624a..0000000000
--- a/v1_api_demo/traffic_prediction/trainer_config.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors, Inc. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-
-################################### DATA Configuration #############################################
-is_predict = get_config_arg('is_predict', bool, False)
-trn = './data/train.list' if not is_predict else None
-tst = './data/test.list' if not is_predict else './data/pred.list'
-process = 'process' if not is_predict else 'process_predict'
-define_py_data_sources2(
-    train_list=trn, test_list=tst, module="dataprovider", obj=process)
-################################### Parameter Configuaration #######################################
-TERM_NUM = 24
-FORECASTING_NUM = 24
-emb_size = 16
-batch_size = 128 if not is_predict else 1
-settings(
-    batch_size=batch_size,
-    learning_rate=1e-3,
-    learning_method=RMSPropOptimizer())
-################################### Algorithm Configuration ########################################
-
-output_label = []
-
-link_encode = data_layer(name='link_encode', size=TERM_NUM)
-for i in xrange(FORECASTING_NUM):
-    # Each task share same weight.
-    link_param = ParamAttr(
-        name='_link_vec.w', initial_max=1.0, initial_min=-1.0)
-    link_vec = fc_layer(input=link_encode, size=emb_size, param_attr=link_param)
-    score = fc_layer(input=link_vec, size=4, act=SoftmaxActivation())
-    if is_predict:
-        maxid = maxid_layer(score)
-        output_label.append(maxid)
-    else:
-        # Multi-task training.
-        label = data_layer(name='label_%dmin' % ((i + 1) * 5), size=4)
-        cls = classification_cost(
-            input=score, name="cost_%dmin" % ((i + 1) * 5), label=label)
-        output_label.append(cls)
-outputs(output_label)
diff --git a/v1_api_demo/vae/README.md b/v1_api_demo/vae/README.md
deleted file mode 100644
index e55d483b02..0000000000
--- a/v1_api_demo/vae/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-#Variational Autoencoder (VAE)
-
-This demo implements VAE training described in the original paper (https://arxiv.org/abs/1312.6114).
-
-
-In order to run the model, first download the MNIST dataset by running the shell script in ./data.
-
-Then you can run the command below. The flag --useGpu specifies whether to use gpu for training (0 is cpu, 1 is gpu).  
-
-$python vae_train.py [--use_gpu 1]
-
-The generated images will be stored in ./samples/
-The corresponding models will be stored in ./params/
diff --git a/v1_api_demo/vae/data/get_mnist_data.sh b/v1_api_demo/vae/data/get_mnist_data.sh
deleted file mode 100755
index a77c81bf5a..0000000000
--- a/v1_api_demo/vae/data/get_mnist_data.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env sh
-# This script downloads the mnist data and unzips it.
-set -e
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-rm -rf "$DIR/mnist_data"
-mkdir "$DIR/mnist_data"
-cd "$DIR/mnist_data"
-
-echo "Downloading..."
-
-for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
-do
-    if [ ! -e $fname ]; then
-        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
-        gunzip ${fname}.gz
-    fi
-done
diff --git a/v1_api_demo/vae/dataloader.py b/v1_api_demo/vae/dataloader.py
deleted file mode 100644
index e9ff95d44f..0000000000
--- a/v1_api_demo/vae/dataloader.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-
-
-class MNISTloader():
-    def __init__(self,
-                 data_path="./data/mnist_data/",
-                 batch_size=60,
-                 process='train'):
-        self.batch_size = batch_size
-        self.data_path = data_path
-        self._pointer = 0
-        self.image_batches = np.array([])
-        self.process = process
-
-    def _extract_images(self, filename, n):
-        f = open(filename, 'rb')
-        f.read(16)
-        data = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
-        #Mapping data into [-1, 1]
-        data = data / 255. * 2. - 1
-        data_batches = np.split(data, 60000 / self.batch_size, 0)
-
-        f.close()
-
-        return data_batches
-
-    @property
-    def pointer(self):
-        return self._pointer
-
-    def load_data(self):
-        TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
-        TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path
-
-        if self.process == 'train':
-            self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
-        else:
-            self.image_batches = self._extract_images(TEST_IMAGES, 10000)
-
-    def next_batch(self):
-        batch = self.image_batches[self._pointer]
-        self._pointer = (self._pointer + 1) % (60000 / self.batch_size)
-        return np.array(batch)
-
-    def reset_pointer(self):
-        self._pointer = 0
diff --git a/v1_api_demo/vae/vae_conf.py b/v1_api_demo/vae/vae_conf.py
deleted file mode 100644
index 301dd23793..0000000000
--- a/v1_api_demo/vae/vae_conf.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.trainer_config_helpers import *
-import numpy as np
-
-is_generating = get_config_arg("is_generating", bool, False)
-
-settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())
-
-X_dim = 28 * 28
-h_dim = 128
-z_dim = 100
-
-
-def reparameterization(mu, logvar):
-    eps = ParamAttr(initial_mean=0., initial_std=1)
-    with mixed_layer() as sigma:
-        sigma += dotmul_projection(layer_math.exp(logvar) * 0.5, param_attr=eps)
-    return mu + sigma
-
-
-def q_func(X):
-    """
-    xavier initialization
-    """
-    param_attr = ParamAttr(
-        name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
-    mu_param = ParamAttr(
-        name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
-    logvar_param = ParamAttr(
-        name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
-
-    bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
-    mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
-    logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)
-
-    share_layer = fc_layer(
-        X,
-        size=h_dim,
-        param_attr=param_attr,
-        bias_attr=bias_attr,
-        act=ReluActivation())
-
-    return (fc_layer(
-        share_layer,
-        size=z_dim,
-        param_attr=mu_param,
-        bias_attr=mu_bias,
-        act=LinearActivation()), fc_layer(
-            share_layer,
-            size=z_dim,
-            param_attr=logvar_param,
-            bias_attr=logvar_bias,
-            act=LinearActivation()))
-
-
-def generator(z):
-
-    hidden_param = ParamAttr(
-        name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
-    hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
-    prob_param = ParamAttr(
-        name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
-    prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)
-
-    hidden_layer = fc_layer(
-        z,
-        size=h_dim,
-        act=ReluActivation(),
-        param_attr=hidden_param,
-        bias_attr=hidden_bias)
-    prob = fc_layer(
-        hidden_layer,
-        size=X_dim,
-        act=SigmoidActivation(),
-        param_attr=prob_param,
-        bias_attr=prob_bias)
-
-    return prob
-
-
-def reconstruct_error(prob, X):
-    cost = multi_binary_label_cross_entropy(input=prob, label=X)
-    return cost
-
-
-def KL_loss(mu, logvar):
-    with mixed_layer() as mu_square:
-        mu_square += dotmul_operator(mu, mu, scale=1.)
-
-    cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)
-
-    return cost
-
-
-if not is_generating:
-    x_batch = data_layer(name='x_batch', size=X_dim)
-    mu, logvar = q_func(x_batch)
-    z_samples = reparameterization(mu, logvar)
-    prob = generator(z_samples)
-    outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
-else:
-    z_samples = data_layer(name='noise', size=z_dim)
-    outputs(generator(z_samples))
diff --git a/v1_api_demo/vae/vae_train.py b/v1_api_demo/vae/vae_train.py
deleted file mode 100644
index 1babb011c7..0000000000
--- a/v1_api_demo/vae/vae_train.py
+++ /dev/null
@@ -1,175 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import random
-import numpy as np
-import cPickle
-import sys, os
-from PIL import Image
-
-from paddle.trainer.config_parser import parse_config
-from paddle.trainer.config_parser import logger
-import py_paddle.swig_paddle as api
-import dataloader
-import matplotlib.pyplot as plt
-
-
-def plot_samples(samples):
-    fig = plt.figure(figsize=(4, 4))
-    gs = gridspec.GridSpec(4, 4)
-    gs.update(wspace=0.05, hspace=0.05)
-    for i, sample in enumerate(samples):
-        plt.subplot(gs[i])
-        plt.axis('off')
-        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')
-
-    return fig
-
-
-def CHECK_EQ(a, b):
-    assert a == b, "a=%s, b=%s" % (a, b)
-
-
-def get_fake_samples(generator_machine, batch_size, noise):
-    gen_inputs = api.Arguments.createArguments(1)
-    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
-    gen_outputs = api.Arguments.createArguments(0)
-    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
-    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
-    return fake_samples
-
-
-def copy_shared_parameters(src, dst):
-    '''
-    copy the parameters from src to dst
-    :param src: the source of the parameters
-    :type src: GradientMachine
-    :param dst: the destination of the parameters
-    :type dst: GradientMachine
-    '''
-    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
-    src_params = dict([(p.getName(), p) for p in src_params])
-
-    for i in xrange(dst.getParameterSize()):
-        dst_param = dst.getParameter(i)
-        src_param = src_params.get(dst_param.getName(), None)
-        if src_param is None:
-            continue
-        src_value = src_param.getBuf(api.PARAMETER_VALUE)
-        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
-        CHECK_EQ(len(src_value), len(dst_value))
-        dst_value.copyFrom(src_value)
-        dst_param.setValueUpdated()
-
-
-def find(iterable, cond):
-    for item in iterable:
-        if cond(item):
-            return item
-    return None
-
-
-def get_layer_size(model_conf, layer_name):
-    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
-    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
-    return layer_conf.size
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--use_gpu", default="1", help="1 means use gpu for training")
-    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
-    args = parser.parse_args()
-    use_gpu = args.use_gpu
-    assert use_gpu in ["0", "1"]
-
-    if not os.path.exists("./samples/"):
-        os.makedirs("./samples/")
-
-    if not os.path.exists("./params/"):
-        os.makedirs("./params/")
-
-    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
-                   '--log_period=1000', '--gpu_id=' + args.gpu_id,
-                   '--save_dir=' + "./params/")
-
-    conf = "vae_conf.py"
-
-    trainer_conf = parse_config(conf, "is_generating=False")
-    gener_conf = parse_config(conf, "is_generating=True")
-
-    batch_size = trainer_conf.opt_config.batch_size
-
-    noise_dim = get_layer_size(gener_conf.model_config, "noise")
-
-    mnist = dataloader.MNISTloader(batch_size=batch_size)
-    mnist.load_data()
-
-    training_machine = api.GradientMachine.createFromConfigProto(
-        trainer_conf.model_config)
-
-    generator_machine = api.GradientMachine.createFromConfigProto(
-        gener_conf.model_config)
-
-    trainer = api.Trainer.create(trainer_conf, training_machine)
-
-    trainer.startTrain()
-
-    for train_pass in xrange(100):
-        trainer.startTrainPass()
-        mnist.reset_pointer()
-        i = 0
-        it = 0
-        while mnist.pointer != 0 or i == 0:
-            X = mnist.next_batch().astype('float32')
-
-            inputs = api.Arguments.createArguments(1)
-            inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))
-
-            trainer.trainOneDataBatch(batch_size, inputs)
-
-            if it % 1000 == 0:
-
-                outputs = api.Arguments.createArguments(0)
-                training_machine.forward(inputs, outputs, api.PASS_TEST)
-                loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
-                print "\niter: {}".format(str(it).zfill(3))
-                print "VAE loss: {}".format(str(loss).zfill(3))
-
-                #Sync parameters between networks (GradientMachine) at the beginning
-                copy_shared_parameters(training_machine, generator_machine)
-
-                z_samples = np.random.randn(batch_size,
-                                            noise_dim).astype('float32')
-                samples = get_fake_samples(generator_machine, batch_size,
-                                           z_samples)
-
-                #Generating the first 16 images for a picture. 
-                figure = plot_samples(samples[:16])
-                plt.savefig(
-                    "./samples/{}_{}.png".format(
-                        str(train_pass).zfill(3), str(i).zfill(3)),
-                    bbox_inches='tight')
-                plt.close(figure)
-                i += 1
-            it += 1
-
-        trainer.finishTrainPass()
-    trainer.finishTrain()
-
-
-if __name__ == '__main__':
-    main()