Merge branch 'dist_train_benchmark_vgg16' of https://github.com/typhoonzero/Paddle into dist_train_benchmark_vgg16

8 years ago · da3b14bc67
parent b315a408e9 900e911f42
commit da3b14bc67
628 changed files with 10564 additions and 12871 deletions
--- a/.copyright.hook
+++ b/.copyright.hook
@ -49,12 +49,17 @@ def generate_copyright(template, lang='C'):
        LANG_COMMENT_MARK = "//"

    lines = template.split(NEW_LINE_MARK)
-    ans = LANG_COMMENT_MARK + COPYRIGHT_HEADER + NEW_LINE_MARK
+    BLANK = " "
+    ans = LANG_COMMENT_MARK + BLANK + COPYRIGHT_HEADER + NEW_LINE_MARK
    for lino, line in enumerate(lines):
        if lino == 0 or lino == 1 or lino == len(lines) - 1: continue
-        ans += LANG_COMMENT_MARK + line + NEW_LINE_MARK
+        if len(line)  == 0:
+            BLANK = ""
+        else:
+            BLANK = " "
+        ans += LANG_COMMENT_MARK + BLANK + line + NEW_LINE_MARK

-    return ans
+    return ans + "\n"


 def lang_type(filename):
@ -62,6 +67,8 @@ def lang_type(filename):
        return "Python"
    elif filename.endswith(".h"):
        return "C"
+    elif filename.endswith(".c"):
+        return "C"
    elif filename.endswith(".hpp"):
        return "C"
    elif filename.endswith(".cc"):
@ -77,10 +84,13 @@ def lang_type(filename):
    elif filename.endswith(".proto"):
        return "C"
    else:
-        print("Unsupported filetype")
+        print("Unsupported filetype %s", filename)
        exit(0)


+PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
+
+
 def main(argv=None):
    parser = argparse.ArgumentParser(
        description='Checker for copyright declaration.')
@ -89,9 +99,14 @@ def main(argv=None):

    retv = 0
    for filename in args.filenames:
-        first_line = io.open(filename).readline()
-        if "Copyright" in first_line: continue
-        original_contents = io.open(filename).read()
+        fd = io.open(filename, encoding="utf-8")
+        first_line = fd.readline()
+        second_line = fd.readline()
+        if "COPYRIGHT (C)" in first_line.upper(): continue
+        if first_line.startswith("#!") or PYTHON_ENCODE.match(
+                second_line) != None or PYTHON_ENCODE.match(first_line) != None:
+            continue
+        original_contents = io.open(filename, encoding="utf-8").read()
        new_contents = generate_copyright(
            COPYRIGHT, lang_type(filename)) + original_contents
        print('Auto Insert Copyright Header {}'.format(filename))
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@ -0,0 +1,46 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at paddle-dev@baidu.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/4/
--- a/CODE_OF_CONDUCT_cn.md
+++ b/CODE_OF_CONDUCT_cn.md
@ -0,0 +1,50 @@
+# 参与者公约
+
+## 我们的保证
+
+为了促进一个开放透明且友好的环境，我们作为贡献者和维护者保证：无论年龄、种族、民族、性别认同和表达（方式）、体型、身体健全与否、经验水平、国籍、个人表现、宗教或性别取向，参与者在我们项目和社区中都免于骚扰。
+
+## 我们的标准
+
+有助于创造正面环境的行为包括但不限于：
+* 使用友好和包容性语言
+* 尊重不同的观点和经历
+* 耐心地接受建设性批评
+* 关注对社区最有利的事情
+* 友善对待其他社区成员
+
+身为参与者不能接受的行为包括但不限于：
+* 使用与性有关的言语或是图像，以及不受欢迎的性骚扰
+* 捣乱/煽动/造谣的行为或进行侮辱/贬损的评论，人身攻击及政治攻击
+* 公开或私下的骚扰
+* 未经许可地发布他人的个人资料，例如住址或是电子地址
+* 其他可以被合理地认定为不恰当或者违反职业操守的行为
+
+## 我们的责任
+
+项目维护者有责任为「可接受的行为」标准做出诠释，以及对已发生的不被接受的行为采取恰当且公平的纠正措施。
+
+项目维护者有权利及责任去删除、编辑、拒绝与本行为标准有所违背的评论(comments)、提交(commits)、代码、wiki 编辑、问题(issues)和其他贡献，以及项目维护者可暂时或永久性的禁止任何他们认为有不适当、威胁、冒犯、有害行为的贡献者。
+
+## 使用范围
+
+当一个人代表该项目或是其社区时，本行为标准适用于其项目平台和公共平台。
+
+代表项目或是社区的情况，举例来说包括使用官方项目的电子邮件地址、通过官方的社区媒体账号发布或线上或线下事件中担任指定代表。
+
+该项目的呈现方式可由其项目维护者进行进一步的定义及解释。
+
+## 强制执行
+
+可以通过paddle-dev@baidu.com，来联系项目团队来举报滥用、骚扰或其他不被接受的行为。
+
+任何维护团队认为有必要且适合的所有投诉都将进行审查及调查，并做出相对应的回应。项目小组对事件回报者有保密的义务。具体执行方针的进一步细节可能会单独公布。
+
+没有切实地遵守或是执行本行为标准的项目维护人员，可能会因项目领导人或是其他成员的决定，暂时或是永久地取消其参与资格。
+
+## 来源
+
+本行为标准改编自[贡献者公约][主页]，版本 1.4
+可在此观看https://www.contributor-covenant.org/zh-cn/version/1/4/code-of-conduct.html
+
+[主页]: https://www.contributor-covenant.org
--- a/adversarial/README.md
+++ b/adversarial/README.md
@ -1,9 +0,0 @@
-# Advbox
-
-Advbox is a Python toolbox to create adversarial examples that fool neural networks. It requires Python and paddle.
-
-## How to use
-
-1. train a model and save it's parameters. (like fluid_mnist.py)
-2. load the parameters which is trained in step1, then reconstruct the model.(like mnist_tutorial_fgsm.py)
-3. use advbox to generate the adversarial sample.
--- a/adversarial/advbox/init.py
+++ b/adversarial/advbox/init.py
@ -1,16 +0,0 @@
-# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-   A set of tools for generating adversarial example on paddle platform 
-"""
--- a/adversarial/advbox/attacks/base.py
+++ b/adversarial/advbox/attacks/base.py
@ -1,52 +0,0 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-"""
-The base model of the model.
-"""
-from abc import ABCMeta, abstractmethod
-
-
-class Attack(object):
-    """
-    Abstract base class for adversarial attacks. `Attack` represent an adversarial attack
-    which search an adversarial example. subclass should implement the _apply() method.
-
-    Args:
-        model(Model): an instance of the class advbox.base.Model.
-
-    """
-    __metaclass__ = ABCMeta
-
-    def __init__(self, model):
-        self.model = model
-
-    def __call__(self, image_label):
-        """
-        Generate the adversarial sample.
-
-        Args:
-        image_label(list): The image and label tuple list with one element.
-        """
-        adv_img = self._apply(image_label)
-        return adv_img
-
-    @abstractmethod
-    def _apply(self, image_label):
-        """
-        Search an adversarial example.
-
-        Args:
-        image_batch(list): The image and label tuple list with one element.
-        """
-        raise NotImplementedError
--- a/adversarial/advbox/attacks/gradientsign.py
+++ b/adversarial/advbox/attacks/gradientsign.py
@ -1,51 +0,0 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-"""
-This module provide the attack method for FGSM's implement.
-"""
-from __future__ import division
-import numpy as np
-from collections import Iterable
-from .base import Attack
-
-
-class GradientSignAttack(Attack):
-    """
-    This attack was originally implemented by Goodfellow et al. (2015) with the
-    infinity norm (and is known as the "Fast Gradient Sign Method"). This is therefore called
-    the Fast Gradient Method.
-    Paper link: https://arxiv.org/abs/1412.6572
-    """
-
-    def _apply(self, image_label, epsilons=1000):
-        assert len(image_label) == 1
-        pre_label = np.argmax(self.model.predict(image_label))
-
-        min_, max_ = self.model.bounds()
-        gradient = self.model.gradient(image_label)
-        gradient_sign = np.sign(gradient) * (max_ - min_)
-
-        if not isinstance(epsilons, Iterable):
-            epsilons = np.linspace(0, 1, num=epsilons + 1)
-
-        for epsilon in epsilons:
-            adv_img = image_label[0][0].reshape(
-                gradient_sign.shape) + epsilon * gradient_sign
-            adv_img = np.clip(adv_img, min_, max_)
-            adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
-            if pre_label != adv_label:
-                return adv_img
-
-
-FGSM = GradientSignAttack
--- a/adversarial/advbox/models/init.py
+++ b/adversarial/advbox/models/init.py
@ -1,16 +0,0 @@
-# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Paddle model for target of attack 
-"""
--- a/adversarial/advbox/models/base.py
+++ b/adversarial/advbox/models/base.py
@ -1,103 +0,0 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-"""
-The base model of the model.
-"""
-from abc import ABCMeta
-import abc
-
-abstractmethod = abc.abstractmethod
-
-
-class Model(object):
-    """
-    Base class of model to provide attack.
-
-
-    Args:
-        bounds(tuple): The lower and upper bound for the image pixel.
-        channel_axis(int): The index of the axis that represents the color channel.
-        preprocess(tuple): Two element tuple used to preprocess the input. First
-            substract the first element, then divide the second element.
-    """
-    __metaclass__ = ABCMeta
-
-    def __init__(self, bounds, channel_axis, preprocess=None):
-        assert len(bounds) == 2
-        assert channel_axis in [0, 1, 2, 3]
-
-        if preprocess is None:
-            preprocess = (0, 1)
-        self._bounds = bounds
-        self._channel_axis = channel_axis
-        self._preprocess = preprocess
-
-    def bounds(self):
-        """
-        Return the upper and lower bounds of the model.
-        """
-        return self._bounds
-
-    def channel_axis(self):
-        """
-        Return the channel axis of the model.
-        """
-        return self._channel_axis
-
-    def _process_input(self, input_):
-        res = input_
-        sub, div = self._preprocess
-        if sub != 0:
-            res = input_ - sub
-        assert div != 0
-        if div != 1:
-            res /= div
-        return res
-
-    @abstractmethod
-    def predict(self, image_batch):
-        """
-        Calculate the prediction of the image batch.
-
-        Args:
-            image_batch(numpy.ndarray): image batch of shape (batch_size, height, width, channels).
-
-        Return:
-            numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def num_classes(self):
-        """
-        Determine the number of the classes
-
-        Return:
-            int: the number of the classes
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def gradient(self, image_batch):
-        """
-        Calculate the gradient of the cross-entropy loss w.r.t the image.
-
-        Args:
-            image_batch(list): The image and label tuple list.
-
-        Return:
-            numpy.ndarray: gradient of the cross-entropy loss w.r.t the image with
-                the shape (height, width, channel).
-        """
-        raise NotImplementedError
--- a/adversarial/advbox/models/paddle.py
+++ b/adversarial/advbox/models/paddle.py
@ -1,114 +0,0 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-from __future__ import absolute_import
-
-import numpy as np
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
-from paddle.v2.fluid.framework import program_guard
-
-from .base import Model
-
-
-class PaddleModel(Model):
-    """
-    Create a PaddleModel instance.
-    When you need to generate a adversarial sample, you should construct an instance of PaddleModel.
-
-    Args:
-        program(paddle.v2.fluid.framework.Program): The program of the model which generate the adversarial sample.
-        input_name(string): The name of the input.
-        logits_name(string): The name of the logits.
-        predict_name(string): The name of the predict.
-        cost_name(string): The name of the loss in the program.
-    """
-
-    def __init__(self,
-                 program,
-                 input_name,
-                 logits_name,
-                 predict_name,
-                 cost_name,
-                 bounds,
-                 channel_axis=3,
-                 preprocess=None):
-        super(PaddleModel, self).__init__(
-            bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
-
-        if preprocess is None:
-            preprocess = (0, 1)
-
-        self._program = program
-        self._place = fluid.CPUPlace()
-        self._exe = fluid.Executor(self._place)
-
-        self._input_name = input_name
-        self._logits_name = logits_name
-        self._predict_name = predict_name
-        self._cost_name = cost_name
-
-        # gradient
-        loss = self._program.block(0).var(self._cost_name)
-        param_grads = fluid.backward.append_backward(
-            loss, parameter_list=[self._input_name])
-        self._gradient = dict(param_grads)[self._input_name]
-
-    def predict(self, image_batch):
-        """
-            Predict the label of the image_batch.
-
-            Args:
-                image_batch(list): The image and label tuple list.
-            Return:
-                numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
-        """
-        feeder = fluid.DataFeeder(
-            feed_list=[self._input_name, self._logits_name],
-            place=self._place,
-            program=self._program)
-        predict_var = self._program.block(0).var(self._predict_name)
-        predict = self._exe.run(self._program,
-                                feed=feeder.feed(image_batch),
-                                fetch_list=[predict_var])
-        return predict
-
-    def num_classes(self):
-        """
-            Calculate the number of classes of the output label. 
-
-        Return:
-            int: the number of classes
-        """
-        predict_var = self._program.block(0).var(self._predict_name)
-        assert len(predict_var.shape) == 2
-        return predict_var.shape[1]
-
-    def gradient(self, image_batch):
-        """
-        Calculate the gradient of the loss w.r.t the input.
-
-        Args:
-            image_batch(list): The image and label tuple list.
-        Return:
-            list: The list of the gradient of the image.
-        """
-        feeder = fluid.DataFeeder(
-            feed_list=[self._input_name, self._logits_name],
-            place=self._place,
-            program=self._program)
-
-        grad, = self._exe.run(self._program,
-                              feed=feeder.feed(image_batch),
-                              fetch_list=[self._gradient])
-        return grad
--- a/adversarial/fluid_mnist.py
+++ b/adversarial/fluid_mnist.py
@ -1,99 +0,0 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-"""
-CNN on mnist data using fluid api of paddlepaddle
-"""
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
-
-
-def mnist_cnn_model(img):
-    """
-    Mnist cnn model
-
-    Args:
-        img(Varaible): the input image to be recognized
-
-    Returns:
-        Variable: the label prediction
-    """
-    conv_pool_1 = fluid.nets.simple_img_conv_pool(
-        input=img,
-        num_filters=20,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    conv_pool_2 = fluid.nets.simple_img_conv_pool(
-        input=conv_pool_1,
-        num_filters=50,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-    return logits
-
-
-def main():
-    """
-    Train the cnn model on mnist datasets
-    """
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    logits = mnist_cnn_model(img)
-    cost = fluid.layers.cross_entropy(input=logits, label=label)
-    avg_cost = fluid.layers.mean(x=cost)
-    optimizer = fluid.optimizer.Adam(learning_rate=0.01)
-    optimizer.minimize(avg_cost)
-
-    accuracy = fluid.evaluator.Accuracy(input=logits, label=label)
-
-    BATCH_SIZE = 50
-    PASS_NUM = 3
-    ACC_THRESHOLD = 0.98
-    LOSS_THRESHOLD = 10.0
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.mnist.train(), buf_size=500),
-        batch_size=BATCH_SIZE)
-
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
-    exe.run(fluid.default_startup_program())
-
-    for pass_id in range(PASS_NUM):
-        accuracy.reset(exe)
-        for data in train_reader():
-            loss, acc = exe.run(fluid.default_main_program(),
-                                feed=feeder.feed(data),
-                                fetch_list=[avg_cost] + accuracy.metrics)
-            pass_acc = accuracy.eval(exe)
-            print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc="
-                  + str(pass_acc))
-            if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD:
-                break
-
-        pass_acc = accuracy.eval(exe)
-        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
-    fluid.io.save_params(
-        exe, dirname='./mnist', main_program=fluid.default_main_program())
-    print('train mnist done')
-
-
-if __name__ == '__main__':
-    main()
--- a/adversarial/mnist_tutorial_fgsm.py
+++ b/adversarial/mnist_tutorial_fgsm.py
@ -1,100 +0,0 @@
-#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-"""
-FGSM demos on mnist using advbox tool.
-"""
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
-import matplotlib.pyplot as plt
-import numpy as np
-
-from advbox.models.paddle import PaddleModel
-from advbox.attacks.gradientsign import GradientSignAttack
-
-
-def cnn_model(img):
-    """
-    Mnist cnn model
-    Args:
-        img(Varaible): the input image to be recognized
-    Returns:
-        Variable: the label prediction
-    """
-    #conv1 = fluid.nets.conv2d()
-    conv_pool_1 = fluid.nets.simple_img_conv_pool(
-        input=img,
-        num_filters=20,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    conv_pool_2 = fluid.nets.simple_img_conv_pool(
-        input=conv_pool_1,
-        num_filters=50,
-        filter_size=5,
-        pool_size=2,
-        pool_stride=2,
-        act='relu')
-
-    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-    return logits
-
-
-def main():
-    """
-    Advbox demo which demonstrate how to use advbox.
-    """
-    IMG_NAME = 'img'
-    LABEL_NAME = 'label'
-
-    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
-    # gradient should flow
-    img.stop_gradient = False
-    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
-    logits = cnn_model(img)
-    cost = fluid.layers.cross_entropy(input=logits, label=label)
-    avg_cost = fluid.layers.mean(x=cost)
-
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-
-    BATCH_SIZE = 1
-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.mnist.train(), buf_size=500),
-        batch_size=BATCH_SIZE)
-    feeder = fluid.DataFeeder(
-        feed_list=[IMG_NAME, LABEL_NAME],
-        place=place,
-        program=fluid.default_main_program())
-
-    fluid.io.load_params(
-        exe, "./mnist/", main_program=fluid.default_main_program())
-
-    # advbox demo
-    m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
-                    logits.name, avg_cost.name, (-1, 1))
-    att = GradientSignAttack(m)
-    for data in train_reader():
-        # fgsm attack
-        adv_img = att(data)
-        plt.imshow(n[0][0], cmap='Greys_r')
-        plt.show()
-        #np.save('adv_img', adv_img)
-        break
-
-
-if __name__ == '__main__':
-    main()
--- a/benchmark/cluster/v2/Dockerfile
+++ b/benchmark/cluster/v2/Dockerfile
@ -1,5 +0,0 @@
-FROM paddlepaddle/paddlecloud-job
-RUN mkdir -p /workspace && mkdir -p /root/.cache/paddle/dataset/flowers/
-ADD vgg16.py reader.py /workspace/
-COPY 102flowers.tgz imagelabels.mat setid.mat /root/.cache/paddle/dataset/flowers/
-
--- a/benchmark/cluster/vgg16/fluid/Dockerfile
+++ b/benchmark/cluster/vgg16/fluid/Dockerfile
@ -0,0 +1,15 @@
+#FROM paddlepaddle/paddlecloud-job
+#RUN mkdir -p /workspace
+#ADD reader.py /workspace/
+#RUN python /workspace/reader.py
+# Base image: stock Python 2.7.
+FROM python:2.7.14
+# Launcher script and the Kubernetes helper it calls.
+ADD paddle_k8s /usr/bin
+ADD k8s_tools.py /root
+RUN pip install -U kubernetes opencv-python &&   apt-get update -y &&   apt-get install -y iputils-ping libgtk2.0-dev 
+# Install every wheel found in the build context, then delete the wheel files.
+ADD *.whl /
+RUN pip install /*.whl && rm -f /*.whl
+ENV LD_LIBRARY_PATH=/usr/local/lib
+# reader.py is executed at build time (presumably to cache the dataset in the
+# image -- TODO confirm).
+ADD reader.py /workspace/
+RUN python /workspace/reader.py
+
+ADD vgg16.py /workspace/
--- a/benchmark/cluster/vgg16/fluid/README.md
+++ b/benchmark/cluster/vgg16/fluid/README.md
@ -0,0 +1,15 @@
+# Fluid distributed training perf test
+
+## Steps to get started
+
+1. You must re-compile PaddlePaddle and enable `-DWITH_DISTRIBUTE` to build PaddlePaddle with distributed support.
+1. When the build finishes, copy the output `whl` package located under `build/python/dist` to current directory.
+1. Run `docker build -t [image:tag] .` to build the docker image and run `docker push [image:tag]` to push the image to the repository so Kubernetes can find it.
+1. Run `kubectl create -f pserver.yaml && kubectl create -f trainer.yaml` to start the job on your kubernetes cluster (you must configure the `kubectl` client before this step).
+1. Run `kubectl get po` to get running pods, and run `kubectl logs [podID]` to fetch the pod log of pservers and trainers.
+
+Check the logs for the distributed training progress and analyze the performance.
+
+## Enable verbose logs
+
+Edit `pserver.yaml` and `trainer.yaml` and add an environment variable `GLOG_v=3` to see what happened in detail.
--- a/benchmark/cluster/vgg16/fluid/k8s_tools.py
+++ b/benchmark/cluster/vgg16/fluid/k8s_tools.py
@ -0,0 +1,94 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+#     http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/env python
+import os
+import sys
+import time
+import socket
+from kubernetes import client, config
+# Job identity and cluster settings come from environment variables;
+# os.getenv returns None for any that are unset.
+PADDLE_JOB_NAME = os.getenv("PADDLE_JOB_NAME")
+NAMESPACE = os.getenv("NAMESPACE")
+PORT = os.getenv("PSERVER_PORT")
+# When KUBERNETES_SERVICE_HOST is set, load the in-cluster configuration;
+# otherwise fall back to the local kubeconfig.
+if os.getenv("KUBERNETES_SERVICE_HOST", None):
+    config.load_incluster_config()
+else:
+    config.load_kube_config()
+# Shared CoreV1Api client used by fetch_pods_info.
+v1 = client.CoreV1Api()
+
+
+def fetch_pods_info(label_selector):
+    """List the pods in NAMESPACE that match `label_selector`.
+
+    Args:
+        label_selector: Kubernetes label selector string, e.g. "key=value".
+
+    Returns:
+        A list of (phase, pod_ip) tuples, one per matching pod, in the order
+        returned by the API.
+    """
+    response = v1.list_namespaced_pod(
+        namespace=NAMESPACE, pretty=True, label_selector=label_selector)
+    return [(pod.status.phase, pod.status.pod_ip) for pod in response.items]
+
+
+def wait_pods_running(label_selector, desired):
+    """Block until at least `desired` matching pods are in the Running phase.
+
+    Polls count_pods_by_phase every 5 seconds and prints the current count
+    while waiting.
+
+    Args:
+        label_selector: Kubernetes label selector string.
+        desired: required number of Running pods; converted with int(), so a
+            numeric string (e.g. taken from sys.argv) is accepted.
+    """
+    print "label selector: %s, desired: %s" % (label_selector, desired)
+    running = count_pods_by_phase(label_selector, 'Running')
+    # NOTE: pods may be scaled, hence "at least" rather than "exactly".
+    while running < int(desired):
+        print 'current cnt: %d sleep for 5 seconds...' % running
+        time.sleep(5)
+        running = count_pods_by_phase(label_selector, 'Running')
+
+
+def count_pods_by_phase(label_selector, phase):
+    """Count the pods matching `label_selector` that are in `phase`.
+
+    Args:
+        label_selector: Kubernetes label selector string.
+        phase: pod phase to match exactly, e.g. 'Running' or 'Failed'.
+
+    Returns:
+        int: number of matching pods whose status phase equals `phase`.
+    """
+    # Count with a generator instead of len(filter(...)): filter() returns a
+    # lazy iterator on Python 3, where len() would raise TypeError.
+    return sum(1 for pod in fetch_pods_info(label_selector) if pod[0] == phase)
+
+
+def fetch_pserver_ips():
+    """Return the IPs of this job's pserver pods as one comma-separated string.
+
+    Pods are selected by the label "paddle-job-pserver=<PADDLE_JOB_NAME>".
+    """
+    selector = "paddle-job-pserver=%s" % PADDLE_JOB_NAME
+    pods = fetch_pods_info(selector)
+    return ",".join([pod[1] for pod in pods])
+
+
+def fetch_master_ip():
+    """Return the IP of the first pod labelled as this job's master.
+
+    Pods are selected by the label "paddle-job-master=<PADDLE_JOB_NAME>".
+
+    Raises:
+        IndexError: if no pod matches the selector.
+    """
+    selector = "paddle-job-master=%s" % PADDLE_JOB_NAME
+    pods = fetch_pods_info(selector)
+    # The first listed pod wins; an empty result raises IndexError here.
+    return pods[0][1]
+
+
+def fetch_trainer_id():
+    """Return this host's index among the job's trainer pod IPs.
+
+    The IPs of all pods labelled "paddle-job=<PADDLE_JOB_NAME>" are sorted,
+    and the position of the local host's IP in that order is the trainer id.
+
+    Returns:
+        int index of the local IP in the sorted list, or None when the local
+        IP is not among the trainer pods.
+    """
+    label_selector = "paddle-job=%s" % PADDLE_JOB_NAME
+    trainer_ips = [pod[1] for pod in fetch_pods_info(label_selector)]
+    trainer_ips.sort()
+    local_ip = socket.gethostbyname(socket.gethostname())
+    # list.index returns the first match, like the former xrange loop, but
+    # also works on Python 3 where xrange no longer exists.
+    try:
+        return trainer_ips.index(local_ip)
+    except ValueError:
+        return None
+
+
+if __name__ == "__main__":
+    # Small command-line front end, invoked by the paddle_k8s launcher as
+    #   python k8s_tools.py <command> [args...]
+    # Results are printed to stdout so the shell can capture them.
+    if len(sys.argv) < 2:
+        # Previously a missing command crashed with a bare IndexError.
+        sys.stderr.write("usage: k8s_tools.py <command> [args...]\n")
+        sys.exit(1)
+    command = sys.argv[1]
+    # print(...) with a single argument behaves the same on Python 2 and 3.
+    if command == "fetch_pserver_ips":
+        print(fetch_pserver_ips())
+    elif command == "fetch_trainer_id":
+        print(fetch_trainer_id())
+    elif command == "fetch_master_ip":
+        print(fetch_master_ip())
+    elif command == "count_pods_by_phase":
+        print(count_pods_by_phase(sys.argv[2], sys.argv[3]))
+    elif command == "wait_pods_running":
+        wait_pods_running(sys.argv[2], sys.argv[3])
+    else:
+        # An unknown command used to be ignored silently with exit status 0.
+        sys.stderr.write("unknown command: %s\n" % command)
+        sys.exit(1)
--- a/benchmark/cluster/vgg16/fluid/paddle_k8s
+++ b/benchmark/cluster/vgg16/fluid/paddle_k8s
@ -0,0 +1,199 @@
+#!/bin/bash
+start_pserver() {
+    stdbuf -oL paddle pserver \
+      --use_gpu=0 \
+      --port=$PADDLE_INIT_PORT \
+      --ports_num=$PADDLE_INIT_PORTS_NUM \
+      --ports_num_for_sparse=$PADDLE_INIT_PORTS_NUM_FOR_SPARSE \
+      --nics=$PADDLE_INIT_NICS \
+      --comment=paddle_process_k8s \
+      --num_gradient_servers=$PADDLE_INIT_NUM_GRADIENT_SERVERS
+}
+
+start_new_pserver() {
+  stdbuf -oL python /root/k8s_tools.py wait_pods_running  paddle-job-master=${PADDLE_JOB_NAME} 1
+  export MASTER_IP=$(python /root/k8s_tools.py fetch_master_ip)
+  stdbuf -oL /usr/bin/pserver \
+    -port=$PADDLE_INIT_PORT \
+    -num-pservers=$PSERVERS \
+    -log-level=debug \
+    -etcd-endpoint=http://$MASTER_IP:2379
+}
+
+start_master() {
+  stdbuf -oL /usr/bin/master \
+  -port=8080 \
+  -chunk-per-task=1\
+  -task-timout-dur=16s\
+  -endpoints=http://127.0.0.1:2379
+}
+
+check_failed_cnt() {
+  # Stop this pod when too many pods of the job have failed.
+  #   $1 - highest number of pods in phase "Failed" that is still tolerated
+  # Asks k8s_tools.py how many pods labelled paddle-job=${PADDLE_JOB_NAME} are
+  # in phase Failed. When that exceeds $1, the reason is echoed and written to
+  # /dev/termination-log, and the shell exits with status 0.
+  max_failed=$1
+  failed_count=$(python /root/k8s_tools.py count_pods_by_phase paddle-job=${PADDLE_JOB_NAME} Failed)
+  # The helper may print nothing (e.g. when it crashes). Treat that as "no
+  # failures" and quote both operands so `[` never sees a missing argument.
+  if [ "${failed_count:-0}" -gt "$max_failed" ]; then
+    stdbuf -oL echo "Failed trainer count beyond the threshold: "$max_failed
+    echo "Failed trainer count beyond the threshold: " $max_failed > /dev/termination-log
+    exit 0
+  fi
+}
+
+check_trainer_ret() {
+  ret=$1
+  stdbuf -oL echo "job returned $ret...setting pod return message..."
+  stdbuf -oL echo "==============================="
+
+  if [ $ret -eq 136 ] ; then
+    echo "Error Arithmetic Operation(Floating Point Exception)" > /dev/termination-log
+  elif [ $ret -eq 139 ] ; then
+    echo "Segmentation Fault" > /dev/termination-log
+  elif [ $ret -eq 1 ] ; then
+    echo "General Error" > /dev/termination-log
+  elif [ $ret -eq 134 ] ; then
+    echo "Program Abort" > /dev/termination-log
+  fi
+  stdbuf -oL echo "termination log wroted..."
+  exit $ret
+}
+
+start_fluid_process() {
+  stdbuf -oL python /root/k8s_tools.py wait_pods_running paddle-job-pserver=${PADDLE_JOB_NAME} ${PSERVERS}
+  if [ "${TRAINING_ROLE}" == "TRAINER" ]; then
+    check_failed_cnt ${TRAINERS}
+    sleep 5
+    export PADDLE_INIT_TRAINER_ID=$(python /root/k8s_tools.py fetch_trainer_id)
+  fi
+  export PADDLE_INIT_PSERVERS=$(python /root/k8s_tools.py fetch_pserver_ips)
+  stdbuf -oL sh -c "${ENTRY}"
+  check_trainer_ret $?
+}
+
+start_new_trainer() {
+  # Start a trainer: apply the failed-pod limit, wait for the pserver pods and
+  # for one master pod, export MASTER_IP / ETCD_IP, then run ${ENTRY} from
+  # inside $TRAINER_PACKAGE and report its exit status via check_trainer_ret.
+  # FIXME(Yancey1989): use command-line interface to configure the max failed count
+  check_failed_cnt ${TRAINERS}
+  stdbuf -oL python /root/k8s_tools.py wait_pods_running paddle-job-pserver=${PADDLE_JOB_NAME} ${PSERVERS}
+  sleep 5
+  stdbuf -oL python /root/k8s_tools.py wait_pods_running  paddle-job-master=${PADDLE_JOB_NAME} 1
+  MASTER_IP=$(python /root/k8s_tools.py fetch_master_ip)
+  ETCD_IP="$MASTER_IP"
+  export MASTER_IP ETCD_IP
+
+  # NOTE: $TRAINER_PACKAGE may be large, so it is used in place, not copied.
+  PYTHONPATH=$TRAINER_PACKAGE:$PYTHONPATH
+  export PYTHONPATH
+  cd $TRAINER_PACKAGE
+
+  stdbuf -oL echo "Starting training job: " $TRAINER_PACKAGE, "num_gradient_servers:" \
+  $PADDLE_INIT_NUM_GRADIENT_SERVERS, "version: " $1
+
+  stdbuf -oL sh -c "${ENTRY}"
+  check_trainer_ret $?
+}
+
+start_trainer() {
+    # Start a trainer for the v1 or v2 API.
+    #   $1 - API version: "v1" runs `paddle train`, "v2" runs ${ENTRY};
+    #        any other value performs the common setup and then returns.
+    #
+    # v1 and v2 distributed training tolerate no failed trainer, hence the
+    # limit of 0 here.
+    check_failed_cnt 0
+    # Block until the expected numbers of pserver and trainer pods are Running.
+    stdbuf -oL python /root/k8s_tools.py wait_pods_running paddle-job-pserver=${PADDLE_JOB_NAME} ${PSERVERS}
+    stdbuf -oL python /root/k8s_tools.py wait_pods_running paddle-job=${PADDLE_JOB_NAME} ${TRAINERS}
+
+    # Export the pserver address list and this trainer's id, and also write the
+    # id and the gradient-server count to files in /.
+    export PADDLE_INIT_PSERVERS=$(python /root/k8s_tools.py fetch_pserver_ips)
+    export PADDLE_INIT_TRAINER_ID=$(python /root/k8s_tools.py fetch_trainer_id)
+    stdbuf -oL echo $PADDLE_INIT_TRAINER_ID > /trainer_id
+    # FIXME: /trainer_count = PADDLE_INIT_NUM_GRADIENT_SERVERS
+    stdbuf -oL echo $PADDLE_INIT_NUM_GRADIENT_SERVERS > /trainer_count
+
+    # NOTE: $TRAINER_PACKAGE may be large, so it is used in place, not copied.
+    export PYTHONPATH=$TRAINER_PACKAGE:$PYTHONPATH
+    cd $TRAINER_PACKAGE
+
+    stdbuf -oL echo "Starting training job: " $TRAINER_PACKAGE, "num_gradient_servers:" \
+    $PADDLE_INIT_NUM_GRADIENT_SERVERS, "trainer_id: " $PADDLE_INIT_TRAINER_ID, \
+    "version: " $1
+
+    # FIXME: If we use the new PServer by Golang, add a Kubernetes healthz
+    # check to wait for the PServer process to get ready. For now just sleep
+    # 20 seconds.
+    sleep 20
+
+    case "$1" in
+      "v1")
+        # Number of lines in the training list file.
+        FILE_COUNT=$(wc -l $TRAIN_LIST | awk '{print $1}')
+        # Not more lines than gradient servers: report status 0 and exit
+        # (check_trainer_ret ends the shell).
+        if [ $FILE_COUNT -le $PADDLE_INIT_NUM_GRADIENT_SERVERS ]; then
+          echo "file count less than trainers"
+          check_trainer_ret 0
+        fi
+        # Integer division by (gradient servers + 1) gives the chunk size.
+        # NOTE(review): the reason for the "+ 1" is not visible here; confirm.
+        let lines_per_node="$FILE_COUNT / ($PADDLE_INIT_NUM_GRADIENT_SERVERS + 1)"
+        echo "spliting file to" $lines_per_node
+        cp $TRAIN_LIST /
+        cd /
+        # Produces train.list000, train.list001, ... (3-digit numeric suffixes).
+        split -l $lines_per_node -d -a 3 $TRAIN_LIST train.list
+        # Pick the chunk whose suffix is this trainer's zero-padded id.
+        CURRENT_LIST=$(printf "train.list%03d" $PADDLE_INIT_TRAINER_ID)
+        # paddle v1 always uses /train.list on every node, so this node's
+        # chunk is copied over that name below.
+        echo "File for current node ${CURRENT_LIST}"
+        sleep 10
+        cp $CURRENT_LIST train.list
+
+        cd $TRAINER_PACKAGE
+
+        stdbuf -oL  paddle train \
+          --port=$PADDLE_INIT_PORT \
+          --nics=$PADDLE_INIT_NICS \
+          --ports_num=$PADDLE_INIT_PORTS_NUM \
+          --ports_num_for_sparse=$PADDLE_INIT_PORTS_NUM_FOR_SPARSE \
+          --num_passes=$PADDLE_INIT_NUM_PASSES \
+          --trainer_count=$PADDLE_INIT_TRAINER_COUNT \
+          --saving_period=1 \
+          --log_period=20 \
+          --local=0 \
+          --rdma_tcp=tcp \
+          --config=$TOPOLOGY \
+          --use_gpu=$PADDLE_INIT_USE_GPU \
+          --trainer_id=$PADDLE_INIT_TRAINER_ID \
+          --save_dir=$OUTPUT \
+          --pservers=$PADDLE_INIT_PSERVERS \
+          --num_gradient_servers=$PADDLE_INIT_NUM_GRADIENT_SERVERS
+        # The paddle v1 API tolerates no failed trainer: record the exit
+        # status and exit with it.
+        check_trainer_ret $? 
+        ;;
+      "v2")
+        stdbuf -oL sh -c "${ENTRY}"
+        # The paddle v2 API tolerates no failed trainer: record the exit
+        # status and exit with it.
+        check_trainer_ret $? 
+        ;;
+      *)
+        # Any other version: nothing is launched.
+        ;;
+    esac
+}
+
+usage() {
+    # Print the list of commands accepted as the first argument of this
+    # script. Every command handled by the dispatcher is listed, including
+    # start_master and start_fluid.
+    echo "usage: paddle_k8s [<args>]:"
+    echo "  start_trainer  [v1|v2]    Start a trainer process with v1 or v2 API"
+    echo "  start_pserver             Start a pserver process"
+    echo "  start_new_pserver         Start a new pserver process"
+    echo "  start_new_trainer         Start a new trainer process"
+    echo "  start_master              Start a master process"
+    echo "  start_fluid               Start a fluid process (role taken from TRAINING_ROLE)"
+}
+
+# Dispatch on the first argument. Anything unrecognised, --help included,
+# prints the usage text.
+case "$1" in
+    start_pserver)     start_pserver ;;
+    start_trainer)     start_trainer $2 ;;
+    start_new_trainer) start_new_trainer ;;
+    start_new_pserver) start_new_pserver ;;
+    start_master)      start_master ;;
+    start_fluid)       start_fluid_process ;;
+    --help | *)        usage ;;
+esac
+
--- a/benchmark/cluster/vgg16/fluid/pserver.yaml
+++ b/benchmark/cluster/vgg16/fluid/pserver.yaml
@ -0,0 +1,72 @@
+apiVersion: extensions/v1beta1
+kind: ReplicaSet
+metadata:
+  name: vgg16job-pserver
+spec:
+  replicas: 10
+  template:
+    metadata:
+      labels:
+        paddle-job-pserver: vgg16job
+    spec:
+      hostNetwork: true
+      imagePullSecrets:
+      - name: job-registry-secret
+      containers:
+      - name: pserver
+        image: "registry.baidu.com/paddlepaddle/rawjob:vgg16_fluid"
+        imagePullPolicy: Always
+        ports:
+        - name: jobport-30236
+          containerPort: 30236
+        env:
+        - name: PADDLE_JOB_NAME
+          value: vgg16job
+        - name: MKL_NUM_THREADS
+          value: "1"
+        - name: TRAINING_ROLE
+          value: "PSERVER"
+        - name: TRAINERS
+          value: "20"
+        - name: PSERVERS
+          value: "10"
+        - name: TOPOLOGY
+          value: ""
+        - name: ENTRY
+          value: "LD_LIBRARY_PATH=/usr/local/lib MKL_NUM_THREADS=1 python /workspace/vgg16.py --local 0"
+        - name: TRAINER_PACKAGE
+          value: "/workspace"
+        - name: PADDLE_INIT_PORT
+          value: "30236"
+        - name: PADDLE_INIT_NICS
+          value: "xgbe0"
+        - name: PADDLE_INIT_TRAINER_COUNT
+          value: "1"
+        - name: PADDLE_INIT_PORTS_NUM
+          value: "1"
+        - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
+          value: "1"
+        - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
+          value: "20"
+        - name: PADDLE_INIT_NUM_PASSES
+          value: "1"
+        - name: PADDLE_INIT_USE_GPU
+          value: "0"
+        - name: LD_LIBRARY_PATH
+          value: "/usr/local/nvidia/lib64"
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: "metadata.namespace"
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: "status.podIP"
+        command: ["paddle_k8s", "start_fluid"]
+        resources:
+          requests:
+            memory: 10Gi
+            cpu: 4
+          limits:
+            memory: 10Gi
+            cpu: 4
--- a/v1_api_demo/model_zoo/resnet/example/init.py
+++ b/v1_api_demo/model_zoo/resnet/example/init.py
@ -1,13 +1,16 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-#
+# 
 #     http://www.apache.org/licenses/LICENSE-2.0
-#
+# 
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import paddle.v2 as paddle
+paddle.dataset.cifar.train10()
--- a/benchmark/cluster/vgg16/fluid/trainer.yaml
+++ b/benchmark/cluster/vgg16/fluid/trainer.yaml
@ -0,0 +1,69 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: vgg16job-trainer
+spec:
+  parallelism: 20
+  completions: 20
+  template:
+    metadata:
+      labels:
+        paddle-job: vgg16job
+    spec:
+      imagePullSecrets:
+        - name: job-registry-secret
+      hostNetwork: true
+      containers:
+      - name: trainer
+        image: "registry.baidu.com/paddlepaddle/rawjob:vgg16_fluid"
+        imagePullPolicy: Always
+        command: ["paddle_k8s", "start_fluid"]
+        env:
+        - name: PADDLE_JOB_NAME
+          value: vgg16job
+        - name: TRAINING_ROLE
+          value: "TRAINER"
+        - name: TRAINERS
+          value: "20"
+        - name: PSERVERS
+          value: "10"
+        - name: TOPOLOGY
+          value: ""
+        - name: ENTRY
+          value: "cd /workspace && LD_LIBRARY_PATH=/usr/local/lib MKL_NUM_THREADS=1 python /workspace/vgg16.py --local 0"
+        - name: TRAINER_PACKAGE
+          value: "/workspace"
+        - name: PADDLE_INIT_PORT
+          value: "30236"
+        - name: PADDLE_INIT_NICS
+          value: "xgbe0"
+        - name: PADDLE_INIT_TRAINER_COUNT
+          value: "1"
+        - name: PADDLE_INIT_PORTS_NUM
+          value: "1"
+        - name: PADDLE_INIT_PORTS_NUM_FOR_SPARSE
+          value: "1"
+        - name: PADDLE_INIT_NUM_GRADIENT_SERVERS
+          value: "20"
+        - name: PADDLE_INIT_NUM_PASSES
+          value: "1"
+        - name: PADDLE_INIT_USE_GPU
+          value: "0"
+        - name: LD_LIBRARY_PATH
+          value: "/usr/local/nvidia/lib64"
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: "metadata.namespace"
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: "status.podIP"
+        resources:
+          requests:
+            memory: 40Gi
+            cpu: 2
+          limits:
+            memory: 40Gi
+            cpu: 2
+      restartPolicy: Never
--- a/benchmark/cluster/vgg16/fluid/vgg16.py
+++ b/benchmark/cluster/vgg16/fluid/vgg16.py
--- a/benchmark/cluster/vgg16/v2/Dockerfile
+++ b/benchmark/cluster/vgg16/v2/Dockerfile
@ -0,0 +1,7 @@
+FROM paddlepaddle/paddlecloud-job
+RUN mkdir -p /workspace
+ADD reader.py /workspace/
+RUN python /workspace/reader.py
+ADD vgg16.py /workspace/
+
+ADD vgg16_fluid.py /workspace
--- a/benchmark/cluster/vgg16/v2/pserver.yaml
+++ b/benchmark/cluster/vgg16/v2/pserver.yaml
@ -1,13 +1,13 @@
 apiVersion: extensions/v1beta1
 kind: ReplicaSet
 metadata:
-  name: vgg16job-pserver
+  name: vgg16v2job-pserver
 spec:
  replicas: 10
  template:
    metadata:
      labels:
-        paddle-job-pserver: vgg16job
+        paddle-job-pserver: vgg16v2job
    spec:
      hostNetwork: true
      imagePullSecrets:
@ -21,7 +21,7 @@ spec:
          containerPort: 30236
        env:
        - name: PADDLE_JOB_NAME
-          value: vgg16job
+          value: vgg16v2job
        - name: TRAINERS
          value: "20"
        - name: PSERVERS
--- a/benchmark/cluster/vgg16/v2/reader.py
+++ b/benchmark/cluster/vgg16/v2/reader.py
@ -67,4 +67,4 @@ if __name__ == '__main__':
    #    print len(im[0])
    #for im in train_reader('test.list'):
    #    print len(im[0])
-    paddle.dataset.flowers.train()
+    paddle.dataset.cifar.train10()
--- a/benchmark/cluster/vgg16/v2/trainer.yaml
+++ b/benchmark/cluster/vgg16/v2/trainer.yaml
@ -1,14 +1,14 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: vgg16job-trainer
+  name: vgg16v2job-trainer
 spec:
  parallelism: 20
  completions: 20
  template:
    metadata:
      labels:
-        paddle-job: vgg16job
+        paddle-job: vgg16v2job
    spec:
      imagePullSecrets:
        - name: job-registry-secret
@ -20,9 +20,7 @@ spec:
        command: ["paddle_k8s", "start_trainer", "v2"]
        env:
        - name: PADDLE_JOB_NAME
-          value: vgg16job
-        - name: OMP_NUM_THREADS
-          value: "1"
+          value: vgg16v2job
        - name: TRAINERS
          value: "20"
        - name: PSERVERS
@ -30,7 +28,7 @@ spec:
        - name: TOPOLOGY
          value: ""
        - name: ENTRY
-          value: "cd /workspace && python /workspace/vgg16.py"
+          value: "cd /workspace && MKL_NUM_THREADS=1 python /workspace/vgg16.py"
        - name: TRAINER_PACKAGE
          value: "/workspace"
        - name: PADDLE_INIT_PORT
--- a/Show More
+++ b/Show More