From b6a0f9a32a4a3e35f9d8ffa4728c69fada5fe5ed Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 28 Feb 2017 18:00:34 +0800 Subject: [PATCH 01/11] Add vgg training via api v2 --- demo/image_classification/train_v2_vgg.py | 85 +++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 demo/image_classification/train_v2_vgg.py diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/train_v2_vgg.py new file mode 100644 index 0000000000..33b53b27da --- /dev/null +++ b/demo/image_classification/train_v2_vgg.py @@ -0,0 +1,85 @@ +import paddle.v2 as paddle + + +def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, + event.cost) + else: + pass + + +def vgg_bn_drop(input): + def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): + return paddle.layer.img_conv_group( + input=ipt, + num_channels=num_channels, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act=paddle.activation.Relu(), + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type=pooling.Max()) + + conv1 = conv_block(input, 64, 2, [0.3, 0], 3) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) + fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) + bn = paddle.layer.batch_norm( + input=fc1, + act=paddle.activation.Relu(), + layer_attr=ExtraAttr(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) + return fc2 + + +def main(): + datadim = 3 * 32 * 32 + classdim = 10 + + paddle.init(use_gpu=False, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(datadim)) + # net = vgg_bn_drop(image) + out = paddle.layer.fc(input=image, + size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + parameters = paddle.parameters.create(cost) + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=3072), + batch_size=128), + cost=cost, + num_passes=1, + parameters=parameters, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}, ) + + +if __name__ == '__main__': + main() From f7ecd312c5a56c48eeafd63fb168f106ad973e66 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 1 Mar 2017 12:49:35 +0800 Subject: [PATCH 02/11] update event handler --- demo/image_classification/train_v2_vgg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/train_v2_vgg.py index 33b53b27da..25bfd798eb 100644 --- a/demo/image_classification/train_v2_vgg.py +++ b/demo/image_classification/train_v2_vgg.py @@ -6,8 +6,6 @@ def event_handler(event): if event.batch_id % 100 == 0: 
print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, event.cost) - else: - pass def vgg_bn_drop(input): From d227f4479e5d9b58c45059871c5cd4e221b1a05f Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 1 Mar 2017 15:06:24 +0800 Subject: [PATCH 03/11] Add resnet --- demo/image_classification/train_v2_resnet.py | 158 +++++++++++++++++++ demo/image_classification/train_v2_vgg.py | 16 +- 2 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 demo/image_classification/train_v2_resnet.py diff --git a/demo/image_classification/train_v2_resnet.py b/demo/image_classification/train_v2_resnet.py new file mode 100644 index 0000000000..fdfa87cd87 --- /dev/null +++ b/demo/image_classification/train_v2_resnet.py @@ -0,0 +1,158 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.v2 as paddle + + +def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, + event.cost) + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(ipt, n_in, n_out, stride): + if n_in != n_out: + print("n_in != n_out") + return conv_bn_layer(ipt, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return ipt + + +def basicblock(ipt, ch_out, stride): + ch_in = ipt.num_filters + tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def bottleneck(ipt, ch_out, stride): + ch_in = ipt.num_filter + tmp = conv_bn_layer(ipt, ch_out, 1, stride, 0) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1) + tmp = conv_bn_layer(tmp, ch_out * 4, 1, 1, 0, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out * 4, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, ipt, features, count, stride): + tmp = block_func(ipt, features, stride) + for i in range(1, count): + tmp = block_func(tmp, features, 1) + return tmp + + +def resnet_imagenet(ipt, depth=50): + cfg = { + 18: ([2, 2, 2, 1], basicblock), + 34: ([3, 4, 6, 3], basicblock), + 50: ([3, 4, 6, 3], bottleneck), + 101: ([3, 4, 23, 3], bottleneck), + 152: ([3, 8, 36, 3], bottleneck) + } + stages, block_func = cfg[depth] + tmp = conv_bn_layer( + ipt, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) + tmp = paddle.layer.img_pool(input=tmp, pool_size=3, stride=2) + tmp = layer_warp(block_func, tmp, 64, stages[0], 1) + tmp = layer_warp(block_func, tmp, 
128, stages[1], 2) + tmp = layer_warp(block_func, tmp, 256, stages[2], 2) + tmp = layer_warp(block_func, tmp, 512, stages[3], 2) + tmp = paddle.layer.img_pool( + input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) + + tmp = paddle.layer.fc(input=tmp, size=1000, act=paddle.activation.Softmax()) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool + + +def main(): + datadim = 3 * 32 * 32 + classdim = 10 + + paddle.init(use_gpu=False, trainer_count=1) + + image = paddle.layer.data( + name="image", type=paddle.data_type.dense_vector(datadim)) + net = resnet_cifar10(image, depth=32) + out = paddle.layer.fc(input=net, + size=classdim, + act=paddle.activation.Softmax()) + + lbl = paddle.layer.data( + name="label", type=paddle.data_type.integer_value(classdim)) + cost = paddle.layer.classification_cost(input=out, label=lbl) + + parameters = paddle.parameters.create(cost) + + momentum_optimizer = paddle.optimizer.Momentum( + momentum=0.9, + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), + learning_rate=0.1 / 128.0, + learning_rate_decay_a=0.1, + learning_rate_decay_b=50000 * 100, + learning_rate_schedule='discexp', + batch_size=128) + + trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) + trainer.train( + reader=paddle.reader.batched( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(), buf_size=3072), + batch_size=128), + cost=cost, + num_passes=1, + parameters=parameters, + event_handler=event_handler, + reader_dict={'image': 0, + 'label': 1}, ) + + +if __name__ == '__main__': + main() diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/train_v2_vgg.py index 25bfd798eb..5656ac85c6 100644 --- a/demo/image_classification/train_v2_vgg.py +++ b/demo/image_classification/train_v2_vgg.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import paddle.v2 as paddle @@ -20,7 +34,7 @@ def vgg_bn_drop(input): conv_act=paddle.activation.Relu(), conv_with_batchnorm=True, conv_batchnorm_drop_rate=dropouts, - pool_type=pooling.Max()) + pool_type=paddle.pooling.Max()) conv1 = conv_block(input, 64, 2, [0.3, 0], 3) conv2 = conv_block(conv1, 128, 2, [0.4, 0]) From ad44a3ebcaa062342ec799f020bd3975e6b5f899 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 1 Mar 2017 20:14:43 +0800 Subject: [PATCH 04/11] Update vgg and resnet via api v2 --- demo/image_classification/api_v2_resnet.py | 74 ++++++++ .../{train_v2_vgg.py => api_v2_train.py} | 59 ++----- demo/image_classification/api_v2_vgg.py | 47 ++++++ demo/image_classification/train_v2_resnet.py | 158 ------------------ 4 files changed, 139 insertions(+), 199 deletions(-) create mode 100644 demo/image_classification/api_v2_resnet.py rename demo/image_classification/{train_v2_vgg.py => api_v2_train.py} (55%) create mode 100644 demo/image_classification/api_v2_vgg.py delete mode 100644 demo/image_classification/train_v2_resnet.py diff --git a/demo/image_classification/api_v2_resnet.py b/demo/image_classification/api_v2_resnet.py new file mode 100644 index 0000000000..19d2054078 --- /dev/null +++ b/demo/image_classification/api_v2_resnet.py @@ -0,0 +1,74 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.v2 as paddle + +__all__ = ['resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(ipt, n_in, n_out, stride): + if n_in != n_out: + return conv_bn_layer(ipt, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return ipt + + +def basicblock(ipt, ch_out, stride): + ch_in = ch_out * 2 + tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) + tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(ipt, ch_in, ch_out, stride) + return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) + + +def layer_warp(block_func, ipt, features, count, stride): + tmp = block_func(ipt, features, stride) + for i in range(1, count): + tmp = block_func(tmp, features, 1) + return tmp + + +def resnet_cifar10(ipt, depth=32): + # depth should be one of 20, 32, 44, 56, 110, 1202 + assert (depth - 2) % 6 == 0 + n = (depth - 2) / 6 + nStages = {16, 64, 128} + conv1 = conv_bn_layer( + ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) + res1 = layer_warp(basicblock, conv1, 16, n, 1) + res2 = layer_warp(basicblock, res1, 32, n, 2) + res3 = layer_warp(basicblock, res2, 64, n, 2) + pool = paddle.layer.img_pool( + input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) + return pool diff --git a/demo/image_classification/train_v2_vgg.py b/demo/image_classification/api_v2_train.py similarity index 55% rename from demo/image_classification/train_v2_vgg.py rename to demo/image_classification/api_v2_train.py index 5656ac85c6..44a8db3941 100644 --- a/demo/image_classification/train_v2_vgg.py +++ b/demo/image_classification/api_v2_train.py @@ -10,9 +10,10 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. 
+# limitations under the License -import paddle.v2 as paddle +from api_v2_vgg import resnet_cifar10 +from api_v2_resnet import vgg_bn_drop def event_handler(event): @@ -22,46 +23,21 @@ def event_handler(event): event.cost) -def vgg_bn_drop(input): - def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): - return paddle.layer.img_conv_group( - input=ipt, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - conv_with_batchnorm=True, - conv_batchnorm_drop_rate=dropouts, - pool_type=paddle.pooling.Max()) - - conv1 = conv_block(input, 64, 2, [0.3, 0], 3) - conv2 = conv_block(conv1, 128, 2, [0.4, 0]) - conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) - conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) - conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) - - drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) - fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) - bn = paddle.layer.batch_norm( - input=fc1, - act=paddle.activation.Relu(), - layer_attr=ExtraAttr(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) - return fc2 - - def main(): datadim = 3 * 32 * 32 classdim = 10 - paddle.init(use_gpu=False, trainer_count=1) + paddle.init(use_gpu=True, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(datadim)) + + # option 1. resnet + net = resnet_cifar10(image, depth=32) + # option 2. vgg # net = vgg_bn_drop(image) - out = paddle.layer.fc(input=image, + + out = paddle.layer.fc(input=net, size=classdim, act=paddle.activation.Softmax()) @@ -70,27 +46,28 @@ def main(): cost = paddle.layer.classification_cost(input=out, label=lbl) parameters = paddle.parameters.create(cost) + momentum_optimizer = paddle.optimizer.Momentum( momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128), + regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), learning_rate=0.1 / 128.0, learning_rate_decay_a=0.1, learning_rate_decay_b=50000 * 100, learning_rate_schedule='discexp', batch_size=128) - trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) + trainer = paddle.trainer.SGD(cost=cost, + parameters=parameters, + update_equation=momentum_optimizer) trainer.train( reader=paddle.reader.batched( paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=3072), + paddle.dataset.cifar.train10(), buf_size=50000), batch_size=128), - cost=cost, - num_passes=1, - parameters=parameters, + num_passes=5, event_handler=event_handler, reader_dict={'image': 0, - 'label': 1}, ) + 'label': 1}) if __name__ == '__main__': diff --git a/demo/image_classification/api_v2_vgg.py b/demo/image_classification/api_v2_vgg.py new file mode 100644 index 0000000000..1e0e6b93ad --- /dev/null +++ b/demo/image_classification/api_v2_vgg.py @@ -0,0 +1,47 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import paddle.v2 as paddle + +__all__ = ['vgg_bn_drop'] + + +def vgg_bn_drop(input): + def conv_block(ipt, num_filter, groups, dropouts, num_channels=None): + return paddle.networks.img_conv_group( + input=ipt, + num_channels=num_channels, + pool_size=2, + pool_stride=2, + conv_num_filter=[num_filter] * groups, + conv_filter_size=3, + conv_act=paddle.activation.Relu(), + conv_with_batchnorm=True, + conv_batchnorm_drop_rate=dropouts, + pool_type=paddle.pooling.Max()) + + conv1 = conv_block(input, 64, 2, [0.3, 0], 3) + conv2 = conv_block(conv1, 128, 2, [0.4, 0]) + conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0]) + conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0]) + conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0]) + + drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5) + fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear()) + bn = paddle.layer.batch_norm( + input=fc1, + act=paddle.activation.Relu(), + layer_attr=paddle.attr.Extra(drop_rate=0.5)) + fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear()) + return fc2 diff --git a/demo/image_classification/train_v2_resnet.py b/demo/image_classification/train_v2_resnet.py deleted file mode 100644 index fdfa87cd87..0000000000 --- a/demo/image_classification/train_v2_resnet.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import paddle.v2 as paddle - - -def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, - event.cost) - - -def conv_bn_layer(input, - ch_out, - filter_size, - stride, - padding, - active_type=paddle.activation.Relu(), - ch_in=None): - tmp = paddle.layer.img_conv( - input=input, - filter_size=filter_size, - num_channels=ch_in, - num_filters=ch_out, - stride=stride, - padding=padding, - act=paddle.activation.Linear(), - bias_attr=False) - return paddle.layer.batch_norm(input=tmp, act=active_type) - - -def shortcut(ipt, n_in, n_out, stride): - if n_in != n_out: - print("n_in != n_out") - return conv_bn_layer(ipt, n_out, 1, stride, 0, - paddle.activation.Linear()) - else: - return ipt - - -def basicblock(ipt, ch_out, stride): - ch_in = ipt.num_filters - tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1) - tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear()) - short = shortcut(ipt, ch_in, ch_out, stride) - return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) - - -def bottleneck(ipt, ch_out, stride): - ch_in = ipt.num_filter - tmp = conv_bn_layer(ipt, ch_out, 1, stride, 0) - tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1) - tmp = conv_bn_layer(tmp, ch_out * 4, 1, 1, 0, paddle.activation.Linear()) - short = shortcut(ipt, ch_in, ch_out * 4, stride) - return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu()) - - -def layer_warp(block_func, ipt, features, count, stride): - tmp = block_func(ipt, features, stride) - for i in range(1, count): - tmp = block_func(tmp, features, 1) - return tmp - - -def resnet_imagenet(ipt, depth=50): - cfg = { - 18: ([2, 2, 2, 1], basicblock), - 34: ([3, 4, 6, 3], basicblock), - 50: ([3, 4, 6, 3], bottleneck), - 101: ([3, 4, 23, 3], bottleneck), - 152: ([3, 8, 36, 3], bottleneck) - } - stages, block_func = cfg[depth] - tmp = conv_bn_layer( - ipt, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3) - tmp = paddle.layer.img_pool(input=tmp, pool_size=3, stride=2) - tmp = layer_warp(block_func, tmp, 64, stages[0], 1) - tmp = layer_warp(block_func, tmp, 128, stages[1], 2) - tmp = layer_warp(block_func, tmp, 256, stages[2], 2) - tmp = layer_warp(block_func, tmp, 512, stages[3], 2) - tmp = paddle.layer.img_pool( - input=tmp, pool_size=7, stride=1, pool_type=paddle.pooling.Avg()) - - tmp = paddle.layer.fc(input=tmp, size=1000, act=paddle.activation.Softmax()) - return tmp - - -def resnet_cifar10(ipt, depth=32): - # depth should be one of 20, 32, 44, 56, 110, 1202 - assert (depth - 2) % 6 == 0 - n = (depth - 2) / 6 - nStages = {16, 64, 128} - conv1 = conv_bn_layer( - ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1) - res1 = layer_warp(basicblock, conv1, 16, n, 1) - res2 = layer_warp(basicblock, res1, 32, n, 2) - res3 = layer_warp(basicblock, res2, 64, n, 2) - pool = paddle.layer.img_pool( - input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg()) - return pool - - -def main(): - datadim = 3 * 32 * 32 - classdim = 10 - - paddle.init(use_gpu=False, trainer_count=1) - - image = paddle.layer.data( - name="image", type=paddle.data_type.dense_vector(datadim)) - net = resnet_cifar10(image, depth=32) - out = paddle.layer.fc(input=net, - size=classdim, - act=paddle.activation.Softmax()) - - lbl = paddle.layer.data( - name="label", type=paddle.data_type.integer_value(classdim)) - cost = paddle.layer.classification_cost(input=out, label=lbl) - - parameters = paddle.parameters.create(cost) - - 
momentum_optimizer = paddle.optimizer.Momentum( - momentum=0.9, - regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), - learning_rate=0.1 / 128.0, - learning_rate_decay_a=0.1, - learning_rate_decay_b=50000 * 100, - learning_rate_schedule='discexp', - batch_size=128) - - trainer = paddle.trainer.SGD(update_equation=momentum_optimizer) - trainer.train( - reader=paddle.reader.batched( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(), buf_size=3072), - batch_size=128), - cost=cost, - num_passes=1, - parameters=parameters, - event_handler=event_handler, - reader_dict={'image': 0, - 'label': 1}, ) - - -if __name__ == '__main__': - main() From 49020f0be80428ba22913062ae877605114134eb Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 1 Mar 2017 20:26:42 +0800 Subject: [PATCH 05/11] import paddle.v2 --- demo/image_classification/api_v2_train.py | 1 + 1 file changed, 1 insertion(+) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index 44a8db3941..e6e4307242 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -14,6 +14,7 @@ from api_v2_vgg import resnet_cifar10 from api_v2_resnet import vgg_bn_drop +import paddle.v2 as paddle def event_handler(event): From 0dc68a2c90e2432a3b5678881268fa22e1f0d990 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 14:48:51 +0800 Subject: [PATCH 06/11] add getNonStaticParameters --- demo/image_classification/api_v2_train.py | 4 ++-- paddle/api/GradientMachine.cpp | 14 ++++++++++++++ paddle/api/PaddleAPI.h | 3 +++ paddle/py_paddle/util.py | 6 ++++++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index e6e4307242..0b4dc4d929 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License -from api_v2_vgg import resnet_cifar10 -from api_v2_resnet import vgg_bn_drop +from api_v2_vgg import vgg_bn_drop +from api_v2_resnet import resnet_cifar10 import paddle.v2 as paddle diff --git a/paddle/api/GradientMachine.cpp b/paddle/api/GradientMachine.cpp index 538ca2999f..dcb5fe086f 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/api/GradientMachine.cpp @@ -142,6 +142,20 @@ Parameter* GradientMachine::getParameter(size_t i) throw(RangeError) { } } +size_t GradientMachine::getNonStaticParameterSize() const { + return m->machine->getNonStaticParameters().size(); +} + +Parameter* GradientMachine::getNonStaticParameter(size_t i) throw(RangeError) { + auto params = m->machine->getNonStaticParameters(); + if (i < params.size()) { + return Parameter::createFromSharedPtr( + &m->machine->getNonStaticParameters()[i]); + } else { + throw RangeError(); + } +} + void GradientMachine::randParameters() { m->machine->randParameters(); } Arguments* GradientMachine::getLayerOutput(const std::string& layerName) const diff --git a/paddle/api/PaddleAPI.h b/paddle/api/PaddleAPI.h index 1831b8e170..764946cf53 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/api/PaddleAPI.h @@ -768,6 +768,9 @@ public: size_t getParameterSize() const; Parameter* getParameter(size_t i) throw(RangeError); + size_t getNonStaticParameterSize() const; + Parameter* getNonStaticParameter(size_t i) throw(RangeError); + void randParameters(); Arguments* getLayerOutput(const std::string& layerName) const diff --git 
a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index a708def1d2..fb337b8af3 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -195,6 +195,12 @@ def __monkeypatch_gradient_machine__(): swig_paddle.GradientMachine.getParameters = getParameters + def getNonStaticParameters(self): + return (self.getNonStaticParameter(i) + for i in xrange(self.getNonStaticParameterSize())) + + swig_paddle.GradientMachine.getParameters = getParameters + def getLayerOutputs(self, layerNames): """ getLayerOutputs. get outputs of layers and return a numpy matrix dict. From ce3a399d3d5654804c8f258cb4b2d0455e013606 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 14:54:56 +0800 Subject: [PATCH 07/11] update util.py --- paddle/py_paddle/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/py_paddle/util.py b/paddle/py_paddle/util.py index fb337b8af3..1c9455fab5 100644 --- a/paddle/py_paddle/util.py +++ b/paddle/py_paddle/util.py @@ -199,7 +199,7 @@ def __monkeypatch_gradient_machine__(): return (self.getNonStaticParameter(i) for i in xrange(self.getNonStaticParameterSize())) - swig_paddle.GradientMachine.getParameters = getParameters + swig_paddle.GradientMachine.getNonStaticParameters = getNonStaticParameters def getLayerOutputs(self, layerNames): """ From 69bf77fd1e71fc57bf0f15820a9dd34bd98c79b6 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 15:09:49 +0800 Subject: [PATCH 08/11] fix trainer v2 getNonStaticParameters --- python/paddle/v2/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index bf8b181e42..44ba9d7ae1 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -113,7 +113,7 @@ class SGD(ITrainer): gm.forwardBackward(feeder(data_batch), out_args, pass_type) gm.eval(pass_evaluator) gm.eval(batch_evaluator) - for each_param in gm.getParameters(): + for each_param in gm.getNonStaticParameters(): updater.update(each_param) # Get cost. We use numpy to calculate total cost for this batch. 
cost_vec = out_args.getSlotValue(0) From 1164c287b9db46abd9e591ddebe720bc3e08e22d Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 15:14:34 +0800 Subject: [PATCH 09/11] add datasets import --- python/paddle/v2/dataset/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/paddle/v2/dataset/__init__.py b/python/paddle/v2/dataset/__init__.py index 9647e98503..d222739ba2 100644 --- a/python/paddle/v2/dataset/__init__.py +++ b/python/paddle/v2/dataset/__init__.py @@ -1,3 +1,7 @@ import mnist +import imikolov +import imdb +import cifar +import movielens -__all__ = ['mnist'] +__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens'] From 6d09f70a860f253e00f91685eb73693e3eef5a76 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 15:43:13 +0800 Subject: [PATCH 10/11] Add event_handler test and comment --- demo/image_classification/api_v2_train.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index 0b4dc4d929..94bf0b5db4 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -12,27 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License +import sys +import paddle.v2 as paddle from api_v2_vgg import vgg_bn_drop from api_v2_resnet import resnet_cifar10 -import paddle.v2 as paddle +# End batch and end pass event handler def event_handler(event): if isinstance(event, paddle.event.EndIteration): if event.batch_id % 100 == 0: - print "Pass %d, Batch %d, Cost %f" % (event.pass_id, event.batch_id, - event.cost) + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + paddle.dataset.cifar.test10(), batch_size=128), + reader_dict={'image': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) def main(): datadim = 3 * 32 * 32 classdim = 10 + # PaddlePaddle init paddle.init(use_gpu=True, trainer_count=1) image = paddle.layer.data( name="image", type=paddle.data_type.dense_vector(datadim)) + # Add neural network config # option 1. resnet net = resnet_cifar10(image, depth=32) # option 2. 
vgg @@ -46,8 +60,10 @@ def main(): name="label", type=paddle.data_type.integer_value(classdim)) cost = paddle.layer.classification_cost(input=out, label=lbl) + # Create parameters parameters = paddle.parameters.create(cost) + # Create optimizer momentum_optimizer = paddle.optimizer.Momentum( momentum=0.9, regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128), @@ -57,6 +73,7 @@ def main(): learning_rate_schedule='discexp', batch_size=128) + # Create trainer trainer = paddle.trainer.SGD(cost=cost, parameters=parameters, update_equation=momentum_optimizer) From 69ac20c2845fa0bb988407a4cd3af7af1aaa7d0a Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 2 Mar 2017 16:53:31 +0800 Subject: [PATCH 11/11] Fix event_handler trainer --- demo/image_classification/api_v2_train.py | 35 +++++++++++------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/demo/image_classification/api_v2_train.py b/demo/image_classification/api_v2_train.py index 94bf0b5db4..585f61c6fa 100644 --- a/demo/image_classification/api_v2_train.py +++ b/demo/image_classification/api_v2_train.py @@ -18,24 +18,6 @@ from api_v2_vgg import vgg_bn_drop from api_v2_resnet import resnet_cifar10 -# End batch and end pass event handler -def event_handler(event): - if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) - else: - sys.stdout.write('.') - sys.stdout.flush() - if isinstance(event, paddle.event.EndPass): - result = trainer.test( - reader=paddle.reader.batched( - paddle.dataset.cifar.test10(), batch_size=128), - reader_dict={'image': 0, - 'label': 1}) - print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) - - def main(): datadim = 3 * 32 * 32 classdim = 10 @@ -73,6 +55,23 @@ def main(): learning_rate_schedule='discexp', batch_size=128) + # End batch and end pass event handler + def event_handler(event): + if isinstance(event, paddle.event.EndIteration): + if event.batch_id % 100 == 0: + print "\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics) + else: + sys.stdout.write('.') + sys.stdout.flush() + if isinstance(event, paddle.event.EndPass): + result = trainer.test( + reader=paddle.reader.batched( + paddle.dataset.cifar.test10(), batch_size=128), + reader_dict={'image': 0, + 'label': 1}) + print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics) + # Create trainer trainer = paddle.trainer.SGD(cost=cost, parameters=parameters,
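
For reference, below is a consolidated sketch of demo/image_classification/api_v2_train.py as it stands after the series above. It is assembled only from the hunks shown in these patches (the Apache license header is omitted for brevity); because PATCH 11/11 is truncated in this excerpt, the trainer.train() call at the end follows the form introduced in PATCH 04/11. Treat it as a reading aid, not an additional patch.

# demo/image_classification/api_v2_train.py -- consolidated state after the
# series above (license header omitted; Python 2 syntax as in the patches).
import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10


def main():
    datadim = 3 * 32 * 32  # CIFAR-10 images are 3 x 32 x 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Network config: option 1. resnet / option 2. vgg
    net = resnet_cifar10(image, depth=32)
    # net = vgg_bn_drop(image)

    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters and optimizer
    parameters = paddle.parameters.create(cost)
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp',
        batch_size=128)

    # End-batch and end-pass event handler (defined inside main, per
    # PATCH 11/11, so it can call trainer.test at the end of each pass)
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.reader.batched(
                    paddle.dataset.cifar.test10(), batch_size=128),
                reader_dict={'image': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Create trainer and start training on CIFAR-10
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)
    trainer.train(
        reader=paddle.reader.batched(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
        reader_dict={'image': 0,
                     'label': 1})


if __name__ == '__main__':
    main()

Running this against the repository at this point in the series also assumes the getNonStaticParameters additions from PATCH 06-08, which the v2 trainer uses when applying parameter updates.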