From d935d88d42f7fa1aed6d14969bfebe3ffd01928b Mon Sep 17 00:00:00 2001
From: Luo Tao <luotao02@baidu.com>
Date: Wed, 1 Mar 2017 20:35:51 +0800
Subject: [PATCH 1/2] mnist api v2

---
 demo/mnist/api_train_v2.py | 87 ++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 17 deletions(-)

diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index a59b30ccdb..495c403e40 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -1,48 +1,101 @@
 import paddle.v2 as paddle
 
 
+def softmax_regression(img):
+    predict = paddle.layer.fc(input=img,
+                              size=10,
+                              act=paddle.activation.Softmax())
+    return predict
+
+
+def multilayer_perceptron(img):
+    # The first fully-connected layer
+    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
+    # The second fully-connected layer and its activation function
+    hidden2 = paddle.layer.fc(input=hidden1,
+                              size=64,
+                              act=paddle.activation.Relu())
+    # The third fully-connected layer, note that the hidden size should be 10,
+    # which is the number of unique digits
+    predict = paddle.layer.fc(input=hidden2,
+                              size=10,
+                              act=paddle.activation.Softmax())
+    return predict
+
+
+def convolutional_neural_network(img):
+    # first conv layer
+    conv_pool_1 = paddle.networks.simple_img_conv_pool(
+        input=img,
+        filter_size=5,
+        num_filters=20,
+        num_channel=1,
+        pool_size=2,
+        pool_stride=2,
+        act=paddle.activation.Tanh())
+    # second conv layer
+    conv_pool_2 = paddle.networks.simple_img_conv_pool(
+        input=conv_pool_1,
+        filter_size=5,
+        num_filters=50,
+        num_channel=20,
+        pool_size=2,
+        pool_stride=2,
+        act=paddle.activation.Tanh())
+    # The first fully-connected layer
+    fc1 = paddle.layer.fc(input=conv_pool_2,
+                          size=128,
+                          act=paddle.activation.Tanh())
+    # The softmax layer, note that the hidden size should be 10,
+    # which is the number of unique digits
+    predict = paddle.layer.fc(input=fc1,
+                              size=10,
+                              act=paddle.activation.Softmax())
+    return predict
+
+
 def main():
-    paddle.init(use_gpu=False, trainer_count=1)
+    paddle.init(use_gpu=True, trainer_count=1)
 
     # define network topology
     images = paddle.layer.data(
         name='pixel', type=paddle.data_type.dense_vector(784))
     label = paddle.layer.data(
         name='label', type=paddle.data_type.integer_value(10))
-    hidden1 = paddle.layer.fc(input=images, size=200)
-    hidden2 = paddle.layer.fc(input=hidden1, size=200)
-    inference = paddle.layer.fc(input=hidden2,
-                                size=10,
-                                act=paddle.activation.Softmax())
-    cost = paddle.layer.classification_cost(input=inference, label=label)
+
+    predict = softmax_regression(images)
+    #predict = multilayer_perceptron(images)
+    #predict = convolutional_neural_network(images)
+
+    cost = paddle.layer.classification_cost(input=predict, label=label)
 
     parameters = paddle.parameters.create(cost)
 
-    adam_optimizer = paddle.optimizer.Adam(learning_rate=0.01)
+    optimizer = paddle.optimizer.Momentum(
+        learning_rate=0.1 / 128.0,
+        momentum=0.9,
+        regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
 
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,
-                                 update_equation=adam_optimizer)
+                                 update_equation=optimizer)
 
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 1000 == 0:
+            if event.batch_id % 100 == 0:
                 result = trainer.test(reader=paddle.reader.batched(
-                    paddle.dataset.mnist.test(), batch_size=256))
-
+                    paddle.dataset.mnist.test(), batch_size=128))
                 print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                     event.pass_id, event.batch_id, event.cost, event.metrics,
                     result.metrics)
 
-        else:
-            pass
-
     trainer.train(
         reader=paddle.reader.batched(
             paddle.reader.shuffle(
                 paddle.dataset.mnist.train(), buf_size=8192),
-            batch_size=32),
-        event_handler=event_handler)
+            batch_size=128),
+        event_handler=event_handler,
+        num_passes=100)
 
 
 if __name__ == '__main__':

From a05707ff324e59b032f8ac0c43d62f339ef78db5 Mon Sep 17 00:00:00 2001
From: Luo Tao <luotao02@baidu.com>
Date: Thu, 2 Mar 2017 20:45:37 +0800
Subject: [PATCH 2/2] add test cost

---
 demo/mnist/api_train_v2.py  | 25 +++++++++++++++++++------
 python/paddle/v2/event.py   |  3 ++-
 python/paddle/v2/trainer.py | 10 +++++++++-
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/demo/mnist/api_train_v2.py b/demo/mnist/api_train_v2.py
index 6439d07ac6..341a7165da 100644
--- a/demo/mnist/api_train_v2.py
+++ b/demo/mnist/api_train_v2.py
@@ -63,6 +63,8 @@ def main():
     label = paddle.layer.data(
         name='label', type=paddle.data_type.integer_value(10))
 
+    # Here we can build the prediction network in different ways. Please
+    # choose one by uncommenting the corresponding line.
     predict = softmax_regression(images)
     #predict = multilayer_perceptron(images)
     #predict = convolutional_neural_network(images)
@@ -80,14 +82,20 @@ def main():
                                  parameters=parameters,
                                  update_equation=optimizer)
 
+    list = []
+
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
             if event.batch_id % 100 == 0:
-                result = trainer.test(reader=paddle.reader.batched(
-                    paddle.dataset.mnist.test(), batch_size=128))
-                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics,
-                    result.metrics)
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+        if isinstance(event, paddle.event.EndPass):
+            result = trainer.test(reader=paddle.reader.batched(
+                paddle.dataset.mnist.test(), batch_size=128))
+            print "Test with Pass %d, Cost %f, %s\n" % (
+                event.pass_id, event.cost, result.metrics)
+            list.append((event.pass_id, event.cost,
+                         result.metrics['classification_error_evaluator']))
 
     trainer.train(
         reader=paddle.reader.batched(
@@ -97,10 +105,15 @@ def main():
         event_handler=event_handler,
         num_passes=100)
 
+    # find the best pass
+    best = sorted(list, key=lambda list: float(list[1]))[0]
+    print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
+    print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
+
     # output is a softmax layer. It returns probabilities.
     # Shape should be (100, 10)
     probs = paddle.infer(
-        output=inference,
+        output=predict,
         parameters=parameters,
         reader=paddle.reader.batched(
             paddle.reader.firstn(
diff --git a/python/paddle/v2/event.py b/python/paddle/v2/event.py
index a78bcf076c..6a7bcb8187 100644
--- a/python/paddle/v2/event.py
+++ b/python/paddle/v2/event.py
@@ -52,8 +52,9 @@ class EndPass(WithMetric):
     Event On One Pass Training Complete.
     """
 
-    def __init__(self, pass_id, evaluator):
+    def __init__(self, pass_id, cost, evaluator):
         self.pass_id = pass_id
+        self.cost = cost
         WithMetric.__init__(self, evaluator)
 
 
diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py
index e743a49523..a4ef0df597 100644
--- a/python/paddle/v2/trainer.py
+++ b/python/paddle/v2/trainer.py
@@ -107,6 +107,8 @@ class SGD(ITrainer):
             event_handler(v2_event.BeginPass(pass_id))
             pass_evaluator.start()
             updater.startPass()
+            total_cost_sum = 0
+            total_batch = 0
             for batch_id, data_batch in enumerate(reader()):
                 pass_type = updater.startBatch(len(data_batch))
                 self.__gradient_machine__.forwardBackward(
@@ -127,6 +129,8 @@ class SGD(ITrainer):
                 cost_vec = out_args.getSlotValue(0)
                 cost_vec = cost_vec.copyToNumpyMat()
                 cost = cost_vec.sum() / len(data_batch)
+                total_cost_sum += cost_vec.sum()
+                total_batch += len(data_batch)
                 updater.finishBatch(cost)
                 batch_evaluator.finish()
                 event_handler(
@@ -138,7 +142,11 @@ class SGD(ITrainer):
 
             updater.finishPass()
             pass_evaluator.finish()
-            event_handler(v2_event.EndPass(pass_id, evaluator=pass_evaluator))
+            event_handler(
+                v2_event.EndPass(
+                    pass_id,
+                    cost=total_cost_sum / total_batch,
+                    evaluator=pass_evaluator))
         self.__gradient_machine__.finish()
 
     def default_reader_dict(self):