From d5b7e92cab3f08f898da5bdb3f576dc9e3a166b3 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 10 May 2017 10:53:46 +0800 Subject: [PATCH 01/56] Add dockerfile and build script for Android. --- Dockerfile.android | 43 ++++++++++++++++++++++++++ paddle/scripts/docker/build_android.sh | 25 +++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 Dockerfile.android create mode 100644 paddle/scripts/docker/build_android.sh diff --git a/Dockerfile.android b/Dockerfile.android new file mode 100644 index 0000000000..9df291250d --- /dev/null +++ b/Dockerfile.android @@ -0,0 +1,43 @@ +FROM ubuntu:16.04 +MAINTAINER PaddlePaddle Authors + +ARG UBUNTU_MIRROR +RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' + +ENV HOME=/root \ + ANDROID_HOME=/opt/android-sdk-linux \ + ANDROID_NDK_HOME=/opt/android-ndk-linux \ + ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain-gcc \ + PATH=${PATH}:${ANDROID_HOME}:${ANDROID_NDK_HOME} + +RUN apt-get update && \ + apt-get install -y git python-dev python-pip python-numpy && \ + apt-get install -y wget curl tar unzip && \ + apt-get install -y gcc g++ locales swig && \ + apt-get clean -y + +RUN pip install --upgrade pip && \ + pip install -U 'protobuf==3.1.0' && \ + pip install -U wheel sphinx && \ + pip install pre-commit + +# git credential to skip password typing +RUN git config --global credential.helper store + +# Fix locales to en_US.UTF-8 +RUN localedef -i en_US -f UTF-8 en_US.UTF-8 + +RUN curl -sSL https://cmake.org/files/v3.2/cmake-3.2.2.tar.gz | tar -xz && \ + cd cmake-3.2.2 && ./bootstrap && make -j `nproc` && make install && \ + cd .. && rm -rf cmake-3.2.2 + +# Android NDK +RUN mkdir /opt/android-ndk-tmp && \ + cd /opt/android-ndk-tmp && \ + wget -q https://dl.google.com/android/repository/android-ndk-r14b-linux-x86_64.zip && \ + unzip -q android-ndk-r14b-linux-x86_64.zip && \ + mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ + ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_STANDALONE_TOOLCHAIN} && \ + rm -rf /opt/android-ndk-tmp + +CMD ["bash", "paddle/paddle/scripts/docker/build_android.sh"] diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh new file mode 100644 index 0000000000..ab432c8524 --- /dev/null +++ b/paddle/scripts/docker/build_android.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -xe + +mkdir -p /paddle/build +cd /paddle/build +cmake -DCMAKE_SYSTEM_NAME=Android \ + -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ + -DANDROID_ABI=armeabi-v7a \ + -DANDROID_ARM_NEON=ON \ + -DANDROID_ARM_MODE=ON \ + -DHOST_C_COMPILER=/usr/bin/gcc \ + -DHOST_CXX_COMPILER=/usr/bin/g++ \ + -DCMAKE_INSTALL_PREFIX=/paddle/install \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_C_FLAGS_RELWITHDEBINFO="-O3" \ + -DCMAKE_CXX_FLAGS_RELWITHDEBINFO="-O3" \ + -DWITH_C_API=ON \ + -DWITH_SWIG_PY=OFF \ + .. +make -j `nproc` +make install + +export PATH=/paddle/install/bin:/paddle/install/opt/paddle/bin:$PATH +paddle version From b5dd70fd862170fdcfe0c2e276002421ced76031 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 10 May 2017 13:03:03 +0800 Subject: [PATCH 02/56] Remove the extra return statement. 
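The double-precision getri() already returns the result of dynload::PADDLE_DGETRI directly, so the trailing `return 0;` removed below was unreachable dead code.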
--- paddle/math/MathFunctions.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index 1a3bb432bf..7045562dd4 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -180,7 +180,6 @@ int getri(const CBLAS_ORDER order, const int lda, const int* ipiv) { return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); - return 0; } template <> From 634648c2ecbc7d9332c00f7fdd8b81eae93dd78e Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Wed, 10 May 2017 13:03:03 +0800 Subject: [PATCH 03/56] Remove the extra return statement. --- Dockerfile.android | 2 +- paddle/math/MathFunctions.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index 9df291250d..4d9ced33f4 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -40,4 +40,4 @@ RUN mkdir /opt/android-ndk-tmp && \ ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_STANDALONE_TOOLCHAIN} && \ rm -rf /opt/android-ndk-tmp -CMD ["bash", "paddle/paddle/scripts/docker/build_android.sh"] +CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/paddle/math/MathFunctions.cpp b/paddle/math/MathFunctions.cpp index 1a3bb432bf..7045562dd4 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/math/MathFunctions.cpp @@ -180,7 +180,6 @@ int getri(const CBLAS_ORDER order, const int lda, const int* ipiv) { return dynload::PADDLE_DGETRI(order, N, A, lda, ipiv); - return 0; } template <> From 89bb7fd2bf53ec0fc731f163a70224eff3aa54a4 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 15 May 2017 17:46:05 +0800 Subject: [PATCH 04/56] Delete the ndk directory in Dockerfile. --- Dockerfile.android | 7 +++---- paddle/scripts/docker/build_android.sh | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index 4d9ced33f4..1334799ed2 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -5,10 +5,8 @@ ARG UBUNTU_MIRROR RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' ENV HOME=/root \ - ANDROID_HOME=/opt/android-sdk-linux \ ANDROID_NDK_HOME=/opt/android-ndk-linux \ - ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain-gcc \ - PATH=${PATH}:${ANDROID_HOME}:${ANDROID_NDK_HOME} + ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain-gcc RUN apt-get update && \ apt-get install -y git python-dev python-pip python-numpy && \ @@ -38,6 +36,7 @@ RUN mkdir /opt/android-ndk-tmp && \ unzip -q android-ndk-r14b-linux-x86_64.zip && \ mv android-ndk-r14b ${ANDROID_NDK_HOME} && \ ${ANDROID_NDK_HOME}/build/tools/make-standalone-toolchain.sh --arch=arm --platform=android-21 --install-dir=${ANDROID_STANDALONE_TOOLCHAIN} && \ - rm -rf /opt/android-ndk-tmp + rm -rf /opt/android-ndk-tmp && \ + rm -rf ${ANDROID_NDK_HOME} CMD ["bash", "/paddle/paddle/scripts/docker/build_android.sh"] diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index ab432c8524..bfa10c9155 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -4,6 +4,7 @@ set -xe mkdir -p /paddle/build cd /paddle/build +rm -f /paddle/install 2>/dev/null || true cmake -DCMAKE_SYSTEM_NAME=Android \ -DANDROID_STANDALONE_TOOLCHAIN=$ANDROID_STANDALONE_TOOLCHAIN \ -DANDROID_ABI=armeabi-v7a \ From 1b31a8df8b518bfa2e6f26b2d4d31d8dc3e41700 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Mon, 
15 May 2017 17:47:35 +0800 Subject: [PATCH 05/56] Install the paddle c-api libraries to subdirectory named by ANDROID_ABI. --- paddle/capi/CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/paddle/capi/CMakeLists.txt b/paddle/capi/CMakeLists.txt index 1b52a79ceb..206f512563 100644 --- a/paddle/capi/CMakeLists.txt +++ b/paddle/capi/CMakeLists.txt @@ -58,10 +58,16 @@ target_include_directories(paddle_capi_shared PUBLIC ${CMAKE_CURRENT_BINARY_DIR} link_paddle_exe(paddle_capi_shared) # install library & headers. -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library} DESTINATION lib) install(FILES ${CAPI_HEADERS} DESTINATION include/paddle) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h DESTINATION include/paddle) -install(TARGETS paddle_capi_shared DESTINATION lib) +if(ANDROID) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library} + DESTINATION lib/${ANDROID_ABI}) + install(TARGETS paddle_capi_shared DESTINATION lib/${ANDROID_ABI}) +else(ANDROID) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${capi_whole_library} DESTINATION lib) + install(TARGETS paddle_capi_shared DESTINATION lib) +endif(ANDROID) # this variable used for unittest set(PADDLE_CAPI_INC_PATH From 96ca1e966aaf75ab7644e594603e77f71ed14ca7 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 8 May 2017 16:30:27 +0800 Subject: [PATCH 06/56] "add mq2007 dataset for learning to rank task" --- python/paddle/v2/dataset/mq2007.py | 293 ++++++++++++++++++ python/paddle/v2/dataset/tests/mq2007_test.py | 31 ++ 2 files changed, 324 insertions(+) create mode 100644 python/paddle/v2/dataset/mq2007.py create mode 100644 python/paddle/v2/dataset/tests/mq2007_test.py diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py new file mode 100644 index 0000000000..8884dfd5b1 --- /dev/null +++ b/python/paddle/v2/dataset/mq2007.py @@ -0,0 +1,293 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +MQ2007 dataset + +MQ2007 is a query set from Million Query track of TREC 2007. There are about 1700 queries in it with labeled documents. In MQ2007, the 5-fold cross +validation strategy is adopted and the 5-fold partitions are included in the package. In each fold, there are three subsets for learning: training set, +validation set and testing set. 
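+
+A sample line follows the LETOR 4.0 format: a relevance label, a query id, and
+46 "index:value" feature pairs, optionally followed by a '#' comment section.
+For example (feature values and the docid here are illustrative only):
+
+    0 qid:10 1:0.000272 2:0.000000 ... 46:0.000000 #docid = GX008-86-4444840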
+ +MQ2007 dataset from +http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar and parse training set and test set into paddle reader creators + +""" + + +import os +import random +import functools +import rarfile +from common import download +import numpy as np + + +# URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar" +URL = "http://www.bigdatalab.ac.cn/benchmark/upload/download_source/7b6dbbe2-842c-11e4-a536-bcaec51b9163_MQ2007.rar" +MD5 = "7be1640ae95c6408dab0ae7207bdc706" + + +def __initialize_meta_info__(): + """ + download and extract the MQ2007 dataset + """ + fn = fetch() + rar = rarfile.RarFile(fn) + dirpath = os.path.dirname(fn) + rar.extractall(path=dirpath) + return dirpath + + +class Query(object): + """ + queries used for learning to rank algorithms. It is created from relevance scores, query-document feature vectors + + Parameters: + ---------- + query_id : int + query_id in dataset, mapping from query to relevance documents + relevance_score : int + relevance score of query and document pair + feature_vector : array, dense feature + feature in vector format + description : string + comment section in query doc pair data + """ + def __init__(self, query_id=-1, relevance_score=-1, + feature_vector=None, description=""): + self.query_id = query_id + self.relevance_score = relevance_score + if feature_vector is None: + self.feature_vector = [] + else: + self.feature_vector = feature_vector + self.description = description + + def __str__(self): + string = "%s %s %s" %(str(self.relevance_score), str(self.query_id), " ".join(str(f) for f in self.feature_vector)) + return string + + # @classmethod + def _parse_(self, text): + """ + parse line into Query + """ + comment_position = text.find('#') + line = text[:comment_position].strip() + self.description = text[comment_position+1:].strip() + parts = line.split() + assert(len(parts) == 48), "expect 48 space split parts, get %d" %(len(parts)) + # format : 0 qid:10 1:0.000272 2:0.000000 .... 
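+        # parts[0] is the relevance label, parts[1] is "qid:<id>", and the
+        # remaining 46 entries are "index:value" feature pairs.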
+ self.relevance_score = int(parts[0]) + self.query_id = int(parts[1].split(':')[1]) + for p in parts[2:]: + pair = p.split(':') + self.feature_vector.append(float(pair[1])) + return self + +class QueryList(object): + """ + group query into list, every item in list is a Query + """ + def __init__(self, querylist=None): + self.query_id = -1 + if querylist is None: + self.querylist = [] + else: + self.querylist = querylist + for query in self.querylist: + if self.query_id == -1: + self.query_id = query.query_id + else: + if self.query_id != query.query_id: + raise ValueError("query in list must be same query_id") + + def __iter__(self): + for query in self.querylist: + yield query + + def _correct_ranking_(self): + if self.querylist is None: + return + self.querylist.sort(key=lambda x:x.relevance_score, reverse=True) + + def _add_query(self, query): + if self.query_id == -1: + self.query_id = query.query_id + else: + if self.query_id != query.query_id: + raise ValueError("query in list must be same query_id") + self.querylist.append(query) + + + +def gen_pair(querylist, partial_order="full"): + """ + gen pair for pair-wise learning to rank algorithm + Paramters: + -------- + querylist : querylist, one query match many docment pairs in list, see QueryList + pairtial_order : "full" or "neighbour" + gen pairs for neighbour items or the full partial order pairs + + return : + ------ + label : np.array, shape=(1) + query_left : np.array, shape=(1, feature_dimension) + query_right : same as left + """ + if not isinstance(querylist, QueryList): + querylist = QueryList(querylist) + querylist._correct_ranking_() + # C(n,2) + if partial_order == "full": + for i, query_left in enumerate(querylist): + for j, query_right in enumerate(querylist): + if query_left.relevance_score > query_right.relevance_score: + yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + else: + yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + + elif partial_order == "neighbour": + # C(n) + k = 0 + while k < len(querylist)-1: + query_left = querylist[k] + query_right = querylist[k+1] + if query_left.relevance_score > query_right.relevance_score: + yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + else: + yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + k += 1 + else: + raise ValueError("unsupport parameter of partial_order, Only can be neighbour or full") + + +def gen_list(querylist): + """ + gen pair for pair-wise learning to rank algorithm + Paramters: + -------- + querylist : querylist, one query match many docment pairs in list, see QueryList + + return : + ------ + label : np.array, shape=(samples_num, ) + querylist : np.array, shape=(samples_num, feature_dimension) + """ + if not isinstance(querylist, QueryList): + querylist = QueryList(querylist) + querylist._correct_ranking_() + relevance_score_list = [query.relevance_score for query in querylist] + feature_vector_list = [query.feature_vector for query in querylist] + yield np.array(relevance_score_list).T, np.array(feature_vector_list) + + +def load_from_text(filepath, shuffle=True, fill_missing=-1): + """ + parse data file into querys + """ + prev_query_id = -1; + querylists = [] + querylist = None + fn = __initialize_meta_info__() + with open(os.path.join(fn, filepath)) as f: + for line in f: + query = Query() + query = query._parse_(line) + if query.query_id != prev_query_id: + if querylist is not 
None: + querylists.append(querylist) + querylist = QueryList() + prev_query_id = query.query_id + querylist._add_query(query) + if shuffle == True: + random.shuffle(querylists) + return querylists + + +def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): + """ + Parameters + -------- + filename : string + shuffle : shuffle query-doc pair under the same query + fill_missing : fill the missing value. default in MQ2007 is -1 + + Returns + ------ + yield + label query_left, query_right # format = "pairwise" + label querylist # format = "listwise" + """ + querylists = load_from_text(filepath, shuffle=shuffle, fill_missing=fill_missing) + for querylist in querylists: + if format == "pairwise": + for pair in gen_pair(querylist): + yield pair + elif format == "listwise": + yield next(gen_list(querylist)) + +train = functools.partial(__reader__,filepath="MQ2007/MQ2007/Fold1/train.txt") +test = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/test.txt") +# def __parse_line__(line_stream): +# """ +# return : score, qid, 46-dim feature vector +# parse line of file +# """ +# score = -1, qid = -1, features = [] +# line = line_stream[:line_stream.find('#')].strip() +# parts = line.split() +# assert(len(parts) == 48), "expect 48 space split parts, get ", len(parts) +# # format : 0 qid:10 1:0.000272 2:0.000000 .... +# score = int(parts[0]) +# qid = int(parts[1].split(':')[1]) +# for p in parts[2:]: +# pair = p.split(':') +# features.append(float(part[1])) +# return score, qid, features + + +# def __reader__(filename, rand_seed=0, is_test=False, test_rate=0.0): +# """ +# create a line reader Generator + +# Parameters +# -------- +# filename : string +# rand_seed : sample instance from dataset, set the sample random seed +# is_test : sample test set or generate train set +# test_rate : sample test set rate + +# Returns +# ------ +# yield +# int int lists +# score query_id, features +# """ +# rand = random.Random(x=rand_seed) +# with open(file_name, 'r') as f: +# for line in f: +# if (rand.random() < test_rate) == is_test: +# yield __parse_line__(line) + + +# def __pair_reader__(filename, shuffle=True): + + +def fetch(): + return download(URL, "MQ2007", MD5) + +if __name__ == "__main__": + fetch() + diff --git a/python/paddle/v2/dataset/tests/mq2007_test.py b/python/paddle/v2/dataset/tests/mq2007_test.py new file mode 100644 index 0000000000..c9bddddeb0 --- /dev/null +++ b/python/paddle/v2/dataset/tests/mq2007_test.py @@ -0,0 +1,31 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
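+
+# Both readers download and extract MQ2007 on first use (see mq2007.fetch).
+# The pairwise reader yields (label, left_doc, right_doc) with 46-dim feature
+# vectors; the listwise reader yields (label_array, query_array) per query.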
+ +import paddle.v2.dataset.mq2007 +import unittest + + +class TestMQ2007(unittest.TestCase): + def test_pairwise(self): + for label, query_left, query_right in paddle.v2.dataset.mq2007.test(format="pairwise"): + self.assertEqual(query_left.shape(), (46, )) + self.assertEqual(query_right.shape(), (46, )) + + def test_listwise(self): + for label_array, query_array in paddle.v2.dataset.mq2007.test(format="listwise"): + self.assertEqual(len(label_array), len(query_array)) + + +if __name__ == "__main__": + unittest.main() From d7ef56245020a7fbb71677337db4da8b01532802 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 8 May 2017 16:36:59 +0800 Subject: [PATCH 07/56] "better format" --- python/paddle/v2/dataset/mq2007.py | 45 +----------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index 8884dfd5b1..5705ba60de 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -240,50 +240,7 @@ def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): train = functools.partial(__reader__,filepath="MQ2007/MQ2007/Fold1/train.txt") test = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/test.txt") -# def __parse_line__(line_stream): -# """ -# return : score, qid, 46-dim feature vector -# parse line of file -# """ -# score = -1, qid = -1, features = [] -# line = line_stream[:line_stream.find('#')].strip() -# parts = line.split() -# assert(len(parts) == 48), "expect 48 space split parts, get ", len(parts) -# # format : 0 qid:10 1:0.000272 2:0.000000 .... -# score = int(parts[0]) -# qid = int(parts[1].split(':')[1]) -# for p in parts[2:]: -# pair = p.split(':') -# features.append(float(part[1])) -# return score, qid, features - - -# def __reader__(filename, rand_seed=0, is_test=False, test_rate=0.0): -# """ -# create a line reader Generator - -# Parameters -# -------- -# filename : string -# rand_seed : sample instance from dataset, set the sample random seed -# is_test : sample test set or generate train set -# test_rate : sample test set rate - -# Returns -# ------ -# yield -# int int lists -# score query_id, features -# """ -# rand = random.Random(x=rand_seed) -# with open(file_name, 'r') as f: -# for line in f: -# if (rand.random() < test_rate) == is_test: -# yield __parse_line__(line) - - -# def __pair_reader__(filename, shuffle=True): - + def fetch(): return download(URL, "MQ2007", MD5) From 16d6bd7c38d91c2fe8e7b4d5f46902233a5913a8 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 8 May 2017 21:43:01 +0800 Subject: [PATCH 08/56] "fix label genenerate type. 
avoid IVector create error when init label" --- python/paddle/v2/dataset/mq2007.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index 5705ba60de..9dd33a22ba 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -154,9 +154,9 @@ def gen_pair(querylist, partial_order="full"): for i, query_left in enumerate(querylist): for j, query_right in enumerate(querylist): if query_left.relevance_score > query_right.relevance_score: - yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) else: - yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) elif partial_order == "neighbour": # C(n) @@ -165,9 +165,9 @@ def gen_pair(querylist, partial_order="full"): query_left = querylist[k] query_right = querylist[k+1] if query_left.relevance_score > query_right.relevance_score: - yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) else: - yield np.ones(1), np.array(query_left.feature_vector), np.array(query_right.feature_vector) + yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) k += 1 else: raise ValueError("unsupport parameter of partial_order, Only can be neighbour or full") From 82eb0fe45b293a884cd6e8be805a60366be88d6b Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 8 May 2017 22:26:46 +0800 Subject: [PATCH 09/56] "fix len type error of QueryList" --- python/paddle/v2/dataset/mq2007.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index 9dd33a22ba..2e8a2f9b08 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -116,6 +116,9 @@ class QueryList(object): for query in self.querylist: yield query + def __len__(self): + return len(self.querylist) + def _correct_ranking_(self): if self.querylist is None: return @@ -175,7 +178,7 @@ def gen_pair(querylist, partial_order="full"): def gen_list(querylist): """ - gen pair for pair-wise learning to rank algorithm + gen item in list for list-wise learning to rank algorithm Paramters: -------- querylist : querylist, one query match many docment pairs in list, see QueryList @@ -190,7 +193,9 @@ def gen_list(querylist): querylist._correct_ranking_() relevance_score_list = [query.relevance_score for query in querylist] feature_vector_list = [query.feature_vector for query in querylist] - yield np.array(relevance_score_list).T, np.array(feature_vector_list) + # yield np.array(relevance_score_list).T, np.array(feature_vector_list) + for i in range(len(querylist)): + yield relevance_score_list[i], np.array(feature_vector_list[i]) def load_from_text(filepath, shuffle=True, fill_missing=-1): @@ -236,7 +241,9 @@ def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): for pair in gen_pair(querylist): yield pair elif format == "listwise": - yield next(gen_list(querylist)) + # yield next(gen_list(querylist)) + for instance in gen_list(querylist): + yield instance train = functools.partial(__reader__,filepath="MQ2007/MQ2007/Fold1/train.txt") test = functools.partial(__reader__, 
filepath="MQ2007/MQ2007/Fold1/test.txt") From d86fb1d133b13128822e8ee276ab02bc3a2f8594 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Fri, 12 May 2017 22:45:12 +0800 Subject: [PATCH 10/56] "precommit format with github style" --- python/paddle/v2/dataset/mq2007.py | 287 +++++++++++++++-------------- 1 file changed, 150 insertions(+), 137 deletions(-) diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index 2e8a2f9b08..1122ca88bf 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -23,7 +23,6 @@ http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ20 """ - import os import random import functools @@ -31,25 +30,24 @@ import rarfile from common import download import numpy as np - # URL = "http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar" URL = "http://www.bigdatalab.ac.cn/benchmark/upload/download_source/7b6dbbe2-842c-11e4-a536-bcaec51b9163_MQ2007.rar" MD5 = "7be1640ae95c6408dab0ae7207bdc706" def __initialize_meta_info__(): - """ + """ download and extract the MQ2007 dataset """ - fn = fetch() - rar = rarfile.RarFile(fn) - dirpath = os.path.dirname(fn) - rar.extractall(path=dirpath) - return dirpath + fn = fetch() + rar = rarfile.RarFile(fn) + dirpath = os.path.dirname(fn) + rar.extractall(path=dirpath) + return dirpath class Query(object): - """ + """ queries used for learning to rank algorithms. It is created from relevance scores, query-document feature vectors Parameters: @@ -63,79 +61,86 @@ class Query(object): description : string comment section in query doc pair data """ - def __init__(self, query_id=-1, relevance_score=-1, - feature_vector=None, description=""): - self.query_id = query_id - self.relevance_score = relevance_score - if feature_vector is None: - self.feature_vector = [] - else: - self.feature_vector = feature_vector - self.description = description - def __str__(self): - string = "%s %s %s" %(str(self.relevance_score), str(self.query_id), " ".join(str(f) for f in self.feature_vector)) - return string + def __init__(self, + query_id=-1, + relevance_score=-1, + feature_vector=None, + description=""): + self.query_id = query_id + self.relevance_score = relevance_score + if feature_vector is None: + self.feature_vector = [] + else: + self.feature_vector = feature_vector + self.description = description - # @classmethod - def _parse_(self, text): - """ + def __str__(self): + string = "%s %s %s" % (str(self.relevance_score), str(self.query_id), + " ".join(str(f) for f in self.feature_vector)) + return string + + # @classmethod + def _parse_(self, text): + """ parse line into Query """ - comment_position = text.find('#') - line = text[:comment_position].strip() - self.description = text[comment_position+1:].strip() - parts = line.split() - assert(len(parts) == 48), "expect 48 space split parts, get %d" %(len(parts)) - # format : 0 qid:10 1:0.000272 2:0.000000 .... - self.relevance_score = int(parts[0]) - self.query_id = int(parts[1].split(':')[1]) - for p in parts[2:]: - pair = p.split(':') - self.feature_vector.append(float(pair[1])) - return self + comment_position = text.find('#') + line = text[:comment_position].strip() + self.description = text[comment_position + 1:].strip() + parts = line.split() + assert (len(parts) == 48), "expect 48 space split parts, get %d" % ( + len(parts)) + # format : 0 qid:10 1:0.000272 2:0.000000 .... 
+ self.relevance_score = int(parts[0]) + self.query_id = int(parts[1].split(':')[1]) + for p in parts[2:]: + pair = p.split(':') + self.feature_vector.append(float(pair[1])) + return self + class QueryList(object): - """ + """ group query into list, every item in list is a Query """ - def __init__(self, querylist=None): - self.query_id = -1 - if querylist is None: - self.querylist = [] - else: - self.querylist = querylist - for query in self.querylist: + + def __init__(self, querylist=None): + self.query_id = -1 + if querylist is None: + self.querylist = [] + else: + self.querylist = querylist + for query in self.querylist: + if self.query_id == -1: + self.query_id = query.query_id + else: + if self.query_id != query.query_id: + raise ValueError("query in list must be same query_id") + + def __iter__(self): + for query in self.querylist: + yield query + + def __len__(self): + return len(self.querylist) + + def _correct_ranking_(self): + if self.querylist is None: + return + self.querylist.sort(key=lambda x: x.relevance_score, reverse=True) + + def _add_query(self, query): if self.query_id == -1: - self.query_id = query.query_id + self.query_id = query.query_id else: - if self.query_id != query.query_id: - raise ValueError("query in list must be same query_id") - - def __iter__(self): - for query in self.querylist: - yield query - - def __len__(self): - return len(self.querylist) - - def _correct_ranking_(self): - if self.querylist is None: - return - self.querylist.sort(key=lambda x:x.relevance_score, reverse=True) - - def _add_query(self, query): - if self.query_id == -1: - self.query_id = query.query_id - else: - if self.query_id != query.query_id: - raise ValueError("query in list must be same query_id") - self.querylist.append(query) - + if self.query_id != query.query_id: + raise ValueError("query in list must be same query_id") + self.querylist.append(query) def gen_pair(querylist, partial_order="full"): - """ + """ gen pair for pair-wise learning to rank algorithm Paramters: -------- @@ -149,35 +154,41 @@ def gen_pair(querylist, partial_order="full"): query_left : np.array, shape=(1, feature_dimension) query_right : same as left """ - if not isinstance(querylist, QueryList): - querylist = QueryList(querylist) - querylist._correct_ranking_() - # C(n,2) - if partial_order == "full": - for i, query_left in enumerate(querylist): - for j, query_right in enumerate(querylist): - if query_left.relevance_score > query_right.relevance_score: - yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) - else: - yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) - - elif partial_order == "neighbour": - # C(n) - k = 0 - while k < len(querylist)-1: - query_left = querylist[k] - query_right = querylist[k+1] - if query_left.relevance_score > query_right.relevance_score: - yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) - else: - yield 1, np.array(query_left.feature_vector), np.array(query_right.feature_vector) - k += 1 - else: - raise ValueError("unsupport parameter of partial_order, Only can be neighbour or full") + if not isinstance(querylist, QueryList): + querylist = QueryList(querylist) + querylist._correct_ranking_() + # C(n,2) + if partial_order == "full": + for i, query_left in enumerate(querylist): + for j, query_right in enumerate(querylist): + if query_left.relevance_score > query_right.relevance_score: + yield 1, np.array(query_left.feature_vector), np.array( + query_right.feature_vector) + else: 
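+                    # NOTE: both branches of this comparison yield label 1 with
+                    # the operands in the same order; patch 12 below reworks
+                    # gen_pair to emit the pair as (higher, lower) instead.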
+ yield 1, np.array(query_left.feature_vector), np.array( + query_right.feature_vector) + + elif partial_order == "neighbour": + # C(n) + k = 0 + while k < len(querylist) - 1: + query_left = querylist[k] + query_right = querylist[k + 1] + if query_left.relevance_score > query_right.relevance_score: + yield 1, np.array(query_left.feature_vector), np.array( + query_right.feature_vector) + else: + yield 1, np.array(query_left.feature_vector), np.array( + query_right.feature_vector) + k += 1 + else: + raise ValueError( + "unsupport parameter of partial_order, Only can be neighbour or full" + ) + - def gen_list(querylist): - """ + """ gen item in list for list-wise learning to rank algorithm Paramters: -------- @@ -188,41 +199,39 @@ def gen_list(querylist): label : np.array, shape=(samples_num, ) querylist : np.array, shape=(samples_num, feature_dimension) """ - if not isinstance(querylist, QueryList): - querylist = QueryList(querylist) - querylist._correct_ranking_() - relevance_score_list = [query.relevance_score for query in querylist] - feature_vector_list = [query.feature_vector for query in querylist] - # yield np.array(relevance_score_list).T, np.array(feature_vector_list) - for i in range(len(querylist)): - yield relevance_score_list[i], np.array(feature_vector_list[i]) + if not isinstance(querylist, QueryList): + querylist = QueryList(querylist) + # querylist._correct_ranking_() + relevance_score_list = [query.relevance_score for query in querylist] + feature_vector_list = [query.feature_vector for query in querylist] + yield np.array(relevance_score_list).T, np.array(feature_vector_list) def load_from_text(filepath, shuffle=True, fill_missing=-1): - """ + """ parse data file into querys """ - prev_query_id = -1; - querylists = [] - querylist = None - fn = __initialize_meta_info__() - with open(os.path.join(fn, filepath)) as f: - for line in f: - query = Query() - query = query._parse_(line) - if query.query_id != prev_query_id: - if querylist is not None: - querylists.append(querylist) - querylist = QueryList() - prev_query_id = query.query_id - querylist._add_query(query) - if shuffle == True: - random.shuffle(querylists) - return querylists + prev_query_id = -1 + querylists = [] + querylist = None + fn = __initialize_meta_info__() + with open(os.path.join(fn, filepath)) as f: + for line in f: + query = Query() + query = query._parse_(line) + if query.query_id != prev_query_id: + if querylist is not None: + querylists.append(querylist) + querylist = QueryList() + prev_query_id = query.query_id + querylist._add_query(query) + if shuffle == True: + random.shuffle(querylists) + return querylists def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): - """ + """ Parameters -------- filename : string @@ -235,23 +244,27 @@ def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): label query_left, query_right # format = "pairwise" label querylist # format = "listwise" """ - querylists = load_from_text(filepath, shuffle=shuffle, fill_missing=fill_missing) - for querylist in querylists: - if format == "pairwise": - for pair in gen_pair(querylist): - yield pair - elif format == "listwise": - # yield next(gen_list(querylist)) - for instance in gen_list(querylist): - yield instance - -train = functools.partial(__reader__,filepath="MQ2007/MQ2007/Fold1/train.txt") + querylists = load_from_text( + filepath, shuffle=shuffle, fill_missing=fill_missing) + for querylist in querylists: + if format == "pairwise": + for pair in gen_pair(querylist): + yield pair + 
elif format == "listwise": + yield next(gen_list(querylist)) + + +train = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/train.txt") test = functools.partial(__reader__, filepath="MQ2007/MQ2007/Fold1/test.txt") def fetch(): - return download(URL, "MQ2007", MD5) + return download(URL, "MQ2007", MD5) -if __name__ == "__main__": - fetch() +if __name__ == "__main__": + fetch() + for i, (score, + samples) in enumerate(train( + format="listwise", shuffle=False)): + np.savetxt("query_%d" % (i), score, fmt="%.2f") From 4ac5caaaa72635b0a948bd8b9826efb98d73a0c9 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Fri, 12 May 2017 22:58:04 +0800 Subject: [PATCH 11/56] "formatter" --- python/paddle/v2/dataset/mq2007.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index 1122ca88bf..9c6f8927b7 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -16,9 +16,9 @@ MQ2007 dataset MQ2007 is a query set from Million Query track of TREC 2007. There are about 1700 queries in it with labeled documents. In MQ2007, the 5-fold cross validation strategy is adopted and the 5-fold partitions are included in the package. In each fold, there are three subsets for learning: training set, -validation set and testing set. +validation set and testing set. -MQ2007 dataset from +MQ2007 dataset from website http://research.microsoft.com/en-us/um/beijing/projects/letor/LETOR4.0/Data/MQ2007.rar and parse training set and test set into paddle reader creators """ From a4313de808e7c7d269f5fa0a12511884fe3dedf8 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 15 May 2017 17:51:32 +0800 Subject: [PATCH 12/56] "remove the pairwise other genereate method" --- python/paddle/v2/dataset/mq2007.py | 139 +++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 36 deletions(-) diff --git a/python/paddle/v2/dataset/mq2007.py b/python/paddle/v2/dataset/mq2007.py index 9c6f8927b7..fd71b34166 100644 --- a/python/paddle/v2/dataset/mq2007.py +++ b/python/paddle/v2/dataset/mq2007.py @@ -89,8 +89,10 @@ class Query(object): line = text[:comment_position].strip() self.description = text[comment_position + 1:].strip() parts = line.split() - assert (len(parts) == 48), "expect 48 space split parts, get %d" % ( - len(parts)) + if len(parts) != 48: + sys.stdout.write("expect 48 space split parts, get %d" % + (len(parts))) + return None # format : 0 qid:10 1:0.000272 2:0.000000 .... 
self.relevance_score = int(parts[0]) self.query_id = int(parts[1].split(':')[1]) @@ -125,6 +127,9 @@ class QueryList(object): def __len__(self): return len(self.querylist) + def __getitem__(self, i): + return self.querylist[i] + def _correct_ranking_(self): if self.querylist is None: return @@ -139,6 +144,46 @@ class QueryList(object): self.querylist.append(query) +def gen_plain_txt(querylist): + """ + gen plain text in list for other usage + Paramters: + -------- + querylist : querylist, one query match many docment pairs in list, see QueryList + + return : + ------ + query_id : np.array, shape=(samples_num, ) + label : np.array, shape=(samples_num, ) + querylist : np.array, shape=(samples_num, feature_dimension) + """ + if not isinstance(querylist, QueryList): + querylist = QueryList(querylist) + querylist._correct_ranking_() + for query in querylist: + yield querylist.query_id, query.relevance_score, np.array( + query.feature_vector) + + +def gen_point(querylist): + """ + gen item in list for point-wise learning to rank algorithm + Paramters: + -------- + querylist : querylist, one query match many docment pairs in list, see QueryList + + return : + ------ + label : np.array, shape=(samples_num, ) + querylist : np.array, shape=(samples_num, feature_dimension) + """ + if not isinstance(querylist, QueryList): + querylist = QueryList(querylist) + querylist._correct_ranking_() + for query in querylist: + yield query.relevance_score, np.array(query.feature_vector) + + def gen_pair(querylist, partial_order="full"): """ gen pair for pair-wise learning to rank algorithm @@ -146,6 +191,7 @@ def gen_pair(querylist, partial_order="full"): -------- querylist : querylist, one query match many docment pairs in list, see QueryList pairtial_order : "full" or "neighbour" + there is redudant in all possiable pair combinations, which can be simplifed gen pairs for neighbour items or the full partial order pairs return : @@ -157,34 +203,28 @@ def gen_pair(querylist, partial_order="full"): if not isinstance(querylist, QueryList): querylist = QueryList(querylist) querylist._correct_ranking_() + labels = [] + docpairs = [] + # C(n,2) - if partial_order == "full": - for i, query_left in enumerate(querylist): - for j, query_right in enumerate(querylist): - if query_left.relevance_score > query_right.relevance_score: - yield 1, np.array(query_left.feature_vector), np.array( - query_right.feature_vector) - else: - yield 1, np.array(query_left.feature_vector), np.array( - query_right.feature_vector) - - elif partial_order == "neighbour": - # C(n) - k = 0 - while k < len(querylist) - 1: - query_left = querylist[k] - query_right = querylist[k + 1] + for i in range(len(querylist)): + query_left = querylist[i] + for j in range(i + 1, len(querylist)): + query_right = querylist[j] if query_left.relevance_score > query_right.relevance_score: - yield 1, np.array(query_left.feature_vector), np.array( - query_right.feature_vector) - else: - yield 1, np.array(query_left.feature_vector), np.array( - query_right.feature_vector) - k += 1 - else: - raise ValueError( - "unsupport parameter of partial_order, Only can be neighbour or full" - ) + labels.append(1) + docpairs.append([ + np.array(query_left.feature_vector), + np.array(query_right.feature_vector) + ]) + elif query_left.relevance_score < query_right.relevance_score: + labels.append(1) + docpairs.append([ + np.array(query_right.feature_vector), + np.array(query_left.feature_vector) + ]) + for label, pair in zip(labels, docpairs): + yield label, pair[0], pair[1] def 
gen_list(querylist): @@ -201,12 +241,30 @@ def gen_list(querylist): """ if not isinstance(querylist, QueryList): querylist = QueryList(querylist) - # querylist._correct_ranking_() + querylist._correct_ranking_() relevance_score_list = [query.relevance_score for query in querylist] feature_vector_list = [query.feature_vector for query in querylist] yield np.array(relevance_score_list).T, np.array(feature_vector_list) +def query_filter(querylists): + """ + filter query get only document with label 0. + label 0, 1, 2 means the relevance score document with query + parameters : + querylist : QueyList list + + return : + querylist : QueyList list + """ + filter_query = [] + for querylist in querylists: + relevance_score_list = [query.relevance_score for query in querylist] + if sum(relevance_score_list) != .0: + filter_query.append(querylist) + return filter_query + + def load_from_text(filepath, shuffle=True, fill_missing=-1): """ parse data file into querys @@ -219,12 +277,16 @@ def load_from_text(filepath, shuffle=True, fill_missing=-1): for line in f: query = Query() query = query._parse_(line) + if query == None: + continue if query.query_id != prev_query_id: if querylist is not None: querylists.append(querylist) querylist = QueryList() prev_query_id = query.query_id querylist._add_query(query) + if querylist is not None: + querylists.append(querylist) if shuffle == True: random.shuffle(querylists) return querylists @@ -244,10 +306,15 @@ def __reader__(filepath, format="pairwise", shuffle=True, fill_missing=-1): label query_left, query_right # format = "pairwise" label querylist # format = "listwise" """ - querylists = load_from_text( - filepath, shuffle=shuffle, fill_missing=fill_missing) + querylists = query_filter( + load_from_text( + filepath, shuffle=shuffle, fill_missing=fill_missing)) for querylist in querylists: - if format == "pairwise": + if format == "plain_txt": + yield next(gen_plain_txt(querylist)) + elif format == "pointwise": + yield next(gen_point(querylist)) + elif format == "pairwise": for pair in gen_pair(querylist): yield pair elif format == "listwise": @@ -264,7 +331,7 @@ def fetch(): if __name__ == "__main__": fetch() - for i, (score, - samples) in enumerate(train( - format="listwise", shuffle=False)): - np.savetxt("query_%d" % (i), score, fmt="%.2f") + mytest = functools.partial( + __reader__, filepath="MQ2007/MQ2007/Fold1/sample", format="listwise") + for label, query in mytest(): + print label, query From 590c6038fcf5e378c562b57db12891dc2db79ea6 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 15 May 2017 21:30:27 +0800 Subject: [PATCH 13/56] "format Test" --- python/paddle/v2/dataset/tests/mq2007_test.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/python/paddle/v2/dataset/tests/mq2007_test.py b/python/paddle/v2/dataset/tests/mq2007_test.py index c9bddddeb0..59847b6c18 100644 --- a/python/paddle/v2/dataset/tests/mq2007_test.py +++ b/python/paddle/v2/dataset/tests/mq2007_test.py @@ -17,15 +17,17 @@ import unittest class TestMQ2007(unittest.TestCase): - def test_pairwise(self): - for label, query_left, query_right in paddle.v2.dataset.mq2007.test(format="pairwise"): - self.assertEqual(query_left.shape(), (46, )) - self.assertEqual(query_right.shape(), (46, )) + def test_pairwise(self): + for label, query_left, query_right in paddle.v2.dataset.mq2007.test( + format="pairwise"): + self.assertEqual(query_left.shape(), (46, )) + self.assertEqual(query_right.shape(), (46, )) - def test_listwise(self): - for label_array, 
query_array in paddle.v2.dataset.mq2007.test(format="listwise"): - self.assertEqual(len(label_array), len(query_array)) + def test_listwise(self): + for label_array, query_array in paddle.v2.dataset.mq2007.test( + format="listwise"): + self.assertEqual(len(label_array), len(query_array)) if __name__ == "__main__": - unittest.main() + unittest.main() From 8411a73e877f6f235bc2bf8c757e36c68b00dc62 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 16 May 2017 15:52:33 +0800 Subject: [PATCH 14/56] overload several virtual functions to make ChunkEvaluator output multiple metrics --- paddle/gserver/evaluators/ChunkEvaluator.cpp | 56 ++++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/gserver/evaluators/ChunkEvaluator.cpp index 13f02e51fe..b94a641b4a 100644 --- a/paddle/gserver/evaluators/ChunkEvaluator.cpp +++ b/paddle/gserver/evaluators/ChunkEvaluator.cpp @@ -16,6 +16,7 @@ limitations under the License. */ #include #include "paddle/math/Vector.h" +#include "paddle/utils/StringUtil.h" #include "Evaluator.h" @@ -121,11 +122,9 @@ public: } virtual void printStats(std::ostream& os) const { - double precision = (double)numCorrect_ / numOutputSegments_; - double recall = (double)numCorrect_ / numLabelSegments_; - double f1 = - !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall); - os << config_.name() << "=" << f1 << " true_chunks=" << numLabelSegments_ + storeLocalValues(); + os << config_.name() << "=" << values_["F1-score"] + << " true_chunks=" << numLabelSegments_ << " result_chunks=" << numOutputSegments_ << " correct_chunks=" << numCorrect_; } @@ -243,6 +242,53 @@ public: if (tag == tagSingle_) return true; return false; } + +public: + // three metrics: precision, recall and F1-score + void getNames(std::vector* names) { + this->storeLocalValues(); + names->reserve(this->values_.size()); + for (auto it = this->values_.begin(); it != this->values_.end(); ++it) { + names->push_back(this->config_.name() + "." + it->first); + } + } + + // get value by field name + real getValue(const std::string& name, Error* err) const { + this->storeLocalValues(); + std::vector buffers; + paddle::str::split(name, '.', &buffers); + auto it = this->values_.find(buffers[buffers.size() - 1]); + if (it == this->values_.end()) { // not found + *err = Error("No such key %s", name.c_str()); + return 0.0f; + } + + return it->second; + } + + // get type of evaluator + std::string getType(const std::string& name, Error* err) const { + this->getValue(name, err); + if (!err->isOK()) { + return std::string(); + } + return "chunk"; + } + +private: + void storeLocalValues() const { + CHECK_GT(numOutputSegments_, 0); + CHECK_GT(numLabelSegments_, 0); + double precision = (double)numCorrect_ / numOutputSegments_; + double recall = (double)numCorrect_ / numLabelSegments_; + values_["precision"] = precision; + values_["recall"] = recall; + values_["F1-score"] = + !numCorrect_ ? 
0 : 2 * precision * recall / (precision + recall); + } + + mutable std::unordered_map values_; }; REGISTER_EVALUATOR(chunk, ChunkEvaluator); From a74060d48e735a77d8746685a8fac1f640cf5f9b Mon Sep 17 00:00:00 2001 From: yangyaming Date: Tue, 16 May 2017 19:00:13 +0800 Subject: [PATCH 15/56] modify usage document of chunk evaluator --- .../trainer_config_helpers/evaluators.py | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 567521ee9d..8704a8cde2 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -347,32 +347,45 @@ def chunk_evaluator( excluded_chunk_types=None, ): """ Chunk evaluator is used to evaluate segment labelling accuracy for a - sequence. It calculates the chunk detection F1 score. + sequence. It calculates precision, recall and F1 score of the chunk detection. - A chunk is correctly detected if its beginning, end and type are correct. - Other chunk type is ignored. - - For each label in the label sequence, we have: + To use chunk evaluator, the construction of label dict should obey the following rules: + (1) Use one of the listed labelling schemes. These schemes differ in ways indicating chunk boundry. + .. code-block:: python + Scheme Begin Inside End Single + plain 0 - - - + IOB 0 1 - - + IOE - 0 1 - + IOBES 0 1 2 3 .. code-block:: python - tagType = label % numTagType - chunkType = label / numTagType - otherChunkType = numChunkTypes + To make it clear, let's illustrate by a NER example. + Assuming that there are two named entity types including ORG and PER which are called 'chunk type' here, + if 'IOB' scheme were used, the label set will be extended to a set including B-ORG, I-ORG, B-PER, I-PER and O, + in which B-ORG for begining of ORG and I-ORG for end of ORG. + Prefixes which are called 'tag type' here are added to chunk types and there are two tag types including B and I. + Of course, the training data should be labeled accordingly. - The total number of different labels is numTagType*numChunkTypes+1. - We support 4 labelling scheme. - The tag type for each of the scheme is shown as follows: + (2) Map can be done correctly by the listed equations. + .. code-block:: python + tagType = label % numTagType + chunkType = label / numTagType + otherChunkType = numChunkTypes .. code-block:: python - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 + Continue the NER example, and the label dict should like this to satify above equations: + + .. code-block:: python + B-ORG 0 + I-ORG 1 + B-PER 2 + I-PER 3 + O 4 + .. code-block:: python - 'plain' means the whole chunk must contain exactly the same chunk label. + Realizing that the number of is chunk type is 2 and number of tag type is 2, it is easy to validate this. 
The simple usage is: From f3eb9cb36a37efd58cbbf91f9f9e4c888bec1d65 Mon Sep 17 00:00:00 2001 From: yangyaming Date: Wed, 17 May 2017 16:38:21 +0800 Subject: [PATCH 16/56] Override getValueImpl and revise document --- paddle/gserver/evaluators/ChunkEvaluator.cpp | 36 +++++------- .../trainer_config_helpers/evaluators.py | 57 +++++++++++++------ 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/gserver/evaluators/ChunkEvaluator.cpp index b94a641b4a..1658282f3a 100644 --- a/paddle/gserver/evaluators/ChunkEvaluator.cpp +++ b/paddle/gserver/evaluators/ChunkEvaluator.cpp @@ -75,6 +75,7 @@ class ChunkEvaluator : public Evaluator { std::vector labelSegments_; std::vector outputSegments_; std::set excludedChunkTypes_; + mutable std::unordered_map values_; public: virtual void init(const EvaluatorConfig& config) { @@ -243,23 +244,22 @@ public: return false; } -public: // three metrics: precision, recall and F1-score void getNames(std::vector* names) { - this->storeLocalValues(); - names->reserve(this->values_.size()); - for (auto it = this->values_.begin(); it != this->values_.end(); ++it) { - names->push_back(this->config_.name() + "." + it->first); + storeLocalValues(); + names->reserve(names->size() + values_.size()); + for (auto it = values_.begin(); it != values_.end(); ++it) { + names->push_back(config_.name() + "." + it->first); } } // get value by field name real getValue(const std::string& name, Error* err) const { - this->storeLocalValues(); + storeLocalValues(); std::vector buffers; paddle::str::split(name, '.', &buffers); - auto it = this->values_.find(buffers[buffers.size() - 1]); - if (it == this->values_.end()) { // not found + auto it = values_.find(buffers.back()); + if (it == values_.end()) { // not found *err = Error("No such key %s", name.c_str()); return 0.0f; } @@ -268,27 +268,21 @@ public: } // get type of evaluator - std::string getType(const std::string& name, Error* err) const { - this->getValue(name, err); - if (!err->isOK()) { - return std::string(); - } - return "chunk"; - } + std::string getTypeImpl() const { return "chunk"; } private: void storeLocalValues() const { - CHECK_GT(numOutputSegments_, 0); - CHECK_GT(numLabelSegments_, 0); - double precision = (double)numCorrect_ / numOutputSegments_; - double recall = (double)numCorrect_ / numLabelSegments_; + CHECK_GE(numOutputSegments_, 0); + CHECK_GE(numLabelSegments_, 0); + double precision = + !numOutputSegments_ ? 0 : (double)numCorrect_ / numOutputSegments_; + double recall = + !numLabelSegments_ ? 0 : (double)numCorrect_ / numLabelSegments_; values_["precision"] = precision; values_["recall"] = recall; values_["F1-score"] = !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall); } - - mutable std::unordered_map values_; }; REGISTER_EVALUATOR(chunk, ChunkEvaluator); diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 8704a8cde2..6900133fde 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -347,45 +347,68 @@ def chunk_evaluator( excluded_chunk_types=None, ): """ Chunk evaluator is used to evaluate segment labelling accuracy for a - sequence. It calculates precision, recall and F1 score of the chunk detection. + sequence. It calculates precision, recall and F1 scores for the chunk detection. 
-    To use chunk evaluator, the construction of label dict should obey the following rules:
+    To use chunk evaluator, several concepts need to be clarified first.
+    Chunk type is the type of the whole chunk and a chunk consists of one or several words. (For example in NER, ORG for organization name, PER for person name etc.)
+    Tag indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single)
+    We can name a label by combining tag type and chunk type. (i.e. B-ORG for the beginning of an organization name)
+
+    The construction of the label dict should obey the following rules:
     (1) Use one of the listed labelling schemes. These schemes differ in how they indicate chunk boundaries.

     .. code-block:: python

-    Scheme Begin Inside End   Single
-    plain  0     -      -     -
-    IOB    0     1      -     -
-    IOE    -     0      1     -
-    IOBES  0     1      2     3
+    Scheme  Description
+    plain   Use the same label for the whole chunk.
+    IOB     Two labels for chunk type X: B-X for the chunk beginning and I-X for the chunk inside.
+    IOE     Two labels for chunk type X: E-X for the chunk ending and I-X for the chunk inside.
+    IOBES   Four labels for chunk type X: B-X for the chunk beginning, I-X for the chunk inside, E-X for the chunk end and S-X for a single-word chunk.

     .. code-block:: python
-
-    To make it clear, let's illustrate by a NER example.
-    Assuming that there are two named entity types including ORG and PER which are called 'chunk type' here,
-    if 'IOB' scheme were used, the label set will be extended to a set including B-ORG, I-ORG, B-PER, I-PER and O,
-    in which B-ORG for begining of ORG and I-ORG for end of ORG.
+
+    To make it clear, let's illustrate with an NER example.
+    Assuming that there are three named entity types, ORG, PER and LOC, which are called 'chunk types' here,
+    if the 'IOB' scheme were used, the label set would be extended to B-ORG, I-ORG, B-PER, I-PER, B-LOC, I-LOC and O,
+    in which B-ORG stands for the beginning of ORG and I-ORG for the inside of ORG.
     Prefixes which are called 'tag type' here are added to chunk types and there are two tag types including B and I.
     Of course, the training data should be labeled accordingly.

-    (2) Map can be done correctly by the listed equations.
+    (2) Assign label ids so that the listed equations map them correctly.
+
+    The following equations extract the tag type and chunk type from a label:

     .. code-block:: python

         tagType = label % numTagType
         chunkType = label / numTagType
         otherChunkType = numChunkTypes

     .. code-block:: python
+
+    The following table shows the mapping rule between tagType and the tag type in each scheme.
-    Continue the NER example, and the label dict should like this to satify above equations:
+    .. code-block:: python
+    Scheme Begin Inside End Single
+    plain  0     -      -   -
+    IOB    0     1      -   -
+    IOE    -     0      1   -
+    IOBES  0     1      2   3
+    .. code-block:: python
+
+    Continue the NER example: the label dict should look like this to satisfy the above equations:

     .. code-block:: python

         B-ORG  0
         I-ORG  1
         B-PER  2
         I-PER  3
-        O      4
+        B-LOC  4
+        I-LOC  5
+        O      6

     .. code-block:: python

-    Realizing that the number of is chunk type is 2 and number of tag type is 2, it is easy to validate this.
+    In this example, chunkType has three values: 0 for ORG, 1 for PER and 2 for LOC; because the scheme is
+    "IOB", tagType has two values: 0 for B and 1 for I.
+    Here we use I-LOC to explain the above mapping rules in detail.
+    For I-LOC the label id is 5, so we get tagType=1 and chunkType=2, which means I-LOC is part of the NER chunk LOC
+    and its tag is I.
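+
+    A quick sanity check of the mapping rules (an illustrative sketch, not part
+    of the evaluator API):
+
+    .. code-block:: python
+
+        num_tag_types = 2                     # 'IOB' scheme: tags B and I
+        label = 5                             # I-LOC in the dict above
+        tag_type = label % num_tag_types      # 1, i.e. tag I
+        chunk_type = label / num_tag_types    # 2, i.e. chunk type LOC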
The simple usage is: @@ -393,6 +416,8 @@ def chunk_evaluator( eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types) + .. code-block:: python + :param input: The input layers. :type input: LayerOutput :param label: An input layer containing the ground truth label. From c9cdc986279606b7016d22c04e784f4077805241 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 17 May 2017 17:45:27 +0800 Subject: [PATCH 17/56] detect golang in cmake --- cmake/CMakeDetermineGoCompiler.cmake | 44 ++++++++++++++++++++++++++++ cmake/CMakeGoCompiler.cmake.in | 8 +++++ cmake/CMakeGoInformation.cmake | 7 +++++ cmake/CMakeTestGoCompiler.cmake | 1 + 4 files changed, 60 insertions(+) create mode 100644 cmake/CMakeDetermineGoCompiler.cmake create mode 100644 cmake/CMakeGoCompiler.cmake.in create mode 100644 cmake/CMakeGoInformation.cmake create mode 100644 cmake/CMakeTestGoCompiler.cmake diff --git a/cmake/CMakeDetermineGoCompiler.cmake b/cmake/CMakeDetermineGoCompiler.cmake new file mode 100644 index 0000000000..b3f8fbe271 --- /dev/null +++ b/cmake/CMakeDetermineGoCompiler.cmake @@ -0,0 +1,44 @@ +if(NOT CMAKE_Go_COMPILER) + if(NOT $ENV{GO_COMPILER} STREQUAL "") + get_filename_component(CMAKE_Go_COMPILER_INIT $ENV{GO_COMPILER} PROGRAM PROGRAM_ARGS CMAKE_Go_FLAGS_ENV_INIT) + + if(CMAKE_Go_FLAGS_ENV_INIT) + set(CMAKE_Go_COMPILER_ARG1 "${CMAKE_Go_FLAGS_ENV_INIT}" CACHE STRING "First argument to Go compiler") + endif() + + if(NOT EXISTS ${CMAKE_Go_COMPILER_INIT}) + message(SEND_ERROR "Could not find compiler set in environment variable GO_COMPILER:\n$ENV{GO_COMPILER}.") + endif() + + endif() + + set(Go_BIN_PATH + $ENV{GOPATH} + $ENV{GOROOT} + $ENV{GOROOT}/../bin + $ENV{GO_COMPILER} + /usr/bin + /usr/local/bin + ) + + if(CMAKE_Go_COMPILER_INIT) + set(CMAKE_Go_COMPILER ${CMAKE_Go_COMPILER_INIT} CACHE PATH "Go Compiler") + else() + find_program(CMAKE_Go_COMPILER + NAMES go + PATHS ${Go_BIN_PATH} + ) + EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) + STRING(REGEX MATCH "go[0-9]+.[0-9]+.[0-9]+[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") + message("-- The Golang compiler identification is ${VERSION}") + message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") + endif() + +endif() + +mark_as_advanced(CMAKE_Go_COMPILER) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/CMakeGoCompiler.cmake.in + ${CMAKE_PLATFORM_INFO_DIR}/CMakeGoCompiler.cmake @ONLY) + +set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/cmake/CMakeGoCompiler.cmake.in b/cmake/CMakeGoCompiler.cmake.in new file mode 100644 index 0000000000..a71f08e064 --- /dev/null +++ b/cmake/CMakeGoCompiler.cmake.in @@ -0,0 +1,8 @@ +set(CMAKE_Go_COMPILER "@CMAKE_Go_COMPILER@") +set(CMAKE_Go_COMPILER_LOADED 1) + +set(CMAKE_Go_SOURCE_FILE_EXTENSIONS go) +set(CMAKE_Go_LINKER_PREFERENCE 40) +set(CMAKE_Go_OUTPUT_EXTENSION .o) +set(CMAKE_Go_OUTPUT_EXTENSION_REPLACE 1) +set(CMAKE_Go_COMPILER_ENV_VAR "GO_COMPILER") diff --git a/cmake/CMakeGoInformation.cmake b/cmake/CMakeGoInformation.cmake new file mode 100644 index 0000000000..ba51ac93fc --- /dev/null +++ b/cmake/CMakeGoInformation.cmake @@ -0,0 +1,7 @@ +if(NOT CMAKE_Go_COMPILE_OBJECT) + set(CMAKE_Go_COMPILE_OBJECT "go tool compile -l -N -o ") +endif() + +if(NOT CMAKE_Go_LINK_EXECUTABLE) + set(CMAKE_Go_LINK_EXECUTABLE "go tool link -o ") +endif() diff --git a/cmake/CMakeTestGoCompiler.cmake b/cmake/CMakeTestGoCompiler.cmake new file mode 100644 index 0000000000..b9891b015b --- /dev/null +++ b/cmake/CMakeTestGoCompiler.cmake @@ -0,0 +1 @@ +set(CMAKE_Go_COMPILER_WORKS 1 
CACHE INTERNAL "") From 4dcb9f1ca1995011b8a0c1f65663e756ff95e854 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 17 May 2017 20:44:29 +0800 Subject: [PATCH 18/56] add go_xxx to simplify cmake --- CMakeLists.txt | 3 +- cmake/generic.cmake | 97 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 cmake/generic.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index fc85f83b94..79210d0436 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) include(system) -project(paddle CXX C) +project(paddle CXX C Go) find_package(Sphinx) if(NOT CMAKE_CROSSCOMPILING) @@ -92,6 +92,7 @@ include(external/swig) # download, build, install swig include(external/warpctc) # download, build, install warpctc include(external/any) # download libn::any +include(generic) # simplify cmake module include(package) # set paddle packages include(cpplint) # set paddle c++ style include(ccache) # set ccache for compilation diff --git a/cmake/generic.cmake b/cmake/generic.cmake new file mode 100644 index 0000000000..063a09b63e --- /dev/null +++ b/cmake/generic.cmake @@ -0,0 +1,97 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# To simplify the build process of PaddlePaddle, we defined couple of +# fundamental abstractions, e.g., how to build library, binary and +# test in C++, CUDA and Go. +# +# ------------------------------------------- +# C++ CUDA C++ Go +# ------------------------------------------- +# cc_library nv_library go_library +# cc_binary nv_binary go_binary +# cc_test nv_test go_test +# ------------------------------------------- +# +# cmake_parse_arguments can help us to achieve this goal. +# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html + + +set(GOPATH "${CMAKE_CURRENT_BINARY_DIR}/go") +file(MAKE_DIRECTORY ${GOPATH}) + +# Because api.go defines a GO wrapper to ops and tensor, it depends on +# both. This implies that if any of tensor.{h,cc}, ops.{h,cu}, or +# api.go is changed, api need to be re-built. +# go_library(api +# SRCS +# api.go +# DEPS +# tensor # Because ops depend on tensor, this line is optional. 
+# ops) +function(go_library TARGET_NAME) + set(options OPTIONAL) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(go_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + if (${go_library_OPTIONAL} STREQUAL "SHARED") + set(BUILD_MODE "-buildmode=c-shared") + if(APPLE) + set(LIB_NAME "lib${TARGET_NAME}.dylib") + else() + set(LIB_NAME "lib${TARGET_NAME}.so") + endif() + else() + set(BUILD_MODE "-buildmode=c-archive") + set(LIB_NAME "lib${TARGET_NAME}.a") + endif() + add_custom_command(OUTPUT ${TARGET_NAME}_timestamp + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} + -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" + ${go_library_SRCS} + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) + add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS}) + add_library(${TARGET_NAME} STATIC IMPORTED) + set_target_properties(${TARGET_NAME} PROPERTIES + IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}) +endfunction(go_library) + +function(go_binary TARGET_NAME) + set(options OPTIONAL) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + add_custom_command(OUTPUT ${TARGET_NAME}_timestamp + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build + -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" + ${go_library_SRCS} + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) + add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_binary_DEPS}) + install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin) +endfunction(go_binary) + +function(go_test TARGET_NAME) + set(options OPTIONAL) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(go_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + add_custom_command(OUTPUT ${TARGET_NAME}_timestamp + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} test + -c -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}" + ${go_test_SRCS} + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) + add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS}) + add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) +endfunction(go_test) From e254c7a799db1b309d87da9fce5bc1c0d0e5b718 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 17 May 2017 20:44:43 +0800 Subject: [PATCH 19/56] add go unit test --- paddle/CMakeLists.txt | 4 ++++ paddle/go/CMakeLists.txt | 13 +++++++++++++ paddle/go/adder.go | 10 ++++++++++ paddle/go/cgo_test.cc | 7 +++++++ 4 files changed, 34 insertions(+) create mode 100644 paddle/go/CMakeLists.txt create mode 100644 paddle/go/adder.go create mode 100644 paddle/go/cgo_test.cc diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 7699554909..694ebb9ba0 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -9,6 +9,10 @@ add_subdirectory(pserver) add_subdirectory(trainer) add_subdirectory(scripts) +if(${CMAKE_Go_COMPILER}) + add_subdirectory(go) +endif() + find_package(Boost QUIET) if(Boost_FOUND) diff --git a/paddle/go/CMakeLists.txt b/paddle/go/CMakeLists.txt new file mode 100644 index 0000000000..482c948a51 --- /dev/null +++ b/paddle/go/CMakeLists.txt @@ -0,0 +1,13 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +go_library(adder SRCS adder.go) + +# cc_test(cgo_test +# SRCS +# cgo_test.cc +# DEPS +# adder) +add_executable(cgo_test cgo_test.cc) +add_dependencies(cgo_test adder) +target_link_libraries(cgo_test ${GTEST_LIBRARIES} 
${GTEST_MAIN_LIBRARIES} adder) +add_test(cgo_test cgo_test) diff --git a/paddle/go/adder.go b/paddle/go/adder.go new file mode 100644 index 0000000000..e14f40fd9f --- /dev/null +++ b/paddle/go/adder.go @@ -0,0 +1,10 @@ +package main + +import "C" + +//export GoAdder +func GoAdder(x, y int) int { + return x + y +} + +func main() {} // Required but ignored diff --git a/paddle/go/cgo_test.cc b/paddle/go/cgo_test.cc new file mode 100644 index 0000000000..8d89a92c08 --- /dev/null +++ b/paddle/go/cgo_test.cc @@ -0,0 +1,7 @@ +#include "libadder.h" +#include +#include "gtest/gtest.h" + +TEST(Cgo, Invoke) { + EXPECT_EQ(GoAdder(30, 12), 42); +} From 4d5417b55d69df5268ff32d6d4dc8de0a4208f53 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 17 May 2017 20:57:30 +0800 Subject: [PATCH 20/56] modify travis --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 387367a230..d1e5080f68 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,7 @@ -language: cpp +language: go +go: + - 1.8 + cache: directories: - $HOME/third_party From 447145207608f2825e8d4dd0351a9167b0893a9f Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 17 May 2017 21:15:02 +0800 Subject: [PATCH 21/56] update travis --- .travis.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index d1e5080f68..387367a230 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,4 @@ -language: go -go: - - 1.8 - +language: cpp cache: directories: - $HOME/third_party From af065196400ca0254e61df3916888f4341306e97 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 17 May 2017 21:21:45 +0800 Subject: [PATCH 22/56] clang-format test.cc --- paddle/go/cgo_test.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paddle/go/cgo_test.cc b/paddle/go/cgo_test.cc index 8d89a92c08..64efa606ff 100644 --- a/paddle/go/cgo_test.cc +++ b/paddle/go/cgo_test.cc @@ -1,7 +1,5 @@ -#include "libadder.h" #include #include "gtest/gtest.h" +#include "libadder.h" -TEST(Cgo, Invoke) { - EXPECT_EQ(GoAdder(30, 12), 42); -} +TEST(Cgo, Invoke) { EXPECT_EQ(GoAdder(30, 12), 42); } From a7edafc4bf653c05444939c5fd6dc5482f6a51cb Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 18 May 2017 00:34:43 +0800 Subject: [PATCH 23/56] add cc_test --- cmake/CMakeDetermineGoCompiler.cmake | 10 ++++++---- cmake/generic.cmake | 5 +++-- paddle/CMakeLists.txt | 2 +- paddle/go/CMakeLists.txt | 14 +++++--------- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cmake/CMakeDetermineGoCompiler.cmake b/cmake/CMakeDetermineGoCompiler.cmake index b3f8fbe271..9196880c0e 100644 --- a/cmake/CMakeDetermineGoCompiler.cmake +++ b/cmake/CMakeDetermineGoCompiler.cmake @@ -28,10 +28,12 @@ if(NOT CMAKE_Go_COMPILER) NAMES go PATHS ${Go_BIN_PATH} ) - EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) - STRING(REGEX MATCH "go[0-9]+.[0-9]+.[0-9]+[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") - message("-- The Golang compiler identification is ${VERSION}") - message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") + if(CMAKE_Go_COMPILER) + EXEC_PROGRAM(${CMAKE_Go_COMPILER} ARGS version OUTPUT_VARIABLE GOLANG_VERSION) + STRING(REGEX MATCH "go[0-9]+[.0-9]*[ /A-Za-z0-9]*" VERSION "${GOLANG_VERSION}") + message("-- The Golang compiler identification is ${VERSION}") + message("-- Check for working Golang compiler: ${CMAKE_Go_COMPILER}") + endif() endif() endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 555faff499..e4c1e2b41a 100644 
--- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -163,8 +163,9 @@ function(go_library TARGET_NAME) WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}) add_custom_target(${TARGET_NAME}_lib ALL DEPENDS ${TARGET_NAME}_timestamp ${go_library_DEPS}) add_library(${TARGET_NAME} STATIC IMPORTED) - set_target_properties(${TARGET_NAME} PROPERTIES - IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}) + set_property(TARGET ${TARGET_NAME} PROPERTY + IMPORTED_LOCATION "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}") + add_dependencies(${TARGET_NAME} ${TARGET_NAME}_lib) endfunction(go_library) function(go_binary TARGET_NAME) diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index 694ebb9ba0..cf31b4a342 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -9,7 +9,7 @@ add_subdirectory(pserver) add_subdirectory(trainer) add_subdirectory(scripts) -if(${CMAKE_Go_COMPILER}) +if(CMAKE_Go_COMPILER) add_subdirectory(go) endif() diff --git a/paddle/go/CMakeLists.txt b/paddle/go/CMakeLists.txt index 482c948a51..20f1476943 100644 --- a/paddle/go/CMakeLists.txt +++ b/paddle/go/CMakeLists.txt @@ -2,12 +2,8 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) go_library(adder SRCS adder.go) -# cc_test(cgo_test -# SRCS -# cgo_test.cc -# DEPS -# adder) -add_executable(cgo_test cgo_test.cc) -add_dependencies(cgo_test adder) -target_link_libraries(cgo_test ${GTEST_LIBRARIES} ${GTEST_MAIN_LIBRARIES} adder) -add_test(cgo_test cgo_test) +cc_test(cgo_test + SRCS + cgo_test.cc + DEPS + adder) From 589cea1f920ae2aae89b315ef24f3da9875e5e63 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 18 May 2017 00:38:11 +0800 Subject: [PATCH 24/56] add $ENV{GOROOT}/bin --- cmake/CMakeDetermineGoCompiler.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CMakeDetermineGoCompiler.cmake b/cmake/CMakeDetermineGoCompiler.cmake index 9196880c0e..abf0a00c5e 100644 --- a/cmake/CMakeDetermineGoCompiler.cmake +++ b/cmake/CMakeDetermineGoCompiler.cmake @@ -15,7 +15,7 @@ if(NOT CMAKE_Go_COMPILER) set(Go_BIN_PATH $ENV{GOPATH} $ENV{GOROOT} - $ENV{GOROOT}/../bin + $ENV{GOROOT}/bin $ENV{GO_COMPILER} /usr/bin /usr/local/bin From 071c65f9cc4d3db168397af9ce4f0bbf5d752552 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 18 May 2017 00:46:11 +0800 Subject: [PATCH 25/56] add go extern --- cmake/generic.cmake | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index e4c1e2b41a..90ec9532e5 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -195,3 +195,10 @@ function(go_test TARGET_NAME) add_custom_target(${TARGET_NAME} ALL DEPENDS ${TARGET_NAME}_timestamp ${go_test_DEPS}) add_test(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}) endfunction(go_test) + +# go_extern will download extern go project. 
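+# It simply invokes `go get` with GOPATH pointed at the build tree, so
+# the fetched sources land under ${GOPATH}/src. Typical usage: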
+# go_extern(target_name extern_source) +# go_extern(go_redis github.com/hoisie/redis) +function(go_extern TARGET_NAME) + add_custom_target(${TARGET_NAME} env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get ${ARGN}) +endfunction(go_extern) From 4f837b9f382814c883fa9c0fa9b9003f5db6e094 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 18 May 2017 09:38:12 +0800 Subject: [PATCH 26/56] add ${CMAKE_THREAD_LIBS_INIT} --- cmake/generic.cmake | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 90ec9532e5..89bf1ef1ec 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -58,7 +58,7 @@ function(cc_binary TARGET_NAME) cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_binary_SRCS}) add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies}) - target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS}) + target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS} ${CMAKE_THREAD_LIBS_INIT}) endfunction(cc_binary) # The dependency to target tensor implies that if any of @@ -75,7 +75,11 @@ function(cc_test TARGET_NAME) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES}) + target_link_libraries(${TARGET_NAME} + ${cc_test_DEPS} + ${GTEST_MAIN_LIBRARIES} + ${GTEST_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT}) add_test(${TARGET_NAME} ${TARGET_NAME}) endfunction(cc_test) @@ -107,7 +111,7 @@ function(nv_binary TARGET_NAME) cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS}) add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies}) - target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS}) + target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS} ${CMAKE_THREAD_LIBS_INIT}) endfunction(nv_binary) # The dependency to target tensor implies that if any of @@ -124,7 +128,11 @@ function(nv_test TARGET_NAME) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies}) - target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES}) + target_link_libraries(${TARGET_NAME} + ${nv_test_DEPS} + ${GTEST_MAIN_LIBRARIES} + ${GTEST_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT}) add_test(${TARGET_NAME} ${TARGET_NAME}) endfunction(nv_test) From 44a022b0c7a5bedddb91cac5da2485b8e773c84c Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 18 May 2017 13:57:40 +0800 Subject: [PATCH 27/56] Use apt to install cmake. 
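Ubuntu 16.04's apt archive already provides a recent enough CMake, so the
image no longer needs to bootstrap CMake 3.2.2 from source, which shortens
the Docker build considerably. A quick sanity check of the resulting image
(illustrative commands only; the image tag is arbitrary):

    docker build -t paddle-android-dev -f Dockerfile.android .
    docker run --rm paddle-android-dev cmake --version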
--- Dockerfile.android | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/Dockerfile.android b/Dockerfile.android index 1334799ed2..fa24f6f06c 100644 --- a/Dockerfile.android +++ b/Dockerfile.android @@ -9,25 +9,21 @@ ENV HOME=/root \ ANDROID_STANDALONE_TOOLCHAIN=/opt/android-toolchain-gcc RUN apt-get update && \ - apt-get install -y git python-dev python-pip python-numpy && \ - apt-get install -y wget curl tar unzip && \ - apt-get install -y gcc g++ locales swig && \ + apt-get install -y \ + git python-dev python-pip python-numpy \ + wget curl tar unzip gcc g++ locales clang-format-3.8 swig cmake && \ apt-get clean -y -RUN pip install --upgrade pip && \ - pip install -U 'protobuf==3.1.0' && \ - pip install -U wheel sphinx && \ - pip install pre-commit - # git credential to skip password typing RUN git config --global credential.helper store # Fix locales to en_US.UTF-8 RUN localedef -i en_US -f UTF-8 en_US.UTF-8 -RUN curl -sSL https://cmake.org/files/v3.2/cmake-3.2.2.tar.gz | tar -xz && \ - cd cmake-3.2.2 && ./bootstrap && make -j `nproc` && make install && \ - cd .. && rm -rf cmake-3.2.2 +RUN pip install --upgrade pip && \ + pip install -U 'protobuf==3.1.0' && \ + pip install -U wheel sphinx && \ + pip install pre-commit # Android NDK RUN mkdir /opt/android-ndk-tmp && \ From dfc27aaadc4a9f0929a2b4639f7b9d37cb2080cf Mon Sep 17 00:00:00 2001 From: yangyaming Date: Fri, 19 May 2017 11:21:11 +0800 Subject: [PATCH 28/56] fix document formation bugs. --- .../trainer_config_helpers/evaluators.py | 72 ++++++++++--------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 6900133fde..a5234f3e47 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -350,20 +350,23 @@ def chunk_evaluator( sequence. It calculates precision, recall and F1 scores for the chunk detection. To use chunk evaluator, several concepts need to be clarified firstly. - Chunk type is the type of the whole chunk and a chunk consists of one or several words. (For example in NER, ORG for organization name, PER for person name etc.) - Tag indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single) + + * **Chunk type** is the type of the whole chunk and a chunk consists of one or several words. (For example in NER, ORG for organization name, PER for person name etc.) + + * **Tag type** indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single) We can name a label by combining tag type and chunk type. (ie. B-ORG for begining of an organization name) - The construction of label dict should obey the following rules: - (1) Use one of the listed labelling schemes. These schemes differ in ways indicating chunk boundry. + The construction of label dictionary should obey the following rules: - .. code-block:: python - Scheme Description - plain Use the same label for the whole chunk. - IOB Two labels for chunk type X, B-X for chunk begining and I-X for chunk inside. - IOE Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside. - IOBES Four labels for chunk type X, B-X for chunk begining, I-X for chunk inside, E-X for chunk end and S-X for single word chunk. - .. code-block:: python + - Use one of the listed labelling schemes. These schemes differ in ways indicating chunk boundry. + + .. 
code-block:: text + + Scheme Description + plain Use the same label for the whole chunk. + IOB Two labels for chunk type X, B-X for chunk begining and I-X for chunk inside. + IOE Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside. + IOBES Four labels for chunk type X, B-X for chunk begining, I-X for chunk inside, E-X for chunk end and S-X for single word chunk. To make it clear, let's illustrate by an NER example. Assuming that there are three named entity types including ORG, PER and LOC which are called 'chunk type' here, @@ -372,42 +375,42 @@ def chunk_evaluator( Prefixes which are called 'tag type' here are added to chunk types and there are two tag types including B and I. Of course, the training data should be labeled accordingly. - (2) Mapping is done correctly by the listed equations and assigning protocol. + - Mapping is done correctly by the listed equations and assigning protocol. The following table are equations to extract tag type and chunk type from a label. - .. code-block:: python - tagType = label % numTagType - chunkType = label / numTagType - otherChunkType = numChunkTypes - .. code-block:: python + .. code-block:: text + + tagType = label % numTagType + chunkType = label / numTagType + otherChunkType = numChunkTypes The following table shows the mapping rule between tagType and tag type in each scheme. - .. code-block:: python - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 - .. code-block:: python + .. code-block:: text + + Scheme Begin Inside End Single + plain 0 - - - + IOB 0 1 - - + IOE - 0 1 - + IOBES 0 1 2 3 Continue the NER example, and the label dict should look like this to satify above equations: - .. code-block:: python - B-ORG 0 - I-ORG 1 - B-PER 2 - I-PER 3 - B-LOC 4 - I-LOC 5 - O 6 - .. code-block:: python + .. code-block:: text + + B-ORG 0 + I-ORG 1 + B-PER 2 + I-PER 3 + B-LOC 4 + I-LOC 5 + O 6 In this example, chunkType has three values: 0 for ORG, 1 for PER, 2 for LOC, because the scheme is "IOB" so tagType has two values: 0 for B and 1 for I. Here we will use I-LOC to explain the above mapping rules in detail. - For I-LOC, the label id is 5, so we can get tagType=1 and ChunkType=2, which means I-LOC is a part of NER chunk LOC + For I-LOC, the label id is 5, so we can get tagType=1 and chunkType=2, which means I-LOC is a part of NER chunk LOC and the tag is I. The simple usage is: @@ -416,7 +419,6 @@ def chunk_evaluator( eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types) - .. code-block:: python :param input: The input layers. :type input: LayerOutput From 4d617d2458403f626e61e37b1e89cf82877f32ec Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Fri, 19 May 2017 16:26:42 +0800 Subject: [PATCH 29/56] Add a error message. 
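In a CPU-only (gcc-compiled) build, TensorGpuApply used to be an empty
stub, so tensor expressions dispatched to the GPU path silently did
nothing. After this patch the stub aborts with an explicit message.
Caller-side sketch of the behavior change, assuming such a build:

    // before: silently a no-op
    // after:  aborts with "Since it is gcc compiled, this calculation
    //         does not support GPU implementation."
    TensorGpuApply(lhs, rhs);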
--- paddle/math/TensorEvaluate.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/paddle/math/TensorEvaluate.h b/paddle/math/TensorEvaluate.h index 9de2099b85..687bad3711 100644 --- a/paddle/math/TensorEvaluate.h +++ b/paddle/math/TensorEvaluate.h @@ -103,7 +103,10 @@ inline void TensorGpuApply(LeftType& lhs, const RightType& rhs) { } #else template -inline void TensorGpuApply(LeftType& lhs, RightType& rhs) {} +inline void TensorGpuApply(LeftType& lhs, RightType& rhs) { + LOG(FATAL) << "Since it is gcc compiled, " + "this calculation does not support GPU implementation."; +} #endif } // namespace paddle From 50d0e26746e1d9ced00449238043bbe36dc5843d Mon Sep 17 00:00:00 2001 From: liaogang Date: Sat, 20 May 2017 09:06:17 +0800 Subject: [PATCH 30/56] pass travis ci --- cmake/generic.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index d73ab176f2..efc49b8fd3 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -27,6 +27,7 @@ # # cmake_parse_arguments can help us to achieve this goal. # https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html +# # cc_library parses tensor.cc and figures out that target also depend on tensor.h. # cc_library(tensor From 55217c962d127e72ee88e042d2dd95cfe7375a65 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Tue, 16 May 2017 18:41:01 -0400 Subject: [PATCH 31/56] Implement Pserver RPC, gradient update logic, cgo part --- paddle/go/cmd/pserver/.gitignore | 1 + paddle/go/cmd/pserver/pserver.go | 33 ++++++ paddle/go/pserver/optimizer.c | 22 ++++ paddle/go/pserver/optimizer.go | 51 +++++++++ paddle/go/pserver/optimizer.h | 19 ++++ paddle/go/pserver/service.go | 165 ++++++++++++++++++++++++++++++ paddle/go/pserver/service_test.go | 154 ++++++++++++++++++++++++++++ 7 files changed, 445 insertions(+) create mode 100644 paddle/go/cmd/pserver/.gitignore create mode 100644 paddle/go/cmd/pserver/pserver.go create mode 100644 paddle/go/pserver/optimizer.c create mode 100644 paddle/go/pserver/optimizer.go create mode 100644 paddle/go/pserver/optimizer.h create mode 100644 paddle/go/pserver/service.go create mode 100644 paddle/go/pserver/service_test.go diff --git a/paddle/go/cmd/pserver/.gitignore b/paddle/go/cmd/pserver/.gitignore new file mode 100644 index 0000000000..fffd9adc4f --- /dev/null +++ b/paddle/go/cmd/pserver/.gitignore @@ -0,0 +1 @@ +pserver diff --git a/paddle/go/cmd/pserver/pserver.go b/paddle/go/cmd/pserver/pserver.go new file mode 100644 index 0000000000..41417875fb --- /dev/null +++ b/paddle/go/cmd/pserver/pserver.go @@ -0,0 +1,33 @@ +package main + +import ( + "flag" + "net" + "net/http" + "net/rpc" + "strconv" + + "github.com/PaddlePaddle/Paddle/paddle/go/pserver" +) + +func main() { + port := flag.Int("p", 0, "port of the pserver") + flag.Parse() + + s := pserver.NewService() + err := rpc.Register(s) + if err != nil { + panic(err) + } + + rpc.HandleHTTP() + l, err := net.Listen("tcp", ":"+strconv.Itoa(*port)) + if err != nil { + panic(err) + } + + err = http.Serve(l, nil) + if err != nil { + panic(err) + } +} diff --git a/paddle/go/pserver/optimizer.c b/paddle/go/pserver/optimizer.c new file mode 100644 index 0000000000..d83409297b --- /dev/null +++ b/paddle/go/pserver/optimizer.c @@ -0,0 +1,22 @@ +#include + +#include "optimizer.h" + +typedef struct { + double learning_rate; +} SGD_optimizer; + +paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { + SGD_optimizer* o = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); + o->learning_rate = learning_rate; 
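+  // The concrete SGD_optimizer doubles as the opaque paddle_optimizer
+  // handle declared in optimizer.h; paddle_release_optimizer free()s it.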
+ return (paddle_optimizer*)o; +} + +void paddle_release_optimizer(paddle_optimizer* o) { + free(o); +} + +int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type datatype, const void* gradient, int num_bytes) { + // TODO + return 0; +} diff --git a/paddle/go/pserver/optimizer.go b/paddle/go/pserver/optimizer.go new file mode 100644 index 0000000000..aa02bed3e0 --- /dev/null +++ b/paddle/go/pserver/optimizer.go @@ -0,0 +1,51 @@ +package pserver + +/* +#include "optimizer.h" +*/ +import "C" +import ( + "fmt" + "unsafe" +) + +type optimizerType int + +const ( + sgd optimizerType = iota +) + +var nullPtr = unsafe.Pointer(uintptr(0)) + +type optimizer struct { + opt *C.paddle_optimizer +} + +func newOptimizer(t optimizerType, learning_rate float64) *optimizer { + o := &optimizer{} + o.opt = C.paddle_create_SGD_optimizer(C.double(learning_rate)) + return o +} + +func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error { + if len(p.Content) != len(g.Content) { + return fmt.Errorf("parameter and gradient length not match, parameter: %d, gradient: %d", len(p.Content), len(g.Content)) + } + + if p.ElementType != g.ElementType { + return fmt.Errorf("parameter and gradient element type not match, parameter: %v, gradient: %v", p.ElementType, g.ElementType) + } + + r := C.paddle_update_parameter(o.opt, unsafe.Pointer(&p.Content[0]), C.paddle_element_type(p.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) + if r != 0 { + return fmt.Errorf("optimier returned error code: %d", r) + } + return nil +} + +func (o *optimizer) Cleanup() { + if unsafe.Pointer(o.opt) != nullPtr { + C.paddle_release_optimizer(o.opt) + o.opt = (*C.paddle_optimizer)(nullPtr) + } +} diff --git a/paddle/go/pserver/optimizer.h b/paddle/go/pserver/optimizer.h new file mode 100644 index 0000000000..e1750ca608 --- /dev/null +++ b/paddle/go/pserver/optimizer.h @@ -0,0 +1,19 @@ +#ifndef PADDLE_PSERVER_OPTIMIZER_H +#define PADDLE_PSERVER_OPTIMIZER_H + +typedef enum { + PADDLE_ELEMENT_TYPE_INT32 = 0, + PADDLE_ELEMENT_TYPE_UINT32 = 1, + PADDLE_ELEMENT_TYPE_INT64 = 2, + PADDLE_ELEMENT_TYPE_UINT64 = 3, + PADDLE_ELEMENT_TYPE_FLOAT32 = 4, + PADDLE_ELEMENT_TYPE_FLOAT64 = 5, +} paddle_element_type; + +typedef struct paddle_optimizer paddle_optimizer; + +paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate); +void paddle_release_optimizer(paddle_optimizer* o); +int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type datatype, const void* gradient, int num_bytes); + +#endif /* PADDLE_PSERVER_OPTIMIZER_H */ diff --git a/paddle/go/pserver/service.go b/paddle/go/pserver/service.go new file mode 100644 index 0000000000..0d10da9880 --- /dev/null +++ b/paddle/go/pserver/service.go @@ -0,0 +1,165 @@ +package pserver + +import ( + "errors" + "fmt" + "sync" +) + +// ElementType is the type of elements of a Parameter. +type ElementType int + +var ErrUnintialized = errors.New("pserver not initialized") +var ErrAlreadyIntialized = errors.New("pserver already initialized") + +// Supported element types +const ( + Int32 ElementType = iota + UInt32 + Int64 + UInt64 + Float32 + Float64 +) + +// Parameter is a piece of data to sync with the parameter server. +type Parameter struct { + Name string + ElementType ElementType + Content []byte +} + +// ParameterWithConfig contains the parameter and the configuration. 
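+// The Config bytes are treated as opaque by the pserver; parsing them to
+// configure the optimizer per parameter is still a TODO in InitParam.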
+type ParameterWithConfig struct { + Param Parameter + Config []byte // parameter configuration in Proto Buffer format +} + +// Gradient is the gradient of the parameter. +type Gradient Parameter + +type Service struct { + initialized chan struct{} + + mu sync.Mutex + opt *optimizer + paramMap map[string]Parameter +} + +func NewService() *Service { + s := &Service{} + s.paramMap = make(map[string]Parameter) + s.initialized = make(chan struct{}) + return s +} + +func (s *Service) BeginInitParams(config []byte, dummy *int) error { + select { + case <-s.initialized: + return ErrAlreadyIntialized + default: + } + + s.mu.Lock() + defer s.mu.Unlock() + + if s.opt != nil { + s.opt.Cleanup() + } + + // TODO(helin): parse learning rate from config + s.opt = newOptimizer(sgd, 0.01) + return nil +} + +func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { + select { + case <-s.initialized: + return ErrAlreadyIntialized + default: + } + + // TODO(helin): parse parameter config + + s.mu.Lock() + defer s.mu.Unlock() + + // TODO(helin): check if paramWithConfigs.Param.Content is + // properly memory aligned, if not, make copy to a memory + // aligned region. + s.paramMap[paramWithConfigs.Param.Name] = paramWithConfigs.Param + return nil +} + +func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { + select { + case <-s.initialized: + return ErrAlreadyIntialized + default: + } + + close(s.initialized) + return nil +} + +func (s *Service) SendGrads(grads []Gradient, dummy *int) error { + select { + case <-s.initialized: + default: + return ErrUnintialized + } + + s.mu.Lock() + s.mu.Unlock() + + for _, g := range grads { + if _, ok := s.paramMap[g.Name]; !ok { + return fmt.Errorf("parameter: %s does not exist", g.Name) + } + } + + var wg sync.WaitGroup + for _, g := range grads { + wg.Add(1) + go func(p Parameter, g Gradient) { + s.opt.UpdateParameter(p, g) + wg.Done() + }(s.paramMap[g.Name], g) + } + + wg.Wait() + return nil +} + +func (s *Service) GetParams(names []string, parameters *[]Parameter) error { + <-s.initialized + s.mu.Lock() + s.mu.Unlock() + + for _, n := range names { + if _, ok := s.paramMap[n]; !ok { + return fmt.Errorf("parameter: %s does not exist", n) + } + } + + *parameters = make([]Parameter, len(names)) + for i, n := range names { + // The parameter content (a byte slice) may change + // during RPC serialization due to write from other + // goroutine, we allow it since mini-batch based deep + // learning optimization methods are stochastic in + // nature. This race condition is allowed deliberately + // to save the program from making a copy of the + // paramter content. 
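+		// Callers that need a stable snapshot must therefore copy
+		// Content themselves before the next gradient update lands.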
+ (*parameters)[i] = s.paramMap[n] + } + + return nil +} + +func (s *Service) SaveModel(path string, dummy *int) error { + <-s.initialized + + // TODO + return nil +} diff --git a/paddle/go/pserver/service_test.go b/paddle/go/pserver/service_test.go new file mode 100644 index 0000000000..ebeff1fb89 --- /dev/null +++ b/paddle/go/pserver/service_test.go @@ -0,0 +1,154 @@ +package pserver_test + +import ( + "reflect" + "sync" + "testing" + + "github.com/PaddlePaddle/Paddle/paddle/go/pserver" +) + +func TestFull(t *testing.T) { + s := pserver.NewService() + var dummy int + err := s.BeginInitParams(nil, &dummy) + if err != nil { + t.FailNow() + } + + var p pserver.Parameter + p.Name = "param_a" + p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} + p.ElementType = pserver.Int32 + err = s.InitParam(pserver.ParameterWithConfig{p, nil}, &dummy) + if err != nil { + t.FailNow() + } + + var p1 pserver.Parameter + p1.Name = "param_b" + p1.Content = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} + p1.ElementType = pserver.Float32 + err = s.InitParam(pserver.ParameterWithConfig{p1, nil}, &dummy) + if err != nil { + t.FailNow() + } + + err = s.FinishInitParams(0, &dummy) + if err != nil { + t.FailNow() + } + + var params []pserver.Parameter + err = s.GetParams([]string{"param_b", "param_a"}, ¶ms) + if err != nil { + t.FailNow() + } + + if len(params) != 2 || !reflect.DeepEqual(params[0], p1) || !reflect.DeepEqual(params[0], p1) { + t.FailNow() + } + + grads := []pserver.Gradient{pserver.Gradient(p1), pserver.Gradient(p)} + err = s.SendGrads(grads, &dummy) + if err != nil { + t.FailNow() + } + + var params1 []pserver.Parameter + err = s.GetParams([]string{"param_b", "param_a"}, ¶ms1) + if err != nil { + t.FailNow() + } + + if len(params) != 2 { + t.FailNow() + } + + // we don't care the content, since it's already optimized with gradient + params1[0].Content = nil + params1[0].Content = nil + p.Content = nil + p1.Content = nil + + if !reflect.DeepEqual(params1[0], p1) || !reflect.DeepEqual(params1[0], p1) { + t.FailNow() + } +} + +func TestMultipleInit(t *testing.T) { + s := pserver.NewService() + var dummy int + err := s.BeginInitParams(nil, &dummy) + if err != nil { + t.FailNow() + } + + // this is fine, it's possible for client to call init + // multiple times. 
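+	// (for example, a client may retry initialization after an RPC
+	// failure, so the call must stay safe to repeat)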
+ err = s.BeginInitParams(nil, &dummy) + if err != nil { + t.FailNow() + } + + err = s.FinishInitParams(0, &dummy) + if err != nil { + t.FailNow() + } + + err = s.FinishInitParams(0, &dummy) + if err != pserver.ErrAlreadyIntialized { + t.FailNow() + } + + err = s.BeginInitParams(nil, &dummy) + if err != pserver.ErrAlreadyIntialized { + t.FailNow() + } +} + +func TestUninitialized(t *testing.T) { + s := pserver.NewService() + var dummy int + err := s.SendGrads(nil, &dummy) + if err != pserver.ErrUnintialized { + t.FailNow() + } +} + +func TestBlockUntilInitialized(t *testing.T) { + s := pserver.NewService() + var wg sync.WaitGroup + wg.Add(1) + go func() { + var params []pserver.Parameter + err := s.GetParams(nil, ¶ms) + if err != nil { + t.FailNow() + } + wg.Done() + }() + + wg.Add(1) + go func() { + var dummy int + err := s.SaveModel("", &dummy) + if err != nil { + t.FailNow() + } + wg.Done() + }() + + var dummy int + err := s.BeginInitParams(nil, &dummy) + if err != nil { + t.FailNow() + } + + err = s.FinishInitParams(0, &dummy) + if err != nil { + t.FailNow() + } + + wg.Wait() +} From 6ee5bc81c021e063369d1e9ba9333d534219a2cb Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 19:51:36 -0400 Subject: [PATCH 32/56] use function pointer for updater dispatching --- paddle/go/pserver/optimizer.c | 32 +++++++++++++++++++++++--------- paddle/go/pserver/optimizer.go | 4 ++-- paddle/go/pserver/optimizer.h | 9 ++++----- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/paddle/go/pserver/optimizer.c b/paddle/go/pserver/optimizer.c index d83409297b..123684970f 100644 --- a/paddle/go/pserver/optimizer.c +++ b/paddle/go/pserver/optimizer.c @@ -2,21 +2,35 @@ #include "optimizer.h" -typedef struct { - double learning_rate; -} SGD_optimizer; +typedef int (*update_func)(void*, void *, paddle_element_type, const void*, int); -paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { - SGD_optimizer* o = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); - o->learning_rate = learning_rate; - return (paddle_optimizer*)o; -} +typedef struct paddle_optimizer{ + update_func func; + void* optimizer; +} paddle_optimizer; void paddle_release_optimizer(paddle_optimizer* o) { free(o); } -int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type datatype, const void* gradient, int num_bytes) { +int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { + return o->func(o->optimizer, buffer, element_type, gradient, num_bytes); +} + +typedef struct { + double learning_rate; +} SGD_optimizer; + +int paddle_SGD_update_parameter(void* optimizer, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { // TODO return 0; } + +paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { + SGD_optimizer* o = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); + o->learning_rate = learning_rate; + paddle_optimizer* container = (paddle_optimizer*)malloc(sizeof(paddle_optimizer)); + container->func = paddle_SGD_update_parameter; + container->optimizer = o; + return container; +} diff --git a/paddle/go/pserver/optimizer.go b/paddle/go/pserver/optimizer.go index aa02bed3e0..8c6450bca0 100644 --- a/paddle/go/pserver/optimizer.go +++ b/paddle/go/pserver/optimizer.go @@ -18,7 +18,7 @@ const ( var nullPtr = unsafe.Pointer(uintptr(0)) type optimizer struct { - opt *C.paddle_optimizer + opt *C.struct_paddle_optimizer } func newOptimizer(t optimizerType, learning_rate 
float64) *optimizer { @@ -46,6 +46,6 @@ func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error { func (o *optimizer) Cleanup() { if unsafe.Pointer(o.opt) != nullPtr { C.paddle_release_optimizer(o.opt) - o.opt = (*C.paddle_optimizer)(nullPtr) + o.opt = (*C.struct_paddle_optimizer)(nullPtr) } } diff --git a/paddle/go/pserver/optimizer.h b/paddle/go/pserver/optimizer.h index e1750ca608..cde8da70cc 100644 --- a/paddle/go/pserver/optimizer.h +++ b/paddle/go/pserver/optimizer.h @@ -10,10 +10,9 @@ typedef enum { PADDLE_ELEMENT_TYPE_FLOAT64 = 5, } paddle_element_type; -typedef struct paddle_optimizer paddle_optimizer; - -paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate); -void paddle_release_optimizer(paddle_optimizer* o); -int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type datatype, const void* gradient, int num_bytes); +struct paddle_optimizer; +struct paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate); +void paddle_release_optimizer(struct paddle_optimizer* o); +int paddle_update_parameter(struct paddle_optimizer* o, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes); #endif /* PADDLE_PSERVER_OPTIMIZER_H */ From bd2469f21c663c192a7ab73b2b2fdfafffdf5edb Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 20:01:29 -0400 Subject: [PATCH 33/56] correct optimizer release, add test --- paddle/go/pserver/optimizer.c | 18 ++++++++++++++---- paddle/go/pserver/optimizer_test.go | 8 ++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 paddle/go/pserver/optimizer_test.go diff --git a/paddle/go/pserver/optimizer.c b/paddle/go/pserver/optimizer.c index 123684970f..36a612a56f 100644 --- a/paddle/go/pserver/optimizer.c +++ b/paddle/go/pserver/optimizer.c @@ -3,34 +3,44 @@ #include "optimizer.h" typedef int (*update_func)(void*, void *, paddle_element_type, const void*, int); +typedef void (*release_func)(void*); typedef struct paddle_optimizer{ - update_func func; + update_func update; + release_func release; void* optimizer; } paddle_optimizer; void paddle_release_optimizer(paddle_optimizer* o) { + o->release(o->optimizer); free(o); } int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { - return o->func(o->optimizer, buffer, element_type, gradient, num_bytes); + return o->update(o->optimizer, buffer, element_type, gradient, num_bytes); } typedef struct { double learning_rate; } SGD_optimizer; -int paddle_SGD_update_parameter(void* optimizer, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { +int update_SGD(void* optimizer, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { + SGD_optimizer* o = (SGD_optimizer*)optimizer; // TODO return 0; } +void release_SGD(void *optimizer) { + SGD_optimizer* o = (SGD_optimizer*)optimizer; + // nothing allocated on heap +} + paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { SGD_optimizer* o = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); o->learning_rate = learning_rate; paddle_optimizer* container = (paddle_optimizer*)malloc(sizeof(paddle_optimizer)); - container->func = paddle_SGD_update_parameter; + container->update = update_SGD; + container->release = release_SGD; container->optimizer = o; return container; } diff --git a/paddle/go/pserver/optimizer_test.go b/paddle/go/pserver/optimizer_test.go new file mode 100644 index 0000000000..64d6d092aa --- 
/dev/null +++ b/paddle/go/pserver/optimizer_test.go @@ -0,0 +1,8 @@ +package pserver + +import "testing" + +func TestSGDCreateRelease(t *testing.T) { + o := newOptimizer(sgd, 1) + o.Cleanup() +} From 4808e22e04c2448f6c65933197716a9bbb037766 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 20:15:50 -0400 Subject: [PATCH 34/56] fix typo --- paddle/go/pserver/optimizer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/go/pserver/optimizer.go b/paddle/go/pserver/optimizer.go index 8c6450bca0..64bdefe660 100644 --- a/paddle/go/pserver/optimizer.go +++ b/paddle/go/pserver/optimizer.go @@ -38,7 +38,7 @@ func (o *optimizer) UpdateParameter(p Parameter, g Gradient) error { r := C.paddle_update_parameter(o.opt, unsafe.Pointer(&p.Content[0]), C.paddle_element_type(p.ElementType), unsafe.Pointer(&g.Content[0]), C.int(len(g.Content))) if r != 0 { - return fmt.Errorf("optimier returned error code: %d", r) + return fmt.Errorf("optimizer update returned error code: %d", r) } return nil } From bc33f9b165d15254a434b3175f465dd2e4e7f70f Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 20:19:14 -0400 Subject: [PATCH 35/56] fix bug lock is released too soon --- paddle/go/pserver/service.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/go/pserver/service.go b/paddle/go/pserver/service.go index 0d10da9880..22f6cdf40d 100644 --- a/paddle/go/pserver/service.go +++ b/paddle/go/pserver/service.go @@ -110,7 +110,7 @@ func (s *Service) SendGrads(grads []Gradient, dummy *int) error { } s.mu.Lock() - s.mu.Unlock() + defer s.mu.Unlock() for _, g := range grads { if _, ok := s.paramMap[g.Name]; !ok { @@ -134,7 +134,7 @@ func (s *Service) SendGrads(grads []Gradient, dummy *int) error { func (s *Service) GetParams(names []string, parameters *[]Parameter) error { <-s.initialized s.mu.Lock() - s.mu.Unlock() + defer s.mu.Unlock() for _, n := range names { if _, ok := s.paramMap[n]; !ok { From e39e14d1572be690a513dff435b639221c35311d Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 20:25:47 -0400 Subject: [PATCH 36/56] handle error from s.opt.UpdateParameter --- paddle/go/pserver/service.go | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/paddle/go/pserver/service.go b/paddle/go/pserver/service.go index 22f6cdf40d..47a862c5ad 100644 --- a/paddle/go/pserver/service.go +++ b/paddle/go/pserver/service.go @@ -109,6 +109,11 @@ func (s *Service) SendGrads(grads []Gradient, dummy *int) error { return ErrUnintialized } + count := len(grads) + if count == 0 { + return nil + } + s.mu.Lock() defer s.mu.Unlock() @@ -118,16 +123,25 @@ func (s *Service) SendGrads(grads []Gradient, dummy *int) error { } } - var wg sync.WaitGroup + errCh := make(chan error, count) for _, g := range grads { - wg.Add(1) go func(p Parameter, g Gradient) { - s.opt.UpdateParameter(p, g) - wg.Done() + err := s.opt.UpdateParameter(p, g) + errCh <- err }(s.paramMap[g.Name], g) } - wg.Wait() + recv := 0 + for err := range errCh { + if err != nil { + return err + } + + recv++ + if recv == count { + break + } + } return nil } From f4bc10daac82d8e43406f19faac673ae972b9152 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 20:27:39 -0400 Subject: [PATCH 37/56] update comment --- paddle/go/pserver/service_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/go/pserver/service_test.go b/paddle/go/pserver/service_test.go index ebeff1fb89..78dd4d6b58 100644 --- 
a/paddle/go/pserver/service_test.go +++ b/paddle/go/pserver/service_test.go @@ -65,7 +65,8 @@ func TestFull(t *testing.T) { t.FailNow() } - // we don't care the content, since it's already optimized with gradient + // don't compare content, since it's already changed by + // gradient update. params1[0].Content = nil params1[0].Content = nil p.Content = nil From 9920a06cc6a4a8987b85cd2ad0d0898c74a2bacf Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 17 May 2017 20:32:40 -0400 Subject: [PATCH 38/56] rename local variable --- paddle/go/pserver/optimizer.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/paddle/go/pserver/optimizer.c b/paddle/go/pserver/optimizer.c index 36a612a56f..8d63089b4c 100644 --- a/paddle/go/pserver/optimizer.c +++ b/paddle/go/pserver/optimizer.c @@ -36,11 +36,11 @@ void release_SGD(void *optimizer) { } paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { - SGD_optimizer* o = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); - o->learning_rate = learning_rate; - paddle_optimizer* container = (paddle_optimizer*)malloc(sizeof(paddle_optimizer)); - container->update = update_SGD; - container->release = release_SGD; - container->optimizer = o; - return container; + SGD_optimizer* impl = (SGD_optimizer*)malloc(sizeof(SGD_optimizer)); + impl->learning_rate = learning_rate; + paddle_optimizer* opt = (paddle_optimizer*)malloc(sizeof(paddle_optimizer)); + opt->update = update_SGD; + opt->release = release_SGD; + opt->optimizer = impl; + return opt; } From 27fdccc38040349125f0ab601870649a4c5d4e3e Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 19 May 2017 15:04:45 -0400 Subject: [PATCH 39/56] fix according to comments --- paddle/go/pserver/service.go | 15 +++++---------- paddle/go/pserver/service_test.go | 23 ++++++++++++----------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/paddle/go/pserver/service.go b/paddle/go/pserver/service.go index 47a862c5ad..3bf26b7651 100644 --- a/paddle/go/pserver/service.go +++ b/paddle/go/pserver/service.go @@ -9,8 +9,7 @@ import ( // ElementType is the type of elements of a Parameter. 
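 // Its values line up with paddle_element_type in optimizer.h
 // (Int32 == PADDLE_ELEMENT_TYPE_INT32, and so on).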
type ElementType int -var ErrUnintialized = errors.New("pserver not initialized") -var ErrAlreadyIntialized = errors.New("pserver already initialized") +var ErrAlreadyInitialized = errors.New("pserver already initialized") // Supported element types const ( @@ -56,7 +55,7 @@ func NewService() *Service { func (s *Service) BeginInitParams(config []byte, dummy *int) error { select { case <-s.initialized: - return ErrAlreadyIntialized + return ErrAlreadyInitialized default: } @@ -75,7 +74,7 @@ func (s *Service) BeginInitParams(config []byte, dummy *int) error { func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { select { case <-s.initialized: - return ErrAlreadyIntialized + return ErrAlreadyInitialized default: } @@ -94,7 +93,7 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { select { case <-s.initialized: - return ErrAlreadyIntialized + return ErrAlreadyInitialized default: } @@ -103,11 +102,7 @@ func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { } func (s *Service) SendGrads(grads []Gradient, dummy *int) error { - select { - case <-s.initialized: - default: - return ErrUnintialized - } + <-s.initialized count := len(grads) if count == 0 { diff --git a/paddle/go/pserver/service_test.go b/paddle/go/pserver/service_test.go index 78dd4d6b58..437d14b28c 100644 --- a/paddle/go/pserver/service_test.go +++ b/paddle/go/pserver/service_test.go @@ -98,21 +98,12 @@ func TestMultipleInit(t *testing.T) { } err = s.FinishInitParams(0, &dummy) - if err != pserver.ErrAlreadyIntialized { + if err != pserver.ErrAlreadyInitialized { t.FailNow() } err = s.BeginInitParams(nil, &dummy) - if err != pserver.ErrAlreadyIntialized { - t.FailNow() - } -} - -func TestUninitialized(t *testing.T) { - s := pserver.NewService() - var dummy int - err := s.SendGrads(nil, &dummy) - if err != pserver.ErrUnintialized { + if err != pserver.ErrAlreadyInitialized { t.FailNow() } } @@ -140,6 +131,16 @@ func TestBlockUntilInitialized(t *testing.T) { wg.Done() }() + wg.Add(1) + go func() { + var dummy int + err := s.SendGrads(nil, &dummy) + if err != nil { + t.FailNow() + } + wg.Done() + }() + var dummy int err := s.BeginInitParams(nil, &dummy) if err != nil { From 44d60bd91eb696a617f50cacc6257b9af76accc1 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 19 May 2017 15:11:23 -0400 Subject: [PATCH 40/56] add comments for exported functions --- paddle/go/pserver/service.go | 13 ++++++++++++- paddle/go/pserver/service_test.go | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/paddle/go/pserver/service.go b/paddle/go/pserver/service.go index 3bf26b7651..a009b45633 100644 --- a/paddle/go/pserver/service.go +++ b/paddle/go/pserver/service.go @@ -37,6 +37,7 @@ type ParameterWithConfig struct { // Gradient is the gradient of the parameter. type Gradient Parameter +// Service is the RPC service for pserver. type Service struct { initialized chan struct{} @@ -45,6 +46,7 @@ type Service struct { paramMap map[string]Parameter } +// NewService creates a new service. func NewService() *Service { s := &Service{} s.paramMap = make(map[string]Parameter) @@ -52,6 +54,8 @@ func NewService() *Service { return s } +// BeginInitParams tells the parameter server that the parameter +// initialization has begun. 
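+// It may be called repeatedly until FinishInitParams ends the
+// initialization phase; each call recreates the optimizer.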
func (s *Service) BeginInitParams(config []byte, dummy *int) error { select { case <-s.initialized: @@ -71,6 +75,7 @@ func (s *Service) BeginInitParams(config []byte, dummy *int) error { return nil } +// InitParam initializes a parameter. func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) error { select { case <-s.initialized: @@ -90,6 +95,8 @@ func (s *Service) InitParam(paramWithConfigs ParameterWithConfig, dummy *int) er return nil } +// FinishInitParams tells the parameter server that the parameter +// initialization has finished. func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { select { case <-s.initialized: @@ -101,6 +108,8 @@ func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { return nil } +// SendGrads sends gradients to parameter servers for parameter +// optimization. func (s *Service) SendGrads(grads []Gradient, dummy *int) error { <-s.initialized @@ -140,6 +149,7 @@ func (s *Service) SendGrads(grads []Gradient, dummy *int) error { return nil } +// GetParams gets parameters from the parameter server. func (s *Service) GetParams(names []string, parameters *[]Parameter) error { <-s.initialized s.mu.Lock() @@ -166,7 +176,8 @@ func (s *Service) GetParams(names []string, parameters *[]Parameter) error { return nil } -func (s *Service) SaveModel(path string, dummy *int) error { +// Save tells the parameter server to save parameters. +func (s *Service) Save(path string, dummy *int) error { <-s.initialized // TODO diff --git a/paddle/go/pserver/service_test.go b/paddle/go/pserver/service_test.go index 437d14b28c..23b2d17dc7 100644 --- a/paddle/go/pserver/service_test.go +++ b/paddle/go/pserver/service_test.go @@ -124,7 +124,7 @@ func TestBlockUntilInitialized(t *testing.T) { wg.Add(1) go func() { var dummy int - err := s.SaveModel("", &dummy) + err := s.Save("", &dummy) if err != nil { t.FailNow() } From ea18f2eeb6bd609be321d997a3c21ded52801fa7 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 19 May 2017 15:15:46 -0400 Subject: [PATCH 41/56] make test more precise --- paddle/go/pserver/service_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/paddle/go/pserver/service_test.go b/paddle/go/pserver/service_test.go index 23b2d17dc7..6aa7f47c74 100644 --- a/paddle/go/pserver/service_test.go +++ b/paddle/go/pserver/service_test.go @@ -110,6 +110,7 @@ func TestMultipleInit(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) { s := pserver.NewService() + ch := make(chan struct{}, 3) var wg sync.WaitGroup wg.Add(1) go func() { @@ -119,6 +120,7 @@ func TestBlockUntilInitialized(t *testing.T) { t.FailNow() } wg.Done() + ch <- struct{}{} }() wg.Add(1) @@ -129,6 +131,7 @@ func TestBlockUntilInitialized(t *testing.T) { t.FailNow() } wg.Done() + ch <- struct{}{} }() wg.Add(1) @@ -139,6 +142,7 @@ func TestBlockUntilInitialized(t *testing.T) { t.FailNow() } wg.Done() + ch <- struct{}{} }() var dummy int @@ -147,6 +151,13 @@ func TestBlockUntilInitialized(t *testing.T) { t.FailNow() } + select { + case <-ch: + // some function returned before initialization is completed. + t.FailNow() + default: + } + err = s.FinishInitParams(0, &dummy) if err != nil { t.FailNow() From e2fae1685de73772fb2b46ed67b4ddc0b897c83c Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 19 May 2017 15:30:53 -0400 Subject: [PATCH 42/56] SendGrad will return error if pserver is not initialized. 
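Unlike GetParams and Save, which legitimately block until initialization
completes, sending gradients before any parameter exists is a caller
error, so SendGrads now fails fast. The non-blocking check is the usual
select-with-default on the initialized channel (sketch of the idiom used
in the diff below):

	select {
	case <-s.initialized:
	default:
		return ErrUninitialized
	}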
--- paddle/go/pserver/service.go | 7 ++++++- paddle/go/pserver/service_test.go | 22 ++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/paddle/go/pserver/service.go b/paddle/go/pserver/service.go index a009b45633..f43e59403a 100644 --- a/paddle/go/pserver/service.go +++ b/paddle/go/pserver/service.go @@ -10,6 +10,7 @@ import ( type ElementType int var ErrAlreadyInitialized = errors.New("pserver already initialized") +var ErrUninitialized = errors.New("pserver not fully initialized") // Supported element types const ( @@ -111,7 +112,11 @@ func (s *Service) FinishInitParams(dummy0 int, dummy1 *int) error { // SendGrads sends gradients to parameter servers for parameter // optimization. func (s *Service) SendGrads(grads []Gradient, dummy *int) error { - <-s.initialized + select { + case <-s.initialized: + default: + return ErrUninitialized + } count := len(grads) if count == 0 { diff --git a/paddle/go/pserver/service_test.go b/paddle/go/pserver/service_test.go index 6aa7f47c74..10185bd0f2 100644 --- a/paddle/go/pserver/service_test.go +++ b/paddle/go/pserver/service_test.go @@ -108,9 +108,18 @@ func TestMultipleInit(t *testing.T) { } } +func TestUninitialized(t *testing.T) { + s := pserver.NewService() + var dummy int + err := s.SendGrads(nil, &dummy) + if err != pserver.ErrUninitialized { + t.FailNow() + } +} + func TestBlockUntilInitialized(t *testing.T) { s := pserver.NewService() - ch := make(chan struct{}, 3) + ch := make(chan struct{}, 2) var wg sync.WaitGroup wg.Add(1) go func() { @@ -134,17 +143,6 @@ func TestBlockUntilInitialized(t *testing.T) { ch <- struct{}{} }() - wg.Add(1) - go func() { - var dummy int - err := s.SendGrads(nil, &dummy) - if err != nil { - t.FailNow() - } - wg.Done() - ch <- struct{}{} - }() - var dummy int err := s.BeginInitParams(nil, &dummy) if err != nil { From 599eb3663e3124f976dfa3d487e47534df61a899 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Sun, 21 May 2017 19:29:34 -0400 Subject: [PATCH 43/56] do clang-format --- paddle/go/pserver/optimizer.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/paddle/go/pserver/optimizer.c b/paddle/go/pserver/optimizer.c index 8d63089b4c..b8da3ec959 100644 --- a/paddle/go/pserver/optimizer.c +++ b/paddle/go/pserver/optimizer.c @@ -2,10 +2,10 @@ #include "optimizer.h" -typedef int (*update_func)(void*, void *, paddle_element_type, const void*, int); +typedef int (*update_func)(void*, void*, paddle_element_type, const void*, int); typedef void (*release_func)(void*); -typedef struct paddle_optimizer{ +typedef struct paddle_optimizer { update_func update; release_func release; void* optimizer; @@ -16,23 +16,29 @@ void paddle_release_optimizer(paddle_optimizer* o) { free(o); } -int paddle_update_parameter(paddle_optimizer* o, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { +int paddle_update_parameter(paddle_optimizer* o, + void* buffer, + paddle_element_type element_type, + const void* gradient, + int num_bytes) { return o->update(o->optimizer, buffer, element_type, gradient, num_bytes); } -typedef struct { - double learning_rate; -} SGD_optimizer; +typedef struct { double learning_rate; } SGD_optimizer; -int update_SGD(void* optimizer, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes) { +int update_SGD(void* optimizer, + void* buffer, + paddle_element_type element_type, + const void* gradient, + int num_bytes) { SGD_optimizer* o = (SGD_optimizer*)optimizer; // TODO 
return 0; } -void release_SGD(void *optimizer) { - SGD_optimizer* o = (SGD_optimizer*)optimizer; - // nothing allocated on heap +void release_SGD(void* optimizer) { + SGD_optimizer* o = (SGD_optimizer*)optimizer; + // nothing allocated on heap } paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate) { From 97dbb7609c80eb6195f18f28ddf8c7f9134a9c4b Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Sun, 21 May 2017 19:30:52 -0400 Subject: [PATCH 44/56] resolve compile caused by merging branches --- paddle/go/pserver/client.go | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/paddle/go/pserver/client.go b/paddle/go/pserver/client.go index 5b110af648..1c98aea6d1 100644 --- a/paddle/go/pserver/client.go +++ b/paddle/go/pserver/client.go @@ -1,34 +1,5 @@ package pserver -// ElementType is the type of elements of a Parameter. -type ElementType int - -// Supported element types -const ( - Int32 ElementType = iota - UInt32 - Int64 - UInt64 - Float32 - Float64 -) - -// Parameter is a piece of data to sync with the parameter server. -type Parameter struct { - Name string - ElementType ElementType - Content []byte -} - -// ParameterWithConfig contains the parameter and the configuration. -type ParameterWithConfig struct { - Param Parameter - Config []byte // parameter configuration in Proto Buffer format -} - -// Gradient is the gradient of the parameter. -type Gradient Parameter - // Client is the client to parameter servers. type Client struct { } From 25c3f118f00b601fa64d3984a7eb44c572bf287e Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Sun, 21 May 2017 20:32:55 -0400 Subject: [PATCH 45/56] apply clang-format --- paddle/go/pserver/optimizer.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/paddle/go/pserver/optimizer.h b/paddle/go/pserver/optimizer.h index cde8da70cc..a7e3ff0530 100644 --- a/paddle/go/pserver/optimizer.h +++ b/paddle/go/pserver/optimizer.h @@ -2,10 +2,10 @@ #define PADDLE_PSERVER_OPTIMIZER_H typedef enum { - PADDLE_ELEMENT_TYPE_INT32 = 0, - PADDLE_ELEMENT_TYPE_UINT32 = 1, - PADDLE_ELEMENT_TYPE_INT64 = 2, - PADDLE_ELEMENT_TYPE_UINT64 = 3, + PADDLE_ELEMENT_TYPE_INT32 = 0, + PADDLE_ELEMENT_TYPE_UINT32 = 1, + PADDLE_ELEMENT_TYPE_INT64 = 2, + PADDLE_ELEMENT_TYPE_UINT64 = 3, PADDLE_ELEMENT_TYPE_FLOAT32 = 4, PADDLE_ELEMENT_TYPE_FLOAT64 = 5, } paddle_element_type; @@ -13,6 +13,10 @@ typedef enum { struct paddle_optimizer; struct paddle_optimizer* paddle_create_SGD_optimizer(double learning_rate); void paddle_release_optimizer(struct paddle_optimizer* o); -int paddle_update_parameter(struct paddle_optimizer* o, void *buffer, paddle_element_type element_type, const void* gradient, int num_bytes); +int paddle_update_parameter(struct paddle_optimizer* o, + void* buffer, + paddle_element_type element_type, + const void* gradient, + int num_bytes); #endif /* PADDLE_PSERVER_OPTIMIZER_H */ From a0a3a68551dd41a168eaaf88b9de7bcd37bfe5e8 Mon Sep 17 00:00:00 2001 From: xuwei06 Date: Fri, 19 May 2017 11:52:15 -0700 Subject: [PATCH 46/56] Add Sqrt and Reciprocal activation --- .../activations/ActivationFunction.cpp | 38 +++++++++++++++++++ .../trainer_config_helpers/activations.py | 26 ++++++++++++- .../trainer_config_helpers/layer_math.py | 2 + .../tests/configs/math_ops.py | 2 + .../tests/configs/protostr/math_ops.protostr | 34 ++++++++++++++++- python/paddle/v2/trainer.py | 2 +- 6 files changed, 101 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/activations/ActivationFunction.cpp 
b/paddle/gserver/activations/ActivationFunction.cpp index c541b72e10..a40530f413 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -396,6 +396,44 @@ Error __must_check backward(Argument& act) { } END_DEFINE_ACTIVATION(exponential) +/** + * @brief Reciprocal Activation. + * \f[ + * f(z) = 1/z + * \f] + */ +BEGIN_DEFINE_ACTIVATION(reciprocal) +Error __must_check forward(Argument& act) { + act.value->reciprocal2(); + return Error(); +} + +Error __must_check backward(Argument& act) { + act.grad->dotMulSquare(*act.value); + act.grad->neg(); + return Error(); +} +END_DEFINE_ACTIVATION(reciprocal) + +/** + * @brief Square Root Activation. + * \f[ + * f(z) = sqrt(z) + * \f] + */ +BEGIN_DEFINE_ACTIVATION(sqrt) +Error __must_check forward(Argument& act) { + act.value->sqrt2(); + return Error(); +} + +Error __must_check backward(Argument& act) { + act.grad->dotDiv(*act.grad, *act.value); + act.grad->mulScalar(0.5); + return Error(); +} +END_DEFINE_ACTIVATION(sqrt) + /** * @brief Logarithm Activation. * \f[ diff --git a/python/paddle/trainer_config_helpers/activations.py b/python/paddle/trainer_config_helpers/activations.py index 06be3e4599..c749fa827f 100644 --- a/python/paddle/trainer_config_helpers/activations.py +++ b/python/paddle/trainer_config_helpers/activations.py @@ -17,7 +17,7 @@ __all__ = [ "IdentityActivation", "LinearActivation", 'SequenceSoftmaxActivation', 'ExpActivation', "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation", "AbsActivation", "SquareActivation", "BaseActivation", - "LogActivation" + "LogActivation", "SqrtActivation", "ReciprocalActivation" ] @@ -224,3 +224,27 @@ class LogActivation(BaseActivation): def __init__(self): BaseActivation.__init__(self, 'log', False) + + +class SqrtActivation(BaseActivation): + """ + Square Root Activation. + + .. math:: + f(z) = sqrt(z) + """ + + def __init__(self): + BaseActivation.__init__(self, 'sqrt', False) + + +class ReciprocalActivation(BaseActivation): + """ + Reciprocal Activation. + + .. 
math:: + f(z) = 1/z + """ + + def __init__(self): + BaseActivation.__init__(self, 'reciprocal', False) diff --git a/python/paddle/trainer_config_helpers/layer_math.py b/python/paddle/trainer_config_helpers/layer_math.py index 544b443825..e1c8f0c350 100644 --- a/python/paddle/trainer_config_helpers/layer_math.py +++ b/python/paddle/trainer_config_helpers/layer_math.py @@ -40,6 +40,8 @@ register_unary_math_op('sigmoid', act.SigmoidActivation()) register_unary_math_op('tanh', act.TanhActivation()) register_unary_math_op('square', act.SquareActivation()) register_unary_math_op('relu', act.ReluActivation()) +register_unary_math_op('sqrt', act.SqrtActivation()) +register_unary_math_op('reciprocal', act.ReciprocalActivation()) def add(layeroutput, other): diff --git a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py index 24c901c8ee..a607a62c99 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/math_ops.py +++ b/python/paddle/trainer_config_helpers/tests/configs/math_ops.py @@ -4,6 +4,8 @@ settings(batch_size=1000, learning_rate=1e-5) x = data_layer(name='data', size=100) x = layer_math.exp(x) +x = layer_math.sqrt(x) +x = layer_math.reciprocal(x) x = layer_math.log(x) x = layer_math.abs(x) x = layer_math.sigmoid(x) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr index 9b8a2ad968..eaaf7fd6f5 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/math_ops.protostr @@ -20,13 +20,43 @@ layers { } } } +layers { + name: "__sqrt_0__" + type: "mixed" + size: 100 + active_type: "sqrt" + inputs { + input_layer_name: "__exp_0__" + proj_conf { + type: "identity" + name: "___sqrt_0__.w0" + input_size: 100 + output_size: 100 + } + } +} +layers { + name: "__reciprocal_0__" + type: "mixed" + size: 100 + active_type: "reciprocal" + inputs { + input_layer_name: "__sqrt_0__" + proj_conf { + type: "identity" + name: "___reciprocal_0__.w0" + input_size: 100 + output_size: 100 + } + } +} layers { name: "__log_0__" type: "mixed" size: 100 active_type: "log" inputs { - input_layer_name: "__exp_0__" + input_layer_name: "__reciprocal_0__" proj_conf { type: "identity" name: "___log_0__.w0" @@ -351,6 +381,8 @@ sub_models { name: "root" layer_names: "data" layer_names: "__exp_0__" + layer_names: "__sqrt_0__" + layer_names: "__reciprocal_0__" layer_names: "__log_0__" layer_names: "__abs_0__" layer_names: "__sigmoid_0__" diff --git a/python/paddle/v2/trainer.py b/python/paddle/v2/trainer.py index ec9fcfb749..8fdb67cc26 100644 --- a/python/paddle/v2/trainer.py +++ b/python/paddle/v2/trainer.py @@ -177,7 +177,7 @@ class SGD(object): Testing method. Will test input data. :param reader: A reader that reads and yeilds data items. - :type reader: collections.Iterable + :type reader: collections.Iterable :param feeding: Feeding is a map of neural network input name and array index that reader returns. 
:type feeding: dict From b3ea63470518247c7df929459372c8f424294b44 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Mon, 22 May 2017 12:47:54 -0700 Subject: [PATCH 47/56] add go 1.8.1 into dev image --- Dockerfile | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Dockerfile b/Dockerfile index ad0d086d3c..571c3e1476 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,6 +33,15 @@ RUN apt-get update && \ clang-3.8 llvm-3.8 libclang-3.8-dev && \ apt-get clean -y +# Install Go +RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ + tar -C /usr/local -xzf go.tgz && \ + mkdir /root/gopath && \ + rm go.tgz +ENV GOROOT=/usr/local/go GOPATH=/root/gopath +# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. +ENV PATH=${PATH}:${GOROOT}/bin + # git credential to skip password typing RUN git config --global credential.helper store From 2e4c0bd2eac57f1b31c11011e9ba2160646110ce Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 19 May 2017 17:06:27 +0800 Subject: [PATCH 48/56] enable global gradient_clipping_threshold correct a typo optimize code fix a bug --- paddle/parameter/FirstOrderOptimizer.cpp | 30 +++++++++++++++---- paddle/parameter/OptimizerWithRegularizer.cpp | 3 +- paddle/parameter/ParameterOptimizer.h | 10 +++++++ proto/TrainerConfig.proto | 3 ++ python/paddle/trainer/config_parser.py | 1 + .../trainer_config_helpers/optimizers.py | 3 +- 6 files changed, 42 insertions(+), 8 deletions(-) diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/parameter/FirstOrderOptimizer.cpp index dbb738e98b..02e600adb9 100644 --- a/paddle/parameter/FirstOrderOptimizer.cpp +++ b/paddle/parameter/FirstOrderOptimizer.cpp @@ -161,6 +161,7 @@ void AdaDeltaParameterOptimizer::update(const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) const { CHECK(sparseId == -1LU) << "Sparse update is not supported"; + BaseMatrix& value = *vecs[PARAMETER_VALUE]; BaseMatrix& grad = *vecs[PARAMETER_GRADIENT]; BaseMatrix& mom = *vecs[PARAMETER_MOMENTUM]; @@ -265,6 +266,7 @@ void AdamParameterOptimizer::update(const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) const { CHECK(sparseId == -1UL) << "Sparse update is not supported"; + real beta1_power = std::pow(beta1_, step_); real beta2_power = std::pow(beta2_, step_); real learningRate = config.learning_rate() * learningRate_; @@ -303,18 +305,34 @@ void AdamaxParameterOptimizer::update(const VectorPtr vecs[], void OptimizerWithGradientClipping::update(const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) const { + // globalGradientClipping(vecs, config, FLAGS_log_clipping); + real global_thres_ = optConfig_.gradient_clipping_threshold(); + real local_thres_ = config.gradient_clipping_threshold(); + + real threshold; + std::string field; + if (global_thres_ > 0.0f && local_thres_ > 0.0f) { + threshold = global_thres_ < local_thres_ ? global_thres_ : local_thres_; + field = global_thres_ < local_thres_ ? 
"global" : "local"; + } else if (global_thres_ > 0.0f) { + threshold = global_thres_; + field = "global"; + } else { + threshold = local_thres_; + field = "local"; + } + real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax(); - if (maxAbsGrad > config.gradient_clipping_threshold()) { + if (maxAbsGrad > threshold) { if (FLAGS_log_clipping) { real avgAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsSum() / vecs[PARAMETER_GRADIENT]->getSize(); - LOG(INFO) << "parameter=" << config.name() << " need clipping," - << " max grad=" << maxAbsGrad << " avg grad=" << avgAbsGrad; + LOG(INFO) << "parameter=" << config.name() << " need clipping by " + << field << " threshold=" << threshold + << ", max grad=" << maxAbsGrad << ", avg grad=" << avgAbsGrad; } - vecs[PARAMETER_GRADIENT]->clip(-config.gradient_clipping_threshold(), - config.gradient_clipping_threshold()); + vecs[PARAMETER_GRADIENT]->clip(-threshold, threshold); } - optimizer_->update(vecs, config, sparseId); } diff --git a/paddle/parameter/OptimizerWithRegularizer.cpp b/paddle/parameter/OptimizerWithRegularizer.cpp index 85f13c8bc0..7910b12444 100644 --- a/paddle/parameter/OptimizerWithRegularizer.cpp +++ b/paddle/parameter/OptimizerWithRegularizer.cpp @@ -131,7 +131,8 @@ ParameterOptimizer* OptimizerWithRegularizer::create( bool inPserver) { ParameterOptimizer* optimizer = ParameterOptimizer::create(optConfig, inPserver); - if (paraConfig.gradient_clipping_threshold() > 0.0f && + if ((optConfig.gradient_clipping_threshold() > 0.0f || + paraConfig.gradient_clipping_threshold() > 0.0f) && !dynamic_cast(optimizer)) { optimizer = new OptimizerWithGradientClipping(optConfig, optimizer); } diff --git a/paddle/parameter/ParameterOptimizer.h b/paddle/parameter/ParameterOptimizer.h index 2bdc793d60..38d432ba9b 100644 --- a/paddle/parameter/ParameterOptimizer.h +++ b/paddle/parameter/ParameterOptimizer.h @@ -167,8 +167,12 @@ public: } parameterTypes_.push_back(type); } + real getLearningRate() const { return learningRate_; } + // real getGradientClippingThreshold() const {return + // gradientClippingThreshold_;} + virtual void setNoDecay() { applyDecay_ = false; } static ParameterOptimizer* create(const OptimizationConfig& optConfig, @@ -201,6 +205,12 @@ protected: * so, if lr change in StartBatch, please assign to learningRate_ */ real learningRate_; + + /** + * global threshold for gradient clipping, + * init value is opt_config.gradient_clipping_thresholod + */ + std::unique_ptr learningRateScheduler_; int64_t pass_; // current training pass (starting from 0) bool firstTime_; diff --git a/proto/TrainerConfig.proto b/proto/TrainerConfig.proto index a334e07b62..a819d20d11 100644 --- a/proto/TrainerConfig.proto +++ b/proto/TrainerConfig.proto @@ -128,6 +128,9 @@ message OptimizationConfig { // when async_lagged_grad_discard_ratio * num_gradient_servers commit passed, // current async gradient will be discard silently. 
optional double async_lagged_grad_discard_ratio = 37 [default = 1.5]; + + // global threshold for gradient clipping + optional double gradient_clipping_threshold = 38 [default = 0.0]; }; message TrainerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9135f38719..9fe8794691 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -3377,6 +3377,7 @@ settings = dict( algorithm='async_sgd', async_lagged_grad_discard_ratio=1.5, learning_method='momentum', + gradient_clipping_threshold=None, num_batches_per_send_parameter=None, num_batches_per_get_parameter=None, center_parameter_update_method=None, diff --git a/python/paddle/trainer_config_helpers/optimizers.py b/python/paddle/trainer_config_helpers/optimizers.py index a53ebe160b..c3495ee110 100644 --- a/python/paddle/trainer_config_helpers/optimizers.py +++ b/python/paddle/trainer_config_helpers/optimizers.py @@ -408,7 +408,8 @@ def settings(batch_size, args = [ 'batch_size', 'learning_rate', 'learning_rate_decay_a', - 'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args' + 'learning_rate_decay_b', 'learning_rate_schedule', 'learning_rate_args', + 'gradient_clipping_threshold' ] kwargs = dict() kwargs['algorithm'] = algorithm From 4d4593b91392a2a1414a2cceca7cea62879d01ee Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Mon, 22 May 2017 18:24:54 +0800 Subject: [PATCH 49/56] code cleanup --- paddle/parameter/FirstOrderOptimizer.cpp | 3 ++- paddle/parameter/ParameterOptimizer.h | 8 -------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/parameter/FirstOrderOptimizer.cpp index 02e600adb9..207fb33f4e 100644 --- a/paddle/parameter/FirstOrderOptimizer.cpp +++ b/paddle/parameter/FirstOrderOptimizer.cpp @@ -305,12 +305,13 @@ void AdamaxParameterOptimizer::update(const VectorPtr vecs[], void OptimizerWithGradientClipping::update(const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) const { - // globalGradientClipping(vecs, config, FLAGS_log_clipping); real global_thres_ = optConfig_.gradient_clipping_threshold(); real local_thres_ = config.gradient_clipping_threshold(); real threshold; std::string field; + // Get the minimum of local and global threshold + // as the real threshold for clipping if (global_thres_ > 0.0f && local_thres_ > 0.0f) { threshold = global_thres_ < local_thres_ ? global_thres_ : local_thres_; field = global_thres_ < local_thres_ ? 
"global" : "local"; diff --git a/paddle/parameter/ParameterOptimizer.h b/paddle/parameter/ParameterOptimizer.h index 38d432ba9b..f98ba569b5 100644 --- a/paddle/parameter/ParameterOptimizer.h +++ b/paddle/parameter/ParameterOptimizer.h @@ -170,9 +170,6 @@ public: real getLearningRate() const { return learningRate_; } - // real getGradientClippingThreshold() const {return - // gradientClippingThreshold_;} - virtual void setNoDecay() { applyDecay_ = false; } static ParameterOptimizer* create(const OptimizationConfig& optConfig, @@ -206,11 +203,6 @@ protected: */ real learningRate_; - /** - * global threshold for gradient clipping, - * init value is opt_config.gradient_clipping_thresholod - */ - std::unique_ptr learningRateScheduler_; int64_t pass_; // current training pass (starting from 0) bool firstTime_; From 8c9ab5f183870065f973170b56ca73a38ee6ce80 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 23 May 2017 15:06:58 +0800 Subject: [PATCH 50/56] rename two variables --- paddle/parameter/FirstOrderOptimizer.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/parameter/FirstOrderOptimizer.cpp index 207fb33f4e..826864b99e 100644 --- a/paddle/parameter/FirstOrderOptimizer.cpp +++ b/paddle/parameter/FirstOrderOptimizer.cpp @@ -305,21 +305,22 @@ void AdamaxParameterOptimizer::update(const VectorPtr vecs[], void OptimizerWithGradientClipping::update(const VectorPtr vecs[], const ParameterConfig& config, size_t sparseId) const { - real global_thres_ = optConfig_.gradient_clipping_threshold(); - real local_thres_ = config.gradient_clipping_threshold(); + real globalThreshold = optConfig_.gradient_clipping_threshold(); + real localThreshold = config.gradient_clipping_threshold(); real threshold; std::string field; // Get the minimum of local and global threshold // as the real threshold for clipping - if (global_thres_ > 0.0f && local_thres_ > 0.0f) { - threshold = global_thres_ < local_thres_ ? global_thres_ : local_thres_; - field = global_thres_ < local_thres_ ? "global" : "local"; - } else if (global_thres_ > 0.0f) { - threshold = global_thres_; + if (globalThreshold > 0.0f && localThreshold > 0.0f) { + threshold = + globalThreshold < localThreshold ? globalThreshold : localThreshold; + field = globalThreshold < localThreshold ? "global" : "local"; + } else if (globalThreshold > 0.0f) { + threshold = globalThreshold; field = "global"; } else { - threshold = local_thres_; + threshold = localThreshold; field = "local"; } From c64a142ceaef016020961a50aff8359cc76bde83 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 23 May 2017 17:23:54 +0800 Subject: [PATCH 51/56] change the way of setting threshold --- paddle/parameter/FirstOrderOptimizer.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/parameter/FirstOrderOptimizer.cpp index 826864b99e..d829260162 100644 --- a/paddle/parameter/FirstOrderOptimizer.cpp +++ b/paddle/parameter/FirstOrderOptimizer.cpp @@ -310,18 +310,14 @@ void OptimizerWithGradientClipping::update(const VectorPtr vecs[], real threshold; std::string field; - // Get the minimum of local and global threshold - // as the real threshold for clipping - if (globalThreshold > 0.0f && localThreshold > 0.0f) { - threshold = - globalThreshold < localThreshold ? globalThreshold : localThreshold; - field = globalThreshold < localThreshold ? 
"global" : "local"; - } else if (globalThreshold > 0.0f) { - threshold = globalThreshold; - field = "global"; - } else { + // Use local gradient clipping threshold if it's enabled, + // otherwise using the global one. + if (localThreshold > 0.0f) { threshold = localThreshold; field = "local"; + } else { + threshold = globalThreshold; + field = "global"; } real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax(); From 5cf2b2e81cab838833911c3f54c033fb3dacdf62 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 23 May 2017 18:18:38 +0800 Subject: [PATCH 52/56] compress code --- paddle/parameter/FirstOrderOptimizer.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/parameter/FirstOrderOptimizer.cpp index d829260162..5938b2210c 100644 --- a/paddle/parameter/FirstOrderOptimizer.cpp +++ b/paddle/parameter/FirstOrderOptimizer.cpp @@ -308,17 +308,10 @@ void OptimizerWithGradientClipping::update(const VectorPtr vecs[], real globalThreshold = optConfig_.gradient_clipping_threshold(); real localThreshold = config.gradient_clipping_threshold(); - real threshold; - std::string field; // Use local gradient clipping threshold if it's enabled, // otherwise using the global one. - if (localThreshold > 0.0f) { - threshold = localThreshold; - field = "local"; - } else { - threshold = globalThreshold; - field = "global"; - } + real threshold = localThreshold > 0.0f ? localThreshold : globalThreshold; + std::string field = localThreshold > 0.0f ? "local" : "global"; real maxAbsGrad = vecs[PARAMETER_GRADIENT]->getAbsMax(); if (maxAbsGrad > threshold) { From 3712822e9fc794713e39d0416665cdb53f6a3acf Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Thu, 25 May 2017 00:49:39 +0800 Subject: [PATCH 53/56] modify seq2seq demo to show gradient/error clipping --- demo/seqToseq/api_train_v2.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py index 3072c37512..bb535f0926 100644 --- a/demo/seqToseq/api_train_v2.py +++ b/demo/seqToseq/api_train_v2.py @@ -21,9 +21,12 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): size=word_vector_dim, param_attr=paddle.attr.ParamAttr(name='_source_language_embedding')) src_forward = paddle.networks.simple_gru( - input=src_embedding, size=encoder_size) + name='src_forward_gru', input=src_embedding, size=encoder_size) src_backward = paddle.networks.simple_gru( - input=src_embedding, size=encoder_size, reverse=True) + name='src_backward_gru', + input=src_embedding, + size=encoder_size, + reverse=True) encoded_vector = paddle.layer.concat(input=[src_forward, src_backward]) #### Decoder @@ -34,7 +37,9 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): backward_first = paddle.layer.first_seq(input=src_backward) with paddle.layer.mixed( - size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot: + name="decoder_boot_mixed", + size=decoder_size, + act=paddle.activation.Tanh()) as decoder_boot: decoder_boot += paddle.layer.full_matrix_projection( input=backward_first) @@ -44,11 +49,17 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False): name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) context = paddle.networks.simple_attention( + name="simple_attention", encoded_sequence=enc_vec, encoded_proj=enc_proj, decoder_state=decoder_mem) - with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs: + with 
paddle.layer.mixed(
+            name="input_recurrent",
+            size=decoder_size * 3,
+            # enable error clipping
+            layer_attr=paddle.attr.ExtraAttr(
+                error_clipping_threshold=100.0)) as decoder_inputs:
         decoder_inputs += paddle.layer.full_matrix_projection(input=context)
         decoder_inputs += paddle.layer.full_matrix_projection(
             input=current_word)
@@ -57,9 +68,12 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
         name='gru_decoder',
         input=decoder_inputs,
         output_mem=decoder_mem,
+        # uncomment to enable local threshold for gradient clipping
+        # param_attr=paddle.attr.ParamAttr(gradient_clipping_threshold=9.9),
         size=decoder_size)
 
     with paddle.layer.mixed(
+            name="gru_step_output",
             size=target_dict_dim,
             bias_attr=True,
             act=paddle.activation.Softmax()) as out:
@@ -125,7 +139,13 @@ def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
 
 
 def main():
-    paddle.init(use_gpu=False, trainer_count=1)
+    paddle.init(
+        use_gpu=False,
+        trainer_count=1,
+        # log gradient clipping info
+        log_clipping=True,
+        # log error clipping info
+        log_error_clipping=True)
     is_generating = False
 
     # source and target dict dim.
@@ -140,6 +160,8 @@ def main():
     # define optimize method and trainer
     optimizer = paddle.optimizer.Adam(
         learning_rate=5e-5,
+        # uncomment to enable global threshold for gradient clipping
+        # gradient_clipping_threshold=10.0,
         regularization=paddle.optimizer.L2Regularization(rate=8e-4))
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,

From c6b8c13721e5a05e5c8787546c4b870fa32b54ef Mon Sep 17 00:00:00 2001
From: Helin Wang 
Date: Wed, 24 May 2017 20:16:08 -0400
Subject: [PATCH 54/56] move recordio to Paddle from
 github.com/wangkuiyi/recordio

---
 paddle/go/recordio/README.md                 |  36 ++++
 paddle/go/recordio/chunk.go                  | 181 +++++++++++++++++++
 paddle/go/recordio/header.go                 |  59 ++++++
 paddle/go/recordio/reader.go                 | 135 ++++++++++++++
 paddle/go/recordio/recordio_internal_test.go |  90 +++++++++
 paddle/go/recordio/recordio_test.go          |  81 +++++++++
 paddle/go/recordio/writer.go                 |  60 ++++++
 7 files changed, 642 insertions(+)
 create mode 100644 paddle/go/recordio/README.md
 create mode 100644 paddle/go/recordio/chunk.go
 create mode 100644 paddle/go/recordio/header.go
 create mode 100644 paddle/go/recordio/reader.go
 create mode 100644 paddle/go/recordio/recordio_internal_test.go
 create mode 100644 paddle/go/recordio/recordio_test.go
 create mode 100644 paddle/go/recordio/writer.go

diff --git a/paddle/go/recordio/README.md b/paddle/go/recordio/README.md
new file mode 100644
index 0000000000..8b0b9308b1
--- /dev/null
+++ b/paddle/go/recordio/README.md
@@ -0,0 +1,36 @@
+# RecordIO
+
+## Write
+
+```go
+f, e := os.Create("a_file.recordio")
+w := recordio.NewWriter(f, -1, -1)
+w.Write([]byte("Hello"))
+w.Write([]byte("World!"))
+w.Close()
+```
+
+## Read
+
+1. Load chunk index:
+
+   ```go
+   f, e := os.Open("a_file.recordio")
+   idx, e := recordio.LoadIndex(f)
+   fmt.Println("Total records: ", idx.NumRecords())
+   ```
+
+2. Create one or more scanners to read a range of records. 
The
+   following example reads the range
+   [1, 3), i.e., the second and the third records:
+
+   ```go
+   f, e := os.Open("a_file.recordio")
+   s := recordio.NewScanner(f, idx, 1, 3)
+   for s.Scan() {
+      fmt.Println(string(s.Record()))
+   }
+   if s.Error() != nil && s.Error() != io.EOF {
+      log.Fatalf("Something wrong with scanning: %v", s.Error())
+   }
+   ```
diff --git a/paddle/go/recordio/chunk.go b/paddle/go/recordio/chunk.go
new file mode 100644
index 0000000000..4e983ab72b
--- /dev/null
+++ b/paddle/go/recordio/chunk.go
@@ -0,0 +1,181 @@
+package recordio
+
+import (
+	"bytes"
+	"compress/gzip"
+	"encoding/binary"
+	"fmt"
+	"hash/crc32"
+	"io"
+
+	"github.com/golang/snappy"
+)
+
+// A Chunk contains the Header and optionally compressed records. To
+// create a chunk, just use ch := &Chunk{}.
+type Chunk struct {
+	records  [][]byte
+	numBytes int // sum of record lengths.
+}
+
+func (ch *Chunk) add(record []byte) {
+	ch.records = append(ch.records, record)
+	ch.numBytes += len(record)
+}
+
+// dump writes the chunk to w, then clears the chunk so that it is
+// ready for the next add invocation.
+func (ch *Chunk) dump(w io.Writer, compressorIndex int) error {
+	// NOTE: check len(ch.records) rather than ch.numBytes, because
+	// empty records are allowed.
+	if len(ch.records) == 0 {
+		return nil
+	}
+
+	// Write raw records and their lengths into data buffer.
+	var data bytes.Buffer
+
+	for _, r := range ch.records {
+		var rs [4]byte
+		binary.LittleEndian.PutUint32(rs[:], uint32(len(r)))
+
+		if _, e := data.Write(rs[:]); e != nil {
+			return fmt.Errorf("Failed to write record length: %v", e)
+		}
+
+		if _, e := data.Write(r); e != nil {
+			return fmt.Errorf("Failed to write record: %v", e)
+		}
+	}
+
+	compressed, e := compressData(&data, compressorIndex)
+	if e != nil {
+		return e
+	}
+
+	// Write chunk header and compressed data.
+	hdr := &Header{
+		checkSum:       crc32.ChecksumIEEE(compressed.Bytes()),
+		compressor:     uint32(compressorIndex),
+		compressedSize: uint32(compressed.Len()),
+		numRecords:     uint32(len(ch.records)),
+	}
+
+	if _, e := hdr.write(w); e != nil {
+		return fmt.Errorf("Failed to write chunk header: %v", e)
+	}
+
+	if _, e := w.Write(compressed.Bytes()); e != nil {
+		return fmt.Errorf("Failed to write chunk data: %v", e)
+	}
+
+	// Clear the current chunk.
+	ch.records = nil
+	ch.numBytes = 0
+
+	return nil
+}
+
+type noopCompressor struct {
+	*bytes.Buffer
+}
+
+func (c *noopCompressor) Close() error {
+	return nil
+}
+
+func compressData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) {
+	compressed := new(bytes.Buffer)
+	var compressor io.WriteCloser
+
+	switch compressorIndex {
+	case NoCompression:
+		compressor = &noopCompressor{compressed}
+	case Snappy:
+		compressor = snappy.NewBufferedWriter(compressed)
+	case Gzip:
+		compressor = gzip.NewWriter(compressed)
+	default:
+		return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex)
+	}
+
+	if _, e := io.Copy(compressor, src); e != nil {
+		return nil, fmt.Errorf("Failed to compress chunk data: %v", e)
+	}
+	compressor.Close()
+
+	return compressed, nil
+}
+
+// parse the specified chunk from r.
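+// It seeks to chunkOffset, verifies the header's CRC32 checksum,
+// decompresses the payload, and splits it back into length-prefixed
+// records.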
+func parseChunk(r io.ReadSeeker, chunkOffset int64) (*Chunk, error) {
+	var e error
+	var hdr *Header
+
+	if _, e = r.Seek(chunkOffset, io.SeekStart); e != nil {
+		return nil, fmt.Errorf("Failed to seek chunk: %v", e)
+	}
+
+	hdr, e = parseHeader(r)
+	if e != nil {
+		return nil, fmt.Errorf("Failed to parse chunk header: %v", e)
+	}
+
+	var buf bytes.Buffer
+	if _, e = io.CopyN(&buf, r, int64(hdr.compressedSize)); e != nil {
+		return nil, fmt.Errorf("Failed to read chunk data: %v", e)
+	}
+
+	if hdr.checkSum != crc32.ChecksumIEEE(buf.Bytes()) {
+		return nil, fmt.Errorf("Checksum checking failed.")
+	}
+
+	deflated, e := deflateData(&buf, int(hdr.compressor))
+	if e != nil {
+		return nil, e
+	}
+
+	ch := &Chunk{}
+	for i := 0; i < int(hdr.numRecords); i++ {
+		var rs [4]byte
+		if _, e = deflated.Read(rs[:]); e != nil {
+			return nil, fmt.Errorf("Failed to read record length: %v", e)
+		}
+
+		r := make([]byte, binary.LittleEndian.Uint32(rs[:]))
+		if _, e = deflated.Read(r); e != nil {
+			return nil, fmt.Errorf("Failed to read a record: %v", e)
+		}
+
+		ch.records = append(ch.records, r)
+		ch.numBytes += len(r)
+	}
+
+	return ch, nil
+}
+
+func deflateData(src io.Reader, compressorIndex int) (*bytes.Buffer, error) {
+	var e error
+	var deflator io.Reader
+
+	switch compressorIndex {
+	case NoCompression:
+		deflator = src
+	case Snappy:
+		deflator = snappy.NewReader(src)
+	case Gzip:
+		deflator, e = gzip.NewReader(src)
+		if e != nil {
+			return nil, fmt.Errorf("Failed to create gzip reader: %v", e)
+		}
+	default:
+		return nil, fmt.Errorf("Unknown compression algorithm: %d", compressorIndex)
+	}
+
+	deflated := new(bytes.Buffer)
+	if _, e = io.Copy(deflated, deflator); e != nil {
+		return nil, fmt.Errorf("Failed to deflate chunk data: %v", e)
+	}
+
+	return deflated, nil
+}
diff --git a/paddle/go/recordio/header.go b/paddle/go/recordio/header.go
new file mode 100644
index 0000000000..d3aefae364
--- /dev/null
+++ b/paddle/go/recordio/header.go
@@ -0,0 +1,59 @@
+package recordio
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+)
+
+const (
+	// NoCompression means writing raw chunk data into files.
+	// With other choices, chunks are compressed before being written.
+	NoCompression = iota
+	// Snappy has been the default compression algorithm widely
+	// used in Google. It compromises between speed and
+	// compression ratio.
+	Snappy
+	// Gzip is a well-known compression algorithm. It is
+	// recommended only if you are looking for a higher compression
+	// ratio.
+	Gzip
+
+	magicNumber uint32 = 0x01020304
+	defaultCompressor  = Snappy
+)
+
+// Header is the metadata of Chunk.
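+// On disk it occupies 20 bytes: the magic number followed by the
+// checksum, compressor, compressed size, and record count, each a
+// little-endian uint32 (see write and parseHeader below).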
+type Header struct {
+	checkSum       uint32
+	compressor     uint32
+	compressedSize uint32
+	numRecords     uint32
+}
+
+func (c *Header) write(w io.Writer) (int, error) {
+	var buf [20]byte
+	binary.LittleEndian.PutUint32(buf[0:4], magicNumber)
+	binary.LittleEndian.PutUint32(buf[4:8], c.checkSum)
+	binary.LittleEndian.PutUint32(buf[8:12], c.compressor)
+	binary.LittleEndian.PutUint32(buf[12:16], c.compressedSize)
+	binary.LittleEndian.PutUint32(buf[16:20], c.numRecords)
+	return w.Write(buf[:])
+}
+
+func parseHeader(r io.Reader) (*Header, error) {
+	var buf [20]byte
+	if _, e := r.Read(buf[:]); e != nil {
+		return nil, e
+	}
+
+	if v := binary.LittleEndian.Uint32(buf[0:4]); v != magicNumber {
+		return nil, fmt.Errorf("Failed to parse magic number")
+	}
+
+	return &Header{
+		checkSum:       binary.LittleEndian.Uint32(buf[4:8]),
+		compressor:     binary.LittleEndian.Uint32(buf[8:12]),
+		compressedSize: binary.LittleEndian.Uint32(buf[12:16]),
+		numRecords:     binary.LittleEndian.Uint32(buf[16:20]),
+	}, nil
+}
diff --git a/paddle/go/recordio/reader.go b/paddle/go/recordio/reader.go
new file mode 100644
index 0000000000..a12c604f7b
--- /dev/null
+++ b/paddle/go/recordio/reader.go
@@ -0,0 +1,135 @@
+package recordio
+
+import "io"
+
+// Index consists of the offsets and sizes of the consecutive chunks in a RecordIO file.
+type Index struct {
+	chunkOffsets []int64
+	chunkLens    []uint32
+	numRecords   int   // the number of all records in a file.
+	chunkRecords []int // the number of records in chunks.
+}
+
+// LoadIndex scans the file and parses chunkOffsets, chunkLens, and numRecords.
+func LoadIndex(r io.ReadSeeker) (*Index, error) {
+	f := &Index{}
+	offset := int64(0)
+	var e error
+	var hdr *Header
+
+	for {
+		hdr, e = parseHeader(r)
+		if e != nil {
+			break
+		}
+
+		f.chunkOffsets = append(f.chunkOffsets, offset)
+		f.chunkLens = append(f.chunkLens, hdr.numRecords)
+		f.chunkRecords = append(f.chunkRecords, int(hdr.numRecords))
+		f.numRecords += int(hdr.numRecords)
+
+		offset, e = r.Seek(int64(hdr.compressedSize), io.SeekCurrent)
+		if e != nil {
+			break
+		}
+	}
+
+	if e == io.EOF {
+		return f, nil
+	}
+	return nil, e
+}
+
+// NumRecords returns the total number of records in a RecordIO file.
+func (r *Index) NumRecords() int {
+	return r.numRecords
+}
+
+// NumChunks returns the total number of chunks in a RecordIO file.
+func (r *Index) NumChunks() int {
+	return len(r.chunkLens)
+}
+
+// ChunkIndex returns the Index of the i-th Chunk.
+func (r *Index) ChunkIndex(i int) *Index {
+	idx := &Index{}
+	idx.chunkOffsets = []int64{r.chunkOffsets[i]}
+	idx.chunkLens = []uint32{r.chunkLens[i]}
+	idx.chunkRecords = []int{r.chunkRecords[i]}
+	idx.numRecords = idx.chunkRecords[0]
+	return idx
+}
+
+// Locate returns the index of the chunk that contains the given record,
+// and the record index within the chunk. It returns (-1, -1) if the
+// record is out of range.
+func (r *Index) Locate(recordIndex int) (int, int) {
+	sum := 0
+	for i, l := range r.chunkLens {
+		sum += int(l)
+		if recordIndex < sum {
+			return i, recordIndex - sum + int(l)
+		}
+	}
+	return -1, -1
+}
+
+// Scanner scans records in a specified range within [0, numRecords).
+type Scanner struct {
+	reader          io.ReadSeeker
+	index           *Index
+	start, end, cur int
+	chunkIndex      int
+	chunk           *Chunk
+	err             error
+}
+
+// NewScanner creates a scanner that sequentially reads records in the
+// range [start, start+len). If start < 0, it scans from the
+// beginning. If len < 0, it scans till the end of file.
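+// A Scanner keeps at most one chunk in memory; Scan parses the next
+// chunk lazily when the cursor crosses a chunk boundary.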
+func NewScanner(r io.ReadSeeker, index *Index, start, len int) *Scanner {
+	if start < 0 {
+		start = 0
+	}
+	if len < 0 || start+len >= index.NumRecords() {
+		len = index.NumRecords() - start
+	}
+
+	return &Scanner{
+		reader:     r,
+		index:      index,
+		start:      start,
+		end:        start + len,
+		cur:        start - 1, // The initial status required by Scan.
+		chunkIndex: -1,
+		chunk:      &Chunk{},
+	}
+}
+
+// Scan moves the cursor forward by one record and loads the chunk
+// containing the record if it is not yet loaded.
+func (s *Scanner) Scan() bool {
+	s.cur++
+
+	if s.cur >= s.end {
+		s.err = io.EOF
+	} else {
+		if ci, _ := s.index.Locate(s.cur); s.chunkIndex != ci {
+			s.chunkIndex = ci
+			s.chunk, s.err = parseChunk(s.reader, s.index.chunkOffsets[ci])
+		}
+	}
+
+	return s.err == nil
+}
+
+// Record returns the record under the current cursor.
+func (s *Scanner) Record() []byte {
+	_, ri := s.index.Locate(s.cur)
+	return s.chunk.records[ri]
+}
+
+// Error returns the error that stopped Scan.
+func (s *Scanner) Error() error {
+	return s.err
+}
diff --git a/paddle/go/recordio/recordio_internal_test.go b/paddle/go/recordio/recordio_internal_test.go
new file mode 100644
index 0000000000..e0f7dd0407
--- /dev/null
+++ b/paddle/go/recordio/recordio_internal_test.go
@@ -0,0 +1,90 @@
+package recordio
+
+import (
+	"bytes"
+	"testing"
+	"unsafe"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestChunkHead(t *testing.T) {
+	assert := assert.New(t)
+
+	c := &Header{
+		checkSum:       123,
+		compressor:     456,
+		compressedSize: 789,
+	}
+
+	var buf bytes.Buffer
+	_, e := c.write(&buf)
+	assert.Nil(e)
+
+	cc, e := parseHeader(&buf)
+	assert.Nil(e)
+	assert.Equal(c, cc)
+}
+
+func TestWriteAndRead(t *testing.T) {
+	assert := assert.New(t)
+
+	data := []string{
+		"12345",
+		"1234",
+		"12"}
+
+	var buf bytes.Buffer
+	w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize.
+
+	n, e := w.Write([]byte(data[0])) // does not exceed chunk size.
+	assert.Nil(e)
+	assert.Equal(5, n)
+
+	n, e = w.Write([]byte(data[1])) // does not exceed chunk size.
+	assert.Nil(e)
+	assert.Equal(4, n)
+
+	n, e = w.Write([]byte(data[2])) // exceeds chunk size, dump and create a new chunk.
+	assert.Nil(e)
+	assert.Equal(n, 2)
+
+	assert.Nil(w.Close()) // flush the second chunk.
+	assert.Nil(w.Writer)
+
+	n, e = w.Write([]byte("anything")) // not effective after close.
+	assert.NotNil(e)
+	assert.Equal(n, 0)
+
+	idx, e := LoadIndex(bytes.NewReader(buf.Bytes()))
+	assert.Nil(e)
+	assert.Equal([]uint32{2, 1}, idx.chunkLens)
+	assert.Equal(
+		[]int64{0,
+			int64(4 + // magic number
+				unsafe.Sizeof(Header{}) +
+				5 + // first record
+				4 + // second record
+				2*4)}, // two record lengths
+		idx.chunkOffsets)
+
+	s := NewScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
+	i := 0
+	for s.Scan() {
+		assert.Equal(data[i], string(s.Record()))
+		i++
+	}
+}
+
+func TestWriteEmptyFile(t *testing.T) {
+	assert := assert.New(t)
+
+	var buf bytes.Buffer
+	w := NewWriter(&buf, 10, NoCompression) // use a small maxChunkSize.
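+	// Closing a writer that never received a record should not emit
+	// any chunk, leaving the buffer empty.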
+	assert.Nil(w.Close())
+	assert.Equal(0, buf.Len())
+
+	idx, e := LoadIndex(bytes.NewReader(buf.Bytes()))
+	assert.Nil(e)
+	assert.Equal(0, idx.NumRecords())
+}
diff --git a/paddle/go/recordio/recordio_test.go b/paddle/go/recordio/recordio_test.go
new file mode 100644
index 0000000000..8bf1b020ab
--- /dev/null
+++ b/paddle/go/recordio/recordio_test.go
@@ -0,0 +1,81 @@
+package recordio_test
+
+import (
+	"bytes"
+	"reflect"
+	"testing"
+
+	"github.com/PaddlePaddle/Paddle/paddle/go/recordio"
+)
+
+func TestWriteRead(t *testing.T) {
+	const total = 1000
+	var buf bytes.Buffer
+	w := recordio.NewWriter(&buf, 0, -1)
+	for i := 0; i < total; i++ {
+		_, err := w.Write(make([]byte, i))
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	w.Close()
+
+	idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if idx.NumRecords() != total {
+		t.Fatal("num record does not match:", idx.NumRecords(), total)
+	}
+
+	s := recordio.NewScanner(bytes.NewReader(buf.Bytes()), idx, -1, -1)
+	i := 0
+	for s.Scan() {
+		if !reflect.DeepEqual(s.Record(), make([]byte, i)) {
+			t.Fatal("not equal:", len(s.Record()), len(make([]byte, i)))
+		}
+		i++
+	}
+
+	if i != total {
+		t.Fatal("total count not match:", i, total)
+	}
+}
+
+func TestChunkIndex(t *testing.T) {
+	const total = 1000
+	var buf bytes.Buffer
+	w := recordio.NewWriter(&buf, 0, -1)
+	for i := 0; i < total; i++ {
+		_, err := w.Write(make([]byte, i))
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	w.Close()
+
+	idx, err := recordio.LoadIndex(bytes.NewReader(buf.Bytes()))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if idx.NumChunks() != total {
+		t.Fatal("unexpected chunk num:", idx.NumChunks(), total)
+	}
+
+	for i := 0; i < total; i++ {
+		newIdx := idx.ChunkIndex(i)
+		s := recordio.NewScanner(bytes.NewReader(buf.Bytes()), newIdx, -1, -1)
+		j := 0
+		for s.Scan() {
+			if !reflect.DeepEqual(s.Record(), make([]byte, i)) {
+				t.Fatal("not equal:", len(s.Record()), len(make([]byte, i)))
+			}
+			j++
+		}
+		if j != 1 {
+			t.Fatal("unexpected record per chunk:", j)
+		}
+	}
+}
diff --git a/paddle/go/recordio/writer.go b/paddle/go/recordio/writer.go
new file mode 100644
index 0000000000..39112e518f
--- /dev/null
+++ b/paddle/go/recordio/writer.go
@@ -0,0 +1,60 @@
+package recordio
+
+import (
+	"fmt"
+	"io"
+)
+
+const (
+	defaultMaxChunkSize = 32 * 1024 * 1024
+)
+
+// Writer creates a RecordIO file.
+type Writer struct {
+	io.Writer    // Set to nil to mark a closed writer.
+	chunk        *Chunk
+	maxChunkSize int // total records size, excluding metadata, before compression.
+	compressor   int
+}
+
+// NewWriter creates a RecordIO file writer. Each chunk is compressed
+// with the algorithm selected by compressor (NoCompression, Snappy,
+// or Gzip). A negative maxChunkSize or compressor selects the
+// corresponding default value.
+func NewWriter(w io.Writer, maxChunkSize, compressor int) *Writer {
+	if maxChunkSize < 0 {
+		maxChunkSize = defaultMaxChunkSize
+	}
+
+	if compressor < 0 {
+		compressor = defaultCompressor
+	}
+
+	return &Writer{
+		Writer:       w,
+		chunk:        &Chunk{},
+		maxChunkSize: maxChunkSize,
+		compressor:   compressor}
+}
+
+// Write writes a record. It returns an error if Close has been called.
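+// If adding the record would grow the current chunk beyond
+// maxChunkSize, the chunk is flushed to the underlying writer first.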
+func (w *Writer) Write(record []byte) (int, error) { + if w.Writer == nil { + return 0, fmt.Errorf("Cannot write since writer had been closed") + } + + if w.chunk.numBytes+len(record) > w.maxChunkSize { + if e := w.chunk.dump(w.Writer, w.compressor); e != nil { + return 0, e + } + } + + w.chunk.add(record) + return len(record), nil +} + +// Close flushes the current chunk and makes the writer invalid. +func (w *Writer) Close() error { + e := w.chunk.dump(w.Writer, w.compressor) + w.Writer = nil + return e +} From 398bab41b84b39f0f0341b79b40314d932bf03f9 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 24 May 2017 19:17:18 -0700 Subject: [PATCH 55/56] fix hardcoded cmake for cclient/test --- paddle/go/cclient/CMakeLists.txt | 7 ++----- paddle/go/cclient/test/CMakeLists.txt | 5 +++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/paddle/go/cclient/CMakeLists.txt b/paddle/go/cclient/CMakeLists.txt index 29a2089fb1..c85ff3db09 100644 --- a/paddle/go/cclient/CMakeLists.txt +++ b/paddle/go/cclient/CMakeLists.txt @@ -2,12 +2,9 @@ cmake_minimum_required(VERSION 3.0) if(GTEST_INCLUDE_DIR AND GTEST_LIBRARIES) message("-- Found gtest (include: ${GTEST_INCLUDE_DIR}, library: ${GTEST_LIBRARIES})") -else() - # find #include - get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) - include_directories(${PARENT_DIR}) - +else() # find cmake directory modules + get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY) get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY) diff --git a/paddle/go/cclient/test/CMakeLists.txt b/paddle/go/cclient/test/CMakeLists.txt index c899bd275d..185e7ec80f 100644 --- a/paddle/go/cclient/test/CMakeLists.txt +++ b/paddle/go/cclient/test/CMakeLists.txt @@ -1,8 +1,9 @@ cmake_minimum_required(VERSION 3.0) -include_directories(/env/gopath/src/github.com/PaddlePaddle/Paddle/paddle/go/cclient/build/) +include_directories(${CMAKE_BINARY_DIR}) add_executable(main main.c) add_dependencies(main client) set (CMAKE_EXE_LINKER_FLAGS "-pthread") -target_link_libraries(main /env/gopath/src/github.com/PaddlePaddle/Paddle/paddle/go/cclient/build/libclient.a) # ${GTEST_LIBRARIES}) +message(${CMAKE_BINARY_DIR}) +target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a) From 25ca5a31c718ab37c131af3e0b6f7ffaf00c23f5 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Wed, 24 May 2017 19:19:28 -0700 Subject: [PATCH 56/56] remove debug message --- paddle/go/cclient/test/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/go/cclient/test/CMakeLists.txt b/paddle/go/cclient/test/CMakeLists.txt index 185e7ec80f..de7ef6a47a 100644 --- a/paddle/go/cclient/test/CMakeLists.txt +++ b/paddle/go/cclient/test/CMakeLists.txt @@ -5,5 +5,4 @@ include_directories(${CMAKE_BINARY_DIR}) add_executable(main main.c) add_dependencies(main client) set (CMAKE_EXE_LINKER_FLAGS "-pthread") -message(${CMAKE_BINARY_DIR}) target_link_libraries(main ${CMAKE_BINARY_DIR}/libclient.a)
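As a quick end-to-end check of the recordio API introduced in patch 54,
the sketch below writes a file and scans it back. It is illustrative
only: the file name is arbitrary, and it uses just the identifiers
defined in the patches above (NewWriter, LoadIndex, NewScanner).

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/PaddlePaddle/Paddle/paddle/go/recordio"
)

func main() {
	// Write two records; -1, -1 picks the default chunk size and compressor.
	f, err := os.Create("demo.recordio")
	if err != nil {
		log.Fatal(err)
	}
	w := recordio.NewWriter(f, -1, -1)
	w.Write([]byte("Hello"))
	w.Write([]byte("World!"))
	w.Close()
	f.Close()

	// Load the chunk index, then scan every record.
	r, err := os.Open("demo.recordio")
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()

	idx, err := recordio.LoadIndex(r)
	if err != nil {
		log.Fatal(err)
	}

	s := recordio.NewScanner(r, idx, -1, -1) // negative bounds mean all records
	for s.Scan() {
		fmt.Println(string(s.Record()))
	}
}
```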