merge develop, fix conflict

avx_docs
Luo Tao 9 years ago
commit 96615fe329

@ -7,18 +7,14 @@
hooks:
- id: yapf
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 4ef03c4223ad322c7adaa6c6c0efb26b57df3b71
sha: 7539d8bd1a00a3c1bfd34cdb606d3a6372e83469
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
- id: end-of-file-fixer
# TODO(yuyang): the trailing-whitespace hook has some bugs with markdown
# files now, so please do not add it to the pre-commit hook for now
# - id: trailing-whitespace
#
# TODO(yuyang): debug-statements does not fit Paddle, because
# not all of our Python code is runnable. Some of it is used for
# documentation
# - id: debug-statements
- repo: https://github.com/PaddlePaddle/clang-format-pre-commit-hook.git
sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
hooks:
- id: clang-formater

@ -42,7 +42,7 @@ addons:
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)'
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
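
In plain terms, the widened pattern makes CI skip a full build whenever every changed path ends in .md, .rst, .jpg, or .png: `grep -qv` succeeds only if at least one changed file falls outside that list. A rough Python equivalent of the check is sketched below; it is illustrative only, and the commit range `HEAD~1..HEAD` stands in for `$TRAVIS_COMMIT_RANGE`.

```python
import re
import subprocess

# Paths changed in the commit range, as `git diff --name-only` would print them.
changed = subprocess.check_output(
    ['git', 'diff', '--name-only', 'HEAD~1..HEAD']).decode().splitlines()

# Doc-only change: every path matches one of the documentation/image suffixes.
doc_only = all(re.search(r'\.(md|rst|jpg|png)$', path) for path in changed)
if doc_only:
    print('Only docs/images were updated, skipping the build.')
```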

@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 8)
set(PADDLE_PATCH_VERSION 0b3)
set(PADDLE_MINOR_VERSION 9)
set(PADDLE_PATCH_VERSION 0a0)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@ -36,6 +36,7 @@ option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
option(WITH_GLOG "Compile PaddlePaddle use glog, otherwise use a log implement internally" ${LIBGLOG_FOUND})
option(WITH_GFLAGS "Compile PaddlePaddle use gflags, otherwise use a flag implement internally" ${GFLAGS_FOUND})
option(WITH_TIMER "Compile PaddlePaddle use timer" OFF)
option(WITH_PROFILER "Compile PaddlePaddle use gpu profiler" OFF)
option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
@ -115,7 +116,6 @@ else()
endif(WITH_AVX)
if(WITH_DSO)
set(CUDA_LIBRARIES "")
add_definitions(-DPADDLE_USE_DSO)
endif(WITH_DSO)
@ -135,6 +135,10 @@ if(NOT WITH_TIMER)
add_definitions(-DPADDLE_DISABLE_TIMER)
endif(NOT WITH_TIMER)
if(NOT WITH_PROFILER)
add_definitions(-DPADDLE_DISABLE_PROFILER)
endif(NOT WITH_PROFILER)
if(WITH_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")

@ -1,10 +1,13 @@
# PaddlePaddle
[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
[![Coverage Status](https://coveralls.io/repos/github/baidu/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/baidu/Paddle?branch=develop)
[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/cn/index.html)
[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop)
[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
Welcome to the PaddlePaddle GitHub.
@ -14,7 +17,7 @@ developed by Baidu scientists and engineers for the purpose of applying deep
learning to many products at Baidu.
Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/baidu/Paddle/releases) to track the latest feature of PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle.
## Features
@ -26,15 +29,15 @@ Please refer to our [release announcement](https://github.com/baidu/Paddle/relea
connection.
- **Efficiency**
In order to unleash the power of heterogeneous computing resources,
optimization occurs at different levels of PaddlePaddle, including
computing, memory, architecture and communication. The following are some
examples:
- Optimized math operations through SSE/AVX intrinsics, BLAS libraries
(e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
- Highly optimized recurrent networks which can handle **variable-length**
(e.g. MKL, ATLAS, cuBLAS) or customized CPU/GPU kernels.
- Highly optimized recurrent networks which can handle **variable-length**
sequence without padding.
- Optimized local and distributed training for models with high dimensional
sparse data.
@ -57,41 +60,39 @@ Please refer to our [release announcement](https://github.com/baidu/Paddle/relea
## Installation
Check out the [Install Guide](http://paddlepaddle.org/doc/build/) to install from
pre-built packages (**docker image**, **deb package**) or
pre-built packages (**docker image**, **deb package**) or
directly build on **Linux** and **Mac OS X** from the source code.
## Documentation
Both [English Docs](http://paddlepaddle.org/doc/) and [Chinese Docs](http://paddlepaddle.org/doc_cn/) are provided for our users and developers.
- [Quick Start](http://paddlepaddle.org/doc/demo/quick_start/index_en) <br>
You can follow the quick start tutorial to learn how to use PaddlePaddle
step-by-step.
- [Example and Demo](http://paddlepaddle.org/doc/demo/) <br>
We provide five demos, including: image classification, sentiment analysis,
sequence to sequence model, recommendation, semantic role labeling.
sequence to sequence model, recommendation, semantic role labeling.
- [Distributed Training](http://paddlepaddle.org/doc/cluster) <br>
This system supports training deep learning models on multiple machines
with data parallelism.
- [Python API](http://paddlepaddle.org/doc/ui/) <br>
PaddlePaddle supports building your system with either the Python interface or C++.
We also use SWIG to wrap the C++ source code into a user-friendly interface for
Python (a minimal sketch appears after this list). You can also use SWIG to create
an interface for your favorite programming language.
- [How to Contribute](http://paddlepaddle.org/doc/build/contribute_to_paddle.html) <br>
We sincerely appreciate your interest and contributions. If you would like to
contribute, please read the contribution guide.
contribute, please read the contribution guide.
- [Source Code Documents](http://paddlepaddle.org/doc/source/) <br>
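
As referenced in the Python API item above, here is a minimal sketch of what a trainer configuration written through the Python interface looks like. It only uses helpers that appear elsewhere in this diff (`settings`, `data_layer`, `embedding_layer`, `fc_layer`, `classification_cost`, `outputs`); the layer names, sizes, and activations are illustrative assumptions, not part of this commit.

```python
from paddle.trainer_config_helpers import *

settings(batch_size=128,
         learning_rate=2e-3,
         learning_method=AdamOptimizer())

# A toy per-word tagger: sizes and names are made up for illustration.
word  = data_layer(name='word_data', size=10000)   # integer word-id sequence
label = data_layer(name='label', size=5)           # per-word tag sequence

emb    = embedding_layer(input=word, size=32)
hidden = fc_layer(input=emb, size=128, act=TanhActivation())
prob   = fc_layer(input=hidden, size=5, act=SoftmaxActivation())

outputs(classification_cost(input=prob, label=label))
```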
## Ask Questions
Please join the [**gitter chat**](https://gitter.im/PaddlePaddle/Deep_Learning) or send email to
**paddle-dev@baidu.com** to ask questions and talk about methods and models.
Framework development discussions and
bug reports are collected on [Issues](https://github.com/baidu/paddle/issues).
You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/Paddle/issues).
## Copyright and License
PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).

@ -24,7 +24,7 @@ paddle train \
--test_all_data_in_one_period=1 \
--use_gpu=1 \
--trainer_count=1 \
--num_passes=200 \
--num_passes=300 \
--save_dir=$output \
2>&1 | tee $log

@ -18,7 +18,5 @@ set -x
# download the dictionary and pretrained model
for file in baidu.dict model_32.emb model_64.emb model_128.emb model_256.emb
do
# following is the google drive address
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/$file --no-check-certificate
wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/$file
done

@ -24,9 +24,7 @@ echo "Downloading ResNet models..."
for file in resnet_50.tar.gz resnet_101.tar.gz resnet_152.tar.gz mean_meta_224.tar.gz
do
# following is the google drive address
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/imagenet/$file --no-check-certificate
wget http://paddlepaddle.bj.bcebos.com/model_zoo/imagenet/$file
tar -xvf $file
rm $file
done

@ -23,7 +23,7 @@ set -e
export LC_ALL=C
UNAME_STR=`uname`
if [[ ${UNAME_STR} == 'Linux' ]]; then
if [ ${UNAME_STR} == 'Linux' ]; then
SHUF_PROG='shuf'
else
SHUF_PROG='gshuf'

@ -17,24 +17,15 @@ import os
from optparse import OptionParser
def extract_dict_features(pair_file, feature_file, src_dict_file,
tgt_dict_file):
src_dict = set()
tgt_dict = set()
with open(pair_file) as fin, open(feature_file, 'w') as feature_out, open(
src_dict_file, 'w') as src_dict_out, open(tgt_dict_file,
'w') as tgt_dict_out:
def extract_dict_features(pair_file, feature_file):
with open(pair_file) as fin, open(feature_file, 'w') as feature_out:
for line in fin:
sentence, labels = line.strip().split('\t')
sentence, predicate, labels = line.strip().split('\t')
sentence_list = sentence.split()
labels_list = labels.split()
src_dict.update(sentence_list)
tgt_dict.update(labels_list)
verb_index = labels_list.index('B-V')
verb_feature = sentence_list[verb_index]
mark = [0] * len(labels_list)
if verb_index > 0:
@ -42,47 +33,50 @@ def extract_dict_features(pair_file, feature_file, src_dict_file,
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
ctx_n1_feature = ctx_n1
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
else:
ctx_n2 = 'bos'
mark[verb_index] = 1
ctx_0_feature = sentence_list[verb_index]
ctx_0 = sentence_list[verb_index]
if verb_index < len(labels_list) - 2:
mark[verb_index + 1] = 1
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
ctx_p1_feature = ctx_p1
if verb_index < len(labels_list) - 3:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
else:
ctx_p2 = 'eos'
feature_str = sentence + '\t' \
+ verb_feature + '\t' \
+ ctx_n1_feature + '\t' \
+ ctx_0_feature + '\t' \
+ ctx_p1_feature + '\t' \
+ predicate + '\t' \
+ ctx_n2 + '\t' \
+ ctx_n1 + '\t' \
+ ctx_0 + '\t' \
+ ctx_p1 + '\t' \
+ ctx_p2 + '\t' \
+ ' '.join([str(i) for i in mark]) + '\t' \
+ labels
feature_out.write(feature_str + '\n')
src_dict_out.write('<unk>\n')
src_dict_out.write('\n'.join(list(src_dict)))
tgt_dict_out.write('\n'.join(list(tgt_dict)))
if __name__ == '__main__':
usage = '-p pair_file -f feature_file -s source dictionary -t target dictionary '
usage = '-p pair_file -f feature_file'
parser = OptionParser(usage)
parser.add_option('-p', dest='pair_file', help='the pair file')
parser.add_option(
'-f', dest='feature_file', help='the file to store feature')
parser.add_option(
'-s', dest='src_dict', help='the file to store source dictionary')
parser.add_option(
'-t', dest='tgt_dict', help='the file to store target dictionary')
parser.add_option('-f', dest='feature_file', help='the feature file')
(options, args) = parser.parse_args()
extract_dict_features(options.pair_file, options.feature_file,
options.src_dict, options.tgt_dict)
extract_dict_features(options.pair_file, options.feature_file)
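
For orientation, each line the rewritten extract_dict_features() emits now carries nine tab-separated fields: sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, the predicate-window mark sequence, and the labels. The snippet below builds one such line; the sentence and SRL tags are invented for illustration and are not from the dataset.

```python
# Illustrative only -- a made-up sentence/label pair, formatted the way the
# new extract_dict_features() writes its feature lines.
sentence = 'the cat chased the small mouse'
labels   = 'B-A0 I-A0 B-V B-A1 I-A1 I-A1'   # hypothetical tags; B-V marks the predicate
feature_line = '\t'.join([
    sentence,
    'chased',            # predicate
    'the', 'cat',        # ctx_n2, ctx_n1: two words to the left of the predicate
    'chased',            # ctx_0: the predicate token itself
    'the', 'small',      # ctx_p1, ctx_p2: two words to the right
    '1 1 1 1 1 0',       # mark: 1 for words inside the five-word predicate window
    labels,
])
```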

@ -51,7 +51,7 @@ def read_sentences(words_file):
for line in fin:
line = line.strip()
if line == '':
sentences.append(s.lower())
sentences.append(s)
s = ''
else:
s += line + ' '
@ -64,6 +64,11 @@ def transform_labels(sentences, labels):
if len(labels[i]) == 1:
continue
else:
verb_list = []
for x in labels[i][0]:
if x !='-':
verb_list.append(x)
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
current_tag = 'O'
@ -88,8 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
sen_lab_pair.append((sentences[i], label_seq))
sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
return sen_lab_pair
@ -97,9 +101,9 @@ def write_file(sen_lab_pair, output_file):
with open(output_file, 'w') as fout:
for x in sen_lab_pair:
sentence = x[0]
label_seq = ' '.join(x[1])
assert len(sentence.split()) == len(x[1])
fout.write(sentence + '\t' + label_seq + '\n')
label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
if __name__ == '__main__':

@ -14,6 +14,10 @@
# limitations under the License.
set -e
wget http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/verbDict.txt --no-check-certificate
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/targetDict.txt --no-check-certificate
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/wordDict.txt --no-check-certificate
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/semantic_role_labeling/emb --no-check-certificate
tar -xzvf conll05st-tests.tar.gz
rm conll05st-tests.tar.gz
cp ./conll05st-release/test.wsj/words/test.wsj.words.gz .
@ -22,4 +26,4 @@ gunzip test.wsj.words.gz
gunzip test.wsj.props.gz
python extract_pairs.py -w test.wsj.words -p test.wsj.props -o test.wsj.seq_pair
python extract_dict_feature.py -p test.wsj.seq_pair -f feature -s src.dict -t tgt.dict
python extract_dict_feature.py -p test.wsj.seq_pair -f feature

@ -17,41 +17,52 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
def hook(settings, word_dict, label_dict, **kwargs):
def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
# all inputs are integer sequence types
settings.slots = [
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(2),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
@provider(init_hook=hook)
def process(obj, file_name):
def get_batch_size(yield_data):
return len(yield_data[0])
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = \
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [obj.word_dict.get(w, UNK_IDX) for w in words]
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]
predicate_slot = [obj.word_dict.get(predicate, UNK_IDX)] * sen_len
ctx_n1_slot = [obj.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [obj.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [obj.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
predicate_slot = [settings.predicate_dict.get(predicate)] * sen_len
ctx_n2_slot = [settings.word_dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [settings.word_dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [settings.word_dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [settings.word_dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [settings.word_dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
label_list = label.split()
label_slot = [obj.label_dict.get(w) for w in label_list]
yield word_slot, predicate_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, mark_slot, label_slot
label_slot = [settings.label_dict.get(w) for w in label_list]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot, label_slot

@ -18,8 +18,9 @@ import sys
from paddle.trainer_config_helpers import *
#file paths
word_dict_file = './data/src.dict'
label_dict_file = './data/tgt.dict'
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file= './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
@ -30,8 +31,10 @@ if not is_predict:
#load dictionaries
word_dict = dict()
label_dict = dict()
predicate_dict = dict()
with open(word_dict_file, 'r') as f_word, \
open(label_dict_file, 'r') as f_label:
open(label_dict_file, 'r') as f_label, \
open(predicate_file, 'r') as f_pre:
for i, line in enumerate(f_word):
w = line.strip()
word_dict[w] = i
@ -40,6 +43,11 @@ if not is_predict:
w = line.strip()
label_dict[w] = i
for i, line in enumerate(f_pre):
w = line.strip()
predicate_dict[w] = i
if is_test:
train_list_file = None
@ -50,91 +58,157 @@ if not is_predict:
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
'label_dict': label_dict})
'label_dict': label_dict,
'predicate_dict': predicate_dict })
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(predicate_dict)
else:
word_dict_len = get_config_arg('dict_len', int)
label_dict_len = get_config_arg('label_len', int)
pred_len = get_config_arg('pred_len', int)
############################## Hyper-parameters ##################################
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 128
hidden_dim = 512
depth = 8
emb_lr = 1e-2
fc_lr = 1e-2
lstm_lr = 2e-2
########################### Optimizer #######################################
settings(
batch_size=150,
learning_method=AdamOptimizer(),
learning_rate=1e-3,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)
is_async=False,
model_average=ModelAverage(average_window=0.5,
max_average_window=10000),
)
#6 features
####################################### network ##############################
#8 features and 1 target
word = data_layer(name='word_data', size=word_dict_len)
predicate = data_layer(name='verb_data', size=word_dict_len)
predicate = data_layer(name='verb_data', size=pred_len)
ctx_n2 = data_layer(name='ctx_n2_data', size=word_dict_len)
ctx_n1 = data_layer(name='ctx_n1_data', size=word_dict_len)
ctx_0 = data_layer(name='ctx_0_data', size=word_dict_len)
ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
ptt = ParameterAttribute(name='src_emb', learning_rate=emb_lr)
layer_attr = ExtraLayerAttribute(drop_rate=0.5)
fc_para_attr = ParameterAttribute(learning_rate=fc_lr)
lstm_para_attr = ParameterAttribute(initial_std=0., learning_rate=lstm_lr)
para_attr = [fc_para_attr, lstm_para_attr]
word_embedding = embedding_layer(size=word_dim, input=word, param_attr=ptt)
predicate_embedding = embedding_layer(
size=word_dim, input=predicate, param_attr=ptt)
ctx_n1_embedding = embedding_layer(size=word_dim, input=ctx_n1, param_attr=ptt)
ctx_0_embedding = embedding_layer(size=word_dim, input=ctx_0, param_attr=ptt)
ctx_p1_embedding = embedding_layer(size=word_dim, input=ctx_p1, param_attr=ptt)
mark_embedding = embedding_layer(size=mark_dim, input=mark)
default_std=1/math.sqrt(hidden_dim)/3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
input=[
full_matrix_projection(input=word_embedding),
full_matrix_projection(input=predicate_embedding),
full_matrix_projection(input=ctx_n1_embedding),
full_matrix_projection(input=ctx_0_embedding),
full_matrix_projection(input=ctx_p1_embedding),
full_matrix_projection(input=mark_embedding),
])
bias_attr=std_default,
input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
lstm_0 = lstmemory(input=hidden_0, layer_attr=layer_attr)
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
fc = fc_layer(input=input_tmp, size=hidden_dim, param_attr=para_attr)
mix_hidden = mixed_layer(name='hidden'+str(i),
size=hidden_dim,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
]
)
lstm = lstmemory(name='lstm'+str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2)==1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(name='output',
size=label_dict_len,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
],
)
lstm = lstmemory(
input=fc,
act=ReluActivation(),
reverse=(i % 2) == 1,
layer_attr=layer_attr)
input_tmp = [fc, lstm]
prob = fc_layer(
input=input_tmp,
size=label_dict_len,
act=SoftmaxActivation(),
param_attr=para_attr)
if not is_predict:
cls = classification_cost(input=prob, label=target)
outputs(cls)
crf_l = crf_layer( name = 'crf',
size = label_dict_len,
input = feature_out,
label = target,
param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
)
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
size = label_dict_len,
input = feature_out,
label = target,
param_attr=ParameterAttribute(name='crfw')
)
eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l)
else:
outputs(prob)
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
size = label_dict_len,
input = feature_out,
param_attr=ParameterAttribute(name='crfw')
)
outputs(crf_dec_l)

@ -26,7 +26,7 @@ UNK_IDX = 0
class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file):
def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@ -35,26 +35,37 @@ class Prediction():
self.dict = {}
self.labels = {}
self.predicate_dict={}
self.labels_reverse = {}
self.load_dict_label(dict_file, label_file)
self.load_dict_label(dict_file, label_file, predicate_dict_file)
len_dict = len(self.dict)
len_label = len(self.labels)
conf = parse_config(train_conf, 'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) + ',is_predict=True')
len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
',pred_len=' + str(len_pred) +
',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(2)
]
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_pred),
integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file):
def load_dict_label(self, dict_file, label_file, predicate_dict_file):
"""
Load dictionary from self.dict_file.
"""
@ -65,39 +76,42 @@ class Prediction():
self.labels[line.strip()] = line_count
self.labels_reverse[line_count] = line.strip()
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
"""
with open(data_file, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n1, ctx_0, ctx_p1, mark, label = line.strip(
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = line.strip(
).split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.dict.get(predicate, UNK_IDX)] * sen_len
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
ctx_p1_slot = [self.dict.get(ctx_p1, UNK_IDX)] * sen_len
ctx_p2_slot = [self.dict.get(ctx_p2, UNK_IDX)] * sen_len
marks = mark.split()
mark_slot = [int(w) for w in marks]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
yield word_slot, predicate_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, mark_slot
def predict(self, data_file):
def predict(self, data_file, output_file):
"""
data_file: file name of input data.
"""
input = self.converter(self.get_data(data_file))
output = self.network.forwardTest(input)
prob = output[0]["value"]
lab = list(np.argsort(-prob)[:, 0])
lab = output[0]["id"].tolist()
with open(data_file, 'r') as fin, open('predict.res', 'w') as fout:
with open(data_file, 'r') as fin, open(output_file, 'w') as fout:
index = 0
for line in fin:
sen = line.split('\t')[0]
@ -109,8 +123,8 @@ class Prediction():
def option_parser():
usage = ("python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file")
usage = ("python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
@ -131,6 +145,13 @@ def option_parser():
dest="label_file",
default=None,
help="label file")
parser.add_option(
"-p",
"--predict_dict_file",
action="store",
dest="predict_dict_file",
default=None,
help="predict_dict_file")
parser.add_option(
"-i",
"--data",
@ -144,6 +165,14 @@ def option_parser():
dest="model_path",
default=None,
help="model path")
parser.add_option(
"-o",
"--output_file",
action="store",
dest="output_file",
default=None,
help="output file")
return parser.parse_args()
@ -154,10 +183,12 @@ def main():
dict_file = options.dict_file
model_path = options.model_path
label_file = options.label_file
predict_dict_file = options.predict_dict_file
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file)
predict.predict(data_file)
predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
predict.predict(data_file,output_file)
if __name__ == '__main__':

@ -18,7 +18,7 @@ set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' | \
sort | head -n 1
sort -n | head -n 1
}
log=train.log
@ -26,15 +26,18 @@ LOG=`get_best_pass $log`
LOG=(${LOG})
best_model_path="output/pass-${LOG[1]}"
config_file=db_lstm.py
dict_file=./data/src.dict
label_file=./data/tgt.dict
dict_file=./data/wordDict.txt
label_file=./data/targetDict.txt
predicate_dict_file=./data/verbDict.txt
input_file=./data/feature
output_file=predict.res
python predict.py \
-c $config_file \
-w $best_model_path \
-l $label_file \
-p $predicate_dict_file \
-d $dict_file \
-i $input_file
-i $input_file \
-o $output_file

@ -18,7 +18,7 @@ set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* cost=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort | head -n 1
sort -n | head -n 1
}
log=train.log
@ -36,4 +36,5 @@ paddle train \
--job=test \
--use_gpu=false \
--config_args=is_test=1 \
--test_all_data_in_one_period=1 \
2>&1 | tee 'test.log'

@ -16,11 +16,14 @@
set -e
paddle train \
--config=./db_lstm.py \
--use_gpu=0 \
--log_period=5000 \
--trainer_count=1 \
--show_parameter_stats_period=5000 \
--save_dir=./output \
--trainer_count=4 \
--log_period=10 \
--num_passes=500 \
--use_gpu=false \
--show_parameter_stats_period=10 \
--num_passes=10000 \
--average_test_period=10000000 \
--init_model_path=./data \
--load_missing_parameter_strategy=rand \
--test_all_data_in_one_period=1 \
2>&1 | tee 'train.log'
2>&1 | tee 'train.log'

@ -17,7 +17,7 @@ set -e
function get_best_pass() {
cat $1 | grep -Pzo 'Test .*\n.*pass-.*' | \
sed -r 'N;s/Test.* classification_error_evaluator=([0-9]+\.[0-9]+).*\n.*pass-([0-9]+)/\1 \2/g' |\
sort | head -n 1
sort -n | head -n 1
}
log=train.log
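
The `sort` to `sort -n` change repeated in these scripts switches from lexicographic to numeric ordering of the test cost/error column, which matters once values have different digit counts. A quick Python illustration of the difference (not part of any script here):

```python
lines = ['10.5 00012', '9.8 00034']          # "<test cost> <pass number>"
print(sorted(lines)[0])                      # lexicographic: picks '10.5 00012' (wrong)
print(min(lines, key=lambda s: float(s.split()[0])))  # numeric: picks '9.8 00034'
```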

@ -29,6 +29,7 @@ settings(
batch_size=128,
learning_rate=2e-3,
learning_method=AdamOptimizer(),
average_window=0.5,
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25)

@ -16,9 +16,7 @@ set -e
set -x
# download the in-house paraphrase dataset
# following is the google drive address
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/embedding/paraphrase.tar.gz --no-check-certificate
wget http://paddlepaddle.bj.bcebos.com/model_zoo/embedding/paraphrase.tar.gz
# untar the dataset
tar -zxvf paraphrase.tar.gz

@ -16,9 +16,7 @@ set -e
set -x
# download the pretrained model
# following is the google drive address
# you can also directly download from https://pan.baidu.com/s/1o8q577s
wget https://www.googledrive.com/host/0B7Q8d52jqeI9ejh6Q1RpMTFQT1k/wmt14_model.tar.gz --no-check-certificate
wget http://paddlepaddle.bj.bcebos.com/model_zoo/wmt14_model.tar.gz
# untar the model
tar -zxvf wmt14_model.tar.gz

@ -17,7 +17,7 @@ PaddlePaddle does not need any preprocessing of sequence data, such as padding.
.. code-block:: python
settings.slots = [
settings.input_types = [
integer_value_sequence(len(settings.src_dict)),
integer_value_sequence(len(settings.trg_dict)),
integer_value_sequence(len(settings.trg_dict))]
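
To make the renamed `input_types` concrete, here is a minimal provider sketch in the style of the dataproviders elsewhere in this diff. The dictionary arguments and the tab-separated file layout are assumptions for illustration, not part of this commit.

```python
from paddle.trainer.PyDataProvider2 import *

def hook(settings, src_dict, trg_dict, **kwargs):
    settings.src_dict, settings.trg_dict = src_dict, trg_dict
    settings.input_types = [
        integer_value_sequence(len(src_dict)),
        integer_value_sequence(len(trg_dict)),
    ]

@provider(init_hook=hook)
def process(settings, file_name):
    with open(file_name) as f:
        for line in f:
            # assumed layout: "source sentence \t target sentence"
            src, trg = line.strip().split('\t')
            # each slot is yielded at its natural length -- no padding
            yield ([settings.src_dict.get(w, 0) for w in src.split()],
                   [settings.trg_dict.get(w, 0) for w in trg.split()])
```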

@ -6,10 +6,10 @@ Installing from Sources
* [3. Build on Ubuntu](#ubuntu)
## <span id="download">Download and Setup</span>
You can download PaddlePaddle from the [github source](https://github.com/gangliao/Paddle).
You can download PaddlePaddle from the [github source](https://github.com/PaddlePaddle/Paddle).
```bash
git clone https://github.com/baidu/Paddle paddle
git clone https://github.com/PaddlePaddle/Paddle paddle
cd paddle
```
@ -95,7 +95,7 @@ As a simple example, consider the following:
```bash
# necessary
sudo apt-get update
sudo apt-get install -y g++ make cmake build-essential libatlas-base-dev python python-pip libpython-dev m4 libprotobuf-dev protobuf-compiler python-protobuf python-numpy git
sudo apt-get install -y g++ make cmake swig build-essential libatlas-base-dev python python-pip libpython-dev m4 libprotobuf-dev protobuf-compiler python-protobuf python-numpy git
# optional
sudo apt-get install libgoogle-glog-dev
sudo apt-get install libgflags-dev
@ -149,15 +149,15 @@ If still not found, you can manually set it based on CMake error information fro
As a simple example, consider the following:
- **Only CPU**
- **Only CPU with swig**
```bash
cmake .. -DWITH_GPU=OFF
cmake .. -DWITH_GPU=OFF -DWITH_SWIG_PY=ON
```
- **GPU**
- **GPU with swig**
```bash
cmake .. -DWITH_GPU=ON
cmake .. -DWITH_GPU=ON -DWITH_SWIG_PY=ON
```
- **GPU with doc and swig**
@ -170,15 +170,13 @@ Finally, you can build PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<path to install>
cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<path to install> -DWITH_SWIG_PY=ON
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<path to install>/bin:$PATH
```
**Note:**
If you set `WITH_SWIG_PY=ON`, the related Python dependencies also need to be installed.
Otherwise, PaddlePaddle will automatically install the Python dependencies
the first time a user runs paddle commands such as `paddle version` or `paddle train`.

@ -477,7 +477,7 @@ The scripts of data downloading, network configurations, and training scripts are
<td class="left">Word embedding</td>
<td class="left"> 15MB </td>
<td class="left"> 8.484%</td>
<td class="left">trainer_config.bow.py</td>
<td class="left">trainer_config.emb.py</td>
</tr>
<tr>

Binary file not shown (an image of 52 KiB was added).

Some files were not shown because too many files have changed in this diff.
