Merge conflict with maxout layer

8 years ago · fd4eeaf59c
parent ddfff3a7fd 46bd5f53e3
commit fd4eeaf59c
122 changed files with 3254 additions and 753 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,4 +3,6 @@ build/
 *.user
 .vscode
-.idea
+.idea
 .project
 .pydevproject
--- a/.travis.yml
+++ b/.travis.yml
@ -2,9 +2,17 @@ language: cpp
 cache: ccache
 sudo: required
 dist: trusty
 os:
  - linux
  - osx
 env:
  - JOB=DOCS
  - JOB=BUILD_AND_TEST
 matrix:
  exclude:
    - os: osx
      env: JOB=DOCS  # Only generate documentation in linux
 addons:
  apt:
    packages:
@ -27,9 +35,11 @@ addons:
      - libgoogle-glog-dev
      - libgflags-dev
      - libgtest-dev
      - graphviz
 before_install:
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
  - pip install wheel protobuf sphinx breathe recommonmark
  - sudo paddle/scripts/travis/before_install.sh
 script:
  - paddle/scripts/travis/main.sh
 notifications:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8)
 project(paddle CXX C)
 set(PADDLE_MAJOR_VERSION 0)
 set(PADDLE_MINOR_VERSION 8)
-set(PADDLE_PATCH_VERSION 0b1)
+set(PADDLE_PATCH_VERSION 0b2)
 set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
@ -104,7 +104,7 @@ else()
 endif(NOT WITH_GPU)
 if(WITH_DOUBLE)
-    add_definitions(-DPADDLE_TYPE_DOUBLE -DHPPL_TYPE_DOUBLE)
+    add_definitions(-DPADDLE_TYPE_DOUBLE)
    set(ACCURACY double)
 else(WITH_DOUBLE)
    set(ACCURACY float)
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@ -17,10 +17,17 @@
 ## Find MKL First.
 set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
-find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT}/include)
+find_path(MKL_INCLUDE_DIR mkl.h PATHS
-find_library(MKL_CORE_LIB NAMES mkl_core PATHS ${MKL_ROOT}/lib)
+  ${MKL_ROOT}/include)
-find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS ${MKL_ROOT}/lib)
+find_library(MKL_CORE_LIB NAMES mkl_core PATHS
-find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS ${MKL_ROOT}/lib)
+  ${MKL_ROOT}/lib
  ${MKL_ROOT}/lib/intel64)
 find_library(MKL_SEQUENTIAL_LIB NAMES mkl_sequential PATHS
  ${MKL_ROOT}/lib
  ${MKL_ROOT}/lib/intel64)
 find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
  ${MKL_ROOT}/lib
  ${MKL_ROOT}/lib/intel64)
 if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@ -64,7 +64,9 @@ set(COMMON_FLAGS
    -Wdelete-non-virtual-dtor
    -Wno-unused-parameter
    -Wno-error=literal-suffix
-    -Wno-error=unused-local-typedefs)
+    -Wno-error=unused-local-typedefs
    -Wno-error=unused-function  # Warnings in Numpy Header.
 )
 foreach(flag ${COMMON_FLAGS})
    safe_set_cflag(CMAKE_C_FLAGS ${flag})
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@ -184,3 +184,20 @@ macro(add_paddle_culib TARGET_NAME)
    cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
    set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
 endmacro()
 # Creates a C resources file from the given resource file.
 #
 # Arguments:
 #   res_file - path of the file whose bytes are embedded
 #   output   - path of the C source file to write (overwritten on every call)
 #
 # The generated file defines `const unsigned char <name>[]` holding the bytes
 # of res_file and `const unsigned <name>_size`, where <name> is the last path
 # component of res_file with every '.', ' ' and '-' replaced by '_'.
 #
 # All expansions are quoted so that paths containing spaces or ';' are not
 # split into several arguments and an empty res_file (empty hex string) does
 # not leave string(REGEX REPLACE) without its input argument.
 function(create_resources res_file output)
    # Create empty output file
    file(WRITE "${output}" "")
    # Get short filename (everything after the last '/')
    string(REGEX MATCH "([^/]+)$" filename "${res_file}")
    # Replace filename spaces & extension separator for C compatibility
    string(REGEX REPLACE "\\.| |-" "_" filename "${filename}")
    # Read the file content as a string of hex digit pairs
    file(READ "${res_file}" filedata HEX)
    # Turn every hex pair "ab" into the C literal "0xab,"
    string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," filedata "${filedata}")
    # Append data to output file
    file(APPEND "${output}" "const unsigned char ${filename}[] = {${filedata}};\nconst unsigned ${filename}_size = sizeof(${filename});\n")
 endfunction()
--- a/demo/mnist/.gitignore
+++ b/demo/mnist/.gitignore
@ -0,0 +1,6 @@
 data/raw_data
 data/*.list
 mnist_vgg_model
 plot.png
 train.log
 *pyc
--- a/demo/mnist/data/generate_list.py
+++ b/demo/mnist/data/generate_list.py
@ -0,0 +1,21 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Write train.list and test.list into the current directory. Each list file
 # holds a single line: the path prefix of the corresponding raw data set.
 # `with` guarantees the file is flushed and closed even if the write fails.
 with open("./" + "train.list", "w") as train_list:
    train_list.write("./data/raw_data/train" + "\n")
 with open("./" + "test.list", "w") as test_list:
    test_list.write("./data/raw_data/t10k" + "\n")
--- a/demo/mnist/data/get_mnist_data.sh
+++ b/demo/mnist/data/get_mnist_data.sh
@ -0,0 +1,22 @@
 #!/usr/bin/env sh
 # This script downloads the MNIST data into raw_data/ next to this script,
 # unzips it, and then regenerates the *.list files via generate_list.py.
 set -e
 # Absolute path of the directory that contains this script
 DIR="$( cd "$(dirname "$0")" ; pwd -P )"
 # Always start from a fresh raw_data directory
 rm -rf "$DIR/raw_data"
 mkdir "$DIR/raw_data"
 cd "$DIR/raw_data"
 echo "Downloading..."
 for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
 do
    if [ ! -e "$fname" ]; then
        wget --no-check-certificate "http://yann.lecun.com/exdb/mnist/${fname}.gz"
        gunzip "${fname}.gz"
    fi
 done
 # Quoted so that a checkout path containing spaces still works
 cd "$DIR"
 rm -f *.list
 python generate_list.py
--- a/demo/mnist/mnist_provider.py
+++ b/demo/mnist/mnist_provider.py
@ -0,0 +1,32 @@
 from paddle.trainer.PyDataProvider2 import *
 # Define a py data provider
@provider(input_types={
    'pixel': dense_vector(28 * 28),
    'label': integer_value(10)
 })
 def process(settings, filename):  # settings is not used currently.
    imgf = filename + "-images-idx3-ubyte"
    labelf = filename + "-labels-idx1-ubyte"
    f = open(imgf, "rb")
    l = open(labelf, "rb")
    f.read(16)
    l.read(8)
    # Define number of samples for train/test
    if "train" in filename:
        n = 60000
    else:
        n = 10000
    for i in range(n):
        label = ord(l.read(1))
        pixels = []
        for j in range(28 * 28):
            pixels.append(float(ord(f.read(1))) / 255.0)
        yield {"pixel": pixels, 'label': label}
    f.close()
    l.close()
--- a/demo/mnist/train.sh
+++ b/demo/mnist/train.sh
@ -0,0 +1,31 @@
 #!/bin/bash
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Abort on the first failing command. pipefail is needed as well: without it
 # the exit status of `paddle train` is replaced by that of `tee`, and the
 # script would carry on and plot the log of a failed run.
 set -e
 set -o pipefail
 config=vgg_16_mnist.py
 output=./mnist_vgg_model
 log=train.log
 # Train on CPU with a single trainer; stdout and stderr are shown and also
 # captured in $log.
 paddle train \
 --config="$config" \
 --dot_period=10 \
 --log_period=100 \
 --test_all_data_in_one_period=1 \
 --use_gpu=0 \
 --trainer_count=1 \
 --num_passes=100 \
 --save_dir="$output" \
 2>&1 | tee "$log"
 # Feed the captured log to plotcurve and store its output as plot.png
 python -m paddle.utils.plotcurve -i "$log" > plot.png
--- a/demo/mnist/vgg_16_mnist.py
+++ b/demo/mnist/vgg_16_mnist.py
@ -0,0 +1,53 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 is_predict = get_config_arg("is_predict", bool, False)
 ####################Data Configuration ##################
 if not is_predict:
  data_dir='./data/'
  define_py_data_sources2(train_list= data_dir + 'train.list',
                        test_list= data_dir + 'test.list',
                        module='mnist_provider',
                        obj='process')
 ######################Algorithm Configuration #############
 settings(
    batch_size = 128,
    learning_rate = 0.1 / 128.0,
    learning_method = MomentumOptimizer(0.9),
    regularization = L2Regularization(0.0005 * 128)
 )
 #######################Network Configuration #############
 data_size=1*28*28
 label_size=10
 img = data_layer(name='pixel', size=data_size)
 # small_vgg is predefined in trainer_config_helpers.network
 predict = small_vgg(input_image=img,
                    num_channels=1,
                    num_classes=label_size)
 if not is_predict:
    lbl = data_layer(name="label", size=label_size)
    inputs(img, lbl)
    outputs(classification_cost(input=predict, label=lbl))
 else:
    outputs(predict)
--- a/demo/quick_start/preprocess.sh
+++ b/demo/quick_start/preprocess.sh
@ -20,6 +20,8 @@
 set -e
 export LC_ALL=C
 mkdir -p data/tmp
 python preprocess.py -i data/reviews_Electronics_5.json.gz
 # uniq and shuffle
--- a/demo/quick_start/train.sh
+++ b/demo/quick_start/train.sh
@ -18,6 +18,8 @@ cfg=trainer_config.lr.py
 #cfg=trainer_config.emb.py
 #cfg=trainer_config.cnn.py
 #cfg=trainer_config.lstm.py
 #cfg=trainer_config.bidi-lstm.py
 #cfg=trainer_config.db-lstm.py
 paddle train \
  --config=$cfg \
  --save_dir=./output \
--- a/demo/quick_start/trainer_config.bidi-lstm.py
+++ b/demo/quick_start/trainer_config.bidi-lstm.py
@ -0,0 +1,62 @@
 # edit-mode: -*- python -*-
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 dict_file = "./data/dict.txt"
 word_dict = dict()
 with open(dict_file, 'r') as f:
    for i, line in enumerate(f):
        w = line.strip().split()[0]
        word_dict[w] = i
 is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
 define_py_data_sources2(train_list=trn,
                        test_list=tst,
                        module="dataprovider_emb",
                        obj=process,
                        args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
    batch_size=batch_size,
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
    gradient_clipping_threshold=25
 )
 bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
 bi_lstm = bidirectional_lstm(input=emb, size=128)
 dropout = dropout_layer(input=bi_lstm, dropout_rate=0.5)
 output = fc_layer(input=dropout, size=2,
                  bias_attr=bias_attr,
                  act=SoftmaxActivation())
 if is_predict:
    maxid = maxid_layer(output)
    outputs([maxid, output])
 else:
    label = data_layer(name="label", size=2)
    cls = classification_cost(input=output, label=label)
    outputs(cls)
--- a/demo/quick_start/trainer_config.db-lstm.py
+++ b/demo/quick_start/trainer_config.db-lstm.py
@ -0,0 +1,73 @@
 # edit-mode: -*- python -*-
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 dict_file = "./data/dict.txt"
 word_dict = dict()
 with open(dict_file, 'r') as f:
    for i, line in enumerate(f):
        w = line.strip().split()[0]
        word_dict[w] = i
 is_predict = get_config_arg('is_predict', bool, False)
 trn = 'data/train.list' if not is_predict else None
 tst = 'data/test.list' if not is_predict else 'data/pred.list'
 process = 'process' if not is_predict else 'process_predict'
 define_py_data_sources2(train_list=trn,
                        test_list=tst,
                        module="dataprovider_emb",
                        obj=process,
                        args={"dictionary": word_dict})
 batch_size = 128 if not is_predict else 1
 settings(
    batch_size=batch_size,
    learning_rate=2e-3,
    learning_method=AdamOptimizer(),
    regularization=L2Regularization(8e-4),
    gradient_clipping_threshold=25
 )
 bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
 data = data_layer(name="word", size=len(word_dict))
 emb = embedding_layer(input=data, size=128)
 hidden_0 = mixed_layer(size=128, input=[full_matrix_projection(input=emb)])
 lstm_0 = lstmemory(input=hidden_0, layer_attr=ExtraAttr(drop_rate=0.1))
 input_layers = [hidden_0, lstm_0]
 for i in range(1,8):
    fc = fc_layer(input=input_layers, size=128)
    lstm = lstmemory(input=fc, layer_attr=ExtraAttr(drop_rate=0.1),
                    reverse=(i % 2) == 1,)
    input_layers = [fc, lstm]
 lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
 output = fc_layer(input=lstm_last, size=2,
                  bias_attr=bias_attr,
                  act=SoftmaxActivation())
 if is_predict:
    maxid = maxid_layer(output)
    outputs([maxid, output])
 else:
    label = data_layer(name="label", size=2)
    cls = classification_cost(input=output, label=label)
    outputs(cls)
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@ -96,12 +96,12 @@ def gru_encoder_decoder(data_conf,
    encoded_vector = concat_layer(input=[src_forward, src_backward])
    with mixed_layer(size=decoder_size) as encoded_proj:
-        encoded_proj += full_matrix_projection(encoded_vector)
+        encoded_proj += full_matrix_projection(input=encoded_vector)
    backward_first = first_seq(input=src_backward)
    with mixed_layer(size=decoder_size,
                     act=TanhActivation(), ) as decoder_boot:
-        decoder_boot += full_matrix_projection(backward_first)
+        decoder_boot += full_matrix_projection(input=backward_first)
    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
        decoder_mem = memory(name='gru_decoder',
@ -113,8 +113,8 @@ def gru_encoder_decoder(data_conf,
                                   decoder_state=decoder_mem, )
        with mixed_layer(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += full_matrix_projection(context)
+            decoder_inputs += full_matrix_projection(input=context)
-            decoder_inputs += full_matrix_projection(current_word)
+            decoder_inputs += full_matrix_projection(input=current_word)
        gru_step = gru_step_layer(name='gru_decoder',
                                  input=decoder_inputs,
--- a/demo/sequence_tagging/data/get_data.sh
+++ b/demo/sequence_tagging/data/get_data.sh
@ -0,0 +1,21 @@
 #!/bin/bash
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 set -e
 # Download into the directory that contains this script, whatever the
 # caller's working directory is. Quoted so paths with spaces work.
 DIR="$( cd "$(dirname "$0")" ; pwd -P )"
 cd "$DIR"
 wget http://www.cnts.ua.ac.be/conll2000/chunking/train.txt.gz
 wget http://www.cnts.ua.ac.be/conll2000/chunking/test.txt.gz
--- a/demo/sequence_tagging/data/test.list
+++ b/demo/sequence_tagging/data/test.list
@ -0,0 +1 @@
 data/test.txt.gz
--- a/demo/sequence_tagging/data/train.list
+++ b/demo/sequence_tagging/data/train.list
@ -0,0 +1 @@
 data/train.txt.gz
--- a/demo/sequence_tagging/dataprovider.py
+++ b/demo/sequence_tagging/dataprovider.py
--- a/demo/sequence_tagging/linear_crf.py
+++ b/demo/sequence_tagging/linear_crf.py
@ -0,0 +1,84 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 import math
 define_py_data_sources2(train_list="data/train.list",
                        test_list="data/test.list",
                        module="dataprovider",
                        obj="process")
 batch_size = 1
 settings(
    learning_method=MomentumOptimizer(),
    batch_size=batch_size,
    regularization=L2Regularization(batch_size * 1e-4),
    average_window=0.5,
    learning_rate=1e-1,
    learning_rate_decay_a=1e-5,
    learning_rate_decay_b=0.25,
 )
 num_label_types=23
 def get_simd_size(size):
    """Return `size` rounded up to the nearest multiple of 8, as an int."""
    groups_of_eight = math.ceil(float(size) / 8)
    return int(groups_of_eight) * 8
 # Currently, in order to use sparse_update=True,
 # the size has to be aligned.
 num_label_types = get_simd_size(num_label_types)
 features = data_layer(name="features", size=76328)
 word = data_layer(name="word", size=6778)
 pos = data_layer(name="pos", size=44)
 chunk = data_layer(name="chunk",
                   size=num_label_types)
 crf_input = fc_layer(
    input=features,
    size=num_label_types,
    act=LinearActivation(),
    bias_attr=False,
    param_attr=ParamAttr(initial_std=0, sparse_update=True))
 crf=crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw", initial_std=0),
 )
 crf_decoding=crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"),
 )
 sum_evaluator(
    name="error",
    input=crf_decoding,
 )
 chunk_evaluator(
    name="chunk_f1",
    input =[crf_decoding, chunk],
    chunk_scheme="IOB",
    num_chunk_types=11,
 )
 inputs(word, pos, chunk, features)
 outputs(crf)
--- a/demo/sequence_tagging/readme.md
+++ b/demo/sequence_tagging/readme.md
@ -0,0 +1,45 @@
 # Sequence Tagging
 This demo is a sequence model for assigning tags to each token in a sentence. The task is described on the <a href = "http://www.cnts.ua.ac.be/conll2000/chunking">CONLL2000 Text Chunking</a> page.
 ## Download data
 ```bash
 cd demo/sequence_tagging
 ./data/get_data.sh
 ```
 ## Train model
 ```bash
 cd demo/sequence_tagging
 ./train.sh
 ```
 ## Model description
 We provide two models. One is a linear CRF model (linear_crf.py), which is equivalent to the one at <a href="http://leon.bottou.org/projects/sgd#stochastic_gradient_crfs">leon.bottou.org/projects/sgd</a>. The second one is a stacked bidirectional RNN and CRF model (rnn_crf.py).
 <center>
 <table border="2" cellspacing="0" cellpadding="6" rules="all" frame="border">
 <thead>
 <th scope="col" class="left">Model name</th>
 <th scope="col" class="left">Number of parameters</th>
 <th scope="col" class="left">F1 score</th>
 </thead>
 <tbody>
 <tr>
 <td class="left">linear_crf</td>
 <td class="left"> 1.8M </td>
 <td class="left"> 0.937</td>
 </tr>
 <tr>
 <td class="left">rnn_crf</td>
 <td class="left"> 960K </td>
 <td class="left">0.941</td>
 </tr>
 </tbody>
 </table>
 </center>
 <br>
--- a/demo/sequence_tagging/rnn_crf.py
+++ b/demo/sequence_tagging/rnn_crf.py
@ -0,0 +1,130 @@
 # Copyright (c) 2016 Baidu, Inc. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from paddle.trainer_config_helpers import *
 import math
 define_py_data_sources2(train_list="data/train.list",
                        test_list="data/test.list",
                        module="dataprovider",
                        obj="process")
 batch_size = 16
 settings(
    learning_method=MomentumOptimizer(),
    batch_size=batch_size,
    regularization=L2Regularization(batch_size * 1e-5),
    average_window=0.5,
    learning_rate = 2e-3,
    learning_rate_decay_a = 5e-7,
    learning_rate_decay_b = 0.5,
 )
 word_dim=128
 hidden_dim = 128
 with_rnn = True
 initial_std=1/math.sqrt(hidden_dim)
 param_attr=ParamAttr(initial_std=initial_std)
 cpu_layer_attr=ExtraLayerAttribute(device=-1)
 default_device(0)
 num_label_types=23
 features = data_layer(name="features", size=76328)
 word = data_layer(name="word", size=6778)
 pos = data_layer(name="pos", size=44)
 chunk = data_layer(name="chunk",
                   size=num_label_types,
                   layer_attr=cpu_layer_attr)
 emb = embedding_layer(
    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))
 hidden1 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[full_matrix_projection(emb),
           table_projection(pos, param_attr=param_attr)]
 )
 if with_rnn:
    rnn1 = recurrent_layer(
        act=ReluActivation(),
        bias_attr=True,
        input=hidden1,
        param_attr=ParamAttr(initial_std=0),
    )
 hidden2 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[full_matrix_projection(hidden1)
    ] + ([
        full_matrix_projection(rnn1, param_attr=ParamAttr(initial_std=0))
    ] if with_rnn else []),
 )
 if with_rnn:
    rnn2=recurrent_layer(
        reverse=True,
        act=ReluActivation(),
        bias_attr=True,
        input=hidden2,
        param_attr=ParamAttr(initial_std=0),
    )
 crf_input = mixed_layer(
    size=num_label_types,
    bias_attr=False,
    input=[
        full_matrix_projection(hidden2),
    ] + ([
        full_matrix_projection(rnn2, param_attr=ParamAttr(initial_std=0))
    ] if with_rnn else []),
 )
 crf = crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw", initial_std=0),
    layer_attr=cpu_layer_attr,
 )
 crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"),
    layer_attr=cpu_layer_attr,
 )
 sum_evaluator(
    name="error",
    input=crf_decoding,
 )
 chunk_evaluator(
    name="chunk_f1",
    input =[crf_decoding, chunk],
    chunk_scheme="IOB",
    num_chunk_types=11,
 )
 inputs(word, pos, chunk, features)
 outputs(crf)
--- a/demo/sequence_tagging/train.sh
+++ b/demo/sequence_tagging/train.sh
@ -0,0 +1,10 @@
 #!/bin/bash
 paddle train \
       --config rnn_crf.py \
       --parallel_nn=1 \
       --use_gpu=1 \
       --dot_period=10 \
       --log_period=1000 \
       --test_period=0 \
       --num_passes=10
--- a/Show More
+++ b/Show More