Merge branch 'develop' of github.com:baidu/Paddle into feature/clean_mnist_v2

8 years ago · cbcd53af39
parent 9435025b8e 2f604064f5
commit cbcd53af39
6 changed files with 546 additions and 113 deletions
--- a/doc/api/trainer_config_helpers/layers.rst
+++ b/doc/api/trainer_config_helpers/layers.rst
@ -139,24 +139,12 @@ lstmemory
    :members: lstmemory
    :noindex:
 lstm_step_layer
 ---------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: lstm_step_layer
    :noindex:
 grumemory
 ---------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: grumemory
    :noindex:
 gru_step_layer
 ---------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: gru_step_layer
    :noindex:
 Recurrent Layer Group
 =====================
@ -172,6 +160,18 @@ recurrent_group
    :members: recurrent_group
    :noindex:
 lstm_step_layer
 ---------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: lstm_step_layer
    :noindex:
 gru_step_layer
 ---------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: gru_step_layer
    :noindex:
 beam_search
 ------------
 ..  automodule:: paddle.trainer_config_helpers.layers
@ -308,6 +308,12 @@ repeat_layer
    :members: repeat_layer
    :noindex:
 rotate_layer
 ------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: rotate_layer
    :noindex:
 seq_reshape_layer
 -----------------
 ..  automodule:: paddle.trainer_config_helpers.layers
@ -462,6 +468,12 @@ ctc_layer
    :members: ctc_layer
    :noindex:
 warp_ctc_layer
 --------------
 ..  automodule:: paddle.trainer_config_helpers.layers
    :members: warp_ctc_layer
    :noindex:
 nce_layer
 -----------
 ..  automodule:: paddle.trainer_config_helpers.layers
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@ -112,6 +112,8 @@ __all__ = [
    'priorbox_layer',
    'spp_layer',
    'pad_layer',
    'eos_layer',
    'layer_support',
 ]
@ -708,6 +710,7 @@ class MixedLayerType(LayerOutput):
        # update the size which might be computed inside MixedLayer
        # according to the operator's output size
        self.size = ml.config.size
        self.finalized = True
@wrap_name_default("mixed")
@ -1287,6 +1290,12 @@ def last_seq(input,
    """
    Get Last Timestamp Activation of a sequence.
    The simple usage is:
    .. code-block:: python
       seq = last_seq(input=layer)
    :param agg_level: Aggregated level
    :param name: Layer name.
    :type name: basestring
@ -1325,6 +1334,12 @@ def first_seq(input,
    """
    Get First Timestamp Activation of a sequence.
    The simple usage is:
    .. code-block:: python
       seq = first_seq(input=layer)
    :param agg_level: aggregation level
    :param name: Layer name.
    :type name: basestring
@ -1425,7 +1440,7 @@ def repeat_layer(input, num_repeats, name=None, layer_attr=None):
    .. code-block:: python
-       expand = repeat_layer(layer, 4)
+       expand = repeat_layer(input=layer, num_repeats=4)
    :param input: Input layer
    :type input: LayerOutput
@ -1797,6 +1812,12 @@ def cos_sim(a, b, scale=1, size=1, name=None, layer_attr=None):
    Note that the above computation is for one sample. Multiple samples are
    processed in one batch.
    The example usage is:
    .. code-block:: python
       cos = cos_sim(a=layer1, b=layer2, size=3)
    :param name: layer name
    :type name: basestring
    :param a: input layer a
@ -1958,6 +1979,16 @@ def img_conv_layer(input,
    pieces. The first 256/4 = 64 channels will be processed by the first 32
    filters. The remaining channels will be processed by the remaining groups of filters.
    The example usage is:
    ..  code-block:: python
        conv = img_conv_layer(input=data, filter_size=1, filter_size_y=1,
                              num_channels=8,
                              num_filters=16, stride=1,
                              bias_attr=False,
                              act=ReluActivation())
    :param name: Layer name.
    :type name: basestring
    :param input: Layer Input.
@ -2097,6 +2128,34 @@ def img_pool_layer(input,
    .. _pooling: http://ufldl.stanford.edu/tutorial/supervised/Pooling/
    - ceil_mode=True:
    ..  math::
        w = 1 + int(ceil((input\_width + 2 * padding - pool\_size) / float(stride)))
        h = 1 + int(ceil((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
    - ceil_mode=False:
    ..  math::
        w = 1 + int(floor((input\_width + 2 * padding - pool\_size) / float(stride)))
        h = 1 + int(floor((input\_height + 2 * padding\_y - pool\_size\_y) / float(stride\_y)))
    The example usage is:
    ..  code-block:: python
        maxpool = img_pool_layer(input=conv,
                                 pool_size=3,
                                 pool_size_y=5,
                                 num_channels=8,
                                 stride=1,
                                 stride_y=2,
                                 padding=1,
                                 padding_y=2,
                                 pool_type=MaxPooling())
    :param padding: pooling padding width.
    :type padding: int
    :param padding_y: pooling padding height. It's equal to padding by default.
@ -2123,19 +2182,6 @@ def img_pool_layer(input,
    :param ceil_mode: Whether to use ceil mode to calculate output height and width.
                      Default is True. If set to False, floor mode is used instead.
                      - ceil_mode=True:
                      ..  math::
                          w = 1 + int(ceil(input_width + 2 * padding - pool_size) / float(stride))
                          h = 1 + int(ceil(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
                      - ceil_mode=False:
                      ..  math::
                          w = 1 + int(floor(input_width + 2 * padding - pool_size) / float(stride))
                          h = 1 + int(floor(input_height + 2 * padding_y - pool_size_y) / float(stride_y))
    :type ceil_mode: bool
    :return: LayerOutput object.
    :rtype: LayerOutput
@ -2197,6 +2243,15 @@ def spp_layer(input,
    For details, please refer to
    `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
    The example usage is:
    ..  code-block:: python
        spp = spp_layer(input=data, 
                        pyramid_height=2, 
                        num_channels=16, 
                        pool_type=MaxPooling())
    :param name: layer name.
    :type name: basestring
    :param input: layer's input.
@ -2285,6 +2340,12 @@ def img_cmrnorm_layer(input,
    For details, please refer to
    `Alex's paper <http://www.cs.toronto.edu/~fritz/absps/imagenet.pdf>`_.
    The example usage is:
    ..  code-block:: python
        norm = img_cmrnorm_layer(input=net, size=5)
    :param name: layer name.
    :type name: None|basestring
    :param input: layer's input.
@ -2340,6 +2401,12 @@ def batch_norm_layer(input,
    For details of batch normalization, please refer to this
    `paper <http://arxiv.org/abs/1502.03167>`_.
    The example usage is:
    ..  code-block:: python
        norm = batch_norm_layer(input=net, act=ReluActivation())
    :param name: layer name.
    :type name: basestring
    :param input: batch normalization input. Better be linear activation.
@ -3903,13 +3970,13 @@ def conv_shift_layer(a, b, name=None, layer_attr=None):
    .. code-block:: python
-       conv_shift = conv_shift_layer(input=[layer1, layer2])
+       conv_shift = conv_shift_layer(a=layer1, b=layer2)
    :param name: layer name
    :type name: basestring
    :param a: Input layer a.
    :type a: LayerOutput
-    :param b: input layer b
+    :param b: input layer b.
    :type b: LayerOutput
    :param layer_attr: layer's extra attribute.
    :type layer_attr: ExtraLayerAttribute
@ -4001,8 +4068,8 @@ def tensor_layer(a,
@wrap_act_default()
@layer_support()
 def selective_fc_layer(input,
                       select,
                       size,
                       select=None,
                       act=None,
                       name=None,
                       pass_generation=False,
@ -4029,6 +4096,7 @@ def selective_fc_layer(input,
    :type input: LayerOutput|list|tuple
    :param select: The select layer. The output of select layer should be a
                   sparse binary matrix, and treat as the mask of selective fc.
                   If is None, acts exactly like fc_layer.
    :type select: LayerOutput
    :param size: The layer dimension.
    :type size: int
@ -4257,7 +4325,7 @@ def block_expand_layer(input,
    .. code-block:: python
-       block_expand = block_expand_layer(input,
+       block_expand = block_expand_layer(input=layer,
                                         num_channels=128,
                                         stride_x=1,
                                         stride_y=1,
@ -4461,7 +4529,7 @@ def warp_ctc_layer(input,
        - You can set 'blank' to any value in the range [0, num_classes], which
          should be consistent with the value used in your labels.
        - As a native 'softmax' activation is integrated into the warp-ctc library,
-         'linear' activation is expected instead in the 'input' layer.
+          'linear' activation is expected instead in the 'input' layer.
    The simple usage:
@ -4594,6 +4662,13 @@ def crf_decoding_layer(input,
    this layer will also calculate error. output.value[i] is 1 for incorrect
    decoding or 0 for correct decoding.
    The simple usage:
    .. code-block:: python
      crf_decoding = crf_decoding_layer(input=input,
                                        size=label_dim)
    :param input: The first input layer.
    :type input: LayerOutput
    :param size: size of this layer.
--- a/python/paddle/v2/init.py
+++ b/python/paddle/v2/init.py
@ -22,11 +22,12 @@ import data_feeder
 from . import dataset
 from . import reader
 import attr
 import pooling
 import py_paddle.swig_paddle as api
 __all__ = [
    'optimizer', 'layer', 'activation', 'parameters', 'init', 'trainer',
-    'event', 'data_type', 'attr', 'data_feeder', 'dataset', 'reader'
+    'event', 'data_type', 'attr', 'pooling', 'data_feeder', 'dataset', 'reader'
 ]
--- a/python/paddle/v2/layer.py
+++ b/python/paddle/v2/layer.py
--- a/python/paddle/v2/pooling.py
+++ b/python/paddle/v2/pooling.py
@ -0,0 +1,24 @@
 # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Short aliases for the pooling classes brought in by the star-import below.
 # Only the alias names listed in __all__ are exported from this module.
 from paddle.trainer_config_helpers.poolings import *
 __all__ = ["Max", "CudnnMax", "Avg", "CudnnAvg", "Sum", "SquareRootN"]
 # Each alias drops the "Pooling" suffix of the underlying class name.
 Max = MaxPooling
 CudnnMax = CudnnMaxPooling
 Avg = AvgPooling
 CudnnAvg = CudnnAvgPooling
 Sum = SumPooling
 SquareRootN = SquareRootNPooling
--- a/python/paddle/v2/tests/test_layer.py
+++ b/python/paddle/v2/tests/test_layer.py
@ -19,18 +19,106 @@ import paddle.v2.activation as activation
 import paddle.v2.attr as attr
 import paddle.v2.data_type as data_type
 import paddle.v2.layer as layer
 import paddle.v2.pooling as pooling
 from paddle.trainer_config_helpers.config_parser_utils import \
    parse_network_config as parse_network
-pixel = layer.data(name='pixel', type=data_type.dense_vector(784))
+pixel = layer.data(name='pixel', type=data_type.dense_vector(128))
 # Module-level layers shared by the test cases below (together with `pixel`,
 # which is defined just above this span).
 label = layer.data(name='label', type=data_type.integer_value(10))
 weight = layer.data(name='weight', type=data_type.dense_vector(10))
 score = layer.data(name='score', type=data_type.dense_vector(1))
 # Fully connected layer over `pixel`; its parameter is explicitly named 'hidden'.
 hidden = layer.fc(input=pixel,
                   size=100,
                   act=activation.Sigmoid(),
                   param_attr=attr.Param(name='hidden'))
 # 10-way softmax output on top of `hidden`.
 inference = layer.fc(input=hidden, size=10, act=activation.Softmax())
 # 1x1 convolution over `pixel`: 8 input channels, 16 filters, linear activation.
 conv = layer.img_conv(
    input=pixel,
    filter_size=1,
    filter_size_y=1,
    num_channels=8,
    num_filters=16,
    act=activation.Linear())
 class ImageLayerTest(unittest.TestCase):
    # Smoke tests for image-oriented layers. Each test builds layers on top of
    # the module-level `pixel`, `score` and `conv` layers and prints the result
    # of layer.parse_network(); no assertions are made on the output.
    def test_conv_layer(self):
        # Parse the shared conv layer together with a conv_shift layer.
        conv_shift = layer.conv_shift(a=pixel, b=score)
        print layer.parse_network(conv, conv_shift)
    def test_pooling_layer(self):
        # Max pooling, spatial pyramid pooling and maxout, all fed by `conv`
        # (declared here with num_channels=16, matching conv's num_filters).
        maxpool = layer.img_pool(
            input=conv,
            pool_size=2,
            num_channels=16,
            padding=1,
            pool_type=pooling.Max())
        spp = layer.spp(input=conv,
                        pyramid_height=2,
                        num_channels=16,
                        pool_type=pooling.Max())
        maxout = layer.maxout(input=conv, num_channels=16, groups=4)
        print layer.parse_network(maxpool, spp, maxout)
    def test_norm_layer(self):
        # Three normalization layers applied to the same `conv` input.
        norm1 = layer.img_cmrnorm(input=conv, size=5)
        norm2 = layer.batch_norm(input=conv)
        norm3 = layer.sum_to_one_norm(input=conv)
        print layer.parse_network(norm1, norm2, norm3)
 class AggregateLayerTest(unittest.TestCase):
    # Smoke test for aggregation layers: builds them from the module-level
    # `pixel` layer and prints the parsed network; nothing is asserted.
    def test_aggregate_layer(self):
        # Average pooling with an explicit aggregation level.
        pool = layer.pool(
            input=pixel,
            pooling_type=pooling.Avg(),
            agg_level=layer.AggregateLevel.EACH_SEQUENCE)
        last_seq = layer.last_seq(input=pixel)
        first_seq = layer.first_seq(input=pixel)
        # concat takes a list of inputs; seq_concat takes two named inputs a and b.
        concat = layer.concat(input=[last_seq, first_seq])
        seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
        print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat)
 class MathLayerTest(unittest.TestCase):
    # Smoke test for arithmetic-style layers built from the module-level
    # `pixel`, `weight`, `score`, `hidden` and `conv` layers. The parsed
    # network is printed; nothing is asserted.
    def test_math_layer(self):
        addto = layer.addto(input=[pixel, pixel])
        linear_comb = layer.linear_comb(weights=weight, vectors=hidden, size=10)
        interpolation = layer.interpolation(
            input=[hidden, hidden], weight=score)
        # NOTE(review): `bilinear` is constructed but is not passed to
        # parse_network() below -- confirm whether that omission is intentional.
        bilinear = layer.bilinear_interp(input=conv, out_size_x=4, out_size_y=4)
        power = layer.power(input=pixel, weight=score)
        scaling = layer.scaling(input=pixel, weight=score)
        slope = layer.slope_intercept(input=pixel)
        tensor = layer.tensor(a=pixel, b=pixel, size=1000)
        cos_sim = layer.cos_sim(a=pixel, b=pixel)
        trans = layer.trans(input=tensor)
        print layer.parse_network(addto, linear_comb, interpolation, power,
                                  scaling, slope, tensor, cos_sim, trans)
 class ReshapeLayerTest(unittest.TestCase):
    # Smoke test for layers that reshape, expand or repeat their input. Uses
    # the module-level `conv`, `weight` and `pixel` layers and prints the
    # parsed network; nothing is asserted.
    def test_reshape_layer(self):
        block_expand = layer.block_expand(
            input=conv, num_channels=4, stride_x=1, block_x=1)
        # Expand `weight` to follow the structure of `pixel`.
        expand = layer.expand(
            input=weight,
            expand_as=pixel,
            expand_level=layer.ExpandLevel.FROM_TIMESTEP)
        repeat = layer.repeat(input=pixel, num_repeats=4)
        reshape = layer.seq_reshape(input=pixel, reshape_size=4)
        # NOTE(review): height * width here is 16 * 49 = 784 -- verify this is
        # still consistent with the declared dimension of `pixel`.
        rotate = layer.rotate(input=pixel, height=16, width=49)
        print layer.parse_network(block_expand, expand, repeat, reshape, rotate)
 class RecurrentLayerTest(unittest.TestCase):
    # Smoke test for recurrent layers: builds a simple recurrent layer, an
    # LSTM and a GRU on one local data layer and prints the parsed network.
    def test_recurrent_layer(self):
        # Local integer-valued data layer used as the input of all three layers.
        word = layer.data(name='word', type=data_type.integer_value(12))
        recurrent = layer.recurrent(input=word)
        lstm = layer.lstmemory(input=word)
        gru = layer.grumemory(input=word)
        print layer.parse_network(recurrent, lstm, gru)
 class CostLayerTest(unittest.TestCase):
@ -51,12 +139,120 @@ class CostLayerTest(unittest.TestCase):
        cost10 = layer.sum_cost(input=inference)
        cost11 = layer.huber_cost(input=score, label=label)
-        print dir(layer)
+        print layer.parse_network(cost1, cost2)
-        layer.parse_network(cost1, cost2)
+        print layer.parse_network(cost3, cost4)
-        print dir(layer)
+        print layer.parse_network(cost5, cost6)
-        #print layer.parse_network(cost3, cost4)
+        print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
-        #print layer.parse_network(cost5, cost6)
+
-        #print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
+        crf = layer.crf(input=inference, label=label)
        crf_decoding = layer.crf_decoding(input=inference, size=3)
        ctc = layer.ctc(input=inference, label=label)
        warp_ctc = layer.warp_ctc(input=pixel, label=label)
        nce = layer.nce(input=inference, label=label, num_classes=3)
        hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
        print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce,
                                  hsigmoid)
 class OtherLayerTest(unittest.TestCase):
    # Smoke tests for miscellaneous layers; each test prints the parsed
    # network and makes no assertions.
    def test_sampling_layer(self):
        # max_id and sampling_id both consume the module-level `inference`
        # layer; eos is stacked on top of max_id.
        maxid = layer.max_id(input=inference)
        sampling_id = layer.sampling_id(input=inference)
        eos = layer.eos(input=maxid, eos_id=5)
        print layer.parse_network(maxid, sampling_id, eos)
    def test_slicing_joining_layer(self):
        # Pad the module-level `conv` output; each pad_* argument is a
        # two-element list.
        pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])
        print layer.parse_network(pad)
 class ProjOpTest(unittest.TestCase):
    # Smoke tests for projections and operators inside mixed layers. Each
    # projection/operator is attached to a mixed layer in two ways: through
    # the `input=` argument, and through `+=` inside a `with layer.mixed(...)`
    # block. The parsed networks are printed; nothing is asserted.
    def test_projection(self):
        # NOTE(review): the local name `input` shadows the Python builtin for
        # the rest of this method.
        input = layer.data(name='data', type=data_type.dense_vector(784))
        word = layer.data(
            name='word', type=data_type.integer_value_sequence(10000))
        fc0 = layer.fc(input=input, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=input, size=200, act=activation.Sigmoid())
        # Two full-matrix projections combined in one mixed layer.
        mixed0 = layer.mixed(
            size=256,
            input=[
                layer.full_matrix_projection(input=fc0),
                layer.full_matrix_projection(input=fc1)
            ])
        with layer.mixed(size=200) as mixed1:
            mixed1 += layer.full_matrix_projection(input=fc0)
            mixed1 += layer.identity_projection(input=fc1)
        # Table projection over the integer sequence input.
        table = layer.table_projection(input=word)
        emb0 = layer.mixed(size=512, input=table)
        with layer.mixed(size=512) as emb1:
            emb1 += table
        scale = layer.scaling_projection(input=fc0)
        scale0 = layer.mixed(size=100, input=scale)
        with layer.mixed(size=100) as scale1:
            scale1 += scale
        dotmul = layer.dotmul_projection(input=fc0)
        dotmul0 = layer.mixed(size=100, input=dotmul)
        with layer.mixed(size=100) as dotmul1:
            dotmul1 += dotmul
        # NOTE(review): context0 and context1 are built but are not passed to
        # parse_network() at the end of this method -- confirm whether that
        # omission is intentional.
        context = layer.context_projection(input=fc0, context_len=5)
        context0 = layer.mixed(size=100, input=context)
        with layer.mixed(size=100) as context1:
            context1 += context
        # This local `conv` shadows the module-level `conv` layer inside this
        # method only.
        conv = layer.conv_projection(
            input=input,
            filter_size=1,
            num_channels=1,
            num_filters=128,
            stride=1,
            padding=0)
        # The mixed layers wrapping the conv projection are given no explicit size.
        conv0 = layer.mixed(input=conv, bias_attr=True)
        with layer.mixed(bias_attr=True) as conv1:
            conv1 += conv
        print layer.parse_network(mixed0)
        print layer.parse_network(mixed1)
        print layer.parse_network(emb0)
        print layer.parse_network(emb1)
        print layer.parse_network(scale0)
        print layer.parse_network(scale1)
        print layer.parse_network(dotmul0)
        print layer.parse_network(dotmul1)
        print layer.parse_network(conv0)
        print layer.parse_network(conv1)
    def test_operator(self):
        # ipt0 feeds both fc layers and is the image input of the conv
        # operator; ipt1 is used only as the conv operator's filter input.
        ipt0 = layer.data(name='data', type=data_type.dense_vector(784))
        ipt1 = layer.data(name='word', type=data_type.dense_vector(128))
        fc0 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
        fc1 = layer.fc(input=ipt0, size=100, act=activation.Sigmoid())
        # Operators are wrapped in mixed layers that are given no explicit size.
        dotmul_op = layer.dotmul_operator(a=fc0, b=fc1)
        dotmul0 = layer.mixed(input=dotmul_op)
        with layer.mixed() as dotmul1:
            dotmul1 += dotmul_op
        # This local `conv` shadows the module-level `conv` layer inside this
        # method only.
        conv = layer.conv_operator(
            img=ipt0,
            filter=ipt1,
            filter_size=1,
            num_channels=1,
            num_filters=128,
            stride=1,
            padding=0)
        conv0 = layer.mixed(input=conv)
        with layer.mixed() as conv1:
            conv1 += conv
        print layer.parse_network(dotmul0)
        print layer.parse_network(dotmul1)
        print layer.parse_network(conv0)
        print layer.parse_network(conv1)
 if __name__ == '__main__':