commit 80c9f66144
@@ -0,0 +1,74 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2 as paddle

__all__ = ['resnet_cifar10']


def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=paddle.activation.Relu(),
                  ch_in=None):
    tmp = paddle.layer.img_conv(
        input=input,
        filter_size=filter_size,
        num_channels=ch_in,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=paddle.activation.Linear(),
        bias_attr=False)
    return paddle.layer.batch_norm(input=tmp, act=active_type)


def shortcut(ipt, n_in, n_out, stride):
    if n_in != n_out:
        return conv_bn_layer(ipt, n_out, 1, stride, 0,
                             paddle.activation.Linear())
    else:
        return ipt


def basicblock(ipt, ch_out, stride):
    ch_in = ch_out * 2
    tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
    short = shortcut(ipt, ch_in, ch_out, stride)
    return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())


def layer_warp(block_func, ipt, features, count, stride):
    tmp = block_func(ipt, features, stride)
    for i in range(1, count):
        tmp = block_func(tmp, features, 1)
    return tmp


def resnet_cifar10(ipt, depth=32):
    # depth should be one of 20, 32, 44, 56, 110, 1202
    assert (depth - 2) % 6 == 0
    n = (depth - 2) / 6
    nStages = {16, 64, 128}
    conv1 = conv_bn_layer(
        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 64, n, 2)
    pool = paddle.layer.img_pool(
        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
    return pool
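A minimal usage sketch (not part of the commit) for the network defined above; it assumes the file is saved as api_v2_resnet.py, which is the module name the training script below imports from.

# Sketch only: assumes this file is api_v2_resnet.py and a PaddlePaddle v2 install.
import paddle.v2 as paddle
from api_v2_resnet import resnet_cifar10

paddle.init(use_gpu=False, trainer_count=1)

# CIFAR-10 images are 3 x 32 x 32, fed as a flattened 3072-dim dense vector
# (the same datadim the training script uses).
image = paddle.layer.data(
    name="image", type=paddle.data_type.dense_vector(3 * 32 * 32))

# depth must satisfy (depth - 2) % 6 == 0, i.e. one of 20, 32, 44, 56, 110, 1202.
net = resnet_cifar10(image, depth=32)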
@@ -0,0 +1,91 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop
from api_v2_resnet import resnet_cifar10


def main():
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    net = resnet_cifar10(image, depth=32)
    # option 2. vgg
    # net = vgg_bn_drop(image)

    out = paddle.layer.fc(input=net,
                          size=classdim,
                          act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp',
        batch_size=128)

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.reader.batched(
                    paddle.dataset.cifar.test10(), batch_size=128),
                reader_dict={'image': 0,
                             'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=momentum_optimizer)
    trainer.train(
        reader=paddle.reader.batched(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=5,
        event_handler=event_handler,
        reader_dict={'image': 0,
                     'label': 1})


if __name__ == '__main__':
    main()
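A small sketch (not part of the commit) of what the reader pipeline handed to trainer.train() produces; it assumes each CIFAR-10 sample is a (flattened image, label) pair, which is what the reader_dict mapping {'image': 0, 'label': 1} implies.

# Sketch only: peek at one batch built the same way as in trainer.train().
import paddle.v2 as paddle

batch_reader = paddle.reader.batched(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(), buf_size=50000),
    batch_size=128)

first_batch = next(batch_reader())          # one list of 128 samples
image, label = first_batch[0]               # sample = (image, label), per reader_dict
print len(first_batch), len(image), label   # 128, 3072, a class id in [0, 10)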
@@ -0,0 +1,47 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2 as paddle

__all__ = ['vgg_bn_drop']


def vgg_bn_drop(input):
    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
        return paddle.networks.img_conv_group(
            input=ipt,
            num_channels=num_channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type=paddle.pooling.Max())

    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
    fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
    bn = paddle.layer.batch_norm(
        input=fc1,
        act=paddle.activation.Relu(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5))
    fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
    return fc2
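A minimal usage sketch (not part of the commit) matching "option 2. vgg" in the training script; it assumes this file is saved as api_v2_vgg.py, the module name the training script imports from.

# Sketch only: assumes this file is api_v2_vgg.py.
import paddle.v2 as paddle
from api_v2_vgg import vgg_bn_drop

paddle.init(use_gpu=False, trainer_count=1)

image = paddle.layer.data(
    name="image", type=paddle.data_type.dense_vector(3 * 32 * 32))

# vgg_bn_drop ends in a 512-wide fc layer; the Softmax classifier is added
# by the caller, exactly as the training script does for the resnet option.
net = vgg_bn_drop(image)
out = paddle.layer.fc(input=net, size=10, act=paddle.activation.Softmax())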
@@ -1,4 +1,8 @@
 import mnist
+import imikolov
+import imdb
+import cifar
+import movielens
 import uci_housing
 
-__all__ = ['mnist', 'uci_housing']
+__all__ = ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'uci_housing']
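A quick check (not part of the commit) that the dataset package now advertises all six modules; the import path paddle.v2.dataset is assumed from the training script's use of paddle.dataset.cifar with paddle bound to paddle.v2.

# Sketch only: confirm the modules exported after this change.
import paddle.v2.dataset as dataset

print dataset.__all__
# ['mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'uci_housing']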