You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							381 lines
						
					
					
						
							14 KiB
						
					
					
				
			
		
		
	
	
							381 lines
						
					
					
						
							14 KiB
						
					
					
				| # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
| import numpy as np
 | |
| import os
 | |
| from paddle.trainer.config_parser import *
 | |
| from paddle.utils.preprocess_img import \
 | |
|     ImageClassificationDatasetCreater
 | |
| from paddle.trainer_config_helpers import *
 | |
| 
 | |
| 
 | |
def image_data(data_dir,
               processed_image_size,
               overwrite=False,
               color=True,
               train_list="batches/train.list",
               test_list="batches/test.list",
               meta_file="batches/batches.meta",
               use_jpeg=1):
    """
    Register the predefined image data provider for image classification
    and report the dataset properties needed to build a network.

    data_dir: directory onto which train_list, test_list and meta_file are
              joined.
    processed_image_size: the size the input images are resized to (for a
              non-square image the shorter edge is resized to this size and
              the aspect ratio is kept). It is passed to the provider as
              'img_size' and returned as "image_size".
    overwrite: accepted for interface compatibility; not used here.
    color: only forwarded to ImageClassificationDatasetCreater. Whether the
           data is treated as color or gray is read from the meta file.
    train_list: path, relative to data_dir, of the text file listing the
                training batches.
    test_list: path, relative to data_dir, of the text file listing the
               test batches.
    meta_file: path, relative to data_dir, of the meta file. It is loaded
               with np.load and must provide the entries "mean_image_size",
               "color" and "num_classes".
    use_jpeg: any value different from 0 sets the provider argument
              'use_jpeg' to True.

    Returns a dictionary with the keys "image_size", "num_classes" and
    "is_color".
    """
    # The creator is instantiated exactly as before; its result is not used
    # any further inside this function.
    data_creator = ImageClassificationDatasetCreater(
        data_dir, processed_image_size, color)

    batch_data_dir = data_dir
    train_list = os.path.join(batch_data_dir, train_list)
    test_list = os.path.join(batch_data_dir, test_list)
    meta_path = os.path.join(batch_data_dir, meta_file)

    # Dataset-wide properties come from the meta file, not from the
    # arguments of this function.
    conf = np.load(meta_path)
    mean_image_size = conf["mean_image_size"]
    is_color = conf["color"]
    num_classes = conf["num_classes"]

    image_size = processed_image_size
    if is_color:
        color_string = "color"
    else:
        color_string = "gray"

    provider_args = dict(
        meta=meta_path,
        mean_img_size=mean_image_size,
        img_size=image_size,
        num_classes=num_classes,
        use_jpeg=(use_jpeg != 0),
        color=color_string)

    define_py_data_sources2(
        train_list,
        test_list,
        module='image_provider',
        obj='processData',
        args=provider_args)

    return dict(
        image_size=image_size,
        num_classes=num_classes,
        is_color=is_color)
 | |
| 
 | |
| 
 | |
def get_extra_layer_attr(drop_rate):
    """
    Build the extra layer attribute that carries a dropout rate.

    drop_rate: the dropout rate to attach to a layer.

    Returns None when drop_rate equals 0, otherwise an
    ExtraLayerAttribute constructed with that drop_rate.
    """
    if drop_rate != 0:
        return ExtraLayerAttribute(drop_rate=drop_rate)
    # A zero rate means "no dropout": no attribute object is needed.
    return None
 | |
| 
 | |
| 
 | |
def image_data_layers(image_size, num_classes, is_color=False,
                      is_predict=False):
    """
    Create the data layers for image classification.

    image_size: edge length of the (square) input image; the input layer
                has image_size * image_size * channels units.
    num_classes: num of classes. Not used when creating the layers.
    is_color: whether the input images are color (3 channels) or gray
              (1 channel).
    is_predict: whether the network is used for prediction. In that case
                no label layer is created.

    Returns a tuple (data_input, label_input, num_image_channels), where
    label_input is None when is_predict is true.
    """
    if is_color:
        num_image_channels = 3
    else:
        num_image_channels = 1

    input_dim = image_size * image_size * num_image_channels
    data_input = data_layer("input", input_dim)

    # The label layer only exists for training/testing networks.
    label_input = None if is_predict else data_layer("label", 1)
    return data_input, label_input, num_image_channels
 | |
| 
 | |
| 
 | |
def simple_conv_net(data_conf, is_color=False, is_predict=False):
    """
    A Wrapper for a simple network for MNIST digit recognition.
    It contains two convolutional layers, one fully connected layer, and
    one softmax layer.

    data_conf: a dictionary with the following keys:
        image_size: image size (required).
        num_classes: num of classes (required).
        is_predict: optional; when present it takes precedence over the
                    is_predict argument.
    is_color: whether the input images are color. Note that this argument,
              not a "is_color" entry of data_conf, decides the number of
              input channels.
    is_predict: whether the network is used for prediction. When true the
                softmax output ends the network, otherwise a classification
                cost on the label layer does.
    """
    # Kept for backward compatibility: earlier versions published every
    # entry of data_conf as a module-level global.
    globals().update(data_conf)

    # Read the configuration explicitly instead of relying on the injected
    # globals; previously is_predict was an undefined name unless data_conf
    # happened to provide it.
    image_size = data_conf["image_size"]
    num_classes = data_conf["num_classes"]
    is_predict = data_conf.get("is_predict", is_predict)

    data_input, label_input, num_image_channels = \
        image_data_layers(image_size, num_classes, is_color, is_predict)
    filter_sizes = [5, 5]
    num_channels = [32, 64]
    fc_dims = [500]
    conv_bn_pool1 = img_conv_bn_pool(
        name="g1",
        input=data_input,
        filter_size=filter_sizes[0],
        num_channel=num_image_channels,
        num_filters=num_channels[0],
        conv_stride=1,
        conv_padding=0,
        pool_size=3,
        pool_stride=2,
        act=ReluActivation())
    conv_bn_pool2 = img_conv_bn_pool(
        name="g2",
        input=conv_bn_pool1,
        filter_size=filter_sizes[1],
        num_channel=num_channels[0],
        num_filters=num_channels[1],
        conv_stride=1,
        conv_padding=0,
        pool_size=3,
        pool_stride=2,
        act=ReluActivation())
    fc3 = fc_layer(
        name="fc3", input=conv_bn_pool2, dim=fc_dims[0], act=ReluActivation())
    fc3_dropped = dropout_layer(name="fc3_dropped", input=fc3, dropout_rate=0.5)
    # NOTE(review): the softmax layer is sized fc_dims[0] (500), not
    # num_classes -- looks unintended, left unchanged; confirm before use.
    output = fc_layer(
        name="output",
        input=fc3_dropped,
        dim=fc_dims[0],
        act=SoftmaxActivation())
    if is_predict:
        end_of_network(output)
    else:
        cost = classify(name="cost", input=output, label=label_input)
        end_of_network(cost)
 | |
| 
 | |
| 
 | |
def conv_layer_group(prefix_num,
                     num_layers,
                     input,
                     input_channels,
                     output_channels,
                     drop_rates=None,
                     strides=None,
                     with_bn=None):
    """
    A set of convolution layers, and batch normalization layers,
    followed by one pooling layer.
    It is utilized in VGG network for image classification.

    prefix_num: the prefix number of the layer names.
                For example, if prefix_num = 1, the first convolutional
                layer's name will be conv1_1, its batch normalization layer
                conv1_1_bn and the pooling layer pool1.
    num_layers: number of the convolutional layers; must be at least 1.
    input: the input layer of the first convolution.
    input_channels: the number of channels of the input feature map.
    output_channels: the number of channels of the output feature map.
    drop_rates: the drop rates of the BN layers, one per convolution.
                None or an empty sequence means all zero. A rate is only
                applied where the matching with_bn entry is true.
    strides: the stride of the convolution for the layers.
             None or an empty sequence means all 1.
    with_bn: whether to use Batch Normalization for Conv layers.
             None or an empty sequence means all false.

    Returns the pooling layer that closes the group.
    Raises AssertionError when num_layers is smaller than 1 or when a
    non-empty drop_rates, strides or with_bn does not have num_layers
    entries.
    """
    # None replaces the former mutable [] defaults; an explicitly passed
    # empty sequence keeps its old meaning of "use the default".
    if drop_rates is None or len(drop_rates) == 0:
        drop_rates = [0] * num_layers
    if strides is None or len(strides) == 0:
        strides = [1] * num_layers
    if with_bn is None or len(with_bn) == 0:
        with_bn = [False] * num_layers
    assert (num_layers >= 1)
    assert (len(drop_rates) == num_layers)
    assert (len(strides) == num_layers)
    assert (len(with_bn) == num_layers)

    layer_input = input
    layer_input_channels = input_channels
    for idx in range(num_layers):
        layer_no = idx + 1
        use_bn = with_bn[idx]
        # With batch normalization the ReLU is applied by the BN layer, so
        # the convolution itself stays linear.
        conv_act = LinearActivation() if use_bn else ReluActivation()
        layer_output = img_conv_layer(
            name="conv%d_%d" % (prefix_num, layer_no),
            input=layer_input,
            filter_size=3,
            num_channels=layer_input_channels,
            num_filters=output_channels,
            stride=strides[idx],
            padding=1,
            act=conv_act)
        if use_bn:
            layer_output = batch_norm_layer(
                name="conv%d_%d_bn" % (prefix_num, layer_no),
                input=layer_output,
                num_channels=output_channels,
                act=ReluActivation(),
                layer_attr=get_extra_layer_attr(drop_rate=drop_rates[idx]))
        # Every layer after the first consumes the previous layer's output.
        layer_input = layer_output
        layer_input_channels = output_channels

    return img_pool_layer(
        name="pool%d" % prefix_num,
        input=layer_input,
        pool_size=2,
        num_channels=output_channels,
        stride=2)
 | |
| 
 | |
| 
 | |
def vgg_conv_net(image_size,
                 num_classes,
                 num_layers,
                 channels,
                 strides,
                 with_bn,
                 fc_dims,
                 drop_rates,
                 drop_rates_fc=None,
                 is_color=True,
                 is_predict=False):
    """
    A Wrapper for a VGG network for image classification.
    It is a set of convolutional groups followed by several fully
    connected layers, and a cross-entropy classification loss.
    The detailed architecture of the paper can be found here:
      Very Deep Convolutional Networks for Large-Scale Visual Recognition
      http://www.robots.ox.ac.uk/~vgg/research/very_deep/

    image_size: image size.
    num_classes: num of classes.
    num_layers: the number of layers for all the convolution groups.
    channels: the number of output filters for all the convolution groups.
    strides: the convolution strides for all the convolution groups (one
             list per group; an empty list means all 1).
    with_bn: whether each layer of a convolution group is followed by a
             batch normalization.
    fc_dims: the dimension for all the fully connected layers; at least
             one is required.
    drop_rates: the dropout rates for all the convolutional layers (one
                list per group).
    drop_rates_fc: len(fc_dims) + 1 dropout rates. Entry 0 is applied to
                   the output of the last convolution group, entry i to
                   fully connected layer i (except the last one), and the
                   final entry to the batch normalization that follows the
                   last fully connected layer. None or an empty sequence
                   means no dropout.
    is_color: whether the input images are color.
    is_predict: whether the network is used for prediction. When true the
                softmax output is the network output, otherwise the
                classification cost is.

    Raises AssertionError when the per-group arguments do not all have
    len(num_layers) entries, when fc_dims is empty, or when drop_rates_fc
    does not have len(fc_dims) + 1 entries.
    """
    num_fc_layers = len(fc_dims)
    if drop_rates_fc is None or len(drop_rates_fc) == 0:
        drop_rates_fc = [0.0] * (num_fc_layers + 1)

    # Validate the configuration before any layer is created.
    assert (len(num_layers) == len(channels))
    assert (len(num_layers) == len(strides))
    assert (len(num_layers) == len(with_bn))
    assert (len(num_layers) == len(drop_rates))
    assert (num_fc_layers >= 1)
    assert (num_fc_layers + 1 == len(drop_rates_fc))

    data_input, label_input, num_image_channels = \
        image_data_layers(image_size, num_classes, is_color, is_predict)

    group_output = data_input
    # The first group consumes the image itself, so its channel count has
    # to follow is_color (3 or 1) instead of being fixed to 3.
    input_channels = num_image_channels
    for i, (group_layers, group_channels) in enumerate(
            zip(num_layers, channels)):
        group_output = conv_layer_group(
            prefix_num=i + 1,
            num_layers=group_layers,
            input=group_output,
            input_channels=input_channels,
            output_channels=group_channels,
            drop_rates=drop_rates[i],
            strides=strides[i],
            with_bn=with_bn[i])
        input_channels = group_channels

    conv_output = group_output
    if drop_rates_fc[0] != 0.0:
        conv_output = dropout_layer(
            name="pool_dropped",
            input=conv_output,
            dropout_rate=drop_rates_fc[0])

    fc_output = conv_output
    last_fc = num_fc_layers - 1
    for i, fc_dim in enumerate(fc_dims):
        is_last = (i == last_fc)
        # The last fully connected layer is linear and has no dropout of
        # its own: activation and dropout are applied by "fc_bn" below.
        active_type = LinearActivation() if is_last else ReluActivation()
        drop_rate = 0.0 if is_last else drop_rates_fc[i + 1]
        fc_output = fc_layer(
            name="fc%d" % (i + 1),
            input=fc_output,
            size=fc_dim,
            act=active_type,
            layer_attr=get_extra_layer_attr(drop_rate))

    bn = batch_norm_layer(
        name="fc_bn",
        input=fc_output,
        num_channels=fc_dims[-1],
        act=ReluActivation(),
        layer_attr=get_extra_layer_attr(drop_rate=drop_rates_fc[-1]))
    output = fc_layer(
        name="output", input=bn, size=num_classes, act=SoftmaxActivation())
    if is_predict:
        outputs(output)
    else:
        cost = classification_cost(name="cost", input=output, label=label_input)
        outputs(cost)
 | |
| 
 | |
| 
 | |
def vgg16_conv_net(image_size, num_classes, is_color=True, is_predict=False):
    """
    A Wrapper for a 16 layers VGG network for image classification.
    The detailed architecture of the paper can be found here:
      Very Deep Convolutional Networks for Large-Scale Visual Recognition
      http://www.robots.ox.ac.uk/~vgg/research/very_deep/

    The network has five convolution groups with 2, 2, 3, 3 and 3 layers
    and 64, 128, 256, 512 and 512 filters, batch normalization after the
    last convolution of each group, and two fully connected layers of
    size 4096 with dropout rate 0.5.

    image_size: image size.
    num_classes: num of classes.
    is_color: whether the input images are color.
    is_predict: whether the network is used for prediction.
    """
    vgg_conv_net(image_size, num_classes,
                 num_layers=[2, 2, 3, 3, 3],
                 channels=[64, 128, 256, 512, 512],
                 strides=[[], [], [], [], []],
                 with_bn=[[False, True], [False, True], [False, False, True],
                          [False, False, True], [False, False, True]],
                 drop_rates=[[], [], [], [], []],
                 drop_rates_fc=[0.0, 0.5, 0.5],
                 fc_dims=[4096, 4096],
                 # is_color used to be accepted but silently dropped here.
                 is_color=is_color,
                 is_predict=is_predict)
 | |
| 
 | |
| 
 | |
def small_vgg(data_conf, is_predict=False):
    """
    A Wrapper for a small VGG network for CIFAR-10 image classification.
    The detailed architecture of the paper can be found here:
      92.45% on CIFAR-10 in Torch
      http://torch.ch/blog/2015/07/30/cifar.html
    Due to the constraints of CuDNN, it only has four convolutional groups
    rather than five.
    Thus, it only achieves 91.2% test accuracy and 98.1% training accuracy.

    data_conf: a dictionary with the following keys:
        image_size: image size (required).
        num_classes: num of classes (required).
        is_color: whether the input images are color. Optional; the
                  network is built for color images when it is missing.
    is_predict: whether the network is used for prediction.
    """
    # Kept for backward compatibility: earlier versions published every
    # entry of data_conf as a module-level global.
    globals().update(data_conf)

    vgg_conv_net(data_conf["image_size"], data_conf["num_classes"],
                 num_layers=[2, 2, 3, 3],
                 channels=[64, 128, 256, 512],
                 strides=[[], [], [], []],
                 with_bn=[[True, True], [True, True], [True, True, True],
                          [True, True, True]],
                 drop_rates=[[0.3, 0.0], [0.4, 0.0],
                             [0.4, 0.4, 0.0], [0.4, 0.4, 0.0]],
                 drop_rates_fc=[0.5, 0.5],
                 fc_dims=[512],
                 # The documented is_color entry used to be ignored, which
                 # always produced a color network.
                 is_color=data_conf.get("is_color", True),
                 is_predict=is_predict)
 | |
| 
 | |
| 
 | |
def training_settings(learning_rate=0.1,
                      batch_size=128,
                      algorithm="sgd",
                      momentum=0.9,
                      decay_rate=0.001):
    """
    Configure the training settings.

    learning_rate: learning rate of the training. It is divided by
                   batch_size before being handed to Settings.
    batch_size: the size of each training batch.
    algorithm: training algorithm, can be
       - sgd
       - adagrad
       - adadelta
       - rmsprop
    momentum: momentum of the training algorithm, registered as the
              default momentum.
    decay_rate: weight decay rate. It is multiplied by batch_size before
                being registered as the default decay rate.
    """
    scaled_learning_rate = learning_rate / float(batch_size)
    scaled_decay_rate = decay_rate * batch_size

    Settings(
        algorithm=algorithm,
        batch_size=batch_size,
        learning_rate=scaled_learning_rate)
    default_momentum(momentum)
    default_decay_rate(scaled_decay_rate)
 |