diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/gserver/layers/ExpandConvLayer.cpp index 9f30fcf00a..5ea1fdece5 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/gserver/layers/ExpandConvLayer.cpp @@ -32,10 +32,7 @@ void ExpandConvLayer::forward(PassType passType) { Layer::forward(passType); /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one colum, and the - * transOutValue correspond sample to one row */ - int batchSize = inputLayers_[0]->getOutputValue()->getWidth(); - batchSize = inputLayers_[0]->getOutputValue()->getHeight(); + int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); resetOutput(batchSize, getOutputSize()); MatrixPtr image = nullptr; diff --git a/paddle/gserver/layers/ExpandConvTransLayer.cpp b/paddle/gserver/layers/ExpandConvTransLayer.cpp index 4c4016c301..a3e160f1f4 100644 --- a/paddle/gserver/layers/ExpandConvTransLayer.cpp +++ b/paddle/gserver/layers/ExpandConvTransLayer.cpp @@ -38,8 +38,6 @@ void ExpandConvTransLayer::forward(PassType passType) { Layer::forward(passType); /* malloc memory for the output_ if necessary */ - /* note: one sample correspond to one colum, and the - * transOutValue correspond sample to one row */ int batchSize = inputLayers_[0]->getOutputValue()->getHeight(); resetOutput(batchSize, getOutputSize()); diff --git a/paddle/gserver/layers/ExpandConvTransLayer.h b/paddle/gserver/layers/ExpandConvTransLayer.h index d0c0469c35..87c464a97f 100644 --- a/paddle/gserver/layers/ExpandConvTransLayer.h +++ b/paddle/gserver/layers/ExpandConvTransLayer.h @@ -26,7 +26,7 @@ namespace paddle { * This layer expands input and use matrix multiplication to * calculate convolution transpose (deconv) operation. * - * The config file api is img_convTrans_layer. + * The config file api is img_conv_layer with flag trans=True. 
*/ class ExpandConvTransLayer : public ExpandConvBaseLayer { public: diff --git a/paddle/math/MathUtils.h b/paddle/math/MathUtils.h index ae035e55bc..91683dc3e9 100644 --- a/paddle/math/MathUtils.h +++ b/paddle/math/MathUtils.h @@ -60,6 +60,10 @@ void sparseRand(int* major, int* minor, int nnz, int majorLen, int minorMax, int outputSize(int imageSize, int filterSize, int padding, int stride, bool caffeMode); +/** + * Calculate image size based on output size and caffeMode_. + * It is the reverse function of outputSize() + */ int imageSize(int outputSize, int filterSize, int padding, int stride, bool caffeMode); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 8b13e4a142..4a701326e4 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1017,6 +1017,17 @@ def cnn_output_size(img_size, filter_size, padding, stride, caffe_mode): else: return 1 + int(math.ceil(output)) +''' +calculate image_size based on output_size for convolution. 
+It is the reverse function of cnn_output_size +''' +def cnn_image_size(output_size, filter_size, padding, stride, caffe_mode): + if caffe_mode: + img_size = (output_size - 1) * stride + filter_size - 2 * padding + else: + img_size = (output_size - 2) * stride + filter_size - 2 * padding + 1 + return img_size + def parse_pool(pool, input_layer_name, pool_conf): pool_conf.pool_type = pool.pool_type config_assert(pool.pool_type in ['max-projection', 'avg-projection', @@ -1120,14 +1131,9 @@ def parse_conv(conv, input_layer_name, conv_conf, trans=False): ("Input layer %s: Incorrect input image size %d for input " + "image pixels %d") % (input_layer_name, conv_conf.output_x, outputSize)) - if conv.caffe_mode: - conv_conf.img_size = \ - (conv_conf.output_x - 1) * conv.stride \ - + conv.filter_size - 2 * conv.padding - else: - conv_conf.img_size = \ - (conv_conf.output_x - 2) * conv.stride \ - + conv.filter_size - 2 * conv.padding + 1 + conv_conf.img_size = cnn_image_size( + conv_conf.output_x, conv_conf.filter_size, + conv_conf.padding, conv_conf.stride, conv_conf.caffe_mode) def parse_block_expand(block_expand, input_layer_name, block_expand_conf): block_expand_conf.channels = block_expand.channels @@ -1656,12 +1662,6 @@ class ConvTransLayerBase(LayerBase): use_gpu = int(g_command_config_args.get("use_gpu", 0)) parallel_nn = int(g_command_config_args.get("parallel_nn", 0)) - # Automatically select cudnn_type for GPU and exconv for CPU - # if set type=conv, but still reserve the way user specify - # exconv or cudnn_conv manually. 
- if self.layer_type == "cudnn_convt": - config_assert(use_gpu, "cudnn_convt only support GPU") - # cudnn_convt has not been implemented so use exconvt only self.layer_type = "exconvt" # need to specify layer in config diff --git a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh index 77774f6fcf..b8687e1d48 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh @@ -9,7 +9,7 @@ protostr=$PWD/protostr configs=(test_fc layer_activations projections test_print_layer test_sequence_pooling test_lstmemory_layer test_grumemory_layer last_first_seq test_expand_layer test_ntm_layers test_hsigmoid -img_layers util_layers simple_rnn_layers unused_layers test_cost_layers +img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight test_maxout test_bi_grumemory math_ops) diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py new file mode 100644 index 0000000000..077c78d201 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/img_trans_layers.py @@ -0,0 +1,22 @@ +from paddle.trainer_config_helpers import * + +settings( + learning_rate=1e-3, + batch_size=1000 +) + +img = data_layer(name='image', size=227*227) + +# the parse_conv in config_parser.py is not strictly accurate when filter_size +# is not square. So here set square filter_size. 
+img_conv = img_conv_layer(input=img, num_channels=1, num_filters=64, + filter_size=(32, 32), padding=(1, 1), stride=(1, 1), + act=LinearActivation(), trans=True) +img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) + +img_norm = img_cmrnorm_layer(input=img_bn, size=32) + +img_pool = img_pool_layer(input=img_conv, pool_size=32, pool_type=MaxPooling()) + + +outputs(img_pool, img_norm)