From c0ecd5c4c565f012a78e48504f5bd0b436e883b5 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 31 Jul 2017 15:08:17 +0800 Subject: [PATCH 01/44] add config helper. --- proto/ModelConfig.proto | 2 + python/paddle/trainer/config_parser.py | 19 ++ .../paddle/trainer_config_helpers/layers.py | 166 ++++++++---------- .../tests/configs/file_list.sh | 2 +- .../protostr/test_seq_select_layers.protostr | 63 +++++++ .../tests/configs/test_seq_select_layers.py | 9 + 6 files changed, 163 insertions(+), 98 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 83f72c137b..ce4b3aad01 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -482,6 +482,8 @@ message LayerConfig { repeated uint32 offset = 55; repeated uint32 shape = 56; + // for sub_nest_seq layer to select top k sequence with highest scores + optional uint32 top_k = 57 [default = 1]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5477158ecb..f8ab0ae80a 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2614,6 +2614,25 @@ class SubSequenceLayer(LayerBase): self.create_bias_parameter(bias, size) +@config_layer('sub_nested_seq') +class SubNestedSequenceLayer(LayerBase): + def __init__(self, name, inputs, top_k=1, bias=False, **xargs): + super(SubNestedSequenceLayer, self).__init__( + name, 'sub_nested_seq', 0, inputs=inputs, **xargs) + config_assert( + len(inputs) == 2, + ('SubNestSequenceLayer must have 2 inputs: ' + 'input1 is a nested sequence; input2 is a learnable distribution ' + 'or scores over each sentence in the nested sequence. 
')) + input_layer0 = self.get_input_layer(0) + size = input_layer0.size + self.set_layer_size(size) + + self.config.top_k = top_k + input_layer1 = self.get_input_layer(1) + assert (input_layer1.size == 1) + + @config_layer('out_prod') class OuterProdLayer(LayerBase): def __init__(self, name, inputs, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 14f072fc55..d266026a46 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,103 +31,33 @@ except ImportError: import copy __all__ = [ - 'full_matrix_projection', - 'AggregateLevel', - 'ExpandLevel', - 'identity_projection', - 'dotmul_projection', - 'dotmul_operator', - 'repeat_layer', - 'seq_reshape_layer', - 'table_projection', - 'mixed_layer', - 'data_layer', - 'embedding_layer', - 'fc_layer', - 'grumemory', - 'pooling_layer', - 'lstmemory', - 'last_seq', - 'first_seq', - 'cos_sim', - 'hsigmoid', - 'conv_projection', - 'mse_cost', - 'regression_cost', - 'classification_cost', - 'LayerOutput', - 'img_conv_layer', - 'img_pool_layer', - 'batch_norm_layer', - 'img_cmrnorm_layer', - 'addto_layer', - 'concat_layer', - 'seq_concat_layer', - 'lstm_step_layer', - 'recurrent_group', - 'memory', - 'StaticInput', - 'expand_layer', - 'scaling_layer', - 'scaling_projection', - 'power_layer', - 'interpolation_layer', - 'bilinear_interp_layer', - 'trans_layer', - 'rotate_layer', - 'sum_to_one_norm_layer', - 'get_output_layer', - 'LayerType', - 'context_projection', - 'beam_search', - 'maxid_layer', - 'GeneratedInput', - 'SubsequenceInput', - 'gru_step_layer', - 'gru_step_naive_layer', - 'recurrent_layer', - 'BaseGeneratedInput', - 'conv_operator', - 'conv_shift_layer', - 'tensor_layer', - 'selective_fc_layer', - 'sampling_id_layer', - 'slope_intercept_layer', - 'trans_full_matrix_projection', - 'linear_comb_layer', - 'convex_comb_layer', - 'ctc_layer', - 'warp_ctc_layer', - 'crf_layer', - 'crf_decoding_layer', - 'nce_layer', - 'cross_entropy_with_selfnorm', - 'cross_entropy', - 'multi_binary_label_cross_entropy', - 'sum_cost', - 'rank_cost', - 'lambda_cost', - 'huber_cost', - 'block_expand_layer', - 'maxout_layer', - 'out_prod_layer', - 'printer_layer', - 'print_layer', - 'priorbox_layer', - 'cross_channel_norm_layer', - 'multibox_loss_layer', - 'detection_output_layer', - 'spp_layer', - 'pad_layer', - 'eos_layer', - 'smooth_l1_cost', - 'layer_support', - 'multiplex_layer', - 'row_conv_layer', - 'dropout_layer', - 'prelu_layer', - 'gated_unit_layer', - 'crop_layer', + 'full_matrix_projection', 'AggregateLevel', 'ExpandLevel', + 'identity_projection', 'dotmul_projection', 'dotmul_operator', + 'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer', + 'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer', + 'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid', + 'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost', + 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', + 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer', + 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', + 'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer', + 'interpolation_layer', 'bilinear_interp_layer', 'trans_layer', + 'rotate_layer', 'sum_to_one_norm_layer', 'get_output_layer', 'LayerType', + 'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput', + 'SubsequenceInput', 'gru_step_layer', 
'gru_step_naive_layer',
+    'recurrent_layer', 'BaseGeneratedInput', 'conv_operator',
+    'conv_shift_layer', 'tensor_layer', 'selective_fc_layer',
+    'sampling_id_layer', 'slope_intercept_layer',
+    'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer',
+    'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer',
+    'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy',
+    'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost',
+    'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer',
+    'printer_layer', 'print_layer', 'priorbox_layer',
+    'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer',
+    'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support',
+    'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer',
+    'gated_unit_layer', 'crop_layer', 'sub_nested_seq_layer'
 ]
 
 
@@ -220,6 +150,7 @@ class LayerType(object):
     PRELU = 'prelu'
     CROP_LAYER = 'crop'
+    SUB_NESTED_SEQ = 'sub_nested_seq'
 
     @staticmethod
     def is_layer_type(type_name):
@@ -6006,3 +5937,44 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None):
         layer_type=LayerType.CROP_LAYER,
         parents=input,
         size=l.config.size)
+
+
+@wrap_name_default()
+@layer_support()
+def sub_nested_seq_layer(input, name=None, top_k=1):
+    """
+    The sub_nested_seq_layer accepts two inputs: the first one is a nested
+    sequence in PaddlePaddle; the second one is a learnable score or
+    distribution over each sequence in the nested sequence.
+
+    Then sub_nested_seq_layer selects the top k sequences with the highest
+    scores or probabilities according to the second input.
+
+    The example usage is:
+
+    .. code-block:: python
+        prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation())
+        sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=3)
+
+    :param input: The two input layers. The first input must be a nested
+        sequence. The second input is the learnable scores, whose size must be 1.
+    :type input: list of LayerOutput
+    :param name: name of this layer.
+    :type name: basestring
+    :param top_k: number of sequences with the highest probabilities to select.
+    :type top_k: int
+    :return: LayerOutput object.
+ :rtype: LayerOutput + """ + assert isinstance(input, collections.Sequence) and len(input) == 2, ( + 'sub_nest_seq_layer has exactly two inputs.') + l = Layer( + inputs=[x.name for x in input], + name=name, + top_k=top_k, + type=LayerType.SUB_NESTED_SEQ) + return LayerOutput( + name=name, + layer_type=LayerType.SUB_NESTED_SEQ, + parents=input, + size=l.config.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index cdf9b2eab7..1a1120d59b 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer -test_recursive_topology test_gated_unit_layer) +test_recursive_topology test_gated_unit_layer test_seq_select_layers) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr new file mode 100644 index 0000000000..8f41be1042 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr @@ -0,0 +1,63 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "__fc_layer_0__" + type: "fc" + size: 1 + active_type: "sequence_softmax" + inputs { + input_layer_name: "input" + input_parameter_name: "___fc_layer_0__.w0" + } + bias_parameter_name: "___fc_layer_0__.wbias" +} +layers { + name: "__sub_nested_seq_layer_0__" + type: "sub_nested_seq" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + } + inputs { + input_layer_name: "__fc_layer_0__" + } + top_k: 1 +} +parameters { + name: "___fc_layer_0__.w0" + size: 300 + initial_mean: 0.0 + initial_std: 0.057735026919 + dims: 300 + dims: 1 + initial_strategy: 0 + initial_smart: true +} +parameters { + name: "___fc_layer_0__.wbias" + size: 1 + initial_mean: 0.0 + initial_std: 0.0 + dims: 1 + dims: 1 + initial_strategy: 0 + initial_smart: false +} +input_layer_names: "input" +output_layer_names: "__sub_nested_seq_layer_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "__fc_layer_0__" + layer_names: "__sub_nested_seq_layer_0__" + input_layer_names: "input" + output_layer_names: "__sub_nested_seq_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py new file mode 100644 index 0000000000..f2553f6b6a --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +#coding=utf-8 +from paddle.trainer_config_helpers import * + +data = data_layer(name='input', size=300) +prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation()) +sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=1) + +outputs(sub_nest_seq) From 8bd73159c7c51b009fd0413b4639748ce546b3d8 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 1 Aug 2017 10:35:58 -0700 Subject: [PATCH 02/44] Add unittest for `FillZerosLikeOp` --- 
paddle/operators/fill_zeros_like_op.cc | 2 -- paddle/operators/fill_zeros_like_op.h | 4 +--- paddle/pybind/CMakeLists.txt | 2 +- paddle/pybind/pybind.cc | 1 + python/paddle/v2/framework/tests/CMakeLists.txt | 1 + 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index d641bc4ada..44300f54b4 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/fill_zeros_like_op.h" -#include "paddle/framework/op_registry.h" -#include "paddle/framework/tensor.h" namespace paddle { namespace operators { diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index ca44a201f7..3c157c3d4e 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -13,9 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "glog/logging.h" -#include "paddle/framework/eigen.h" -#include "paddle/framework/operator.h" +#include "paddle/operators/type_alias.h" namespace paddle { namespace operators { diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index fd1a142b40..7802c87dd4 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python - add_op fc_op sgd_op cross_entropy_op) + add_op fc_op sgd_op cross_entropy_op fill_zeros_like_op) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index ccefcd2511..f8a9253774 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -36,6 +36,7 @@ USE_OP(mul); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); +USE_OP(fill_zeros_like); template void ExposeOperator(ClassType& m) { diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index cdaaa60674..4b70af6861 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -13,4 +13,5 @@ add_python_test(test_framework test_sigmoid_op.py test_softmax_op.py test_rowwise_add_op.py + test_fill_zeros_like_op.py test_network.py) From c59fe7a039c1b6b9334094cfd151d79bf86a37c1 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 1 Aug 2017 10:47:27 -0700 Subject: [PATCH 03/44] Add unittest for FillZerosLikeOp --- .../tests/test_fill_zeros_like_op.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 python/paddle/v2/framework/tests/test_fill_zeros_like_op.py diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py new file mode 100644 index 0000000000..772edf895c --- /dev/null +++ b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py @@ -0,0 +1,35 @@ +import unittest +import paddle.v2.framework.create_op_creation_methods as creation +import paddle.v2.framework.core as core +from op_test_util import OpTestMeta +import numpy + + +class TestFillZerosLikeOp(unittest.TestCase): + def test_fill(self): + scope = core.Scope(None) + a = scope.create_var("input") + a_tensor = a.get_tensor() + a_tensor.set_dims([546, 291]) + a_tensor.alloc_float() + a_tensor.set(numpy.random.random((546, 291)).astype("float32")) + + op = creation.op_creations.fill_zeros_like(Src="input", 
Dst="output") + + for out in op.outputs(): + if scope.get_var(out) is None: + scope.create_var(out).get_tensor() + + b_tensor = scope.get_var("output").get_tensor() + op.infer_shape(scope) + self.assertEqual([546, 291], b_tensor.shape()) + ctx = core.DeviceContext.cpu_context() + op.run(scope, ctx) + b_tensor_array = numpy.array(b_tensor) + for r in range(0, 546): + for c in range(0, 291): + self.assertEqual(b_tensor_array[r][c], 0.0) + + +if __name__ == '__main__': + unittest.main() From 244c343b66fe7d5f2ee2744ebe32d6e72748e4ff Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Tue, 1 Aug 2017 15:18:22 -0700 Subject: [PATCH 04/44] Fix a test error --- python/paddle/v2/framework/tests/test_fill_zeros_like_op.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py index 772edf895c..69d89f537d 100644 --- a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py +++ b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py @@ -1,13 +1,12 @@ import unittest import paddle.v2.framework.create_op_creation_methods as creation import paddle.v2.framework.core as core -from op_test_util import OpTestMeta import numpy class TestFillZerosLikeOp(unittest.TestCase): def test_fill(self): - scope = core.Scope(None) + scope = core.Scope() a = scope.create_var("input") a_tensor = a.get_tensor() a_tensor.set_dims([546, 291]) From 4b39f92bd860e9e7bb3522ca0752380fe9260e27 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 31 Jul 2017 17:33:15 +0800 Subject: [PATCH 05/44] add implementation of SubNestedSequenceLayer. --- .../gserver/layers/SubNestedSequenceLayer.cpp | 179 ++++++++++++++++++ paddle/gserver/tests/LayerGradUtil.cpp | 14 +- paddle/gserver/tests/LayerGradUtil.h | 5 +- paddle/gserver/tests/test_LayerGrad.cpp | 79 +++++++- .../paddle/trainer_config_helpers/layers.py | 125 +++++++++--- 5 files changed, 365 insertions(+), 37 deletions(-) create mode 100644 paddle/gserver/layers/SubNestedSequenceLayer.cpp diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp new file mode 100644 index 0000000000..6887df353e --- /dev/null +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -0,0 +1,179 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Layer.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/Vector.h" +#include "paddle/utils/Logging.h" +#include "paddle/utils/Stat.h" + +namespace paddle { + +class SubNestedSequenceLayer : public Layer { +public: + explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; + +private: + void checkInputs(const Argument& inputSeq, const Argument& seqScores); + void calSelectedCols(const Argument& scores, + const int* subSeqStartPos, + size_t topK); + void partialSortIndex(const std::vector& values, + int k, + std::vector& indices); + void buildOutputSeqInfo(); + + std::vector outSeqStartInfo_; + std::vector outSubSeqStartInfo_; + + MatrixPtr scoreOverInputSeq_; + + // rowIdx_ and selectedRows_ actually share a same memory. + IVectorPtr rowIndice_; + std::vector selectedRows_; +}; + +REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer); + +bool SubNestedSequenceLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + /* Initialize the basic parent class */ + Layer::init(layerMap, parameterMap); + CHECK_EQ(2U, inputLayers_.size()); + setNeedSequenceInfo(false); + return true; +} + +void SubNestedSequenceLayer::checkInputs(const Argument& inputSeq, + const Argument& seqScores) { + CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " + << "must be a nested sequence."; + CHECK(seqScores.hasSeq()) + << "The second input of SubNestSequence layer must be a sequence."; + CHECK_EQ(seqScores.value->getWidth(), 1U) + << "The second input of SubNestedSequenceLayer is scores " + << "over each sequence in a nested sequence, " + << "so its size should be 1."; + CHECK_EQ(inputSeq.getNumSubSequences(), seqScores.value->getHeight()) + << "The second input of SubNestedSequenceLayer is scores " + << "over each sequence in a nested sequence, so its height should be " + << "equal to number of sequence in the first input."; +} + +void SubNestedSequenceLayer::partialSortIndex(const std::vector& values, + int k, + std::vector& indices) { + CHECK_GE(values.size(), k); + indices.resize(values.size(), 0); + std::iota(begin(indices), end(indices), 0U); + std::partial_sort(begin(indices), + begin(indices) + k, + end(indices), + [&](size_t a, size_t b) { return values[a] > values[b]; }); +} + +void SubNestedSequenceLayer::calSelectedCols(const Argument& scores, + const int* subSeqStartPos, + size_t topK) { + selectedRows_.clear(); + outSubSeqStartInfo_.resize(1, 0); + outSeqStartInfo_.resize(1, 0); + + real* seqScores = nullptr; + if (useGpu_) { + Matrix::resizeOrCreate(scoreOverInputSeq_, + scores.value->getHeight(), + scores.value->getWidth(), + false /* trans */, + false /* useGpu */); + scoreOverInputSeq_->copyFrom(*scores.value); + seqScores = scoreOverInputSeq_->getData(); + } else { + seqScores = scores.value->getData(); + } + + int* scoreSeqStartPos = scores.sequenceStartPositions->getMutableData(false); + for (int i = 0; i < scores.getNumSequences(); ++i) { + int seqLen = scoreSeqStartPos[i + 1] - scoreSeqStartPos[i]; + int selectedSeqNum = std::min(static_cast(config_.top_k()), seqLen); + + std::vector sortedIdx; + partialSortIndex(std::vector(seqScores + scoreSeqStartPos[i], + seqScores + scoreSeqStartPos[i + 1]), + selectedSeqNum, + sortedIdx); + + for (int j = 0; j < selectedSeqNum; ++j) { + int begPos = 
subSeqStartPos[scoreSeqStartPos[i] + sortedIdx[j]]; + int endPos = subSeqStartPos[scoreSeqStartPos[i] + sortedIdx[j] + 1]; + for (int m = begPos; m < endPos; ++m) selectedRows_.push_back(m); + outSubSeqStartInfo_.push_back(outSubSeqStartInfo_.back() + endPos - + begPos); + } + outSeqStartInfo_.push_back(outSubSeqStartInfo_.back()); + } +} + +void SubNestedSequenceLayer::buildOutputSeqInfo() { + Argument& output = getOutput(); + + ICpuGpuVector::resizeOrCreate( + output.sequenceStartPositions, outSeqStartInfo_.size(), false); + output.sequenceStartPositions->copyFrom( + outSeqStartInfo_.data(), outSeqStartInfo_.size(), false); + + ICpuGpuVector::resizeOrCreate( + output.subSequenceStartPositions, outSubSeqStartInfo_.size(), false); + output.subSequenceStartPositions->copyFrom( + outSubSeqStartInfo_.data(), outSubSeqStartInfo_.size(), false); +} + +void SubNestedSequenceLayer::forward(PassType passType) { + Layer::forward(passType); + const Argument& inputSeq = getInput(0); + const Argument& seqScores = getInput(1); + + checkInputs(inputSeq, seqScores); + + calSelectedCols(seqScores, + inputSeq.subSequenceStartPositions->getMutableData(false), + config_.top_k()); + resetOutput(selectedRows_.size(), getSize()); + buildOutputSeqInfo(); + + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + + getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); +} + +void SubNestedSequenceLayer::backward(const UpdateCallback& callback) { + MatrixPtr inputGrad1 = getInputGrad(0); + MatrixPtr outputGrad = getOutputGrad(); + + if (inputGrad1) outputGrad->addToRows(*inputGrad1, *rowIndice_); +} + +} // namespace paddle diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/gserver/tests/LayerGradUtil.cpp index 9eca58f1a1..fd9cfa1dc7 100644 --- a/paddle/gserver/tests/LayerGradUtil.cpp +++ b/paddle/gserver/tests/LayerGradUtil.cpp @@ -400,7 +400,6 @@ void initDataLayer(TestConfig testConf, const std::vector& labelSeqStartPositions = testConf.inputDefs[i].labelSeqStartPositions; if (labelSeqStartPositions.size() != 0) { - CHECK(!sequenceStartPositions); CHECK_GE(static_cast(labelSeqStartPositions.size()), 2); sequenceStartPositions = @@ -410,6 +409,19 @@ void initDataLayer(TestConfig testConf, useGpu); data.sequenceStartPositions = sequenceStartPositions; } + + const std::vector& labelSubSeqStartPositions = + testConf.inputDefs[i].labelSubSeqStartPositions; + if (labelSubSeqStartPositions.size() != 0) { + CHECK_GE(static_cast(labelSubSeqStartPositions.size()), 2); + + subSequenceStartPositions = + ICpuGpuVector::create(labelSubSeqStartPositions.size(), useGpu); + subSequenceStartPositions->copyFrom(labelSubSeqStartPositions.data(), + labelSubSeqStartPositions.size(), + useGpu); + data.subSequenceStartPositions = subSequenceStartPositions; + } break; } default: diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/gserver/tests/LayerGradUtil.h index d299b4dd09..5debedf5ef 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/gserver/tests/LayerGradUtil.h @@ -67,6 +67,7 @@ struct InputDef { bool isStatic; std::vector labelInitValue; std::vector labelSeqStartPositions; + std::vector labelSubSeqStartPositions; MatrixPtr selfDefinedData; InputDef(InputType type, string nameIn, size_t dimIn, size_t sizeIn) { @@ -81,8 +82,10 @@ struct InputDef { InputDef(InputType type, string nameIn, MatrixPtr 
selfDefinedData, - std::vector selfDefinedSeqStartPos = {}) + std::vector selfDefinedSeqStartPos = {}, + std::vector selfDefinedSubSeqStartPos = {}) : labelSeqStartPositions(selfDefinedSeqStartPos), + labelSubSeqStartPositions(selfDefinedSubSeqStartPos), selfDefinedData(selfDefinedData) { inputType = type; name = nameIn; diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 0975c3bc95..20d843157f 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -920,14 +920,15 @@ TEST(Layer, SequenceLastInstanceLayer) { } TEST(Layer, AverageLayer) { - testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq - testDegradeLayer(false, - "average", - "non-seq", - 5); // seq average to a shorten seq, stride window = 5 - testDegradeLayer( - true, "average", "non-seq", -1); // hasSubseq average to non-seq - testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq + testDegradeLayer(false, "average", "non-seq", -1); // seq average to + non - seq testDegradeLayer( + false, + "average", + "non-seq", + 5); // seq average to a shorten seq, stride window = 5 + testDegradeLayer(true, "average", "non-seq", -1); // hasSubseq average to + non - seq testDegradeLayer( + true, "average", "seq", -1); // hasSubseq average to seq } TEST(Layer, SequenceConcatLayer) { @@ -1879,6 +1880,68 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, SubNestedSequenceLayer) { + const int layerSize = 128; + + TestConfig config; + config.layerConfig.set_type("sub_nested_seq"); + config.layerConfig.set_top_k(2); + config.layerConfig.set_name("sub_nested_seq_layer"); + config.layerConfig.set_size(layerSize); + + // Generate the first input + srand((size_t)(time(NULL))); + const int batchSize = 128; + const int maxSeqLen = 100; + const int maxSubSeqNum = 50; + // sequenceStartPositioins info for the first input. + vector seqStartPos1(batchSize + 1, 0); + // subSequenceStartPositioins info for the first input. + vector subSeqStartPos; + subSeqStartPos.push_back(0); + + // sequenceStartPositioins info for the second input. 
+ vector seqStartPos2(batchSize + 1, 0); + + size_t curPos = 0; + for (int i = 1; i < batchSize + 1; ++i) { + int seqNum = uniformRandom(maxSubSeqNum); + seqStartPos2[i] = seqStartPos2[i - 1] + seqNum; + for (int j = 0; j < seqNum; ++j) { + int seqLen = uniformRandom(maxSeqLen); + subSeqStartPos.push_back(curPos + seqLen); + curPos += seqLen; + } + seqStartPos1[i] = curPos; + } + + MatrixPtr dataInputPtr1 = Matrix::create(curPos, layerSize, false, false); + dataInputPtr1->randomizeUniform(); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, + "layer_0", + dataInputPtr1, + seqStartPos1, + subSeqStartPos}); + config.layerConfig.add_inputs(); + + // Generate the second input + MatrixPtr dataInputPtr2 = + Matrix::create(seqStartPos2[batchSize], 1, false, false); + dataInputPtr2->randomizeUniform(); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "layer_1", dataInputPtr2, seqStartPos2}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "sub_nested_seq", + /* batchSize */ 100, + /* trans */ false, + /* useGpu*/ useGpu, + /* useWeight */ false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index d266026a46..7d1780e1ff 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,33 +31,104 @@ except ImportError: import copy __all__ = [ - 'full_matrix_projection', 'AggregateLevel', 'ExpandLevel', - 'identity_projection', 'dotmul_projection', 'dotmul_operator', - 'repeat_layer', 'seq_reshape_layer', 'table_projection', 'mixed_layer', - 'data_layer', 'embedding_layer', 'fc_layer', 'grumemory', 'pooling_layer', - 'lstmemory', 'last_seq', 'first_seq', 'cos_sim', 'hsigmoid', - 'conv_projection', 'mse_cost', 'regression_cost', 'classification_cost', - 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', - 'img_cmrnorm_layer', 'addto_layer', 'concat_layer', 'seq_concat_layer', - 'lstm_step_layer', 'recurrent_group', 'memory', 'StaticInput', - 'expand_layer', 'scaling_layer', 'scaling_projection', 'power_layer', - 'interpolation_layer', 'bilinear_interp_layer', 'trans_layer', - 'rotate_layer', 'sum_to_one_norm_layer', 'get_output_layer', 'LayerType', - 'context_projection', 'beam_search', 'maxid_layer', 'GeneratedInput', - 'SubsequenceInput', 'gru_step_layer', 'gru_step_naive_layer', - 'recurrent_layer', 'BaseGeneratedInput', 'conv_operator', - 'conv_shift_layer', 'tensor_layer', 'selective_fc_layer', - 'sampling_id_layer', 'slope_intercept_layer', - 'trans_full_matrix_projection', 'linear_comb_layer', 'convex_comb_layer', - 'ctc_layer', 'warp_ctc_layer', 'crf_layer', 'crf_decoding_layer', - 'nce_layer', 'cross_entropy_with_selfnorm', 'cross_entropy', - 'multi_binary_label_cross_entropy', 'sum_cost', 'rank_cost', 'lambda_cost', - 'huber_cost', 'block_expand_layer', 'maxout_layer', 'out_prod_layer', - 'printer_layer', 'print_layer', 'priorbox_layer', - 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', - 'spp_layer', 'pad_layer', 'eos_layer', 'smooth_l1_cost', 'layer_support', - 'multiplex_layer', 'row_conv_layer', 'dropout_layer', 'prelu_layer', - 'gated_unit_layer', 'crop_layer', 'sub_nested_seq_layer' + 'full_matrix_projection', + 'AggregateLevel', + 'ExpandLevel', + 'identity_projection', + 'dotmul_projection', + 'dotmul_operator', + 'repeat_layer', + 'seq_reshape_layer', + 
'table_projection', + 'mixed_layer', + 'data_layer', + 'embedding_layer', + 'fc_layer', + 'grumemory', + 'pooling_layer', + 'lstmemory', + 'last_seq', + 'first_seq', + 'cos_sim', + 'hsigmoid', + 'conv_projection', + 'mse_cost', + 'regression_cost', + 'classification_cost', + 'LayerOutput', + 'img_conv_layer', + 'img_pool_layer', + 'batch_norm_layer', + 'img_cmrnorm_layer', + 'addto_layer', + 'concat_layer', + 'seq_concat_layer', + 'lstm_step_layer', + 'recurrent_group', + 'memory', + 'StaticInput', + 'expand_layer', + 'scaling_layer', + 'scaling_projection', + 'power_layer', + 'interpolation_layer', + 'bilinear_interp_layer', + 'trans_layer', + 'rotate_layer', + 'sum_to_one_norm_layer', + 'get_output_layer', + 'LayerType', + 'context_projection', + 'beam_search', + 'maxid_layer', + 'GeneratedInput', + 'SubsequenceInput', + 'gru_step_layer', + 'gru_step_naive_layer', + 'recurrent_layer', + 'BaseGeneratedInput', + 'conv_operator', + 'conv_shift_layer', + 'tensor_layer', + 'selective_fc_layer', + 'sampling_id_layer', + 'slope_intercept_layer', + 'trans_full_matrix_projection', + 'linear_comb_layer', + 'convex_comb_layer', + 'ctc_layer', + 'warp_ctc_layer', + 'crf_layer', + 'crf_decoding_layer', + 'nce_layer', + 'cross_entropy_with_selfnorm', + 'cross_entropy', + 'multi_binary_label_cross_entropy', + 'sum_cost', + 'rank_cost', + 'lambda_cost', + 'huber_cost', + 'block_expand_layer', + 'maxout_layer', + 'out_prod_layer', + 'printer_layer', + 'print_layer', + 'priorbox_layer', + 'cross_channel_norm_layer', + 'multibox_loss_layer', + 'detection_output_layer', + 'spp_layer', + 'pad_layer', + 'eos_layer', + 'smooth_l1_cost', + 'layer_support', + 'multiplex_layer', + 'row_conv_layer', + 'dropout_layer', + 'prelu_layer', + 'gated_unit_layer', + 'crop_layer', + 'sub_nested_seq_layer', ] From 589c3d864b8d4aec71528d77185d328cb46a1f72 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Wed, 2 Aug 2017 17:15:09 -0700 Subject: [PATCH 06/44] Rewirte test based on python test frameworks --- .../tests/test_fill_zeros_like_op.py | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py index 69d89f537d..6e496f1be9 100644 --- a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py +++ b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py @@ -1,33 +1,15 @@ import unittest -import paddle.v2.framework.create_op_creation_methods as creation -import paddle.v2.framework.core as core +from op_test_util import OpTestMeta import numpy class TestFillZerosLikeOp(unittest.TestCase): - def test_fill(self): - scope = core.Scope() - a = scope.create_var("input") - a_tensor = a.get_tensor() - a_tensor.set_dims([546, 291]) - a_tensor.alloc_float() - a_tensor.set(numpy.random.random((546, 291)).astype("float32")) + __metaclass__ = OpTestMeta - op = creation.op_creations.fill_zeros_like(Src="input", Dst="output") - - for out in op.outputs(): - if scope.get_var(out) is None: - scope.create_var(out).get_tensor() - - b_tensor = scope.get_var("output").get_tensor() - op.infer_shape(scope) - self.assertEqual([546, 291], b_tensor.shape()) - ctx = core.DeviceContext.cpu_context() - op.run(scope, ctx) - b_tensor_array = numpy.array(b_tensor) - for r in range(0, 546): - for c in range(0, 291): - self.assertEqual(b_tensor_array[r][c], 0.0) + def setUp(self): + self.type = "fill_zeros_like" + self.Src = numpy.random.random((219, 232)).astype("float32") + self.Dst = 
numpy.zeros_like(self.Src) if __name__ == '__main__': From 2c554646dd7fa11c77047afcc410d222db117d52 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 3 Aug 2017 17:01:52 -0700 Subject: [PATCH 07/44] Fix bug caused by merge --- paddle/framework/CMakeLists.txt | 1 + paddle/framework/pybind.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 9c39430835..e69c2ada5f 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -41,4 +41,5 @@ cc_library(paddle_pybind SHARED add_op mean_op cross_entropy_op + fill_zeros_like_op recurrent_op) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index b4f0f3ef7e..58b65af7c9 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -40,6 +40,7 @@ USE_OP(mean); USE_OP(sigmoid); USE_OP(softmax); USE_OP(rowwise_add); +USE_OP(fill_zeros_like); USE_OP_WITHOUT_KERNEL(recurrent_op); namespace paddle { namespace framework { From 5d7e8bfb5c022f1e5f343defd9c5bab7ac5eae0e Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Thu, 3 Aug 2017 20:38:09 -0700 Subject: [PATCH 08/44] fix bug --- paddle/operators/fill_zeros_like_op.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/operators/fill_zeros_like_op.cu b/paddle/operators/fill_zeros_like_op.cu index 55ad58f4f1..cd1a0e053b 100644 --- a/paddle/operators/fill_zeros_like_op.cu +++ b/paddle/operators/fill_zeros_like_op.cu @@ -1,3 +1,4 @@ +#define EIGEN_USE_GPU #include "paddle/framework/op_registry.h" #include "paddle/operators/fill_zeros_like_op.h" From 53f85df1abbafdd248c06c065beebfa2b5d27b29 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 4 Aug 2017 14:16:12 +0800 Subject: [PATCH 09/44] Start doing `python.framework.operator` --- ...ate_op_creation_methods.py => operator.py} | 68 ++++++++++++------- 1 file changed, 42 insertions(+), 26 deletions(-) rename python/paddle/v2/framework/{create_op_creation_methods.py => operator.py} (81%) diff --git a/python/paddle/v2/framework/create_op_creation_methods.py b/python/paddle/v2/framework/operator.py similarity index 81% rename from python/paddle/v2/framework/create_op_creation_methods.py rename to python/paddle/v2/framework/operator.py index b034efffb6..d4c34d7fa6 100644 --- a/python/paddle/v2/framework/create_op_creation_methods.py +++ b/python/paddle/v2/framework/operator.py @@ -216,38 +216,54 @@ def create_op_creation_method(op_proto): opdesc = method(*args, **kwargs) return core.Operator.create(opdesc.SerializeToString()) - __impl__.__doc__ = get_docstring_from_op_proto(op_proto) - __impl__.all_input_args = [var.name for var in op_proto.inputs] - __impl__.all_output_args = [var.name for var in op_proto.outputs] - __impl__.all_attr_args = [attr.name for attr in op_proto.attrs] - __impl__.all_not_temp_output_args = [ - var.name for var in op_proto.outputs if not var.temporary - ] + return { + 'method': __impl__, + 'name': op_proto.type, + 'all_inputs': [var.name for var in op_proto.inputs], + 'all_outputs': [var.name for var in op_proto.outputs], + 'all_attrs': [attr.name for attr in op_proto.attrs], + 'all_no_temp_outputs': + [var.name for var in op_proto.outputs if not var.temporary] + } + + +class OperatorFactory(object): + def __init__(self): + self.op_methods = dict() + for op_proto in get_all_op_protos(): + method = create_op_creation_method(op_proto) + self.op_methods[method.name] = method - return __impl__ + def __call__(self, *args, **kwargs): + if 'type' in kwargs: + if len(args) != 0: + raise ValueError("All Paddle 
argument should be key-word " + "argument except type") + t = kwargs.pop('type') + else: + if len(args) != 1: + raise ValueError("All Paddle argument should be key-word " + "argument except type") + t = args[0] + return self.get_op_creation_info(t)['method'](**kwargs) -class OpCreationsHolder(object): - """ - A object will holds all op creation methods. - - Use `op_creations.xxx_op` to access them. - """ - pass + def get_op_creation_info(self, t): + if t not in self.op_methods: + raise ValueError("operator %s is not registered", t) + return self.op_methods.get(t) + def get_op_input_names(self, type): + return self.get_op_creation_info(type)['all_inputs'] -op_creations = OpCreationsHolder() + def get_op_output_names(self, type): + return self.get_op_creation_info(type)['all_outputs'] + def get_op_attr_names(self, type): + return self.get_op_creation_info(type)['all_attrs'] -def __bootstrap__(): - """ - Bootstrap function for this module. It will dynamic create all op creation - methods in runtime. - """ - for op_proto in get_all_op_protos(): - func = create_op_creation_method(op_proto) - func.__name__ = str(op_proto.type) - setattr(op_creations, func.__name__, func) + def get_op_no_temp_output_names(self, type): + return self.get_op_creation_info(type)['all_no_temp_outputs'] -__bootstrap__() +Operator = OperatorFactory() # Default global factory From a239418bdf5df5295c445be16618713f989d6cdd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 4 Aug 2017 14:40:27 +0800 Subject: [PATCH 10/44] Fix unittest for operator.py Rename operator.py to op.py because it is conflict with protobuf --- .../v2/framework/{operator.py => op.py} | 67 +--------- ...p_creation_methods.py => test_operator.py} | 119 +++++------------- 2 files changed, 37 insertions(+), 149 deletions(-) rename python/paddle/v2/framework/{operator.py => op.py} (78%) rename python/paddle/v2/framework/tests/{test_op_creation_methods.py => test_operator.py} (64%) diff --git a/python/paddle/v2/framework/operator.py b/python/paddle/v2/framework/op.py similarity index 78% rename from python/paddle/v2/framework/operator.py rename to python/paddle/v2/framework/op.py index d4c34d7fa6..1fbaaf60c7 100644 --- a/python/paddle/v2/framework/operator.py +++ b/python/paddle/v2/framework/op.py @@ -1,8 +1,7 @@ import paddle.v2.framework.core as core -import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 -import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 -import cStringIO +import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 +import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 def get_all_op_protos(): @@ -146,66 +145,6 @@ class OpDescCreationMethod(object): return False -def get_docstring_from_op_proto(op_proto): - """ - Generate docstring from a OpProto - :param op_proto: a OpProto instance. - :type op_proto: op_proto_pb2.OpProto - :return: docstring - """ - if not isinstance(op_proto, op_proto_pb2.OpProto): - raise TypeError("Input must be OpProto") - f = cStringIO.StringIO() - f.write(op_proto.comment) - f.write("\n") - - def __append_param__(name, comment, type): - # Maybe replace the following line with template engine is better. 
- f.write(":param ") - f.write(name) - f.write(": ") - f.write(comment) - f.write("\n") - f.write(":type ") - f.write(name) - f.write(": ") - f.write(type) - f.write("\n") - - for ipt in op_proto.inputs: - __append_param__(ipt.name, ipt.comment, "list | basestr" - if ipt.multiple else "basestr") - - temp_var_prefix = \ - "This is a temporary variable. It does not have to set by user. " - for opt in op_proto.outputs: - __append_param__(opt.name, opt.comment if not opt.temporary else - temp_var_prefix + opt.comment, "list | basestr" - if opt.multiple else "basestr") - - for attr in op_proto.attrs: - attr_type = None - if attr.type == attr_type_pb2.INT: - attr_type = "int" - elif attr.type == attr_type_pb2.FLOAT: - attr_type = "float" - elif attr.type == attr_type_pb2.STRING: - attr_type = "basestr" - elif attr.type == attr_type_pb2.INTS: - attr_type = "list of int" - elif attr.type == attr_type_pb2.FLOATS: - attr_type = "list of float" - elif attr.type == attr_type_pb2.STRINGS: - attr_type = "list of basestr" - - if attr_type is None: - raise RuntimeError("Not supported attribute type " + attr.type) - - __append_param__(attr.name, attr.comment, attr_type) - - return f.getvalue() - - def create_op_creation_method(op_proto): """ Generate op creation method for an OpProto @@ -232,7 +171,7 @@ class OperatorFactory(object): self.op_methods = dict() for op_proto in get_all_op_protos(): method = create_op_creation_method(op_proto) - self.op_methods[method.name] = method + self.op_methods[method['name']] = method def __call__(self, *args, **kwargs): if 'type' in kwargs: diff --git a/python/paddle/v2/framework/tests/test_op_creation_methods.py b/python/paddle/v2/framework/tests/test_operator.py similarity index 64% rename from python/paddle/v2/framework/tests/test_op_creation_methods.py rename to python/paddle/v2/framework/tests/test_operator.py index 41db7c0d53..947138d349 100644 --- a/python/paddle/v2/framework/tests/test_op_creation_methods.py +++ b/python/paddle/v2/framework/tests/test_operator.py @@ -1,5 +1,5 @@ import unittest -import paddle.v2.framework.create_op_creation_methods as creation +import paddle.v2.framework.op as op import paddle.v2.framework.core as core import paddle.v2.framework.proto.op_proto_pb2 as op_proto_pb2 import paddle.v2.framework.proto.op_desc_pb2 as op_desc_pb2 @@ -8,7 +8,7 @@ import paddle.v2.framework.proto.attr_type_pb2 as attr_type_pb2 class TestGetAllProtos(unittest.TestCase): def test_all(self): - all_protos = creation.get_all_op_protos() + all_protos = op.get_all_op_protos() self.assertNotEqual(0, len(all_protos)) for each in all_protos: @@ -17,25 +17,25 @@ class TestGetAllProtos(unittest.TestCase): class TestOpDescCreationMethod(unittest.TestCase): def test_plain_input_output(self): - op = op_proto_pb2.OpProto() - op.type = "test" - ipt = op.inputs.add() + op_proto = op_proto_pb2.OpProto() + op_proto.type = "test" + ipt = op_proto.inputs.add() ipt.name = "X" ipt.comment = "not matter" - ipt = op.inputs.add() + ipt = op_proto.inputs.add() ipt.name = "Y" ipt.comment = "not matter" - opt = op.outputs.add() + opt = op_proto.outputs.add() opt.name = "Z" opt.comment = "not matter" - op.comment = "not matter" + op_proto.comment = "not matter" - self.assertTrue(op.IsInitialized()) + self.assertTrue(op_proto.IsInitialized()) - method = creation.OpDescCreationMethod(op) + method = op.OpDescCreationMethod(op_proto) output = method(X="a", Y="b", Z="c") expected = op_desc_pb2.OpDesc() @@ -45,29 +45,29 @@ class TestOpDescCreationMethod(unittest.TestCase): 
self.assertEqual(expected, output) def test_multiple_input_plain_output(self): - op = op_proto_pb2.OpProto() - op.type = "fc" - ipt = op.inputs.add() + op_proto = op_proto_pb2.OpProto() + op_proto.type = "fc" + ipt = op_proto.inputs.add() ipt.name = "X" ipt.comment = "" ipt.multiple = True - ipt = op.inputs.add() + ipt = op_proto.inputs.add() ipt.name = "W" ipt.comment = "" ipt.multiple = True - ipt = op.inputs.add() + ipt = op_proto.inputs.add() ipt.name = "b" ipt.comment = "" - out = op.outputs.add() + out = op_proto.outputs.add() out.name = "Y" out.comment = "" - op.comment = "" - self.assertTrue(op.IsInitialized()) - method = creation.OpDescCreationMethod(op) + op_proto.comment = "" + self.assertTrue(op_proto.IsInitialized()) + method = op.OpDescCreationMethod(op_proto) generated1 = method(X="x", W="w", b="b", Y="y") expected1 = op_desc_pb2.OpDesc() @@ -93,14 +93,14 @@ class TestOpDescCreationMethod(unittest.TestCase): self.assertEqual(expected2, generated2) def test_attrs(self): - op = op_proto_pb2.OpProto() - op.type = "test" - ipt = op.inputs.add() + op_proto = op_proto_pb2.OpProto() + op_proto.type = "test" + ipt = op_proto.inputs.add() ipt.name = 'X' ipt.comment = "" def __add_attr__(name, type): - attr = op.attrs.add() + attr = op_proto.attrs.add() attr.name = name attr.comment = "" attr.type = type @@ -112,10 +112,10 @@ class TestOpDescCreationMethod(unittest.TestCase): __add_attr__("floats_attr", attr_type_pb2.FLOATS) __add_attr__("strings_attr", attr_type_pb2.STRINGS) - op.comment = "" - self.assertTrue(op.IsInitialized()) + op_proto.comment = "" + self.assertTrue(op_proto.IsInitialized()) - method = creation.OpDescCreationMethod(op) + method = op.OpDescCreationMethod(op_proto) generated = method( X="a", @@ -162,23 +162,23 @@ class TestOpDescCreationMethod(unittest.TestCase): self.assertEqual(expected, generated) def test_input_temporary_output(self): - op = op_proto_pb2.OpProto() - op.type = "test" - out = op.outputs.add() + op_proto = op_proto_pb2.OpProto() + op_proto.type = "test" + out = op_proto.outputs.add() out.name = "OUT" out.comment = "" - out = op.outputs.add() + out = op_proto.outputs.add() out.name = "TMP" out.comment = "" out.temporary = True - out = op.outputs.add() + out = op_proto.outputs.add() out.name = "OUT2" out.comment = "" - op.comment = "" + op_proto.comment = "" - method = creation.OpDescCreationMethod(op) + method = op.OpDescCreationMethod(op_proto) generated = method(OUT="a", OUT2="b") desc = op_desc_pb2.OpDesc() desc.outputs.extend(["a", core.var_names.temp(), "b"]) @@ -190,60 +190,9 @@ class TestOpDescCreationMethod(unittest.TestCase): self.assertEqual(generated, desc) -class TestOpCreationDocStr(unittest.TestCase): - def test_all(self): - op = op_proto_pb2.OpProto() - op.type = "test" - op.comment = """Test Op. - -This op is used for unit test, not a real op. -""" - a = op.inputs.add() - a.name = "a" - a.comment = "Input a for test op" - a.multiple = True - - b = op.inputs.add() - b.name = "b" - b.comment = "Input b for test op" - self.assertTrue(op.IsInitialized()) - - o1 = op.outputs.add() - o1.name = "output" - o1.comment = "The output of test op" - - o2 = op.outputs.add() - o2.name = "temp output" - o2.comment = "The temporary output of test op" - o2.temporary = True - - test_str = op.attrs.add() - test_str.name = "str_attr" - test_str.type = attr_type_pb2.STRING - test_str.comment = "A string attribute for test op" - - actual = creation.get_docstring_from_op_proto(op) - expected_docstring = '''Test Op. 
- -This op is used for unit test, not a real op. - -:param a: Input a for test op -:type a: list | basestr -:param b: Input b for test op -:type b: basestr -:param output: The output of test op -:type output: basestr -:param temp output: This is a temporary variable. It does not have to set by user. The temporary output of test op -:type temp output: basestr -:param str_attr: A string attribute for test op -:type str_attr: basestr -''' - self.assertEqual(expected_docstring, actual) - - class TestOpCreations(unittest.TestCase): def test_all(self): - add_op = creation.op_creations.add_two(X="a", Y="b", Out="z") + add_op = op.Operator("add_two", X="a", Y="b", Out="z") self.assertIsNotNone(add_op) # Invoke C++ DebugString() self.assertEqual('Op(add_two), inputs:(a, b), outputs:(z).', From 89d33ff83f5ccb791d19666d588af0ea4464835d Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Fri, 4 Aug 2017 15:22:00 +0800 Subject: [PATCH 11/44] Complete chagne op creation method. Currently use `Operator("fc", X="x", W='w1', B='b1')` as operator creation method. Fix #3198 --- python/paddle/v2/framework/network.py | 131 ------------------ python/paddle/v2/framework/op.py | 3 + .../paddle/v2/framework/tests/CMakeLists.txt | 6 +- .../v2/framework/tests/gradient_checker.py | 4 +- .../paddle/v2/framework/tests/op_test_util.py | 18 +-- .../v2/framework/tests/test_add_two_op.py | 4 +- .../paddle/v2/framework/tests/test_fc_op.py | 4 +- python/paddle/v2/framework/tests/test_net.py | 6 +- .../paddle/v2/framework/tests/test_network.py | 32 ----- .../v2/framework/tests/test_softmax_op.py | 4 +- 10 files changed, 24 insertions(+), 188 deletions(-) delete mode 100644 python/paddle/v2/framework/network.py delete mode 100644 python/paddle/v2/framework/tests/test_network.py diff --git a/python/paddle/v2/framework/network.py b/python/paddle/v2/framework/network.py deleted file mode 100644 index cfeb0e3dec..0000000000 --- a/python/paddle/v2/framework/network.py +++ /dev/null @@ -1,131 +0,0 @@ -import paddle.v2.framework.core as core -from paddle.v2.framework.create_op_creation_methods import op_creations -from default_scope_funcs import new_var, find_var, get_cur_scope - -__all__ = ['Network'] # Only expose Network - - -class NetworkFunctor(object): - """ - Network Op Creation Function. Used internally in this module. - It convert string input to Variable. If it is not created before, just - create in scope. - - It is a functor object. means the instances are callable. - - :param func: The op creation function which generated in Python. - :param net: The Network instance. 
- """ - - def __init__(self, func, net): - self.func = func - self.net = net - - def __call__(self, *args, **kwargs): - if len(args) != 0: - raise ValueError("Paddle must use keyword argument") - inputs = self.func.all_input_args - for ipt in inputs: - if ipt in kwargs: - var = kwargs[ipt] - if isinstance(var, basestring): - tmp = new_var(var) - self.net.var_names[tmp] = var - var = tmp - - if not isinstance(var, core.Variable): - raise TypeError( - "Input of op creation must be string or variable") - - kwargs[ipt] = self.net.var_names[var] - - notemp_outputs = self.func.all_not_temp_output_args - - for name in notemp_outputs: - if name not in kwargs: - kwargs[ - name] = self.func.__name__ + "@OUT@%d" % core.unique_integer( - ) - - outputs = self.func.all_output_args - for opt in outputs: - if opt in kwargs: - var = kwargs[opt] - if isinstance(var, basestring): - tmp = new_var(var) - self.net.var_names[tmp] = var - var = tmp - - if not isinstance(var, core.Variable): - raise TypeError( - "Output of op creation must be string or variable") - kwargs[opt] = self.net.var_names[var] - - op = self.func(**kwargs) - - self.net.net.add_op(op) - - lst = [find_var(kwargs[opt]) for opt in notemp_outputs] - if len(lst) == 1: - return lst[0] - elif len(lst) == 0: - return None - else: - return lst - - -class Network(object): - """ - The network concept. It avoid user to manually create operator, create - variable, and combine them into a Net. Just use Network.xxx can create the - operator, create variables in default scope, and add them into `self.net`. - - For example: - - .. code-block: python - - net = Network() - out = net.add_two(X="a", Y="b") - fc_out = net.fc(X="out", W="fc.w") - - net.run(...) - """ - - def __init__(self): - self.net = core.Net.create() - funcs = (func_name for func_name in dir(op_creations) - if not func_name.startswith("__")) - self.var_names = dict() - - # TODO(yuyang18): This code can work, but do not generate a good - # docstring, try to give a better way generate function in runtime - # later. 
- for func_name in funcs: - func = getattr(op_creations, func_name) - impl = NetworkFunctor(func, self) - setattr(self, func_name, impl.__call__) - self.__complete_add_op__ = False - - def infer_shape(self): - self.complete_add_op() - self.net.infer_shape(get_cur_scope()) - - def run(self, device_context): - self.complete_add_op() - self.net.run(get_cur_scope(), device_context) - - def __str__(self): - return str(self.net) - - def complete_add_op(self): - if not self.__complete_add_op__: - self.net.complete_add_op() - self.__complete_add_op__ = True - - -if __name__ == '__main__': - net = Network() - out = net.add_two(X="a", Y="b") - fc_out = net.fc(X=out, W="fc.w", b="fc.b", activation="softmax") - net.complete_add_op() - print net diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 1fbaaf60c7..bfeff643e7 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -187,6 +187,9 @@ class OperatorFactory(object): return self.get_op_creation_info(t)['method'](**kwargs) + def types(self): + return self.op_methods.keys() + def get_op_creation_info(self, t): if t not in self.op_methods: raise ValueError("operator %s is not registered", t) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index e66197030e..41c2c83c32 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -1,8 +1,8 @@ add_python_test(test_framework test_protobuf.py test_scope.py + test_operator.py test_default_scope_funcs.py - test_op_creation_methods.py test_net.py test_tensor.py test_fc_op.py @@ -13,5 +13,5 @@ add_python_test(test_framework test_sigmoid_op.py test_softmax_op.py test_rowwise_add_op.py - test_network.py - gradient_checker.py) + gradient_checker.py + ) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index 4022de1c40..cfd29932f5 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -1,5 +1,5 @@ import paddle.v2.framework.core as core -from paddle.v2.framework.create_op_creation_methods import op_creations +from paddle.v2.framework.op import Operator import numpy import unittest @@ -80,7 +80,7 @@ if __name__ == '__main__': class GetNumericGradientTest(unittest.TestCase): def test_add_op(self): - add_op = op_creations.add_two(X="X", Y="Y", Out="Z") + add_op = Operator('add_two', X="X", Y="Y", Out="Z") x = numpy.random.random((10, 1)).astype("float32") y = numpy.random.random((10, 1)).astype("float32") diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index 98fae1b975..3a8d253fe0 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -1,7 +1,7 @@ import paddle.v2.framework.core as core import unittest import numpy -import paddle.v2.framework.create_op_creation_methods as creation +from paddle.v2.framework.op import Operator class OpTestMeta(type): @@ -21,18 +21,14 @@ class OpTestMeta(type): obj = super(OpTestMeta, cls).__new__(cls, name, bases, attrs) def test_all(self): - func = getattr(creation.op_creations, self.type, None) - self.assertIsNotNone(func) - scope = core.Scope() kwargs = dict() - places = [] - places.append(core.CPUPlace()) + places = [core.CPUPlace()] if core.is_compile_gpu(): places.append(core.GPUPlace(0)) for place in places: - for in_name in 
func.all_input_args: + for in_name in Operator.get_op_input_names(self.type): if hasattr(self, in_name): kwargs[in_name] = in_name var = scope.new_var(in_name).get_tensor() @@ -42,23 +38,23 @@ class OpTestMeta(type): else: kwargs[in_name] = "@EMPTY@" - for out_name in func.all_output_args: + for out_name in Operator.get_op_output_names(self.type): if hasattr(self, out_name): kwargs[out_name] = out_name scope.new_var(out_name).get_tensor() - for attr_name in func.all_attr_args: + for attr_name in Operator.get_op_attr_names(self.type): if hasattr(self, attr_name): kwargs[attr_name] = getattr(self, attr_name) - op = func(**kwargs) + op = Operator(self.type, **kwargs) op.infer_shape(scope) ctx = core.DeviceContext.create(place) op.run(scope, ctx) - for out_name in func.all_output_args: + for out_name in Operator.get_op_output_names(self.type): actual = numpy.array(scope.find_var(out_name).get_tensor()) expect = getattr(self, out_name) # TODO(qijun) The default decimal is 7, but numpy.dot and eigen.mul diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_two_op.py index 6e6643201b..de89d1613f 100644 --- a/python/paddle/v2/framework/tests/test_add_two_op.py +++ b/python/paddle/v2/framework/tests/test_add_two_op.py @@ -2,7 +2,7 @@ import unittest import numpy import paddle.v2.framework.core as core -import paddle.v2.framework.create_op_creation_methods as creation +from paddle.v2.framework.op import Operator from op_test_util import OpTestMeta @@ -19,7 +19,7 @@ class TestAddOp(unittest.TestCase): class TestAddGradOp(unittest.TestCase): def test_add_grad(self): - op = creation.op_creations.add_two(X="X", Y="Y", Out="Out") + op = Operator('add_two', X="X", Y="Y", Out="Out") backward_op = core.Operator.backward(op, set()) self.assertEqual(backward_op.type(), "add_two_grad") expected = '''Op(add_two_grad), inputs:(X, Y, Out, Out@GRAD), outputs:(X@GRAD, Y@GRAD).''' diff --git a/python/paddle/v2/framework/tests/test_fc_op.py b/python/paddle/v2/framework/tests/test_fc_op.py index 00dc4399aa..e24435839d 100644 --- a/python/paddle/v2/framework/tests/test_fc_op.py +++ b/python/paddle/v2/framework/tests/test_fc_op.py @@ -1,7 +1,7 @@ import paddle.v2.framework.core as core import unittest import numpy -import paddle.v2.framework.create_op_creation_methods as creation +from paddle.v2.framework.op import Operator class TestFc(unittest.TestCase): @@ -24,7 +24,7 @@ class TestFc(unittest.TestCase): # Set a real numpy array here. 
# x_tensor.set(numpy.array([])) - op = creation.op_creations.fc(X="X", Y="Y", W="W") + op = Operator("fc", X="X", Y="Y", W="W") for out in op.outputs(): if scope.find_var(out) is None: diff --git a/python/paddle/v2/framework/tests/test_net.py b/python/paddle/v2/framework/tests/test_net.py index db776d6b64..b30896553d 100644 --- a/python/paddle/v2/framework/tests/test_net.py +++ b/python/paddle/v2/framework/tests/test_net.py @@ -1,16 +1,16 @@ import paddle.v2.framework.core as core -from paddle.v2.framework.create_op_creation_methods import op_creations +from paddle.v2.framework.op import Operator import unittest class TestNet(unittest.TestCase): def test_net_all(self): net = core.Net.create() - op1 = op_creations.add_two(X="X", Y="Y", Out="Out") + op1 = Operator("add_two", X="X", Y="Y", Out="Out") net.add_op(op1) net2 = core.Net.create() - net2.add_op(op_creations.fc(X="X", W="w", Y="fc.out")) + net2.add_op(Operator("fc", X="X", W="w", Y="fc.out")) net2.complete_add_op(True) net.add_op(net2) net.complete_add_op(True) diff --git a/python/paddle/v2/framework/tests/test_network.py b/python/paddle/v2/framework/tests/test_network.py deleted file mode 100644 index 6d53e233e9..0000000000 --- a/python/paddle/v2/framework/tests/test_network.py +++ /dev/null @@ -1,32 +0,0 @@ -from paddle.v2.framework.network import Network -import paddle.v2.framework.core as core -import unittest - - -class TestNet(unittest.TestCase): - def test_net_all(self): - net = Network() - out = net.add_two(X="X", Y="Y") - fc_out = net.fc(X=out, W="w") - net.complete_add_op() - self.assertTrue(isinstance(fc_out, core.Variable)) - self.assertEqual( - '''Op(plain_net), inputs:(@EMPTY@, X, Y, w), outputs:(@TEMP@fc@0, add_two@OUT@0, fc@OUT@1). - Op(add_two), inputs:(X, Y), outputs:(add_two@OUT@0). - Op(fc), inputs:(add_two@OUT@0, w, @EMPTY@), outputs:(fc@OUT@1, @TEMP@fc@0). - Op(mul), inputs:(add_two@OUT@0, w), outputs:(@TEMP@fc@0). - Op(sigmoid), inputs:(@TEMP@fc@0), outputs:(fc@OUT@1). -''', str(net)) - - net2 = Network() - tmp = net2.add_two(X="X", Y="Y") - self.assertTrue(isinstance(tmp, core.Variable)) - net2.complete_add_op() - self.assertEqual( - '''Op(plain_net), inputs:(X, Y), outputs:(add_two@OUT@2). - Op(add_two), inputs:(X, Y), outputs:(add_two@OUT@2). 
-''', str(net2)) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/v2/framework/tests/test_softmax_op.py b/python/paddle/v2/framework/tests/test_softmax_op.py index c808881287..47abccd4b5 100644 --- a/python/paddle/v2/framework/tests/test_softmax_op.py +++ b/python/paddle/v2/framework/tests/test_softmax_op.py @@ -2,7 +2,7 @@ import unittest import numpy as np import paddle.v2.framework.core as core -import paddle.v2.framework.create_op_creation_methods as creation +from paddle.v2.framework.op import Operator from op_test_util import OpTestMeta @@ -25,7 +25,7 @@ class TestSoftmaxOp(unittest.TestCase): class TestSoftmaxGradOp(unittest.TestCase): def test_softmax_grad(self): - op = creation.op_creations.softmax(X="X", Y="Y") + op = Operator('softmax', X="X", Y="Y") backward_op = core.Operator.backward(op, set()) self.assertEqual(backward_op.type(), "softmax_grad") expected = '''Op(softmax_grad), inputs:(X, Y, Y@GRAD), outputs:(X@GRAD).''' From 9d68c252e9665f9d37b0a08f8a4ade9e83ad9525 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Fri, 4 Aug 2017 10:33:04 -0700 Subject: [PATCH 12/44] test --- paddle/operators/fill_zeros_like_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 87f5f09ce9..ace8525132 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -class FillZerosLikeOp : public framework::OperatorWithKernel { +class FillZerosLikeOp : public OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE(ctx.InputSize() == 1UL, @@ -33,7 +33,7 @@ protected: } }; -class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker { +class FillZerosLikeOpMaker : public OpProtoAndCheckerMaker { public: FillZerosLikeOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) From 83ce2dce5f13e6391d465d946d544bb9b6aeea0d Mon Sep 17 00:00:00 2001 From: caoying03 Date: Sat, 5 Aug 2017 08:54:54 +0800 Subject: [PATCH 13/44] split sorting into another layer. fix config helper. 
--- proto/ModelConfig.proto | 2 - python/paddle/trainer/config_parser.py | 28 ++++++----- .../paddle/trainer_config_helpers/layers.py | 34 ++++++++------ .../protostr/test_seq_select_layers.protostr | 46 ++++--------------- .../tests/configs/test_seq_select_layers.py | 8 ++-- 5 files changed, 51 insertions(+), 67 deletions(-) diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index d45e34b83c..b50b73c7e1 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -497,8 +497,6 @@ message LayerConfig { repeated uint32 offset = 55; repeated uint32 shape = 56; - // for sub_nest_seq layer to select top k sequence with highest scores - optional uint32 top_k = 57 [default = 1]; } message EvaluatorConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 43a6914a50..c8fc49e20d 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2659,22 +2659,28 @@ class SubSequenceLayer(LayerBase): @config_layer('sub_nested_seq') class SubNestedSequenceLayer(LayerBase): - def __init__(self, name, inputs, top_k=1, bias=False, **xargs): + def __init__(self, name, inputs, selected_indices, bias=False, **xargs): + if isinstance(inputs, list): + assert len(inputs) == 1, ('the first input of sub_nested_seq ' + 'layer is a single nested sequence.') + inputs = inputs[0] + if isinstance(selected_indices, list): + assert len(selected_indices) == 1, ( + 'the second input of ' + 'sub_nested_seq layer is a single layer which is a ' + 'set of selected indices.') + selected_indices = selected_indices[0] + super(SubNestedSequenceLayer, self).__init__( - name, 'sub_nested_seq', 0, inputs=inputs, **xargs) - config_assert( - len(inputs) == 2, - ('SubNestSequenceLayer must have 2 inputs: ' - 'input1 is a nested sequence; input2 is a learnable distribution ' - 'or scores over each sentence in the nested sequence. ')) + name, + 'sub_nested_seq', + 0, + inputs=[inputs, selected_indices], + **xargs) input_layer0 = self.get_input_layer(0) size = input_layer0.size self.set_layer_size(size) - self.config.top_k = top_k - input_layer1 = self.get_input_layer(1) - assert (input_layer1.size == 1) - @config_layer('out_prod') class OuterProdLayer(LayerBase): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 179a009c3d..ebbe95a0c7 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6092,37 +6092,41 @@ def crop_layer(input, offset, axis=2, shape=None, name=None, layer_attr=None): @wrap_name_default() @layer_support() -def sub_nested_seq_layer(input, name=None, top_k=1): +def sub_nested_seq_layer(input, selected_indices, name=None): """ The sub_nested_seq_layer accepts two inputs: the first one is a nested - sequence in PaddlePaddle; the second one is a learnable score or - distribution over each sequence in the nested sequence. + sequence; the second one is a set of selceted indices in the nested sequence. - Then sub_nest_seq_layer selects top k sentences with highest scores or - probabilites according to the second input. + + Then sub_nest_seq_layer selects trims the first input according to the + selected indices to give a new output. This layer is used in beam training. The example usage is: .. 
code-block:: python - prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation()) - sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=3) + sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices]) + - :param input: The two input layers. The first input must be a nested - sequence. The second input is a learnable scores, whose size must be 1. + :param input: A nested sequence. + :type input: LayerOutput + :param selected_indices: a set of sequence indices in the nested sequence. :type input: LayerOutput :param name: name of this layer. :type name: basestring - :param top_k: number of sequences with highest probabilies to select. - :type top_k: int :return: LayerOutput object. :rtype: LayerOutput """ - assert isinstance(input, collections.Sequence) and len(input) == 2, ( - 'sub_nest_seq_layer has exactly two inputs.') + assert isinstance(input, LayerOutput), ( + 'The first input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + assert isinstance(selected_indices, LayerOutput), ( + 'The second input of ' + 'sub_nested_seq_layer must be a Paddle layer.') + l = Layer( - inputs=[x.name for x in input], + inputs=input.name, + selected_indices=selected_indices.name, name=name, - top_k=top_k, type=LayerType.SUB_NESTED_SEQ) return LayerOutput( name=name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr index 8f41be1042..4b906b113e 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_select_layers.protostr @@ -1,20 +1,15 @@ type: "nn" layers { - name: "input" + name: "input_seq" type: "data" size: 300 active_type: "" } layers { - name: "__fc_layer_0__" - type: "fc" - size: 1 - active_type: "sequence_softmax" - inputs { - input_layer_name: "input" - input_parameter_name: "___fc_layer_0__.w0" - } - bias_parameter_name: "___fc_layer_0__.wbias" + name: "input" + type: "data" + size: 5 + active_type: "" } layers { name: "__sub_nested_seq_layer_0__" @@ -22,41 +17,20 @@ layers { size: 300 active_type: "" inputs { - input_layer_name: "input" + input_layer_name: "input_seq" } inputs { - input_layer_name: "__fc_layer_0__" + input_layer_name: "input" } - top_k: 1 -} -parameters { - name: "___fc_layer_0__.w0" - size: 300 - initial_mean: 0.0 - initial_std: 0.057735026919 - dims: 300 - dims: 1 - initial_strategy: 0 - initial_smart: true -} -parameters { - name: "___fc_layer_0__.wbias" - size: 1 - initial_mean: 0.0 - initial_std: 0.0 - dims: 1 - dims: 1 - initial_strategy: 0 - initial_smart: false } -input_layer_names: "input" +input_layer_names: "input_seq" output_layer_names: "__sub_nested_seq_layer_0__" sub_models { name: "root" + layer_names: "input_seq" layer_names: "input" - layer_names: "__fc_layer_0__" layer_names: "__sub_nested_seq_layer_0__" - input_layer_names: "input" + input_layer_names: "input_seq" output_layer_names: "__sub_nested_seq_layer_0__" is_recurrent_layer_group: false } diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py index f2553f6b6a..6d1c3175ba 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_seq_select_layers.py @@ -2,8 +2,10 @@ #coding=utf-8 from 
paddle.trainer_config_helpers import * -data = data_layer(name='input', size=300) -prob = fc_layer(input=data, size=1, act=SequenceSoftmaxActivation()) -sub_nest_seq = sub_nested_seq_layer(input=[data, prob], top_k=1) +beam_size = 5 + +data = data_layer(name='input_seq', size=300) +selected_ids = data_layer(name='input', size=beam_size) +sub_nest_seq = sub_nested_seq_layer(input=data, selected_indices=selected_ids) outputs(sub_nest_seq) From 70076d087c98012314064c0b9e48853f1d94071d Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Sat, 5 Aug 2017 18:00:07 +0800 Subject: [PATCH 14/44] update docker docs --- .../build_and_install/docker_install_cn.rst | 49 ++++++++++++++-- .../build_and_install/docker_install_en.rst | 58 +++++++++---------- 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 87c286a1af..02b96bb413 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -3,6 +3,43 @@ PaddlePaddle的Docker容器使用方式 PaddlePaddle目前唯一官方支持的运行的方式是Docker容器。因为Docker能在所有主要操作系统(包括Linux,Mac OS X和Windows)上运行。 请注意,您需要更改 `Dockers设置 `_ 才能充分利用Mac OS X和Windows上的硬件资源。 +Docker使用入门 +------------------------------ + +几个基础的概念帮助理解和使用Docker: + +- *镜像*:一个Docker镜像是一个打包好的软件。它包含了这个软件本身和它所依赖的运行环境。PaddlePaddle的Docker镜像就包含了PaddlePaddle的Python库以及其依赖的多个Python库。这样我们可以直接在Docker中运行需要的程序而不需要安装后在执行。可以执行: + + .. code-block:: bash + + docker images + + 来列出当前系统中的所有镜像,同样可以执行: + + .. code-block:: bash + + docker pull paddlepaddle/paddle:0.10.0 + + 来下载Docker镜像,paddlepaddle/paddle是从官方镜像源Dockerhub.com下载的,推荐国内用户使用ocker.paddlepaddle.org/paddle下载。 + +- *容器*: 如果说一个Docker镜像就是一个程序,那容器就是这个程序运行时产生的“进程”。 + 实际上,一个容器就是一个操作系统的进程,但是是运行在独立的进程空间,文件系统以及网络之上。 + 可以执行: + + .. code-block:: bash + + docker run paddlepaddle/paddle:0.10.0 + + 来使用一个镜像启动一个容器。 + +- 默认情况下,Docker容器会运行在独立的文件系统空间之上,我们无法在Docker容器中 + 访问到主机上的文件。可以通过*挂载Volume*的方式,将主机上的文件或目录挂载到 + Docker容器中。下面的命令把当前目录挂载到了容器中的 /data 目录下,容器使用 + debian镜像,并且启动后执行 :code:`ls /data`。 + + .. code-block:: bash + + docker run --rm -v $(pwd):/data debian ls /data PaddlePaddle发布的Docker镜像使用说明 ------------------------------ @@ -12,11 +49,11 @@ PaddlePaddle需要的所有编译工具。把编译出来的PaddlePaddle也打 像,称为生产镜像,里面涵盖了PaddlePaddle运行所需的所有环境。每次 PaddlePaddle发布新版本的时候都会发布对应版本的生产镜像以及开发镜像。运 行镜像包括纯CPU版本和GPU版本以及其对应的非AVX版本。我们会在 -`dockerhub.com `_ 提供最新 -的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。为了方便在国 -内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您 -在国内,请把文档里命令中的paddlepaddle/paddle替换成 -docker.paddlepaddle.org/paddle。 +`dockerhub.com `_ +和国内镜像`docker.paddlepaddle.org` 提供最新 +的Docker镜像,可以在"tags"标签下找到最新的Paddle镜像版本。 + +**注意:为了方便在国内的开发者下载Docker镜像,我们提供了国内的镜像服务器供大家使用。如果您在国内,请把文档里命令中的paddlepaddle/paddle替换成docker.paddlepaddle.org/paddle。** 1. 开发镜像::code:`paddlepaddle/paddle:0.10.0-dev` @@ -68,6 +105,8 @@ docker.paddlepaddle.org/paddle。 如果输出是No,就需要选择使用no-AVX的镜像 + **注:在0.10.0之后的版本,PaddlePaddle都可以自动判断硬件是否支持AVX,所以无需判断AVX即可使用** + 以上方法在GPU镜像里也能用,只是请不要忘记提前在物理机上安装GPU最新驱动。 为了保证GPU驱动能够在镜像里面正常运行,我们推荐使用[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)来运行镜像。 diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index b6fd3329b2..94860240f6 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -63,12 +63,35 @@ CPU-only version and a CUDA GPU version and their no-AVX versions. We put the docker images on `dockerhub.com `_. 
You can find the -latest versions under "tags" tab at dockerhub.com. If you are in -China, you can use our Docker image registry mirror to speed up the -download process. To use it, please replace all paddlepaddle/paddle in -the commands to docker.paddlepaddle.org/paddle. +latest versions under "tags" tab at dockerhub.com. -1. Production images, this image might have multiple variants: +** NOTE: If you are in China, you can use our Docker image registry mirror to speed up the download process. To use it, please replace all paddlepaddle/paddle in the commands to docker.paddlepaddle.org/paddle.** + + +1. development image :code:`paddlepaddle/paddle:-dev` + + This image has packed related develop tools and runtime + environment. Users and developers can use this image instead of + their own local computer to accomplish development, build, + releasing, document writing etc. While different version of paddle + may depends on different version of libraries and tools, if you + want to setup a local environment, you must pay attention to the + versions. The development image contains: + + - gcc/clang + - nvcc + - Python + - sphinx + - woboq + - sshd + + Many developers use servers with GPUs, they can use ssh to login to + the server and run :code:`docker exec` to enter the docker + container and start their work. Also they can start a development + docker image with SSHD service, so they can login to the container + and start work. + +2. Production images, this image might have multiple variants: - GPU/AVX::code:`paddlepaddle/paddle:-gpu` - GPU/no-AVX::code:`paddlepaddle/paddle:-gpu-noavx` @@ -84,7 +107,7 @@ the commands to docker.paddlepaddle.org/paddle. if cat /proc/cpuinfo | grep -i avx; then echo Yes; else echo No; fi - + **NOTE:versions after 0.10.0 will automatically detect system AVX support, so manual detect is not needed in this case.** To run the CPU-only image as an interactive container: .. code-block:: bash @@ -103,29 +126,6 @@ the commands to docker.paddlepaddle.org/paddle. nvidia-docker run -it --rm paddlepaddle/paddle:0.10.0-gpu /bin/bash -2. development image :code:`paddlepaddle/paddle:-dev` - - This image has packed related develop tools and runtime - environment. Users and developers can use this image instead of - their own local computer to accomplish development, build, - releasing, document writing etc. While different version of paddle - may depends on different version of libraries and tools, if you - want to setup a local environment, you must pay attention to the - versions. The development image contains: - - - gcc/clang - - nvcc - - Python - - sphinx - - woboq - - sshd - - Many developers use servers with GPUs, they can use ssh to login to - the server and run :code:`docker exec` to enter the docker - container and start their work. Also they can start a development - docker image with SSHD service, so they can login to the container - and start work. - Train Model Using Python API ---------------------------- From 29fa73bc40ec6d79216fd351b53626fe0aa10227 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Sun, 6 Aug 2017 16:06:06 +0800 Subject: [PATCH 15/44] fix unittest. 
--- .../gserver/layers/SubNestedSequenceLayer.cpp | 95 +- paddle/gserver/tests/test_LayerGrad.cpp | 3890 +++++++++-------- 2 files changed, 1975 insertions(+), 2010 deletions(-) diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 6887df353e..443396a14d 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -31,13 +31,9 @@ public: void backward(const UpdateCallback& callback = nullptr) override; private: - void checkInputs(const Argument& inputSeq, const Argument& seqScores); - void calSelectedCols(const Argument& scores, - const int* subSeqStartPos, - size_t topK); - void partialSortIndex(const std::vector& values, - int k, - std::vector& indices); + void calSelectedCols(const MatrixPtr scores, + const int* seqStartPos, + const int* subSeqStartPos); void buildOutputSeqInfo(); std::vector outSeqStartInfo_; @@ -61,74 +57,12 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap, return true; } -void SubNestedSequenceLayer::checkInputs(const Argument& inputSeq, - const Argument& seqScores) { - CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " - << "must be a nested sequence."; - CHECK(seqScores.hasSeq()) - << "The second input of SubNestSequence layer must be a sequence."; - CHECK_EQ(seqScores.value->getWidth(), 1U) - << "The second input of SubNestedSequenceLayer is scores " - << "over each sequence in a nested sequence, " - << "so its size should be 1."; - CHECK_EQ(inputSeq.getNumSubSequences(), seqScores.value->getHeight()) - << "The second input of SubNestedSequenceLayer is scores " - << "over each sequence in a nested sequence, so its height should be " - << "equal to number of sequence in the first input."; -} - -void SubNestedSequenceLayer::partialSortIndex(const std::vector& values, - int k, - std::vector& indices) { - CHECK_GE(values.size(), k); - indices.resize(values.size(), 0); - std::iota(begin(indices), end(indices), 0U); - std::partial_sort(begin(indices), - begin(indices) + k, - end(indices), - [&](size_t a, size_t b) { return values[a] > values[b]; }); -} - -void SubNestedSequenceLayer::calSelectedCols(const Argument& scores, - const int* subSeqStartPos, - size_t topK) { +void SubNestedSequenceLayer::calSelectedCols(const MatrixPtr selected_indices, + const int* seqStartPos, + const int* subSeqStartPos) { selectedRows_.clear(); outSubSeqStartInfo_.resize(1, 0); outSeqStartInfo_.resize(1, 0); - - real* seqScores = nullptr; - if (useGpu_) { - Matrix::resizeOrCreate(scoreOverInputSeq_, - scores.value->getHeight(), - scores.value->getWidth(), - false /* trans */, - false /* useGpu */); - scoreOverInputSeq_->copyFrom(*scores.value); - seqScores = scoreOverInputSeq_->getData(); - } else { - seqScores = scores.value->getData(); - } - - int* scoreSeqStartPos = scores.sequenceStartPositions->getMutableData(false); - for (int i = 0; i < scores.getNumSequences(); ++i) { - int seqLen = scoreSeqStartPos[i + 1] - scoreSeqStartPos[i]; - int selectedSeqNum = std::min(static_cast(config_.top_k()), seqLen); - - std::vector sortedIdx; - partialSortIndex(std::vector(seqScores + scoreSeqStartPos[i], - seqScores + scoreSeqStartPos[i + 1]), - selectedSeqNum, - sortedIdx); - - for (int j = 0; j < selectedSeqNum; ++j) { - int begPos = subSeqStartPos[scoreSeqStartPos[i] + sortedIdx[j]]; - int endPos = subSeqStartPos[scoreSeqStartPos[i] + sortedIdx[j] + 1]; - for (int m = begPos; m < endPos; ++m) selectedRows_.push_back(m); - 
outSubSeqStartInfo_.push_back(outSubSeqStartInfo_.back() + endPos - - begPos); - } - outSeqStartInfo_.push_back(outSubSeqStartInfo_.back()); - } } void SubNestedSequenceLayer::buildOutputSeqInfo() { @@ -147,14 +81,17 @@ void SubNestedSequenceLayer::buildOutputSeqInfo() { void SubNestedSequenceLayer::forward(PassType passType) { Layer::forward(passType); + const Argument& inputSeq = getInput(0); - const Argument& seqScores = getInput(1); + const MatrixPtr selected_indices = getInputValue(1); + CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer " + << "must be a nested sequence."; + CHECK_EQ(inputSeq.getNumSequences(), selected_indices->getHeight()); - checkInputs(inputSeq, seqScores); + calSelectedCols(selected_indices, + inputSeq.sequenceStartPositions->getMutableData(false), + inputSeq.subSequenceStartPositions->getMutableData(false)); - calSelectedCols(seqScores, - inputSeq.subSequenceStartPositions->getMutableData(false), - config_.top_k()); resetOutput(selectedRows_.size(), getSize()); buildOutputSeqInfo(); @@ -170,10 +107,10 @@ void SubNestedSequenceLayer::forward(PassType passType) { } void SubNestedSequenceLayer::backward(const UpdateCallback& callback) { - MatrixPtr inputGrad1 = getInputGrad(0); + MatrixPtr inputSeqGrad = getInputGrad(0); MatrixPtr outputGrad = getOutputGrad(); - if (inputGrad1) outputGrad->addToRows(*inputGrad1, *rowIndice_); + if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_); } } // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index bd7770059e..da546b979e 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -32,1964 +32,1992 @@ DECLARE_double(checkgrad_eps); DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(prev_batch_state); -TEST(Operator, dot_mul) { - TestConfig config; - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); - operatorConf.set_type("dot_mul"); - operatorConf.set_dotmul_scale(-1); - - testOperatorGrad(config, operatorConf, 100, false, false); -} - -TEST(Projection, context) { - for (auto contextStart : {-5, -3, -1, 0, 3}) { - for (auto contextLength : {1, 2, 5, 7}) { - for (auto batchSize : {1, 2, 5, 20, 50}) { - for (auto trainablePadding : {false, true}) { - LOG(INFO) << " contextStart=" << contextStart - << " contextLength=" << contextLength - << " batchSize=" << batchSize - << " trainablePadding=" << trainablePadding; - ProjectionConfig conf; - conf.set_type("context"); - conf.set_input_size(10); - conf.set_context_start(contextStart); - conf.set_context_length(contextLength); - conf.set_trainable_padding(trainablePadding); - conf.set_output_size(conf.context_length() * conf.input_size()); - int pad = - std::max(0, -conf.context_start()) + - std::max(0, conf.context_start() + conf.context_length() - 1); - for (auto useGpu : {false, true}) { - testProjectionGrad( - conf, - INPUT_SEQUENCE_DATA, - trainablePadding ? 
conf.input_size() * pad : 0, - batchSize, - useGpu, - contextStart + contextLength <= 1); // = testState - } - } - } - } - } -} - -TEST(Projection, trans_fc) { - ProjectionConfig conf; - conf.set_type("trans_fc"); - conf.set_input_size(50); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 1000, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, fc) { - ProjectionConfig conf; - conf.set_type("fc"); - conf.set_input_size(10); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 200, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, dot_mul) { - ProjectionConfig conf; - conf.set_type("dot_mul"); - conf.set_input_size(20); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 20, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, table) { - ProjectionConfig conf; - conf.set_type("table"); - conf.set_input_size(10); - conf.set_output_size(20); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_LABEL, - /* parameterSize */ 200, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, identity) { - ProjectionConfig conf; - conf.set_type("identity"); - conf.set_input_size(10); - conf.set_output_size(10); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 0, - /* batchSize */ 100, - useGpu); - } -} - -TEST(Projection, slice) { - ProjectionConfig conf; - conf.set_type("slice"); - conf.set_input_size(100); - SliceConfig& slice1 = *conf.add_slices(); - slice1.set_start(10); - slice1.set_end(20); - SliceConfig& slice2 = *conf.add_slices(); - slice2.set_start(50); - slice2.set_end(70); - conf.set_output_size(30); - for (auto useGpu : {false, true}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 0, - /* batchSize */ 10, - useGpu); - } -} - -TEST(Projection, scaling) { - ProjectionConfig conf; - conf.set_type("scaling"); - conf.set_input_size(10); - conf.set_output_size(10); - for (auto useGpu : {false}) { - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ 1, - /* batchSize */ 100, - useGpu); - } -} - -void testProjectionConv(size_t groups, bool isDeconv) { - const int NUM_FILTERS = 18; - const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 4; - const int CHANNELS = 3; - const int IMAGE_SIZE = 16; - - ProjectionConfig conf; - if (isDeconv) { - conf.set_type("convt"); - } else { - conf.set_type("conv"); - } - conf.set_num_filters(NUM_FILTERS); - - ConvConfig* conv = conf.mutable_conv_conf(); - conv->set_filter_size(FILTER_SIZE); - conv->set_filter_size_y(FILTER_SIZE_Y); - conv->set_channels(CHANNELS); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(groups); - if (isDeconv) { - conv->set_filter_channels(NUM_FILTERS / conv->groups()); - } else { - conv->set_filter_channels(conv->channels() / conv->groups()); - } - conv->set_img_size(IMAGE_SIZE); - int output_x = outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true); - int output_y = outputSize(conv->img_size(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true); - conv->set_output_x(output_x); - conv->set_output_y(output_y); - if (isDeconv) { - conf.set_input_size(output_x * output_y * CHANNELS); - conf.set_output_size(IMAGE_SIZE * 
IMAGE_SIZE * NUM_FILTERS); - } else { - conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); - conf.set_output_size(output_x * output_y * NUM_FILTERS); - } - - testProjectionGrad(conf, - INPUT_DATA, - /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * - FILTER_SIZE_Y / groups, - /* batchSize */ 100, - true, - false, - NUM_FILTERS, - true); -} - -#ifndef PADDLE_ONLY_CPU -TEST(Projection, conv) { - /// test ConvProjection - testProjectionConv(1, false); - testProjectionConv(3, false); - /// test ConvTransProjection - testProjectionConv(1, true); - testProjectionConv(3, true); -} -#endif - -TEST(Layer, BilinearInterpLayer) { - TestConfig config; - config.layerConfig.set_type("bilinear_interp"); - config.biasSize = 0; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); - - LayerInputConfig* input = config.layerConfig.add_inputs(); - BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); - ImageConfig* image = bilinear->mutable_image_conf(); - image->set_img_size(32); - image->set_img_size_y(32); - image->set_channels(4); - - for (auto useGpu : {false, true}) { - for (auto outSize : {32, 64}) { - bilinear->set_out_size_x(outSize); - bilinear->set_out_size_y(outSize); - testLayerGrad(config, "bilinear_interp", 10, false, useGpu); - } - } -} - -TEST(Layer, concat) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("concat"); - config.layerConfig.set_size(15); - config.layerConfig.set_active_type("sigmoid"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "concat", 100, false, useGpu); - } -} - -TEST(Layer, AddtoLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("addto"); - config.layerConfig.set_size(10); - config.layerConfig.set_active_type("sigmoid"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "addto", 100, false, useGpu); - } -} - -TEST(Layer, CTCLayer) { - TestConfig config; - config.layerConfig.set_type("ctc"); - config.layerConfig.set_norm_by_times(false); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "ctc", - 100, - /* trans */ false, /* useGpu */ - useGpu); - } -} - -TEST(Layer, cosSimLayer) { - TestConfig config; - config.layerConfig.set_type("cos"); - config.layerConfig.set_size(1); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "cos", 100, false, useGpu); - } -} - -TEST(Layer, CosSimVecMatLayer) { - TestConfig config; - config.layerConfig.set_type("cos_vm"); - config.layerConfig.set_size(5); // output size - config.layerConfig.set_cos_scale(2.0); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); - config.layerConfig.add_inputs(); - 
config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "cos_vm", 100, false, useGpu); - } -} - -void testDepthwiseConvLayer(const string& type, bool useGpu) { - TestConfig config; - config.biasSize = 32; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(32); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(3); - conv->set_channels(16); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(16); - conv->set_filter_channels(conv->channels() / conv->groups()); - conv->set_img_size(16); - conv->set_img_size_y(8); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - config.layerConfig.num_filters()); - - testLayerGrad(config, "depthwise_conv", 100, false, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); -} - -TEST(Layer, depthwiseConvLayer) { - // 'depthwise_conv' is a sepecial case of 'exconv' whose - // groups size equals to the input channels size. - testDepthwiseConvLayer("exconv", /* useGpu= */ false); -#ifndef PADDLE_ONLY_CPU - testDepthwiseConvLayer("exconv", /* useGpu= */ true); -#endif -} - -void testConvLayer(const string& type, bool trans, bool useGpu) { - TestConfig config; - config.biasSize = 16; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(16); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(3); - conv->set_channels(3); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_filter_channels(conv->channels() / conv->groups()); - conv->set_img_size(16); - conv->set_img_size_y(8); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - config.layerConfig.num_filters()); - - testLayerGrad(config, "conv", 100, trans, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); -} - -TEST(Layer, convLayer) { - testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); -#ifndef PADDLE_ONLY_CPU - testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); - testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); -#endif -} - -void testConvTransLayer(const string& type, bool trans, bool useGpu) { - 
TestConfig config; - config.biasSize = 3; - config.layerConfig.set_type(type); - config.layerConfig.set_num_filters(3); - config.layerConfig.set_partial_sum(1); - config.layerConfig.set_shared_biases(true); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ConvConfig* conv = input->mutable_conv_conf(); - conv->set_filter_size(2); - conv->set_filter_size_y(4); - conv->set_channels(16); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_filter_channels(3 / conv->groups()); - conv->set_img_size(16); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - - config.layerConfig.set_size(conv->img_size() * conv->img_size() * - config.layerConfig.num_filters()); - - testLayerGrad(config, "convTrans", 100, trans, useGpu); - // Use small batch_size and useWeight=true to test biasGrad - testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); -} - -TEST(Layer, convTransLayer) { - for (auto useGpu : {false, true}) { - testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); - } -#ifndef PADDLE_ONLY_CPU - testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); -#endif -} - -TEST(Layer, blockExpandLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("blockexpand"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); - blockExpand->set_img_size_x(64); - blockExpand->set_img_size_y(32); - blockExpand->set_channels(3); - blockExpand->set_padding_x(0); - blockExpand->set_padding_y(0); - blockExpand->set_block_x(4); - blockExpand->set_block_y(32); - blockExpand->set_stride_x(2); - blockExpand->set_stride_y(2); - blockExpand->set_output_x(outputSize(blockExpand->img_size_x(), - blockExpand->block_x(), - blockExpand->padding_x(), - blockExpand->stride_x(), - /* caffeMode */ false)); - blockExpand->set_output_y(outputSize(blockExpand->img_size_y(), - blockExpand->block_y(), - blockExpand->padding_y(), - blockExpand->stride_y(), - /* caffeMode */ false)); - config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() * - blockExpand->channels()); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "blockexpand", 100, false, useGpu); - } -} - -TEST(Layer, maxoutLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("maxout"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - MaxOutConfig* maxout = input->mutable_maxout_conf(); - ImageConfig* image = maxout->mutable_image_conf(); - - image->set_img_size(32); - image->set_img_size_y(32); - image->set_channels(4); - maxout->set_groups(2); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "maxout", 10, false, useGpu); - } -} -void testFcLayer(string format, size_t nnz) { - TestConfig config; - config.biasSize = 4096; - config.layerConfig.set_type("fc"); - config.layerConfig.set_size(4096); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_drop_rate(0.1); - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)}); - config.layerConfig.add_inputs(); - - LOG(INFO) << config.inputDefs[0].sparse.sparse << " " - << 
config.inputDefs[0].sparse.format; - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "fc", - 100, - /* trans */ false, - useGpu, - /* weight */ true); - } -} - -TEST(Layer, fcLayer) { - testFcLayer("", 4096 * 4096 * 2); - testFcLayer("csc", 4096 * 40); - testFcLayer("csr", 4096 * 40); -} - -TEST(Layer, SelectiveFullyConnectedLayer) { - TestConfig config; - size_t nin = 16; - size_t nout = 256; - config.layerConfig.set_type("selective_fc"); - config.layerConfig.set_size(nout); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_has_selected_colums(true); - config.layerConfig.set_selective_fc_pass_generation(false); - config.biasSize = nout; - - config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back( - {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", true)}); - config.layerConfig.add_inputs(); - - testLayerGrad(config, - "selective_fc", - 100, - /* trans= */ false, - /* useGup= */ false, - false); -#ifndef PADDLE_ONLY_CPU - testLayerGrad(config, - "selective_fc", - 100, - /* trans= */ false, - /* useGup= */ true, - false); -#endif -} - -TEST(Layer, DataNormLayer) { - TestConfig config; - config.layerConfig.set_type("data_norm"); - config.layerConfig.set_size(20); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100}); - config.inputDefs.back().isStatic = true; - config.layerConfig.add_inputs(); - - for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) { - config.layerConfig.set_data_norm_strategy(strategy); - // The parameters are static, so not support GPU now - testLayerGrad(config, - "data_norm", - 200, - /* trans */ false, - /* useGpu */ false); - } -} - -TEST(Layer, hsigmoidLayer) { - TestConfig config; - config.layerConfig.set_type("hsigmoid"); - config.layerConfig.set_num_classes(5); - config.layerConfig.set_size(1); - config.biasSize = config.layerConfig.num_classes() - 1; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // Not support GPU now - testLayerGrad(config, - "hsigmoid", - 100, - /* trans */ false, /* useGpu */ - false); -} - -TEST(Layer, multi_cross) { - TestConfig config; - config.layerConfig.set_type("multi-class-cross-entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad( - config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); - } -} - -TEST(Layer, multi_binary_label_sparse_mat) { - TestConfig config; - config.layerConfig.set_type("multi_binary_label_cross_entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "multi_binary_label_cross_entropy", - 100, - /* trans */ false, - useGpu); - } -} - -TEST(layer, multi_binary_label_id) { - TestConfig config; - config.layerConfig.set_type("multi_binary_label_cross_entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_LABEL, 
"layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "multi_binary_label_cross_entropy", - 100, - /* trans */ false, - useGpu); - } -} - -TEST(Layer, multi_cross_with_selfnorm) { - TestConfig config; - config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); - config.layerConfig.set_softmax_selfnorm_alpha(0.1); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // Not support GPU now - testLayerGrad(config, - "multi_class_cross_entropy_with_selfnorm", - 100, - /* trans */ false, - /* useGpu */ false); -} - -TEST(Layer, multi_cross_soft) { - TestConfig config; - config.layerConfig.set_type("soft_binary_class_cross_entropy"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, - "soft_binary_class_cross_entropy", - 100, - /* trans */ false, - useGpu); - } -} - -TEST(Layer, square_error) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); - } -} - -TEST(Layer, sparse_square_error) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // "GpuSparseMatrix" as label is not supported - testLayerGrad(config, - "square_error", - 100, - /* trans */ false, - /* useGpu */ false); -} - -TEST(Layer, sparse_float_square_error) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); - config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // "GpuSparseMatrix" as label is not supported - testLayerGrad(config, - "square_error", - 100, - /* trans */ false, - /* useGpu */ false); -} - -TEST(Layer, square_error_weighted) { - TestConfig config; - config.layerConfig.set_type("square_error"); - config.biasSize = 0; - config.testAccumulate = false; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); - } -} - -TEST(Layer, huber_two_class) { - TestConfig config; - config.layerConfig.set_type("huber"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 
0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); - } -} - -void testExpandLayer(string trans_type, bool hasSubseq) { - TestConfig config; - config.layerConfig.set_type("expand"); - - config.inputDefs.push_back( - {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, - "layer_0", - 10, - 0}); - config.inputDefs.push_back( - {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, - "layer_1", - 10, - 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.set_trans_type(trans_type); - LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "expand", 30, false, useGpu); - } -} - -TEST(Layer, ExpandLayer) { - testExpandLayer("non-seq", false); // non-seq expand to seq - testExpandLayer("non-seq", true); // non-seq expand to hasSubseq - testExpandLayer("seq", true); // seq expand to hasSubseq -} - -void testDegradeLayer(bool hasSubseq, - string layer_type, - string trans_type, - int stride) { - TestConfig config; - config.layerConfig.set_type(layer_type); - config.layerConfig.set_size(10); - config.layerConfig.set_seq_pool_stride(stride); - config.biasSize = 0; - - config.inputDefs.push_back( - {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, - "layer_0", - 10, - 0}); - config.layerConfig.add_inputs(); - config.layerConfig.set_trans_type(trans_type); - - auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) { - for (auto useGpu : {false, true}) { - testLayerGrad(config, layer_type, 100, false, useGpu); - } - }; - - if (layer_type == "average") { - for (auto strategy : {"average", "sum", "squarerootn"}) { - LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type - << " average_strategy=" << strategy - << " seq_pool_stride=" << stride; - config.layerConfig.set_average_strategy(strategy); - testDegradeLayerGrad(config, layer_type); - } - } else { - LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type - << " seq_pool_stride=" << stride; - testDegradeLayerGrad(config, layer_type); - } -} - -TEST(Layer, MaxLayer) { - testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq - testDegradeLayer(false, - "max", - "non-seq", - 5); // seq max to a shorten seq, stride window = 5 - testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq - testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq -} - -TEST(Layer, SequenceLastInstanceLayer) { - testDegradeLayer(false, - "seqlastins", - "non-seq", - -1); // seq seqlastins to non-seq - testDegradeLayer(false, - "seqlastins", - "non-seq", - 5); // seq seqlastins to a shorten seq, stride window = 5 - testDegradeLayer(true, - "seqlastins", - "non-seq", - -1); // hasSubseq seqlastins to non-seq - testDegradeLayer( - true, "seqlastins", "seq", -1); // hasSubseq seqlastins to seq -} - -TEST(Layer, AverageLayer) { - testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq - testDegradeLayer(false, - "average", - "non-seq", - 5); // seq average to a shorten seq, stride window = 5 - testDegradeLayer( - true, "average", "non-seq", -1); // hasSubseq average to non-seq - testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq -} - -TEST(Layer, SequenceConcatLayer) { - TestConfig config; - config.layerConfig.set_type("seqconcat"); - 
config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "seqconcat", 100, false, useGpu); - } -} - -TEST(Layer, SequenceReshapeLayer) { - TestConfig config; - config.layerConfig.set_type("seqreshape"); - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "seqreshape", 100, false, useGpu); - } -} - -TEST(Layer, ConvShiftLayer) { - TestConfig config; - config.layerConfig.set_type("conv_shift"); - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - // Not support GPU now - testLayerGrad(config, "conv_shift", 100, false, false); -} - -TEST(Layer, PowerLayer) { - TestConfig config; - config.layerConfig.set_type("power"); - config.layerConfig.set_size(10); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "power", 100, false, useGpu); - } -} - -TEST(Layer, ConvexCombinationLayer) { - TestConfig config; - config.layerConfig.set_type("convex_comb"); - config.layerConfig.set_size(20); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "convex_comb", 100, false, useGpu); - } -} - -TEST(Layer, InterpolationLayer) { - TestConfig config; - config.layerConfig.set_type("interpolation"); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "interpolation", 100, false, useGpu); - } -} - -TEST(Layer, OuterProdLayer) { - TestConfig config; - config.layerConfig.set_type("out_prod"); - config.layerConfig.set_size(100); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "out_prod", 100, false, useGpu); - } -} - -TEST(Layer, SlopeInterceptLayer) { - TestConfig config; - config.layerConfig.set_type("slope_intercept"); - config.layerConfig.set_size(10); - config.layerConfig.set_slope(1.0); - config.layerConfig.set_intercept(0.1); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "slope_intercept", 100, false, useGpu); - } -} - -TEST(Layer, ScalingLayer) { - TestConfig config; - 
config.layerConfig.set_type("scaling"); - config.layerConfig.set_size(10); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.layerConfig.add_inputs(); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "scaling", 100, false, useGpu); - } -} - -void testNormLayer(const string& normType, bool trans, bool useGpu) { - TestConfig config; - config.layerConfig.set_type("norm"); - config.layerConfig.set_active_type("relu"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - NormConfig* norm = input->mutable_norm_conf(); - norm->set_norm_type(normType); - norm->set_channels(16); - norm->set_size(5); - norm->set_scale(0.001); - norm->set_pow(0.75); - norm->set_blocked(0); - norm->set_img_size(14); - norm->set_img_size_y(7); - norm->set_output_x(norm->img_size()); - norm->set_output_y(norm->img_size_y()); - if (norm->norm_type() == "cmrnorm" || - norm->norm_type() == "cmrnorm-projection") { - norm->set_scale(norm->scale() / norm->size()); - } else { - norm->set_scale(norm->scale() / (norm->size() * norm->size())); - } - - config.layerConfig.set_size(norm->output_x() * norm->output_y() * - norm->channels()); - config.biasSize = 0; - - testLayerGrad(config, "norm", 100, trans, useGpu); -} - -TEST(Layer, NormLayer) { - testNormLayer("cmrnorm-projection", - /* trans= */ false, /* useGpu= */ - true); - testNormLayer("cmrnorm-projection", - /* trans= */ false, /* useGpu= */ - false); -} - -void setPoolConfig(TestConfig* config, - PoolConfig* pool, - const string& poolType) { - (*config).biasSize = 0; - (*config).layerConfig.set_type("pool"); - (*config).layerConfig.set_num_filters(16); - - int kw = 3, kh = 3; - int pw = 0, ph = 0; - int sw = 2, sh = 2; - pool->set_pool_type(poolType); - pool->set_channels(16); - pool->set_size_x(kw); - pool->set_size_y(kh); - pool->set_start(0); - pool->set_padding(pw); - pool->set_padding_y(ph); - pool->set_stride(sw); - pool->set_stride_y(sh); - - int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); - int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); - pool->set_output_x(ow); - pool->set_output_y(oh); -} - -void testPoolLayer(const string& poolType, bool trans, bool useGpu) { - TestConfig config; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - - pool->set_img_size(14); - pool->set_img_size_y(14); - setPoolConfig(&config, pool, poolType); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - testLayerGrad(config, "pool", 100, trans, useGpu); -} - -#ifndef PADDLE_ONLY_CPU -void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { - TestConfig config; - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PoolConfig* pool = input->mutable_pool_conf(); - - pool->set_size_y(4); - pool->set_stride_y(3); - pool->set_img_size(10); - pool->set_img_size_y(20); - setPoolConfig(&config, pool, poolType); - pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / - ((float)pool->stride_y()) + - 1.5); - config.layerConfig.set_size(pool->output_x() * pool->output_y() * - pool->channels()); - - testLayerGrad(config, "pool", 100, trans, useGpu); -} 
-#endif - -TEST(Layer, PoolLayer) { - testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); - testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); - -#ifndef PADDLE_ONLY_CPU - testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); - testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); - testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); - testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); -#endif -} - -void testSppLayer(const string& poolType, - const int pyramidHeight, - bool trans, - bool useGpu) { - TestConfig config; - config.layerConfig.set_type("spp"); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - SppConfig* sppConfig = input->mutable_spp_conf(); - sppConfig->set_pool_type(poolType); - sppConfig->set_pyramid_height(pyramidHeight); - ImageConfig* imageConfig = sppConfig->mutable_image_conf(); - imageConfig->set_channels(16); - imageConfig->set_img_size(10); - imageConfig->set_img_size_y(20); - int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); - config.layerConfig.set_size(outputSize * imageConfig->channels()); - testLayerGrad(config, "spp", 100, trans, useGpu); -} - -TEST(Layer, SpatialPyramidPoolLayer) { - for (auto useGpu : {false, true}) { - for (auto pyramidHeight : {1, 2, 3}) { - testSppLayer("avg-projection", pyramidHeight, false, useGpu); - testSppLayer("max-projection", pyramidHeight, false, useGpu); - } - } -} - -TEST(Layer, rankCostLayer) { - TestConfig config; - config.layerConfig.set_type("rank-cost"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "rank-cost", 100, false, useGpu); - } -} - -TEST(Layer, sumCostLayer) { - TestConfig config; - config.layerConfig.set_type("sum_cost"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "sum_cost", 100, false, useGpu); - } -} - -TEST(Layer, weightedRankCostLayer) { - TestConfig config; - config.layerConfig.set_type("rank-cost"); - config.biasSize = 0; - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); - } -} - -TEST(Layer, TensorLayer) { - TestConfig config; - config.layerConfig.set_type("tensor"); - config.layerConfig.set_size(10); - config.layerConfig.set_active_type("sigmoid"); - config.biasSize = config.layerConfig.size(); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); - config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 
0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "tensor", 100, false, useGpu); - } -} - -TEST(Layer, RecurrentLayer) { - TestConfig config; - config.layerConfig.set_type("recurrent"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("tanh"); - config.biasSize = 4; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - config.layerConfig.set_reversed(reversed); - config.testState = !reversed; - testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); - } - } -} - -TEST(Layer, LstmLayer) { - TestConfig config; - config.layerConfig.set_type("lstmemory"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("tanh"); - config.layerConfig.set_active_state_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 28; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - config.layerConfig.set_reversed(reversed); - config.testState = !reversed; - testLayerGrad(config, "lstmemory", 100, /* trans= */ false, useGpu); - } - } - for (auto useGpu : {true}) { - config.testBatchState = true; - config.layerConfig.set_reversed(false); - testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); - } -} - -TEST(Layer, MDLstmLayer) { - TestConfig config; - config.layerConfig.set_type("mdlstmemory"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_state_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 4 * 9; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); - config.layerConfig.add_inputs(); - config.layerConfig.add_directions(true); - config.layerConfig.add_directions(true); - - for (auto useGpu : {false, true}) { - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 2; j++) { - config.layerConfig.set_directions(0, bool(i)); - config.layerConfig.set_directions(1, bool(j)); - testLayerGrad(config, "mdlstmemory", 100, false, useGpu); - } - } - } -} - -TEST(Layer, ParameterReluLayer) { - auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { - TestConfig config; - config.layerConfig.set_type("prelu"); - config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); - config.layerConfig.add_inputs(); - config.layerConfig.set_size(inputSize); - config.layerConfig.set_partial_sum(inputSize / - channels); // size of feature map - for (auto useGpu : {false, true}) { - testLayerGrad(config, "prelu", 100, false, useGpu); - } - }; - - testParameterReluLayer(192, 1); - testParameterReluLayer(192, 3); - testParameterReluLayer(192, 192); -} - -TEST(Layer, ResizeLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("resize"); - config.layerConfig.set_size(64); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "resize", 100, false, useGpu); - } -} - -TEST(Layer, RotateLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("rotate"); - const int CHANNEL = 2; - const int HEIGHT = 
8; - const int WIDTH = 4; - const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; - config.layerConfig.set_size(INPUT_SIZE); - config.layerConfig.set_height(HEIGHT); - config.layerConfig.set_width(WIDTH); - config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "rotate", 100, false, useGpu); - } -} - -TEST(Layer, NCELayer) { - TestConfig config; - size_t numClasses = 4; - config.layerConfig.set_type("nce"); - config.layerConfig.set_size(1); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_num_classes(numClasses); - config.biasSize = numClasses; - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * numClasses}); - config.inputDefs.push_back( - {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto withWeight : {false, true}) { - if (withWeight) { - config.inputDefs.push_back( - {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - } - - for (auto isIdLabel : {false, true}) { - config.inputDefs[1] = { - isIdLabel ? INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA, - "label", - /* dim= */ numClasses, - /* paraSize= */ 0}; - - for (auto withDist : {false, true}) { - config.layerConfig.clear_neg_sampling_dist(); - if (withDist) { - double sum = 0; - for (size_t i = 0; i < numClasses; ++i) { - real p = rand(); // NOLINT use rand_r - config.layerConfig.add_neg_sampling_dist(p); - sum += p; - } - for (size_t i = 0; i < numClasses; ++i) { - real p = config.layerConfig.neg_sampling_dist(i) / sum; - config.layerConfig.set_neg_sampling_dist(i, p); - } - } - LOG(INFO) << "NCELayer " - << " isIdLabel=" << isIdLabel << " withWeight=" << withWeight - << " withDist=" << withDist; - // Not support GPU now - testLayerGrad(config, - "nce", - 100, - /* trans= */ false, - /* useGpu */ false); - } - } - } -} - -TEST(Layer, GatedRecurrentLayer) { - TestConfig config; - config.layerConfig.set_type("gated_recurrent"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 12; - - config.inputDefs.push_back( - {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - for (auto reversed : {false, true}) { - config.layerConfig.set_reversed(reversed); - config.testState = !reversed; - testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, useGpu); - } - } -} - -TEST(Layer, GruStepLayer) { - TestConfig config; - config.layerConfig.set_type("gru_step"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 12; - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu); - } -} - -TEST(Layer, LstmStepLayer) { - TestConfig config; - config.layerConfig.set_type("lstm_step"); - config.layerConfig.set_size(4); - config.layerConfig.set_active_type("sigmoid"); - config.layerConfig.set_active_state_type("sigmoid"); - 
config.layerConfig.set_active_gate_type("sigmoid"); - config.biasSize = 12; - config.testAccumulate = false; - - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu); - } -} - -void testBatchNormLayer(const string& type, bool trans, bool useGpu) { - TestConfig config; - const int CHANNELS = 10; - const int IMG_SIZE = 16; - const int IMG_SIZE_Y = 8; - size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; - config.layerConfig.set_type(type); - config.layerConfig.set_size(size); - config.layerConfig.set_active_type("sigmoid"); - config.biasSize = CHANNELS; - config.inputDefs.push_back({INPUT_DATA, - "layer_0", - /* dim= */ size, - /* paraSize= */ CHANNELS}); - - config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); - config.inputDefs.back().isStatic = true; - config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS}); - config.inputDefs.back().isStatic = true; - - LayerInputConfig* input = config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - ImageConfig* img_conf = input->mutable_image_conf(); - img_conf->set_channels(CHANNELS); - img_conf->set_img_size(IMG_SIZE); - img_conf->set_img_size_y(IMG_SIZE_Y); - - testLayerGrad(config, - "batch_norm", - 64, - /* trans= */ trans, - useGpu, - /* useWeight */ true); -} - -TEST(Layer, BatchNormalizationLayer) { - testBatchNormLayer("batch_norm", false, false); -#ifndef PADDLE_ONLY_CPU - testBatchNormLayer("batch_norm", false, true); - if (hl_get_cudnn_lib_version() >= int(4000)) { - testBatchNormLayer("cudnn_batch_norm", false, true); - } -#endif -} - -void testConvOperator(bool isDeconv) { - TestConfig config; - const int NUM_FILTERS = 16; - const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 3; - const int CHANNELS = 3; - const int IMAGE_SIZE = 16; - const int IMAGE_SIZE_Y = 9; - OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); - if (isDeconv) { - operatorConf.set_type("convt"); - } else { - operatorConf.set_type("conv"); - } - ConvConfig* conv = operatorConf.mutable_conv_conf(); - operatorConf.set_num_filters(NUM_FILTERS); - conv->set_filter_size(FILTER_SIZE); - conv->set_filter_size_y(FILTER_SIZE_Y); - conv->set_channels(CHANNELS); - conv->set_padding(0); - conv->set_padding_y(1); - conv->set_stride(2); - conv->set_stride_y(2); - conv->set_groups(1); - conv->set_img_size(IMAGE_SIZE); - conv->set_img_size_y(IMAGE_SIZE_Y); - conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), - conv->padding(), - conv->stride(), - /* caffeMode */ true)); - conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), - conv->padding_y(), - conv->stride_y(), - /* caffeMode */ true)); - - if (isDeconv) { - conv->set_filter_channels(NUM_FILTERS / conv->groups()); - config.inputDefs.push_back({INPUT_DATA, - "layer_0", - conv->output_x() * conv->output_y() * CHANNELS, - 0}); - config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); - } else { - conv->set_filter_channels(conv->channels() / conv->groups()); - config.inputDefs.push_back( - {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); - config.layerConfig.set_size(conv->output_x() * conv->output_y() * - NUM_FILTERS); - } - - 
config.inputDefs.push_back( - {INPUT_DATA, - "layer_1", - FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, - 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); -} - -TEST(Operator, conv) { - testConvOperator(/*isDeconv*/ true); - testConvOperator(/*isDeconv*/ false); -} - -TEST(Layer, FeatureMapExpandLayer) { - TestConfig config; - config.layerConfig.set_type("featmap_expand"); - const int CHANNELS = 10; - const int INPUT_SIZE = 100; - config.layerConfig.set_size(INPUT_SIZE * CHANNELS); - config.layerConfig.set_num_filters(CHANNELS); - config.inputDefs.push_back({INPUT_SEQUENCE_DATA, - "layer_0", - /* dim= */ INPUT_SIZE, - /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - for (auto useGpu : {false, true}) { - for (auto asRowVec : {false, true}) { - config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : "as_col_vec"); - testLayerGrad(config, - "featmap_expand", - /*batch_size*/ 100, - /* trans= */ false, - useGpu, - /* useWeight */ true); - } - } -} - -TEST(Layer, MultiplexLayer) { - TestConfig config; - const int LAYER_SIZE = 100; - config.layerConfig.set_type("multiplex"); - config.layerConfig.set_size(LAYER_SIZE); - - config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); - config.inputDefs.push_back( - {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); - } -} - -TEST(Layer, PadLayer) { - TestConfig config; - config.biasSize = 0; - config.layerConfig.set_type("pad"); - - int c = 4; - int h = 31; - int w = 36; - size_t size = c * h * w; - config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - PadConfig* pad = input->mutable_pad_conf(); - ImageConfig* image = pad->mutable_image_conf(); - - image->set_channels(c); - image->set_img_size(h); - image->set_img_size_y(w); - pad->add_pad_c(1); - pad->add_pad_c(2); - pad->add_pad_h(2); - pad->add_pad_h(3); - pad->add_pad_w(3); - pad->add_pad_w(5); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "pad", 10, false, useGpu); - } -} - -TEST(Layer, CrossChannelNormLayer) { - TestConfig config; - config.paramInitialMean = 1.; - config.paramInitialStd = 0.; - config.layerConfig.set_type("norm"); - config.layerConfig.set_size(100); - LayerInputConfig* input = config.layerConfig.add_inputs(); - NormConfig* norm = input->mutable_norm_conf(); - norm->set_norm_type("cross-channel-norm"); - norm->set_channels(10); - norm->set_size(100); - norm->set_scale(0); - norm->set_pow(0); - norm->set_blocked(0); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); - } -} - -TEST(Layer, smooth_l1) { - TestConfig config; - config.layerConfig.set_type("smooth_l1"); - - config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); - config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); - config.layerConfig.add_inputs(); - config.layerConfig.add_inputs(); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); - } -} - -TEST(Layer, multibox_loss) { - TestConfig 
config;
-  config.layerConfig.set_type("multibox_loss");
-  config.biasSize = 0;
-  LayerInputConfig* input = config.layerConfig.add_inputs();
-  MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf();
-  multiboxLoss->set_num_classes(21);
-  multiboxLoss->set_input_num(1);
-  multiboxLoss->set_overlap_threshold(0.5);
-  multiboxLoss->set_neg_pos_ratio(3);
-  multiboxLoss->set_neg_overlap(0.5);
-  multiboxLoss->set_background_id(0);
-  multiboxLoss->set_height(3);
-  multiboxLoss->set_width(3);
-
-  size_t gtNum = 1;
-  MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false);
-  labelValue->randomizeUniform();
-  labelValue->add(-0.5);
-  labelValue->sigmoid(*labelValue);
-  real* labelData = labelValue->getData();
-  size_t labelWidth = labelValue->getWidth();
-  for (size_t i = 0; i < gtNum; ++i) {
-    *(labelData + i * labelWidth) = std::rand() % 20 + 1;
-    *(labelData + i * labelWidth + 1) = 0.400259;
-    *(labelData + i * labelWidth + 2) = 0.377857;
-    *(labelData + i * labelWidth + 3) = 0.525712;
-    *(labelData + i * labelWidth + 4) = 0.519368;
-  }
-  vector<int> seqStartPositions(gtNum + 1, 0);
-  for (size_t i = 1; i <= gtNum; ++i) {
-    seqStartPositions[i] = i;
-  }
-
-  // Ensure at lease one matched bbox
-  MatrixPtr priorValue = Matrix::create(1, 72, false, false);
-  priorValue->randomizeUniform();
-  priorValue->add(-0.5);
-  priorValue->sigmoid(*priorValue);
-  real* priorData = priorValue->getData();
-  *(priorData) = 0.424811;
-  *(priorData + 1) = 0.397059;
-  *(priorData + 2) = 0.538905;
-  *(priorData + 3) = 0.447091;
-  *(priorData + 4) = 0.425720;
-  *(priorData + 5) = 0.515228;
-  *(priorData + 6) = 0.519452;
-  *(priorData + 7) = 0.591065;
-
-  config.inputDefs.push_back(
-      {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}});
-  config.inputDefs.push_back(
-      {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions});
-  config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0});
-  config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0});
-  config.layerConfig.add_inputs();
-  config.layerConfig.add_inputs();
-  config.layerConfig.add_inputs();
-
-  for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "multibox_loss", 1, false, useGpu, false);
-  }
-}
-
-TEST(Layer, TransLayer) {
-  TestConfig config;
-  const int height = 128;
-  const int width = 1028;
-  config.layerConfig.set_type("trans");
-  config.layerConfig.set_size(width);
-
-  config.inputDefs.push_back(
-      {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0});
-  config.layerConfig.add_inputs();
-
-  for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "trans", height, /* trans= */ false, useGpu);
-  }
-}
-
-TEST(Layer, RowConvLayer) {
-  const int context = 3;
-  const int size = 512;
-
-  TestConfig config;
-  config.layerConfig.set_type("row_conv");
-  config.layerConfig.set_size(size);
-  config.layerConfig.set_active_type("sigmoid");
-
-  config.inputDefs.push_back(
-      {INPUT_SEQUENCE_DATA, "layer_0", size, context * size});
-  LayerInputConfig* input = config.layerConfig.add_inputs();
-  RowConvConfig* conv = input->mutable_row_conv_conf();
-  conv->set_context_length(context);
-
-  for (auto useGpu : {false, true}) {
-    testLayerGrad(config, "row_conv", 100, false, useGpu, false);
-  }
-}
-
-TEST(Layer, CropLayer) {
-  TestConfig config;
-  // config input_0
-  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0});
-  LayerInputConfig* input = config.layerConfig.add_inputs();
-  ImageConfig* img = input->mutable_image_conf();
-  img->set_channels(4);
-  img->set_img_size(16);
-  config.layerConfig.set_axis(2);
-
config.layerConfig.add_offset(0); - config.layerConfig.add_offset(0); - - // config input_1 - config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); - input = config.layerConfig.add_inputs(); - img = input->mutable_image_conf(); - img->set_channels(2); - img->set_img_size(8); - - // config crop layer - config.layerConfig.set_type("crop"); - config.layerConfig.set_name("cropLayer"); - - for (auto useGpu : {false, true}) { - testLayerGrad(config, "crop", 100, false, useGpu, false); - } +// TEST(Operator, dot_mul) { +// TestConfig config; +// config.layerConfig.set_size(10); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); +// operatorConf.set_type("dot_mul"); +// operatorConf.set_dotmul_scale(-1); +// +// testOperatorGrad(config, operatorConf, 100, false, false); +// } +// +// TEST(Projection, context) { +// for (auto contextStart : {-5, -3, -1, 0, 3}) { +// for (auto contextLength : {1, 2, 5, 7}) { +// for (auto batchSize : {1, 2, 5, 20, 50}) { +// for (auto trainablePadding : {false, true}) { +// LOG(INFO) << " contextStart=" << contextStart +// << " contextLength=" << contextLength +// << " batchSize=" << batchSize +// << " trainablePadding=" << trainablePadding; +// ProjectionConfig conf; +// conf.set_type("context"); +// conf.set_input_size(10); +// conf.set_context_start(contextStart); +// conf.set_context_length(contextLength); +// conf.set_trainable_padding(trainablePadding); +// conf.set_output_size(conf.context_length() * conf.input_size()); +// int pad = +// std::max(0, -conf.context_start()) + +// std::max(0, conf.context_start() + conf.context_length() - 1); +// for (auto useGpu : {false, true}) { +// testProjectionGrad( +// conf, +// INPUT_SEQUENCE_DATA, +// trainablePadding ? 
conf.input_size() * pad : 0, +// batchSize, +// useGpu, +// contextStart + contextLength <= 1); // = testState +// } +// } +// } +// } +// } +// } +// +// TEST(Projection, trans_fc) { +// ProjectionConfig conf; +// conf.set_type("trans_fc"); +// conf.set_input_size(50); +// conf.set_output_size(20); +// for (auto useGpu : {false, true}) { +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ 1000, +// /* batchSize */ 100, +// useGpu); +// } +// } +// +// TEST(Projection, fc) { +// ProjectionConfig conf; +// conf.set_type("fc"); +// conf.set_input_size(10); +// conf.set_output_size(20); +// for (auto useGpu : {false, true}) { +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ 200, +// /* batchSize */ 100, +// useGpu); +// } +// } +// +// TEST(Projection, dot_mul) { +// ProjectionConfig conf; +// conf.set_type("dot_mul"); +// conf.set_input_size(20); +// conf.set_output_size(20); +// for (auto useGpu : {false, true}) { +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ 20, +// /* batchSize */ 100, +// useGpu); +// } +// } +// +// TEST(Projection, table) { +// ProjectionConfig conf; +// conf.set_type("table"); +// conf.set_input_size(10); +// conf.set_output_size(20); +// for (auto useGpu : {false, true}) { +// testProjectionGrad(conf, +// INPUT_LABEL, +// /* parameterSize */ 200, +// /* batchSize */ 100, +// useGpu); +// } +// } +// +// TEST(Projection, identity) { +// ProjectionConfig conf; +// conf.set_type("identity"); +// conf.set_input_size(10); +// conf.set_output_size(10); +// for (auto useGpu : {false, true}) { +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ 0, +// /* batchSize */ 100, +// useGpu); +// } +// } +// +// TEST(Projection, slice) { +// ProjectionConfig conf; +// conf.set_type("slice"); +// conf.set_input_size(100); +// SliceConfig& slice1 = *conf.add_slices(); +// slice1.set_start(10); +// slice1.set_end(20); +// SliceConfig& slice2 = *conf.add_slices(); +// slice2.set_start(50); +// slice2.set_end(70); +// conf.set_output_size(30); +// for (auto useGpu : {false, true}) { +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ 0, +// /* batchSize */ 10, +// useGpu); +// } +// } +// +// TEST(Projection, scaling) { +// ProjectionConfig conf; +// conf.set_type("scaling"); +// conf.set_input_size(10); +// conf.set_output_size(10); +// for (auto useGpu : {false}) { +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ 1, +// /* batchSize */ 100, +// useGpu); +// } +// } +// +// void testProjectionConv(size_t groups, bool isDeconv) { +// const int NUM_FILTERS = 18; +// const int FILTER_SIZE = 2; +// const int FILTER_SIZE_Y = 4; +// const int CHANNELS = 3; +// const int IMAGE_SIZE = 16; +// +// ProjectionConfig conf; +// if (isDeconv) { +// conf.set_type("convt"); +// } else { +// conf.set_type("conv"); +// } +// conf.set_num_filters(NUM_FILTERS); +// +// ConvConfig* conv = conf.mutable_conv_conf(); +// conv->set_filter_size(FILTER_SIZE); +// conv->set_filter_size_y(FILTER_SIZE_Y); +// conv->set_channels(CHANNELS); +// conv->set_padding(0); +// conv->set_padding_y(1); +// conv->set_stride(2); +// conv->set_stride_y(2); +// conv->set_groups(groups); +// if (isDeconv) { +// conv->set_filter_channels(NUM_FILTERS / conv->groups()); +// } else { +// conv->set_filter_channels(conv->channels() / conv->groups()); +// } +// conv->set_img_size(IMAGE_SIZE); +// int output_x = outputSize(conv->img_size(), +// conv->filter_size(), +// conv->padding(), +// conv->stride(), +// /* caffeMode */ 
true); +// int output_y = outputSize(conv->img_size(), +// conv->filter_size_y(), +// conv->padding_y(), +// conv->stride_y(), +// /* caffeMode */ true); +// conv->set_output_x(output_x); +// conv->set_output_y(output_y); +// if (isDeconv) { +// conf.set_input_size(output_x * output_y * CHANNELS); +// conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); +// } else { +// conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); +// conf.set_output_size(output_x * output_y * NUM_FILTERS); +// } +// +// testProjectionGrad(conf, +// INPUT_DATA, +// /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE +// * +// FILTER_SIZE_Y / groups, +// /* batchSize */ 100, +// true, +// false, +// NUM_FILTERS, +// true); +// } +// +// #ifndef PADDLE_ONLY_CPU +// TEST(Projection, conv) { +// /// test ConvProjection +// testProjectionConv(1, false); +// testProjectionConv(3, false); +// /// test ConvTransProjection +// testProjectionConv(1, true); +// testProjectionConv(3, true); +// } +// #endif +// +// TEST(Layer, BilinearInterpLayer) { +// TestConfig config; +// config.layerConfig.set_type("bilinear_interp"); +// config.biasSize = 0; +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); +// +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); +// ImageConfig* image = bilinear->mutable_image_conf(); +// image->set_img_size(32); +// image->set_img_size_y(32); +// image->set_channels(4); +// +// for (auto useGpu : {false, true}) { +// for (auto outSize : {32, 64}) { +// bilinear->set_out_size_x(outSize); +// bilinear->set_out_size_y(outSize); +// testLayerGrad(config, "bilinear_interp", 10, false, useGpu); +// } +// } +// } +// +// TEST(Layer, concat) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("concat"); +// config.layerConfig.set_size(15); +// config.layerConfig.set_active_type("sigmoid"); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "concat", 100, false, useGpu); +// } +// } +// +// TEST(Layer, AddtoLayer) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("addto"); +// config.layerConfig.set_size(10); +// config.layerConfig.set_active_type("sigmoid"); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "addto", 100, false, useGpu); +// } +// } +// +// TEST(Layer, CTCLayer) { +// TestConfig config; +// config.layerConfig.set_type("ctc"); +// config.layerConfig.set_norm_by_times(false); +// config.layerConfig.set_size(10); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); +// config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, +// "ctc", +// 100, +// /* trans */ false, /* useGpu */ +// useGpu); +// } +// } +// +// TEST(Layer, cosSimLayer) { +// TestConfig config; +// config.layerConfig.set_type("cos"); +// config.layerConfig.set_size(1); +// config.biasSize = 0; +// +// 
config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "cos", 100, false, useGpu); +// } +// } +// +// TEST(Layer, CosSimVecMatLayer) { +// TestConfig config; +// config.layerConfig.set_type("cos_vm"); +// config.layerConfig.set_size(5); // output size +// config.layerConfig.set_cos_scale(2.0); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "cos_vm", 100, false, useGpu); +// } +// } +// +// void testDepthwiseConvLayer(const string& type, bool useGpu) { +// TestConfig config; +// config.biasSize = 32; +// config.layerConfig.set_type(type); +// config.layerConfig.set_num_filters(32); +// config.layerConfig.set_partial_sum(1); +// config.layerConfig.set_shared_biases(true); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// ConvConfig* conv = input->mutable_conv_conf(); +// conv->set_filter_size(2); +// conv->set_filter_size_y(3); +// conv->set_channels(16); +// conv->set_padding(0); +// conv->set_padding_y(1); +// conv->set_stride(2); +// conv->set_stride_y(2); +// conv->set_groups(16); +// conv->set_filter_channels(conv->channels() / conv->groups()); +// conv->set_img_size(16); +// conv->set_img_size_y(8); +// conv->set_output_x(outputSize(conv->img_size(), +// conv->filter_size(), +// conv->padding(), +// conv->stride(), +// /* caffeMode */ true)); +// conv->set_output_y(outputSize(conv->img_size_y(), +// conv->filter_size_y(), +// conv->padding_y(), +// conv->stride_y(), +// /* caffeMode */ true)); +// config.layerConfig.set_size(conv->output_x() * conv->output_y() * +// config.layerConfig.num_filters()); +// +// testLayerGrad(config, "depthwise_conv", 100, false, useGpu); +// // Use small batch_size and useWeight=true to test biasGrad +// testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); +// } +// +// TEST(Layer, depthwiseConvLayer) { +// // 'depthwise_conv' is a sepecial case of 'exconv' whose +// // groups size equals to the input channels size. 
+// testDepthwiseConvLayer("exconv", /* useGpu= */ false); +// #ifndef PADDLE_ONLY_CPU +// testDepthwiseConvLayer("exconv", /* useGpu= */ true); +// #endif +// } +// +// void testConvLayer(const string& type, bool trans, bool useGpu) { +// TestConfig config; +// config.biasSize = 16; +// config.layerConfig.set_type(type); +// config.layerConfig.set_num_filters(16); +// config.layerConfig.set_partial_sum(1); +// config.layerConfig.set_shared_biases(true); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// ConvConfig* conv = input->mutable_conv_conf(); +// conv->set_filter_size(2); +// conv->set_filter_size_y(3); +// conv->set_channels(3); +// conv->set_padding(0); +// conv->set_padding_y(1); +// conv->set_stride(2); +// conv->set_stride_y(2); +// conv->set_groups(1); +// conv->set_filter_channels(conv->channels() / conv->groups()); +// conv->set_img_size(16); +// conv->set_img_size_y(8); +// conv->set_output_x(outputSize(conv->img_size(), +// conv->filter_size(), +// conv->padding(), +// conv->stride(), +// /* caffeMode */ true)); +// conv->set_output_y(outputSize(conv->img_size_y(), +// conv->filter_size_y(), +// conv->padding_y(), +// conv->stride_y(), +// /* caffeMode */ true)); +// config.layerConfig.set_size(conv->output_x() * conv->output_y() * +// config.layerConfig.num_filters()); +// +// testLayerGrad(config, "conv", 100, trans, useGpu); +// // Use small batch_size and useWeight=true to test biasGrad +// testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); +// } +// +// TEST(Layer, convLayer) { +// testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); +// #ifndef PADDLE_ONLY_CPU +// testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); +// testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); +// #endif +// } +// +// void testConvTransLayer(const string& type, bool trans, bool useGpu) { +// TestConfig config; +// config.biasSize = 3; +// config.layerConfig.set_type(type); +// config.layerConfig.set_num_filters(3); +// config.layerConfig.set_partial_sum(1); +// config.layerConfig.set_shared_biases(true); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// ConvConfig* conv = input->mutable_conv_conf(); +// conv->set_filter_size(2); +// conv->set_filter_size_y(4); +// conv->set_channels(16); +// conv->set_padding(0); +// conv->set_padding_y(1); +// conv->set_stride(2); +// conv->set_stride_y(2); +// conv->set_groups(1); +// conv->set_filter_channels(3 / conv->groups()); +// conv->set_img_size(16); +// conv->set_output_x(outputSize(conv->img_size(), +// conv->filter_size(), +// conv->padding(), +// conv->stride(), +// /* caffeMode */ true)); +// +// config.layerConfig.set_size(conv->img_size() * conv->img_size() * +// config.layerConfig.num_filters()); +// +// testLayerGrad(config, "convTrans", 100, trans, useGpu); +// // Use small batch_size and useWeight=true to test biasGrad +// testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); +// } +// +// TEST(Layer, convTransLayer) { +// for (auto useGpu : {false, true}) { +// testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); +// } +// #ifndef PADDLE_ONLY_CPU +// testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); +// #endif +// } +// +// TEST(Layer, blockExpandLayer) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("blockexpand"); +// 
+// config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); +// blockExpand->set_img_size_x(64); +// blockExpand->set_img_size_y(32); +// blockExpand->set_channels(3); +// blockExpand->set_padding_x(0); +// blockExpand->set_padding_y(0); +// blockExpand->set_block_x(4); +// blockExpand->set_block_y(32); +// blockExpand->set_stride_x(2); +// blockExpand->set_stride_y(2); +// blockExpand->set_output_x(outputSize(blockExpand->img_size_x(), +// blockExpand->block_x(), +// blockExpand->padding_x(), +// blockExpand->stride_x(), +// /* caffeMode */ false)); +// blockExpand->set_output_y(outputSize(blockExpand->img_size_y(), +// blockExpand->block_y(), +// blockExpand->padding_y(), +// blockExpand->stride_y(), +// /* caffeMode */ false)); +// config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() +// * +// blockExpand->channels()); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "blockexpand", 100, false, useGpu); +// } +// } +// +// TEST(Layer, maxoutLayer) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("maxout"); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// MaxOutConfig* maxout = input->mutable_maxout_conf(); +// ImageConfig* image = maxout->mutable_image_conf(); +// +// image->set_img_size(32); +// image->set_img_size_y(32); +// image->set_channels(4); +// maxout->set_groups(2); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "maxout", 10, false, useGpu); +// } +// } +// void testFcLayer(string format, size_t nnz) { +// TestConfig config; +// config.biasSize = 4096; +// config.layerConfig.set_type("fc"); +// config.layerConfig.set_size(4096); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_drop_rate(0.1); +// +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)}); +// config.layerConfig.add_inputs(); +// +// LOG(INFO) << config.inputDefs[0].sparse.sparse << " " +// << config.inputDefs[0].sparse.format; +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, +// "fc", +// 100, +// /* trans */ false, +// useGpu, +// /* weight */ true); +// } +// } +// +// TEST(Layer, fcLayer) { +// testFcLayer("", 4096 * 4096 * 2); +// testFcLayer("csc", 4096 * 40); +// testFcLayer("csr", 4096 * 40); +// } +// +// TEST(Layer, SelectiveFullyConnectedLayer) { +// TestConfig config; +// size_t nin = 16; +// size_t nout = 256; +// config.layerConfig.set_type("selective_fc"); +// config.layerConfig.set_size(nout); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_has_selected_colums(true); +// config.layerConfig.set_selective_fc_pass_generation(false); +// config.biasSize = nout; +// +// config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back( +// {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", +// true)}); +// config.layerConfig.add_inputs(); +// +// testLayerGrad(config, +// "selective_fc", +// 100, +// /* trans= */ false, +// /* useGup= */ false, +// false); +// #ifndef PADDLE_ONLY_CPU +// testLayerGrad(config, +// "selective_fc", +// 100, +// /* trans= */ false, +// /* useGup= */ true, +// false); +// #endif +// } +// +// TEST(Layer, DataNormLayer) { +// TestConfig config; +// 
config.layerConfig.set_type("data_norm"); +// config.layerConfig.set_size(20); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100}); +// config.inputDefs.back().isStatic = true; +// config.layerConfig.add_inputs(); +// +// for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) { +// config.layerConfig.set_data_norm_strategy(strategy); +// // The parameters are static, so not support GPU now +// testLayerGrad(config, +// "data_norm", +// 200, +// /* trans */ false, +// /* useGpu */ false); +// } +// } +// +// TEST(Layer, hsigmoidLayer) { +// TestConfig config; +// config.layerConfig.set_type("hsigmoid"); +// config.layerConfig.set_num_classes(5); +// config.layerConfig.set_size(1); +// config.biasSize = config.layerConfig.num_classes() - 1; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); +// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// // Not support GPU now +// testLayerGrad(config, +// "hsigmoid", +// 100, +// /* trans */ false, /* useGpu */ +// false); +// } +// +// TEST(Layer, multi_cross) { +// TestConfig config; +// config.layerConfig.set_type("multi-class-cross-entropy"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad( +// config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); +// } +// } +// +// TEST(Layer, multi_binary_label_sparse_mat) { +// TestConfig config; +// config.layerConfig.set_type("multi_binary_label_cross_entropy"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, +// 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, +// "multi_binary_label_cross_entropy", +// 100, +// /* trans */ false, +// useGpu); +// } +// } +// +// TEST(layer, multi_binary_label_id) { +// TestConfig config; +// config.layerConfig.set_type("multi_binary_label_cross_entropy"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, +// "multi_binary_label_cross_entropy", +// 100, +// /* trans */ false, +// useGpu); +// } +// } +// +// TEST(Layer, multi_cross_with_selfnorm) { +// TestConfig config; +// config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); +// config.layerConfig.set_softmax_selfnorm_alpha(0.1); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// // Not support GPU now +// testLayerGrad(config, +// "multi_class_cross_entropy_with_selfnorm", +// 100, +// /* trans */ false, +// /* useGpu */ false); +// } +// +// TEST(Layer, multi_cross_soft) { +// TestConfig config; +// config.layerConfig.set_type("soft_binary_class_cross_entropy"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, 
"layer_0", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, +// "soft_binary_class_cross_entropy", +// 100, +// /* trans */ false, +// useGpu); +// } +// } +// +// TEST(Layer, square_error) { +// TestConfig config; +// config.layerConfig.set_type("square_error"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); +// } +// } +// +// TEST(Layer, sparse_square_error) { +// TestConfig config; +// config.layerConfig.set_type("square_error"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, +// 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// // "GpuSparseMatrix" as label is not supported +// testLayerGrad(config, +// "square_error", +// 100, +// /* trans */ false, +// /* useGpu */ false); +// } +// +// TEST(Layer, sparse_float_square_error) { +// TestConfig config; +// config.layerConfig.set_type("square_error"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); +// config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, +// 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// // "GpuSparseMatrix" as label is not supported +// testLayerGrad(config, +// "square_error", +// 100, +// /* trans */ false, +// /* useGpu */ false); +// } +// +// TEST(Layer, square_error_weighted) { +// TestConfig config; +// config.layerConfig.set_type("square_error"); +// config.biasSize = 0; +// config.testAccumulate = false; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); +// } +// } +// +// TEST(Layer, huber_two_class) { +// TestConfig config; +// config.layerConfig.set_type("huber"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); +// } +// } +// +// void testExpandLayer(string trans_type, bool hasSubseq) { +// TestConfig config; +// config.layerConfig.set_type("expand"); +// +// config.inputDefs.push_back( +// {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, +// "layer_0", +// 10, +// 0}); +// config.inputDefs.push_back( +// {hasSubseq ? 
INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, +// "layer_1", +// 10, +// 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.set_trans_type(trans_type); +// LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "expand", 30, false, useGpu); +// } +// } +// +// TEST(Layer, ExpandLayer) { +// testExpandLayer("non-seq", false); // non-seq expand to seq +// testExpandLayer("non-seq", true); // non-seq expand to hasSubseq +// testExpandLayer("seq", true); // seq expand to hasSubseq +// } +// +// void testDegradeLayer(bool hasSubseq, +// string layer_type, +// string trans_type, +// int stride) { +// TestConfig config; +// config.layerConfig.set_type(layer_type); +// config.layerConfig.set_size(10); +// config.layerConfig.set_seq_pool_stride(stride); +// config.biasSize = 0; +// +// config.inputDefs.push_back( +// {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, +// "layer_0", +// 10, +// 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.set_trans_type(trans_type); +// +// auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) { +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, layer_type, 100, false, useGpu); +// } +// }; +// +// if (layer_type == "average") { +// for (auto strategy : {"average", "sum", "squarerootn"}) { +// LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type +// << " average_strategy=" << strategy +// << " seq_pool_stride=" << stride; +// config.layerConfig.set_average_strategy(strategy); +// testDegradeLayerGrad(config, layer_type); +// } +// } else { +// LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type +// << " seq_pool_stride=" << stride; +// testDegradeLayerGrad(config, layer_type); +// } +// } +// +// TEST(Layer, MaxLayer) { +// testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq +// testDegradeLayer(false, +// "max", +// "non-seq", +// 5); // seq max to a shorten seq, stride window = 5 +// testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq +// testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq +// } +// +// TEST(Layer, SequenceLastInstanceLayer) { +// testDegradeLayer(false, +// "seqlastins", +// "non-seq", +// -1); // seq seqlastins to non-seq +// testDegradeLayer(false, +// "seqlastins", +// "non-seq", +// 5); // seq seqlastins to a shorten seq, stride window = 5 +// testDegradeLayer(true, +// "seqlastins", +// "non-seq", +// -1); // hasSubseq seqlastins to non-seq +// testDegradeLayer( +// true, "seqlastins", "seq", -1); // hasSubseq seqlastins to seq +// } +// +// TEST(Layer, AverageLayer) { +// testDegradeLayer(false, "average", "non-seq", -1); // seq average to +// non-seq +// testDegradeLayer(false, +// "average", +// "non-seq", +// 5); // seq average to a shorten seq, stride window = 5 +// testDegradeLayer( +// true, "average", "non-seq", -1); // hasSubseq average to +// non-seq +// testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq +// } +// +// TEST(Layer, SequenceConcatLayer) { +// TestConfig config; +// config.layerConfig.set_type("seqconcat"); +// config.layerConfig.set_size(10); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// +// for 
(auto useGpu : {false, true}) { +// testLayerGrad(config, "seqconcat", 100, false, useGpu); +// } +// } +// +// TEST(Layer, SequenceReshapeLayer) { +// TestConfig config; +// config.layerConfig.set_type("seqreshape"); +// config.layerConfig.set_size(10); +// +// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "seqreshape", 100, false, useGpu); +// } +// } +// +// TEST(Layer, ConvShiftLayer) { +// TestConfig config; +// config.layerConfig.set_type("conv_shift"); +// config.layerConfig.set_size(10); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// // Not support GPU now +// testLayerGrad(config, "conv_shift", 100, false, false); +// } +// +// TEST(Layer, PowerLayer) { +// TestConfig config; +// config.layerConfig.set_type("power"); +// config.layerConfig.set_size(10); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "power", 100, false, useGpu); +// } +// } +// +// TEST(Layer, ConvexCombinationLayer) { +// TestConfig config; +// config.layerConfig.set_type("convex_comb"); +// config.layerConfig.set_size(20); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "convex_comb", 100, false, useGpu); +// } +// } +// +// TEST(Layer, InterpolationLayer) { +// TestConfig config; +// config.layerConfig.set_type("interpolation"); +// config.layerConfig.set_size(10); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "interpolation", 100, false, useGpu); +// } +// } +// +// TEST(Layer, OuterProdLayer) { +// TestConfig config; +// config.layerConfig.set_type("out_prod"); +// config.layerConfig.set_size(100); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "out_prod", 100, false, useGpu); +// } +// } +// +// TEST(Layer, SlopeInterceptLayer) { +// TestConfig config; +// config.layerConfig.set_type("slope_intercept"); +// config.layerConfig.set_size(10); +// config.layerConfig.set_slope(1.0); +// config.layerConfig.set_intercept(0.1); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "slope_intercept", 100, false, useGpu); +// } +// } +// +// TEST(Layer, ScalingLayer) { +// TestConfig config; +// config.layerConfig.set_type("scaling"); +// 
config.layerConfig.set_size(10); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.layerConfig.add_inputs(); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "scaling", 100, false, useGpu); +// } +// } +// +// void testNormLayer(const string& normType, bool trans, bool useGpu) { +// TestConfig config; +// config.layerConfig.set_type("norm"); +// config.layerConfig.set_active_type("relu"); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// NormConfig* norm = input->mutable_norm_conf(); +// norm->set_norm_type(normType); +// norm->set_channels(16); +// norm->set_size(5); +// norm->set_scale(0.001); +// norm->set_pow(0.75); +// norm->set_blocked(0); +// norm->set_img_size(14); +// norm->set_img_size_y(7); +// norm->set_output_x(norm->img_size()); +// norm->set_output_y(norm->img_size_y()); +// if (norm->norm_type() == "cmrnorm" || +// norm->norm_type() == "cmrnorm-projection") { +// norm->set_scale(norm->scale() / norm->size()); +// } else { +// norm->set_scale(norm->scale() / (norm->size() * norm->size())); +// } +// +// config.layerConfig.set_size(norm->output_x() * norm->output_y() * +// norm->channels()); +// config.biasSize = 0; +// +// testLayerGrad(config, "norm", 100, trans, useGpu); +// } +// +// TEST(Layer, NormLayer) { +// testNormLayer("cmrnorm-projection", +// /* trans= */ false, /* useGpu= */ +// true); +// testNormLayer("cmrnorm-projection", +// /* trans= */ false, /* useGpu= */ +// false); +// } +// +// void setPoolConfig(TestConfig* config, +// PoolConfig* pool, +// const string& poolType) { +// (*config).biasSize = 0; +// (*config).layerConfig.set_type("pool"); +// (*config).layerConfig.set_num_filters(16); +// +// int kw = 3, kh = 3; +// int pw = 0, ph = 0; +// int sw = 2, sh = 2; +// pool->set_pool_type(poolType); +// pool->set_channels(16); +// pool->set_size_x(kw); +// pool->set_size_y(kh); +// pool->set_start(0); +// pool->set_padding(pw); +// pool->set_padding_y(ph); +// pool->set_stride(sw); +// pool->set_stride_y(sh); +// +// int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); +// int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); +// pool->set_output_x(ow); +// pool->set_output_y(oh); +// } +// +// void testPoolLayer(const string& poolType, bool trans, bool useGpu) { +// TestConfig config; +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// PoolConfig* pool = input->mutable_pool_conf(); +// +// pool->set_img_size(14); +// pool->set_img_size_y(14); +// setPoolConfig(&config, pool, poolType); +// config.layerConfig.set_size(pool->output_x() * pool->output_y() * +// pool->channels()); +// +// testLayerGrad(config, "pool", 100, trans, useGpu); +// } +// +// #ifndef PADDLE_ONLY_CPU +// void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { +// TestConfig config; +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// PoolConfig* pool = input->mutable_pool_conf(); +// +// pool->set_size_y(4); +// pool->set_stride_y(3); +// pool->set_img_size(10); +// pool->set_img_size_y(20); +// setPoolConfig(&config, pool, poolType); +// pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / 
+// ((float)pool->stride_y()) + +// 1.5); +// config.layerConfig.set_size(pool->output_x() * pool->output_y() * +// pool->channels()); +// +// testLayerGrad(config, "pool", 100, trans, useGpu); +// } +// #endif +// +// TEST(Layer, PoolLayer) { +// testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); +// testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); +// +// #ifndef PADDLE_ONLY_CPU +// testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); +// testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); +// testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); +// testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); +// testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); +// testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); +// #endif +// } +// +// void testSppLayer(const string& poolType, +// const int pyramidHeight, +// bool trans, +// bool useGpu) { +// TestConfig config; +// config.layerConfig.set_type("spp"); +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// SppConfig* sppConfig = input->mutable_spp_conf(); +// sppConfig->set_pool_type(poolType); +// sppConfig->set_pyramid_height(pyramidHeight); +// ImageConfig* imageConfig = sppConfig->mutable_image_conf(); +// imageConfig->set_channels(16); +// imageConfig->set_img_size(10); +// imageConfig->set_img_size_y(20); +// int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); +// config.layerConfig.set_size(outputSize * imageConfig->channels()); +// testLayerGrad(config, "spp", 100, trans, useGpu); +// } +// +// TEST(Layer, SpatialPyramidPoolLayer) { +// for (auto useGpu : {false, true}) { +// for (auto pyramidHeight : {1, 2, 3}) { +// testSppLayer("avg-projection", pyramidHeight, false, useGpu); +// testSppLayer("max-projection", pyramidHeight, false, useGpu); +// } +// } +// } +// +// TEST(Layer, rankCostLayer) { +// TestConfig config; +// config.layerConfig.set_type("rank-cost"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "rank-cost", 100, false, useGpu); +// } +// } +// +// TEST(Layer, sumCostLayer) { +// TestConfig config; +// config.layerConfig.set_type("sum_cost"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "sum_cost", 100, false, useGpu); +// } +// } +// +// TEST(Layer, weightedRankCostLayer) { +// TestConfig config; +// config.layerConfig.set_type("rank-cost"); +// config.biasSize = 0; +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// 
testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); +// } +// } +// +// TEST(Layer, TensorLayer) { +// TestConfig config; +// config.layerConfig.set_type("tensor"); +// config.layerConfig.set_size(10); +// config.layerConfig.set_active_type("sigmoid"); +// config.biasSize = config.layerConfig.size(); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "tensor", 100, false, useGpu); +// } +// } +// +// TEST(Layer, RecurrentLayer) { +// TestConfig config; +// config.layerConfig.set_type("recurrent"); +// config.layerConfig.set_size(4); +// config.layerConfig.set_active_type("tanh"); +// config.biasSize = 4; +// +// config.inputDefs.push_back( +// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// for (auto reversed : {false, true}) { +// config.layerConfig.set_reversed(reversed); +// config.testState = !reversed; +// testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); +// } +// } +// } +// +// TEST(Layer, LstmLayer) { +// TestConfig config; +// config.layerConfig.set_type("lstmemory"); +// config.layerConfig.set_size(4); +// config.layerConfig.set_active_type("tanh"); +// config.layerConfig.set_active_state_type("sigmoid"); +// config.layerConfig.set_active_gate_type("sigmoid"); +// config.biasSize = 28; +// +// config.inputDefs.push_back( +// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// for (auto reversed : {false, true}) { +// config.layerConfig.set_reversed(reversed); +// config.testState = !reversed; +// testLayerGrad(config, "lstmemory", 100, /* trans= */ false, useGpu); +// } +// } +// for (auto useGpu : {true}) { +// config.testBatchState = true; +// config.layerConfig.set_reversed(false); +// testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); +// } +// } +// +// TEST(Layer, MDLstmLayer) { +// TestConfig config; +// config.layerConfig.set_type("mdlstmemory"); +// config.layerConfig.set_size(4); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_active_state_type("sigmoid"); +// config.layerConfig.set_active_gate_type("sigmoid"); +// config.biasSize = 4 * 9; +// +// config.inputDefs.push_back( +// {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_directions(true); +// config.layerConfig.add_directions(true); +// +// for (auto useGpu : {false, true}) { +// for (int i = 0; i < 2; i++) { +// for (int j = 0; j < 2; j++) { +// config.layerConfig.set_directions(0, bool(i)); +// config.layerConfig.set_directions(1, bool(j)); +// testLayerGrad(config, "mdlstmemory", 100, false, useGpu); +// } +// } +// } +// } +// +// TEST(Layer, ParameterReluLayer) { +// auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { +// TestConfig config; +// config.layerConfig.set_type("prelu"); +// config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); +// config.layerConfig.add_inputs(); +// config.layerConfig.set_size(inputSize); +// config.layerConfig.set_partial_sum(inputSize / +// channels); // size of feature map +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "prelu", 100, false, 
useGpu); +// } +// }; +// +// testParameterReluLayer(192, 1); +// testParameterReluLayer(192, 3); +// testParameterReluLayer(192, 192); +// } +// +// TEST(Layer, ResizeLayer) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("resize"); +// config.layerConfig.set_size(64); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "resize", 100, false, useGpu); +// } +// } +// +// TEST(Layer, RotateLayer) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("rotate"); +// const int CHANNEL = 2; +// const int HEIGHT = 8; +// const int WIDTH = 4; +// const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; +// config.layerConfig.set_size(INPUT_SIZE); +// config.layerConfig.set_height(HEIGHT); +// config.layerConfig.set_width(WIDTH); +// config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "rotate", 100, false, useGpu); +// } +// } +// +// TEST(Layer, NCELayer) { +// TestConfig config; +// size_t numClasses = 4; +// config.layerConfig.set_type("nce"); +// config.layerConfig.set_size(1); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_num_classes(numClasses); +// config.biasSize = numClasses; +// +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * +// numClasses}); +// config.inputDefs.push_back( +// {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto withWeight : {false, true}) { +// if (withWeight) { +// config.inputDefs.push_back( +// {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); +// config.layerConfig.add_inputs(); +// } +// +// for (auto isIdLabel : {false, true}) { +// config.inputDefs[1] = { +// isIdLabel ? 
INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA, +// "label", +// /* dim= */ numClasses, +// /* paraSize= */ 0}; +// +// for (auto withDist : {false, true}) { +// config.layerConfig.clear_neg_sampling_dist(); +// if (withDist) { +// double sum = 0; +// for (size_t i = 0; i < numClasses; ++i) { +// real p = rand(); // NOLINT use rand_r +// config.layerConfig.add_neg_sampling_dist(p); +// sum += p; +// } +// for (size_t i = 0; i < numClasses; ++i) { +// real p = config.layerConfig.neg_sampling_dist(i) / sum; +// config.layerConfig.set_neg_sampling_dist(i, p); +// } +// } +// LOG(INFO) << "NCELayer " +// << " isIdLabel=" << isIdLabel << " withWeight=" << +// withWeight +// << " withDist=" << withDist; +// // Not support GPU now +// testLayerGrad(config, +// "nce", +// 100, +// /* trans= */ false, +// /* useGpu */ false); +// } +// } +// } +// } +// +// TEST(Layer, GatedRecurrentLayer) { +// TestConfig config; +// config.layerConfig.set_type("gated_recurrent"); +// config.layerConfig.set_size(4); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_active_gate_type("sigmoid"); +// config.biasSize = 12; +// +// config.inputDefs.push_back( +// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// for (auto reversed : {false, true}) { +// config.layerConfig.set_reversed(reversed); +// config.testState = !reversed; +// testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, +// useGpu); +// } +// } +// } +// +// TEST(Layer, GruStepLayer) { +// TestConfig config; +// config.layerConfig.set_type("gru_step"); +// config.layerConfig.set_size(4); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_active_gate_type("sigmoid"); +// config.biasSize = 12; +// +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu); +// } +// } +// +// TEST(Layer, LstmStepLayer) { +// TestConfig config; +// config.layerConfig.set_type("lstm_step"); +// config.layerConfig.set_size(4); +// config.layerConfig.set_active_type("sigmoid"); +// config.layerConfig.set_active_state_type("sigmoid"); +// config.layerConfig.set_active_gate_type("sigmoid"); +// config.biasSize = 12; +// config.testAccumulate = false; +// +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0}); +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu); +// } +// } +// +// void testBatchNormLayer(const string& type, bool trans, bool useGpu) { +// TestConfig config; +// const int CHANNELS = 10; +// const int IMG_SIZE = 16; +// const int IMG_SIZE_Y = 8; +// size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; +// config.layerConfig.set_type(type); +// config.layerConfig.set_size(size); +// config.layerConfig.set_active_type("sigmoid"); +// config.biasSize = CHANNELS; +// config.inputDefs.push_back({INPUT_DATA, +// "layer_0", +// /* dim= */ size, +// /* paraSize= */ CHANNELS}); +// +// config.inputDefs.push_back({INPUT_DATA, 
"layer_1_running_mean", 1, +// CHANNELS}); +// config.inputDefs.back().isStatic = true; +// config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, +// CHANNELS}); +// config.inputDefs.back().isStatic = true; +// +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// ImageConfig* img_conf = input->mutable_image_conf(); +// img_conf->set_channels(CHANNELS); +// img_conf->set_img_size(IMG_SIZE); +// img_conf->set_img_size_y(IMG_SIZE_Y); +// +// testLayerGrad(config, +// "batch_norm", +// 64, +// /* trans= */ trans, +// useGpu, +// /* useWeight */ true); +// } +// +// TEST(Layer, BatchNormalizationLayer) { +// testBatchNormLayer("batch_norm", false, false); +// #ifndef PADDLE_ONLY_CPU +// testBatchNormLayer("batch_norm", false, true); +// if (hl_get_cudnn_lib_version() >= int(4000)) { +// testBatchNormLayer("cudnn_batch_norm", false, true); +// } +// #endif +// } +// +// void testConvOperator(bool isDeconv) { +// TestConfig config; +// const int NUM_FILTERS = 16; +// const int FILTER_SIZE = 2; +// const int FILTER_SIZE_Y = 3; +// const int CHANNELS = 3; +// const int IMAGE_SIZE = 16; +// const int IMAGE_SIZE_Y = 9; +// OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); +// if (isDeconv) { +// operatorConf.set_type("convt"); +// } else { +// operatorConf.set_type("conv"); +// } +// ConvConfig* conv = operatorConf.mutable_conv_conf(); +// operatorConf.set_num_filters(NUM_FILTERS); +// conv->set_filter_size(FILTER_SIZE); +// conv->set_filter_size_y(FILTER_SIZE_Y); +// conv->set_channels(CHANNELS); +// conv->set_padding(0); +// conv->set_padding_y(1); +// conv->set_stride(2); +// conv->set_stride_y(2); +// conv->set_groups(1); +// conv->set_img_size(IMAGE_SIZE); +// conv->set_img_size_y(IMAGE_SIZE_Y); +// conv->set_output_x(outputSize(conv->img_size(), +// conv->filter_size(), +// conv->padding(), +// conv->stride(), +// /* caffeMode */ true)); +// conv->set_output_y(outputSize(conv->img_size_y(), +// conv->filter_size_y(), +// conv->padding_y(), +// conv->stride_y(), +// /* caffeMode */ true)); +// +// if (isDeconv) { +// conv->set_filter_channels(NUM_FILTERS / conv->groups()); +// config.inputDefs.push_back({INPUT_DATA, +// "layer_0", +// conv->output_x() * conv->output_y() * +// CHANNELS, +// 0}); +// config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); +// } else { +// conv->set_filter_channels(conv->channels() / conv->groups()); +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); +// config.layerConfig.set_size(conv->output_x() * conv->output_y() * +// NUM_FILTERS); +// } +// +// config.inputDefs.push_back( +// {INPUT_DATA, +// "layer_1", +// FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, +// 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); +// } +// +// TEST(Operator, conv) { +// testConvOperator(/*isDeconv*/ true); +// testConvOperator(/*isDeconv*/ false); +// } +// +// TEST(Layer, FeatureMapExpandLayer) { +// TestConfig config; +// config.layerConfig.set_type("featmap_expand"); +// const int CHANNELS = 10; +// const int INPUT_SIZE = 100; +// config.layerConfig.set_size(INPUT_SIZE * CHANNELS); +// config.layerConfig.set_num_filters(CHANNELS); +// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, +// "layer_0", +// /* dim= */ INPUT_SIZE, +// /* paraSize= */ 0}); +// 
config.layerConfig.add_inputs(); +// for (auto useGpu : {false, true}) { +// for (auto asRowVec : {false, true}) { +// config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : +// "as_col_vec"); +// testLayerGrad(config, +// "featmap_expand", +// /*batch_size*/ 100, +// /* trans= */ false, +// useGpu, +// /* useWeight */ true); +// } +// } +// } +// +// TEST(Layer, MultiplexLayer) { +// TestConfig config; +// const int LAYER_SIZE = 100; +// config.layerConfig.set_type("multiplex"); +// config.layerConfig.set_size(LAYER_SIZE); +// +// config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); +// } +// } +// +// TEST(Layer, PadLayer) { +// TestConfig config; +// config.biasSize = 0; +// config.layerConfig.set_type("pad"); +// +// int c = 4; +// int h = 31; +// int w = 36; +// size_t size = c * h * w; +// config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// PadConfig* pad = input->mutable_pad_conf(); +// ImageConfig* image = pad->mutable_image_conf(); +// +// image->set_channels(c); +// image->set_img_size(h); +// image->set_img_size_y(w); +// pad->add_pad_c(1); +// pad->add_pad_c(2); +// pad->add_pad_h(2); +// pad->add_pad_h(3); +// pad->add_pad_w(3); +// pad->add_pad_w(5); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "pad", 10, false, useGpu); +// } +// } +// +// TEST(Layer, CrossChannelNormLayer) { +// TestConfig config; +// config.paramInitialMean = 1.; +// config.paramInitialStd = 0.; +// config.layerConfig.set_type("norm"); +// config.layerConfig.set_size(100); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// NormConfig* norm = input->mutable_norm_conf(); +// norm->set_norm_type("cross-channel-norm"); +// norm->set_channels(10); +// norm->set_size(100); +// norm->set_scale(0); +// norm->set_pow(0); +// norm->set_blocked(0); +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); +// } +// } +// +// TEST(Layer, smooth_l1) { +// TestConfig config; +// config.layerConfig.set_type("smooth_l1"); +// +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); +// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); +// } +// } +// +// TEST(Layer, multibox_loss) { +// TestConfig config; +// config.layerConfig.set_type("multibox_loss"); +// config.biasSize = 0; +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); +// multiboxLoss->set_num_classes(21); +// multiboxLoss->set_input_num(1); +// multiboxLoss->set_overlap_threshold(0.5); +// multiboxLoss->set_neg_pos_ratio(3); +// multiboxLoss->set_neg_overlap(0.5); +// multiboxLoss->set_background_id(0); +// multiboxLoss->set_height(3); +// multiboxLoss->set_width(3); +// +// size_t gtNum = 
1; +// MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); +// labelValue->randomizeUniform(); +// labelValue->add(-0.5); +// labelValue->sigmoid(*labelValue); +// real* labelData = labelValue->getData(); +// size_t labelWidth = labelValue->getWidth(); +// for (size_t i = 0; i < gtNum; ++i) { +// *(labelData + i * labelWidth) = std::rand() % 20 + 1; +// *(labelData + i * labelWidth + 1) = 0.400259; +// *(labelData + i * labelWidth + 2) = 0.377857; +// *(labelData + i * labelWidth + 3) = 0.525712; +// *(labelData + i * labelWidth + 4) = 0.519368; +// } +// vector seqStartPositions(gtNum + 1, 0); +// for (size_t i = 1; i <= gtNum; ++i) { +// seqStartPositions[i] = i; +// } +// +// // Ensure at lease one matched bbox +// MatrixPtr priorValue = Matrix::create(1, 72, false, false); +// priorValue->randomizeUniform(); +// priorValue->add(-0.5); +// priorValue->sigmoid(*priorValue); +// real* priorData = priorValue->getData(); +// *(priorData) = 0.424811; +// *(priorData + 1) = 0.397059; +// *(priorData + 2) = 0.538905; +// *(priorData + 3) = 0.447091; +// *(priorData + 4) = 0.425720; +// *(priorData + 5) = 0.515228; +// *(priorData + 6) = 0.519452; +// *(priorData + 7) = 0.591065; +// +// config.inputDefs.push_back( +// {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}}); +// config.inputDefs.push_back( +// {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); +// config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); +// config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); +// } +// } +// +// TEST(Layer, TransLayer) { +// TestConfig config; +// const int height = 128; +// const int width = 1028; +// config.layerConfig.set_type("trans"); +// config.layerConfig.set_size(width); +// +// config.inputDefs.push_back( +// {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0}); +// config.layerConfig.add_inputs(); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "trans", height, /* trans= */ false, useGpu); +// } +// } +// +// TEST(Layer, RowConvLayer) { +// const int context = 3; +// const int size = 512; +// +// TestConfig config; +// config.layerConfig.set_type("row_conv"); +// config.layerConfig.set_size(size); +// config.layerConfig.set_active_type("sigmoid"); +// +// config.inputDefs.push_back( +// {INPUT_SEQUENCE_DATA, "layer_0", size, context * size}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// RowConvConfig* conv = input->mutable_row_conv_conf(); +// conv->set_context_length(context); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "row_conv", 100, false, useGpu, false); +// } +// } +// +// TEST(Layer, CropLayer) { +// TestConfig config; +// // config input_0 +// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// ImageConfig* img = input->mutable_image_conf(); +// img->set_channels(4); +// img->set_img_size(16); +// config.layerConfig.set_axis(2); +// config.layerConfig.add_offset(0); +// config.layerConfig.add_offset(0); +// +// // config input_1 +// config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); +// input = config.layerConfig.add_inputs(); +// img = input->mutable_image_conf(); +// img->set_channels(2); +// img->set_img_size(8); +// +// // config crop layer +// 
config.layerConfig.set_type("crop"); +// config.layerConfig.set_name("cropLayer"); +// +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "crop", 100, false, useGpu, false); +// } +// } + +vector randSampling(real range, int n) { + CHECK_GE(range, n); + vector num(range); + iota(begin(num), end(num), 0.); + if (range == n) return num; + + random_shuffle(begin(num), end(num)); + num.resize(n); + sort(begin(num), end(num)); + return num; } TEST(Layer, SubNestedSequenceLayer) { - const int layerSize = 128; + // layer size is not crutial for this layer, + // so use a small layer size in unittest + const int layerSize = 8; + const int maxSeqNum = 5; + const int maxSeqLen = 5; + const int beamSize = 3; TestConfig config; config.layerConfig.set_type("sub_nested_seq"); - config.layerConfig.set_top_k(2); config.layerConfig.set_name("sub_nested_seq_layer"); config.layerConfig.set_size(layerSize); - // Generate the first input - srand((size_t)(time(NULL))); - const int batchSize = 128; - const int maxSeqLen = 100; - const int maxSubSeqNum = 50; - // sequenceStartPositioins info for the first input. - vector seqStartPos1(batchSize + 1, 0); - // subSequenceStartPositioins info for the first input. - vector subSeqStartPos; - subSeqStartPos.push_back(0); - - // sequenceStartPositioins info for the second input. - vector seqStartPos2(batchSize + 1, 0); - - size_t curPos = 0; - for (int i = 1; i < batchSize + 1; ++i) { - int seqNum = uniformRandom(maxSubSeqNum); - seqStartPos2[i] = seqStartPos2[i - 1] + seqNum; - for (int j = 0; j < seqNum; ++j) { - int seqLen = uniformRandom(maxSeqLen); - subSeqStartPos.push_back(curPos + seqLen); - curPos += seqLen; + // srand((size_t)(time(NULL))); + srand(1); + int seqNum = 1 + (rand() % maxSeqNum); + + // sequence information for the first input, it is a nested sequence + vector seqStartPos(seqNum + 1, 0); + vector subSeqStartPos(1, 0); + + // selected indices + MatrixPtr selectedIndices = Matrix::create(seqNum, beamSize, false, false); + selectedIndices->one(); + selectedIndices->mulScalar(-1.); + real* indicesData = selectedIndices->getData(); + + for (int i = 0; i < seqNum; ++i) { + int subSeqNum = 1 + (rand() % maxSeqNum); + for (int j = 0; j < subSeqNum; ++j) { + subSeqStartPos.push_back(subSeqStartPos.back() + + (1 + (rand() % maxSeqLen))); } - seqStartPos1[i] = curPos; + vector selSeqs = + randSampling(static_cast(subSeqNum), min(beamSize, subSeqNum)); + memcpy(indicesData + (i * beamSize), + selSeqs.data(), + selSeqs.size() * sizeof(real)); + seqStartPos[i + 1] = subSeqStartPos.back(); } - MatrixPtr dataInputPtr1 = Matrix::create(curPos, layerSize, false, false); - dataInputPtr1->randomizeUniform(); + MatrixPtr seqInputPtr = + Matrix::create(seqStartPos.back(), layerSize, false, false); config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, - "layer_0", - dataInputPtr1, - seqStartPos1, + "nested_seq_input", + seqInputPtr, + seqStartPos, subSeqStartPos}); config.layerConfig.add_inputs(); - - // Generate the second input - MatrixPtr dataInputPtr2 = - Matrix::create(seqStartPos2[batchSize], 1, false, false); - dataInputPtr2->randomizeUniform(); config.inputDefs.push_back( - {INPUT_SELF_DEFINE_DATA, "layer_1", dataInputPtr2, seqStartPos2}); + {INPUT_SELF_DEFINE_DATA, "selected_indices", selectedIndices}); config.layerConfig.add_inputs(); for (auto useGpu : {false, true}) { testLayerGrad(config, "sub_nested_seq", - /* batchSize */ 100, + /* batchSize */ seqNum, /* trans */ false, /* useGpu*/ useGpu, /* useWeight */ false); } } -TEST(Layer, ClipLayer) { 
- const size_t batchSize = 128; - const size_t size = 512; - TestConfig config; - config.layerConfig.set_type("clip"); - config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); - LayerInputConfig* input = config.layerConfig.add_inputs(); - ClipConfig* layerConf = input->mutable_clip_conf(); - double p1 = std::rand() / (double)RAND_MAX; - double p2 = std::rand() / (double)RAND_MAX; - layerConf->set_min(std::min(p1, p2)); - layerConf->set_max(std::max(p1, p2)); - for (auto useGpu : {false, true}) { - testLayerGrad(config, "clip", batchSize, false, useGpu, false); - } -} - -TEST(Layer, RowL2NormLayer) { - const size_t batchSize = 128; - const size_t size = 512; - TestConfig config; - config.layerConfig.set_type("row_l2_norm"); - config.layerConfig.set_size(size); - config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); - config.layerConfig.add_inputs(); - for (auto useGpu : {false, true}) { - testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); - } -} +// TEST(Layer, ClipLayer) { +// const size_t batchSize = 128; +// const size_t size = 512; +// TestConfig config; +// config.layerConfig.set_type("clip"); +// config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); +// LayerInputConfig* input = config.layerConfig.add_inputs(); +// ClipConfig* layerConf = input->mutable_clip_conf(); +// double p1 = std::rand() / (double)RAND_MAX; +// double p2 = std::rand() / (double)RAND_MAX; +// layerConf->set_min(std::min(p1, p2)); +// layerConf->set_max(std::max(p1, p2)); +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "clip", batchSize, false, useGpu, false); +// } +// } +// +// TEST(Layer, RowL2NormLayer) { +// const size_t batchSize = 128; +// const size_t size = 512; +// TestConfig config; +// config.layerConfig.set_type("row_l2_norm"); +// config.layerConfig.set_size(size); +// config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); +// config.layerConfig.add_inputs(); +// for (auto useGpu : {false, true}) { +// testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); +// } +// } int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); From ffafc5c911c38ff1245d21c73b1bb7936df490f7 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 7 Aug 2017 08:54:18 +0800 Subject: [PATCH 16/44] fix the SubNestedSequenceLayer implementations. --- .../gserver/layers/SubNestedSequenceLayer.cpp | 88 +- paddle/gserver/tests/test_LayerGrad.cpp | 3820 ++++++++--------- .../paddle/trainer_config_helpers/layers.py | 6 +- 3 files changed, 1982 insertions(+), 1932 deletions(-) diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index 443396a14d..f875fdea45 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -31,16 +31,22 @@ public: void backward(const UpdateCallback& callback = nullptr) override; private: - void calSelectedCols(const MatrixPtr scores, - const int* seqStartPos, - const int* subSeqStartPos); + void reorganizeSeqInfo(const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos); + void calSelectedCols(const MatrixPtr selectedIndices, + const std::vector> inputSeqInfo); void buildOutputSeqInfo(); std::vector outSeqStartInfo_; std::vector outSubSeqStartInfo_; - MatrixPtr scoreOverInputSeq_; + // if the second input of this layer is on GPU memory, copy it to CPU memory. 
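+  // The copy is made lazily in forward(): Matrix::resizeOrCreate() plus
+  // copyFrom() when the input is a GpuMatrix; otherwise selIdsCpu_ simply
+  // aliases the input matrix, so the CPU path allocates no extra memory.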
+  MatrixPtr selIdsCpu_;
+  // reorganize sequenceStartPositions and subSequenceStartPositions together
+  // into a 2-D vector to facilitate the sequence selection process.
+  std::vector<std::vector<int>> inputSeqInfo_;
+  // the final selected row indices in a batch,
  // rowIdx_ and selectedRows_ actually share a same memory.
  IVectorPtr rowIndice_;
  std::vector<int> selectedRows_;
@@ -57,12 +63,47 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
  return true;
}
-void SubNestedSequenceLayer::calSelectedCols(const MatrixPtr selected_indices,
-                                             const int* seqStartPos,
-                                             const int* subSeqStartPos) {
+void SubNestedSequenceLayer::reorganizeSeqInfo(
+    const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos) {
+  int* seqStarts = seqStartPos->getMutableData(false);
+  int* subSeqStarts = subSeqStartPos->getMutableData(false);
+
+  int seqNum = seqStartPos->getSize() - 1;
+  inputSeqInfo_.resize(seqNum, std::vector<int>());
+  int seqIdx = 0;
+  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
+    inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]);
+    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+      seqIdx++;
+      if (seqIdx == seqNum) return;
+      inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]);
+    }
+  }
+}
+
+void SubNestedSequenceLayer::calSelectedCols(
+    const MatrixPtr selectedIndices,
+    const std::vector<std::vector<int>> inputSeqInfo) {
  selectedRows_.clear();
  outSubSeqStartInfo_.resize(1, 0);
  outSeqStartInfo_.resize(1, 0);
+
+  size_t seqNum = selectedIndices->getHeight();
+  size_t beamSize = selectedIndices->getWidth();
+  for (size_t i = 0; i < seqNum; ++i) {
+    for (size_t j = 0; j < beamSize; ++j) {
+      if (selectedIndices->getElement(i, j) == -1.) break;
+      int selSubSeqIdx = selectedIndices->getElement(i, j);
+      CHECK_GT(inputSeqInfo_[i].size() - 1, selSubSeqIdx);
+
+      size_t subSeqLen =
+          inputSeqInfo_[i][selSubSeqIdx + 1] - inputSeqInfo_[i][selSubSeqIdx];
+      for (size_t k = 0; k < subSeqLen; ++k)
+        selectedRows_.push_back(inputSeqInfo_[i][selSubSeqIdx] + k);
+      outSubSeqStartInfo_.push_back(outSubSeqStartInfo_.back() + subSeqLen);
+    }
+    outSeqStartInfo_.push_back(outSubSeqStartInfo_.back());
+  }
}
void SubNestedSequenceLayer::buildOutputSeqInfo() {
@@ -83,17 +124,35 @@ void SubNestedSequenceLayer::forward(PassType passType) {
  Layer::forward(passType);
  const Argument& inputSeq = getInput(0);
-  const MatrixPtr selected_indices = getInputValue(1);
  CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer "
                              << "must be a nested sequence.";
-  CHECK_EQ(inputSeq.getNumSequences(), selected_indices->getHeight());
-
-  calSelectedCols(selected_indices,
-                  inputSeq.sequenceStartPositions->getMutableData(false),
-                  inputSeq.subSequenceStartPositions->getMutableData(false));
+  const MatrixPtr selectedIndices = getInputValue(1);
+  CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight());
+
+  if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) {
+    /*
+     * Currently, the second input of this layer is generated either by the
+     * kmax_sequence_score_layer, whose output is always stored on the CPU,
+     * or by a data_layer, which can be on the GPU.
+     *
+     * If the second input is on the GPU, copy it to CPU memory, because this
+     * input always uses very little memory, and the operations related to it
+     * are all logic control, not computation.
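+     *
+     * For example, with seqNum = 2 and beamSize = 3 the matrix holds only
+     * 2 x 3 real values, e.g. {{0, 2, -1}, {1, -1, -1}}; calSelectedCols
+     * reads it element by element and stops at the first -1 in each row.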
+ */ + Matrix::resizeOrCreate(selIdsCpu_, + selectedIndices->getHeight(), + selectedIndices->getWidth(), + false /* trans */, + false /* useGpu */); + selIdsCpu_->copyFrom(*selectedIndices); + } else { + selIdsCpu_ = selectedIndices; + } + reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions); + calSelectedCols(selIdsCpu_, inputSeqInfo_); resetOutput(selectedRows_.size(), getSize()); - buildOutputSeqInfo(); if (useGpu_) { rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); @@ -103,6 +162,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); } + buildOutputSeqInfo(); getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); } diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index da546b979e..0f312b6ca5 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -32,1887 +32,1872 @@ DECLARE_double(checkgrad_eps); DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(prev_batch_state); -// TEST(Operator, dot_mul) { -// TestConfig config; -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); -// operatorConf.set_type("dot_mul"); -// operatorConf.set_dotmul_scale(-1); -// -// testOperatorGrad(config, operatorConf, 100, false, false); -// } -// -// TEST(Projection, context) { -// for (auto contextStart : {-5, -3, -1, 0, 3}) { -// for (auto contextLength : {1, 2, 5, 7}) { -// for (auto batchSize : {1, 2, 5, 20, 50}) { -// for (auto trainablePadding : {false, true}) { -// LOG(INFO) << " contextStart=" << contextStart -// << " contextLength=" << contextLength -// << " batchSize=" << batchSize -// << " trainablePadding=" << trainablePadding; -// ProjectionConfig conf; -// conf.set_type("context"); -// conf.set_input_size(10); -// conf.set_context_start(contextStart); -// conf.set_context_length(contextLength); -// conf.set_trainable_padding(trainablePadding); -// conf.set_output_size(conf.context_length() * conf.input_size()); -// int pad = -// std::max(0, -conf.context_start()) + -// std::max(0, conf.context_start() + conf.context_length() - 1); -// for (auto useGpu : {false, true}) { -// testProjectionGrad( -// conf, -// INPUT_SEQUENCE_DATA, -// trainablePadding ? 
conf.input_size() * pad : 0, -// batchSize, -// useGpu, -// contextStart + contextLength <= 1); // = testState -// } -// } -// } -// } -// } -// } -// -// TEST(Projection, trans_fc) { -// ProjectionConfig conf; -// conf.set_type("trans_fc"); -// conf.set_input_size(50); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 1000, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, fc) { -// ProjectionConfig conf; -// conf.set_type("fc"); -// conf.set_input_size(10); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 200, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, dot_mul) { -// ProjectionConfig conf; -// conf.set_type("dot_mul"); -// conf.set_input_size(20); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 20, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, table) { -// ProjectionConfig conf; -// conf.set_type("table"); -// conf.set_input_size(10); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_LABEL, -// /* parameterSize */ 200, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, identity) { -// ProjectionConfig conf; -// conf.set_type("identity"); -// conf.set_input_size(10); -// conf.set_output_size(10); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 0, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, slice) { -// ProjectionConfig conf; -// conf.set_type("slice"); -// conf.set_input_size(100); -// SliceConfig& slice1 = *conf.add_slices(); -// slice1.set_start(10); -// slice1.set_end(20); -// SliceConfig& slice2 = *conf.add_slices(); -// slice2.set_start(50); -// slice2.set_end(70); -// conf.set_output_size(30); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 0, -// /* batchSize */ 10, -// useGpu); -// } -// } -// -// TEST(Projection, scaling) { -// ProjectionConfig conf; -// conf.set_type("scaling"); -// conf.set_input_size(10); -// conf.set_output_size(10); -// for (auto useGpu : {false}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 1, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// void testProjectionConv(size_t groups, bool isDeconv) { -// const int NUM_FILTERS = 18; -// const int FILTER_SIZE = 2; -// const int FILTER_SIZE_Y = 4; -// const int CHANNELS = 3; -// const int IMAGE_SIZE = 16; -// -// ProjectionConfig conf; -// if (isDeconv) { -// conf.set_type("convt"); -// } else { -// conf.set_type("conv"); -// } -// conf.set_num_filters(NUM_FILTERS); -// -// ConvConfig* conv = conf.mutable_conv_conf(); -// conv->set_filter_size(FILTER_SIZE); -// conv->set_filter_size_y(FILTER_SIZE_Y); -// conv->set_channels(CHANNELS); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(groups); -// if (isDeconv) { -// conv->set_filter_channels(NUM_FILTERS / conv->groups()); -// } else { -// conv->set_filter_channels(conv->channels() / conv->groups()); -// } -// conv->set_img_size(IMAGE_SIZE); -// int output_x = outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ 
true); -// int output_y = outputSize(conv->img_size(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true); -// conv->set_output_x(output_x); -// conv->set_output_y(output_y); -// if (isDeconv) { -// conf.set_input_size(output_x * output_y * CHANNELS); -// conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); -// } else { -// conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); -// conf.set_output_size(output_x * output_y * NUM_FILTERS); -// } -// -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE -// * -// FILTER_SIZE_Y / groups, -// /* batchSize */ 100, -// true, -// false, -// NUM_FILTERS, -// true); -// } -// -// #ifndef PADDLE_ONLY_CPU -// TEST(Projection, conv) { -// /// test ConvProjection -// testProjectionConv(1, false); -// testProjectionConv(3, false); -// /// test ConvTransProjection -// testProjectionConv(1, true); -// testProjectionConv(3, true); -// } -// #endif -// -// TEST(Layer, BilinearInterpLayer) { -// TestConfig config; -// config.layerConfig.set_type("bilinear_interp"); -// config.biasSize = 0; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); -// -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); -// ImageConfig* image = bilinear->mutable_image_conf(); -// image->set_img_size(32); -// image->set_img_size_y(32); -// image->set_channels(4); -// -// for (auto useGpu : {false, true}) { -// for (auto outSize : {32, 64}) { -// bilinear->set_out_size_x(outSize); -// bilinear->set_out_size_y(outSize); -// testLayerGrad(config, "bilinear_interp", 10, false, useGpu); -// } -// } -// } -// -// TEST(Layer, concat) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("concat"); -// config.layerConfig.set_size(15); -// config.layerConfig.set_active_type("sigmoid"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "concat", 100, false, useGpu); -// } -// } -// -// TEST(Layer, AddtoLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("addto"); -// config.layerConfig.set_size(10); -// config.layerConfig.set_active_type("sigmoid"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "addto", 100, false, useGpu); -// } -// } -// -// TEST(Layer, CTCLayer) { -// TestConfig config; -// config.layerConfig.set_type("ctc"); -// config.layerConfig.set_norm_by_times(false); -// config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "ctc", -// 100, -// /* trans */ false, /* useGpu */ -// useGpu); -// } -// } -// -// TEST(Layer, cosSimLayer) { -// TestConfig config; -// config.layerConfig.set_type("cos"); -// config.layerConfig.set_size(1); -// config.biasSize = 0; -// -// 
config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "cos", 100, false, useGpu); -// } -// } -// -// TEST(Layer, CosSimVecMatLayer) { -// TestConfig config; -// config.layerConfig.set_type("cos_vm"); -// config.layerConfig.set_size(5); // output size -// config.layerConfig.set_cos_scale(2.0); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "cos_vm", 100, false, useGpu); -// } -// } -// -// void testDepthwiseConvLayer(const string& type, bool useGpu) { -// TestConfig config; -// config.biasSize = 32; -// config.layerConfig.set_type(type); -// config.layerConfig.set_num_filters(32); -// config.layerConfig.set_partial_sum(1); -// config.layerConfig.set_shared_biases(true); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ConvConfig* conv = input->mutable_conv_conf(); -// conv->set_filter_size(2); -// conv->set_filter_size_y(3); -// conv->set_channels(16); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(16); -// conv->set_filter_channels(conv->channels() / conv->groups()); -// conv->set_img_size(16); -// conv->set_img_size_y(8); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// conv->set_output_y(outputSize(conv->img_size_y(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true)); -// config.layerConfig.set_size(conv->output_x() * conv->output_y() * -// config.layerConfig.num_filters()); -// -// testLayerGrad(config, "depthwise_conv", 100, false, useGpu); -// // Use small batch_size and useWeight=true to test biasGrad -// testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); -// } -// -// TEST(Layer, depthwiseConvLayer) { -// // 'depthwise_conv' is a sepecial case of 'exconv' whose -// // groups size equals to the input channels size. 
-// testDepthwiseConvLayer("exconv", /* useGpu= */ false); -// #ifndef PADDLE_ONLY_CPU -// testDepthwiseConvLayer("exconv", /* useGpu= */ true); -// #endif -// } -// -// void testConvLayer(const string& type, bool trans, bool useGpu) { -// TestConfig config; -// config.biasSize = 16; -// config.layerConfig.set_type(type); -// config.layerConfig.set_num_filters(16); -// config.layerConfig.set_partial_sum(1); -// config.layerConfig.set_shared_biases(true); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ConvConfig* conv = input->mutable_conv_conf(); -// conv->set_filter_size(2); -// conv->set_filter_size_y(3); -// conv->set_channels(3); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(1); -// conv->set_filter_channels(conv->channels() / conv->groups()); -// conv->set_img_size(16); -// conv->set_img_size_y(8); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// conv->set_output_y(outputSize(conv->img_size_y(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true)); -// config.layerConfig.set_size(conv->output_x() * conv->output_y() * -// config.layerConfig.num_filters()); -// -// testLayerGrad(config, "conv", 100, trans, useGpu); -// // Use small batch_size and useWeight=true to test biasGrad -// testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); -// } -// -// TEST(Layer, convLayer) { -// testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); -// #ifndef PADDLE_ONLY_CPU -// testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); -// testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); -// #endif -// } -// -// void testConvTransLayer(const string& type, bool trans, bool useGpu) { -// TestConfig config; -// config.biasSize = 3; -// config.layerConfig.set_type(type); -// config.layerConfig.set_num_filters(3); -// config.layerConfig.set_partial_sum(1); -// config.layerConfig.set_shared_biases(true); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ConvConfig* conv = input->mutable_conv_conf(); -// conv->set_filter_size(2); -// conv->set_filter_size_y(4); -// conv->set_channels(16); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(1); -// conv->set_filter_channels(3 / conv->groups()); -// conv->set_img_size(16); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// -// config.layerConfig.set_size(conv->img_size() * conv->img_size() * -// config.layerConfig.num_filters()); -// -// testLayerGrad(config, "convTrans", 100, trans, useGpu); -// // Use small batch_size and useWeight=true to test biasGrad -// testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); -// } -// -// TEST(Layer, convTransLayer) { -// for (auto useGpu : {false, true}) { -// testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); -// } -// #ifndef PADDLE_ONLY_CPU -// testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); -// #endif -// } -// -// TEST(Layer, blockExpandLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("blockexpand"); -// 
-// config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); -// blockExpand->set_img_size_x(64); -// blockExpand->set_img_size_y(32); -// blockExpand->set_channels(3); -// blockExpand->set_padding_x(0); -// blockExpand->set_padding_y(0); -// blockExpand->set_block_x(4); -// blockExpand->set_block_y(32); -// blockExpand->set_stride_x(2); -// blockExpand->set_stride_y(2); -// blockExpand->set_output_x(outputSize(blockExpand->img_size_x(), -// blockExpand->block_x(), -// blockExpand->padding_x(), -// blockExpand->stride_x(), -// /* caffeMode */ false)); -// blockExpand->set_output_y(outputSize(blockExpand->img_size_y(), -// blockExpand->block_y(), -// blockExpand->padding_y(), -// blockExpand->stride_y(), -// /* caffeMode */ false)); -// config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() -// * -// blockExpand->channels()); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "blockexpand", 100, false, useGpu); -// } -// } -// -// TEST(Layer, maxoutLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("maxout"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// MaxOutConfig* maxout = input->mutable_maxout_conf(); -// ImageConfig* image = maxout->mutable_image_conf(); -// -// image->set_img_size(32); -// image->set_img_size_y(32); -// image->set_channels(4); -// maxout->set_groups(2); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "maxout", 10, false, useGpu); -// } -// } -// void testFcLayer(string format, size_t nnz) { -// TestConfig config; -// config.biasSize = 4096; -// config.layerConfig.set_type("fc"); -// config.layerConfig.set_size(4096); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_drop_rate(0.1); -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)}); -// config.layerConfig.add_inputs(); -// -// LOG(INFO) << config.inputDefs[0].sparse.sparse << " " -// << config.inputDefs[0].sparse.format; -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "fc", -// 100, -// /* trans */ false, -// useGpu, -// /* weight */ true); -// } -// } -// -// TEST(Layer, fcLayer) { -// testFcLayer("", 4096 * 4096 * 2); -// testFcLayer("csc", 4096 * 40); -// testFcLayer("csr", 4096 * 40); -// } -// -// TEST(Layer, SelectiveFullyConnectedLayer) { -// TestConfig config; -// size_t nin = 16; -// size_t nout = 256; -// config.layerConfig.set_type("selective_fc"); -// config.layerConfig.set_size(nout); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_has_selected_colums(true); -// config.layerConfig.set_selective_fc_pass_generation(false); -// config.biasSize = nout; -// -// config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back( -// {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", -// true)}); -// config.layerConfig.add_inputs(); -// -// testLayerGrad(config, -// "selective_fc", -// 100, -// /* trans= */ false, -// /* useGup= */ false, -// false); -// #ifndef PADDLE_ONLY_CPU -// testLayerGrad(config, -// "selective_fc", -// 100, -// /* trans= */ false, -// /* useGup= */ true, -// false); -// #endif -// } -// -// TEST(Layer, DataNormLayer) { -// TestConfig config; -// 
config.layerConfig.set_type("data_norm"); -// config.layerConfig.set_size(20); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100}); -// config.inputDefs.back().isStatic = true; -// config.layerConfig.add_inputs(); -// -// for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) { -// config.layerConfig.set_data_norm_strategy(strategy); -// // The parameters are static, so not support GPU now -// testLayerGrad(config, -// "data_norm", -// 200, -// /* trans */ false, -// /* useGpu */ false); -// } -// } -// -// TEST(Layer, hsigmoidLayer) { -// TestConfig config; -// config.layerConfig.set_type("hsigmoid"); -// config.layerConfig.set_num_classes(5); -// config.layerConfig.set_size(1); -// config.biasSize = config.layerConfig.num_classes() - 1; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // Not support GPU now -// testLayerGrad(config, -// "hsigmoid", -// 100, -// /* trans */ false, /* useGpu */ -// false); -// } -// -// TEST(Layer, multi_cross) { -// TestConfig config; -// config.layerConfig.set_type("multi-class-cross-entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad( -// config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); -// } -// } -// -// TEST(Layer, multi_binary_label_sparse_mat) { -// TestConfig config; -// config.layerConfig.set_type("multi_binary_label_cross_entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "multi_binary_label_cross_entropy", -// 100, -// /* trans */ false, -// useGpu); -// } -// } -// -// TEST(layer, multi_binary_label_id) { -// TestConfig config; -// config.layerConfig.set_type("multi_binary_label_cross_entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "multi_binary_label_cross_entropy", -// 100, -// /* trans */ false, -// useGpu); -// } -// } -// -// TEST(Layer, multi_cross_with_selfnorm) { -// TestConfig config; -// config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); -// config.layerConfig.set_softmax_selfnorm_alpha(0.1); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // Not support GPU now -// testLayerGrad(config, -// "multi_class_cross_entropy_with_selfnorm", -// 100, -// /* trans */ false, -// /* useGpu */ false); -// } -// -// TEST(Layer, multi_cross_soft) { -// TestConfig config; -// config.layerConfig.set_type("soft_binary_class_cross_entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, 
"layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "soft_binary_class_cross_entropy", -// 100, -// /* trans */ false, -// useGpu); -// } -// } -// -// TEST(Layer, square_error) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); -// } -// } -// -// TEST(Layer, sparse_square_error) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // "GpuSparseMatrix" as label is not supported -// testLayerGrad(config, -// "square_error", -// 100, -// /* trans */ false, -// /* useGpu */ false); -// } -// -// TEST(Layer, sparse_float_square_error) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // "GpuSparseMatrix" as label is not supported -// testLayerGrad(config, -// "square_error", -// 100, -// /* trans */ false, -// /* useGpu */ false); -// } -// -// TEST(Layer, square_error_weighted) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// config.testAccumulate = false; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); -// } -// } -// -// TEST(Layer, huber_two_class) { -// TestConfig config; -// config.layerConfig.set_type("huber"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); -// } -// } -// -// void testExpandLayer(string trans_type, bool hasSubseq) { -// TestConfig config; -// config.layerConfig.set_type("expand"); -// -// config.inputDefs.push_back( -// {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, -// "layer_0", -// 10, -// 0}); -// config.inputDefs.push_back( -// {hasSubseq ? 
INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, -// "layer_1", -// 10, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.set_trans_type(trans_type); -// LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "expand", 30, false, useGpu); -// } -// } -// -// TEST(Layer, ExpandLayer) { -// testExpandLayer("non-seq", false); // non-seq expand to seq -// testExpandLayer("non-seq", true); // non-seq expand to hasSubseq -// testExpandLayer("seq", true); // seq expand to hasSubseq -// } -// -// void testDegradeLayer(bool hasSubseq, -// string layer_type, -// string trans_type, -// int stride) { -// TestConfig config; -// config.layerConfig.set_type(layer_type); -// config.layerConfig.set_size(10); -// config.layerConfig.set_seq_pool_stride(stride); -// config.biasSize = 0; -// -// config.inputDefs.push_back( -// {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, -// "layer_0", -// 10, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.set_trans_type(trans_type); -// -// auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) { -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, layer_type, 100, false, useGpu); -// } -// }; -// -// if (layer_type == "average") { -// for (auto strategy : {"average", "sum", "squarerootn"}) { -// LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type -// << " average_strategy=" << strategy -// << " seq_pool_stride=" << stride; -// config.layerConfig.set_average_strategy(strategy); -// testDegradeLayerGrad(config, layer_type); -// } -// } else { -// LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type -// << " seq_pool_stride=" << stride; -// testDegradeLayerGrad(config, layer_type); -// } -// } -// -// TEST(Layer, MaxLayer) { -// testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq -// testDegradeLayer(false, -// "max", -// "non-seq", -// 5); // seq max to a shorten seq, stride window = 5 -// testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq -// testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq -// } -// -// TEST(Layer, SequenceLastInstanceLayer) { -// testDegradeLayer(false, -// "seqlastins", -// "non-seq", -// -1); // seq seqlastins to non-seq -// testDegradeLayer(false, -// "seqlastins", -// "non-seq", -// 5); // seq seqlastins to a shorten seq, stride window = 5 -// testDegradeLayer(true, -// "seqlastins", -// "non-seq", -// -1); // hasSubseq seqlastins to non-seq -// testDegradeLayer( -// true, "seqlastins", "seq", -1); // hasSubseq seqlastins to seq -// } -// -// TEST(Layer, AverageLayer) { -// testDegradeLayer(false, "average", "non-seq", -1); // seq average to -// non-seq -// testDegradeLayer(false, -// "average", -// "non-seq", -// 5); // seq average to a shorten seq, stride window = 5 -// testDegradeLayer( -// true, "average", "non-seq", -1); // hasSubseq average to -// non-seq -// testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq -// } -// -// TEST(Layer, SequenceConcatLayer) { -// TestConfig config; -// config.layerConfig.set_type("seqconcat"); -// config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for 
(auto useGpu : {false, true}) { -// testLayerGrad(config, "seqconcat", 100, false, useGpu); -// } -// } -// -// TEST(Layer, SequenceReshapeLayer) { -// TestConfig config; -// config.layerConfig.set_type("seqreshape"); -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "seqreshape", 100, false, useGpu); -// } -// } -// -// TEST(Layer, ConvShiftLayer) { -// TestConfig config; -// config.layerConfig.set_type("conv_shift"); -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // Not support GPU now -// testLayerGrad(config, "conv_shift", 100, false, false); -// } -// -// TEST(Layer, PowerLayer) { -// TestConfig config; -// config.layerConfig.set_type("power"); -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "power", 100, false, useGpu); -// } -// } -// -// TEST(Layer, ConvexCombinationLayer) { -// TestConfig config; -// config.layerConfig.set_type("convex_comb"); -// config.layerConfig.set_size(20); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "convex_comb", 100, false, useGpu); -// } -// } -// -// TEST(Layer, InterpolationLayer) { -// TestConfig config; -// config.layerConfig.set_type("interpolation"); -// config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "interpolation", 100, false, useGpu); -// } -// } -// -// TEST(Layer, OuterProdLayer) { -// TestConfig config; -// config.layerConfig.set_type("out_prod"); -// config.layerConfig.set_size(100); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "out_prod", 100, false, useGpu); -// } -// } -// -// TEST(Layer, SlopeInterceptLayer) { -// TestConfig config; -// config.layerConfig.set_type("slope_intercept"); -// config.layerConfig.set_size(10); -// config.layerConfig.set_slope(1.0); -// config.layerConfig.set_intercept(0.1); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "slope_intercept", 100, false, useGpu); -// } -// } -// -// TEST(Layer, ScalingLayer) { -// TestConfig config; -// config.layerConfig.set_type("scaling"); -// 
config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "scaling", 100, false, useGpu); -// } -// } -// -// void testNormLayer(const string& normType, bool trans, bool useGpu) { -// TestConfig config; -// config.layerConfig.set_type("norm"); -// config.layerConfig.set_active_type("relu"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// NormConfig* norm = input->mutable_norm_conf(); -// norm->set_norm_type(normType); -// norm->set_channels(16); -// norm->set_size(5); -// norm->set_scale(0.001); -// norm->set_pow(0.75); -// norm->set_blocked(0); -// norm->set_img_size(14); -// norm->set_img_size_y(7); -// norm->set_output_x(norm->img_size()); -// norm->set_output_y(norm->img_size_y()); -// if (norm->norm_type() == "cmrnorm" || -// norm->norm_type() == "cmrnorm-projection") { -// norm->set_scale(norm->scale() / norm->size()); -// } else { -// norm->set_scale(norm->scale() / (norm->size() * norm->size())); -// } -// -// config.layerConfig.set_size(norm->output_x() * norm->output_y() * -// norm->channels()); -// config.biasSize = 0; -// -// testLayerGrad(config, "norm", 100, trans, useGpu); -// } -// -// TEST(Layer, NormLayer) { -// testNormLayer("cmrnorm-projection", -// /* trans= */ false, /* useGpu= */ -// true); -// testNormLayer("cmrnorm-projection", -// /* trans= */ false, /* useGpu= */ -// false); -// } -// -// void setPoolConfig(TestConfig* config, -// PoolConfig* pool, -// const string& poolType) { -// (*config).biasSize = 0; -// (*config).layerConfig.set_type("pool"); -// (*config).layerConfig.set_num_filters(16); -// -// int kw = 3, kh = 3; -// int pw = 0, ph = 0; -// int sw = 2, sh = 2; -// pool->set_pool_type(poolType); -// pool->set_channels(16); -// pool->set_size_x(kw); -// pool->set_size_y(kh); -// pool->set_start(0); -// pool->set_padding(pw); -// pool->set_padding_y(ph); -// pool->set_stride(sw); -// pool->set_stride_y(sh); -// -// int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); -// int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); -// pool->set_output_x(ow); -// pool->set_output_y(oh); -// } -// -// void testPoolLayer(const string& poolType, bool trans, bool useGpu) { -// TestConfig config; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// PoolConfig* pool = input->mutable_pool_conf(); -// -// pool->set_img_size(14); -// pool->set_img_size_y(14); -// setPoolConfig(&config, pool, poolType); -// config.layerConfig.set_size(pool->output_x() * pool->output_y() * -// pool->channels()); -// -// testLayerGrad(config, "pool", 100, trans, useGpu); -// } -// -// #ifndef PADDLE_ONLY_CPU -// void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { -// TestConfig config; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// PoolConfig* pool = input->mutable_pool_conf(); -// -// pool->set_size_y(4); -// pool->set_stride_y(3); -// pool->set_img_size(10); -// pool->set_img_size_y(20); -// setPoolConfig(&config, pool, poolType); -// pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / 
-// ((float)pool->stride_y()) + -// 1.5); -// config.layerConfig.set_size(pool->output_x() * pool->output_y() * -// pool->channels()); -// -// testLayerGrad(config, "pool", 100, trans, useGpu); -// } -// #endif -// -// TEST(Layer, PoolLayer) { -// testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); -// testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); -// -// #ifndef PADDLE_ONLY_CPU -// testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); -// #endif -// } -// -// void testSppLayer(const string& poolType, -// const int pyramidHeight, -// bool trans, -// bool useGpu) { -// TestConfig config; -// config.layerConfig.set_type("spp"); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// SppConfig* sppConfig = input->mutable_spp_conf(); -// sppConfig->set_pool_type(poolType); -// sppConfig->set_pyramid_height(pyramidHeight); -// ImageConfig* imageConfig = sppConfig->mutable_image_conf(); -// imageConfig->set_channels(16); -// imageConfig->set_img_size(10); -// imageConfig->set_img_size_y(20); -// int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); -// config.layerConfig.set_size(outputSize * imageConfig->channels()); -// testLayerGrad(config, "spp", 100, trans, useGpu); -// } -// -// TEST(Layer, SpatialPyramidPoolLayer) { -// for (auto useGpu : {false, true}) { -// for (auto pyramidHeight : {1, 2, 3}) { -// testSppLayer("avg-projection", pyramidHeight, false, useGpu); -// testSppLayer("max-projection", pyramidHeight, false, useGpu); -// } -// } -// } -// -// TEST(Layer, rankCostLayer) { -// TestConfig config; -// config.layerConfig.set_type("rank-cost"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "rank-cost", 100, false, useGpu); -// } -// } -// -// TEST(Layer, sumCostLayer) { -// TestConfig config; -// config.layerConfig.set_type("sum_cost"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "sum_cost", 100, false, useGpu); -// } -// } -// -// TEST(Layer, weightedRankCostLayer) { -// TestConfig config; -// config.layerConfig.set_type("rank-cost"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// 
testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); -// } -// } -// -// TEST(Layer, TensorLayer) { -// TestConfig config; -// config.layerConfig.set_type("tensor"); -// config.layerConfig.set_size(10); -// config.layerConfig.set_active_type("sigmoid"); -// config.biasSize = config.layerConfig.size(); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "tensor", 100, false, useGpu); -// } -// } -// -// TEST(Layer, RecurrentLayer) { -// TestConfig config; -// config.layerConfig.set_type("recurrent"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("tanh"); -// config.biasSize = 4; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// for (auto reversed : {false, true}) { -// config.layerConfig.set_reversed(reversed); -// config.testState = !reversed; -// testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); -// } -// } -// } -// -// TEST(Layer, LstmLayer) { -// TestConfig config; -// config.layerConfig.set_type("lstmemory"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("tanh"); -// config.layerConfig.set_active_state_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 28; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// for (auto reversed : {false, true}) { -// config.layerConfig.set_reversed(reversed); -// config.testState = !reversed; -// testLayerGrad(config, "lstmemory", 100, /* trans= */ false, useGpu); -// } -// } -// for (auto useGpu : {true}) { -// config.testBatchState = true; -// config.layerConfig.set_reversed(false); -// testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, MDLstmLayer) { -// TestConfig config; -// config.layerConfig.set_type("mdlstmemory"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_state_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 4 * 9; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_directions(true); -// config.layerConfig.add_directions(true); -// -// for (auto useGpu : {false, true}) { -// for (int i = 0; i < 2; i++) { -// for (int j = 0; j < 2; j++) { -// config.layerConfig.set_directions(0, bool(i)); -// config.layerConfig.set_directions(1, bool(j)); -// testLayerGrad(config, "mdlstmemory", 100, false, useGpu); -// } -// } -// } -// } -// -// TEST(Layer, ParameterReluLayer) { -// auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { -// TestConfig config; -// config.layerConfig.set_type("prelu"); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); -// config.layerConfig.add_inputs(); -// config.layerConfig.set_size(inputSize); -// config.layerConfig.set_partial_sum(inputSize / -// channels); // size of feature map -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "prelu", 100, false, 
useGpu); -// } -// }; -// -// testParameterReluLayer(192, 1); -// testParameterReluLayer(192, 3); -// testParameterReluLayer(192, 192); -// } -// -// TEST(Layer, ResizeLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("resize"); -// config.layerConfig.set_size(64); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "resize", 100, false, useGpu); -// } -// } -// -// TEST(Layer, RotateLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("rotate"); -// const int CHANNEL = 2; -// const int HEIGHT = 8; -// const int WIDTH = 4; -// const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; -// config.layerConfig.set_size(INPUT_SIZE); -// config.layerConfig.set_height(HEIGHT); -// config.layerConfig.set_width(WIDTH); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "rotate", 100, false, useGpu); -// } -// } -// -// TEST(Layer, NCELayer) { -// TestConfig config; -// size_t numClasses = 4; -// config.layerConfig.set_type("nce"); -// config.layerConfig.set_size(1); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_num_classes(numClasses); -// config.biasSize = numClasses; -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * -// numClasses}); -// config.inputDefs.push_back( -// {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto withWeight : {false, true}) { -// if (withWeight) { -// config.inputDefs.push_back( -// {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// } -// -// for (auto isIdLabel : {false, true}) { -// config.inputDefs[1] = { -// isIdLabel ? 
INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA, -// "label", -// /* dim= */ numClasses, -// /* paraSize= */ 0}; -// -// for (auto withDist : {false, true}) { -// config.layerConfig.clear_neg_sampling_dist(); -// if (withDist) { -// double sum = 0; -// for (size_t i = 0; i < numClasses; ++i) { -// real p = rand(); // NOLINT use rand_r -// config.layerConfig.add_neg_sampling_dist(p); -// sum += p; -// } -// for (size_t i = 0; i < numClasses; ++i) { -// real p = config.layerConfig.neg_sampling_dist(i) / sum; -// config.layerConfig.set_neg_sampling_dist(i, p); -// } -// } -// LOG(INFO) << "NCELayer " -// << " isIdLabel=" << isIdLabel << " withWeight=" << -// withWeight -// << " withDist=" << withDist; -// // Not support GPU now -// testLayerGrad(config, -// "nce", -// 100, -// /* trans= */ false, -// /* useGpu */ false); -// } -// } -// } -// } -// -// TEST(Layer, GatedRecurrentLayer) { -// TestConfig config; -// config.layerConfig.set_type("gated_recurrent"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 12; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// for (auto reversed : {false, true}) { -// config.layerConfig.set_reversed(reversed); -// config.testState = !reversed; -// testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, -// useGpu); -// } -// } -// } -// -// TEST(Layer, GruStepLayer) { -// TestConfig config; -// config.layerConfig.set_type("gru_step"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 12; -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, LstmStepLayer) { -// TestConfig config; -// config.layerConfig.set_type("lstm_step"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_state_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 12; -// config.testAccumulate = false; -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu); -// } -// } -// -// void testBatchNormLayer(const string& type, bool trans, bool useGpu) { -// TestConfig config; -// const int CHANNELS = 10; -// const int IMG_SIZE = 16; -// const int IMG_SIZE_Y = 8; -// size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; -// config.layerConfig.set_type(type); -// config.layerConfig.set_size(size); -// config.layerConfig.set_active_type("sigmoid"); -// config.biasSize = CHANNELS; -// config.inputDefs.push_back({INPUT_DATA, -// "layer_0", -// /* dim= */ size, -// /* paraSize= */ CHANNELS}); -// -// config.inputDefs.push_back({INPUT_DATA, 
"layer_1_running_mean", 1, -// CHANNELS}); -// config.inputDefs.back().isStatic = true; -// config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, -// CHANNELS}); -// config.inputDefs.back().isStatic = true; -// -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// ImageConfig* img_conf = input->mutable_image_conf(); -// img_conf->set_channels(CHANNELS); -// img_conf->set_img_size(IMG_SIZE); -// img_conf->set_img_size_y(IMG_SIZE_Y); -// -// testLayerGrad(config, -// "batch_norm", -// 64, -// /* trans= */ trans, -// useGpu, -// /* useWeight */ true); -// } -// -// TEST(Layer, BatchNormalizationLayer) { -// testBatchNormLayer("batch_norm", false, false); -// #ifndef PADDLE_ONLY_CPU -// testBatchNormLayer("batch_norm", false, true); -// if (hl_get_cudnn_lib_version() >= int(4000)) { -// testBatchNormLayer("cudnn_batch_norm", false, true); -// } -// #endif -// } -// -// void testConvOperator(bool isDeconv) { -// TestConfig config; -// const int NUM_FILTERS = 16; -// const int FILTER_SIZE = 2; -// const int FILTER_SIZE_Y = 3; -// const int CHANNELS = 3; -// const int IMAGE_SIZE = 16; -// const int IMAGE_SIZE_Y = 9; -// OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); -// if (isDeconv) { -// operatorConf.set_type("convt"); -// } else { -// operatorConf.set_type("conv"); -// } -// ConvConfig* conv = operatorConf.mutable_conv_conf(); -// operatorConf.set_num_filters(NUM_FILTERS); -// conv->set_filter_size(FILTER_SIZE); -// conv->set_filter_size_y(FILTER_SIZE_Y); -// conv->set_channels(CHANNELS); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(1); -// conv->set_img_size(IMAGE_SIZE); -// conv->set_img_size_y(IMAGE_SIZE_Y); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// conv->set_output_y(outputSize(conv->img_size_y(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true)); -// -// if (isDeconv) { -// conv->set_filter_channels(NUM_FILTERS / conv->groups()); -// config.inputDefs.push_back({INPUT_DATA, -// "layer_0", -// conv->output_x() * conv->output_y() * -// CHANNELS, -// 0}); -// config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); -// } else { -// conv->set_filter_channels(conv->channels() / conv->groups()); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); -// config.layerConfig.set_size(conv->output_x() * conv->output_y() * -// NUM_FILTERS); -// } -// -// config.inputDefs.push_back( -// {INPUT_DATA, -// "layer_1", -// FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); -// } -// -// TEST(Operator, conv) { -// testConvOperator(/*isDeconv*/ true); -// testConvOperator(/*isDeconv*/ false); -// } -// -// TEST(Layer, FeatureMapExpandLayer) { -// TestConfig config; -// config.layerConfig.set_type("featmap_expand"); -// const int CHANNELS = 10; -// const int INPUT_SIZE = 100; -// config.layerConfig.set_size(INPUT_SIZE * CHANNELS); -// config.layerConfig.set_num_filters(CHANNELS); -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, -// "layer_0", -// /* dim= */ INPUT_SIZE, -// /* paraSize= */ 0}); -// 
config.layerConfig.add_inputs(); -// for (auto useGpu : {false, true}) { -// for (auto asRowVec : {false, true}) { -// config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : -// "as_col_vec"); -// testLayerGrad(config, -// "featmap_expand", -// /*batch_size*/ 100, -// /* trans= */ false, -// useGpu, -// /* useWeight */ true); -// } -// } -// } -// -// TEST(Layer, MultiplexLayer) { -// TestConfig config; -// const int LAYER_SIZE = 100; -// config.layerConfig.set_type("multiplex"); -// config.layerConfig.set_size(LAYER_SIZE); -// -// config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, PadLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("pad"); -// -// int c = 4; -// int h = 31; -// int w = 36; -// size_t size = c * h * w; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// PadConfig* pad = input->mutable_pad_conf(); -// ImageConfig* image = pad->mutable_image_conf(); -// -// image->set_channels(c); -// image->set_img_size(h); -// image->set_img_size_y(w); -// pad->add_pad_c(1); -// pad->add_pad_c(2); -// pad->add_pad_h(2); -// pad->add_pad_h(3); -// pad->add_pad_w(3); -// pad->add_pad_w(5); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "pad", 10, false, useGpu); -// } -// } -// -// TEST(Layer, CrossChannelNormLayer) { -// TestConfig config; -// config.paramInitialMean = 1.; -// config.paramInitialStd = 0.; -// config.layerConfig.set_type("norm"); -// config.layerConfig.set_size(100); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// NormConfig* norm = input->mutable_norm_conf(); -// norm->set_norm_type("cross-channel-norm"); -// norm->set_channels(10); -// norm->set_size(100); -// norm->set_scale(0); -// norm->set_pow(0); -// norm->set_blocked(0); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); -// } -// } -// -// TEST(Layer, smooth_l1) { -// TestConfig config; -// config.layerConfig.set_type("smooth_l1"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); -// } -// } -// -// TEST(Layer, multibox_loss) { -// TestConfig config; -// config.layerConfig.set_type("multibox_loss"); -// config.biasSize = 0; -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); -// multiboxLoss->set_num_classes(21); -// multiboxLoss->set_input_num(1); -// multiboxLoss->set_overlap_threshold(0.5); -// multiboxLoss->set_neg_pos_ratio(3); -// multiboxLoss->set_neg_overlap(0.5); -// multiboxLoss->set_background_id(0); -// multiboxLoss->set_height(3); -// multiboxLoss->set_width(3); -// -// size_t gtNum = 
1; -// MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); -// labelValue->randomizeUniform(); -// labelValue->add(-0.5); -// labelValue->sigmoid(*labelValue); -// real* labelData = labelValue->getData(); -// size_t labelWidth = labelValue->getWidth(); -// for (size_t i = 0; i < gtNum; ++i) { -// *(labelData + i * labelWidth) = std::rand() % 20 + 1; -// *(labelData + i * labelWidth + 1) = 0.400259; -// *(labelData + i * labelWidth + 2) = 0.377857; -// *(labelData + i * labelWidth + 3) = 0.525712; -// *(labelData + i * labelWidth + 4) = 0.519368; -// } -// vector seqStartPositions(gtNum + 1, 0); -// for (size_t i = 1; i <= gtNum; ++i) { -// seqStartPositions[i] = i; -// } -// -// // Ensure at lease one matched bbox -// MatrixPtr priorValue = Matrix::create(1, 72, false, false); -// priorValue->randomizeUniform(); -// priorValue->add(-0.5); -// priorValue->sigmoid(*priorValue); -// real* priorData = priorValue->getData(); -// *(priorData) = 0.424811; -// *(priorData + 1) = 0.397059; -// *(priorData + 2) = 0.538905; -// *(priorData + 3) = 0.447091; -// *(priorData + 4) = 0.425720; -// *(priorData + 5) = 0.515228; -// *(priorData + 6) = 0.519452; -// *(priorData + 7) = 0.591065; -// -// config.inputDefs.push_back( -// {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}}); -// config.inputDefs.push_back( -// {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); -// config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); -// config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); -// } -// } -// -// TEST(Layer, TransLayer) { -// TestConfig config; -// const int height = 128; -// const int width = 1028; -// config.layerConfig.set_type("trans"); -// config.layerConfig.set_size(width); -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "trans", height, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, RowConvLayer) { -// const int context = 3; -// const int size = 512; -// -// TestConfig config; -// config.layerConfig.set_type("row_conv"); -// config.layerConfig.set_size(size); -// config.layerConfig.set_active_type("sigmoid"); -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", size, context * size}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// RowConvConfig* conv = input->mutable_row_conv_conf(); -// conv->set_context_length(context); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "row_conv", 100, false, useGpu, false); -// } -// } -// -// TEST(Layer, CropLayer) { -// TestConfig config; -// // config input_0 -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ImageConfig* img = input->mutable_image_conf(); -// img->set_channels(4); -// img->set_img_size(16); -// config.layerConfig.set_axis(2); -// config.layerConfig.add_offset(0); -// config.layerConfig.add_offset(0); -// -// // config input_1 -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); -// input = config.layerConfig.add_inputs(); -// img = input->mutable_image_conf(); -// img->set_channels(2); -// img->set_img_size(8); -// -// // config crop layer -// 
config.layerConfig.set_type("crop"); -// config.layerConfig.set_name("cropLayer"); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "crop", 100, false, useGpu, false); -// } -// } +TEST(Operator, dot_mul) { + TestConfig config; + config.layerConfig.set_size(10); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); + operatorConf.set_type("dot_mul"); + operatorConf.set_dotmul_scale(-1); + + testOperatorGrad(config, operatorConf, 100, false, false); +} + +TEST(Projection, context) { + for (auto contextStart : {-5, -3, -1, 0, 3}) { + for (auto contextLength : {1, 2, 5, 7}) { + for (auto batchSize : {1, 2, 5, 20, 50}) { + for (auto trainablePadding : {false, true}) { + LOG(INFO) << " contextStart=" << contextStart + << " contextLength=" << contextLength + << " batchSize=" << batchSize + << " trainablePadding=" << trainablePadding; + ProjectionConfig conf; + conf.set_type("context"); + conf.set_input_size(10); + conf.set_context_start(contextStart); + conf.set_context_length(contextLength); + conf.set_trainable_padding(trainablePadding); + conf.set_output_size(conf.context_length() * conf.input_size()); + int pad = + std::max(0, -conf.context_start()) + + std::max(0, conf.context_start() + conf.context_length() - 1); + for (auto useGpu : {false, true}) { + testProjectionGrad( + conf, + INPUT_SEQUENCE_DATA, + trainablePadding ? conf.input_size() * pad : 0, + batchSize, + useGpu, + contextStart + contextLength <= 1); // = testState + } + } + } + } + } +} + +TEST(Projection, trans_fc) { + ProjectionConfig conf; + conf.set_type("trans_fc"); + conf.set_input_size(50); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 1000, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, fc) { + ProjectionConfig conf; + conf.set_type("fc"); + conf.set_input_size(10); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 200, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, dot_mul) { + ProjectionConfig conf; + conf.set_type("dot_mul"); + conf.set_input_size(20); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 20, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, table) { + ProjectionConfig conf; + conf.set_type("table"); + conf.set_input_size(10); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_LABEL, + /* parameterSize */ 200, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, identity) { + ProjectionConfig conf; + conf.set_type("identity"); + conf.set_input_size(10); + conf.set_output_size(10); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 0, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, slice) { + ProjectionConfig conf; + conf.set_type("slice"); + conf.set_input_size(100); + SliceConfig& slice1 = *conf.add_slices(); + slice1.set_start(10); + slice1.set_end(20); + SliceConfig& slice2 = *conf.add_slices(); + slice2.set_start(50); + slice2.set_end(70); + conf.set_output_size(30); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 0, + 
/* batchSize */ 10, + useGpu); + } +} + +TEST(Projection, scaling) { + ProjectionConfig conf; + conf.set_type("scaling"); + conf.set_input_size(10); + conf.set_output_size(10); + for (auto useGpu : {false}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 1, + /* batchSize */ 100, + useGpu); + } +} + +void testProjectionConv(size_t groups, bool isDeconv) { + const int NUM_FILTERS = 18; + const int FILTER_SIZE = 2; + const int FILTER_SIZE_Y = 4; + const int CHANNELS = 3; + const int IMAGE_SIZE = 16; + + ProjectionConfig conf; + if (isDeconv) { + conf.set_type("convt"); + } else { + conf.set_type("conv"); + } + conf.set_num_filters(NUM_FILTERS); + + ConvConfig* conv = conf.mutable_conv_conf(); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_channels(CHANNELS); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(groups); + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + } + conv->set_img_size(IMAGE_SIZE); + int output_x = outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true); + int output_y = outputSize(conv->img_size(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true); + conv->set_output_x(output_x); + conv->set_output_y(output_y); + if (isDeconv) { + conf.set_input_size(output_x * output_y * CHANNELS); + conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); + } else { + conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); + conf.set_output_size(output_x * output_y * NUM_FILTERS); + } + + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * + FILTER_SIZE_Y / groups, + /* batchSize */ 100, + true, + false, + NUM_FILTERS, + true); +} + +#ifndef PADDLE_ONLY_CPU +TEST(Projection, conv) { + /// test ConvProjection + testProjectionConv(1, false); + testProjectionConv(3, false); + /// test ConvTransProjection + testProjectionConv(1, true); + testProjectionConv(3, true); +} +#endif + +TEST(Layer, BilinearInterpLayer) { + TestConfig config; + config.layerConfig.set_type("bilinear_interp"); + config.biasSize = 0; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); + + LayerInputConfig* input = config.layerConfig.add_inputs(); + BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); + ImageConfig* image = bilinear->mutable_image_conf(); + image->set_img_size(32); + image->set_img_size_y(32); + image->set_channels(4); + + for (auto useGpu : {false, true}) { + for (auto outSize : {32, 64}) { + bilinear->set_out_size_x(outSize); + bilinear->set_out_size_y(outSize); + testLayerGrad(config, "bilinear_interp", 10, false, useGpu); + } + } +} + +TEST(Layer, concat) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("concat"); + config.layerConfig.set_size(15); + config.layerConfig.set_active_type("sigmoid"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "concat", 100, false, useGpu); + } +} + +TEST(Layer, AddtoLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("addto"); + config.layerConfig.set_size(10); + 
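+  // "addto" outputs the element-wise sum of its equally sized inputs
+  // (two size-10 inputs here), then applies the activation set below.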
config.layerConfig.set_active_type("sigmoid"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "addto", 100, false, useGpu); + } +} + +TEST(Layer, CTCLayer) { + TestConfig config; + config.layerConfig.set_type("ctc"); + config.layerConfig.set_norm_by_times(false); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "ctc", + 100, + /* trans */ false, /* useGpu */ + useGpu); + } +} + +TEST(Layer, cosSimLayer) { + TestConfig config; + config.layerConfig.set_type("cos"); + config.layerConfig.set_size(1); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cos", 100, false, useGpu); + } +} + +TEST(Layer, CosSimVecMatLayer) { + TestConfig config; + config.layerConfig.set_type("cos_vm"); + config.layerConfig.set_size(5); // output size + config.layerConfig.set_cos_scale(2.0); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cos_vm", 100, false, useGpu); + } +} + +void testDepthwiseConvLayer(const string& type, bool useGpu) { + TestConfig config; + config.biasSize = 32; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(32); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(16); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_img_size_y(8); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "depthwise_conv", 100, false, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); +} + +TEST(Layer, depthwiseConvLayer) { + // 'depthwise_conv' is a sepecial case of 'exconv' whose + // groups size equals to the input channels size. 
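+  // (testDepthwiseConvLayer sets groups(16) == channels(16), so each input
+  // channel is convolved with its own filters; num_filters(32) gives two
+  // filters per channel.)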
+ testDepthwiseConvLayer("exconv", /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testDepthwiseConvLayer("exconv", /* useGpu= */ true); +#endif +} + +void testConvLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + config.biasSize = 16; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(16); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(3); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_img_size_y(8); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "conv", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, convLayer) { + testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); + testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); +#endif +} + +void testConvTransLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + config.biasSize = 3; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(3); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(4); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(3 / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + + config.layerConfig.set_size(conv->img_size() * conv->img_size() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "convTrans", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, convTransLayer) { + for (auto useGpu : {false, true}) { + testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); + } +#ifndef PADDLE_ONLY_CPU + testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); +#endif +} + +TEST(Layer, blockExpandLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("blockexpand"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); + 
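+  // blockexpand slides a block_x x block_y window over the image (im2col
+  // style); each window position becomes one element of the output sequence,
+  // hence the layer size of block_x * block_y * channels set below.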
blockExpand->set_img_size_x(64); + blockExpand->set_img_size_y(32); + blockExpand->set_channels(3); + blockExpand->set_padding_x(0); + blockExpand->set_padding_y(0); + blockExpand->set_block_x(4); + blockExpand->set_block_y(32); + blockExpand->set_stride_x(2); + blockExpand->set_stride_y(2); + blockExpand->set_output_x(outputSize(blockExpand->img_size_x(), + blockExpand->block_x(), + blockExpand->padding_x(), + blockExpand->stride_x(), + /* caffeMode */ false)); + blockExpand->set_output_y(outputSize(blockExpand->img_size_y(), + blockExpand->block_y(), + blockExpand->padding_y(), + blockExpand->stride_y(), + /* caffeMode */ false)); + config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() * + blockExpand->channels()); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "blockexpand", 100, false, useGpu); + } +} + +TEST(Layer, maxoutLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("maxout"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + MaxOutConfig* maxout = input->mutable_maxout_conf(); + ImageConfig* image = maxout->mutable_image_conf(); + + image->set_img_size(32); + image->set_img_size_y(32); + image->set_channels(4); + maxout->set_groups(2); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "maxout", 10, false, useGpu); + } +} +void testFcLayer(string format, size_t nnz) { + TestConfig config; + config.biasSize = 4096; + config.layerConfig.set_type("fc"); + config.layerConfig.set_size(4096); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_drop_rate(0.1); + + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)}); + config.layerConfig.add_inputs(); + + LOG(INFO) << config.inputDefs[0].sparse.sparse << " " + << config.inputDefs[0].sparse.format; + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "fc", + 100, + /* trans */ false, + useGpu, + /* weight */ true); + } +} + +TEST(Layer, fcLayer) { + testFcLayer("", 4096 * 4096 * 2); + testFcLayer("csc", 4096 * 40); + testFcLayer("csr", 4096 * 40); +} + +TEST(Layer, SelectiveFullyConnectedLayer) { + TestConfig config; + size_t nin = 16; + size_t nout = 256; + config.layerConfig.set_type("selective_fc"); + config.layerConfig.set_size(nout); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_has_selected_colums(true); + config.layerConfig.set_selective_fc_pass_generation(false); + config.biasSize = nout; + + config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back( + {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", true)}); + config.layerConfig.add_inputs(); + + testLayerGrad(config, + "selective_fc", + 100, + /* trans= */ false, + /* useGup= */ false, + false); +#ifndef PADDLE_ONLY_CPU + testLayerGrad(config, + "selective_fc", + 100, + /* trans= */ false, + /* useGup= */ true, + false); +#endif +} + +TEST(Layer, DataNormLayer) { + TestConfig config; + config.layerConfig.set_type("data_norm"); + config.layerConfig.set_size(20); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100}); + config.inputDefs.back().isStatic = true; + config.layerConfig.add_inputs(); + + for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) { + config.layerConfig.set_data_norm_strategy(strategy); + // The parameters are static, so not support GPU now + testLayerGrad(config, + 
"data_norm", + 200, + /* trans */ false, + /* useGpu */ false); + } +} + +TEST(Layer, hsigmoidLayer) { + TestConfig config; + config.layerConfig.set_type("hsigmoid"); + config.layerConfig.set_num_classes(5); + config.layerConfig.set_size(1); + config.biasSize = config.layerConfig.num_classes() - 1; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // Not support GPU now + testLayerGrad(config, + "hsigmoid", + 100, + /* trans */ false, /* useGpu */ + false); +} + +TEST(Layer, multi_cross) { + TestConfig config; + config.layerConfig.set_type("multi-class-cross-entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad( + config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); + } +} + +TEST(Layer, multi_binary_label_sparse_mat) { + TestConfig config; + config.layerConfig.set_type("multi_binary_label_cross_entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "multi_binary_label_cross_entropy", + 100, + /* trans */ false, + useGpu); + } +} + +TEST(layer, multi_binary_label_id) { + TestConfig config; + config.layerConfig.set_type("multi_binary_label_cross_entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "multi_binary_label_cross_entropy", + 100, + /* trans */ false, + useGpu); + } +} + +TEST(Layer, multi_cross_with_selfnorm) { + TestConfig config; + config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); + config.layerConfig.set_softmax_selfnorm_alpha(0.1); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // Not support GPU now + testLayerGrad(config, + "multi_class_cross_entropy_with_selfnorm", + 100, + /* trans */ false, + /* useGpu */ false); +} + +TEST(Layer, multi_cross_soft) { + TestConfig config; + config.layerConfig.set_type("soft_binary_class_cross_entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "soft_binary_class_cross_entropy", + 100, + /* trans */ false, + useGpu); + } +} + +TEST(Layer, square_error) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, 
"square_error", 100, /* trans */ false, useGpu); + } +} + +TEST(Layer, sparse_square_error) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // "GpuSparseMatrix" as label is not supported + testLayerGrad(config, + "square_error", + 100, + /* trans */ false, + /* useGpu */ false); +} + +TEST(Layer, sparse_float_square_error) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // "GpuSparseMatrix" as label is not supported + testLayerGrad(config, + "square_error", + 100, + /* trans */ false, + /* useGpu */ false); +} + +TEST(Layer, square_error_weighted) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + config.testAccumulate = false; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); + } +} + +TEST(Layer, huber_two_class) { + TestConfig config; + config.layerConfig.set_type("huber"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); + } +} + +void testExpandLayer(string trans_type, bool hasSubseq) { + TestConfig config; + config.layerConfig.set_type("expand"); + + config.inputDefs.push_back( + {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, + "layer_0", + 10, + 0}); + config.inputDefs.push_back( + {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, + "layer_1", + 10, + 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.set_trans_type(trans_type); + LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "expand", 30, false, useGpu); + } +} + +TEST(Layer, ExpandLayer) { + testExpandLayer("non-seq", false); // non-seq expand to seq + testExpandLayer("non-seq", true); // non-seq expand to hasSubseq + testExpandLayer("seq", true); // seq expand to hasSubseq +} + +void testDegradeLayer(bool hasSubseq, + string layer_type, + string trans_type, + int stride) { + TestConfig config; + config.layerConfig.set_type(layer_type); + config.layerConfig.set_size(10); + config.layerConfig.set_seq_pool_stride(stride); + config.biasSize = 0; + + config.inputDefs.push_back( + {hasSubseq ? 
INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, + "layer_0", + 10, + 0}); + config.layerConfig.add_inputs(); + config.layerConfig.set_trans_type(trans_type); + + auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) { + for (auto useGpu : {false, true}) { + testLayerGrad(config, layer_type, 100, false, useGpu); + } + }; + + if (layer_type == "average") { + for (auto strategy : {"average", "sum", "squarerootn"}) { + LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type + << " average_strategy=" << strategy + << " seq_pool_stride=" << stride; + config.layerConfig.set_average_strategy(strategy); + testDegradeLayerGrad(config, layer_type); + } + } else { + LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type + << " seq_pool_stride=" << stride; + testDegradeLayerGrad(config, layer_type); + } +} + +TEST(Layer, MaxLayer) { + testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq + testDegradeLayer(false, + "max", + "non-seq", + 5); // seq max to a shorten seq, stride window = 5 + testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq + testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq +} + +TEST(Layer, SequenceLastInstanceLayer) { + testDegradeLayer(false, + "seqlastins", + "non-seq", + -1); // seq seqlastins to non-seq + testDegradeLayer(false, + "seqlastins", + "non-seq", + 5); // seq seqlastins to a shorten seq, stride window = 5 + testDegradeLayer(true, + "seqlastins", + "non-seq", + -1); // hasSubseq seqlastins to non-seq + testDegradeLayer( + true, "seqlastins", "seq", -1); // hasSubseq seqlastins to seq +} + +TEST(Layer, AverageLayer) { + testDegradeLayer(false, "average", "non-seq", -1); // seq average to non-seq + testDegradeLayer(false, + "average", + "non-seq", + 5); // seq average to a shorten seq, stride window = 5 + testDegradeLayer( + true, "average", "non-seq", -1); // hasSubseq average to non-seq + testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq +} + +TEST(Layer, SequenceConcatLayer) { + TestConfig config; + config.layerConfig.set_type("seqconcat"); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "seqconcat", 100, false, useGpu); + } +} + +TEST(Layer, SequenceReshapeLayer) { + TestConfig config; + config.layerConfig.set_type("seqreshape"); + config.layerConfig.set_size(10); + + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "seqreshape", 100, false, useGpu); + } +} + +TEST(Layer, ConvShiftLayer) { + TestConfig config; + config.layerConfig.set_type("conv_shift"); + config.layerConfig.set_size(10); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // Not support GPU now + testLayerGrad(config, "conv_shift", 100, false, false); +} + +TEST(Layer, PowerLayer) { + TestConfig config; + config.layerConfig.set_type("power"); + config.layerConfig.set_size(10); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + 
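+  // "power" computes an element-wise power: the size-1 input ("layer_0")
+  // supplies a per-sample scalar exponent and the size-10 input ("layer_1")
+  // the data to be raised.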
config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "power", 100, false, useGpu); + } +} + +TEST(Layer, ConvexCombinationLayer) { + TestConfig config; + config.layerConfig.set_type("convex_comb"); + config.layerConfig.set_size(20); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "convex_comb", 100, false, useGpu); + } +} + +TEST(Layer, InterpolationLayer) { + TestConfig config; + config.layerConfig.set_type("interpolation"); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "interpolation", 100, false, useGpu); + } +} + +TEST(Layer, OuterProdLayer) { + TestConfig config; + config.layerConfig.set_type("out_prod"); + config.layerConfig.set_size(100); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "out_prod", 100, false, useGpu); + } +} + +TEST(Layer, SlopeInterceptLayer) { + TestConfig config; + config.layerConfig.set_type("slope_intercept"); + config.layerConfig.set_size(10); + config.layerConfig.set_slope(1.0); + config.layerConfig.set_intercept(0.1); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "slope_intercept", 100, false, useGpu); + } +} + +TEST(Layer, ScalingLayer) { + TestConfig config; + config.layerConfig.set_type("scaling"); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "scaling", 100, false, useGpu); + } +} + +void testNormLayer(const string& normType, bool trans, bool useGpu) { + TestConfig config; + config.layerConfig.set_type("norm"); + config.layerConfig.set_active_type("relu"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + NormConfig* norm = input->mutable_norm_conf(); + norm->set_norm_type(normType); + norm->set_channels(16); + norm->set_size(5); + norm->set_scale(0.001); + norm->set_pow(0.75); + norm->set_blocked(0); + norm->set_img_size(14); + norm->set_img_size_y(7); + norm->set_output_x(norm->img_size()); + norm->set_output_y(norm->img_size_y()); + if (norm->norm_type() == "cmrnorm" || + norm->norm_type() == "cmrnorm-projection") { + norm->set_scale(norm->scale() / norm->size()); + } else { + norm->set_scale(norm->scale() / (norm->size() * norm->size())); + } + + config.layerConfig.set_size(norm->output_x() * norm->output_y() * + norm->channels()); + config.biasSize = 0; + + testLayerGrad(config, "norm", 100, trans, useGpu); +} + 
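Every TEST case in this file ultimately funnels into testLayerGrad, which perturbs inputs and parameters and compares the resulting cost changes against the layer's analytic gradients. As a rough illustration of that kind of check — a minimal numpy sketch with made-up names and tolerances, not Paddle's actual testLayerGrad — consider a toy layer whose cost is sum(x * w):

.. code-block:: python

    import numpy as np

    def numeric_grad(f, x, eps=1e-4):
        # central finite differences, one coordinate at a time
        grad = np.zeros_like(x)
        for idx in np.ndindex(*x.shape):
            orig = x[idx]
            x[idx] = orig + eps
            plus = f(x)
            x[idx] = orig - eps
            minus = f(x)
            x[idx] = orig
            grad[idx] = (plus - minus) / (2.0 * eps)
        return grad

    # toy "layer": cost = sum(x * w), so the analytic gradient w.r.t. x is w
    rng = np.random.RandomState(0)
    x = rng.uniform(-1.0, 1.0, size=(4, 3))
    w = rng.uniform(-1.0, 1.0, size=(4, 3))

    analytic = w
    numeric = numeric_grad(lambda v: float(np.sum(v * w)), x)
    assert np.allclose(analytic, numeric, atol=1e-5)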
+TEST(Layer, NormLayer) { + testNormLayer("cmrnorm-projection", + /* trans= */ false, /* useGpu= */ + true); + testNormLayer("cmrnorm-projection", + /* trans= */ false, /* useGpu= */ + false); +} + +void setPoolConfig(TestConfig* config, + PoolConfig* pool, + const string& poolType) { + (*config).biasSize = 0; + (*config).layerConfig.set_type("pool"); + (*config).layerConfig.set_num_filters(16); + + int kw = 3, kh = 3; + int pw = 0, ph = 0; + int sw = 2, sh = 2; + pool->set_pool_type(poolType); + pool->set_channels(16); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_start(0); + pool->set_padding(pw); + pool->set_padding_y(ph); + pool->set_stride(sw); + pool->set_stride_y(sh); + + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); + pool->set_output_x(ow); + pool->set_output_y(oh); +} + +void testPoolLayer(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = input->mutable_pool_conf(); + + pool->set_img_size(14); + pool->set_img_size_y(14); + setPoolConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool", 100, trans, useGpu); +} + +#ifndef PADDLE_ONLY_CPU +void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = input->mutable_pool_conf(); + + pool->set_size_y(4); + pool->set_stride_y(3); + pool->set_img_size(10); + pool->set_img_size_y(20); + setPoolConfig(&config, pool, poolType); + pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / + ((float)pool->stride_y()) + + 1.5); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool", 100, trans, useGpu); +} +#endif + +TEST(Layer, PoolLayer) { + testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); + testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); + +#ifndef PADDLE_ONLY_CPU + testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); +#endif +} + +void testSppLayer(const string& poolType, + const int pyramidHeight, + bool trans, + bool useGpu) { + TestConfig config; + config.layerConfig.set_type("spp"); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + SppConfig* sppConfig = input->mutable_spp_conf(); + sppConfig->set_pool_type(poolType); + sppConfig->set_pyramid_height(pyramidHeight); + ImageConfig* imageConfig = sppConfig->mutable_image_conf(); + imageConfig->set_channels(16); + imageConfig->set_img_size(10); + imageConfig->set_img_size_y(20); + int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); + config.layerConfig.set_size(outputSize * imageConfig->channels()); + 
testLayerGrad(config, "spp", 100, trans, useGpu); +} + +TEST(Layer, SpatialPyramidPoolLayer) { + for (auto useGpu : {false, true}) { + for (auto pyramidHeight : {1, 2, 3}) { + testSppLayer("avg-projection", pyramidHeight, false, useGpu); + testSppLayer("max-projection", pyramidHeight, false, useGpu); + } + } +} + +TEST(Layer, rankCostLayer) { + TestConfig config; + config.layerConfig.set_type("rank-cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "rank-cost", 100, false, useGpu); + } +} + +TEST(Layer, sumCostLayer) { + TestConfig config; + config.layerConfig.set_type("sum_cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "sum_cost", 100, false, useGpu); + } +} + +TEST(Layer, weightedRankCostLayer) { + TestConfig config; + config.layerConfig.set_type("rank-cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); + } +} + +TEST(Layer, TensorLayer) { + TestConfig config; + config.layerConfig.set_type("tensor"); + config.layerConfig.set_size(10); + config.layerConfig.set_active_type("sigmoid"); + config.biasSize = config.layerConfig.size(); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "tensor", 100, false, useGpu); + } +} + +TEST(Layer, RecurrentLayer) { + TestConfig config; + config.layerConfig.set_type("recurrent"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("tanh"); + config.biasSize = 4; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto reversed : {false, true}) { + config.layerConfig.set_reversed(reversed); + config.testState = !reversed; + testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); + } + } +} + +TEST(Layer, LstmLayer) { + TestConfig config; + config.layerConfig.set_type("lstmemory"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("tanh"); + config.layerConfig.set_active_state_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 28; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto reversed : {false, true}) { + config.layerConfig.set_reversed(reversed); + config.testState = !reversed; + testLayerGrad(config, "lstmemory", 100, /* trans= */ 
false, useGpu); + } + } + for (auto useGpu : {true}) { + config.testBatchState = true; + config.layerConfig.set_reversed(false); + testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); + } +} + +TEST(Layer, MDLstmLayer) { + TestConfig config; + config.layerConfig.set_type("mdlstmemory"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_active_state_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 4 * 9; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); + config.layerConfig.add_inputs(); + config.layerConfig.add_directions(true); + config.layerConfig.add_directions(true); + + for (auto useGpu : {false, true}) { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + config.layerConfig.set_directions(0, bool(i)); + config.layerConfig.set_directions(1, bool(j)); + testLayerGrad(config, "mdlstmemory", 100, false, useGpu); + } + } + } +} + +TEST(Layer, ParameterReluLayer) { + auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { + TestConfig config; + config.layerConfig.set_type("prelu"); + config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); + config.layerConfig.add_inputs(); + config.layerConfig.set_size(inputSize); + config.layerConfig.set_partial_sum(inputSize / + channels); // size of feature map + for (auto useGpu : {false, true}) { + testLayerGrad(config, "prelu", 100, false, useGpu); + } + }; + + testParameterReluLayer(192, 1); + testParameterReluLayer(192, 3); + testParameterReluLayer(192, 192); +} + +TEST(Layer, ResizeLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("resize"); + config.layerConfig.set_size(64); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "resize", 100, false, useGpu); + } +} + +TEST(Layer, RotateLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("rotate"); + const int CHANNEL = 2; + const int HEIGHT = 8; + const int WIDTH = 4; + const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; + config.layerConfig.set_size(INPUT_SIZE); + config.layerConfig.set_height(HEIGHT); + config.layerConfig.set_width(WIDTH); + config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "rotate", 100, false, useGpu); + } +} + +TEST(Layer, NCELayer) { + TestConfig config; + size_t numClasses = 4; + config.layerConfig.set_type("nce"); + config.layerConfig.set_size(1); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_num_classes(numClasses); + config.biasSize = numClasses; + + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * numClasses}); + config.inputDefs.push_back( + {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto withWeight : {false, true}) { + if (withWeight) { + config.inputDefs.push_back( + {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + } + + for (auto isIdLabel : {false, true}) { + config.inputDefs[1] = { + isIdLabel ? 
INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA, + "label", + /* dim= */ numClasses, + /* paraSize= */ 0}; + + for (auto withDist : {false, true}) { + config.layerConfig.clear_neg_sampling_dist(); + if (withDist) { + double sum = 0; + for (size_t i = 0; i < numClasses; ++i) { + real p = rand(); // NOLINT use rand_r + config.layerConfig.add_neg_sampling_dist(p); + sum += p; + } + for (size_t i = 0; i < numClasses; ++i) { + real p = config.layerConfig.neg_sampling_dist(i) / sum; + config.layerConfig.set_neg_sampling_dist(i, p); + } + } + LOG(INFO) << "NCELayer " + << " isIdLabel=" << isIdLabel << " withWeight=" << withWeight + << " withDist=" << withDist; + // Not support GPU now + testLayerGrad(config, + "nce", + 100, + /* trans= */ false, + /* useGpu */ false); + } + } + } +} + +TEST(Layer, GatedRecurrentLayer) { + TestConfig config; + config.layerConfig.set_type("gated_recurrent"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 12; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto reversed : {false, true}) { + config.layerConfig.set_reversed(reversed); + config.testState = !reversed; + testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, useGpu); + } + } +} + +TEST(Layer, GruStepLayer) { + TestConfig config; + config.layerConfig.set_type("gru_step"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 12; + + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); + config.inputDefs.push_back( + {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu); + } +} + +TEST(Layer, LstmStepLayer) { + TestConfig config; + config.layerConfig.set_type("lstm_step"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_active_state_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 12; + config.testAccumulate = false; + + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0}); + config.inputDefs.push_back( + {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu); + } +} + +void testBatchNormLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + const int CHANNELS = 10; + const int IMG_SIZE = 16; + const int IMG_SIZE_Y = 8; + size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; + config.layerConfig.set_type(type); + config.layerConfig.set_size(size); + config.layerConfig.set_active_type("sigmoid"); + config.biasSize = CHANNELS; + config.inputDefs.push_back({INPUT_DATA, + "layer_0", + /* dim= */ size, + /* paraSize= */ CHANNELS}); + + config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS}); + config.inputDefs.back().isStatic = true; + config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS}); + config.inputDefs.back().isStatic = true; + + LayerInputConfig* input = 
config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + ImageConfig* img_conf = input->mutable_image_conf(); + img_conf->set_channels(CHANNELS); + img_conf->set_img_size(IMG_SIZE); + img_conf->set_img_size_y(IMG_SIZE_Y); + + testLayerGrad(config, + "batch_norm", + 64, + /* trans= */ trans, + useGpu, + /* useWeight */ true); +} + +TEST(Layer, BatchNormalizationLayer) { + testBatchNormLayer("batch_norm", false, false); +#ifndef PADDLE_ONLY_CPU + testBatchNormLayer("batch_norm", false, true); + if (hl_get_cudnn_lib_version() >= int(4000)) { + testBatchNormLayer("cudnn_batch_norm", false, true); + } +#endif +} + +void testConvOperator(bool isDeconv) { + TestConfig config; + const int NUM_FILTERS = 16; + const int FILTER_SIZE = 2; + const int FILTER_SIZE_Y = 3; + const int CHANNELS = 3; + const int IMAGE_SIZE = 16; + const int IMAGE_SIZE_Y = 9; + OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); + if (isDeconv) { + operatorConf.set_type("convt"); + } else { + operatorConf.set_type("conv"); + } + ConvConfig* conv = operatorConf.mutable_conv_conf(); + operatorConf.set_num_filters(NUM_FILTERS); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_channels(CHANNELS); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_img_size(IMAGE_SIZE); + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + config.inputDefs.push_back({INPUT_DATA, + "layer_0", + conv->output_x() * conv->output_y() * CHANNELS, + 0}); + config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + NUM_FILTERS); + } + + config.inputDefs.push_back( + {INPUT_DATA, + "layer_1", + FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, + 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); +} + +TEST(Operator, conv) { + testConvOperator(/*isDeconv*/ true); + testConvOperator(/*isDeconv*/ false); +} + +TEST(Layer, FeatureMapExpandLayer) { + TestConfig config; + config.layerConfig.set_type("featmap_expand"); + const int CHANNELS = 10; + const int INPUT_SIZE = 100; + config.layerConfig.set_size(INPUT_SIZE * CHANNELS); + config.layerConfig.set_num_filters(CHANNELS); + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, + "layer_0", + /* dim= */ INPUT_SIZE, + /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + for (auto asRowVec : {false, true}) { + config.layerConfig.set_user_arg(asRowVec ? 
"as_row_vec" : "as_col_vec"); + testLayerGrad(config, + "featmap_expand", + /*batch_size*/ 100, + /* trans= */ false, + useGpu, + /* useWeight */ true); + } + } +} + +TEST(Layer, MultiplexLayer) { + TestConfig config; + const int LAYER_SIZE = 100; + config.layerConfig.set_type("multiplex"); + config.layerConfig.set_size(LAYER_SIZE); + + config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); + config.inputDefs.push_back( + {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); + config.inputDefs.push_back( + {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); + } +} + +TEST(Layer, PadLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("pad"); + + int c = 4; + int h = 31; + int w = 36; + size_t size = c * h * w; + config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PadConfig* pad = input->mutable_pad_conf(); + ImageConfig* image = pad->mutable_image_conf(); + + image->set_channels(c); + image->set_img_size(h); + image->set_img_size_y(w); + pad->add_pad_c(1); + pad->add_pad_c(2); + pad->add_pad_h(2); + pad->add_pad_h(3); + pad->add_pad_w(3); + pad->add_pad_w(5); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "pad", 10, false, useGpu); + } +} + +TEST(Layer, CrossChannelNormLayer) { + TestConfig config; + config.paramInitialMean = 1.; + config.paramInitialStd = 0.; + config.layerConfig.set_type("norm"); + config.layerConfig.set_size(100); + LayerInputConfig* input = config.layerConfig.add_inputs(); + NormConfig* norm = input->mutable_norm_conf(); + norm->set_norm_type("cross-channel-norm"); + norm->set_channels(10); + norm->set_size(100); + norm->set_scale(0); + norm->set_pow(0); + norm->set_blocked(0); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); + } +} + +TEST(Layer, smooth_l1) { + TestConfig config; + config.layerConfig.set_type("smooth_l1"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); + } +} + +TEST(Layer, multibox_loss) { + TestConfig config; + config.layerConfig.set_type("multibox_loss"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); + multiboxLoss->set_num_classes(21); + multiboxLoss->set_input_num(1); + multiboxLoss->set_overlap_threshold(0.5); + multiboxLoss->set_neg_pos_ratio(3); + multiboxLoss->set_neg_overlap(0.5); + multiboxLoss->set_background_id(0); + multiboxLoss->set_height(3); + multiboxLoss->set_width(3); + + size_t gtNum = 1; + MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); + labelValue->randomizeUniform(); + labelValue->add(-0.5); + labelValue->sigmoid(*labelValue); + real* labelData = labelValue->getData(); + size_t labelWidth = labelValue->getWidth(); + for (size_t i = 0; i < gtNum; ++i) { + *(labelData + i * labelWidth) = std::rand() % 20 + 1; + *(labelData + i * labelWidth + 1) = 0.400259; 
+ *(labelData + i * labelWidth + 2) = 0.377857; + *(labelData + i * labelWidth + 3) = 0.525712; + *(labelData + i * labelWidth + 4) = 0.519368; + } + vector seqStartPositions(gtNum + 1, 0); + for (size_t i = 1; i <= gtNum; ++i) { + seqStartPositions[i] = i; + } + + // Ensure at lease one matched bbox + MatrixPtr priorValue = Matrix::create(1, 72, false, false); + priorValue->randomizeUniform(); + priorValue->add(-0.5); + priorValue->sigmoid(*priorValue); + real* priorData = priorValue->getData(); + *(priorData) = 0.424811; + *(priorData + 1) = 0.397059; + *(priorData + 2) = 0.538905; + *(priorData + 3) = 0.447091; + *(priorData + 4) = 0.425720; + *(priorData + 5) = 0.515228; + *(priorData + 6) = 0.519452; + *(priorData + 7) = 0.591065; + + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}}); + config.inputDefs.push_back( + {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); + config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); + config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); + } +} + +TEST(Layer, TransLayer) { + TestConfig config; + const int height = 128; + const int width = 1028; + config.layerConfig.set_type("trans"); + config.layerConfig.set_size(width); + + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "trans", height, /* trans= */ false, useGpu); + } +} + +TEST(Layer, RowConvLayer) { + const int context = 3; + const int size = 512; + + TestConfig config; + config.layerConfig.set_type("row_conv"); + config.layerConfig.set_size(size); + config.layerConfig.set_active_type("sigmoid"); + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", size, context * size}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + RowConvConfig* conv = input->mutable_row_conv_conf(); + conv->set_context_length(context); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "row_conv", 100, false, useGpu, false); + } +} + +TEST(Layer, CropLayer) { + TestConfig config; + // config input_0 + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ImageConfig* img = input->mutable_image_conf(); + img->set_channels(4); + img->set_img_size(16); + config.layerConfig.set_axis(2); + config.layerConfig.add_offset(0); + config.layerConfig.add_offset(0); + + // config input_1 + config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); + input = config.layerConfig.add_inputs(); + img = input->mutable_image_conf(); + img->set_channels(2); + img->set_img_size(8); + + // config crop layer + config.layerConfig.set_type("crop"); + config.layerConfig.set_name("cropLayer"); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "crop", 100, false, useGpu, false); + } +} vector randSampling(real range, int n) { CHECK_GE(range, n); @@ -1929,18 +1914,20 @@ vector randSampling(real range, int n) { TEST(Layer, SubNestedSequenceLayer) { // layer size is not crutial for this layer, // so use a small layer size in unittest - const int layerSize = 8; - const int maxSeqNum = 5; - const int maxSeqLen = 5; - const int beamSize = 3; + const int layerSize = 4; + + const int maxSeqNum = 50; + const 
int maxSeqLen = 50; + const int maxBeamSize = 32; + + srand((size_t)(time(NULL))); + int beamSize = 1 + (rand() % maxBeamSize); TestConfig config; config.layerConfig.set_type("sub_nested_seq"); config.layerConfig.set_name("sub_nested_seq_layer"); config.layerConfig.set_size(layerSize); - // srand((size_t)(time(NULL))); - srand(1); int seqNum = 1 + (rand() % maxSeqNum); // sequence information for the first input, it is a nested sequence @@ -1969,6 +1956,7 @@ TEST(Layer, SubNestedSequenceLayer) { MatrixPtr seqInputPtr = Matrix::create(seqStartPos.back(), layerSize, false, false); + seqInputPtr->randomizeUniform(); config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "nested_seq_input", seqInputPtr, @@ -1989,35 +1977,35 @@ TEST(Layer, SubNestedSequenceLayer) { } } -// TEST(Layer, ClipLayer) { -// const size_t batchSize = 128; -// const size_t size = 512; -// TestConfig config; -// config.layerConfig.set_type("clip"); -// config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ClipConfig* layerConf = input->mutable_clip_conf(); -// double p1 = std::rand() / (double)RAND_MAX; -// double p2 = std::rand() / (double)RAND_MAX; -// layerConf->set_min(std::min(p1, p2)); -// layerConf->set_max(std::max(p1, p2)); -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "clip", batchSize, false, useGpu, false); -// } -// } -// -// TEST(Layer, RowL2NormLayer) { -// const size_t batchSize = 128; -// const size_t size = 512; -// TestConfig config; -// config.layerConfig.set_type("row_l2_norm"); -// config.layerConfig.set_size(size); -// config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); -// config.layerConfig.add_inputs(); -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); -// } -// } +TEST(Layer, ClipLayer) { + const size_t batchSize = 128; + const size_t size = 512; + TestConfig config; + config.layerConfig.set_type("clip"); + config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ClipConfig* layerConf = input->mutable_clip_conf(); + double p1 = std::rand() / (double)RAND_MAX; + double p2 = std::rand() / (double)RAND_MAX; + layerConf->set_min(std::min(p1, p2)); + layerConf->set_max(std::max(p1, p2)); + for (auto useGpu : {false, true}) { + testLayerGrad(config, "clip", batchSize, false, useGpu, false); + } +} + +TEST(Layer, RowL2NormLayer) { + const size_t batchSize = 128; + const size_t size = 512; + TestConfig config; + config.layerConfig.set_type("row_l2_norm"); + config.layerConfig.set_size(size); + config.inputDefs.push_back({INPUT_DATA, "input", size, 0}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false); + } +} int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index ebbe95a0c7..2bed2b5f45 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6097,9 +6097,11 @@ def sub_nested_seq_layer(input, selected_indices, name=None): The sub_nested_seq_layer accepts two inputs: the first one is a nested sequence; the second one is a set of selceted indices in the nested sequence. + Then sub_nest_seq_layer trims the first nested sequence input according to + the selected indices to form a new output. 
+ + This layer is useful in beam training. - Then sub_nest_seq_layer selects trims the first input according to the - selected indices to give a new output. This layer is used in beam training. The example usage is: From 3b2e8fc8f3d7e81adbef3cd47e848a32ff4fc7f0 Mon Sep 17 00:00:00 2001 From: liaogang Date: Mon, 7 Aug 2017 14:11:28 +0800 Subject: [PATCH 17/44] Accelerate CI process under android environment --- paddle/scripts/docker/build_android.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build_android.sh b/paddle/scripts/docker/build_android.sh index 56d290be4a..5584e29e2a 100644 --- a/paddle/scripts/docker/build_android.sh +++ b/paddle/scripts/docker/build_android.sh @@ -20,4 +20,4 @@ cmake -DCMAKE_SYSTEM_NAME=Android \ -DWITH_SWIG_PY=OFF \ .. make -j `nproc` -make install +make install -j `nproc` From 9f816352e56f9f350a49cb5822c1f2bf0327300a Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 7 Aug 2017 14:40:07 +0800 Subject: [PATCH 18/44] Follow comments --- python/paddle/v2/framework/op.py | 40 ++++++++++++------- .../paddle/v2/framework/tests/CMakeLists.txt | 2 +- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index a3dbd0cc89..81c8c3fed8 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -145,6 +145,16 @@ class OpDescCreationMethod(object): return False +class OpInfo(object): + def __init__(self, name, method, inputs, outputs, attrs, no_temp_outputs): + self.name = name + self.method = method + self.inputs = inputs + self.outputs = outputs + self.attrs = attrs + self.no_temp_outputs = no_temp_outputs + + def create_op_creation_method(op_proto): """ Generate op creation method for an OpProto @@ -155,15 +165,15 @@ def create_op_creation_method(op_proto): opdesc = method(*args, **kwargs) return core.Operator.create(opdesc.SerializeToString()) - return { - 'method': __impl__, - 'name': op_proto.type, - 'all_inputs': [var.name for var in op_proto.inputs], - 'all_outputs': [var.name for var in op_proto.outputs], - 'all_attrs': [attr.name for attr in op_proto.attrs], - 'all_no_temp_outputs': - [var.name for var in op_proto.outputs if not var.temporary] - } + return OpInfo( + method=__impl__, + name=op_proto.type, + inputs=[var.name for var in op_proto.inputs], + outputs=[var.name for var in op_proto.outputs], + attrs=[attr.name for attr in op_proto.attrs], + no_temp_outputs=[ + var.name for var in op_proto.outputs if not var.temporary + ]) class OperatorFactory(object): @@ -185,27 +195,27 @@ class OperatorFactory(object): "argument except type") t = args[0] - return self.get_op_creation_info(t)['method'](**kwargs) + return self.get_op_info(t).method(**kwargs) def types(self): return self.op_methods.keys() - def get_op_creation_info(self, t): + def get_op_info(self, t): if t not in self.op_methods: raise ValueError("operator %s is not registered", t) return self.op_methods.get(t) def get_op_input_names(self, type): - return self.get_op_creation_info(type)['all_inputs'] + return self.get_op_info(type).inputs def get_op_output_names(self, type): - return self.get_op_creation_info(type)['all_outputs'] + return self.get_op_info(type).outputs def get_op_attr_names(self, type): - return self.get_op_creation_info(type)['all_attrs'] + return self.get_op_info(type).attrs def get_op_no_temp_output_names(self, type): - return self.get_op_creation_info(type)['all_no_temp_outputs'] + return self.get_op_info(type).no_temp_outputs Operator 
= OperatorFactory() # Default global factory diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 2c648d22f3..d01e005aca 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -20,4 +20,4 @@ py_test(gradient_checker SRCS gradient_checker.py) py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) -py_test(test_operator SRCS test_operator.py +py_test(test_operator SRCS test_operator.py) From 16b70f3ef51eeb07412ec5f4f2e80db2f9a182e3 Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Mon, 7 Aug 2017 16:20:11 +0800 Subject: [PATCH 19/44] reduce time of test_TrainerOnePass --- paddle/trainer/tests/simple_sparse_neural_network.py | 4 ++-- paddle/trainer/tests/simple_sparse_neural_network_dp.py | 8 ++++---- paddle/trainer/tests/test_TrainerOnePass.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paddle/trainer/tests/simple_sparse_neural_network.py b/paddle/trainer/tests/simple_sparse_neural_network.py index 9604e1b9b4..30346ef299 100644 --- a/paddle/trainer/tests/simple_sparse_neural_network.py +++ b/paddle/trainer/tests/simple_sparse_neural_network.py @@ -1,6 +1,6 @@ from paddle.trainer_config_helpers import * -settings(batch_size=128, learning_method=AdaGradOptimizer(), learning_rate=1e-4) +settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4) file_list = 'trainer/tests/fake_file_list.list' @@ -12,7 +12,7 @@ define_py_data_sources2( embedding = embedding_layer( input=data_layer( - name="word_ids", size=65536), + name="word_ids", size=8191), size=128, param_attr=ParamAttr(sparse_update=True)) prediction = fc_layer(input=embedding, size=10, act=SoftmaxActivation()) diff --git a/paddle/trainer/tests/simple_sparse_neural_network_dp.py b/paddle/trainer/tests/simple_sparse_neural_network_dp.py index 8bfd1f37e7..86b272edfe 100644 --- a/paddle/trainer/tests/simple_sparse_neural_network_dp.py +++ b/paddle/trainer/tests/simple_sparse_neural_network_dp.py @@ -7,15 +7,15 @@ def init_hook(settings, is_train, **kwargs): @provider( - input_types={'word_ids': integer_value(65536), + input_types={'word_ids': integer_value(8191), 'label': integer_value(10)}, min_pool_size=0, init_hook=init_hook) def process(settings, filename): if settings.is_train: - data_size = 2**20 - else: data_size = 2**10 + else: + data_size = 2**5 for _ in xrange(data_size): - yield random.randint(0, 65535), random.randint(0, 9) + yield random.randint(0, 8190), random.randint(0, 9) diff --git a/paddle/trainer/tests/test_TrainerOnePass.cpp b/paddle/trainer/tests/test_TrainerOnePass.cpp index 4d0174f784..00ba61377a 100644 --- a/paddle/trainer/tests/test_TrainerOnePass.cpp +++ b/paddle/trainer/tests/test_TrainerOnePass.cpp @@ -100,25 +100,25 @@ TEST(average_window, gpu) { } TEST(average_window, gpu2) { - FLAGS_num_passes = 100; + FLAGS_num_passes = 20; trainerOnePassTest(configFile1, true, false, 2, 0.01); FLAGS_num_passes = 1; } TEST(average_window, gpu4) { - FLAGS_num_passes = 100; + FLAGS_num_passes = 20; trainerOnePassTest(configFile1, true, false, 4, 0.01); FLAGS_num_passes = 1; } TEST(average_window_cpu, gpu2) { - FLAGS_num_passes = 100; + FLAGS_num_passes = 20; trainerOnePassTest(configFile1, true, false, 2, 0.01, true); FLAGS_num_passes = 1; } TEST(average_window_cpu, gpu4) { - FLAGS_num_passes = 100; + FLAGS_num_passes = 20; trainerOnePassTest(configFile1, true, false, 4, 0.01, true); 
FLAGS_num_passes = 1; } From 460326f4b2233037aac7629d089f70d8ee389eb2 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Mon, 7 Aug 2017 16:45:56 +0800 Subject: [PATCH 20/44] Fix CI Test --- python/paddle/v2/framework/op.py | 2 +- python/paddle/v2/framework/tests/CMakeLists.txt | 1 - python/paddle/v2/framework/tests/op_test_util.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 81c8c3fed8..7fd8b55a5d 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -181,7 +181,7 @@ class OperatorFactory(object): self.op_methods = dict() for op_proto in get_all_op_protos(): method = create_op_creation_method(op_proto) - self.op_methods[method['name']] = method + self.op_methods[method.name] = method def __call__(self, *args, **kwargs): if 'type' in kwargs: diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index d01e005aca..4322781b34 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -6,7 +6,6 @@ py_test(test_scope SRCS test_scope.py) py_test(test_tensor SRCS test_tensor.py) py_test(test_mul_op SRCS test_mul_op.py) -py_test(test_network SRCS test_network.py) py_test(test_mean_op SRCS test_mean_op.py) py_test(test_protobuf SRCS test_protobuf.py) diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index d1f6de22e7..034df88ed8 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -29,7 +29,7 @@ class OpTestMeta(type): for place in places: for in_name in Operator.get_op_input_names(self.type): - if hasattr(self, "inputs") and in_name in self.inputs + if hasattr(self, "inputs") and in_name in self.inputs: kwargs[in_name] = in_name var = scope.new_var(in_name).get_tensor() arr = self.inputs[in_name] From 42c102a0b3761c0dba4ffddb5f9f1bac87e54841 Mon Sep 17 00:00:00 2001 From: caoying03 Date: Mon, 7 Aug 2017 17:19:20 +0800 Subject: [PATCH 21/44] follow comments. 
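This patch moves the nested-sequence bookkeeping into Argument::reorganizeSeqInfo and documents SubNestedSequenceLayer::calSelectedCols with worked examples (see the comments added below). The pure-Python sketch that follows mirrors that indexing logic under the same conventions — sub-sequence start positions grouped per outer sequence, and a -1 in selectedIndices marking a beam slot that selected nothing. The function names here are invented for illustration; the first call simply re-derives the reorganizedSeqInfo example given in the new Argument.h comment.

.. code-block:: python

    def reorganize_seq_info(seq_start_pos, sub_seq_start_pos):
        # group sub-sequence boundaries by the outer sequence they fall in
        info = [[] for _ in range(len(seq_start_pos) - 1)]
        seq_idx = 0
        for pos in sub_seq_start_pos:
            info[seq_idx].append(pos)
            if pos == seq_start_pos[seq_idx + 1]:
                seq_idx += 1
                if seq_idx == len(info):
                    break
                info[seq_idx].append(pos)
        return info

    def select_rows(seq_info, selected_indices):
        # collect the rows of every selected sub-sequence and rebuild the
        # sequence / sub-sequence start positions of the output
        rows, out_seq_start, out_sub_seq_start = [], [0], [0]
        for info, selected in zip(seq_info, selected_indices):
            for sub_idx in selected:
                if sub_idx == -1:  # -1 ends the selection for this sequence
                    break
                begin, end = info[sub_idx], info[sub_idx + 1]
                rows.extend(range(begin, end))
                out_sub_seq_start.append(out_sub_seq_start[-1] + end - begin)
            out_seq_start.append(out_sub_seq_start[-1])
        return rows, out_seq_start, out_sub_seq_start

    seq_info = reorganize_seq_info(
        [0, 4, 15, 20, 28], [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28])
    # -> [[0, 3, 4], [4, 5, 7, 10, 15], [15, 20], [20, 22, 23, 25, 28]]

    rows, _, _ = select_rows(seq_info, [[0, -1, -1]] * 4)
    # keeping only the first sub-sequence of each sequence selects rows
    # [0, 1, 2, 4, 15, 16, 17, 18, 19, 20, 21]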
--- paddle/gserver/layers/PrintLayer.cpp | 2 +- .../gserver/layers/SubNestedSequenceLayer.cpp | 120 +++++++++--------- paddle/parameter/Argument.cpp | 20 +++ paddle/parameter/Argument.h | 24 ++++ .../paddle/trainer_config_helpers/layers.py | 12 +- 5 files changed, 111 insertions(+), 67 deletions(-) diff --git a/paddle/gserver/layers/PrintLayer.cpp b/paddle/gserver/layers/PrintLayer.cpp index a97fa6bf78..0a1e17b9aa 100644 --- a/paddle/gserver/layers/PrintLayer.cpp +++ b/paddle/gserver/layers/PrintLayer.cpp @@ -29,7 +29,7 @@ public: vals.push_back(s.str()); } size_t pos = 0; - int i = 0; + size_t i = 0; std::ostringstream s; const std::string& format = config_.user_arg(); while (true) { diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp index f875fdea45..76f587fff7 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp @@ -31,22 +31,42 @@ public: void backward(const UpdateCallback& callback = nullptr) override; private: - void reorganizeSeqInfo(const ICpuGpuVectorPtr seqStartPos, - const ICpuGpuVectorPtr subSeqStartPos); - void calSelectedCols(const MatrixPtr selectedIndices, - const std::vector> inputSeqInfo); - void buildOutputSeqInfo(); + /* + * This functions generates the indices of rows in a batch according to the + * indices of selected sub-sequence in each sequence. + * + * Examples: + * selectedIndices: + * [ + * [0, 1, -1], + * [0, 1, 2], + * [0, -1, -1], + * [0, 2, 3], + * ] + * inputSeqInfo: + * [ + * [0,3,4], + * [4,5,7,10,15], + * [15,20], + * [20,22,23,25,28] + * ] + * + * ths output is saved to private member rowIndice_; + * [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + * 16,17,18,19,20,21,22,23,24,25,26,27] + */ - std::vector outSeqStartInfo_; - std::vector outSubSeqStartInfo_; + void calSelectedCols(const MatrixPtr selectedIndices, + const std::vector>& inputSeqInfo); // if the second input of this layer is on GPU memory, copy it to CPU memory. MatrixPtr selIdsCpu_; - // reorganize sequenceStartPositions and subSequenceStartPositions altogether + + // reorganized sequenceStartPositions and subSequenceStartPositions // into a 2d vector to facilitate the sequence selection process. - std::vector> inputSeqInfo_; + std::vector> inputSeqInfoVec_; - // the final seleted row indices in a batch, + // the final selected row indices in a batch, // rowIdx_ and selectedRows_ actually share a same memory. 
IVectorPtr rowIndice_; std::vector selectedRows_; @@ -63,30 +83,13 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap, return true; } -void SubNestedSequenceLayer::reorganizeSeqInfo( - const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos) { - int* seqStarts = seqStartPos->getMutableData(false); - int* subSeqStarts = subSeqStartPos->getMutableData(false); - - int seqNum = seqStartPos->getSize() - 1; - inputSeqInfo_.resize(seqNum, std::vector()); - int seqIdx = 0; - for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { - inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]); - if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { - seqIdx++; - if (seqIdx == seqNum) return; - inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]); - } - } -} - void SubNestedSequenceLayer::calSelectedCols( const MatrixPtr selectedIndices, - const std::vector> inputSeqInfo) { + const std::vector>& inputSeqInfo) { selectedRows_.clear(); - outSubSeqStartInfo_.resize(1, 0); - outSeqStartInfo_.resize(1, 0); + + std::vector outSeqStartInfo(1, 0); + std::vector outSubSeqStartInfo(1, 0); size_t seqNum = selectedIndices->getHeight(); size_t beamSize = selectedIndices->getWidth(); @@ -94,30 +97,35 @@ void SubNestedSequenceLayer::calSelectedCols( for (size_t j = 0; j < beamSize; ++j) { if (selectedIndices->getElement(i, j) == -1.) break; int selSubSeqIdx = selectedIndices->getElement(i, j); - CHECK_GT(inputSeqInfo_[i].size() - 1, selSubSeqIdx); + CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx); - size_t subSeqLen = - inputSeqInfo_[i][selSubSeqIdx + 1] - inputSeqInfo_[i][selSubSeqIdx]; + size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] - + inputSeqInfoVec_[i][selSubSeqIdx]; for (size_t k = 0; k < subSeqLen; ++k) - selectedRows_.push_back(inputSeqInfo_[i][selSubSeqIdx] + k); - outSubSeqStartInfo_.push_back(outSubSeqStartInfo_.back() + subSeqLen); + selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k); + outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen); } - outSeqStartInfo_.push_back(outSubSeqStartInfo_.back()); + outSeqStartInfo.push_back(outSubSeqStartInfo.back()); } -} -void SubNestedSequenceLayer::buildOutputSeqInfo() { - Argument& output = getOutput(); + if (useGpu_) { + rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); + rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); + } else { + rowIndice_ = + IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); + } + // create the sequence information for the output. 
ICpuGpuVector::resizeOrCreate( - output.sequenceStartPositions, outSeqStartInfo_.size(), false); - output.sequenceStartPositions->copyFrom( - outSeqStartInfo_.data(), outSeqStartInfo_.size(), false); + output_.sequenceStartPositions, outSeqStartInfo.size(), false); + output_.sequenceStartPositions->copyFrom( + outSeqStartInfo.data(), outSeqStartInfo.size(), false); ICpuGpuVector::resizeOrCreate( - output.subSequenceStartPositions, outSubSeqStartInfo_.size(), false); - output.subSequenceStartPositions->copyFrom( - outSubSeqStartInfo_.data(), outSubSeqStartInfo_.size(), false); + output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false); + output_.subSequenceStartPositions->copyFrom( + outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false); } void SubNestedSequenceLayer::forward(PassType passType) { @@ -131,7 +139,7 @@ void SubNestedSequenceLayer::forward(PassType passType) { if (dynamic_cast(selectedIndices.get())) { /* - * Currently, the second input for this layer generated by + * Currently, the second input for this layer is generated by * kmax_sequence_score_layer whose output is always stored on CPU, * or a data_layer which canbe on GPU. * @@ -149,20 +157,12 @@ void SubNestedSequenceLayer::forward(PassType passType) { selIdsCpu_ = selectedIndices; } - reorganizeSeqInfo(inputSeq.sequenceStartPositions, - inputSeq.subSequenceStartPositions); - calSelectedCols(selIdsCpu_, inputSeqInfo_); - resetOutput(selectedRows_.size(), getSize()); + Argument::reorganizeSeqInfo(inputSeq.sequenceStartPositions, + inputSeq.subSequenceStartPositions, + inputSeqInfoVec_); + calSelectedCols(selIdsCpu_, inputSeqInfoVec_); - if (useGpu_) { - rowIndice_ = IVector::create(selectedRows_.size(), useGpu_); - rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size()); - } else { - rowIndice_ = - IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_); - } - - buildOutputSeqInfo(); + resetOutput(selectedRows_.size(), getSize()); getOutputValue()->selectRows(*getInputValue(0), *rowIndice_); } diff --git a/paddle/parameter/Argument.cpp b/paddle/parameter/Argument.cpp index ef72b973c1..0547ac93cd 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/parameter/Argument.cpp @@ -666,4 +666,24 @@ void Argument::subArgFrom(const Argument& input, } } +void Argument::reorganizeSeqInfo( + const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos, + std::vector>& reorganizedSeqInfo) { + int* seqStarts = seqStartPos->getMutableData(false); + int* subSeqStarts = subSeqStartPos->getMutableData(false); + + int seqNum = seqStartPos->getSize() - 1; + reorganizedSeqInfo.resize(seqNum, std::vector()); + int seqIdx = 0; + for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) { + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + if (subSeqStarts[i] == seqStarts[seqIdx + 1]) { + seqIdx++; + if (seqIdx == seqNum) return; + reorganizedSeqInfo[seqIdx].push_back(subSeqStarts[i]); + } + } +} + } // namespace paddle diff --git a/paddle/parameter/Argument.h b/paddle/parameter/Argument.h index 0ccdef802e..d8d7a4398f 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/parameter/Argument.h @@ -317,6 +317,30 @@ struct Argument { */ void printValueString(std::ostream& stream, const std::string& prefix = "") const; + + /** + * @brief reorganizeSeqInfo will reorganize sequenceStartPositions and + * subSequenceStartPositions into a 2 dimensional arrary: reorganizedSeqInfo. + * + * @param seqStartPos: sequenceStartPositions of an Argument. 
+ * @param subSeqStartPos: subSequenceStartPositions of an Argument. + * @param the reorganized sequence start position information. + * + * Examples: + * seqStartPos: [0, 4, 15, 20, 28] + * subSeqStartPos: [0, 3, 4, 5, 7, 10, 15, 20, 22, 23, 25, 28] + * reorganizedSeqInfo: + * [ + * [0,3,4], + * [4,5,7,10,15], + * [15,20], + * [20,22,23,25,28] + * ] + */ + static void reorganizeSeqInfo( + const ICpuGpuVectorPtr seqStartPos, + const ICpuGpuVectorPtr subSeqStartPos, + std::vector>& reorganizedSeqInfo); }; } // namespace paddle diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 2bed2b5f45..2c7cebc359 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -6097,16 +6097,15 @@ def sub_nested_seq_layer(input, selected_indices, name=None): The sub_nested_seq_layer accepts two inputs: the first one is a nested sequence; the second one is a set of selceted indices in the nested sequence. - Then sub_nest_seq_layer trims the first nested sequence input according to - the selected indices to form a new output. - - This layer is useful in beam training. - + Then sub_nest_seq_layer trims the first nested sequence input according + to the selected indices to form a new output. This layer is useful in + beam training. The example usage is: .. code-block:: python - sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices]) + + sub_nest_seq = sub_nested_seq_layer(input=[data, selected_indices]) :param input: A nested sequence. @@ -6118,6 +6117,7 @@ def sub_nested_seq_layer(input, selected_indices, name=None): :return: LayerOutput object. :rtype: LayerOutput """ + assert isinstance(input, LayerOutput), ( 'The first input of ' 'sub_nested_seq_layer must be a Paddle layer.') From 73192bb12ac78a546ae04aab26db9107719c535a Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 7 Aug 2017 19:09:34 +0800 Subject: [PATCH 22/44] add a batch norm inference kernel. --- paddle/cuda/CMakeLists.txt | 1 + paddle/cuda/include/hl_batch_norm.h | 50 +++++++++++++ paddle/cuda/src/hl_batch_norm.cu | 68 ++++++++++++++++++ paddle/gserver/layers/CudnnBatchNormLayer.cpp | 37 +++++++--- paddle/gserver/tests/test_BatchNorm.cpp | 70 +++++++++++++++++++ 5 files changed, 216 insertions(+), 10 deletions(-) create mode 100644 paddle/cuda/include/hl_batch_norm.h create mode 100644 paddle/cuda/src/hl_batch_norm.cu diff --git a/paddle/cuda/CMakeLists.txt b/paddle/cuda/CMakeLists.txt index 73ffa690d9..0865b02c4f 100755 --- a/paddle/cuda/CMakeLists.txt +++ b/paddle/cuda/CMakeLists.txt @@ -39,6 +39,7 @@ set(CUDA_CU_SOURCES src/hl_cuda_lstm.cu src/hl_top_k.cu src/hl_batch_transpose.cu + src/hl_batch_norm.cu src/hl_cuda_sequence.cu src/hl_table_apply.cu) diff --git a/paddle/cuda/include/hl_batch_norm.h b/paddle/cuda/include/hl_batch_norm.h new file mode 100644 index 0000000000..e1fea13163 --- /dev/null +++ b/paddle/cuda/include/hl_batch_norm.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#ifndef HL_BATCH_NORM_H_ +#define HL_BATCH_NORM_H_ + +#include "hl_base.h" + +/** + * @brief batch norm inferece. + * + * @param[in] input input data. + * @param[out] output output data. + * @param[in] scale batch normalization scale parameter (in original + * paper scale is referred to as gamma). + * @param[in] bias batch normalization bias parameter (in original + * paper scale is referred to as beta). + * @param[in] estimatedMean + * @param[in] estimatedVar It is suggested that resultRunningMean, + * resultRunningVariance from the + * cudnnBatchNormalizationForwardTraining call + * accumulated during the training phase are passed + * as inputs here. + * @param[in] epsilon Epsilon value used in the batch + * normalization formula. + */ +extern void hl_batch_norm_cuda_inference(const real* input, + real* output, + const real* scale, + const real* bias, + const real* estimatedMean, + const real* estimatedVar, + const double epsilon, + size_t batchSize, + size_t channel, + size_t height, + size_t width); + +#endif // HL_BATCH_NORM_H_ diff --git a/paddle/cuda/src/hl_batch_norm.cu b/paddle/cuda/src/hl_batch_norm.cu new file mode 100644 index 0000000000..57474ee2f7 --- /dev/null +++ b/paddle/cuda/src/hl_batch_norm.cu @@ -0,0 +1,68 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "hl_batch_norm.h" + +__global__ void batchNormInference(real* output, + const real* input, + const real* scale, + const real* bias, + const real* estimatedMean, + const real* estimatedVar, + const double epsilon, + size_t batchSize, + size_t channel, + size_t height, + size_t width) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + const int num = channel * height * width; + const int batch = blockIdx.y; + for (int i = tid; i < num; i += blockDim.x) { + const int c = (i / (height * width)) % channel; + const int id = batch * num + i; + real val = input[id] - estimatedMean[c]; + val /= sqrt(estimatedVar[c] + epsilon); + val *= scale[c]; + val += bias[c]; + output[id] = val; + } +} + +void hl_batch_norm_cuda_inference(const real* input, + real* output, + const real* scale, + const real* bias, + const real* estimatedMean, + const real* estimatedVar, + const double epsilon, + size_t batchSize, + size_t channel, + size_t height, + size_t width) { + dim3 block(256, 1); + dim3 grid(1, batchSize); + batchNormInference<<>>(output, + input, + scale, + bias, + estimatedMean, + estimatedVar, + epsilon, + batchSize, + channel, + height, + width); + + CHECK_SYNC("hl_batch_norm_cuda_inference failed!"); +} diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index 09dac05a7a..d99b50385e 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "CudnnBatchNormLayer.h" #include "Layer.h" +#include "paddle/cuda/include/hl_batch_norm.h" #include "paddle/utils/Stat.h" namespace paddle { @@ -79,16 +80,32 @@ void CudnnBatchNormLayer::forward(PassType passType) { savedInvVar); } else { // used movingMean and movingVar in testing - hl_batch_norm_forward_inference(ioDesc_, - input, - ioDesc_, - output, - bnParamDesc_, - gamma, - beta, - movingMean, - movingVar, - EPS); + if (batchSize > 1024) { + // when batchSize is larger than 1024, there is a bug + // in cudnn library. + hl_batch_norm_cuda_inference(input, + output, + gamma, + beta, + movingMean, + movingVar, + EPS, + batchSize, + channels_, + imageH_, + imageW_); + } else { + hl_batch_norm_forward_inference(ioDesc_, + input, + ioDesc_, + output, + bnParamDesc_, + gamma, + beta, + movingMean, + movingVar, + EPS); + } } /* activation */ { diff --git a/paddle/gserver/tests/test_BatchNorm.cpp b/paddle/gserver/tests/test_BatchNorm.cpp index 83fcfed46c..659eefa31b 100644 --- a/paddle/gserver/tests/test_BatchNorm.cpp +++ b/paddle/gserver/tests/test_BatchNorm.cpp @@ -21,6 +21,8 @@ limitations under the License. */ #include "paddle/utils/GlobalConstants.h" #include "LayerGradUtil.h" +#include "paddle/cuda/include/hl_batch_norm.h" +#include "paddle/math/tests/TensorCheck.h" #include "paddle/testing/TestUtil.h" using namespace paddle; // NOLINT @@ -117,6 +119,74 @@ TEST(Layer, batchNorm) { CHECK_EQ(static_cast(convLayer->getOutputValue()->getWidth()), 576); } +#ifndef PADDLE_ONLY_CPU +void batchNormInference(int n, int c, int h, int w) { + MatrixPtr input = std::make_shared(n, c * h * w); + MatrixPtr cudnnOut = std::make_shared(n, c * h * w); + MatrixPtr cudaOut = std::make_shared(n, c * h * w); + MatrixPtr cudnnCheck = std::make_shared(n, c * h * w); + MatrixPtr cudaCheck = std::make_shared(n, c * h * w); + input->randomizeUniform(); + cudnnOut->zeroMem(); + cudaOut->zeroMem(); + + MatrixPtr scale = std::make_shared(1, c); + scale->randomizeUniform(); + MatrixPtr bias = std::make_shared(1, c); + bias->randomizeUniform(); + + MatrixPtr movingMean = std::make_shared(1, c); + movingMean->randomizeUniform(); + + MatrixPtr movingVar = std::make_shared(1, c); + movingVar->randomizeUniform(); + movingVar->clip(0.01, 50); + + hl_tensor_descriptor ioDesc; + hl_tensor_descriptor bnDesc; + hl_create_tensor_descriptor(&ioDesc); + hl_create_tensor_descriptor(&bnDesc); + hl_tensor_reshape(ioDesc, n, c, h, w); + hl_tensor_reshape(bnDesc, 1, c, 1, 1); + + double EPS = 1E-5; + hl_batch_norm_forward_inference(ioDesc, + input->getData(), + ioDesc, + cudnnOut->getData(), + bnDesc, + scale->getData(), + bias->getData(), + movingMean->getData(), + movingVar->getData(), + EPS); + + hl_batch_norm_cuda_inference(input->getData(), + cudaOut->getData(), + scale->getData(), + bias->getData(), + movingMean->getData(), + movingVar->getData(), + EPS, + n, + c, + h, + w); + + cudnnCheck->copyFrom(*cudnnOut); + cudaCheck->copyFrom(*cudaOut); + autotest::TensorCheckErr(*cudnnCheck, *cudaCheck); + + hl_destroy_tensor_descriptor(ioDesc); + hl_destroy_tensor_descriptor(bnDesc); +} + +TEST(BatchNorm, Inference) { + batchNormInference(33, 267, 1, 1); + batchNormInference(19, 105, 4, 4); +} +#endif + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); From bf08e5d985a39f1bb4d9085c042cdc78de8fbecb Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 7 Aug 2017 19:18:40 +0800 Subject: [PATCH 23/44] modify code comments. 
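The previous patch added hl_batch_norm_cuda_inference as a workaround for a cuDNN issue with batch sizes above 1024, and this one only reworks its comments. Element-wise, the kernel applies the usual inference-time transform (x - mean) / sqrt(var + eps) * scale + bias per channel of NCHW input. A small numpy sketch of the same arithmetic is given below as an illustration only — the helper name is invented, while the shapes, the epsilon, and the variance clipping follow one of the new test_BatchNorm cases:

.. code-block:: python

    import numpy as np

    def batch_norm_inference(x, scale, bias, mean, var, eps=1e-5):
        # x is laid out as NCHW; scale/bias/mean/var hold one value per channel
        c = x.shape[1]
        shape = (1, c, 1, 1)
        inv_std = 1.0 / np.sqrt(var.reshape(shape) + eps)
        return ((x - mean.reshape(shape)) * inv_std * scale.reshape(shape)
                + bias.reshape(shape))

    rng = np.random.RandomState(0)
    n, c, h, w = 19, 105, 4, 4                  # same shape as one test case
    x = rng.uniform(-1.0, 1.0, size=(n, c, h, w))
    scale = rng.uniform(-1.0, 1.0, size=c)
    bias = rng.uniform(-1.0, 1.0, size=c)
    moving_mean = rng.uniform(-1.0, 1.0, size=c)
    moving_var = np.clip(rng.uniform(0.0, 1.0, size=c), 0.01, 50.0)

    y = batch_norm_inference(x, scale, bias, moving_mean, moving_var)
    assert y.shape == x.shape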
--- paddle/cuda/include/hl_batch_norm.h | 24 +++++++++---------- paddle/gserver/layers/CudnnBatchNormLayer.cpp | 4 ++-- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/paddle/cuda/include/hl_batch_norm.h b/paddle/cuda/include/hl_batch_norm.h index e1fea13163..afc5e0b2de 100644 --- a/paddle/cuda/include/hl_batch_norm.h +++ b/paddle/cuda/include/hl_batch_norm.h @@ -20,20 +20,18 @@ limitations under the License. */ /** * @brief batch norm inferece. * - * @param[in] input input data. - * @param[out] output output data. - * @param[in] scale batch normalization scale parameter (in original - * paper scale is referred to as gamma). - * @param[in] bias batch normalization bias parameter (in original - * paper scale is referred to as beta). + * @param[in] input input data. + * @param[out] output output data. + * @param[in] scale batch normalization scale parameter (in original + * paper scale is referred to as gamma). + * @param[in] bias batch normalization bias parameter (in original + * paper scale is referred to as beta). * @param[in] estimatedMean - * @param[in] estimatedVar It is suggested that resultRunningMean, - * resultRunningVariance from the - * cudnnBatchNormalizationForwardTraining call - * accumulated during the training phase are passed - * as inputs here. - * @param[in] epsilon Epsilon value used in the batch - * normalization formula. + * @param[in] estimatedVar The moving mean and variance + * accumulated during the training phase are passed + * as inputs here. + * @param[in] epsilon Epsilon value used in the batch + * normalization formula. */ extern void hl_batch_norm_cuda_inference(const real* input, real* output, diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index d99b50385e..cc2cc21cdf 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -81,8 +81,8 @@ void CudnnBatchNormLayer::forward(PassType passType) { } else { // used movingMean and movingVar in testing if (batchSize > 1024) { - // when batchSize is larger than 1024, there is a bug - // in cudnn library. + // there is a bug in cudnn library when the batch size + // is larger than 1024. hl_batch_norm_cuda_inference(input, output, gamma, From da7b9a5eb309d936cf836b5201a71962e895e2c4 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 7 Aug 2017 19:26:10 +0800 Subject: [PATCH 24/44] Remove the warning in hl_batch_norm_forward_inference function. --- paddle/cuda/src/hl_cuda_cudnn.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 7ad8a39768..78642a1744 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -1023,14 +1023,6 @@ void hl_batch_norm_forward_inference(hl_tensor_descriptor inputDesc, real beta = 1.0f; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; - int batch_size = ((cudnn_tensor_descriptor)inputDesc)->batch_size; - if (batch_size > 1024 && g_cudnn_lib_version < 6000) { - LOG(INFO) << " To process current batch data with size " << batch_size - << " (>1024), cudnnBatchNorm requires cuDNN version >= 6000." 
- << " If there is an error complaining CUDNN_STATUS_NOT_SUPPORTED," - << " just recompile PaddlePaddle with cuDNN >= 6000, replacing" - << " current version " << g_cudnn_lib_version; - } CHECK_CUDNN( dynload::cudnnBatchNormalizationForwardInference(t_resource.cudnn_handle, mode, From 36ac89b9c4ba2662eea633d9bd1d8e492b6b1b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AD=A6=E6=AF=85?= Date: Mon, 7 Aug 2017 19:31:10 +0800 Subject: [PATCH 25/44] Put everything in paddle python wheel package enable pip install paddle (#3102) * put everything in paddle wheel * update * update * fix unitest * with platform specs --- paddle/CMakeLists.txt | 2 -- paddle/api/CMakeLists.txt | 10 ++-------- paddle/scripts/CMakeLists.txt | 14 ++++++-------- paddle/scripts/submit_local.sh.in | 0 paddle/setup.py.in | 32 ------------------------------- python/CMakeLists.txt | 2 +- python/setup.py.in | 29 +++++++++++++++++++++------- 7 files changed, 31 insertions(+), 58 deletions(-) mode change 100644 => 100755 paddle/scripts/submit_local.sh.in delete mode 100644 paddle/setup.py.in diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index f8a88cf317..cf61a243e9 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -22,7 +22,5 @@ if(WITH_C_API) endif() if(WITH_SWIG_PY) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in - ${CMAKE_CURRENT_SOURCE_DIR}/setup.py) add_subdirectory(api) endif() diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 84da89a142..7a1e8b8b26 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -82,9 +82,7 @@ SWIG_LINK_LIBRARIES(swig_paddle add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch dist/.timestamp - COMMAND rm -rf py_paddle.egg-info build + COMMAND ${CMAKE_COMMAND} -E touch .timestamp WORKING_DIRECTORY ${PROJ_ROOT}/paddle DEPENDS _swig_paddle ) @@ -92,10 +90,6 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so # TODO(yuyang18) : make wheel name calculated by cmake add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) -install(DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/dist/ - DESTINATION opt/paddle/share/wheels -) - if(WITH_TESTING) IF(NOT PY_PIP_FOUND) SET(PIP_SOURCES_DIR ${PYTHON_SOURCES_DIR}/pip) @@ -108,7 +102,7 @@ if(WITH_TESTING) BUILD_COMMAND "" INSTALL_COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py install BUILD_IN_SOURCE 1 - DEPENDS python setuptools python_api_wheel + #DEPENDS python setuptools python_api_wheel ) ENDIF() add_subdirectory(test) diff --git a/paddle/scripts/CMakeLists.txt b/paddle/scripts/CMakeLists.txt index 66a46e1883..a52f06fe49 100644 --- a/paddle/scripts/CMakeLists.txt +++ b/paddle/scripts/CMakeLists.txt @@ -1,17 +1,15 @@ configure_file(submit_local.sh.in - submit_local.sh + paddle @ONLY) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/submit_local.sh DESTINATION bin +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle DESTINATION bin PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ - RENAME paddle) + GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) configure_file(tools/usage_stat/usage.sh - usage.sh + paddle_usage @ONLY) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/usage.sh DESTINATION opt/paddle/bin 
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/paddle_usage DESTINATION opt/paddle/bin PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ - RENAME paddle_usage) + GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ) diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in old mode 100644 new mode 100755 diff --git a/paddle/setup.py.in b/paddle/setup.py.in deleted file mode 100644 index af107e7672..0000000000 --- a/paddle/setup.py.in +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from setuptools import setup, Extension - -setup(name="py_paddle", - version="${PADDLE_VERSION}", - packages=['py_paddle'], - include_package_data=True, - package_data={'py_paddle':['*.py','_swig_paddle.so']}, - install_requires = [ - 'nltk>=3.2.2', - # We use `numpy.flip` in `test_image.py`. - # `numpy.flip` is introduced in `1.12.0` - 'numpy>=1.12.0', # The numpy is required. - 'protobuf==${PROTOBUF_VERSION}' # The paddle protobuf version - ], - url='http://www.paddlepaddle.org/', - license='Apache 2.0', -) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 0171f9d8cc..b5030da8e7 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -39,7 +39,7 @@ add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS - ${OUTPUT_DIR}/.timestamp) + ${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) diff --git a/python/setup.py.in b/python/setup.py.in index 7808238aa6..38f0a503be 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,4 +1,8 @@ -from setuptools import setup +from setuptools import setup, Distribution + +class BinaryDistribution(Distribution): + def has_ext_modules(foo): + return True packages=['paddle', 'paddle.proto', @@ -11,7 +15,8 @@ packages=['paddle', 'paddle.v2.master', 'paddle.v2.plot', 'paddle.v2.framework', - 'paddle.v2.framework.proto'] + 'paddle.v2.framework.proto', + 'py_paddle'] setup_requires=["requests", "numpy>=1.12", @@ -21,23 +26,33 @@ setup_requires=["requests", "rarfile", "scipy>=0.19.0", "Pillow", - "nltk"] + "nltk>=3.2.2"] if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: setup_requires+=["opencv-python"] -setup(name='paddle', +setup(name='paddlepaddle', version='${PADDLE_VERSION}', description='Parallel Distributed Deep Learning', install_requires=setup_requires, packages=packages, - package_data={'paddle.v2.master': ['libpaddle_master.so'], - 'paddle.v2.framework': ['core.so'] + package_data={ + 'paddle.v2.master': ['libpaddle_master.so'], + 'paddle.v2.framework': ['core.so'], + 'py_paddle':['*.py','_swig_paddle.so'] }, package_dir={ '': '${CMAKE_CURRENT_SOURCE_DIR}', # The paddle.v2.framework.proto will 
be generated while compiling. # So that package points to other directory. - 'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework' + 'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework', + 'py_paddle': '${PROJ_ROOT}/paddle/py_paddle' }, + scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'], + distclass=BinaryDistribution, + data_files=[('/usr/local/opt/paddle/bin', + ['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage', + '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer', + '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model', + '${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])] ) From 493396d81cbcd1e29b5ea6c3aa11cfa20496b773 Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Mon, 7 Aug 2017 19:50:37 +0800 Subject: [PATCH 26/44] add support_gpu (#3304) * add support_gpu * fix allclose * fix name error and symplify code --- paddle/framework/op_registry.h | 6 ++++++ paddle/framework/pybind.cc | 2 ++ python/paddle/v2/framework/tests/op_test_util.py | 6 ++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/paddle/framework/op_registry.h b/paddle/framework/op_registry.h index 6c26183818..b2813da83d 100644 --- a/paddle/framework/op_registry.h +++ b/paddle/framework/op_registry.h @@ -260,6 +260,12 @@ class OpRegistry { return CreateOp(op_desc.type(), inputs, outputs, attrs); } + static bool SupportGPU(const std::string& op_type) { + OperatorWithKernel::OpKernelKey key; + key.place_ = platform::GPUPlace(); + return OperatorWithKernel::AllOpKernels().at(op_type).count(key) != 0; + } + static std::shared_ptr CreateGradOp(const OperatorBase& op) { PADDLE_ENFORCE(!op.IsNetOp(), "Use framework::Backward to get backward ops"); diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index cbb86c4195..d4ac8fda54 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -200,6 +200,8 @@ All parameter, weight, gradient are variables in Paddle. return OpRegistry::CreateOp(desc); }); + operator_base.def_static("support_gpu", &OpRegistry::SupportGPU); + operator_base.def("backward", [](const OperatorBase &forwardOp, const std::unordered_set &no_grad_vars) { diff --git a/python/paddle/v2/framework/tests/op_test_util.py b/python/paddle/v2/framework/tests/op_test_util.py index e6bc7d8a9b..636828064f 100644 --- a/python/paddle/v2/framework/tests/op_test_util.py +++ b/python/paddle/v2/framework/tests/op_test_util.py @@ -28,7 +28,7 @@ class OpTestMeta(type): kwargs = dict() places = [] places.append(core.CPUPlace()) - if core.is_compile_gpu(): + if core.is_compile_gpu() and core.Operator.support_gpu(self.type): places.append(core.GPUPlace(0)) for place in places: @@ -66,7 +66,9 @@ class OpTestMeta(type): for out_name in func.all_output_args: actual = numpy.array(scope.find_var(out_name).get_tensor()) expect = self.outputs[out_name] - numpy.isclose(actual, expect) + self.assertTrue( + numpy.allclose(actual, expect), + "output name: " + out_name + "has diff") obj.test_all = test_all return obj From 7da1db053bc14f3c3f96ba3bae36519f679abcb4 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Mon, 7 Aug 2017 20:27:08 +0800 Subject: [PATCH 27/44] update cuda kernel. 
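The SupportGPU query added in PATCH 26 reduces to a lookup in the per-operator kernel registry: an operator supports GPU execution if a kernel is registered under a GPUPlace key for its type, which is what AllOpKernels().at(op_type).count(key) checks. The self-contained toy below models only that lookup; Place and KernelRegistry are illustrative stand-ins, not Paddle types, and onehot_cross_entropy is shown as CPU-only because PATCH 28 later drops its GPU kernel registration.

#include <map>
#include <set>
#include <string>

// Toy model of the registry behind OpRegistry::SupportGPU: each op type
// maps to the set of places it has kernels for, and "supports GPU" simply
// means a GPU entry exists for that op type.
enum class Place { kCPU, kGPU };
using KernelRegistry = std::map<std::string, std::set<Place>>;

bool SupportGPU(const KernelRegistry& registry, const std::string& op_type) {
  auto it = registry.find(op_type);
  return it != registry.end() && it->second.count(Place::kGPU) > 0;
}

int main() {
  KernelRegistry registry;
  registry["add_two"] = {Place::kCPU, Place::kGPU};
  registry["onehot_cross_entropy"] = {Place::kCPU};  // CPU-only kernel
  const bool ok = SupportGPU(registry, "add_two") &&
                  !SupportGPU(registry, "onehot_cross_entropy");
  return ok ? 0 : 1;
}

The Python test harness relies on the same idea: op_test_util.py only appends core.GPUPlace(0) to the list of places when core.Operator.support_gpu(self.type) is true.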
--- paddle/cuda/src/hl_batch_norm.cu | 30 +++++++++---------- paddle/gserver/layers/CudnnBatchNormLayer.cpp | 29 +++++++++--------- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/paddle/cuda/src/hl_batch_norm.cu b/paddle/cuda/src/hl_batch_norm.cu index 57474ee2f7..5828ecb8e0 100644 --- a/paddle/cuda/src/hl_batch_norm.cu +++ b/paddle/cuda/src/hl_batch_norm.cu @@ -25,11 +25,11 @@ __global__ void batchNormInference(real* output, size_t channel, size_t height, size_t width) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; + const int tid = threadIdx.x; const int num = channel * height * width; - const int batch = blockIdx.y; + const int batch = blockIdx.x; for (int i = tid; i < num; i += blockDim.x) { - const int c = (i / (height * width)) % channel; + const int c = i / (height * width); const int id = batch * num + i; real val = input[id] - estimatedMean[c]; val /= sqrt(estimatedVar[c] + epsilon); @@ -50,19 +50,17 @@ void hl_batch_norm_cuda_inference(const real* input, size_t channel, size_t height, size_t width) { - dim3 block(256, 1); - dim3 grid(1, batchSize); - batchNormInference<<>>(output, - input, - scale, - bias, - estimatedMean, - estimatedVar, - epsilon, - batchSize, - channel, - height, - width); + batchNormInference<<>>(output, + input, + scale, + bias, + estimatedMean, + estimatedVar, + epsilon, + batchSize, + channel, + height, + width); CHECK_SYNC("hl_batch_norm_cuda_inference failed!"); } diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index cc2cc21cdf..44ba2c4b7d 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -80,9 +80,21 @@ void CudnnBatchNormLayer::forward(PassType passType) { savedInvVar); } else { // used movingMean and movingVar in testing - if (batchSize > 1024) { - // there is a bug in cudnn library when the batch size - // is larger than 1024. + if (batchSize <= 1024) { + hl_batch_norm_forward_inference(ioDesc_, + input, + ioDesc_, + output, + bnParamDesc_, + gamma, + beta, + movingMean, + movingVar, + EPS); + } else { + // There is a limitation in cudnn library. + // When the batch size is larger than 1024 in cuDNN v5.1, + // the cudnnBatchNormalizationForwardInference will fail. hl_batch_norm_cuda_inference(input, output, gamma, @@ -94,17 +106,6 @@ void CudnnBatchNormLayer::forward(PassType passType) { channels_, imageH_, imageW_); - } else { - hl_batch_norm_forward_inference(ioDesc_, - input, - ioDesc_, - output, - bnParamDesc_, - gamma, - beta, - movingMean, - movingVar, - EPS); } } From 4bbd05fd724a650d32f2b7e842c9fcd55032722a Mon Sep 17 00:00:00 2001 From: Qiao Longfei Date: Mon, 7 Aug 2017 20:42:43 +0800 Subject: [PATCH 28/44] check INFINITY in cross_entropy (#3287) * check INFINITY in cross_entropy * fix error * use onehot_cross_entropy without GPU kernel * add support_gpu * fix allclose * fix name error and symplify code --- paddle/framework/pybind.cc | 2 +- paddle/operators/cross_entropy_op.cu | 3 --- paddle/operators/cross_entropy_op.h | 29 +++++++++++++++++++++------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/paddle/framework/pybind.cc b/paddle/framework/pybind.cc index d4ac8fda54..b189e6f9e8 100644 --- a/paddle/framework/pybind.cc +++ b/paddle/framework/pybind.cc @@ -32,7 +32,7 @@ limitations under the License. 
*/ namespace py = pybind11; USE_OP(add_two); -USE_OP(onehot_cross_entropy); +USE_OP_CPU(onehot_cross_entropy); USE_OP_WITHOUT_KERNEL(fc); USE_OP(sgd); USE_OP(mul); diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu index 2f453f8379..ec73721a81 100644 --- a/paddle/operators/cross_entropy_op.cu +++ b/paddle/operators/cross_entropy_op.cu @@ -14,6 +14,3 @@ #define EIGEN_USE_GPU #include "paddle/operators/cross_entropy_op.h" - -REGISTER_OP_GPU_KERNEL(onehot_cross_entropy, - ops::OnehotCrossEntropyOpKernel); diff --git a/paddle/operators/cross_entropy_op.h b/paddle/operators/cross_entropy_op.h index 88d06e1346..e02e3e2945 100644 --- a/paddle/operators/cross_entropy_op.h +++ b/paddle/operators/cross_entropy_op.h @@ -18,7 +18,24 @@ limitations under the License. */ namespace paddle { namespace operators { -static const float kCrossEntropyLogThreshold{1e-20}; +template +T tolerable_value(T x) { + static_assert(std::is_floating_point::value, + "tolerable_value works only on float, " + "double and double double."); + + const T kApproInf = 1e20; + + if (x == INFINITY) { + return kApproInf; + } + + if (x == -INFINITY) { + return -kApproInf; + } + + return x; +} template class OnehotCrossEntropyOpKernel : public OpKernel { @@ -36,10 +53,9 @@ class OnehotCrossEntropyOpKernel : public OpKernel { int batch_size = X->dims()[0]; int class_num = X->dims()[1]; - // Y[i] = -log(X[i][j]) for (int i = 0; i < batch_size; ++i) { - Ydata[i] = -std::log(std::max(Xdata[i * class_num + label_data[i]], - kCrossEntropyLogThreshold)); + int index = i * class_num + label_data[i]; + Ydata[i] = -tolerable_value(std::log(Xdata[index])); } } }; @@ -62,9 +78,8 @@ class OnehotCrossEntropyGradientOpKernel : public OpKernel { const int class_num = X->dims()[1]; for (int i = 0; i < batch_size; ++i) { - dXdata[i * class_num + label_data[i]] = - -dYdata[i] / std::max(Xdata[i * class_num + label_data[i]], - kCrossEntropyLogThreshold); + int index = i * class_num + label_data[i]; + dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]); } } }; From 28476676202865ef762b8469cf44cce3f38a9b17 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 7 Aug 2017 11:09:36 -0700 Subject: [PATCH 29/44] fix bug --- paddle/operators/fill_zeros_like_op.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index 44886f12bf..dfaed2c9aa 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -24,7 +24,8 @@ class FillZerosLikeKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* output = context.Output(0); output->mutable_data(context.GetPlace()); - framework::EigenVector::Flatten(*output).setZero(); + auto t = framework::EigenVector::Flatten(*output); + t.device(context.GetEigenDevice()) = t.constant(T(0)); } }; From af2040bc7e58fcf71c98856b9c08ea2eb15d244c Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 7 Aug 2017 11:12:53 -0700 Subject: [PATCH 30/44] Change git tag of any --- cmake/external/any.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/external/any.cmake b/cmake/external/any.cmake index 5d2f7219b2..85cce80b70 100644 --- a/cmake/external/any.cmake +++ b/cmake/external/any.cmake @@ -8,7 +8,7 @@ ExternalProject_Add( extern_lib_any ${EXTERNAL_PROJECT_LOG_ARGS} GIT_REPOSITORY "https://github.com/PaddlePaddle/any.git" - GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020" + GIT_TAG 
"15595d8324be9e8a9a80d9ae442fdd12bd66df5d" PREFIX ${ANY_SOURCE_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" From e7b3e139594d2dfc6d195614acc30b340d40fd88 Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 7 Aug 2017 11:33:27 -0700 Subject: [PATCH 31/44] Rewrite test based on new python test format --- python/paddle/v2/framework/tests/test_fill_zeros_like_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py index 6e496f1be9..e5c862605f 100644 --- a/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py +++ b/python/paddle/v2/framework/tests/test_fill_zeros_like_op.py @@ -8,8 +8,8 @@ class TestFillZerosLikeOp(unittest.TestCase): def setUp(self): self.type = "fill_zeros_like" - self.Src = numpy.random.random((219, 232)).astype("float32") - self.Dst = numpy.zeros_like(self.Src) + self.inputs = {'Src': numpy.random.random((219, 232)).astype("float32")} + self.outputs = {'Dst': numpy.zeros_like(self.inputs['Src'])} if __name__ == '__main__': From 4b321446dcf613cfdc844ff5689143fe5eac5e40 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 7 Aug 2017 11:40:27 -0700 Subject: [PATCH 32/44] Remove wget progress info from downloading MKLML --- cmake/external/mklml.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 17a1ca4ed0..82714d6c14 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -17,7 +17,7 @@ IF(NOT ${WITH_MKLML}) ENDIF(NOT ${WITH_MKLML}) IF(WIN32 OR APPLE) - MESSAGE(WARNING + MESSAGE(WARNING "Windows or Mac is not supported with MKLML in Paddle yet." "Force WITH_MKLML=OFF") SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) @@ -54,11 +54,11 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} + DOWNLOAD_COMMAND wget --quiet --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} && tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} ) From 2ee418db78c93733a7a2cd5e21f24c2573993dc8 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 3 Aug 2017 23:14:57 +0000 Subject: [PATCH 33/44] fix pserver save / load checkpoint --- go/cmd/pserver/pserver.go | 4 +- go/glide.lock | 6 +- go/glide.yaml | 8 +- go/pserver/etcd_client.go | 10 ++- go/pserver/optimizer.go | 2 + go/pserver/service.go | 154 ++++++++++++++++++++++++------------- go/pserver/service_test.go | 8 +- 7 files changed, 126 insertions(+), 66 deletions(-) diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index f9cd8f87e8..cfbfcde3e5 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -60,12 +60,12 @@ func main() { idx, err = e.Register(*port) candy.Must(err) - cp, err = pserver.NewCheckpointFromFile(*checkpointPath, idx, e) + cp, err = pserver.LoadCheckpoint(e, idx) if err != nil { if err == pserver.ErrCheckpointNotFound { log.Infof("Could not find the pserver checkpoint.") } else { - log.Errorf("Fetch checkpoint failed, %s", err) + panic(err) } } } diff --git a/go/glide.lock b/go/glide.lock index 
1f16abdf66..be1fb24d77 100644 --- a/go/glide.lock +++ b/go/glide.lock @@ -1,5 +1,5 @@ -hash: 2a1c0eca5c07a130e3d224f9821f96cfa37a39bf6bce141c855bbc57ef569f1c -updated: 2017-07-29T07:34:48.722757905+08:00 +hash: 1b9b07408ca7fac27a374dc2ccd2433e4bff090484008a037df967284949a582 +updated: 2017-08-03T21:46:51.744995189Z imports: - name: github.com/beorn7/perks version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 @@ -145,6 +145,8 @@ imports: version: a1dba9ce8baed984a2495b658c82687f8157b98f subpackages: - xfs +- name: github.com/satori/go.uuid + version: 879c5887cd475cd7864858769793b2ceb0d44feb - name: github.com/sirupsen/logrus version: a3f95b5c423586578a4e099b11a46c2479628cac - name: github.com/topicai/candy diff --git a/go/glide.yaml b/go/glide.yaml index bc23fa6ebf..a90e71b615 100644 --- a/go/glide.yaml +++ b/go/glide.yaml @@ -14,11 +14,13 @@ import: version: ^1.0.0 - package: github.com/topicai/candy - package: golang.org/x/crypto - vcs: git repo: https://github.com/golang/crypto.git -- package: golang.org/x/sys vcs: git +- package: golang.org/x/sys repo: https://github.com/golang/sys.git -- package: golang.org/x/text vcs: git +- package: golang.org/x/text repo: https://github.com/golang/text.git + vcs: git +- package: github.com/satori/go.uuid + version: v1.1.0 diff --git a/go/pserver/etcd_client.go b/go/pserver/etcd_client.go index 4fb2630766..41f0640fc0 100644 --- a/go/pserver/etcd_client.go +++ b/go/pserver/etcd_client.go @@ -206,6 +206,7 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) { if err != nil { return []byte{}, err } + kvs := resp.Kvs if len(kvs) == 0 { return []byte{}, nil @@ -215,9 +216,14 @@ func (e *EtcdClient) GetKey(key string, timeout time.Duration) ([]byte, error) { } // PutKey put into etcd with value by key specified -func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration) error { +func (e *EtcdClient) PutKey(key string, value []byte, timeout time.Duration, withLease bool) error { ctx, cancel := context.WithTimeout(context.Background(), timeout) - _, err := e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease())) + var err error + if withLease { + _, err = e.client.Put(ctx, key, string(value), clientv3.WithLease(e.sess.Lease())) + } else { + _, err = e.client.Put(ctx, key, string(value)) + } cancel() return err } diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index 709160d45d..ae73590734 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -32,6 +32,7 @@ type optimizer struct { opt *C.struct_paddle_optimizer elementType ElementType contentLen int + config []byte } func cArrayToSlice(p unsafe.Pointer, len int) []byte { @@ -70,6 +71,7 @@ func newOptimizer(paramWithConfigs ParameterWithConfig, State []byte) *optimizer cstate = unsafe.Pointer(&s[0]) } + o.config = c o.opt = C.paddle_create_optimizer((*C.uchar)(&c[0]), C.int(len(c)), C.paddle_element_type(p.ElementType), cbuffer, C.int(paramBufferSize), (*C.char)(cstate), C.int(len(s))) return o diff --git a/go/pserver/service.go b/go/pserver/service.go index 7d297c46d0..25751540a9 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -25,11 +25,13 @@ import ( "fmt" "io/ioutil" "os" - "path/filepath" + "path" "strconv" "sync" "time" + uuid "github.com/satori/go.uuid" + log "github.com/sirupsen/logrus" ) @@ -42,9 +44,9 @@ var ErrCheckpointNotFound = errors.New("checkpoint not found") // RPC error message. 
const ( - AlreadyInitialized = "pserver already initialized" - Uninitialized = "pserver not fully initialized" - CheckpointMD5Failed = "checkpoint file MD5 validation failed" + AlreadyInitialized = "pserver already initialized" + Uninitialized = "pserver not fully initialized" + WrongChecksum = "checkpoint file checksum validation failed" ) // Supported element types. @@ -73,11 +75,12 @@ type ParameterWithConfig struct { // checkpointMeta saves checkpoint metadata type checkpointMeta struct { UUID string `json:"uuid"` + Path string `json:"path"` MD5 string `json:"md5"` Timestamp int64 `json:"timestamp"` } -// Checkpoint is the pserver shard persist in file +// Checkpoint is the pserver shard persist in file. type Checkpoint []parameterCheckpoint // Gradient is the gradient of the parameter. @@ -90,50 +93,58 @@ type Service struct { checkpointInterval time.Duration checkpointPath string client *EtcdClient - mu sync.Mutex - optMap map[string]*optimizer + + mu sync.Mutex + optMap map[string]*optimizer } -// parameterCheckpoint saves parameter checkpoint +// parameterCheckpoint saves parameter checkpoint. type parameterCheckpoint struct { ParameterWithConfig State []byte } -// NewCheckpointFromFile loads parameters and state from checkpoint file -func NewCheckpointFromFile(cpPath string, idx int, e *EtcdClient) (Checkpoint, error) { - v, err := e.GetKey(PsPath+string(idx), 3*time.Second) +func loadMeta(e *EtcdClient, idx int) (meta checkpointMeta, err error) { + v, err := e.GetKey(PsCheckpoint+strconv.Itoa(idx), 3*time.Second) if err != nil { - return nil, err + return } if len(v) == 0 { - return nil, ErrCheckpointNotFound + err = ErrCheckpointNotFound + return } - var cpMeta checkpointMeta - if err = json.Unmarshal(v, &cpMeta); err != nil { - return nil, err + if err = json.Unmarshal(v, &meta); err != nil { + return } - fn := filepath.Join(cpPath, cpMeta.UUID) - if _, err = os.Stat(fn); os.IsNotExist(err) { + return +} + +// LoadCheckpoint loads checkpoint from file. +func LoadCheckpoint(e *EtcdClient, idx int) (Checkpoint, error) { + cpMeta, err := loadMeta(e, idx) + if err != nil { return nil, err } - content, err := ioutil.ReadFile(fn) + + content, err := ioutil.ReadFile(cpMeta.Path) if err != nil { return nil, err } + // TODO(helin): change MD5 to CRC since CRC is better for file + // checksum in our use case (emphasize speed over security). h := md5.New() md5 := hex.EncodeToString(h.Sum(content)) if md5 != cpMeta.MD5 { - return nil, errors.New(CheckpointMD5Failed) + return nil, errors.New(WrongChecksum) } dec := gob.NewDecoder(bytes.NewReader(content)) - cp := Checkpoint{} - if err = dec.Decode(cp); err != nil { + var cp Checkpoint + if err = dec.Decode(&cp); err != nil { return nil, err } return cp, nil @@ -193,6 +204,15 @@ func (s *Service) FinishInitParams(_ int, _ *int) error { } close(s.initialized) + go func() { + t := time.Tick(s.checkpointInterval) + for range t { + err := s.checkpoint() + if err != nil { + log.Errorln(err) + } + } + }() return nil } @@ -240,23 +260,36 @@ func (s *Service) GetParam(name string, parameter *Parameter) error { return nil } -// pserver save checkpoint -func (s *Service) doCheckpoint() (err error) { - <-s.initialized - s.mu.Lock() - defer s.mu.Unlock() +func traceTime(start time.Time, name string) { + elapsed := time.Since(start) + log.Infof("%s took %v", name, elapsed) +} + +// checkpoint saves checkpoint to disk. +// +// checkpoint should be only called after the parameters are +// initialized. 
+func (s *Service) checkpoint() (err error) { + log.Infoln("Begin save checkpoint.") + defer traceTime(time.Now(), "save checkpoint") + s.mu.Lock() cp := make([]parameterCheckpoint, len(s.optMap)) index := 0 + // TODO(helin): write checkpoint incrementally to reduce memory + // footprint during checkpoint. for name, opt := range s.optMap { var pc parameterCheckpoint pc.Param.Name = name pc.Param.ElementType = opt.elementType pc.Param.Content = opt.GetWeights() + pc.Config = opt.config pc.State = opt.GetStates() cp[index] = pc index++ } + s.mu.Unlock() + var buf bytes.Buffer encoder := gob.NewEncoder(&buf) err = encoder.Encode(cp) @@ -264,32 +297,9 @@ func (s *Service) doCheckpoint() (err error) { return } - cpMeta := checkpointMeta{} - cpMeta.UUID = s.checkpointPath + strconv.Itoa(s.idx) - cpMeta.Timestamp = time.Now().UnixNano() - h := md5.New() - cpMeta.MD5 = hex.EncodeToString(h.Sum(buf.Bytes())) - - cpMetajson, err := json.Marshal(cpMeta) - if err != nil { - return - } - - err = s.client.PutKey(filepath.Join(PsCheckpoint, strconv.Itoa(s.idx)), cpMetajson, 3*time.Second) - if err != nil { - return - } - if _, err = os.Stat(cpMeta.UUID); os.IsNotExist(err) { - log.Info("checkpoint does not exists.") - } else { - err = os.Remove(cpMeta.UUID) - if err != nil { - log.Infof("Removing checkpoint %s failed", cpMeta.UUID) - } else { - log.Infof("checkpoint %s already exsits, removing ", cpMeta.UUID) - } - } - f, err := os.Create(cpMeta.UUID) + id := uuid.NewV4().String() + p := path.Join(s.checkpointPath, id) + f, err := os.Create(p) if err != nil { return } @@ -317,5 +327,43 @@ func (s *Service) doCheckpoint() (err error) { return } + oldMeta, err := loadMeta(s.client, s.idx) + if err == ErrCheckpointNotFound { + log.Infoln("Do not have existing checkpoint.") + err = nil + } + + if err != nil { + return + } + + h := md5.New() + md5 := hex.EncodeToString(h.Sum(buf.Bytes())) + cpMeta := checkpointMeta{ + UUID: id, + Timestamp: time.Now().UnixNano(), + MD5: md5, + Path: p, + } + + json, err := json.Marshal(cpMeta) + if err != nil { + return + } + + err = s.client.PutKey(PsCheckpoint+strconv.Itoa(s.idx), json, 3*time.Second, false) + if err != nil { + return + } + + if oldMeta.Path != "" { + rmErr := os.Remove(oldMeta.Path) + if rmErr != nil { + // log error, but still treat checkpoint as + // successful. 
+ log.Errorln(rmErr) + } + } + return } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index 988f3b5acb..e686875bf7 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -30,7 +30,7 @@ const ( func TestServiceFull(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, 1, "", nil, cp) + s, err := pserver.NewService(0, time.Second, "", nil, cp) if err != nil { t.Error(err) } @@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) { func TestMultipleInit(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, 1, "", nil, cp) + s, err := pserver.NewService(0, time.Second, "", nil, cp) if err != nil { t.Fatal(err) } @@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) { func TestUninitialized(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, 1, "", nil, cp) + s, err := pserver.NewService(0, time.Second, "", nil, cp) err = s.SendGrad(pserver.Gradient{}, nil) if err.Error() != pserver.Uninitialized { t.Fatal(err) @@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, 1, "", nil, cp) + s, err := pserver.NewService(0, time.Second, "", nil, cp) if err != nil { t.Error(err) } From 5ce7703ce859a12ff9efa418ed6440f7b81d767e Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 4 Aug 2017 00:30:11 +0000 Subject: [PATCH 34/44] fix test by not triggering save checkpoint when not intended(change save duration to 1 hour) --- go/pserver/client/client_test.go | 2 +- go/pserver/service_test.go | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/go/pserver/client/client_test.go b/go/pserver/client/client_test.go index b630d434dc..1243ebd683 100644 --- a/go/pserver/client/client_test.go +++ b/go/pserver/client/client_test.go @@ -59,7 +59,7 @@ func initClient() [numPserver]int { go func(l net.Listener) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, 1, "", nil, cp) + s, err := pserver.NewService(0, time.Hour, "", nil, cp) if err != nil { panic(err) } diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index e686875bf7..be648cd1e8 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -30,7 +30,7 @@ const ( func TestServiceFull(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, time.Second, "", nil, cp) + s, err := pserver.NewService(0, time.Hour, "", nil, cp) if err != nil { t.Error(err) } @@ -102,7 +102,7 @@ func TestServiceFull(t *testing.T) { func TestMultipleInit(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, time.Second, "", nil, cp) + s, err := pserver.NewService(0, time.Hour, "", nil, cp) if err != nil { t.Fatal(err) } @@ -119,7 +119,7 @@ func TestMultipleInit(t *testing.T) { func TestUninitialized(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, time.Second, "", nil, cp) + s, err := pserver.NewService(0, time.Hour, "", nil, cp) err = s.SendGrad(pserver.Gradient{}, nil) if err.Error() != pserver.Uninitialized { t.Fatal(err) @@ -128,7 +128,7 @@ func TestUninitialized(t *testing.T) { func TestBlockUntilInitialized(t *testing.T) { var cp pserver.Checkpoint - s, err := pserver.NewService(0, time.Second, "", nil, cp) + s, err := pserver.NewService(0, time.Hour, "", nil, cp) if err != nil { t.Error(err) } From 33fb8d7abf18d899345505a74f885d6664ea265e Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 4 Aug 2017 18:11:33 +0000 Subject: 
[PATCH 35/44] fix according to comments --- go/cmd/pserver/pserver.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index cfbfcde3e5..bec5775d54 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -32,7 +32,7 @@ import ( func main() { port := flag.Int("port", 0, "port of the pserver") - index := flag.Int("index", -1, "index of this pserver, should be larger or equal than 0") + index := flag.Int("index", -1, "index of the pserver, set to -1 if use etcd for auto pserver index registry") etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", "comma separated endpoint string for pserver to connect to etcd") dialTimeout := flag.Duration("dial-timeout", 5*time.Second, "dial timeout") From 10794cf4de2c8764df04f22d7ad973cfcee75364 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Fri, 4 Aug 2017 00:17:16 +0000 Subject: [PATCH 36/44] Master persist more states to etcd, schedule pending timeout after load pending state. --- go/master/service.go | 91 +++++++++++++++++--------------- go/pserver/client/etcd_client.go | 2 +- 2 files changed, 50 insertions(+), 43 deletions(-) diff --git a/go/master/service.go b/go/master/service.go index d30e9a3322..f072dd786c 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -77,11 +77,13 @@ type taskEntry struct { NumFailure int } -type taskQueues struct { - Todo []taskEntry - Pending map[int]taskEntry // map from task ID to task entry - Done []taskEntry - Failed []taskEntry +type masterState struct { + Todo []taskEntry + Pending map[int]taskEntry // map from task ID to task entry + Done []taskEntry + Failed []taskEntry + CurPass int + JobTasks []taskEntry } // Service is the master server service. @@ -94,11 +96,11 @@ type Service struct { ready chan struct{} initDone bool - mu sync.Mutex - taskQueues taskQueues - currPass int - jobTasks []taskEntry - + mu sync.Mutex + // State to be persisted to snapshot. + state masterState + // The trainer that is currently saving model. This state is + // transient, does not need to be persisted to snapshot. 
savingTrainer string } @@ -141,8 +143,8 @@ func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, failur s.chunksPerTask = chunksPerTask s.timeoutDur = timeoutDur s.failureMax = failureMax - s.taskQueues = taskQueues{} - s.taskQueues.Pending = make(map[int]taskEntry) + s.state = masterState{} + s.state.Pending = make(map[int]taskEntry) s.ready = make(chan struct{}) s.store = store recovered, err := s.recover() @@ -180,7 +182,7 @@ func (s *Service) recover() (bool, error) { } dec := gob.NewDecoder(gr) - var tqs taskQueues + var tqs masterState err = dec.Decode(&tqs) if err != nil { return false, err @@ -193,7 +195,12 @@ func (s *Service) recover() (bool, error) { log.Errorln(err) } - s.taskQueues = tqs + s.state = tqs + log.WithFields(s.logFields()).Infof("Master recovered from snapshot, scheduling pending task timeout check.") + for _, t := range s.state.Pending { + time.AfterFunc(s.timeoutDur, s.checkTimeoutFunc(t.Task.Meta.ID, t.Task.Meta.Epoch)) + } + return true, nil } @@ -208,7 +215,7 @@ func (s *Service) snapshot() error { var buf bytes.Buffer gw := gzip.NewWriter(&buf) enc := gob.NewEncoder(gw) - err := enc.Encode(s.taskQueues) + err := enc.Encode(s.state) if err != nil { return err } @@ -290,8 +297,8 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error { return err } - s.jobTasks = partition(chunks, s.chunksPerTask) - s.taskQueues.Todo = s.jobTasks + s.state.JobTasks = partition(chunks, s.chunksPerTask) + s.state.Todo = s.state.JobTasks err = s.snapshot() if err != nil { @@ -319,17 +326,17 @@ func (s *Service) processFailedTask(t taskEntry, epoch int) { } }() - delete(s.taskQueues.Pending, t.Task.Meta.ID) + delete(s.state.Pending, t.Task.Meta.ID) t.NumFailure++ if t.NumFailure > s.failureMax { log.Warningf("Task %v failed %d times, discard.", t.Task, t.NumFailure) - s.taskQueues.Failed = append(s.taskQueues.Failed, t) + s.state.Failed = append(s.state.Failed, t) return } log.Warningf("Task %v failed %d times, re-dispatch.", t.Task, t.NumFailure) - s.taskQueues.Todo = append(s.taskQueues.Todo, t) + s.state.Todo = append(s.state.Todo, t) return } @@ -338,7 +345,7 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { s.mu.Lock() defer s.mu.Unlock() - t, ok := s.taskQueues.Pending[taskID] + t, ok := s.state.Pending[taskID] if !ok { return } @@ -350,10 +357,10 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { // must be called with lock held. 
func (s *Service) logFields() log.Fields { return log.Fields{ - "todoLen": len(s.taskQueues.Todo), - "pendingLen": len(s.taskQueues.Pending), - "doneLen": len(s.taskQueues.Done), - "failedLen": len(s.taskQueues.Failed), + "todoLen": len(s.state.Todo), + "pendingLen": len(s.state.Pending), + "doneLen": len(s.state.Done), + "failedLen": len(s.state.Failed), } } @@ -366,17 +373,17 @@ func (s *Service) GetTask(passID int, task *Task) error { s.mu.Lock() defer s.mu.Unlock() - if passID < s.currPass { + if passID < s.state.CurPass { return ErrPassBefore } - if passID > s.currPass { + if passID > s.state.CurPass { // Client may get run to pass after master when one client faster than the // other return ErrPassAfter } - if len(s.taskQueues.Todo) == 0 { - if len(s.taskQueues.Done) == 0 && len(s.taskQueues.Pending) == 0 { + if len(s.state.Todo) == 0 { + if len(s.state.Done) == 0 && len(s.state.Pending) == 0 { log.WithFields(s.logFields()).Warningln("All tasks failed, may start next pass") return ErrAllTaskFailed } @@ -384,10 +391,10 @@ func (s *Service) GetTask(passID int, task *Task) error { return ErrNoMoreAvailable } - t := s.taskQueues.Todo[0] + t := s.state.Todo[0] t.Task.Meta.Epoch++ - s.taskQueues.Todo = s.taskQueues.Todo[1:] - s.taskQueues.Pending[t.Task.Meta.ID] = t + s.state.Todo = s.state.Todo[1:] + s.state.Pending[t.Task.Meta.ID] = t err := s.snapshot() if err != nil { return err @@ -409,7 +416,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { s.mu.Lock() defer s.mu.Unlock() - t, ok := s.taskQueues.Pending[taskID] + t, ok := s.state.Pending[taskID] if !ok { log.WithFields(s.logFields()).Warningln("Pending task #%d not found.", taskID) return nil @@ -417,18 +424,18 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { // task finished, reset timeout t.NumFailure = 0 - s.taskQueues.Done = append(s.taskQueues.Done, t) - delete(s.taskQueues.Pending, taskID) + s.state.Done = append(s.state.Done, t) + delete(s.state.Pending, taskID) log.WithFields(s.logFields()).Infof("Task #%d finished.", taskID) - if len(s.taskQueues.Todo) == 0 && len(s.taskQueues.Pending) == 0 { + if len(s.state.Todo) == 0 && len(s.state.Pending) == 0 { // increase master side pass count if all tasks finished - s.currPass++ - s.taskQueues.Todo = s.jobTasks - s.taskQueues.Done = []taskEntry{} + s.state.CurPass++ + s.state.Todo = s.state.JobTasks + s.state.Done = []taskEntry{} // TODO(typhoonzero): deal with failed tasks - s.taskQueues.Failed = []taskEntry{} - log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.currPass) + s.state.Failed = []taskEntry{} + log.WithFields(s.logFields()).Warningf("all task finished, add new pass data, newpass: %d.", s.state.CurPass) } err := s.snapshot() @@ -447,7 +454,7 @@ func (s *Service) TaskFailed(meta TaskMeta, dummy *int) error { s.mu.Lock() defer s.mu.Unlock() - t, ok := s.taskQueues.Pending[meta.ID] + t, ok := s.state.Pending[meta.ID] if !ok { log.WithFields(s.logFields()).Warningln("TaskFailed:Pending task #%v not found.", t.Task.Meta) return nil diff --git a/go/pserver/client/etcd_client.go b/go/pserver/client/etcd_client.go index b6ff1fec8a..977ae5af37 100644 --- a/go/pserver/client/etcd_client.go +++ b/go/pserver/client/etcd_client.go @@ -103,7 +103,7 @@ func (p *EtcdClient) List() []Server { time.Sleep(p.timeout) continue } - log.Infof("got value (%s) for key: %s", psAddr, psKey) + log.Debugf("got value (%s) for key: %s", psAddr, psKey) servers[i].Index = i servers[i].Addr = psAddr } From 
01a62511b484c74a0f3f64f8a7686af93f637b02 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Mon, 7 Aug 2017 19:38:02 +0000 Subject: [PATCH 37/44] add curPass into log, remove JobTasks --- go/master/service.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/go/master/service.go b/go/master/service.go index f072dd786c..df7c6860e6 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -78,12 +78,11 @@ type taskEntry struct { } type masterState struct { - Todo []taskEntry - Pending map[int]taskEntry // map from task ID to task entry - Done []taskEntry - Failed []taskEntry - CurPass int - JobTasks []taskEntry + Todo []taskEntry + Pending map[int]taskEntry // map from task ID to task entry + Done []taskEntry + Failed []taskEntry + CurPass int } // Service is the master server service. @@ -297,8 +296,7 @@ func (s *Service) SetDataset(globPaths []string, _ *int) error { return err } - s.state.JobTasks = partition(chunks, s.chunksPerTask) - s.state.Todo = s.state.JobTasks + s.state.Todo = partition(chunks, s.chunksPerTask) err = s.snapshot() if err != nil { @@ -361,6 +359,7 @@ func (s *Service) logFields() log.Fields { "pendingLen": len(s.state.Pending), "doneLen": len(s.state.Done), "failedLen": len(s.state.Failed), + "curPass": s.state.CurPass, } } @@ -431,7 +430,7 @@ func (s *Service) TaskFinished(taskID int, dummy *int) error { if len(s.state.Todo) == 0 && len(s.state.Pending) == 0 { // increase master side pass count if all tasks finished s.state.CurPass++ - s.state.Todo = s.state.JobTasks + s.state.Todo = append(s.state.Done, s.state.Failed...) s.state.Done = []taskEntry{} // TODO(typhoonzero): deal with failed tasks s.state.Failed = []taskEntry{} From d777ac52bceaa4ae5c72a7f5228464d8ac7be8ca Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 7 Aug 2017 12:59:30 -0700 Subject: [PATCH 38/44] Simplify MKLML download command --- cmake/external/mklml.cmake | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index 82714d6c14..e9fd3d4bed 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -43,19 +43,18 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) -SET(mklml_cmakefile ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt) -FILE(WRITE ${mklml_cmakefile} "PROJECT(MKLML)\n" - "cmake_minimum_required(VERSION 3.0)\n" - "install(DIRECTORY ${MKLML_VER}\n" - " DESTINATION ${MKLML_DST_DIR})\n") +FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(MKLML)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${MKLML_VER}\n" + " DESTINATION ${MKLML_DST_DIR})\n") ExternalProject_Add( ${MKLML_PROJECT} ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${MKLML_SOURCE_DIR} DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --quiet --no-check-certificate -O ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz ${MKLML_URL} - && tar -xzf ${MKLML_DOWNLOAD_DIR}/${MKLML_VER}.tgz + DOWNLOAD_COMMAND wget --no-check-certificate -qO- ${MKLML_URL} | tar xz -C ${MKLML_DOWNLOAD_DIR} DOWNLOAD_NO_PROGRESS 1 UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} From 83cadd887fb131598a3edc6a38fbaa7c60fb7d63 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 7 Aug 2017 18:12:25 -0700 Subject: [PATCH 39/44] Make wget quite --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 156ad3552b..a2782c7efc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,8 +38,8 @@ RUN 
apt-get update && \ RUN pip --no-cache-dir install 'numpy>=1.12.0' # Install Go and glide -RUN wget -O go.tgz https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz && \ - tar -C /usr/local -xzf go.tgz && \ +RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \ + tar -xz -C /usr/local && \ mkdir /root/gopath && \ mkdir /root/gopath/bin && \ mkdir /root/gopath/src && \ From 72284b39d8049aef1789d3abeda2ac09beb6cb0b Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 7 Aug 2017 18:16:45 -0700 Subject: [PATCH 40/44] Update Dockerfile --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a2782c7efc..0f0d14503c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,8 +42,7 @@ RUN wget -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | tar -xz -C /usr/local && \ mkdir /root/gopath && \ mkdir /root/gopath/bin && \ - mkdir /root/gopath/src && \ - rm go.tgz + mkdir /root/gopath/src ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin From aeb84fa1bf4a431f7dd65ab6e6326ed35c179226 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 7 Aug 2017 18:57:43 -0700 Subject: [PATCH 41/44] Remove curl progress info in Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 156ad3552b..1dd8ef1c58 100644 --- a/Dockerfile +++ b/Dockerfile @@ -48,7 +48,7 @@ ENV GOROOT=/usr/local/go GOPATH=/root/gopath # should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin # install glide -RUN curl -q https://glide.sh/get | sh +RUN curl -s -q https://glide.sh/get | sh # git credential to skip password typing RUN git config --global credential.helper store From 39910f0719986c545d4255eff1d17837edfeb3a7 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Tue, 8 Aug 2017 10:29:24 +0800 Subject: [PATCH 42/44] update releasing_process.md for pypi upload --- doc/design/releasing_process.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/doc/design/releasing_process.md b/doc/design/releasing_process.md index 3692a5248a..0c10e78280 100644 --- a/doc/design/releasing_process.md +++ b/doc/design/releasing_process.md @@ -11,6 +11,15 @@ Paddle每次发新的版本,遵循以下流程: * 编译这个版本的Ubuntu Deb包。如果失败,修复Ubuntu Deb包编译问题,Patch号加一,返回第二步。 * 使用Regression Test List作为检查列表,测试Docker镜像/ubuntu安装包的功能正确性 * 如果失败,记录下所有失败的例子,在这个`release/版本号`分支中,修复所有bug后,Patch号加一,返回第二步 + * 编译这个版本的python wheel包,并发布到pypi。 + * 由于pypi.python.org目前遵循[严格的命名规范PEP 513](https://www.python.org/dev/peps/pep-0513),在使用twine上传之前,需要重命名wheel包中platform相关的后缀,比如将`linux_x86_64`修改成`manylinux1_x86_64`。 + * pypi上的package名称为paddlepaddle和paddlepaddle_gpu,如果要上传GPU版本的包,需要修改build/python/setup.py中,name: "paddlepaddle_gpu"并重新打包wheel包:`python setup.py bdist_wheel`。 + * 上传方法: + ``` + cd build/python + pip install twine + twine upload dist/[package to upload] + ``` 4. 第三步完成后,将`release/版本号`分支合入master分支,并删除`release/版本号`分支。将master分支的合入commit打上tag,tag为`版本号`。同时再将`master`分支合入`develop`分支。最后删除`release/版本号`分支。 5. 编译master分支的Docker发行镜像,发布到dockerhub。编译ubuntu的deb包,发布到github release页面 6. 
协同完成Release Note的书写 From 2af35002f754212cd23b97a8328ad39f55a339f8 Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Tue, 8 Aug 2017 10:55:13 +0800 Subject: [PATCH 43/44] fix some enforce (#3301) * fix some enforce * remove compatible_type to avoid compile error * remove shared_ptr * fix tensor error msg --- paddle/framework/operator.cc | 10 ++++----- paddle/framework/operator.h | 22 ++++++++++---------- paddle/framework/tensor.h | 4 ++-- paddle/framework/tensor_impl.h | 27 +++++++++++++------------ paddle/framework/tensor_test.cc | 6 ++++-- paddle/operators/add_op.cc | 3 +-- paddle/operators/cross_entropy_op.cc | 25 ++++++++++++----------- paddle/operators/fill_zeros_like_op.cc | 16 +++++++-------- paddle/operators/mean_op.cc | 8 ++++---- paddle/operators/net_op.h | 6 +++--- paddle/operators/sgd_op.cc | 10 ++++----- paddle/operators/softmax_op.cc | 26 ++++++++++++------------ paddle/platform/enforce.h | 17 ++++------------ paddle/platform/enforce_test.cc | 28 +++++++++++++++++++++++++- 14 files changed, 114 insertions(+), 94 deletions(-) diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index beb6793289..d9a013b883 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -34,8 +34,8 @@ ExecutionContext::GetEigenDevice() const { #endif const std::string& OperatorBase::Input(const std::string& name) const { - PADDLE_ENFORCE(in_out_idxs_ != nullptr, - "Input Output Indices could not be nullptr"); + PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, + "Input Output Indices could not be nullptr"); auto it = in_out_idxs_->find(name); PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", name); @@ -49,7 +49,7 @@ const std::string& OperatorBase::Input(const std::string& name) const { } std::vector OperatorBase::Inputs(const std::string& name) const { - PADDLE_ENFORCE(in_out_idxs_ != nullptr, "IO Idx could not be nullptr"); + PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "IO Idx could not be nullptr"); auto input_format = GetAttr>("input_format"); auto offset = in_out_idxs_->at(name); PADDLE_ENFORCE(input_format.at(static_cast(offset) + 1) <= @@ -62,7 +62,7 @@ std::vector OperatorBase::Inputs(const std::string& name) const { } const std::string& OperatorBase::Output(const std::string& name) const { - PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); + PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr"); auto it = in_out_idxs_->find(name); PADDLE_ENFORCE(it != in_out_idxs_->end(), "no key [%s] in in_out_idxs_", name); @@ -76,7 +76,7 @@ const std::string& OperatorBase::Output(const std::string& name) const { } std::vector OperatorBase::Outputs(const std::string& name) const { - PADDLE_ENFORCE(in_out_idxs_ != nullptr, "InOut Indice could not be nullptr"); + PADDLE_ENFORCE_NOT_NULL(in_out_idxs_, "InOut Indice could not be nullptr"); auto output_format = GetAttr>("output_format"); auto offset = in_out_idxs_->at(name); PADDLE_ENFORCE(output_format.at(static_cast(offset) + 1) <= diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 9672492d1c..03fabff79b 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -167,15 +167,15 @@ class OperatorContext { template const T* Input(const size_t index) const { auto var = InputVar(index); - PADDLE_ENFORCE(var != nullptr, "Input(%d) should not be nullptr", index); + PADDLE_ENFORCE_NOT_NULL(var, "Input(%d) should not be nullptr", index); return &var->Get(); } template T* Output(const size_t index) const { auto var = 
OutputVar(index); - PADDLE_ENFORCE( - var != nullptr, + PADDLE_ENFORCE_NOT_NULL( + var, "Output(%d) not be nullptr, which means variable [%s] does not " "exist in scope", index, op_.outputs_[index]); @@ -185,14 +185,14 @@ class OperatorContext { template const T* Input(const std::string& name) const { auto var = InputVar(name); - PADDLE_ENFORCE(var != nullptr, "Input(%s) should not be nullptr", name); + PADDLE_ENFORCE_NOT_NULL(var, "Input(%s) should not be nullptr", name); return &var->Get(); } template T* Output(const std::string& name) const { auto var = OutputVar(name); - PADDLE_ENFORCE(var != nullptr, "Output(%s) should not be nullptr", name); + PADDLE_ENFORCE_NOT_NULL(var, "Output(%s) should not be nullptr", name); return var->GetMutable(); } @@ -204,9 +204,9 @@ class OperatorContext { std::transform(names.begin(), names.end(), std::back_inserter(res), [&](const std::string& sub_name) { auto var = scope_.FindVar(sub_name); - PADDLE_ENFORCE(var != nullptr, - "MultiInput(%s:%s) should not be nullptr", - name, sub_name); + PADDLE_ENFORCE_NOT_NULL( + var, "MultiInput(%s:%s) should not be nullptr", name, + sub_name); return &var->Get(); }); return res; @@ -220,9 +220,9 @@ class OperatorContext { std::transform(names.begin(), names.end(), std::back_inserter(res), [&](const std::string& sub_name) { auto var = scope_.FindVar(sub_name); - PADDLE_ENFORCE(var != nullptr, - "MultiOutput(%s:%s) should not be nullptr", - name, sub_name); + PADDLE_ENFORCE_NOT_NULL( + var, "MultiOutput(%s:%s) should not be nullptr", name, + sub_name); return var->GetMutable(); }); return res; diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 4c3b14b83d..c44df05e4b 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -127,8 +127,8 @@ class Tensor { memory::PODDeleter(place)), place_(place), size_(size) { - PADDLE_ENFORCE(ptr_ != nullptr, "Insufficient %s memory to allocation.", - is_cpu_place(place_) ? "CPU" : "GPU"); + PADDLE_ENFORCE_NOT_NULL(ptr_, "Insufficient %s memory to allocation.", + (is_cpu_place(place_) ? "CPU" : "GPU")); } virtual size_t size() const { return size_; } diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 92621f8c18..8d9bec6dc9 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -14,17 +14,18 @@ limitations under the License. */ #pragma once #include "paddle/memory/memcpy.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { template inline void Tensor::check_memory_size() const { - PADDLE_ENFORCE(holder_ != nullptr, - "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_, - "Tensor's dims_ is out of bound. Call Tensor::mutable_data " - "first to re-allocate memory."); + PADDLE_ENFORCE_NOT_NULL( + holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); + PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_, + "Tensor's dims_ is out of bound. Call Tensor::mutable_data " + "first to re-allocate memory."); } template @@ -51,9 +52,9 @@ inline T* Tensor::mutable_data(DDim dims, platform::Place place) { template inline T* Tensor::mutable_data(platform::Place place) { static_assert(std::is_pod::value, "T must be POD"); - PADDLE_ENFORCE(product(dims_) > 0, - "Tensor's numel must be larger than zero to call " - "Tensor::mutable_data. 
Call Tensor::set_dim first."); + PADDLE_ENFORCE_GT(product(dims_), 0, + "Tensor's numel must be larger than zero to call " + "Tensor::mutable_data. Call Tensor::set_dim first."); /* some versions of boost::variant don't have operator!= */ size_t size = product(dims_) * sizeof(T); if (holder_ == nullptr || !(holder_->place() == place) || @@ -120,11 +121,11 @@ inline void Tensor::CopyFrom(const Tensor& src, template inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { check_memory_size(); - PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero."); - PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound."); - PADDLE_ENFORCE(begin_idx < end_idx, - "Begin index must be less than end index."); - PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1."); + PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero."); + PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound."); + PADDLE_ENFORCE_LT(begin_idx, end_idx, + "Begin index must be less than end index."); + PADDLE_ENFORCE_NE(dims_[0], 1, "Can not slice a tensor with dims_[0] = 1."); int base = product(dims_) / dims_[0]; Tensor dst; dst.holder_ = holder_; diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index ef1cc10b84..20276181b9 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -36,7 +36,8 @@ TEST(Tensor, DataAssert) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "Tenosr holds no memory. Call Tensor::mutable_data first."; + "holder_ should not be null\nTenosr holds no memory. Call " + "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { ASSERT_EQ(what[i], msg[i]); @@ -111,7 +112,8 @@ TEST(Tensor, ShareDataWith) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "Tenosr holds no memory. Call Tensor::mutable_data first."; + "holder_ should not be null\nTenosr holds no memory. 
Call " + "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { ASSERT_EQ(what[i], msg[i]); diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index d4c05ed483..fb85093bb2 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -22,8 +22,7 @@ class AddOp : public OperatorWithKernel { void InferShape(const InferShapeContext &ctx) const override { PADDLE_ENFORCE_EQ(ctx.InputSize(), 2); PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1); - PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr, - "Inputs of AddOp must all be set"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "Inputs of AddOp must all be set"); PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, "Outputs of AddOp must all be set"); PADDLE_ENFORCE(ctx.Input(0)->dims() == ctx.Input(1)->dims(), diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc index b0e1b8e41a..942b919079 100644 --- a/paddle/operators/cross_entropy_op.cc +++ b/paddle/operators/cross_entropy_op.cc @@ -20,18 +20,19 @@ namespace operators { class OnehotCrossEntropyOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2, - "Input size of OnehotCrossEntropyOp must be two"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, - "Output size of OnehotCrossEntropyOp must be one"); - PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.InputVar(1) != nullptr, - "Inputs of OnehotCrossEntropyOp must all be set"); - PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, - "Outputs of OnehotCrossEntropyOp must all be set"); - PADDLE_ENFORCE(ctx.Input(0)->dims().size() == 2, - "X's dimension must be 2."); - PADDLE_ENFORCE(ctx.Output(0)->dims().size() == 1, - "label's dimension must be 1."); + PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, + "Input size of OnehotCrossEntropyOp must be two"); + PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, + "Output size of OnehotCrossEntropyOp must be one"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), + "0-th input of OnehotCrossEntropyOp should be set"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), + "1-th input of OnehotCrossEntropyOp should be set"); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), + "Outputs of OnehotCrossEntropyOp must all be set"); + PADDLE_ENFORCE_EQ(ctx.Input(0)->dims().size(), 2); + PADDLE_ENFORCE_EQ(ctx.Output(0)->dims().size(), 1, + "label's dimension must be 1."); ctx.Output(0)->Resize({ctx.Input(0)->dims()[0]}); } }; diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 198b4576c8..6dcc9372b2 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -20,14 +20,14 @@ namespace operators { class FillZerosLikeOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 1UL, - "Input size of FillZerosLikeOp must be one."); - PADDLE_ENFORCE(ctx.OutputSize() == 1UL, - "Output size of AddOp must be one."); - PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, - "Input of FillZerosLikeOp must be set."); - PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, - "Output of FillZerosLikeOp must be set."); + PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, + "Input size of FillZerosLikeOp must be one."); + PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, + "Output size of AddOp must be one."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), + "Input of FillZerosLikeOp must be set."); + 
PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), + "Output of FillZerosLikeOp must be set."); ctx.Output(0)->Resize( ctx.Input(0)->dims()); } diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 8a4981c7be..8ab4e82ac4 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -20,10 +20,10 @@ namespace operators { class MeanOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 1, "Input size of AddOp must be one"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of AddOp must be one"); - PADDLE_ENFORCE(ctx.InputVar(0) != nullptr && ctx.OutputVar(0) != nullptr, - "Input/Output of MeanOp must be initialized."); + PADDLE_ENFORCE_EQ(ctx.InputSize(), 1, "Input size of AddOp must be one"); + PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of AddOp must be one"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "input should be set"); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "output should be set"); ctx.Output(0)->Resize(framework::make_ddim({1})); } }; diff --git a/paddle/operators/net_op.h b/paddle/operators/net_op.h index 6e7af7f02a..bb2d02b56f 100644 --- a/paddle/operators/net_op.h +++ b/paddle/operators/net_op.h @@ -70,15 +70,15 @@ class NetOp : public framework::OperatorBase { */ void AddOp(const std::shared_ptr& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot AddOp when this network is sealed"); - PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); + PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); ops_.push_back(op); } void InsertOp(size_t pos, const std::shared_ptr& op) { PADDLE_ENFORCE(!add_op_done_, "Cannot InsertOp when this network is sealed"); - PADDLE_ENFORCE(op != nullptr, "Cannot Insert Null op"); - PADDLE_ENFORCE(pos <= ops_.size(), "Out of range"); + PADDLE_ENFORCE_NOT_NULL(op, "Cannot Insert Null op"); + PADDLE_ENFORCE_LE(pos, ops_.size(), "Out of range"); ops_.insert(ops_.begin() + pos, op); } diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 6307583f4e..e0532f2f09 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -20,11 +20,11 @@ namespace operators { class SGDOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 2, "Input size of SGDOp must be two"); - PADDLE_ENFORCE(ctx.OutputSize() == 1, "Output size of SGDOp must be one"); - PADDLE_ENFORCE(ctx.InputVar(0) != nullptr, "inputs[0] mast be set"); - PADDLE_ENFORCE(ctx.InputVar(1) != nullptr, "inputs[1] mast be set"); - PADDLE_ENFORCE(ctx.OutputVar(0) != nullptr, "outputs[0] mast be set"); + PADDLE_ENFORCE_EQ(ctx.InputSize(), 2, "Input size of SGDOp must be two"); + PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1, "Output size of SGDOp must be one"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(0), "inputs[0] mast be set"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(1), "inputs[1] mast be set"); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar(0), "outputs[0] mast be set"); PADDLE_ENFORCE(ctx.Input(0)->dims() == ctx.Input(1)->dims(), "Two input of SGD Op's dimension must be same."); ctx.Output(0)->Resize(ctx.Input(0)->dims()); diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index a070458f5e..c08e1b153c 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -20,12 +20,12 @@ namespace operators { class SoftmaxOp : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - 
PADDLE_ENFORCE(ctx.InputSize() == 1UL, - "Only one input is need for softmax"); - PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, - "The input of softmax op must be matrix"); - PADDLE_ENFORCE(ctx.OutputSize() == 1UL, - "Only one output is need for softmax"); + PADDLE_ENFORCE_EQ(ctx.InputSize(), 1UL, + "Only one input is need for softmax"); + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims().size(), 2UL, + "The input of softmax op must be matrix"); + PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, + "Only one output is need for softmax"); ctx.Output("Y")->Resize(ctx.Input("X")->dims()); } }; @@ -43,13 +43,13 @@ class SoftmaxOpMaker : public OpProtoAndCheckerMaker { class SoftmaxOpGrad : public OperatorWithKernel { protected: void InferShape(const InferShapeContext &ctx) const override { - PADDLE_ENFORCE(ctx.InputSize() == 3UL, - "Input of SoftmaxOpGrad should be 3, X, Y, YG"); - PADDLE_ENFORCE(ctx.OutputSize() == 1UL, - "Output of SoftmaxOpGrad should be 1"); - PADDLE_ENFORCE(ctx.InputVar("Y") != nullptr, "Input(Y) should not be null"); - PADDLE_ENFORCE(ctx.InputVar(framework::GradVarName("Y")) != nullptr, - "Input(Y@GRAD) should not be null"); + PADDLE_ENFORCE_EQ(ctx.InputSize(), 3UL, + "Input of SoftmaxOpGrad should be 3, X, Y, YG"); + PADDLE_ENFORCE_EQ(ctx.OutputSize(), 1UL, + "Output of SoftmaxOpGrad should be 1"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Y")), + "Input(Y@GRAD) should not be null"); PADDLE_ENFORCE(ctx.Input("Y")->dims() == ctx.Input(framework::GradVarName("Y"))->dims(), "the shape of Input(0) and Input(1) should be the same"); diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index bc0715656a..d2adb997de 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -187,25 +187,16 @@ inline void throw_on_error(T e) { __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) - -// if two values have different data types, choose a compatible type for them. -template -struct CompatibleType { - static const bool t1_to_t2 = std::is_convertible::value; - typedef typename std::conditional::type type; -}; +#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ + PADDLE_ENFORCE(nullptr != (__VAL), #__VAL " should not be null\n%s", \ + paddle::string::Sprintf("" __VA_ARGS__)); #define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ - PADDLE_ENFORCE(__COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL0) \ - __CMP __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL1), \ + PADDLE_ENFORCE(__VAL0 __CMP __VAL1, \ "enforce %s " #__CMP " %s failed, %s " #__INV_CMP " %s\n%s", \ #__VAL0, #__VAL1, std::to_string(__VAL0), \ std::to_string(__VAL1), \ paddle::string::Sprintf("" __VA_ARGS__)); -#define __COMPATIBLE_TYPE(__VAL0, __VAL1, __VAL) \ - typename paddle::platform::CompatibleType::type(__VAL) - } // namespace platform } // namespace paddle diff --git a/paddle/platform/enforce_test.cc b/paddle/platform/enforce_test.cc index 7117b49474..4dfb697546 100644 --- a/paddle/platform/enforce_test.cc +++ b/paddle/platform/enforce_test.cc @@ -9,8 +9,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/platform/enforce.h" +#include + #include "gtest/gtest.h" +#include "paddle/platform/enforce.h" TEST(ENFORCE, OK) { PADDLE_ENFORCE(true, "Enforce is ok %d now %f", 123, 0.345); @@ -196,3 +198,27 @@ TEST(ENFORCE_LT, FAIL) { ASSERT_TRUE(in_catch); } + +TEST(ENFORCE_NOT_NULL, OK) { + int* a = new int; + PADDLE_ENFORCE_NOT_NULL(a); + delete a; +} +TEST(ENFORCE_NOT_NULL, FAIL) { + bool in_catch = false; + int* a{nullptr}; + + try { + PADDLE_ENFORCE_NOT_NULL(a); + + } catch (paddle::platform::EnforceNotMet error) { + in_catch = true; + const std::string msg = "a should not be null"; + const char* what = error.what(); + for (size_t i = 0; i < msg.length(); ++i) { + ASSERT_EQ(what[i], msg[i]); + } + } + + ASSERT_TRUE(in_catch); +} From 8c2a0a763ffee0288bf833e42a455bb4f8ceab51 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Tue, 8 Aug 2017 00:18:42 -0500 Subject: [PATCH 44/44] Fix C-API Compilation Error (#3311) Fix C-API Compilation Error --- paddle/framework/CMakeLists.txt | 3 +++ paddle/scripts/docker/build.sh | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index d8012fba27..f6ad5b2e42 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -35,6 +35,8 @@ add_dependencies(framework_py_proto framework_py_proto_init) cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) + +if(WITH_PYTHON) cc_library(paddle_pybind SHARED SRCS pybind.cc DEPS pybind python backward @@ -45,3 +47,4 @@ cc_library(paddle_pybind SHARED cross_entropy_op fill_zeros_like_op recurrent_op) +endif(WITH_PYTHON) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index ede9e21024..44442be472 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -33,6 +33,9 @@ Configuring cmake in /paddle/build ... -DWITH_AVX=${WITH_AVX:-OFF} -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_SWIG_PY=ON + -DWITH_C_API=${WITH_C_API:-OFF} + -DWITH_PYTHON=${WITH_PYTHON:-ON} + -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} -DWITH_TESTING=${WITH_TESTING:-OFF} @@ -49,7 +52,9 @@ cmake .. \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ - -DWITH_SWIG_PY=ON \ + -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ + -DWITH_C_API=${WITH_C_API:-OFF} \ + -DWITH_PYTHON=${WITH_PYTHON:-ON} \ -DCUDNN_ROOT=/usr/ \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ -DWITH_TESTING=${WITH_TESTING:-OFF} \
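Note on using the new switches: with WITH_C_API, WITH_PYTHON and WITH_SWIG_PY now forwarded by paddle/scripts/docker/build.sh, a C-API-only build can be selected purely through environment variables when the script runs inside the development container. The snippet below is only a sketch of such an invocation, not part of this patch: DEV_IMAGE stands for whichever dev image is normally used, and the /paddle mount point simply mirrors the "Configuring cmake in /paddle/build" message printed by the script.

    # Sketch: request a C-API build without the Python bindings.
    # DEV_IMAGE is a placeholder for your usual PaddlePaddle dev image;
    # it is not defined by this patch.
    docker run --rm -v "$PWD:/paddle" \
      -e WITH_C_API=ON -e WITH_PYTHON=OFF -e WITH_SWIG_PY=OFF \
      -e WITH_GPU=OFF -e WITH_TESTING=OFF \
      "$DEV_IMAGE" \
      bash /paddle/paddle/scripts/docker/build.sh

With WITH_PYTHON=OFF, the new if(WITH_PYTHON) guard in paddle/framework/CMakeLists.txt keeps paddle_pybind out of the build, which is what allows the C-API target to compile on its own.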