You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
484 lines
15 KiB
484 lines
15 KiB
9 years ago
|
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
|
||
|
|
||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
you may not use this file except in compliance with the License.
|
||
|
You may obtain a copy of the License at
|
||
|
|
||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
||
|
Unless required by applicable law or agreed to in writing, software
|
||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
See the License for the specific language governing permissions and
|
||
|
limitations under the License. */
|
||
|
|
||
|
import "ParameterConfig.proto";
|
||
|
|
||
|
package paddle;
|
||
|
|
||
|
/**
|
||
|
* Various structs for the configuration of a neural network
|
||
|
*/
|
||
|
sinclude(`ModelConfigExt.proto.m4')
|
||
|
|
||
|
// Three lists of layer names. ModelConfig.external_config carries one of
// these and describes it as defining how to split a neural network into
// multiple parts for the External Machine.
message ExternalConfig {
  // Names of the layers covered by this configuration.
  repeated string layer_names = 1;

  // Names of the layers acting as inputs.
  repeated string input_layer_names = 2;

  // Names of the layers acting as outputs.
  repeated string output_layer_names = 3;
}
|
||
|
|
||
|
// Names an activation function by its type string.
// Within this file no active field uses this message; LayerConfig stores
// the activation as the plain string field active_type instead.
message ActivationConfig {
  // Name of the activation. Formulas recorded for the known names:
  //   identity: f(x) = x
  //   sigmoid:  f(x) = 1 / (1 + exp(-x))
  //   logistic: f(x) = (1 - exp(-x)) / (1 + exp(-x))
  //   softmax:  y_i = f(x_i) = exp(x_i) / (\sum_i exp(x_i))
  //   relu:     y = max(0, x)
  // NOTE(review): the expression listed for "logistic" is algebraically
  // tanh(x / 2), while the one listed for "sigmoid" is the usual logistic
  // function. Confirm against the implementation which one is intended.
  required string type = 1;
}
|
||
|
|
||
|
// Geometry of a convolution: filter size, stride, padding, grouping and
// the sizes of the input and output feature maps.
message ConvConfig {
  // Filter extent in pixels. For example, filter_size = 5 means the layer
  // uses filters of size 5x5 pixels.
  required uint32 filter_size = 1;

  // The image data dimensionality.
  // This value must be either 1, 2, 3, or a multiple of 4.
  required uint32 channels = 2;

  // Distance in pixels between successive filter applications.
  // stride = 1 means the filter moves one pixel at a time.
  required uint32 stride = 3;

  // Width of the implicit border of zeros added around the images.
  // padding = 4 pads the images with a 4-pixel border of zeros.
  required uint32 padding = 4;

  // Number of filter groups. If groups = 4 together with the
  // filters = 32 parameter, the convolutional layer has 4 groups of 32
  // filters and each filter connects to 8 input channels.
  required uint32 groups = 5;

  // NOTE(review): undocumented in the original. Presumably the number of
  // input channels each filter connects to - confirm.
  required uint32 filter_channels = 6;

  // The size of output feature map.
  required uint32 output_x = 7;

  // The size of input feature map.
  required uint32 img_size = 8;

  // Caffe mode for output size coherence.
  // NOTE(review): the field is required, so the declared default never
  // stands in for a missing value during validation; writers still have
  // to set it explicitly.
  required bool caffe_mode = 9 [default = true];

  // Filter extent in the y direction.
  // If filter_size_y is set, the layer uses filters of size
  // filter_size * filter_size_y pixels; if it is not set, the layer uses
  // filters of size filter_size * filter_size.
  // NOTE(review): the field is declared required, so the "not set" case
  // cannot occur for a fully initialized message. Either this comment or
  // the label is stale - confirm.
  required uint32 filter_size_y = 10;

  // NOTE(review): presumably the y-direction counterpart of padding.
  required uint32 padding_y = 11;

  // NOTE(review): presumably the y-direction counterpart of stride.
  required uint32 stride_y = 12;
}
|
||
|
|
||
|
// Geometry and kind of a pooling operation. The x-direction values are
// mandatory; each optional y-direction value falls back to its
// x-direction counterpart when it is left unset.
message PoolConfig {
  // Kind of pooling: max or avg.
  required string pool_type = 1;

  // NOTE(review): undocumented in the original; ConvConfig describes its
  // own channels field as the image data dimensionality.
  required uint32 channels = 2;

  // Size of the pooling region in the x (equivalently, y) dimension.
  required uint32 size_x = 3;

  // Where in the input image the pooling starts.
  required uint32 start = 4;

  // Stride between successive pooling squares.
  required uint32 stride = 5;

  // The size of output feature map.
  required uint32 output_x = 6;

  // The size of input feature map.
  required uint32 img_size = 7;

  // Width of the implicit border of zeros added around the images.
  // padding = 4 pads the images with a 4-pixel border of zeros.
  optional uint32 padding = 8 [default = 0];

  // If not set, size_x is used.
  optional uint32 size_y = 9 [default = 0];

  // If not set, stride is used.
  optional uint32 stride_y = 10 [default = 0];

  // If not set, output_x is used.
  optional uint32 output_y = 11 [default = 0];

  // If not set, img_size is used.
  optional uint32 img_size_y = 12 [default = 0];

  // If not set, padding is used.
  optional uint32 padding_y = 13 [default = 0];
}
|
||
|
|
||
|
// Parameters of a response-normalization operation.
// The normalization applied is
//   u = u / (1 + scale * sum(u^2 in window))^pow
message NormConfig {
  // Kind of normalization: rnorm or cmrnorm.
  required string norm_type = 1;

  // NOTE(review): undocumented in the original; ConvConfig describes its
  // own channels field as the image data dimensionality.
  required uint32 channels = 2;

  // rnorm: the size of the local regions used for response
  // normalization.
  // cmrnorm: how many nearby maps are used for normalization.
  required uint32 size = 3;

  // The scale term of the normalization formula given above.
  required real scale = 4;

  // The exponent of the normalization formula given above.
  required real pow = 5;

  // The size of output feature map.
  required uint32 output_x = 6;

  // The size of input feature map.
  required uint32 img_size = 7;

  // Chooses between normalizing with a fixed window and a sliding window.
  //   fixed window:   one fixed window is shared by each value
  //   sliding window: each value has a different window
  // NOTE(review): the original comment does not state which mode the
  // value true selects - confirm against the implementation.
  optional bool blocked = 8;
}
|
||
|
|
||
|
// Geometry of a block-expand operation. Every quantity is given
// separately for the x and the y direction, and all fields are required.
// NOTE(review): only the two feature-map size groups were documented in
// the original; the meaning of the stride, padding and block fields is
// inferred from their names and should be confirmed.
message BlockExpandConfig {
  required uint32 channels = 1;

  // Stride per direction.
  required uint32 stride_x = 2;
  required uint32 stride_y = 3;

  // Padding per direction.
  required uint32 padding_x = 4;
  required uint32 padding_y = 5;

  // Block extent per direction.
  required uint32 block_x = 6;
  required uint32 block_y = 7;

  // The size of output feature map.
  required uint32 output_x = 8;
  required uint32 output_y = 9;

  // The size of input feature map.
  required uint32 img_size_x = 10;
  required uint32 img_size_y = 11;
}
|
||
|
|
||
|
// Describes one projection: its type, name and input/output sizes, plus
// settings that only apply to particular projection types.
// Field number 10 is not used by any declaration in this message.
message ProjectionConfig {
  // Type string of the projection.
  required string type = 1;

  // Name of the projection.
  required string name = 2;

  // Size of the projection input.
  required uint64 input_size = 3;

  // Size of the projection output.
  required uint64 output_size = 4;

  // ---- For ShiftProjection ----
  optional int32 context_start = 5;
  optional int32 context_length = 6;
  optional bool trainable_padding = 7 [default = false];

  // ---- For convolution ----
  optional ConvConfig conv_conf = 8;
  optional int32 num_filters = 9;

  // ---- For IdentityOffsetProjection ----
  optional uint64 offset = 11 [default = 0];
}
|
||
|
|
||
|
// Describes one operator. LayerConfig.operator_confs holds a list of
// these for the operators used by a mixed layer.
message OperatorConfig {
  // Type string of the operator.
  required string type = 1;

  // NOTE(review): undocumented in the original. Presumably positions
  // within the owning layer's inputs list - confirm.
  repeated int32 input_indices = 2;

  // Sizes of the operator inputs.
  repeated uint64 input_sizes = 3;

  // Size of the operator output.
  required uint64 output_size = 4;

  // ---- For DotMulOperator ----
  optional real dotmul_scale = 5 [default = 1.0];

  // ---- For ConvOperator ----
  optional ConvConfig conv_conf = 6;
  optional int32 num_filters = 7;
}
|
||
|
|
||
|
|
||
|
// Minimal description of an image input: channel count and size.
// The field numbers are 2 and 8 rather than 1 and 2; they coincide with
// the numbers of the same-named fields in ConvConfig.
message ImageConfig {
  // The image data dimensionality.
  // This value must be either 1, 2, 3, or a multiple of 4.
  required uint32 channels = 2;

  // The size of input feature map.
  required uint32 img_size = 8;
}
|
||
|
|
||
|
// Describes one input of a layer: which layer feeds it, an optional
// parameter name, and optional per-input sub-configurations.
message LayerInputConfig {
  // Name of the layer that provides this input.
  required string input_layer_name = 1;

  // Name of the parameter associated with this input, if any.
  optional string input_parameter_name = 2;

  // Optional sub-configurations, one per kind of operation.
  optional ConvConfig conv_conf = 3;
  optional PoolConfig pool_conf = 4;
  optional NormConfig norm_conf = 5;
  optional ProjectionConfig proj_conf = 6;
  optional BlockExpandConfig block_expand_conf = 7;
  optional ImageConfig image_conf = 8;

  // If the input layer has more than one output, the name of the
  // argument to take from it.
  optional string input_layer_argument = 9;
}
|
||
|
|
||
|
// Configuration of a single layer. A few fields are common to every
// layer (name, type, size, activation, inputs, bias); the rest are only
// meaningful for the layer types named in their comments.
// Field numbers 18, 20, 22, 23 and 35 are not declared in this message;
// 20, 22 and 23 are recorded below as deprecated.
message LayerConfig {
  // Extra declarations are pulled in from an optional m4 fragment.
sinclude(`ModelConfigLayer.proto.m4')

  // Name of the layer.
  required string name = 1;

  // Type string of the layer.
  required string type = 2;

  // Size of the layer.
  optional uint64 size = 3;

  // Activation of the layer, given as a type string.
  //optional ActivationConfig activation = 4;
  optional string active_type = 4;

  // Inputs of the layer.
  repeated LayerInputConfig inputs = 5;

  // Name of the bias parameter, if the layer has one.
  optional string bias_parameter_name = 6;

  // This number must be a multiple of 16.
  optional uint32 num_filters = 7;

  // Indicates that the biases of every filter in this layer should be
  // shared amongst all applications of that filter (which is how
  // convnets are usually trained). Setting this to false will untie the
  // biases, yielding a separate bias for every location at which the
  // filter is applied.
  optional bool shared_biases = 8;

  // Valid values are ones that divide the area of the output grid in
  // this convolutional layer. For example, if this layer produces a
  // 32-channel 20x20 output grid, valid values of partialSum are ones
  // which divide 20*20 = 400.
  // TODO: this comment was marked by its author as awaiting confirmation.
  optional uint32 partial_sum = 9;

  // For dropout.
  optional real drop_rate = 10;

  // For HierarchicalSoftmaxLayer and NCELayer: the number of classes.
  optional uint32 num_classes = 11;

  // The gpu device which holds the layer's data.
  // Only used by ParallelNeuralNetwork. Ignored otherwise.
  optional int32 device = 12 [default = -1];

  // For recurrent layer. If true, the recurrence runs from the end to
  // the beginning.
  optional bool reversed = 13 [default = false];

  // For lstmemory layer. Different types of nodes have different
  // activation types.
  optional string active_gate_type = 14;
  optional string active_state_type = 15;

  // For NCELayer.
  // The number of random negative labels for each sample.
  optional int32 num_neg_samples = 16 [default = 10];

  // For NCELayer.
  // The distribution for generating the random negative labels.
  // A uniform distribution will be used if not provided.
  repeated real neg_sampling_dist = 17 [packed = true];

  // For MaxLayer.
  // Default: output VALUE of MaxLayer. Set this flag to true to output
  // INDEX instead.
  // INDEX will be put in Argument::value as real values.
  optional bool output_max_index = 19 [default = false];

  /// The field number 20 has been deprecated.

  // For self-normalized estimation.
  optional real softmax_selfnorm_alpha = 21 [default = 0.1];

  /// The field numbers 22 and 23 have been deprecated.

  // For MDLstmLayer.
  repeated bool directions = 24;

  // For CTCLayer.
  optional bool norm_by_times = 25;

  // For CostLayers.
  optional real coeff = 26;

  // For AverageLayer.
  // Can be set to: 'average', 'sum' or 'squarerootn'
  optional string average_strategy = 27;

  // For error clipping.
  optional real error_clipping_threshold = 28 [default = 0.0];

  // For operators used by mixed layer.
  repeated OperatorConfig operator_confs = 29;

  // For lambdaCost.
  optional int32 NDCG_num = 30;
  optional int32 max_sort_size = 31;

  // For SlopeInterceptLayer.
  optional real slope = 32;
  optional real intercept = 33;

  // For CosSimVecMatLayer and CosSimLayer.
  optional real cos_scale = 34;

  // For DataNormLayer.
  // Can be set to: 'z-score', 'min-max' or 'decimal-scaling'
  optional string data_norm_strategy = 36;

  // For bos/eos id.
  optional uint32 bos_id = 37;
  optional uint32 eos_id = 38;

  // For max id layer.
  optional uint32 beam_size = 39;

  // For seqlastins layer: whether to select the first instead of the
  // last.
  optional bool select_first = 40 [default = false];

  // For seqlastins layer, AverageLayer, MaxLayer and ExpandLayer.
  // Can be set to: 'non-seq','seq'
  optional string trans_type = 41 [default = 'non-seq'];

  // Indicates whether the selective_fc layer is used in sequence
  // generation or not.
  optional bool selective_fc_pass_generation = 42 [default = false];

  // Indicates whether the selective_fc layer takes its last input to
  // select several columns and only computes the multiplications
  // between the input matrices and the selected columns of the
  // parameter matrices of this layer.
  // If set to false, selective_fc degrades into fc.
  optional bool has_selected_colums = 43 [default = true];

  // This parameter is for speed consideration.
  // If the number of the selected columns is less than
  //   sample number * selective_fc output size *
  //   selective_fc_full_mul_ratio
  // sparse multiplication is used; otherwise full multiplication is
  // used.
  optional real selective_fc_full_mul_ratio = 44 [default = 0.02];

  // How many threads selective_fc uses to accelerate the plain_mul
  // period.
  // Leave empty or set to 0 to disable multi-thread acceleration.
  optional uint32 selective_fc_parallel_plain_mul_thread_num = 45 [default = 0];

  // For batch normalization layer.
  // If use_global_stats is set to true, the loaded mean and variance
  // will be used.
  optional bool use_global_stats = 46;

  // Used to compute the moving mean and variance.
  optional real moving_average_fraction = 47 [default = 0.9];
}
|
||
|
|
||
|
// Configuration of one evaluator: its name, type and input layers, plus
// settings that only apply to particular evaluator types.
message EvaluatorConfig {
  // Name of the evaluator.
  required string name = 1;

  // Type string of the evaluator.
  required string type = 2;

  // Names of the layers the evaluator reads.
  repeated string input_layers = 3;

  // ---- Used by ChunkEvaluator ----

  // One of "IOB", "IOE", "IOBES".
  optional string chunk_scheme = 4;

  // Number of chunk types other than "other".
  optional int32 num_chunk_types = 5;

  // ---- Used by PrecisionRecallEvaluator and
  //      ClassificationErrorEvaluator ----

  // For multi binary labels: true if output > classification_threshold.
  optional real classification_threshold = 6 [default = 0.5];

  // The positive label. -1 means average precision and recall.
  optional int32 positive_label = 7 [default = -1];

  // Load dict from this file.
  optional string dict_file = 8;

  // Dump result in this file.
  optional string result_file = 9;

  // Number of top results for max id printer.
  optional int32 num_results = 10 [default = 1];

  // Whether to delimit the sequence in the seq_text_printer.
  optional bool delimited = 11 [default = true];
}
|
||
|
|
||
|
// A pair of names tying a layer to a link. SubModelConfig uses these for
// its in_links and out_links, which connect the root model with a
// recurrent layer group.
message LinkConfig {
  // Name of the layer on one side of the link.
  required string layer_name = 1;

  // Name of the link itself.
  required string link_name = 2;

  // If true, this link has sub-sequence.
  optional bool has_subseq = 3 [default = false];
}
|
||
|
|
||
|
// Describes one memory of a sub model: the layer and link names it
// connects, and how it is booted. SubModelConfig.memories holds a list
// of these.
message MemoryConfig {
  // Name of the layer.
  required string layer_name = 1;

  // Name of the link.
  required string link_name = 2;

  // Boot settings.
  // NOTE(review): these four fields were undocumented in the original;
  // their exact semantics should be confirmed against the code using
  // them.
  optional string boot_layer_name = 3;
  optional string boot_bias_parameter_name = 4;
  optional string boot_bias_active_type = 5;
  optional uint32 boot_with_const_id = 7;

  // Memory is a sequence, initialized by a sequence boot layer.
  optional bool is_sequence = 6 [default = false];
}
|
||
|
|
||
|
// Settings for generation, attached to a sub model through
// SubModelConfig.generator.
// NOTE(review): apart from beam_size the fields were undocumented in the
// original; the descriptions below restate the field names only.
message GeneratorConfig {
  // Maximum number of frames.
  required uint32 max_num_frames = 1;

  // Name of the eos layer.
  required string eos_layer_name = 2;

  // Number of results per sample.
  optional int32 num_results_per_sample = 3 [default = 1];

  // For beam search.
  optional int32 beam_size = 4 [default = 1];

  optional bool log_prob = 5 [default = true];
}
|
||
|
|
||
|
// A named subset of the model's layers, optionally acting as a recurrent
// layer group with memories, links to the root model and a generator.
message SubModelConfig {
  // Name of the sub model.
  required string name = 1;

  // Selected layers in sub model.
  repeated string layer_names = 2;

  // Names of the input layers of the sub model.
  repeated string input_layer_names = 3;

  // Names of the output layers of the sub model.
  repeated string output_layer_names = 4;

  // Names of the evaluators of the sub model.
  repeated string evaluator_names = 5;

  // Whether this sub model is a recurrent layer group.
  optional bool is_recurrent_layer_group = 6 [default = false];

  // If true, the recurrence runs from the end to the beginning.
  optional bool reversed = 7 [default = false];

  // Name and link name of memory.
  repeated MemoryConfig memories = 8;

  // If a recurrent layer group is used, all layers in the sub model are
  // postfixed by "_in_"+submodel.name, so a name pair is added to link
  // the root model and the layer group.
  // Note that these in/out layers are not input/output of the network.
  repeated LinkConfig in_links = 9;
  repeated LinkConfig out_links = 10;

  // Generation settings, if any.
  optional GeneratorConfig generator = 11;
}
|
||
|
|
||
|
// Top-level description of a model: its type, layers, parameters, input
// and output layers, evaluators, sub models and an optional external
// split. Field number 7 is not used by any declaration in this message.
message ModelConfig {
  // Type of the model.
  // Currently, "nn", "recurrent_nn" and "recursive_nn" are supported.
  // NOTE(review): the field is required, so the declared default never
  // stands in for a missing value during validation; writers still have
  // to set it explicitly.
  required string type = 1 [default = "nn"];

  // Layers should be ordered in such a way that the forward propagation
  // can be correctly executed by going from the first layer to the last
  // layer.
  repeated LayerConfig layers = 2;

  // Parameters of the model; ParameterConfig comes from the imported
  // ParameterConfig.proto.
  repeated ParameterConfig parameters = 3;

  // Input layers should have the same order as the data streams provided
  // by the data provider. The type of input layers should be "data".
  repeated string input_layer_names = 4;

  // For training, the type of an output layer is usually cost layer.
  // For prediction, they should be the actual output layers.
  repeated string output_layer_names = 5;

  // Evaluators of the model.
  repeated EvaluatorConfig evaluators = 6;

  // Sub models of the model.
  repeated SubModelConfig sub_models = 8;

  // For External Machine, defining how to split a neural network
  // into multiple parts.
  optional ExternalConfig external_config = 9;
}
|