You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
152 lines
4.0 KiB
152 lines
4.0 KiB
syntax = "proto2";
|
|
|
|
option optimize_for = LITE_RUNTIME;
|
|
|
|
package paddle;
|
|
|
|
message SGDConfig {
|
|
// SGD
|
|
// momentum: float >= 0. Parameter updates momentum.
|
|
// decay: float >= 0. Learning rate decay over each update.
|
|
// nesterov: boolean. Whether to apply Nesterov momentum.
|
|
optional double momentum = 21 [default = 0.0];
|
|
optional double decay = 23 [default = 0.0];
|
|
optional bool nesterov =24 [default = false];
|
|
|
|
}
|
|
|
|
|
|
message AdadeltaConfig {
|
|
// Adadelta
|
|
// It is recommended to leave it at the default value.
|
|
// rho: float >= 0.
|
|
// epsilon: float >= 0. Fuzz factor.
|
|
// decay: float >= 0. Learning rate decay over each update.
|
|
|
|
// reference : [Adadelta - an adaptive learning rate method](http://arxiv.org/abs/1212.5701)
|
|
optional double rho = 33 [default = 0.90];
|
|
optional double epsilon = 31 [default = 1e-5];
|
|
optional double decay = 32 [default = 0.0];
|
|
|
|
}
|
|
|
|
message AdagradConfig {
|
|
// Adagrad
|
|
// epsilon: float >= 0.
|
|
// decay: float >= 0. Learning rate decay over each update.
|
|
|
|
// reference : [Adaptive Subgradient Methods for Online Learning and Stochastic Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
|
|
optional double epsilon = 41 [default = 1e-5];
|
|
optional double decay = 42 [default = 0.0];
|
|
}
|
|
|
|
message AdamConfig {
|
|
// Adaj
|
|
// beta_1: float, 0 < beta < 1. Generally close to 1.
|
|
// beta_2: float, 0 < beta < 1. Generally close to 1.
|
|
// epsilon: float >= 0. Fuzz factor.
|
|
// decay: float >= 0. Learning rate decay over each update.
|
|
// reference : [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
|
|
optional double beta_1 = 41;
|
|
optional double beta_2 = 42;
|
|
optional double epsilon = 43;
|
|
optional double decay = 44;
|
|
}
|
|
|
|
message ConstLrConfig {
|
|
// learninRate Policy
|
|
optional double learning_rate = 1 [default = 1.0];
|
|
}
|
|
|
|
message LinearLrConfig {
|
|
// learninRate Policy
|
|
optional double learning_rate = 1 [default = 1.0];
|
|
optional double lr_decay_a = 2;
|
|
optional double lr_decay_b = 3;
|
|
}
|
|
|
|
message TensorProto {
|
|
enum DataType {
|
|
PADDLE_ELEMENT_TYPE_INT32 = 0;
|
|
PADDLE_ELEMENT_TYPE_UINT32 = 1;
|
|
PADDLE_ELEMENT_TYPE_INT64 = 2;
|
|
PADDLE_ELEMENT_TYPE_UINT64 = 3;
|
|
PADDLE_ELEMENT_TYPE_FLOAT32 = 4;
|
|
PADDLE_ELEMENT_TYPE_FLOAT64 = 5;
|
|
}
|
|
optional DataType data_type = 1;
|
|
repeated bytes content = 2;
|
|
}
|
|
|
|
message LrPolicyState {
|
|
// learninRate Policy
|
|
optional double learning_rate = 1 [default = 1.0];
|
|
optional double lr_decay_a = 2;
|
|
optional double lr_decay_b = 3;
|
|
}
|
|
|
|
message SGDOptimizerState {
|
|
optional LrPolicyState lr_state = 101;
|
|
optional double num_sample_passed = 104;
|
|
// state
|
|
optional TensorProto parameter = 1;
|
|
optional TensorProto momentums = 2;
|
|
}
|
|
|
|
message AdadeltaOptimizerState {
|
|
// learning rate policy
|
|
optional LrPolicyState lr_state = 101;
|
|
optional double num_sample_passed = 104;
|
|
// state
|
|
optional TensorProto parameter = 1;
|
|
optional TensorProto accum_gradient = 2;
|
|
optional TensorProto accum_delta = 3;
|
|
optional TensorProto update_delta = 4;
|
|
}
|
|
|
|
|
|
message AdagradOptimizerState {
|
|
optional LrPolicyState lr_state = 101;
|
|
optional double num_sample_passed = 104;
|
|
// state
|
|
optional TensorProto parameter = 1;
|
|
optional TensorProto accum_gradient = 2;
|
|
}
|
|
|
|
message AdamOptimizerState {
|
|
optional LrPolicyState lr_state = 101;
|
|
optional double num_sample_passed = 104;
|
|
// state
|
|
optional TensorProto parameter = 1;
|
|
optional TensorProto momentums = 2;
|
|
optional TensorProto velocitys = 3;
|
|
}
|
|
|
|
message OptimizerConfig {
|
|
enum Optimizer {
|
|
SGD = 1;
|
|
Adadelta = 2;
|
|
Adagrad = 3;
|
|
Adam = 4;
|
|
}
|
|
optional Optimizer optimizer = 1;
|
|
optional SGDConfig sgd = 3;
|
|
optional AdadeltaConfig adadelta = 4;
|
|
optional AdagradConfig adagrad = 5;
|
|
optional AdamConfig adam = 6;
|
|
|
|
enum LrPolicy {
|
|
Const = 0;
|
|
Linear = 1;
|
|
}
|
|
optional LrPolicy lr_policy = 11;
|
|
optional ConstLrConfig const_lr = 12;
|
|
optional LinearLrConfig linear_lr = 13;
|
|
|
|
// common config of optimizer
|
|
// gradient clip when L2 exceeding value
|
|
optional double clip_norm = 101;
|
|
// gradient clip when L1 exceeding value
|
|
optional double clip_value = 102;
|
|
}
|