From 4a5ce4feb1412c8dcdc5e719f7acd3b4a869f968 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Wed, 25 Sep 2019 20:06:53 +0800
Subject: [PATCH] Add AdadeltaOptimizer doc (#19875)

* add AdadeltaOptimizer doc, test=develop

* refine doc,test=develop

* folllow lanxiang's comments, test=develop, test=document_fix
---
 paddle/fluid/API.spec            |  2 +-
 python/paddle/fluid/optimizer.py | 48 +++++++++++++++++---------------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 32025d93fa..77ad4f5415 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -968,7 +968,7 @@ paddle.fluid.optimizer.RMSPropOptimizer.backward (ArgSpec(args=['self', 'loss',
 paddle.fluid.optimizer.RMSPropOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.RMSPropOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.RMSPropOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'b15cffad0903fc81af77a0580ceb2a9b'))
-paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', 'b5e33fa8aca6cfbcaebfc6cd7742908a'))
+paddle.fluid.optimizer.AdadeltaOptimizer ('paddle.fluid.optimizer.AdadeltaOptimizer', ('document', '3f1c5385519a3674c18c3a1ab34ac04f'))
 paddle.fluid.optimizer.AdadeltaOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'epsilon', 'rho', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, 0.95, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.AdadeltaOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
 paddle.fluid.optimizer.AdadeltaOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 5530672428..a3690de36e 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -43,9 +43,10 @@ __all__ = [
     'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer',
     'AdamOptimizer', 'AdamaxOptimizer', 'DpsgdOptimizer',
     'DecayedAdagradOptimizer', 'RMSPropOptimizer', 'FtrlOptimizer', 'Adadelta',
-    'ModelAverage', 'LarsMomentum', 'LarsMomentumOptimizer',
-    'DGCMomentumOptimizer', 'LambOptimizer', 'ExponentialMovingAverage',
-    'PipelineOptimizer', 'LookaheadOptimizer', 'RecomputeOptimizer'
+    'AdadeltaOptimizer', 'ModelAverage', 'LarsMomentum',
+    'LarsMomentumOptimizer', 'DGCMomentumOptimizer', 'LambOptimizer',
+    'ExponentialMovingAverage', 'PipelineOptimizer', 'LookaheadOptimizer',
+    'RecomputeOptimizer'
 ]
 
 
@@ -1778,39 +1779,42 @@ class DecayedAdagradOptimizer(Optimizer):
 
 class AdadeltaOptimizer(Optimizer):
     """
-    **Adadelta Optimizer**
+    **NOTES: This API does not support sparse parameter optimization.**
 
-    Simple Adadelta optimizer with average squared grad state and
-    average squared update state.
-    The details of adadelta please refer to this
+    Adadelta Optimizer. Please refer to this for details:
     `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
-    `_.
+    <https://arxiv.org/abs/1212.5701>`_.
 
-    .. math::
+    .. math::
+
+        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2
 
-        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
-        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
-                          E(g_t^2) + \\epsilon ) ) \\\\
-        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+        learning\\_rate &= \\sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( E(g_t^2) + \\epsilon ) }
+
+        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
 
     Args:
-        learning_rate(float): global learning rate
-        rho(float): rho in equation
-        epsilon(float): epsilon in equation
-        regularization: A Regularizer, such as
-                        fluid.regularizer.L2DecayRegularizer.
-        name: A optional name prefix.
+        learning_rate(float|Variable): global learning rate.
+        epsilon(float): a small float number for numeric stability. Default 1.0e-6.
+        rho(float): a floating point value indicating the decay rate. Default 0.95.
+        regularization(WeightDecayRegularizer, optional): A Regularizer, such as
+            fluid.regularizer.L2DecayRegularizer. Default None, meaning that there
+            is no regularization.
+        name(str, optional): An optional name prefix for debugging. Default None.
 
     Examples:
         .. code-block:: python
 
            import paddle.fluid as fluid
+
+           image = fluid.layers.data(name='image', shape=[28], dtype='float32')
+           fc = fluid.layers.fc(image, size=10)
+           cost = fluid.layers.reduce_mean(fc)
            optimizer = fluid.optimizer.Adadelta(
                learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
-           _, params_grads = optimizer.minimize(cost)
 
-    Notes:
-       Currently, AdadeltaOptimizer doesn't support sparse parameter optimization.
+           # optimizer_ops is a list of the optimizer operators used to update
+           # the parameters; params_grads is a list of (param, param_grad) pairs,
+           # where param is a parameter and param_grad is its gradient variable.
+           optimizer_ops, params_grads = optimizer.minimize(cost)
     """
 
     _avg_squared_grad_acc_str = "_avg_squared_grad"
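
The three equations added to the docstring map directly onto the following minimal NumPy sketch of a single Adadelta step. It is illustrative only: the function name `adadelta_step`, its accumulator arguments, and the toy usage at the end are not part of the fluid API, and the real update is performed inside Paddle's Adadelta operator rather than by Python code like this.

    import numpy as np

    def adadelta_step(param, grad, avg_squared_grad, avg_squared_update,
                      rho=0.95, epsilon=1.0e-6):
        # E(g_t^2) = rho * E(g_{t-1}^2) + (1 - rho) * g^2
        avg_squared_grad = rho * avg_squared_grad + (1 - rho) * grad ** 2
        # learning_rate = sqrt((E(dx_{t-1}^2) + epsilon) / (E(g_t^2) + epsilon))
        learning_rate = np.sqrt(
            (avg_squared_update + epsilon) / (avg_squared_grad + epsilon))
        # The step applied to the parameter: dx = -learning_rate * g
        dx = -learning_rate * grad
        # E(dx_t^2) = rho * E(dx_{t-1}^2) + (1 - rho) * dx^2
        avg_squared_update = rho * avg_squared_update + (1 - rho) * dx ** 2
        return param + dx, avg_squared_grad, avg_squared_update

    # Toy usage: one update of a 3-element parameter vector from zero state.
    param = np.zeros(3)
    grad = np.array([0.1, -0.2, 0.3])
    state_g = np.zeros(3)
    state_dx = np.zeros(3)
    param, state_g, state_dx = adadelta_step(param, grad, state_g, state_dx)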