@@ -583,16 +583,44 @@ class DecayedAdagradOptimizer(Optimizer):
class AdadeltaOptimizer(Optimizer):
-    """Simple Adadelta optimizer with average squared grad state and
+    """
+    **Adadelta Optimizer**
+
+    Simple Adadelta optimizer with average squared grad state and
    average squared update state.
    For details of Adadelta, please refer to
    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
    <http://www.matthewzeiler.com/pubs/googleTR2012/googleTR2012.pdf>`_.

    .. math::

        E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2 \\\\
        learning\\_rate &= sqrt( ( E(dx_{t-1}^2) + \\epsilon ) / ( \\
                          E(g_t^2) + \\epsilon ) ) \\\\
        E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2
+
+    Args:
+        learning_rate(float): global learning rate
+        rho(float): rho in the equations above
+        epsilon(float): epsilon in the equations above
+
+    Examples:
+        .. code-block:: python
+
+            optimizer = fluid.optimizer.Adadelta(
+                learning_rate=0.0003, epsilon=1.0e-6, rho=0.95)
+            _, params_grads = optimizer.minimize(cost)
    """

    _avg_squared_grad_acc_str = "_avg_squared_grad"
    _avg_squared_update_acc_str = "_avg_squared_update"

    def __init__(self, learning_rate, epsilon=1.0e-6, rho=0.95, **kwargs):
-        assert learning_rate is not None
-        assert epsilon is not None
-        assert rho is not None
+        if learning_rate is None:
+            raise ValueError("learning_rate is not set.")
+        if epsilon is None:
+            raise ValueError("epsilon is not set.")
+        if rho is None:
+            raise ValueError("rho is not set.")
        super(AdadeltaOptimizer, self).__init__(
            learning_rate=learning_rate, **kwargs)
        self.type = "adadelta"
@@ -600,14 +628,16 @@ class AdadeltaOptimizer(Optimizer):
        self._rho = rho

    def _create_accumulators(self, block, parameters):
-        assert isinstance(block, framework.Block)
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not instance of framework.Block.")

        for p in parameters:
            self._add_accumulator(self._avg_squared_grad_acc_str, p)
            self._add_accumulator(self._avg_squared_update_acc_str, p)

    def _append_optimize_op(self, block, param_and_grad):
-        assert isinstance(block, framework.Block)
+        if not isinstance(block, framework.Block):
+            raise TypeError("block is not instance of framework.Block.")

        avg_squared_grad_acc = self._get_accumulator(
            self._avg_squared_grad_acc_str, param_and_grad[0])
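For reference, the update rule documented in the docstring above can be written as a small standalone NumPy sketch. This is illustrative only and not part of the diff or of the Paddle API; the function name adadelta_step and its argument names are hypothetical, and the default rho/epsilon values simply mirror the constructor defaults shown above.

import numpy as np

def adadelta_step(param, grad, avg_squared_grad, avg_squared_update,
                  rho=0.95, epsilon=1.0e-6):
    # E(g_t^2) = rho * E(g_{t-1}^2) + (1 - rho) * g^2
    avg_squared_grad = rho * avg_squared_grad + (1 - rho) * grad ** 2
    # learning_rate = sqrt((E(dx_{t-1}^2) + epsilon) / (E(g_t^2) + epsilon))
    learning_rate = np.sqrt((avg_squared_update + epsilon) /
                            (avg_squared_grad + epsilon))
    update = -learning_rate * grad
    # E(dx_t^2) = rho * E(dx_{t-1}^2) + (1 - rho) * (-g * learning_rate)^2
    avg_squared_update = rho * avg_squared_update + (1 - rho) * update ** 2
    return param + update, avg_squared_grad, avg_squared_update

In the operator-based code above, the _avg_squared_grad and _avg_squared_update accumulators created in _create_accumulators hold these two running averages per parameter; the element-wise update itself is carried out by the underlying "adadelta" op (self.type = "adadelta"), and the excerpt ends partway through _append_optimize_op.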