@@ -715,8 +715,8 @@ class Optimizer(object):
         params_grads = append_gradient_clip_ops(params_grads)
 
         # Add regularization if any
-        params_grads = append_regularization_ops(params_grads,
-                                                 self.regularization)
+        params_grads = append_regularization_ops(
+            params_grads, self.regularization, self._param_device_map)
 
         optimize_ops = self._create_optimization_pass(params_grads)
         if table_optimize_op is not None:
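
The changed call above now passes the optimizer's parameter-to-device map into append_regularization_ops. As a reference point (not part of the patch; the model and hyper-parameters below are made up), the user-facing path that ends up in this code is an optimizer constructed with a regularization argument and then minimized:

import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name='x', shape=[None, 13], dtype='float32')
    y = fluid.data(name='y', shape=[None, 1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    sgd = fluid.optimizer.SGD(
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=1e-4))
    # minimize() appends gradient clipping ops (if configured) and then the
    # regularization ops touched by the hunk above.
    sgd.minimize(loss)
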
@@ -1070,7 +1070,7 @@ class MomentumOptimizer(Optimizer):
 
 class DGCMomentumOptimizer(Optimizer):
     """
     :api_attr: Static Graph
 
     DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887
 
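
For background on the class named above: DGC cuts communication cost by sending only the largest-magnitude gradient entries each step and accumulating the rest locally. A rough numpy illustration of that top-k sparsification idea (illustrative only, not the operator DGCMomentumOptimizer actually uses):

import numpy as np

def topk_sparsify(grad, ratio=0.001):
    # Keep only the largest-magnitude `ratio` fraction of gradient entries;
    # the remainder stays local as a residual for later accumulation.
    flat = grad.ravel()
    k = max(1, int(flat.size * ratio))
    idx = np.argpartition(np.abs(flat), -k)[-k:]
    sparse = np.zeros_like(flat)
    sparse[idx] = flat[idx]          # the part that would be communicated
    residual = flat - sparse         # the part kept for local accumulation
    return sparse.reshape(grad.shape), residual.reshape(grad.shape)
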
@@ -2996,7 +2996,7 @@ Lamb = LambOptimizer
 
 class ModelAverage(Optimizer):
     """
     :api_attr: Static Graph
 
     The ModelAverage optimizer accumulates specific continuous historical parameters
     during training. The accumulated historical range can be controlled by the passed
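
The docstring above is truncated by the hunk boundary; the idea is that recent parameter values are accumulated during training and their average is applied for evaluation. A toy sketch of that idea in plain numpy (window length chosen arbitrarily; this is not the ModelAverage implementation):

from collections import deque
import numpy as np

window = deque(maxlen=100)           # hypothetical window of recent snapshots

def record(param_value):
    window.append(np.copy(param_value))

def averaged_param():
    return np.mean(np.stack(window), axis=0)
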
@@ -3305,7 +3305,7 @@ class ModelAverage(Optimizer):
 
 class ExponentialMovingAverage(object):
     """
     :api_attr: Static Graph
 
     Compute the moving average of parameters with exponential decay.
     Given a parameter :math:`\\theta`, its exponential moving average (EMA)
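
The sentence above is cut off by the hunk; the recurrence it introduces is the standard exponential moving average, sketched here in plain Python:

# EMA_0 = 0;  EMA_t = decay * EMA_{t-1} + (1 - decay) * theta_t
def ema_update(ema, theta, decay=0.999):
    return decay * ema + (1.0 - decay) * theta
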
@@ -3555,7 +3555,7 @@ class ExponentialMovingAverage(object):
 
 class PipelineOptimizer(object):
     """
     :api_attr: Static Graph
 
     Pipeline Optimizer
 
@@ -3857,7 +3857,7 @@ class PipelineOptimizer(object):
 
 class RecomputeOptimizer(Optimizer):
     """
     :api_attr: Static Graph
 
     Recompute Optimizer Wrapper
 
@@ -3931,7 +3931,7 @@ class RecomputeOptimizer(Optimizer):
 
     def load(self, stat_dict):
         """
         :api_attr: Static Graph
 
         load function is not supported by Recompute Optimizer for now.
         :return: None
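
The hunk does not show the body of load; given the docstring, a stub of the following shape would be typical (a hypothetical sketch, not the actual method body):

def load(self, stat_dict):
    # Loading persistables is documented as unsupported, so reject the call
    # explicitly instead of silently ignoring it.
    raise NotImplementedError(
        "load function is not supported by Recompute Optimizer for now")
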
@@ -4149,7 +4149,7 @@ class RecomputeOptimizer(Optimizer):
 
 class LookaheadOptimizer(object):
     """
     :api_attr: Static Graph
 
     This implements the Lookahead optimizer of the
     paper : https://arxiv.org/abs/1907.08610.
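
For background on the class named above: Lookahead keeps "fast" weights updated by an inner optimizer and "slow" weights that, every k inner steps, move toward the fast weights by a factor alpha. A plain-numpy sketch of that synchronization step (illustrative only, not LookaheadOptimizer itself):

import numpy as np

def lookahead_sync(slow, fast, alpha=0.5):
    # The slow weights interpolate toward the fast weights ...
    slow += alpha * (fast - slow)
    # ... and the fast weights restart from the new slow weights.
    fast[...] = slow
    return slow, fast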