|
|
|
@ -791,8 +791,8 @@ class Optimizer(object):
|
|
|
|
|
params_grads = append_gradient_clip_ops(params_grads)
|
|
|
|
|
|
|
|
|
|
# Add regularization if any
|
|
|
|
|
params_grads = append_regularization_ops(
|
|
|
|
|
params_grads, self.regularization, self._param_device_map)
|
|
|
|
|
params_grads = append_regularization_ops(params_grads,
|
|
|
|
|
self.regularization)
|
|
|
|
|
|
|
|
|
|
optimize_ops = self._create_optimization_pass(params_grads)
|
|
|
|
|
return optimize_ops
|
|
|
|
@ -1142,7 +1142,7 @@ class MomentumOptimizer(Optimizer):
|
|
|
|
|
|
|
|
|
|
class DGCMomentumOptimizer(Optimizer):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
DGC (Deep Gradient Compression) Momentum Optimizer. Original paper is https://arxiv.org/abs/1712.01887
|
|
|
|
|
|
|
|
|
@ -3068,7 +3068,7 @@ Lamb = LambOptimizer
|
|
|
|
|
|
|
|
|
|
class ModelAverage(Optimizer):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
The ModelAverage optimizer accumulates specific continuous historical parameters
|
|
|
|
|
during training. The accumulated historical range can be controlled by the passed
|
|
|
|
@ -3377,7 +3377,7 @@ class ModelAverage(Optimizer):
|
|
|
|
|
|
|
|
|
|
class ExponentialMovingAverage(object):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
Compute the moving average of parameters with exponential decay.
|
|
|
|
|
Given a parameter :math:`\\theta`, its exponential moving average (EMA)
|
|
|
|
@ -3627,7 +3627,7 @@ class ExponentialMovingAverage(object):
|
|
|
|
|
|
|
|
|
|
class PipelineOptimizer(object):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
Pipeline Optimizer: Make a program to run as pipeline, that is splitting a
|
|
|
|
|
program into multiple sections (sub-programs) and each section run on a
|
|
|
|
@ -4478,7 +4478,7 @@ class PipelineOptimizer(object):
|
|
|
|
|
|
|
|
|
|
class RecomputeOptimizer(Optimizer):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
Recompute Optimizer Wrapper
|
|
|
|
|
|
|
|
|
@ -4563,7 +4563,7 @@ class RecomputeOptimizer(Optimizer):
|
|
|
|
|
|
|
|
|
|
def load(self, stat_dict):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
load function is not supported by Recompute Optimizer for now.
|
|
|
|
|
:return: None
|
|
|
|
@ -4787,7 +4787,7 @@ class RecomputeOptimizer(Optimizer):
|
|
|
|
|
|
|
|
|
|
class LookaheadOptimizer(object):
|
|
|
|
|
"""
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
:api_attr: Static Graph
|
|
|
|
|
|
|
|
|
|
This implements the Lookahead optimizer of the
|
|
|
|
|
paper : https://arxiv.org/abs/1907.08610.
|
|
|
|
|