@@ -30,11 +30,14 @@ strategy according to this module.
 """
 
 
-def exponential_decay(learning_rate,
-                      global_step,
-                      decay_steps,
-                      decay_rate,
-                      staircase=False):
+def float_global_step():
+    # the first global step is zero in learning rate decay
+    global_step = layers.global_step_counter() - 1
+    global_step = layers.cast(global_step, 'float32')
+    return global_step
+
+
+def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     """Applies exponential decay to the learning rate.
 
     ```python
@@ -44,7 +47,6 @@ def exponential_decay(learning_rate,
     Args:
         learning_rate: A scalar float32 value or a Variable. This
         will be the initial learning rate during training
-        global_step: A Variable that record the training step.
         decay_steps: A Python `int32` number.
         decay_rate: A Python `float` number.
         staircase: Boolean. If set true, decay the learning rate every decay_steps.
@@ -52,8 +54,7 @@ def exponential_decay(learning_rate,
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for exponential_decay.")
+    global_step = float_global_step()
 
     with init_on_cpu():
         # update learning_rate
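For reference, exponential decay scales the learning rate by `decay_rate` once per `decay_steps` steps, either continuously or in discrete jumps when `staircase=True`. Below is a minimal pure-Python sketch of that arithmetic; the helper name `exponential_decay_value` is hypothetical and only illustrates the math, not the fluid graph ops built above.

```python
import math


def exponential_decay_value(learning_rate, global_step, decay_steps,
                            decay_rate, staircase=False):
    # lr * decay_rate^(step / decay_steps); staircase floors the exponent
    # so the rate decays in discrete jumps every decay_steps steps.
    exponent = global_step / decay_steps
    if staircase:
        exponent = math.floor(exponent)
    return learning_rate * decay_rate ** exponent


# e.g. lr=0.1, decay_rate=0.5, decay_steps=100, staircase=True:
# step 0 -> 0.1, step 100 -> 0.05, step 200 -> 0.025
```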
@@ -65,23 +66,17 @@ def exponential_decay(learning_rate,
     return decayed_lr
 
 
-def natural_exp_decay(learning_rate,
-                      global_step,
-                      decay_steps,
-                      decay_rate,
-                      staircase=False):
+def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
     """Applies natural exponential decay to the initial learning rate.
-    ```python
-    if not staircase:
-        decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
-    else:
-        decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
-    ```
+
+    >>> if not staircase:
+    >>>     decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps))
+    >>> else:
+    >>>     decayed_learning_rate = learning_rate * exp(- decay_rate * floor(global_step / decay_steps))
 
     Args:
         learning_rate: A scalar float32 value or a Variable. This
         will be the initial learning rate during training
-        global_step: A Variable that record the training step.
         decay_steps: A Python `int32` number.
         decay_rate: A Python `float` number.
         staircase: Boolean. If set true, decay the learning rate every decay_steps.
@@ -89,8 +84,7 @@ def natural_exp_decay(learning_rate,
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for natural_exp_decay.")
+    global_step = float_global_step()
 
     with init_on_cpu():
         div_res = global_step / decay_steps
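The natural-exponential variant differs from `exponential_decay` only in using `exp(-decay_rate * ratio)` rather than `decay_rate ** ratio`. A pure-Python sketch of the docstring formula follows; `natural_exp_decay_value` is a hypothetical name used only for illustration.

```python
import math


def natural_exp_decay_value(learning_rate, global_step, decay_steps,
                            decay_rate, staircase=False):
    # lr * exp(-decay_rate * (step / decay_steps)); staircase floors the ratio.
    div_res = global_step / decay_steps
    if staircase:
        div_res = math.floor(div_res)
    return learning_rate * math.exp(-decay_rate * div_res)
```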
@@ -101,23 +95,17 @@ def natural_exp_decay(learning_rate,
     return decayed_lr
 
 
-def inverse_time_decay(learning_rate,
-                       global_step,
-                       decay_steps,
-                       decay_rate,
-                       staircase=False):
+def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
    """Applies inverse time decay to the initial learning rate.
-    ```python
-    if staircase:
-        decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
-    else:
-        decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
-    ```
+
+    >>> if staircase:
+    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_steps))
+    >>> else:
+    >>>     decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_steps)
+
     Args:
         learning_rate: A scalar float32 value or a Variable. This
         will be the initial learning rate during training
-        global_step: A Variable that record the training step.
-        will be the initial learning rate during training.
         decay_steps: A Python `int32` number.
         decay_rate: A Python `float` number.
         staircase: Boolean. If set true, decay the learning rate every decay_steps.
@@ -125,8 +113,7 @@ def inverse_time_decay(learning_rate,
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for inverse_time_decay.")
+    global_step = float_global_step()
 
     with init_on_cpu():
         div_res = global_step / decay_steps
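Inverse time decay divides rather than exponentiates: the rate follows `lr / (1 + decay_rate * step / decay_steps)`. A pure-Python sketch of that formula (hypothetical helper name, illustration only):

```python
import math


def inverse_time_decay_value(learning_rate, global_step, decay_steps,
                             decay_rate, staircase=False):
    # lr / (1 + decay_rate * (step / decay_steps)); staircase floors the ratio.
    div_res = global_step / decay_steps
    if staircase:
        div_res = math.floor(div_res)
    return learning_rate / (1 + decay_rate * div_res)
```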
@@ -139,26 +126,22 @@ def inverse_time_decay(learning_rate,
 
 
 def polynomial_decay(learning_rate,
-                     global_step,
                      decay_steps,
                      end_learning_rate=0.0001,
                      power=1.0,
                      cycle=False):
     """Applies polynomial decay to the initial learning rate.
-    ```python
-    if cycle:
-        decay_steps = decay_steps * ceil(global_step / decay_steps)
-    else:
-        global_step = min(global_step, decay_steps)
-    decayed_learning_rate = (learning_rate - end_learning_rate) *
-            (1 - global_step / decay_steps) ^ power +
-            end_learning_rate
-    ```
+    >>> if cycle:
+    >>>     decay_steps = decay_steps * ceil(global_step / decay_steps)
+    >>> else:
+    >>>     global_step = min(global_step, decay_steps)
+    >>> decayed_learning_rate = (learning_rate - end_learning_rate) *
+    >>>         (1 - global_step / decay_steps) ^ power +
+    >>>         end_learning_rate
 
     Args:
         learning_rate: A scalar float32 value or a Variable. This
         will be the initial learning rate during training
-        global_step: A Variable that record the training step.
         decay_steps: A Python `int32` number.
         end_learning_rate: A Python `float` number.
         power: A Python `float` number
@@ -167,8 +150,7 @@ def polynomial_decay(learning_rate,
     Returns:
         The decayed learning rate
     """
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for inverse_time_decay.")
+    global_step = float_global_step()
 
     with init_on_cpu():
         if cycle:
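The polynomial schedule interpolates from `learning_rate` down to `end_learning_rate` over `decay_steps` steps; with `cycle=True` the horizon is stretched to the next multiple of `decay_steps` instead of clamping. A pure-Python sketch of the docstring formula; the helper name is hypothetical, and the guard for `global_step == 0` is an assumption added so the cycle branch never divides by zero:

```python
import math


def polynomial_decay_value(learning_rate, global_step, decay_steps,
                           end_learning_rate=0.0001, power=1.0, cycle=False):
    # Interpolate (learning_rate -> end_learning_rate) with exponent `power`.
    if cycle:
        # Stretch decay_steps to the next multiple covering global_step.
        div_res = math.ceil(global_step / decay_steps)
        if div_res == 0:
            div_res = 1  # assumption: treat step 0 as inside the first cycle
        decay_steps = decay_steps * div_res
    else:
        # Clamp: after decay_steps the rate holds at end_learning_rate.
        global_step = min(global_step, decay_steps)
    return ((learning_rate - end_learning_rate) *
            (1 - global_step / decay_steps) ** power + end_learning_rate)
```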
@@ -193,27 +175,24 @@ def polynomial_decay(learning_rate,
     return decayed_lr
 
 
-def piecewise_decay(global_step, boundaries, values):
+def piecewise_decay(boundaries, values):
     """Applies piecewise decay to the initial learning rate.
 
-    ```python
-    boundaries = [10000, 20000]
-    values = [1.0, 0.5, 0.1]
-
-    if step < 10000:
-        learning_rate = 1.0
-    elif step >= 10000 and step < 20000:
-        learning_rate = 0.5
-    else:
-        learning_rate = 0.1
-    ```
+    >>> boundaries = [10000, 20000]
+    >>> values = [1.0, 0.5, 0.1]
+    >>>
+    >>> if step < 10000:
+    >>>     learning_rate = 1.0
+    >>> elif 10000 <= step < 20000:
+    >>>     learning_rate = 0.5
+    >>> else:
+    >>>     learning_rate = 0.1
     """
 
     if len(values) - len(boundaries) != 1:
         raise ValueError("len(values) - len(boundaries) should be 1")
 
-    if not isinstance(global_step, Variable):
-        raise ValueError("global_step is required for piecewise_decay.")
+    global_step = float_global_step()
 
     with init_on_cpu():
         lr = layers.create_global_var(
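Piecewise decay is a lookup rather than a formula: `values[i]` applies while the step is below `boundaries[i]`, and `values[-1]` applies afterwards. A pure-Python sketch of the docstring example (hypothetical helper name, illustration only):

```python
def piecewise_decay_value(global_step, boundaries, values):
    # values[i] holds until boundaries[i]; the last value holds forever.
    if len(values) - len(boundaries) != 1:
        raise ValueError("len(values) - len(boundaries) should be 1")
    for boundary, value in zip(boundaries, values):
        if global_step < boundary:
            return value
    return values[-1]


# boundaries=[10000, 20000], values=[1.0, 0.5, 0.1]:
# step 9999 -> 1.0, step 15000 -> 0.5, step 20000 -> 0.1
```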