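# Optimizer definitions for paddle.v2.framework: a generic Optimizer base
# class plus a plain SGDOptimizer built on top of it.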
import paddle.v2.framework.framework as framework

__all__ = ['SGDOptimizer']


class Optimizer(object):
    """Optimizer Base class.

    Define the common interface of an optimizer.
    Users should not use this class directly, but should use one of its
    implementations instead.
    """

    def __init__(self):
        pass

    def _append_optimize_op(self, block, param_and_grad):
        """Append the optimize operator to the block and return the added
        optimize_op.
        """
        raise NotImplementedError()

    def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None):
        """Create and add gradient operators in the BlockDesc to compute
        gradients of `loss` for the parameters in parameter_list.

        Args:
          loss: a variable generated by the cost function.
          no_grad_set: variables that should not have gradients created.
          parameter_list: parameters that need gradients computed and that
            will be updated to optimize the loss.

        Returns:
          list of (parameter, gradient) pairs.
        """
        assert isinstance(loss, framework.Variable)
        param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
                                                            set())
        if parameter_list is not None:
            parameters = parameter_list
        else:
            params = loss.block.program.global_block().all_parameters()
            parameters = [param.name for param in params]
        params_and_grads = []
        for param in parameters:
            if param not in param_grad_map:
                raise Exception("param %s is not in map" % param)
            grad_info = param_grad_map[param]
            grad_block = loss.block.program.block(grad_info[1])
            if not grad_block.has_var(grad_info[0]):
                raise Exception("grad block[%d] did not have grad var %s" %
                                (grad_info[1], grad_info[0]))
            param_var = loss.block.var(param)
            grad_var = grad_block.var(grad_info[0])
            if loss.block.has_var(grad_info[0]):
                params_and_grads.append((param_var, grad_var))
            else:
                params_and_grads.append((param_var, None))
        return params_and_grads

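    # Note: `append_backward` appears to return a map from each parameter name
    # to (gradient variable name, index of the block that holds it); the loop
    # above resolves that pair into variables, and pairs a parameter with None
    # when its gradient variable does not live in the loss's block.
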
    def create_optimization_pass(self, parameters_and_grads, loss):
        """Add optimization operators that apply the gradients to the
        variables.

        Args:
          loss: the target that this optimization is for.
          parameters_and_grads: a list of (variable, gradient) pairs to update.

        Returns:
          optimization_op_list: a list of optimization operators that will
            update the parameters using the gradients.
        """
        optimize_ops = []
        for param_and_grad in parameters_and_grads:
            if param_and_grad[1] is not None:
                optimize_op = self._append_optimize_op(loss.block,
                                                       param_and_grad)
                optimize_ops.append(optimize_op)
        return optimize_ops

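    # Only parameters that actually received a gradient are updated here:
    # pairs whose gradient entry is None are skipped, so no optimize op is
    # created for them.
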
    def minimize(self, loss, parameter_list=None, no_grad_set=None):
        """Add operations to minimize `loss` by updating `parameter_list`.

        This method combines `create_backward_pass()` and
        `create_optimization_pass()` into one call.
        """
        params_grads = self.create_backward_pass(loss, parameter_list,
                                                 no_grad_set or set())
        optimize_ops = self.create_optimization_pass(params_grads, loss)
        return optimize_ops


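# A concrete optimizer only has to implement `_append_optimize_op`; `minimize`
# then drives the whole flow: `create_backward_pass()` builds the gradient ops
# and returns (parameter, gradient) pairs, and `create_optimization_pass()`
# appends one update op per pair to the block that owns the loss.
# SGDOptimizer below is the simplest instance of that contract.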
class SGDOptimizer(Optimizer):
    """Simple SGD optimizer without any state.
    """

    def __init__(self, learning_rate):
        assert learning_rate is not None
        super(SGDOptimizer, self).__init__()
        self.type = "sgd"
        self._learning_rate = learning_rate

    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)
        lr_shape = [1]
        # create a var for the learning_rate
        lr = block.create_var(dtype="float32", shape=lr_shape, lod_level=0)

        # create an op to initialize the learning_rate
        init_op = block.append_op(
            type="fill_constant",
            outputs={"Out": lr},
            attrs={"shape": lr_shape,
                   "value": self._learning_rate})

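        # The "sgd" operator is expected to perform the plain SGD update,
        # roughly ParamOut = Param - LearningRate * Grad; the exact semantics
        # live in the C++ operator, not here.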
        # create the optimize op
        sgd_op = block.append_op(
            type=self.type,
            inputs={
                "Param": param_and_grad[0],
                "Grad": param_and_grad[1],
                "LearningRate": lr
            },
            outputs={"ParamOut": param_and_grad[0]})

        return sgd_op


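# A minimal usage sketch (hypothetical names; assumes `avg_cost` is a
# framework.Variable produced by a cost function in the default program):
#
#   optimizer = SGDOptimizer(learning_rate=0.01)
#   opts = optimizer.minimize(avg_cost)
#
# `minimize` appends both the backward ops and the sgd update ops to the
# block that owns `avg_cost`, and returns the list of created optimize ops.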