Add support for optimizer

revert-15207-remove_op_handle_lock_and_fix_var
minqiyang 6 years ago
parent 224c90a84c
commit 68e9b841ab

@@ -104,7 +104,7 @@ class Autograd {
 framework::Variable* CreateVariable(const std::string& name,
                                     const framework::DDim& dim, float val,
                                     framework::Scope* scope,
-                                    bool random_name = true) {
+                                    bool random_name = false) {
   std::string varname = name;
   if (random_name) {
     std::mt19937 rng;

@@ -45,6 +45,15 @@ class VarBase {
   framework::LoDTensor& Grad();

+  inline framework::Variable* GradVar() { return grads_; }
+
+  inline std::string GradName() const {
+    PADDLE_ENFORCE(
+        var_desc_,
+        "Couldn't get gradient variable's name, please call backward() first");
+    return string::Sprintf("%s@IGrad", var_desc_->Name());
+  }
+
   OpBase* pre_op_;
   int pre_op_out_idx_;

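The GradName() helper above encodes the gradient-naming rule used by this imperative path: the gradient of a forward variable is addressed by appending the @IGrad suffix to the variable's name. A minimal Python sketch of just that rule (the helper and the example name are illustrative, not part of the Paddle API):

def grad_name(forward_name):
    # Mirrors string::Sprintf("%s@IGrad", var_desc_->Name()) in GradName().
    if not forward_name:
        raise ValueError("Couldn't get gradient variable's name, "
                         "please call backward() first")
    return "%s@IGrad" % forward_name

assert grad_name("fc_0.w_0") == "fc_0.w_0@IGrad"
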
@@ -52,7 +52,7 @@ class Tracer {
              const std::vector<VarBase*>& outputs, framework::BlockDesc* block,
              const bool stop_gradient) {
     framework::OpDesc* op_desc = op->op_desc_;
-    VLOG(3) << "tracer tracing " << op_desc->Type();
+    LOG(ERROR) << "tracer tracing " << op_desc->Type();
     op_desc->InferShape(*block);
     op_desc->InferVarType(block);
     std::unique_ptr<framework::OperatorBase> op_base =
@@ -61,7 +61,10 @@ class Tracer {
     *op->input_vars_ = inputs;
     for (VarBase* input : inputs) {
       const std::string vname = input->var_desc_->Name();
+      LOG(ERROR) << "input: " << vname;
+      LOG(ERROR) << "input var: " << input->var_;
       framework::Variable* var = root_scope_->Var(vname);
+      LOG(ERROR) << "var_ in tracer pointer: " << var;
       input->var_ = var;
       if (!var->IsInitialized()) {
         framework::VarDesc* var_desc = block->FindVar(vname);
@@ -84,6 +87,7 @@ class Tracer {
     *op->output_vars_ = outputs;
     for (size_t i = 0; i < outputs.size(); ++i) {
       const std::string vname = outputs[i]->var_desc_->Name();
+      LOG(ERROR) << "output name: " << vname;
       framework::Variable* var = root_scope_->Var(vname);
       if (!var->IsInitialized()) {
         framework::VarDesc* var_desc = block->FindVar(vname);
@@ -98,7 +102,7 @@ class Tracer {
       outputs[i]->pre_op_out_idx_ = i;
     }
-    VLOG(3) << "tracer running " << op_desc->Type();
+    LOG(ERROR) << "tracer running " << op_desc->Type();
     op_base->Run(*root_scope_, platform::CPUPlace());
     if (!stop_gradient) {
       framework::OpDesc* grad_op_desc;

@@ -29,6 +29,8 @@ class SGDOpKernel : public framework::OpKernel<T> {
     const auto *param_var = ctx.InputVar("Param");
     const auto *grad_var = ctx.InputVar("Grad");
+    LOG(ERROR) << "grad_var: " << grad_var;
     if (param_var->IsType<framework::LoDTensor>()) {
       const auto *param = ctx.Input<framework::Tensor>("Param");
       auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
@@ -39,8 +41,11 @@ class SGDOpKernel : public framework::OpKernel<T> {
       const auto *grad = ctx.Input<framework::Tensor>("Grad");
       auto p = framework::EigenVector<T>::Flatten(*param);
+      LOG(ERROR) << "param flattened";
       auto g = framework::EigenVector<T>::Flatten(*grad);
+      LOG(ERROR) << "grad flattened";
       auto o = framework::EigenVector<T>::Flatten(*param_out);
+      LOG(ERROR) << "paramout flattened";
       auto *lr = learning_rate->data<T>();
       o = p - lr[0] * g;

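For reference, the Eigen expression o = p - lr[0] * g in the kernel above is the plain element-wise SGD step. A NumPy sketch of the same arithmetic, as an illustration of the math rather than the kernel itself:

import numpy as np

def sgd_update(param, grad, lr):
    # Element-wise update mirroring o = p - lr[0] * g.
    return param - lr * grad

p = np.array([0.5, -1.0, 2.0], dtype=np.float32)
g = np.array([0.1, 0.2, -0.3], dtype=np.float32)
print(sgd_update(p, g, lr=1e-3))  # approx. [0.4999, -1.0002, 2.0003]
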
@@ -117,10 +117,23 @@ PYBIND11_MODULE(core, m) {
            [](imperative::VarBase &self, framework::Scope *scope) {
              self.RunBackward(scope);
            })
+      .def("_grad_var",
+           [](const imperative::VarBase &self) {
+             LOG(ERROR) << "grad_var_ pointer: " << self.grads_;
+             return self.grads_;
+           },
+           py::return_value_policy::reference)
+      .def("_grad_name", &imperative::VarBase::GradName)
       .def("_grad", &imperative::VarBase::Grad)
+      .def("_print_var_pointer",
+           [](const imperative::VarBase &self) {
+             LOG(ERROR) << self.var_desc_->Name()
+                        << " print_var pointer: " << self.var_;
+           })
       .def_property("value",
                     [](const imperative::VarBase &self) { return self.var_; },
                     [](imperative::VarBase &self, framework::Variable *var) {
+                      LOG(ERROR) << "set var to pointer: " << var;
                       self.var_ = var;
                     },
                     py::return_value_policy::reference)

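A hedged sketch of how the new bindings surface on the Python side; ivar stands for a core.VarBase attached to an imperative Variable, so the function is illustrative rather than runnable on its own:

def describe_grad(ivar):
    # Uses only methods bound in the hunk above:
    #   _grad_name() formats "<name>@IGrad" (and enforces that var_desc_ is set),
    #   _grad_var() returns the underlying grads_ Variable by reference.
    return ivar._grad_name(), ivar._grad_var()
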
@@ -19,7 +19,6 @@ import contextlib
 import os
 import re
 import six
-import sys

 import numpy as np
@@ -369,6 +368,7 @@ class Variable(object):
             self._ivar.stop_gradient = stop_gradient

     def _numpy(self):
+        print("get_variable_tensor", self.desc.name())
         scope = _imperative_tracer().get_scope()
         tensor = core.get_variable_tensor(scope, self.desc.name())
         return np.array(tensor)
@@ -380,6 +380,14 @@ class Variable(object):
     def _gradient(self):
         return np.array(self._ivar._grad())

+    @property
+    def _value(self):
+        return self._ivar.value
+
+    @_value.setter
+    def _value(self, v):
+        self._ivar.value = v
+
     def __str__(self):
         return self.to_string(True)
@@ -632,6 +640,7 @@ class Operator(object):
         if inputs is not None:
             for in_proto in proto.inputs:
+                print("create op: find_name", in_proto.name)
                 found = find_name(inputs, in_proto.name)
                 assert found or in_proto.dispensable, "Input {} not found".format(
                     in_proto.name)
@@ -695,9 +704,11 @@ class Operator(object):
                 self._update_desc_attr(attr_name, attr_val)
         self.desc.check_attrs()
         if self._has_kernel(type):
             self.desc.infer_var_type(self.block.desc)
             self.desc.infer_shape(self.block.desc)
         if _in_imperative_mode():
             self.iop = core.OpBase()
             self.iop.desc = self.desc
@@ -1167,6 +1178,7 @@ class Block(object):
     def create_var(self, *args, **kwargs):
         var = Variable(block=self, *args, **kwargs)
         if 'initializer' in kwargs:
+            print("initializer, ", type(kwargs['initializer']))
             kwargs['initializer'](var, self)
         return var
@@ -1281,6 +1293,16 @@ class Block(object):
         """
         op_desc = self.desc.append_op()
         op = Operator(block=self, desc=op_desc, *args, **kwargs)
+        print("op inputs: ", [v._numpy() for v in op.inputs])
+        print("op inputs: ", [v for v in op.inputs])
+        import sys
+        sys.stdout.flush()
+        for v in op.inputs:
+            v._ivar._print_var_pointer()
+        print("print var pointer end")
+        import sys
+        sys.stdout.flush()
         if _in_imperative_mode():
             _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
                                        [v._ivar for v in op.outputs], self.desc,
@@ -1338,6 +1360,10 @@ class Block(object):
             _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
                                        [v._ivar for v in op.outputs], self.desc,
                                        kwargs.get("stop_gradient", False))
+            print([v.name for v in op.outputs])
+            for v in op.outputs:
+                v._ivar._print_var_pointer()
+            print("fill_constant end")
         self.ops.insert(0, op)
         return op

@@ -153,6 +153,7 @@ class ConstantInitializer(Initializer):
         assert isinstance(var, framework.Variable)
         assert isinstance(block, framework.Block)
         # Initialization Ops should be prepended and not appended
+        print("fill_constant")
         op = block._prepend_op(
             type="fill_constant",
             outputs={"Out": var},

@@ -369,7 +369,7 @@ class LayerHelper(object):
     def set_variable_initializer(self, var, initializer):
         assert isinstance(var, Variable)
-        self.startup_program.global_block().create_var(
+        return self.startup_program.global_block().create_var(
             name=var.name,
             type=var.type,
             dtype=var.dtype,

@@ -20,6 +20,7 @@ from ..framework import convert_np_dtype_to_dtype_
 from ..framework import Variable
 from ..initializer import Constant, force_init_on_cpu
 from ..core import VarDesc
+from ..imperative import base as imperative_base
 from .layer_function_generator import templatedoc
 import numpy
@@ -126,10 +127,22 @@ def create_global_var(shape,
     """
     helper = LayerHelper("global_var", **locals())
     var = helper.create_global_variable(
-        dtype=dtype, shape=shape, persistable=persistable, name=name)
+        dtype=dtype,
+        shape=shape,
+        persistable=persistable,
+        name=name,
+        stop_gradient=True)
+    print("set_variable_initializer, ", var.name)
+    if imperative_base.enabled():
+        var = helper.set_variable_initializer(
+            var, initializer=Constant(
+                value=float(value), force_cpu=force_cpu))
+        print("get var", var)
+    else:
-    helper.set_variable_initializer(
-        var, initializer=Constant(
-            value=float(value), force_cpu=force_cpu))
+        helper.set_variable_initializer(
+            var, initializer=Constant(
+                value=float(value), force_cpu=force_cpu))
     return var

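With the change above, create_global_var marks the variable as stop_gradient=True and, in imperative mode, keeps the Variable returned by set_variable_initializer (which now returns the var it creates in the startup block). A hedged usage sketch of the call site; the variable name is illustrative:

import paddle.fluid as fluid

# A persistable scalar such as an optimizer's learning rate.
lr = fluid.layers.create_global_var(
    shape=[1], value=1e-3, dtype='float32',
    persistable=True, name='learning_rate')
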
@@ -30,6 +30,7 @@ from .initializer import Constant
 from .layer_helper import LayerHelper
 from .layers import ops
 from .regularizer import append_regularization_ops
+from .imperative import base as imperative_base

 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl',
@@ -108,6 +109,7 @@ class Optimizer(object):
         # create learning rate variable for every parameter
         param = param_and_grad[0]
         param_lr = param.optimize_attr['learning_rate']
+        print("param_lr: ", param_lr, self._global_learning_rate()._numpy())
         if type(param_lr) == Variable:
             return param_lr
         else:
@@ -301,6 +303,25 @@ class Optimizer(object):
         This method combines interface `append_backward()` and
         `create_optimization_pass()` into one.
         """
+        if imperative_base.enabled:
+            if parameter_list is not None:
+                params_grads = parameter_list
+            else:
+                program = loss.block.program
+                parameters = program.global_block().all_parameters()
+                params_grads = []
+                for param in parameters:
+                    grad_var = Variable(
+                        block=loss.block,
+                        name=param._ivar._grad_name(),
+                        stop_gradient=True)
+                    grad_var._value = param._ivar._grad_var()
+                    print("create grad var: ", grad_var.name)
+                    print("grad_var value: ", grad_var._numpy())
+                    import sys
+                    sys.stdout.flush()
+                    params_grads.append((param, grad_var))
+        else:
-        params_grads = append_backward(loss, parameter_list, no_grad_set,
-                                       [error_clip_callback])
+            params_grads = append_backward(loss, parameter_list, no_grad_set,
+                                           [error_clip_callback])
@@ -356,6 +377,10 @@ class SGDOptimizer(Optimizer):
     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
+        print("append sgd")
+        import sys
+        sys.stdout.flush()
         # create the optimize op
         sgd_op = block.append_op(
             type=self.type,

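The imperative branch added to minimize() above builds params_grads directly from each parameter's VarBase instead of calling append_backward(). A condensed, hedged sketch of that control flow; the helper name is made up, while every attribute it touches comes from the hunks in this commit:

from paddle.fluid.framework import Variable

def collect_imperative_params_grads(loss, parameter_list=None):
    # Mirror of the imperative path in Optimizer.minimize(): wrap each
    # parameter's gradient VarBase in a Variable named "<param name>@IGrad".
    if parameter_list is not None:
        return parameter_list
    params_grads = []
    for param in loss.block.program.global_block().all_parameters():
        grad_var = Variable(
            block=loss.block,
            name=param._ivar._grad_name(),
            stop_gradient=True)
        grad_var._value = param._ivar._grad_var()
        params_grads.append((param, grad_var))
    return params_grads
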
@@ -18,6 +18,7 @@ import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid import core
+from paddle.fluid.optimizer import SGDOptimizer
 from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
 from paddle.fluid.imperative.base import to_variable
@@ -119,7 +120,11 @@ class TestImperativeMnist(unittest.TestCase):
             out._backward()
             filter_grad = mnist._simple_img_conv_pool_1._conv2d._filter_param._gradient(
             )
-            print(filter_grad)
+            # print(filter_grad)
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            sgd.minimize(out)
         # np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
         # with fluid.imperative.guard():
         #     mlp = MLP()
