Implement FC layer with helper (#4726)
* Implement FC layer with helper
* Update LayerHelper
* Add debug string for Python ProtoBuf and rename `Sync` to `Flush`
* Add check of ProtoBuf initialization
* Layer wrapper for FC
* Fix unittest
* Fix CI
* Add code generator
* AttributeChecker: better error log and specialize bool, since lots of types can be cast to bool
* Complete mlp, fit_a_line (see the usage sketch below)
parent 9fc593603b
commit 8e52b34a0c
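For orientation, the new layer functions compose like this; a minimal sketch mirroring `test_fit_a_line` in the test file at the end of this diff (`program` defaults to `g_program` when omitted):

from paddle.v2.framework.layers import fc_layer, data_layer, square_error_cost, mean

# Declare inputs, stack an FC layer, and build a squared-error loss.
x = data_layer(name='x', shape=[13], data_type='float32')
y = data_layer(name='y', shape=[1], data_type='float32')
y_predict = fc_layer(input=x, size=1, act=None)
cost = square_error_cost(input=y_predict, label=y)
avg_cost = mean(x=cost)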
python/paddle/v2/framework/layer_helper.py
@@ -0,0 +1,160 @@
from paddle.v2.framework.framework import Variable, OpProtoHolder, g_program
import paddle.v2.framework.core as core
import copy
import itertools


def unique_name(prefix):
    uid = core.unique_integer()  # unique during whole process.
    return "_".join([prefix, str(uid)])
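# For example, assuming the process-wide counter starts at zero,
# unique_name("fc") returns "fc_0" and a second call returns "fc_1".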


class LayerHelper(object):
    def __init__(self, layer_type, **kwargs):
        self.kwargs = kwargs
        self.layer_type = layer_type
        name = self.kwargs.get('name', None)
        if name is None:
            self.kwargs['name'] = unique_name(self.layer_type)

    @property
    def name(self):
        return self.kwargs['name']

    @property
    def program(self):
        prog = self.kwargs.get('program', None)
        if prog is None:
            return g_program
        else:
            return prog

    def append_op(self, *args, **kwargs):
        return self.program.current_block().append_op(*args, **kwargs)

    def multiple_input(self, input_param_name='input'):
        inputs = self.kwargs.get(input_param_name, [])
        type_error = TypeError(
            "Input of {0} layer should be Variable or sequence of Variable".
            format(self.layer_type))
        if isinstance(inputs, Variable):
            inputs = [inputs]
        elif not isinstance(inputs, list) and not isinstance(inputs, tuple):
            raise type_error
        else:
            for each in inputs:
                if not isinstance(each, Variable):
                    raise type_error
        return inputs

    def input(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        if len(inputs) != 1:
            raise ValueError(
                "{0} layer only takes one input".format(self.layer_type))
        return inputs[0]

    @property
    def param_attr(self):
        default = {
            'name': None,
            'init_attr': {
                'type': 'uniform_random',
                'min': -1.0,
                'max': 1.0
            }
        }
        actual = self.kwargs.get('param_attr', None)
        return actual if actual is not None else default

    def bias_attr(self, size, dtype):
        bias_attr = self.kwargs.get('bias_attr', False)
        if bias_attr is None or bias_attr:
            bias_attr = {
                'name': None,
                'init_attr': {
                    'type': 'fill_constant',
                    'value': 0.0,
                    'shape': [size],
                    'dataType': dtype
                }
            }
        return bias_attr
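    # Note on bias_attr semantics: False (this method's default) disables the
    # bias and is returned unchanged; None or any truthy value is replaced by
    # the zero-initialized fill_constant attribute built above.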

    def multiple_param_attr(self, length):
        param_attr = self.param_attr
        if isinstance(param_attr, dict):
            param_attr = [param_attr]

        if len(param_attr) != 1 and len(param_attr) != length:
            raise ValueError("parameter number mismatch")
        elif len(param_attr) == 1 and length != 1:
            tmp = [None] * length
            for i in xrange(length):
                tmp[i] = copy.deepcopy(param_attr[0])
            param_attr = tmp
        return param_attr

    def iter_inputs_and_params(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        param_attrs = self.multiple_param_attr(len(inputs))
        for ipt, param_attr in itertools.izip(inputs, param_attrs):
            yield ipt, param_attr

    def input_dtype(self, input_param_name='input'):
        inputs = self.multiple_input(input_param_name)
        dtype = None
        for each in inputs:
            if dtype is None:
                dtype = each.data_type
            elif dtype != each.data_type:
                raise ValueError("Data Type mismatch")
        return dtype

    def create_parameter(self, attr, shape, dtype, suffix='w'):
        if attr['name'] is None:
            attr['name'] = unique_name(".".join([self.name, suffix]))
        return self.program.global_block().create_parameter(
            name=attr['name'],
            dtype=dtype,
            shape=shape,
            initialize_attr=attr['init_attr'])

    def create_tmp_variable(self, dtype):
        return self.program.current_block().create_var(
            name=unique_name(".".join([self.name, 'tmp'])), dtype=dtype)

    def create_global_variable(self, *args, **kwargs):
        return self.program.global_block().create_var(*args, **kwargs)

    def append_bias_op(self, input_var):
        bias_attr = self.bias_attr(
            self.kwargs['size'], dtype=input_var.data_type)
        if not bias_attr:
            return input_var
        b = self.create_parameter(
            attr=bias_attr,
            shape=[self.kwargs['size']],
            dtype=input_var.data_type,
            suffix='b')
        tmp = self.create_tmp_variable(dtype=input_var.data_type)
        self.append_op(
            type='elementwise_add',
            inputs={'X': [input_var],
                    'Y': [b]},
            outputs={'Out': [tmp]})
        return tmp

    def append_activation(self, input_var):
        act = self.kwargs.get('act', None)
        if act is None:
            return input_var
        if isinstance(act, basestring):
            act = {'type': act}
        tmp = self.create_tmp_variable(dtype=input_var.data_type)
        act_type = act.pop('type')
        self.append_op(
            type=act_type,
            inputs={"X": [input_var]},
            outputs={"Y": [tmp]},
            attrs=act)
        return tmp
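The intended pattern for layer wrappers built on this helper is: construct a LayerHelper from the wrapper's `locals()`, create parameters and temporary output variables through it, then append ops. `fc_layer` in `layers.py` below is the canonical example; here is a minimal sketch of the same pattern (the wrapper name `my_relu_layer` is hypothetical, not part of this PR):

def my_relu_layer(input, program=None):
    # LayerHelper picks up `input` and `program` from locals().
    helper = LayerHelper('relu', **locals())
    out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(type='relu', inputs={'X': [input]}, outputs={'Y': [out]})
    return out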
python/paddle/v2/framework/layers.py
@@ -0,0 +1,143 @@
from paddle.v2.framework.layer_helper import LayerHelper
import paddle.v2.framework.core as core
from paddle.v2.framework.framework import OpProtoHolder, Variable
import re

__all__ = ['fc_layer', 'data_layer', 'cross_entropy']


def fc_layer(input,
             size,
             param_attr=None,
             bias_attr=True,
             name=None,
             act=None,
             num_flatten_dims=1,
             program=None):
    # create helper
    helper = LayerHelper('fc', **locals())

    dtype = helper.input_dtype()

    # mul
    mul_results = []
    for input_var, param_attr in helper.iter_inputs_and_params():
        input_shape = input_var.shape
        param_shape = list(input_shape[num_flatten_dims:]) + [size]
        w = helper.create_parameter(
            attr=param_attr, shape=param_shape, dtype=dtype)
        tmp = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="mul",
            inputs={
                "X": input_var,
                "Y": w,
            },
            outputs={"Out": tmp},
            attrs={'x_num_col_dims': num_flatten_dims})
        mul_results.append(tmp)

    # sum
    if len(mul_results) == 1:
        pre_bias = mul_results[0]
    else:
        pre_bias = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
    # add bias
    pre_activation = helper.append_bias_op(pre_bias)
    # add activation
    return helper.append_activation(pre_activation)
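# `input` may be a single Variable or a sequence of Variables; with several
# inputs, one weight is created per input and the 'mul' results are summed
# before the bias, e.g. (hypothetical variables feat_a and feat_b):
#     out = fc_layer(input=[feat_a, feat_b], size=64, act='relu')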


def data_layer(name,
               shape,
               data_type='float32',
               type=core.VarDesc.VarType.LOD_TENSOR,
               program=None):
    helper = LayerHelper('data', **locals())
    shape = [-1] + shape  # prepend batch size as -1
    return helper.create_global_variable(
        name=name, shape=shape, dtype=data_type, type=type)


def _convert_(name):
    s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
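# _convert_ maps the CamelCase input names from an op's proto to the
# snake_case keyword arguments the generated functions accept, e.g.
# _convert_('X') -> 'x' and _convert_('MaxIndex') -> 'max_index'.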


def _create_op_func_(op_type):
    op_proto = OpProtoHolder.instance().get_op_proto(op_type)
    if len(op_proto.outputs) != 1:
        raise ValueError(
            "Only single-output operators can be automatically generated")

    if op_proto.outputs[0].duplicable:
        raise ValueError(
            "Only non-duplicable operators can be automatically generated")

    o_name = op_proto.outputs[0].name

    def func(**kwargs):
        helper = LayerHelper(op_type, **kwargs)
        inputs = dict()
        dtype = None
        for ipt in op_proto.inputs:
            name = _convert_(ipt.name)
            val = kwargs.pop(name, [])
            if not isinstance(val, list) and not isinstance(val, tuple):
                val = [val]
            for each in val:
                if not isinstance(each, Variable):
                    raise ValueError(
                        "input of {0} must be variable".format(op_type))

                if dtype is None:
                    dtype = each.data_type
                elif dtype != each.data_type:
                    raise ValueError(
                        "all inputs of operator {0} must have the same dtype".
                        format(op_type))
            inputs[ipt.name] = val

        out = helper.create_tmp_variable(dtype=dtype)
        helper.append_op(
            type=op_type, inputs=inputs, outputs={o_name: [out]}, attrs=kwargs)
        return out

    func.__name__ = op_type
    globals()[op_type] = func
    global __all__
    __all__.append(op_type)


_create_op_func_('mean')
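# The call above generates and exports a `mean` layer function whose keyword
# argument is _convert_ of the proto input name; the tests below use it as
#     avg_cost = mean(x=cost, program=program)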


def cross_entropy(input, label, **kwargs):
    helper = LayerHelper('cross_entropy', **kwargs)
    out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(
        type='cross_entropy',
        inputs={'X': [input],
                'Label': [label]},
        outputs={'Y': [out]},
        attrs=kwargs)
    return out


def square_error_cost(input, label, **kwargs):
    helper = LayerHelper('square_error_cost', **kwargs)
    minus_out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(
        type='elementwise_sub',
        inputs={'X': [input],
                'Y': [label]},
        outputs={'Out': [minus_out]})

    square_out = helper.create_tmp_variable(dtype=input.data_type)
    helper.append_op(
        type='pow',
        inputs={'X': [minus_out]},
        outputs={'Y': [square_out]},
        attrs={'factor': 2.0})
    return square_out
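# For reference, square_error_cost composes two ops to compute the
# elementwise squared difference:
#     minus_out  = input - label     (elementwise_sub)
#     square_out = minus_out ** 2.0  (pow with factor=2.0)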
@@ -0,0 +1,43 @@
from paddle.v2.framework.layers import fc_layer, data_layer, cross_entropy, mean, square_error_cost
from paddle.v2.framework.framework import Program, g_program
import paddle.v2.framework.core as core
import unittest


class TestBook(unittest.TestCase):
    def test_fit_a_line(self):
        pd = core.ProgramDesc.__create_program_desc__()
        program = Program(desc=pd)
        x = data_layer(
            name='x', shape=[13], data_type='float32', program=program)
        y_predict = fc_layer(input=x, size=1, act=None, program=program)

        y = data_layer(
            name='y', shape=[1], data_type='float32', program=program)
        cost = square_error_cost(input=y_predict, label=y, program=program)

        avg_cost = mean(x=cost, program=program)
        self.assertIsNotNone(avg_cost)
        print str(program)

    def test_recognize_digits_mlp(self):
        pd = core.ProgramDesc.__create_program_desc__()
        program = Program(desc=pd)

        # Each layer is given `program` explicitly; layers would otherwise
        # fall back to the global `g_program`.
        images = data_layer(
            name='pixel', shape=[784], data_type='float32', program=program)
        label = data_layer(
            name='label', shape=[1], data_type='int32', program=program)
        hidden1 = fc_layer(input=images, size=128, act='relu', program=program)
        hidden2 = fc_layer(input=hidden1, size=64, act='relu', program=program)
        predict = fc_layer(
            input=hidden2, size=10, act='softmax', program=program)
        cost = cross_entropy(input=predict, label=label, program=program)
        avg_cost = mean(x=cost, program=program)
        self.assertIsNotNone(avg_cost)
        print str(program)


if __name__ == '__main__':
    unittest.main()