Feature/param attr (#5996)

* Make param_attr a strongly typed class

Fix #5819
Branch: release/0.11.0
Author: Yu Yang (committed via GitHub)
Parent: 399d3a2d74
Commit: 1b6dcc2fe8

@@ -13,13 +13,14 @@ import nets
import optimizer
import backward
import regularizer
from param_attr import ParamAttr
from core import LoDTensor, CPUPlace, GPUPlace
Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [
'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward',
'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor'
'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr'
]
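
With ParamAttr re-exported here, user code can build a typed attribute from the top-level fluid namespace instead of passing a raw dict. A minimal sketch (the attribute name 'fc.w' and the learning rate are illustrative, not part of this diff):

import paddle.v2.fluid as fluid

# Unset fields keep the defaults defined by ParamAttr below
# (initializer=None, learning_rate=1.0, regularizer=None, trainable=True).
w_attr = fluid.ParamAttr(name='fc.w', learning_rate=2.0)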

@@ -1,8 +1,10 @@
import copy
import itertools
from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating
from framework import Variable, default_main_program, default_startup_program, \
unique_name, dtype_is_floating
from paddle.v2.fluid.initializer import Constant, Xavier
from param_attr import ParamAttr
class LayerHelper(object):
@@ -59,31 +61,15 @@ class LayerHelper(object):
@property
def param_attr(self):
default = {'name': None}
actual = self.kwargs.get('param_attr', None)
if actual is None:
actual = default
for default_field in default.keys():
if default_field not in actual:
actual[default_field] = default[default_field]
return actual
return ParamAttr.to_attr(self.kwargs.get('param_attr', None))
@property
def bias_attr(self):
default = {'name': None}
bias_attr = self.kwargs.get('bias_attr', None)
if bias_attr is None:
bias_attr = default
if isinstance(bias_attr, dict):
for default_field in default.keys():
if default_field not in bias_attr:
bias_attr[default_field] = default[default_field]
return bias_attr
return ParamAttr.to_attr(self.kwargs.get('bias_attr', None))
def multiple_param_attr(self, length):
param_attr = self.param_attr
if isinstance(param_attr, dict):
if isinstance(param_attr, ParamAttr):
param_attr = [param_attr]
if len(param_attr) != 1 and len(param_attr) != length:
@@ -111,23 +97,30 @@ class LayerHelper(object):
raise ValueError("Data Type mismatch")
return dtype
def create_parameter(self, attr, shape, dtype, suffix='w',
initializer=None):
def create_parameter(self,
attr,
shape,
dtype,
is_bias=False,
default_initializer=None):
# Deepcopy the attr so that parameters can be shared in program
attr_copy = copy.deepcopy(attr)
if initializer is not None:
attr_copy['initializer'] = initializer
assert isinstance(attr, ParamAttr)
suffix = 'b' if is_bias else 'w'
if default_initializer is None:
if is_bias:
attr.set_default_bias_initializer()
else:
attr.set_default_param_initializer()
else:
attr_copy['initializer'] = self._get_default_initializer(dtype)
if attr_copy['name'] is None:
attr_copy['name'] = unique_name(".".join([self.name, suffix]))
attr.set_default_initializer(default_initializer)
if attr.name is None:
attr.name = unique_name(".".join([self.name, suffix]))
self.startup_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr_copy)
dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True))
return self.main_program.global_block().create_parameter(
name=attr_copy['name'],
dtype=dtype,
shape=shape,
trainable=attr_copy.get('trainable', True))
dtype=dtype, shape=shape, **attr.to_kwargs())
def create_tmp_variable(self, dtype):
return self.main_program.current_block().create_var(
@@ -152,11 +145,7 @@ class LayerHelper(object):
persistable=True,
initializer=initializer)
def append_bias_op(self,
input_var,
bias_initializer,
dim_start=1,
dim_end=None):
def append_bias_op(self, input_var, dim_start=1, dim_end=None):
"""
Append bias operator and return its output. If the user does not set
bias_attr, append_bias_op will return input_var
@@ -176,11 +165,7 @@ class LayerHelper(object):
return input_var
b = self.create_parameter(
attr=bias_attr,
shape=size,
dtype=input_var.dtype,
suffix='b',
initializer=bias_initializer)
attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
tmp = self.create_tmp_variable(dtype=input_var.dtype)
self.append_op(
type='elementwise_add',

File diff suppressed because it is too large.

@@ -0,0 +1,61 @@
from initializer import Initializer, Xavier, Constant
from regularizer import WeightDecayRegularizer
class ParamAttr(object):
def __init__(self,
name=None,
initializer=None,
learning_rate=1.0,
regularizer=None,
trainable=True):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
def set_default_initializer(self, initializer):
if initializer is None:
if self.initializer is None:
raise ValueError("ParamAttr.initializer is not set")
return
if self.initializer is not None:
return
self.initializer = initializer
def set_default_param_initializer(self):
self.set_default_initializer(Xavier())
def set_default_bias_initializer(self):
self.set_default_initializer(Constant(0.0))
@staticmethod
def to_attr(arg):
if arg is None:
return ParamAttr()
elif isinstance(arg, ParamAttr):
return arg
elif isinstance(arg, str) or isinstance(arg, unicode):
return ParamAttr(name=arg)
elif isinstance(arg, Initializer):
return ParamAttr(initializer=arg)
elif isinstance(arg, WeightDecayRegularizer):
return ParamAttr(regularizer=arg)
elif isinstance(arg, bool):
return ParamAttr.to_attr(None) if arg else False
else:
raise TypeError("{0} cast to ParamAttr".format(type(arg)))
def to_kwargs(self, with_initializer=False):
kwargs = {
'name': self.name,
'learning_rate': self.learning_rate,
'regularizer': self.regularizer,
'trainable': self.trainable
}
if with_initializer:
kwargs['initializer'] = self.initializer
return kwargs
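
LayerHelper.create_parameter (above) leans on three pieces of this class: to_attr normalizes whatever the caller passed, the set_default_*_initializer methods fill in Xavier or Constant(0.0) only when no initializer was given, and to_kwargs unpacks the attribute for the block's create_parameter call. A rough sketch of the accepted inputs, using names from this diff and run under Python 2 as the module itself assumes (only the 'fc.b' name is illustrative):

import paddle.v2.fluid as fluid
from paddle.v2.fluid.param_attr import ParamAttr
from paddle.v2.fluid.initializer import Constant

# Every form a layer's param_attr/bias_attr kwarg may take is normalized by to_attr.
assert ParamAttr.to_attr(None).name is None                     # default attribute
assert ParamAttr.to_attr('shared_w').name == 'shared_w'         # bare parameter name
assert ParamAttr.to_attr(Constant(1.0)).initializer is not None # bare initializer
assert ParamAttr.to_attr(fluid.regularizer.L2Decay(1e-4)).regularizer is not None
assert ParamAttr.to_attr(False) is False                        # False passes through (used as bias_attr=False)

# create_parameter then fills in a default initializer and unpacks the attribute.
attr = ParamAttr.to_attr('fc.b')
attr.set_default_bias_initializer()                             # sets Constant(0.0) since none was given; a no-op otherwise
assert 'initializer' in attr.to_kwargs(with_initializer=True)   # kwargs for the startup program
assert 'initializer' not in attr.to_kwargs()                    # kwargs for the main program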

@@ -44,7 +44,7 @@ def db_lstm():
size=[pred_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'vemb'})
param_attr='vemb')
mark_embedding = fluid.layers.embedding(
input=mark,
@@ -57,8 +57,8 @@ def db_lstm():
fluid.layers.embedding(
size=[word_dict_len, word_dim],
input=x,
param_attr={'name': embedding_name,
'trainable': False}) for x in word_input
param_attr=fluid.ParamAttr(
name=embedding_name, trainable=False)) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
@@ -125,8 +125,8 @@ def main():
crf_cost = fluid.layers.linear_chain_crf(
input=feature_out,
label=target,
param_attr={"name": 'crfw',
"learning_rate": mix_hidden_lr})
param_attr=fluid.ParamAttr(
name='crfw', learning_rate=mix_hidden_lr))
avg_cost = fluid.layers.mean(x=crf_cost)
# TODO(qiao)
# 1. add crf_decode_layer and evaluator

@@ -6,24 +6,21 @@ import paddle.v2.fluid as fluid
BATCH_SIZE = 128
image = fluid.layers.data(name='x', shape=[784], dtype='float32')
param_attr = {
'name': None,
'regularization': fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
}
regularizer = fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE)
hidden1 = fluid.layers.fc(input=image,
size=128,
act='relu',
param_attr=param_attr)
param_attr=regularizer)
hidden2 = fluid.layers.fc(input=hidden1,
size=64,
act='relu',
param_attr=param_attr)
param_attr=regularizer)
predict = fluid.layers.fc(input=hidden2,
size=10,
act='softmax',
param_attr=param_attr)
param_attr=regularizer)
label = fluid.layers.data(name='y', shape=[1], dtype='int64')
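
Passing the bare regularizer object works because ParamAttr.to_attr wraps it in a ParamAttr; when a parameter also needs a name or a per-parameter learning rate, the fields can be combined explicitly. A hedged sketch reusing the names from this script (the variable name fc_attr and the learning_rate value are illustrative):

fc_attr = fluid.ParamAttr(
    regularizer=fluid.regularizer.L2Decay(0.0005 * BATCH_SIZE),
    learning_rate=2.0)  # illustrative per-parameter scale; defaults to 1.0
hidden1 = fluid.layers.fc(input=image,
                          size=128,
                          act='relu',
                          param_attr=fc_attr)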

@@ -24,7 +24,7 @@ def get_usr_combined_features():
input=uid,
dtype='float32',
size=[USR_DICT_SIZE, 32],
param_attr={'name': 'user_table'},
param_attr='user_table',
is_sparse=IS_SPARSE)
usr_fc = layers.fc(input=usr_emb, size=32)
@@ -36,7 +36,7 @@ def get_usr_combined_features():
usr_gender_emb = layers.embedding(
input=usr_gender_id,
size=[USR_GENDER_DICT_SIZE, 16],
param_attr={'name': 'gender_table'},
param_attr='gender_table',
is_sparse=IS_SPARSE)
usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
@@ -48,7 +48,7 @@ def get_usr_combined_features():
input=usr_age_id,
size=[USR_AGE_DICT_SIZE, 16],
is_sparse=IS_SPARSE,
param_attr={'name': 'age_table'})
param_attr='age_table')
usr_age_fc = layers.fc(input=usr_age_emb, size=16)
@@ -58,7 +58,7 @@ def get_usr_combined_features():
usr_job_emb = layers.embedding(
input=usr_job_id,
size=[USR_JOB_DICT_SIZE, 16],
param_attr={'name': 'job_table'},
param_attr='job_table',
is_sparse=IS_SPARSE)
usr_job_fc = layers.fc(input=usr_job_emb, size=16)
@@ -81,7 +81,7 @@ def get_mov_combined_features():
input=mov_id,
dtype='float32',
size=[MOV_DICT_SIZE, 32],
param_attr={'name': 'movie_table'},
param_attr='movie_table',
is_sparse=IS_SPARSE)
mov_fc = layers.fc(input=mov_emb, size=32)

@@ -23,25 +23,25 @@ embed_first = fluid.layers.embedding(
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
param_attr='shared_w')
embed_second = fluid.layers.embedding(
input=second_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
param_attr='shared_w')
embed_third = fluid.layers.embedding(
input=third_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
param_attr='shared_w')
embed_forth = fluid.layers.embedding(
input=forth_word,
size=[dict_size, EMBED_SIZE],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'})
param_attr='shared_w')
concat_embed = fluid.layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], axis=1)
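
All four lookups share one table because they pass the same parameter name: create_parameter only generates a unique name when attr.name is None, so every embedding binds to the existing 'shared_w' parameter. A bare string and an explicit ParamAttr are interchangeable here, e.g. for the first lookup (a minimal sketch built from the script above):

embed_first = fluid.layers.embedding(
    input=first_word,
    size=[dict_size, EMBED_SIZE],
    dtype='float32',
    is_sparse=IS_SPARSE,
    param_attr=fluid.ParamAttr(name='shared_w'))  # same effect as param_attr='shared_w'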

@@ -132,26 +132,26 @@ class TestBook(unittest.TestCase):
input=first_word,
size=[dict_size, embed_size],
dtype='float32',
param_attr={'name': 'shared_w'},
param_attr='shared_w',
main_program=program)
embed_second = layers.embedding(
input=second_word,
size=[dict_size, embed_size],
dtype='float32',
param_attr={'name': 'shared_w'},
param_attr='shared_w',
main_program=program)
embed_third = layers.embedding(
input=third_word,
size=[dict_size, embed_size],
dtype='float32',
param_attr={'name': 'shared_w'},
param_attr='shared_w',
main_program=program)
embed_forth = layers.embedding(
input=forth_word,
size=[dict_size, embed_size],
dtype='float32',
param_attr={'name': 'shared_w'},
param_attr='shared_w',
main_program=program)
concat_embed = layers.concat(

@@ -271,12 +271,12 @@ class RecurrentOpTest2(RecurrentOpTest1):
temp_l = layers.fc(input=x_t,
size=self.input_dim,
param_attr={'name': 'W'},
param_attr='W',
bias_attr=False,
**self.p_info)
temp_r = layers.fc(input=h_pre,
size=self.input_dim,
param_attr={'name': 'U'},
param_attr='U',
bias_attr=False,
**self.p_info)
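
The two attribute kwargs take different shortcut forms here: a bare string such as 'W' or 'U' becomes ParamAttr(name='W') via to_attr, while bias_attr=False passes through as False, which the helper treats as "no bias" (per the append_bias_op docstring, the input is returned unchanged). A short annotated sketch of the same call, with arguments other than the attrs following the test above:

temp_l = layers.fc(input=x_t,
                   size=self.input_dim,
                   param_attr='W',    # normalized to ParamAttr(name='W'); ties the weight to that name
                   bias_attr=False,   # stays False, so no bias parameter or add op is created
                   **self.p_info)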
