# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer.config_parser import *

__all__ = [
    'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute',
    'ExtraLayerAttribute'
]


def convert_and_compare(x, Type):
    """
    Convert x to type Type and then back to its original type, and check
    whether the round trip loses information.

    :param x: object to be checked
    :param Type: target type to check x against
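
    A minimal illustration (the literal values below are only examples):

    .. code-block:: python

        convert_and_compare(3, float)    # True:  3 -> 3.0 -> 3, nothing lost
        convert_and_compare(3.5, int)    # False: 3.5 -> 3 -> 3.0 != 3.5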
    """
    return type(x)(Type(x)) == x


def is_compatible_with(x, Type):
    """
    Check whether x has a type compatible with Type.

    :param x: object to be checked
    :param Type: target type to check x against
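
    A minimal illustration (the literal values below are only examples):

    .. code-block:: python

        is_compatible_with(1, float)     # True:  1 converts losslessly
        is_compatible_with(0.5, int)     # False: precision would be lost
        is_compatible_with('1', float)   # falsy: str is rejected for float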
    """
    if type(x) == Type:
        return True
    try:
        if float == Type or int == Type:
            # Avoid types that can be converted to float/int but are not
            # really meaningful and could potentially lead to errors, i.e.
            # str and bool values should not be used to initialize a
            # float/int variable.
            if not isinstance(x, str) and not isinstance(x, bool):
                return convert_and_compare(x, Type)
        elif bool == Type:
            # A string should not be used to initialize a bool variable.
            if not isinstance(x, str):
                return convert_and_compare(x, Type)
        else:
            return False
    except Exception:
        return False


class HookAttribute(object):
    """
    Hook Attribute object. As a member of the ParameterAttribute class, a hook
    is an auxiliary operation applied while a layer with parameters, such as
    an img_conv or fc layer, is being trained.

    :param type: Hook type. Currently only 'pruning' is supported: the user
                 specifies a sparsity_ratio before training starts, and the
                 network prunes the parameters based on that ratio.
                 e.g. a Hook object can be defined as
                 hk = HookAttribute('pruning', 0.6)
                 and used as
                 paddle.layer.img_conv(input=img, filter_size=3,
                                       num_channels=3, num_filters=64,
                                       param_attr=ParameterAttribute(update_hooks=hk))
                 Details of pruning can be found at
                 https://arxiv.org/pdf/1506.02626.pdf
    :type type: string

    :param sparsity_ratio: Must be specified if the hook type is 'pruning'.
                           It is the ratio of parameter elements to be set to
                           zero.
    :type sparsity_ratio: float or None
    """

    def __init__(self, type, sparsity_ratio=None):
        self.type = type
        self.sparsity_ratio = sparsity_ratio
        if self.sparsity_ratio is not None:
            assert is_compatible_with(
                self.sparsity_ratio,
                float), 'sparsity_ratio must be float type'
            assert self.sparsity_ratio <= 1 and self.sparsity_ratio >= 0, \
                'sparsity_ratio must be a float in [0, 1]'

    def __call__(self):
        return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio)


class ParameterAttribute(object):
    """
    Parameter Attributes object. To fine-tune the network training process,
    users can set these attributes to control training details, such as the
    l1/l2 regularization rates, the learning rate, and how the parameter is
    initialized.

    NOTE: IT IS A HIGH LEVEL USER INTERFACE.

    :param is_static: True if this parameter is fixed during training.
    :type is_static: bool

    :param initial_std: Standard deviation for Gaussian random initialization.
                        None if Gaussian random initialization is not used.
    :type initial_std: float or None
    :param initial_mean: Mean for Gaussian random initialization.
                         None if Gaussian random initialization is not used.
    :type initial_mean: float or None
    :param initial_max: Maximum value for uniform random initialization.
    :type initial_max: float or None
    :param initial_min: Minimum value for uniform random initialization.
    :type initial_min: float or None
    :param l1_rate: The l1 regularization factor.
    :type l1_rate: float or None
    :param l2_rate: The l2 regularization factor.
    :type l2_rate: float or None
    :param learning_rate: The parameter learning rate. None means 1. The
                          effective learning rate during optimization is
                          LEARNING_RATE = GLOBAL_LEARNING_RATE *
                          PARAMETER_LEARNING_RATE * SCHEDULER_FACTOR.
    :type learning_rate: float or None
    :param momentum: The parameter momentum. None means use the global value.
    :type momentum: float or None
    :param gradient_clipping_threshold: Gradient clipping threshold. Gradient
                                        values larger than this threshold will
                                        be clipped.
    :type gradient_clipping_threshold: float
    :param sparse_update: Enable sparse update for this parameter. It enables
                          both local and remote sparse update.
    :type sparse_update: bool
    :param update_hooks: A HookAttribute object.
    :type update_hooks: HookAttribute
    :param initializer: If not None, it should be a callable object that
                        accepts a parameter name and returns a numpy array as
                        the initial value of the parameter.
    :type initializer: callable object
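
    A sketch of typical usage (the fc_layer, its input ``data``, and all
    numeric values below are illustrative, not prescriptive):

    .. code-block:: python

        # 'data' stands for the output of any previous layer.
        fc = fc_layer(input=data,
                      size=64,
                      param_attr=ParamAttr(name='fc.w',
                                           initial_mean=0.,
                                           initial_std=0.01,
                                           learning_rate=0.1,
                                           l2_rate=1e-4))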
    """

    def __init__(self,
                 name=None,
                 is_static=False,
                 initial_std=None,
                 initial_mean=None,
                 initial_max=None,
                 initial_min=None,
                 l1_rate=None,
                 l2_rate=None,
                 learning_rate=None,
                 momentum=None,
                 gradient_clipping_threshold=None,
                 sparse_update=False,
                 update_hooks=None,
                 initializer=None):
        self.attr = {}

        if is_static:
            self.attr['is_static'] = True

        if initial_std is None and initial_mean is None and initial_max \
                is None and initial_min is None:
            self.attr['initial_smart'] = True
        elif is_compatible_with(initial_std, float) or \
                is_compatible_with(initial_mean, float):
            if initial_std is not None:
                self.attr['initial_std'] = initial_std
            if initial_mean is not None:
                self.attr['initial_mean'] = initial_mean
            self.attr['initial_strategy'] = 0  # Gauss Random
        elif is_compatible_with(initial_max, float) and \
                is_compatible_with(initial_min, float):
            assert initial_min < initial_max
            initial_mean = (initial_max + initial_min) / 2
            initial_std = initial_mean - initial_min
            self.attr['initial_mean'] = initial_mean
            self.attr['initial_std'] = initial_std
            self.attr['initial_strategy'] = 1  # Uniform Random
        else:
            raise RuntimeError("Unexpected branch.")

        if not is_static and is_compatible_with(l1_rate, float):
            self.attr['decay_rate_l1'] = l1_rate

        if not is_static and is_compatible_with(l2_rate, float):
            self.attr['decay_rate'] = l2_rate

        if not is_static and is_compatible_with(learning_rate, float):
            self.attr['learning_rate'] = learning_rate

        if not is_static and is_compatible_with(momentum, float):
            self.attr['momentum'] = momentum

        if name is not None:
            self.attr['parameter_name'] = name

        if sparse_update:
            self.attr['sparse_update'] = True
            self.attr['sparse_remote_update'] = True

        if gradient_clipping_threshold is not None and \
                is_compatible_with(gradient_clipping_threshold, float):
            self.attr['gradient_clipping_threshold'] = \
                gradient_clipping_threshold

        if initializer is not None:
            self.attr['initializer'] = initializer

        if update_hooks:
            self.attr['update_hooks'] = update_hooks

    def set_default_parameter_name(self, name):
        """
        Set the default parameter name. If the parameter name is not set by
        the user, this default name is used.

        :param name: default parameter name.
        :type name: basestring
        """
        if 'parameter_name' not in self.attr:
            self.attr['parameter_name'] = name

    @staticmethod
    def to_bias(bias_attr):
        if isinstance(bias_attr, ParameterAttribute):
            return Bias(**bias_attr.attr)
        else:
            return False


class ExtraLayerAttribute(object):
    """
    Some high level layer attribute configs. You can set all attributes here,
    but not every layer supports every attribute. If you set an attribute on
    a layer that does not support it, paddle will print an error and core
    dump.

    :param error_clipping_threshold: Error clipping threshold.
    :type error_clipping_threshold: float
    :param drop_rate: Dropout rate. Dropout creates a mask on the layer
                      output; the dropout rate is the fraction of zeros in
                      this mask. For details of dropout, please refer to
                      `JMLRdropout
                      <https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
                      >`_.
    :type drop_rate: float
    :param device: Device ID of the layer. device=-1 means use CPU;
                   device>=0 means use the GPU with that ID. For details of
                   device allocation with parallel_nn, please refer to
                   `use_case
                   <https://github.com/PaddlePaddle/Paddle/blob/develop/doc/v2
                   /howto/cmd_parameter/use_case_en.md#case-2-specify-layers-in
                   -different-devices>`_.
    :type device: int
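
    A sketch of typical usage (the fc_layer, its input ``data``, and the
    values below are illustrative, not prescriptive):

    .. code-block:: python

        # 'data' stands for the output of any previous layer; device=0 asks
        # for the first GPU.
        fc = fc_layer(input=data,
                      size=64,
                      layer_attr=ExtraAttr(drop_rate=0.5, device=0))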
    """

    def __init__(self,
                 error_clipping_threshold=None,
                 drop_rate=None,
                 device=None):
        self.attr = dict()
        if error_clipping_threshold is not None:
            error_clipping_threshold = float(error_clipping_threshold)
            if error_clipping_threshold < 0:
                raise ValueError("Error clipping threshold must be >= 0")
            self.attr['error_clipping_threshold'] = error_clipping_threshold
        if drop_rate is not None:
            drop_rate = float(drop_rate)
            if drop_rate < 0:
                raise ValueError("Dropout rate must be >= 0")
            self.attr["drop_rate"] = drop_rate

        if isinstance(device, int):
            self.attr["device"] = device

    def check(self, layer_name):
        for key in self.attr:
            if not hasattr(self, 'can_%s' % key) or \
                    not getattr(self, 'can_%s' % key):
                raise NotImplementedError("Layer %s does not support %s" %
                                          (layer_name, key))

    @staticmethod
    def to_kwargs(attr):
        if attr is None:
            return dict()
        else:
            return attr.attr


HookAttr = HookAttribute
ParamAttr = ParameterAttribute
ExtraAttr = ExtraLayerAttribute