# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import contextlib
import sys
import numpy as np
import six
import re
import copy
import weakref
import warnings

from . import parallel_helper
from .. import unique_name
from paddle.fluid import core
from .layer_object_helper import LayerObjectHelper
from .base import program_desc_tracing_guard, param_guard
from paddle.fluid import framework
from ..param_attr import ParamAttr
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.framework import _current_expected_place as _get_device

__all__ = ['Layer']

_first_cap_re = re.compile('(.)([A-Z][a-z]+)')
_all_cap_re = re.compile('([a-z])([A-Z])')


def _convert_camel_to_snake(name):
    s1 = _first_cap_re.sub(r'\1_\2', name)
    return _all_cap_re.sub(r'\1_\2', s1).lower()
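
# For example (illustrative): _convert_camel_to_snake('MyLayer') returns
# 'my_layer', which becomes the default name scope for a Layer subclass
# named MyLayer.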


class HookRemoveHelper(object):
    """ A helper class that can be used to remove a registered hook. """

    next_hook_id = 0

    def __init__(self, hooks):
        self._hooks_ref = weakref.ref(hooks)
        self._hook_id = HookRemoveHelper.next_hook_id
        HookRemoveHelper.next_hook_id += 1

    def remove(self):
        hooks = self._hooks_ref()
        if hooks is not None and self._hook_id in hooks:
            del hooks[self._hook_id]
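
# Usage sketch (illustrative): the hook registration methods of `Layer` below
# return a HookRemoveHelper, so a hook can be detached later:
#
#     handle = layer.register_forward_post_hook(my_hook)
#     ...
#     handle.remove()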


class Layer(core.Layer):
    """
    :alias_main: paddle.nn.Layer
    :alias: paddle.nn.Layer
    :old_api: paddle.fluid.dygraph.layers.Layer

    Dynamic graph Layer based on object-oriented design, which includes the
    parameters of the layer, the structure of the forward graph and so on.

    Parameters:
        name_scope (str, optional): prefix name used by the layer to name parameters.
            If prefix is "my_layer", parameter name in MyLayer
            can be "my_layer_0.w_n", where "w" is the parameter
            base name and "n" is a unique suffix auto-generated.
            If None, prefix name will be snake cased class name. Default: None.
        dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
            If set str, it can be "bool", "float16", "float32", "float64",
            "int8", "int16", "int32", "int64", "uint8" or "uint16".
            Default: ``core.VarDesc.VarType.FP32``

    Returns:
        None
    """

    def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
        self.training = True
        if name_scope is None:
            name_scope = _convert_camel_to_snake(self.__class__.__name__)
        self._full_name = unique_name.generate(name_scope)
        self._helper = LayerObjectHelper(self._full_name)
        self._built = False
        self._dtype = dtype

        self._parameters = collections.OrderedDict()
        # Buffers the variables (not parameters) created in this layer
        self._buffers = collections.OrderedDict()
        self._non_persistable_buffer_names_set = set()
        self._sub_layers = collections.OrderedDict()
        self._loaddict_holder = collections.OrderedDict()

        self._forward_pre_hooks = collections.OrderedDict()
        self._forward_post_hooks = collections.OrderedDict()

    def train(self):
        """
        Sets this Layer and all its sublayers to training mode.
        This only affects certain modules like `Dropout` and `BatchNorm`.

        Returns:
            None
        """
        # global setting
        framework._dygraph_tracer().train_mode()
        # Layer-level setting
        self.training = True
        for layer in self.sublayers():
            layer.train()

    def eval(self):
        """
        Sets this Layer and all its sublayers to evaluation mode.
        This only affects certain modules like `Dropout` and `BatchNorm`.

        Returns:
            None
        """
        # global setting
        framework._dygraph_tracer().eval_mode()
        # Layer-level setting
        self.training = False
        for layer in self.sublayers():
            layer.eval()

    def apply(self, fn):
        """
        Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
        as well as self. Typical use includes initializing the parameters of a model.

        Parameters:
            fn (function): a function to be applied to each sublayer

        Returns:
            Layer: self

        Examples:
            .. code-block:: python

              import paddle
              import paddle.nn as nn

              paddle.disable_static()

              net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))

              def init_weights(layer):
                  if type(layer) == nn.Linear:
                      print('before init weight:', layer.weight.numpy())
                      new_weight = paddle.fill_constant(layer.weight.shape, layer.weight.dtype, value=0.9)
                      layer.weight.set_value(new_weight)
                      print('after init weight:', layer.weight.numpy())

              net.apply(init_weights)

              print(net.state_dict())
        """
        for layer in self.children():
            layer.apply(fn)

        fn(self)

        return self

    def full_name(self):
        """Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__

        Returns:
            str: full name of this layer.
        """
        return self._full_name

    def register_forward_post_hook(self, hook):
        """Register a forward post-hook for the Layer. The hook will be called after the `forward` function has been computed.

        It should have the following form, where `input` and `output` of the `hook` are the `input` and `output` of the `Layer` respectively.
        A forward post-hook can be used to change the output of the Layer or to perform information statistics tasks on the Layer.

        hook(Layer, input, output) -> None or modified output

        Parameters:
            hook(function): a function registered as a forward post-hook

        Returns:
            HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid
              import numpy as np

              # the forward_post_hook changes the output of the layer: output = output * 2
              def forward_post_hook(layer, input, output):
                  # user can use layer, input and output for information statistics tasks

                  # change the output
                  return output * 2

              with fluid.dygraph.guard():
                  linear = fluid.Linear(13, 5, dtype="float32")

                  # register the hook
                  forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

                  value1 = np.arange(26).reshape(2, 13).astype("float32")
                  in1 = fluid.dygraph.to_variable(value1)

                  out0 = linear(in1)

                  # remove the hook
                  forward_post_hook_handle.remove()

                  out1 = linear(in1)

                  # the hook changed the linear's output to output * 2, so out0 equals out1 * 2.
                  assert (out0.numpy() == (out1.numpy()) * 2).all()
        """
        hook_remove_helper = HookRemoveHelper(self._forward_post_hooks)
        self._forward_post_hooks[hook_remove_helper._hook_id] = hook
        return hook_remove_helper

    def register_forward_pre_hook(self, hook):
        """Register a forward pre-hook for the Layer. The hook will be called before the `forward` function is computed.

        It should have the following form, where `input` of the `hook` is the `input` of the `Layer`.
        The hook can return either a tuple or a single modified value; a single returned value
        will be wrapped into a tuple (unless that value is already a tuple).
        A forward pre-hook can be used to change the input of the Layer or to perform information statistics tasks on the Layer.

        hook(Layer, input) -> None or modified input

        Parameters:
            hook(function): a function registered as a forward pre-hook

        Returns:
            HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid
              import numpy as np

              # the forward_pre_hook changes the input of the layer: input = input * 2
              def forward_pre_hook(layer, input):
                  # user can use layer and input for information statistics tasks

                  # change the input
                  input_return = (input[0] * 2)
                  return input_return

              with fluid.dygraph.guard():
                  linear = fluid.Linear(13, 5, dtype="float32")

                  # register the hook
                  forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

                  value0 = np.arange(26).reshape(2, 13).astype("float32")
                  in0 = fluid.dygraph.to_variable(value0)
                  out0 = linear(in0)

                  # remove the hook
                  forward_pre_hook_handle.remove()

                  value1 = value0 * 2
                  in1 = fluid.dygraph.to_variable(value1)
                  out1 = linear(in1)

                  # the hook changed the linear's input to input * 2, so out0 equals out1.
                  assert (out0.numpy() == out1.numpy()).all()
        """
        hook_remove_helper = HookRemoveHelper(self._forward_pre_hooks)
        self._forward_pre_hooks[hook_remove_helper._hook_id] = hook
        return hook_remove_helper

    def create_parameter(self,
                         shape,
                         attr=None,
                         dtype=None,
                         is_bias=False,
                         default_initializer=None):
        """Create parameters for this layer.

        Parameters:
            shape(list): Shape of the parameter.
            attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`. Default: None.
            dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
                If set str, it can be "bool", "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
            is_bias(bool, optional): if this is a bias parameter. Default: False.
            default_initializer(Initializer, optional): the default initializer for this parameter.
                If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
                for non-bias and bias parameter, respectively. Default: None.

        Returns:
            :ref:`api_guide_Variable_en` : created parameter.
        """
        temp_attr = copy.deepcopy(attr)
        if isinstance(temp_attr, six.string_types) and temp_attr == "":
            temp_attr = None
        return self._helper.create_parameter(temp_attr, shape, dtype, is_bias,
                                             default_initializer)
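
    # Usage sketch (illustrative, not executed at import time): a custom layer
    # that creates its own learnable weight in __init__.
    #
    #     class MyScale(fluid.dygraph.Layer):
    #         def __init__(self):
    #             super(MyScale, self).__init__()
    #             # one learnable scalar, default-initialized
    #             self.scale = self.create_parameter(shape=[1], dtype="float32")
    #
    #         def forward(self, x):
    #             return x * self.scale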

    # TODO: Add more parameters when we need them
    def create_variable(self,
                        name=None,
                        persistable=None,
                        dtype=None,
                        type=core.VarDesc.VarType.LOD_TENSOR):
        """Create a Variable for this layer.

        Parameters:
            name(str, optional): name of the variable. Please refer to :ref:`api_guide_Name` . Default: None
            persistable(bool, optional): whether to set this variable persistable. Default: False
            dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
                If set str, it can be "bool", "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                If set None, it will be ``core.VarDesc.VarType.FP32``. Default: None
            type(core.VarDesc.VarType, optional): type of the variable. No need to set this parameter. Default: ``core.VarDesc.VarType.LOD_TENSOR``

        Returns:
            :ref:`api_guide_Variable_en` : created Variable.
        """
        if name is not None:
            var_name = ".".join([self._full_name, name])
        else:
            var_name = unique_name.generate(".".join(
                [self._full_name, "_generated_var"]))

        return self._helper.main_program.current_block().create_var(
            name=var_name, persistable=persistable, dtype=dtype, type=type)
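
    # e.g. (illustrative): inside a custom layer, a scratch variable can be
    # created as `tmp = self.create_variable(name="tmp", dtype="float32")`;
    # the variable name is prefixed with the layer's full name.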

    def parameters(self, include_sublayers=True):
        """Returns a list of all Parameters from the current layer and its sub-layers.

        Parameters:
            include_sublayers(bool, optional): Whether to include the parameters of sublayers. If True, the parameters from sublayers are also included. Default: True

        Returns:
            list of :ref:`api_guide_Variable_en` : a list of Parameters.
        """
        ret = [
            param
            for _, param in self.named_parameters(
                include_sublayers=include_sublayers)
        ]
        return ret

    def children(self):
        """Returns an iterator over immediate children layers.

        Yields:
            Layer: a child layer

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid

              with fluid.dygraph.guard():
                  fc1 = fluid.Linear(10, 3)
                  fc2 = fluid.Linear(3, 10, bias_attr=False)
                  model = fluid.dygraph.Sequential(fc1, fc2)

                  layer_list = list(model.children())

                  print(layer_list)

        """
        for _, layer in self.named_children():
            yield layer

    def named_children(self):
        """Returns an iterator over immediate children layers, yielding both
        the name of the layer and the layer itself.

        Yields:
            (string, Layer): Tuple containing a name and child layer

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid

              with fluid.dygraph.guard():
                  fc1 = fluid.Linear(10, 3)
                  fc2 = fluid.Linear(3, 10, bias_attr=False)
                  model = fluid.dygraph.Sequential(fc1, fc2)
                  for prefix, layer in model.named_children():
                      print(prefix, layer)

        """
        memo = set()
        for name, layer in self._sub_layers.items():
            if layer is not None and layer not in memo:
                memo.add(layer)
                yield name, layer

    def sublayers(self, include_sublayers=True):
        """Returns a list of sub layers.

        Parameters:
            include_sublayers(bool, optional): Whether to also include the sublayers of sublayers. If True, sublayers are gathered recursively. Default: True

        Returns:
            list of Layer : a list of sub layers.
        """
        ret = [
            layer
            for _, layer in self.named_sublayers(
                include_sublayers=include_sublayers)
        ]
        return ret

    def named_parameters(self, prefix='', include_sublayers=True):
        """
        Returns an iterator over all parameters in the Layer, yielding tuples of name and parameter.

        Parameters:
            prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
            include_sublayers(bool, optional): Whether to include the parameters of sublayers.
                If True, the named parameters from sublayers are also yielded. Default: True.

        Yields:
            (string, Parameter): Tuple of name and Parameter

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid

              with fluid.dygraph.guard():
                  fc1 = fluid.Linear(10, 3)
                  fc2 = fluid.Linear(3, 10, bias_attr=False)
                  model = fluid.dygraph.Sequential(fc1, fc2)
                  for name, param in model.named_parameters():
                      print(name, param)

        """
        params_set = set()
        named_sublayers = self.named_sublayers(
            prefix=prefix,
            include_sublayers=include_sublayers,
            include_self=True)
        for layer_prefix, sublayer in named_sublayers:
            params = sublayer._parameters.items()
            for key, param in params:
                if param is None or param in params_set:
                    continue
                params_set.add(param)
                name = layer_prefix + ('.' if layer_prefix else '') + key
                yield name, param

    def named_sublayers(self,
                        prefix='',
                        include_sublayers=True,
                        include_self=False,
                        layers_set=None):
        """
        Returns an iterator over all sublayers in the Layer, yielding tuples of name and sublayer.
        Duplicate sublayers are yielded only once.

        Parameters:
            prefix(str, optional): Prefix to prepend to all sublayer names. Default: ''.
            include_sublayers(bool, optional): Whether to include the sublayers. Default: True.
            include_self(bool, optional): Whether to include the Layer itself. Default: False.
            layers_set(set, optional): The set used to record duplicate sublayers. Default: None.

        Yields:
            (string, Layer): Tuple of name and Layer

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid

              with fluid.dygraph.guard():
                  fc1 = fluid.Linear(10, 3)
                  fc2 = fluid.Linear(3, 10, bias_attr=False)
                  model = fluid.dygraph.Sequential(fc1, fc2)
                  for prefix, layer in model.named_sublayers():
                      print(prefix, layer)

        """
        if layers_set is None:
            layers_set = set()
        if include_self and self not in layers_set:
            layers_set.add(self)
            yield prefix, self
        if include_sublayers:
            for key, layer in self._sub_layers.items():
                if layer is None:
                    continue
                layer_prefix = prefix + ('.' if prefix else '') + key
                for p, l in layer.named_sublayers(
                        prefix=layer_prefix,
                        include_sublayers=include_sublayers,
                        include_self=True,
                        layers_set=layers_set):
                    yield p, l

    def register_buffer(self, name, variable, persistable=True):
        """
        Registers a variable as a buffer of the layer.

        A `buffer` is a non-parametric variable that will not be updated by the optimizer,
        but is necessary for evaluation and inference, for example, the mean and variance in BatchNorm layers.
        The registered buffer is persistable by default, and will be saved into
        `state_dict` alongside parameters. If persistable=False is set, it registers
        a non-persistable buffer, which will not be a part of `state_dict` .

        Buffers can be accessed as attributes using the given names.

        Parameters:
            name (string): name of the buffer. The buffer can be accessed
                from this layer using the given name
            variable (Variable): the variable to be registered as buffer.
            persistable (bool): whether the buffer is part of this layer's
                state_dict.

        Returns:
            None

        Examples:
            .. code-block:: python

              import numpy as np
              import paddle.fluid as fluid

              with fluid.dygraph.guard():
                  linear = fluid.Linear(10, 3)
                  value = np.array([0]).astype("float32")
                  buffer = fluid.dygraph.to_variable(value)
                  linear.register_buffer("buf_name", buffer, persistable=True)

                  # get the buffer by attribute.
                  print(linear.buf_name)

        """
        if '_buffers' not in self.__dict__:
            raise ValueError(
                "super(YourLayer, self).__init__() should be called first")
        elif not isinstance(name, six.string_types):
            raise TypeError(
                "The name of buffer should be a string, but received {}.".
                format(type(name).__name__))
        elif '.' in name:
            raise KeyError(
                "The name of buffer can not contain `.`, "
                "because when you access the newly added buffer in the "
                "form of `self.**.**`, it will cause AttributeError.")
        elif name == '':
            raise KeyError("The name of buffer can not be empty.")
        elif hasattr(self, name) and name not in self._buffers:
            raise KeyError("attribute '{}' already exists.".format(name))
        elif variable is not None and not type(variable) == core.VarBase:
            raise TypeError(
                "The registered buffer should be a core.VarBase, but received {}.".
                format(type(variable).__name__))
        else:
            self._buffers[name] = variable
            if persistable:
                self._non_persistable_buffer_names_set.discard(name)
            else:
                self._non_persistable_buffer_names_set.add(name)

    def buffers(self, include_sublayers=True):
        """
        Returns a list of all buffers from the current layer and its sub-layers.

        Parameters:
            include_sublayers(bool, optional): Whether to include the buffers of sublayers. If True, the buffers from sublayers are also included. Default: True

        Returns:
            list of :ref:`api_guide_Variable_en` : a list of buffers.
        """
        ret = [
            buffer
            for _, buffer in self.named_buffers(
                include_sublayers=include_sublayers)
        ]
        return ret

    def named_buffers(self, prefix='', include_sublayers=True):
        """
        Returns an iterator over all buffers in the Layer, yielding tuples of name and Variable.

        Parameters:
            prefix(str, optional): Prefix to prepend to all buffer names. Default: ''.
            include_sublayers(bool, optional): Whether to include the buffers of sublayers.
                If True, the named buffers from sublayers are also yielded. Default: True.

        Yields:
            (string, Variable): Tuple of name and Variable

        Examples:
            .. code-block:: python

              import numpy as np
              import paddle.fluid as fluid

              with fluid.dygraph.guard():
                  fc1 = fluid.Linear(10, 3)
                  buffer1 = fluid.dygraph.to_variable(np.array([0]).astype("float32"))
                  # register a variable as a buffer, specifying `persistable`
                  fc1.register_buffer("buf_name_1", buffer1, persistable=True)

                  fc2 = fluid.Linear(3, 10)
                  buffer2 = fluid.dygraph.to_variable(np.array([1]).astype("float32"))
                  # register a buffer by assigning an attribute with a Variable.
                  # registered this way, `persistable` can only be False.
                  fc2.buf_name_2 = buffer2

                  model = fluid.dygraph.Sequential(fc1, fc2)

                  # get all named buffers
                  for name, buffer in model.named_buffers():
                      print(name, buffer)

        """
        buffers_set = set()
        named_sublayers = self.named_sublayers(
            prefix=prefix,
            include_sublayers=include_sublayers,
            include_self=True)
        for layer_prefix, sublayer in named_sublayers:
            buffers = sublayer._buffers.items()
            for key, buffer in buffers:
                if buffer is None or buffer in buffers_set:
                    continue
                buffers_set.add(buffer)
                name = layer_prefix + ('.' if layer_prefix else '') + key
                yield name, buffer

    def clear_gradients(self):
        """
        Clear the gradients of all parameters for this layer.

        Returns:
            None

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid
              import numpy as np

              with fluid.dygraph.guard():
                  value = np.arange(26).reshape(2, 13).astype("float32")
                  a = fluid.dygraph.to_variable(value)
                  linear = fluid.Linear(13, 5, dtype="float32")
                  adam = fluid.optimizer.Adam(learning_rate=0.01,
                                              parameter_list=linear.parameters())
                  out = linear(a)
                  out.backward()
                  adam.minimize(out)
                  linear.clear_gradients()

        """
        for p in self.parameters():
            if p.trainable:
                p.clear_gradient()

    def _build_once(self, *args, **kwargs):
        pass

    def __call__(self, *inputs, **kwargs):
        for forward_pre_hook in self._forward_pre_hooks.values():
            hook_result = forward_pre_hook(self, inputs)
            if hook_result is not None:
                if not isinstance(hook_result, tuple):
                    hook_result = (hook_result, )
                inputs = hook_result

        if not self._built:
            with program_desc_tracing_guard(False):
                self._build_once(*inputs, **kwargs)
                if parallel_helper._is_data_parallel_mode():
                    parallel_helper._broadcast_parameters(
                        self._parameters.values())
            self._built = True

        with param_guard(self._parameters), param_guard(self._buffers):
            outputs = self.forward(*inputs, **kwargs)

        for forward_post_hook in self._forward_post_hooks.values():
            hook_result = forward_post_hook(self, inputs, outputs)
            if hook_result is not None:
                outputs = hook_result

        return outputs
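
    # Call-order summary (descriptive): for `out = layer(x)`,
    #   1. every forward pre-hook may rewrite the inputs (a non-tuple return
    #      value is wrapped into a tuple),
    #   2. `_build_once` runs on the first call, then `forward` executes under
    #      `param_guard`,
    #   3. every forward post-hook may rewrite the outputs.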

    def forward(self, *inputs, **kwargs):
        """
        Defines the computation performed at every call.
        Should be overridden by all subclasses.

        Parameters:
            *inputs(tuple): unpacked tuple arguments
            **kwargs(dict): unpacked dict arguments
        """
        raise NotImplementedError
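
    # A minimal subclass sketch (illustrative, assuming the dygraph guard is
    # active):
    #
    #     class MyLayer(fluid.dygraph.Layer):
    #         def __init__(self):
    #             super(MyLayer, self).__init__()
    #             self.linear = fluid.dygraph.Linear(4, 2)
    #
    #         def forward(self, x):
    #             # invoked through MyLayer.__call__, never called directly
    #             return self.linear(x)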

    def backward(self, *inputs):
        raise ValueError("Layer shouldn't implement backward")

    def add_sublayer(self, name, sublayer):
        """Adds a sub Layer instance.

        The added sublayer can be accessed as an attribute, i.e. `self.name`.

        Parameters:
            name(str): name of this sublayer.
            sublayer(Layer): an instance of Layer.
        Returns:
            Layer: the sublayer passed in.
        """
        assert isinstance(sublayer, core.Layer)

        self._sub_layers[name] = sublayer
        return sublayer
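
    # e.g. (illustrative): inside __init__,
    #     self.add_sublayer("fc", fluid.dygraph.Linear(8, 8))
    # afterwards the sublayer is reachable as `self.fc`.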

    def add_parameter(self, name, parameter):
        """Adds a Parameter instance.

        The added parameter can be accessed as an attribute, i.e. `self.name`.

        Parameters:
            name(str): name of this parameter.
            parameter(Parameter): an instance of Parameter.
        Returns:
            Parameter: the parameter passed in.
        """
        if '_parameters' not in self.__dict__:
            raise RuntimeError(
                "super(YourLayer, self).__init__() should be called first.")
        elif not isinstance(name, six.string_types):
            raise TypeError(
                "The name of parameter should be a string, but received {}.".
                format(type(name).__name__))
        elif '.' in name:
            raise KeyError(
                "The name of parameter can not contain `.`, "
                "because when you access the newly added parameter in the "
                "form of `self.**.**`, it will cause AttributeError.")
        elif name == '':
            raise KeyError("The name of parameter can not be empty.")
        elif hasattr(self, name) and name not in self._parameters:
            raise KeyError("The parameter '{}' already exists.".format(name))
        elif parameter is not None and not isinstance(parameter,
                                                      framework.Parameter):
            raise TypeError(
                "The parameter to be added should be a Parameter, but received {}.".
                format(type(parameter).__name__))
        else:
            if parameter is None:
                self._parameters[name] = None
            else:
                if len(self._loaddict_holder) > 0:
                    assert parameter.name in self._loaddict_holder, "Parameter not found: cannot find [ {} ] in state_dict".format(
                        parameter.name)

                    parameter.set_value(self._loaddict_holder[parameter.name])

                self._parameters[name] = parameter
        return parameter
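
    # e.g. (illustrative): inside __init__,
    #     w = self.create_parameter(shape=[2, 2])
    #     self.add_parameter("w", w)
    # afterwards the parameter is reachable as `self.w`.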

    def __getattr__(self, name):
        if name in self._parameters:
            return self._parameters[name]
        elif name in self._sub_layers:
            return self._sub_layers[name]
        elif name in self._buffers:
            return self._buffers[name]
        else:
            return object.__getattribute__(self, name)
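
    # Attribute assignment routing (descriptive): Parameter values are stored
    # in self._parameters, Layer values in self._sub_layers, and bare VarBase
    # values become non-persistable buffers in self._buffers; assigning None
    # to an existing entry clears it, and anything else falls back to normal
    # object attribute assignment.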

    def __setattr__(self, name, value):
        def _remove_if_exist(*dicts):
            for d in dicts:
                if name in d:
                    del d[name]

        if isinstance(getattr(type(self), name, None), property):
            object.__setattr__(self, name, value)
        params = self.__dict__.get('_parameters', None)
        if isinstance(value, framework.Parameter):
            if params is None:
                raise ValueError(
                    "super(YourLayer, self).__init__() should be called first")
            if len(self._loaddict_holder) > 0:
                assert value.name in self._loaddict_holder, "Parameter not found: cannot find [ {} ] in state_dict".format(
                    value.name)

                value.set_value(self._loaddict_holder[value.name])

            _remove_if_exist(self.__dict__, self._buffers, self._sub_layers)
            params[name] = value
        elif params is not None and name in params:
            if value is not None:
                raise TypeError(
                    "assignment to parameter '{}' should be of type Parameter or None, but got '{}'"
                    .format(name, type(value).__name__))
            params[name] = None
        else:
            layers = self.__dict__.get('_sub_layers', None)
            if isinstance(value, core.Layer):
                if layers is None:
                    raise ValueError(
                        "super(YourLayer, self).__init__() should be called first"
                    )

                _remove_if_exist(self.__dict__, self._parameters, self._buffers)
                layers[name] = value
            elif layers is not None and name in layers:
                if value is not None:
                    raise TypeError(
                        "assignment to sublayer '{}' should be of type Layer or None, but got '{}'"
                        .format(name, type(value).__name__))
                layers[name] = None
            else:
                _buffers = self.__dict__.get('_buffers', None)
                if type(value) == core.VarBase:
                    if _buffers is None:
                        raise ValueError(
                            "super(YourLayer, self).__init__() should be called first"
                        )
                    _remove_if_exist(self.__dict__, self._parameters,
                                     self._sub_layers)
                    # Set persistable=False by default. Only `register_buffer` can
                    # add a persistable buffer.
                    if name not in self._buffers:
                        self._non_persistable_buffer_names_set.add(name)
                    _buffers[name] = value
                elif _buffers is not None and name in _buffers:
                    if value is not None:
                        raise TypeError(
                            "assignment to buffers '{}' should be of type core.VarBase or None, but got '{}'"
                            .format(name, type(value).__name__))
                    # Assigning None will remove the buffer, but if a new VarBase
                    # is re-assigned to it, it will be re-marked as a buffer with
                    # the same `persistable` attribute.
                    _buffers[name] = None
                else:
                    object.__setattr__(self, name, value)

    def __delattr__(self, name):
        if name in self._parameters:
            del self._parameters[name]
        elif name in self._sub_layers:
            del self._sub_layers[name]
        elif name in self._buffers:
            del self._buffers[name]
            self._non_persistable_buffer_names_set.discard(name)
        else:
            object.__delattr__(self, name)

    def __dir__(self):
        """
        Return a list of all parameters, buffers (non-parameter variables), sublayers, methods and attributes of the Layer.

        Examples:
            import paddle.fluid as fluid
            import numpy as np

            fluid.dygraph.enable_dygraph()

            class Mylayer(fluid.dygraph.Layer):
                def __init__(self):
                    super(Mylayer, self).__init__()
                    self.linear1 = fluid.dygraph.Linear(10, 10)
                    self.linear2 = fluid.dygraph.Linear(5, 5)
                    self.conv2d = fluid.dygraph.Conv2D(3, 2, 3)
                    self.embedding = fluid.dygraph.Embedding(size=[128, 16])
                    self.h_0 = fluid.dygraph.to_variable(np.zeros([10, 10]).astype('float32'))

            mylayer = Mylayer()
            print(dir(mylayer))
            # only part of the output is shown, because the list is too long:
            # ['__call__', '__class__', ... , 'conv2d', 'embedding', 'h_0', 'linear1', 'linear2', ... , 'sublayers', 'train']

        """
        method = dir(self.__class__)
        attrs = list(self.__dict__.keys())
        parameters = list(self._parameters.keys())
        sublayers = list(self._sub_layers.keys())
        buffers = list(self._buffers.keys())

        keys = method + attrs + parameters + sublayers + buffers

        return keys

    def state_dict(self,
                   destination=None,
                   include_sublayers=True,
                   structured_name_prefix=""):
        '''
        Get all parameters and persistable buffers of the current layer and its sub-layers, and set them into a dict.

        Parameters:
            destination(dict, optional) : If provided, all the parameters and persistable buffers will be set to this dict. Default: None
            include_sublayers(bool, optional) : If True, also include the parameters and persistable buffers from sublayers. Default: True

        Returns:
            dict: a dict containing all the parameters and persistable buffers.

        Examples:
            .. code-block:: python

              import paddle.fluid as fluid
              with fluid.dygraph.guard():
                  emb = fluid.dygraph.Embedding([10, 10])

                  state_dict = emb.state_dict()
                  fluid.save_dygraph(state_dict, "paddle_dy")

        '''
        if destination is None:
            destination = collections.OrderedDict()
        for name, data in self._parameters.items():
            if data is not None:
                destination[structured_name_prefix + name] = data
        for name, buffer in self._buffers.items():
            if buffer is not None and name not in self._non_persistable_buffer_names_set:
                destination[structured_name_prefix + name] = buffer

        if include_sublayers:
            for layer_name, layer_item in self._sub_layers.items():
                if layer_item is not None:
                    destination_temp = destination.copy()
                    destination_temp.update(
                        layer_item.state_dict(
                            destination_temp, include_sublayers,
                            structured_name_prefix + layer_name + "."))
                    destination = destination_temp
        return destination
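
    # Key naming sketch (illustrative): if a layer defines
    # `self.linear = Linear(...)` in __init__, the structured key of that
    # sublayer's weight in the resulting dict is "linear.weight".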

    @framework.deprecate_stat_dict
    def set_state_dict(self,
                       state_dict,
                       include_sublayers=True,
                       use_structured_name=True):
        '''
        Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensors in the state_dict.

        Parameters:
            state_dict(dict) : Dict containing all the parameters and persistable buffers.
            include_sublayers(bool, optional) : If True, also include the parameters and persistable buffers from sublayers. Default: True
            use_structured_name(bool, optional) : If True, use the structured name as key, otherwise, use the parameter or buffer name as key.
                Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

              import paddle

              paddle.disable_static()

              emb = paddle.nn.Embedding([10, 10])

              state_dict = emb.state_dict()
              paddle.save(state_dict, "paddle_dy")

              para_state_dict, _ = paddle.load("paddle_dy")

              emb.set_state_dict(para_state_dict)

        '''

        def _check_match(key, param):
            state = state_dict.get(key, None)
            if state is None:
                raise ValueError("{} is not found in the provided dict.".format(
                    key))
            if list(state.shape) != list(param.shape):
                raise ValueError(
                    "{} receives a shape {}, but the expected shape is {}.".
                    format(key, list(state.shape), list(param.shape)))
            return param, state

        matched_param_state = []
        for key, param in self.state_dict().items():
            key_name = key if use_structured_name else param.name
            try:
                match_res = _check_match(key_name, param)
                matched_param_state.append(match_res)
            except ValueError as err:
                warnings.warn(("Skip loading for {}. ".format(key) + str(err)))

        if in_dygraph_mode():
            for param, state in matched_param_state:
                param.set_value(state)
        else:

            def _set_var(var, ndarray):
                t = global_scope().find_var(var.name).get_tensor()
                p = t._place()
                if p.is_cpu_place():
                    place = core.CPUPlace()
                elif p.is_cuda_pinned_place():
                    place = core.CUDAPinnedPlace()
                else:
                    p = core.Place()
                    p.set_place(t._place())
                    place = core.CUDAPlace(p.gpu_device_id())
                t.set(ndarray, place)

            executor = Executor(_get_device())._default_executor
            # restore parameter states
            core._create_loaded_parameter(
                [param for param, state in matched_param_state],
                global_scope(), executor)
            for param, state in matched_param_state:
                _set_var(param, state)

    # [aliases] Compatible with old method names
    set_dict = set_state_dict
    load_dict = set_state_dict