From 02adf68dcc6b1a0d47717b4d386db55d7c74e2e0 Mon Sep 17 00:00:00 2001
From: Aurelius84
Date: Mon, 29 Jun 2020 16:05:16 +0800
Subject: [PATCH] [Dy2stat] Support buffers and register_buffer in Layer
 (#24888)

* support to save VarBase created in __init__ test=develop
* polish code test=develop
* refine to_static_var test=develop
* refine warning test=develop
* add unittest for to_static_var test=develop
* fix logger test=develop
* polish buffers en doc test=develop
* fix param_guard test=develop
* refine en doc test=develop
---
 python/paddle/fluid/dygraph/base.py           |  23 +-
 .../dygraph_to_static/partial_program.py      |  15 +-
 .../dygraph_to_static/program_translator.py   |  45 +++-
 python/paddle/fluid/dygraph/jit.py            |   8 +-
 python/paddle/fluid/dygraph/layers.py         | 206 ++++++++++++++++--
 .../fluid/dygraph/varbase_patch_methods.py    |  60 ++++-
 .../unittests/dygraph_to_static/test_bmn.py   |  18 +-
 .../fluid/tests/unittests/test_base_layer.py  | 178 +++++++++++++++
 .../fluid/tests/unittests/test_var_base.py    |  46 ++++
 9 files changed, 531 insertions(+), 68 deletions(-)

diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index c04f4b7b59..76c3518358 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -64,17 +64,26 @@ _functional_dygraph_context_manager = None

 @signature_safe_contextmanager
 def param_guard(parameters):
-    # Note: parameters is a reference of self._parameters
+    # Note: parameters is a reference to self._parameters or self._buffers
     if not framework.in_dygraph_mode() and parameters:
         origin_parameters = parameters.copy()
         for name, var_base in parameters.items():
             if isinstance(var_base, core.VarBase):
-                new_var = framework.Parameter(
-                    var_base.block,
-                    var_base.shape,
-                    var_base.dtype,
-                    var_base.type,
-                    name=var_base.name)
+                # Convert ParamBase into Parameter with the same attributes in dy2stat.
+                if isinstance(var_base, framework.ParamBase):
+                    new_var = var_base._to_static_var(to_parameter=True)
+                else:
+                    # Check whether it has been created before.
+                    if var_base.name in var_base.block.vars:
+                        new_var = var_base.block.vars[var_base.name]
+                    # Note(Aurelius84): Convert VarBase in self._buffers into Variable with
+                    # the same attributes and set persistable=True to allow saving this var.
+                    # Users can create a VarBase in `__init__`, such as a `mask` Tensor or
+                    # `hidden_0` in RNN layers, which is equivalent to a Parameter and is
+                    # necessary for inference. It will be pruned if it is not needed for
+                    # inference.
+                    else:
+                        new_var = var_base._to_static_var(
+                            to_parameter=False, persistable=True)
                 parameters[name] = new_var
         yield
         parameters.update(origin_parameters)
diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py b/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
index 32c36bc381..446132fc0b 100644
--- a/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
+++ b/python/paddle/fluid/dygraph/dygraph_to_static/partial_program.py
@@ -272,18 +272,19 @@ class PartialProgramLayer(layers.Layer):
                 "Type of self._params in PartialProgramLayer should be list or tuple, but received %s." %
                 type(self._params))

-        params_name_set = set()
-        for i, param in enumerate(self._params):
-            if not isinstance(param, framework.ParamBase):
+        param_and_buffer_names_set = set()
+        for i, var in enumerate(self._params):
+            # self._params contains parameters and buffers with persistable=True.
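+            # Buffers are plain VarBases rather than ParamBases, hence the
+            # type check below is against core.VarBase instead of framework.ParamBase.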
+            if not isinstance(var, core.VarBase):
                 raise TypeError(
-                    'Type of self._params[{}] in PartialProgramLayer should be framework.ParamBase, but received {}.'.
-                    format(i, type(param)))
-            params_name_set.add(param.name)
+                    'Type of self._params[{}] in PartialProgramLayer should be Parameter or Variable, but received {}.'.
+                    format(i, type(var)))
+            param_and_buffer_names_set.add(var.name)

         for block in main_program.blocks:
             for name, var in block.vars.items():
                 if isinstance(var, framework.Parameter):
-                    if name not in params_name_set:
+                    if name not in param_and_buffer_names_set:
                         raise ValueError(
                             "\n\tWe don't support to define layer with parameters in the function "
                             "decorated by `@declarative`.\n\tBecause that will re-defined parameters "
diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py
index 463a968e56..57358818ea 100644
--- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py
+++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py
@@ -15,7 +15,7 @@ from __future__ import print_function

 import gast
 import inspect
-import logging
+import warnings
 import textwrap
 import threading
 import collections
@@ -39,8 +39,6 @@ from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_progr

 __all__ = ['ProgramTranslator', 'convert_to_static']

-logger = logging.getLogger("fluid")
-

 class FunctionCache(object):
     """
@@ -131,16 +129,37 @@ class FunctionSpec(object):
         return self._args and isinstance(self._args[0], layers.Layer)

     def parameters(self, include_sublayer=True):
+        """
+        Returns the parameters of the decorated layer. If `include_sublayer` is True,
+        the parameters created in sublayers will be included as well.
+        """
         params = collections.OrderedDict()
         if self.is_method():
+            layer_instance = self._args[0]
             if include_sublayer:
-                params = self._args[0].parameters()
+                params = layer_instance.parameters()
                 names = [p.name for p in params]
                 params = collections.OrderedDict(zip(names, params))
             else:
-                params = self._args[0]._parameters
+                params = layer_instance._parameters
         return params

+    def buffers(self, include_sublayer=True):
+        """
+        Returns the Variable buffers of the decorated layer. If `include_sublayer` is True,
+        the Variable buffers created in sublayers will be included as well.
+        """
+        buffers = collections.OrderedDict()
+        if self.is_method():
+            layer_instance = self._args[0]
+            if include_sublayer:
+                buffers = layer_instance.buffers()
+                names = [buffer.name for buffer in buffers]
+                buffers = collections.OrderedDict(zip(names, buffers))
+            else:
+                buffers = layer_instance._buffers
+        return buffers
+
     @switch_to_static_graph
     def to_static_inputs(self, main_program):
         inputs = []
@@ -251,11 +270,13 @@ class ConcreteProgram(object):
             # 1. Adds `fluid.data` layers for input if needed
             inputs = func_spec.to_static_inputs(main_program)

-            # 2. Gets all ParamBases in the function
-            all_parameters = list(func_spec.parameters().values())
+            # 2. Gets all ParamBases and buffered VarBases in the function
+            all_parameters_and_buffers = list(func_spec.parameters().values(
+            )) + list(func_spec.buffers().values())

             # 3. Builds program only once and returns the output Variables.
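+            # Note: `param_guard` converts the dygraph ParamBase/VarBase objects
+            # into static Parameter/Variable instances while the traced code runs.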
-            with param_guard(func_spec.parameters(False)):
+            with param_guard(func_spec.parameters(False)), param_guard(
+                    func_spec.buffers(False)):
                 outputs = static_func(*inputs)
             if not isinstance(outputs, (tuple, list)):
                 outputs = [outputs] if outputs else []

         return ConcreteProgram(
             inputs=inputs,
             outputs=outputs,
-            parameters=all_parameters,
+            parameters=all_parameters_and_buffers,
             func=dygraph_function,
             main_program=main_program,
             startup_program=startup_program)
@@ -439,7 +460,7 @@ class ProgramTranslator(object):
             dygraph_func
         ), "Input dygraph_func is not a callable in ProgramTranslator.get_output"
         if not self.enable_declarative:
-            logger.info(
+            warnings.warn(
                 "The ProgramTranslator.get_output doesn't work when setting ProgramTranslator.enable = False. "
                 "We will just return dygraph output.")
             return dygraph_func(*args, **kwargs)
@@ -490,7 +511,7 @@ class ProgramTranslator(object):
             dygraph_func
         ), "Input dygraph_func is not a callable in ProgramTranslator.get_func"
         if not self.enable_declarative:
-            logger.info(
+            warnings.warn(
                 "The ProgramTranslator.get_func doesn't work when setting ProgramTranslator.enable=False. We will "
                 "just return dygraph output.")
             return dygraph_func
@@ -543,7 +564,7 @@ class ProgramTranslator(object):
             dygraph_func
         ), "Input dygraph_func is not a callable in ProgramTranslator.get_program"
         if not self.enable_declarative:
-            logger.info(
+            warnings.warn(
                 "The ProgramTranslator.get_program doesn't work when setting ProgramTranslator.enable=False. "
                 "We will just return dygraph output.")
             return dygraph_func(*args, **kwargs)
diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py
index a60e03f8a8..0dd369ca0a 100644
--- a/python/paddle/fluid/dygraph/jit.py
+++ b/python/paddle/fluid/dygraph/jit.py
@@ -16,7 +16,7 @@ from __future__ import print_function

 __all__ = ['TracedLayer', 'declarative', 'dygraph_to_static_func']

-import logging
+import warnings
 from paddle.fluid import core
 from paddle.fluid.compiler import CompiledProgram
 from paddle.fluid.dygraph.base import program_desc_tracing_guard, switch_to_static_graph
@@ -26,8 +26,6 @@ from paddle.fluid.executor import Executor, scope_guard
 from paddle.fluid.framework import Program, Block, Variable, _dygraph_tracer, dygraph_only, _dygraph_guard, _current_expected_place, in_dygraph_mode
 from paddle.fluid.wrapped_decorator import wrap_decorator

-logger = logging.getLogger("fluid")
-

 def create_program_from_desc(program_desc):
     program = Program()
@@ -104,7 +102,7 @@ def _dygraph_to_static_func_(dygraph_func):
     def __impl__(*args, **kwargs):
         program_translator = ProgramTranslator()
         if in_dygraph_mode() or not program_translator.enable_declarative:
-            logger.info(
+            warnings.warn(
                 "The decorator 'dygraph_to_static_func' doesn't work in "
                 "dygraph mode or when ProgramTranslator.enable is set to False. "
                 "We will just return dygraph output.")
@@ -156,7 +154,7 @@ def _declarative_(dygraph_func):
     def __impl__(*args, **kwargs):
         program_translator = ProgramTranslator()
         if not program_translator.enable_declarative:
-            logger.info(
+            warnings.warn(
                 "The decorator 'declarative' doesn't work when setting ProgramTranslator.enable=False. "
                 "We will just return dygraph output.")
             return dygraph_func(*args, **kwargs)
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index d80170800d..539376e9a0 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -90,6 +90,9 @@ class Layer(core.Layer):
         self._dtype = dtype

         self._parameters = collections.OrderedDict()
+        # Buffers: non-parameter variables created in the layer.
+        self._buffers = collections.OrderedDict()
+        self._non_persistable_buffer_names_set = set()
         self._sub_layers = collections.OrderedDict()
         self._loaddict_holder = collections.OrderedDict()
@@ -414,6 +417,137 @@ class Layer(core.Layer):
                     layers_set=layers_set):
                 yield p, l

+    def register_buffer(self, name, variable, persistable=True):
+        """
+        Registers a variable as a buffer of the layer.
+
+        A buffer is a non-parametric variable that will not be updated by the
+        optimizer, but is necessary for evaluation and inference, for example,
+        the mean and variance in BatchNorm layers. The registered buffer is
+        persistable by default and will be saved into `state_dict` alongside
+        parameters. If persistable is set to False, a non-persistable buffer is
+        registered, which will not be a part of `state_dict`.
+
+        Buffers can be accessed as attributes using the given names.
+
+        Parameters:
+            name (string): name of the buffer. The buffer can be accessed
+                from this layer using the given name.
+            variable (Variable): the variable to be registered as a buffer.
+            persistable (bool): whether the buffer is part of this layer's
+                state_dict.
+
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import numpy as np
+                import paddle.fluid as fluid
+
+                with fluid.dygraph.guard():
+                    linear = fluid.Linear(10, 3)
+                    value = np.array([0]).astype("float32")
+                    buffer = fluid.dygraph.to_variable(value)
+                    linear.register_buffer("buf_name", buffer, persistable=True)
+
+                    # get the buffer by attribute.
+                    print(linear.buf_name)
+
+        """
+
+        if '_buffers' not in self.__dict__:
+            raise ValueError(
+                "super(YourLayer, self).__init__() should be called first")
+        elif not isinstance(name, six.string_types):
+            raise TypeError(
+                "The name of buffer should be a string, but received {}.".
+                format(type(name).__name__))
+        elif '.' in name:
+            raise KeyError("The name of buffer can not contain \".\"")
+        elif name == '':
+            raise KeyError("The name of buffer can not be empty.")
+        elif hasattr(self, name) and name not in self._buffers:
+            raise KeyError("attribute '{}' already exists.".format(name))
+        elif variable is not None and not type(variable) == core.VarBase:
+            raise TypeError(
+                "The registered buffer should be a core.VarBase, but received {}.".
+                format(type(variable).__name__))
+        else:
+            self._buffers[name] = variable
+            if persistable:
+                self._non_persistable_buffer_names_set.discard(name)
+            else:
+                self._non_persistable_buffer_names_set.add(name)
+
+    def buffers(self, include_sublayers=True):
+        """
+        Returns a list of all buffers from the current layer and its sub-layers.
+
+        Parameters:
+            include_sublayers(bool, optional): Whether to include the buffers from sublayers. Default: True
+
+        Returns:
+            list of :ref:`api_guide_Variable_en` : a list of buffers.
+        """
+        ret = [
+            buffer
+            for _, buffer in self.named_buffers(
+                include_sublayers=include_sublayers)
+        ]
+        return ret
+
+    def named_buffers(self, prefix='', include_sublayers=True):
+        """
+        Returns an iterator over all buffers in the Layer, yielding tuples of name and Variable.
+
+        Parameters:
+            prefix(str, optional): Prefix to prepend to all buffer names. Default: ''.
+            include_sublayers(bool, optional): Whether to include the buffers from
+                sublayers. Default: True.
+
+        Yields:
+            (string, Variable): Tuple of name and Variable
+
+        Examples:
+            .. code-block:: python
+
+                import numpy as np
+                import paddle.fluid as fluid
+
+                with fluid.dygraph.guard():
+                    fc1 = fluid.Linear(10, 3)
+                    buffer1 = fluid.dygraph.to_variable(np.array([0]).astype("float32"))
+                    # register a variable as a buffer with an explicit `persistable`.
+                    fc1.register_buffer("buf_name_1", buffer1, persistable=True)
+
+                    fc2 = fluid.Linear(3, 10)
+                    buffer2 = fluid.dygraph.to_variable(np.array([1]).astype("float32"))
+                    # register a buffer by assigning a VarBase to an attribute;
+                    # `persistable` can only be False this way.
+                    fc2.buf_name_2 = buffer2
+
+                    model = fluid.dygraph.Sequential(fc1, fc2)
+
+                    # get all named buffers
+                    for name, buffer in model.named_buffers():
+                        print(name, buffer)
+
+        """
+        buffers_set = set()
+        named_sublayers = self.named_sublayers(
+            prefix=prefix,
+            include_sublayers=include_sublayers,
+            include_self=True)
+        for layer_prefix, sublayer in named_sublayers:
+            buffers = sublayer._buffers.items()
+            for key, buffer in buffers:
+                if buffer is None or buffer in buffers_set:
+                    continue
+                buffers_set.add(buffer)
+                name = layer_prefix + ('.' if layer_prefix else '') + key
+                yield name, buffer
+
     def clear_gradients(self):
         """
         Clear the gradients of all parameters for this layer.
@@ -462,7 +596,7 @@ class Layer(core.Layer):
                 self._parameters.values())
             self._built = True

-        with param_guard(self._parameters):
+        with param_guard(self._parameters), param_guard(self._buffers):
             outputs = self.forward(*inputs, **kwargs)

         for forward_post_hook in self._forward_post_hooks.values():
@@ -534,6 +668,8 @@ class Layer(core.Layer):
             return self._parameters[name]
         elif name in self._sub_layers:
             return self._sub_layers[name]
+        elif name in self._buffers:
+            return self._buffers[name]
         else:
             return object.__getattribute__(self, name)

@@ -556,7 +692,7 @@ class Layer(core.Layer):

                     value.set_value(self._loaddict_holder[value.name])

-                _remove_if_exist(self.__dict__, self._sub_layers)
+                _remove_if_exist(self.__dict__, self._buffers, self._sub_layers)
                 params[name] = value
             elif params is not None and name in params:
                 if value is not None:
@@ -572,7 +708,7 @@ class Layer(core.Layer):
                         "super(YourLayer, self).__init__() should be called first"
                     )
-                _remove_if_exist(self.__dict__, self._parameters)
+                _remove_if_exist(self.__dict__, self._parameters, self._buffers)
                 layers[name] = value
             elif layers is not None and name in layers:
                 if value is not None:
@@ -581,13 +717,38 @@ class Layer(core.Layer):
                         .format(name, type(value).__name__))
                 layers[name] = None
             else:
-                object.__setattr__(self, name, value)
+                _buffers = self.__dict__.get('_buffers', None)
+                if type(value) == core.VarBase:
+                    if _buffers is None:
+                        raise ValueError(
+                            "super(YourLayer, self).__init__() should be called first"
+                        )
+                    _remove_if_exist(self.__dict__, self._parameters,
+                                     self._sub_layers)
+                    # Set persistable=False by default. Only `register_buffer` can
+                    # add a persistable buffer.
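+                    # e.g. `self.mask = to_variable(np_mask)` registers a
+                    # non-persistable buffer named `mask`.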
+                    if name not in self._buffers:
+                        self._non_persistable_buffer_names_set.add(name)
+                    _buffers[name] = value
+                elif _buffers is not None and name in _buffers:
+                    if value is not None:
+                        raise TypeError(
+                            "assignment to buffer '{}' should be of type core.VarBase or None, but got '{}'"
+                            .format(name, type(value).__name__))
+                    # Assigning None will remove the buffer, but re-assigning a new
+                    # VarBase to it will mark it as a buffer again, keeping the same
+                    # `persistable` attribute.
+                    _buffers[name] = None
+                else:
+                    object.__setattr__(self, name, value)

     def __delattr__(self, name):
         if name in self._parameters:
             del self._parameters[name]
         elif name in self._sub_layers:
             del self._sub_layers[name]
+        elif name in self._buffers:
+            del self._buffers[name]
+            self._non_persistable_buffer_names_set.discard(name)
         else:
             object.__delattr__(self, name)

@@ -596,14 +757,14 @@ class Layer(core.Layer):
                    include_sublayers=True,
                    structured_name_prefix=""):
         '''
-        Get all parameters of current layer and its sub-layers. And set all the parameters into a dict
+        Get all parameters and persistable buffers of the current layer and its sub-layers, and set them into a dict

         Parameters:
-            destination(dict, optional) : If provide, all the parameters will set to this dict . Default: None
-            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            destination(dict, optional) : If provided, all the parameters and persistable buffers will be set into this dict. Default: None
+            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True

         Returns:
-            dict: a dict contains all the parameters
+            dict: a dict containing all the parameters and persistable buffers.

         Examples:
             .. code-block:: python

@@ -622,6 +783,9 @@ class Layer(core.Layer):
         for name, data in self._parameters.items():
             if data is not None:
                 destination[structured_name_prefix + name] = data
+        for name, buffer in self._buffers.items():
+            if buffer is not None and name not in self._non_persistable_buffer_names_set:
+                destination[structured_name_prefix + name] = buffer

         if include_sublayers:
             for layer_name, layer_item in self._sub_layers.items():
@@ -639,12 +803,12 @@ class Layer(core.Layer):
                  include_sublayers=True,
                  use_structured_name=True):
         '''
-        Set parameters from stat_dict. All the parameters will be reset by the tensor in the stat_dict
+        Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensors in the state_dict

         Parameters:
-            state_dict(dict) : Dict contains all the parameters
-            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
-            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+            state_dict(dict) : Dict containing all the parameters and persistable buffers.
+            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                                   Default: True
         Returns:
             None
@@ -674,14 +838,14 @@ class Layer(core.Layer):
                   include_sublayers=True,
                   use_structured_name=True):
         '''
-        Set parameters from stat_dict. All the parameters will be reset by the tensor in the stat_dict
+        Set parameters and persistable buffers from state_dict. All the parameters and persistable buffers will be reset by the tensors in the state_dict

         This API will be deprecated. Please use set_dict instead.

         Parameters:
-            state_dict(dict) : Dict contains all the parameters
-            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
-            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+            state_dict(dict) : Dict containing all the parameters and persistable buffers.
+            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                                   Default: True
         Returns:
             None
@@ -704,13 +868,13 @@ class Layer(core.Layer):

         inner_state_dict = self.state_dict()

-        for name, para in inner_state_dict.items():
-            key_name = name if use_structured_name else para.name
+        for name, param_or_buffer in inner_state_dict.items():
+            key_name = name if use_structured_name else param_or_buffer.name
             if key_name in stat_dict:
-                para.set_value(stat_dict[key_name])
+                param_or_buffer.set_value(stat_dict[key_name])
             else:
                 raise RuntimeError(
-                    "Parameter not found, Can't not find [ {} ] in stat_dict"
+                    "Parameter or persistable buffer not found, Can't find [ {} ] in state_dict; "
                     "use_structured_name is set to [{}]".format(
                         key_name, use_structured_name))
         unused_para_list = []
@@ -719,5 +883,5 @@ class Layer(core.Layer):
                 unused_para_list.append(k)
         if len(unused_para_list) > 0:
             warnings.warn(
-                "Varibale [ {} ] are not used, because not included in layers state_dict".
+                "Variables [ {} ] are not used, because they are not included in the layer's state_dict".
                 format(" ".join(unused_para_list)))
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index 6b528479ff..013aa23d94 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -12,16 +12,67 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import inspect
 from .. import framework
 from .. import core
 from . import BackwardStrategy
-from ..framework import Variable, _getitem_impl_
-from .. import unique_name
+from ..framework import Variable, Parameter, ParamBase
+from .base import switch_to_static_graph
 import numpy as np
 from .math_op_patch import monkey_patch_math_varbase


 def monkey_patch_varbase():
+    @switch_to_static_graph
+    def _to_static_var(self, to_parameter=False, **kwargs):
+        """
+        **Notes**:
+            **This API is ONLY available in Dygraph mode**
+
+        Transform a VarBase into a static Variable with the same attributes. It's a
+        low-level interface used in dy2stat and shall not be called directly.
+
+        Args:
+            to_parameter (bool): It takes effect only if the input is a VarBase. If set
+                to True, the VarBase will be converted into a framework.Parameter.
+                Otherwise, it will be converted into a framework.Variable. Default: False.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                from paddle.fluid.dygraph.base import to_variable
+                import numpy as np
+
+                data = np.ones([3, 1024], dtype='float32')
+                with fluid.dygraph.guard():
+                    var_base = to_variable(data)
+                    static_var = var_base._to_static_var()
+
+        """
+        if isinstance(self, ParamBase):
+            attr_kwargs = self.__dict__.copy()
+        else:
+            attr_names = [
+                name for name in dir(self)
+                if not (inspect.ismethod(getattr(self, name)) or
+                        name.startswith('_'))
+            ]
+            attr_kwargs = {name: getattr(self, name) for name in attr_names}
+
+        attr_keys = ['block', 'shape', 'dtype', 'type', 'name', 'persistable']
+        for attr in attr_keys:
+            attr_kwargs[attr] = getattr(self, attr, None)
+
+        attr_kwargs.update(kwargs)
+
+        if to_parameter or isinstance(self, ParamBase):
+            del attr_kwargs['persistable']
+            static_var = Parameter(**attr_kwargs)
+        else:
+            static_var = Variable(**attr_kwargs)
+        return static_var
+
     # TODO(jiabin): move this to cplusplus end if we find some performance issue on it
     @framework.dygraph_only
     def set_value(self, value):
@@ -214,8 +265,9 @@ def monkey_patch_varbase():
     for method_name, method in (
             ("__bool__", __bool__), ("__nonzero__", __nonzero__),
-            ("set_value", set_value), ("block", block), ("backward", backward),
-            ("gradient", gradient), ("__str__", __str__), ("to_string", to_string)):
+            ("_to_static_var", _to_static_var), ("set_value", set_value),
+            ("block", block), ("backward", backward), ("gradient", gradient),
+            ("__str__", __str__), ("to_string", to_string)):
         setattr(core.VarBase, method_name, method)

     # patch math methods for varbase
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py
index 0e0084aca3..5896d3a292 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py
@@ -186,11 +186,11 @@ class BMN(fluid.dygraph.Layer):
             act="relu")

         # init to speed up
-        self.sample_mask = get_interp1d_mask(
-            self.tscale, self.dscale, self.prop_boundary_ratio, self.num_sample,
-            self.num_sample_perbin)
-        # self.sample_mask = fluid.dygraph.base.to_variable(sample_mask)
-        # self.sample_mask.stop_gradient = True
+        sample_mask = get_interp1d_mask(self.tscale, self.dscale,
+                                        self.prop_boundary_ratio,
+                                        self.num_sample, self.num_sample_perbin)
+        self.sample_mask = fluid.dygraph.base.to_variable(sample_mask)
+        self.sample_mask.stop_gradient = True

         self.p_conv3d1 = fluid.dygraph.Conv3D(
             num_channels=128,
@@ -241,12 +241,6 @@ class BMN(fluid.dygraph.Layer):

     @declarative
     def forward(self, x):
-        # TODO(Aurelius84): sample_mask is created in `__init__`,
-        # but currently we don't support that. The two lines code
-        # will be removed when support creating var outside of forward.
- sample_mask = to_variable(self.sample_mask) - sample_mask.stop_gradient = True - # Base Module x = self.b_conv1(x) x = self.b_conv2(x) @@ -262,7 +256,7 @@ class BMN(fluid.dygraph.Layer): # PEM xp = self.p_conv1(x) # BM layer - xp = fluid.layers.matmul(xp, sample_mask) + xp = fluid.layers.matmul(xp, self.sample_mask) xp = fluid.layers.reshape( xp, shape=[0, 0, -1, self.dscale, self.tscale]) diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index 707b0b00b4..bc666c0de5 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -16,6 +16,8 @@ import unittest import numpy as np import paddle.fluid as fluid +from paddle.fluid.dygraph import to_variable +from paddle.fluid.framework import ParamBase class L1(fluid.Layer): @@ -85,5 +87,181 @@ class TestBaseLayer(unittest.TestCase): self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2]))) +class BufferLayer(fluid.Layer): + def __init__(self): + super(BufferLayer, self).__init__() + buffer_var = to_variable(np.zeros([2, 4]).astype('int32')) + self.register_buffer("layer_buffer", buffer_var) + + def forward(self): + pass + + +class BufferNet(fluid.Layer): + def __init__(self): + super(BufferNet, self).__init__() + self.buffer_layer = BufferLayer() + self.w1 = self.create_parameter( + shape=[2, 2], dtype='float32', is_bias=False) + buffer_var = to_variable(np.ones([2, 4]).astype('int32')) + self.register_buffer("net_buffer", buffer_var) + + self.new_buffer = to_variable(np.ones([4, 2]).astype('int32')) + + def forward(self): + pass + + +class TestBuffer(unittest.TestCase): + def test_buffers_and_named_buffers(self): + def names(named_buffers): + return [name for name, _ in named_buffers] + + with fluid.dygraph.guard(): + layer = BufferLayer() + net = BufferNet() + + self.assertEqual(len(layer.buffers()), 1) + self.assertEqual(names(layer.named_buffers()), ['layer_buffer']) + + self.assertEqual(len(net.buffers()), 3) + self.assertEqual( + names(net.named_buffers()), + ['net_buffer', 'new_buffer', 'buffer_layer.layer_buffer']) + + self.assertEqual(len(net.buffers(include_sublayers=False)), 2) + self.assertEqual( + names(net.named_buffers(include_sublayers=False)), + ['net_buffer', 'new_buffer']) + + def test_register_buffer_with_error(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + var = to_variable(np.zeros([1])) + + with self.assertRaisesRegexp(TypeError, + "name of buffer should be a string"): + net.register_buffer(12, var) + + with self.assertRaisesRegexp(TypeError, + "buffer should be a core.VarBase"): + net.register_buffer("buffer_name", ParamBase([2, 2], 'float32')) + + with self.assertRaisesRegexp(KeyError, + "name of buffer can not contain"): + net.register_buffer("buffer.name", var) + + with self.assertRaisesRegexp(KeyError, + "name of buffer can not be empty"): + net.register_buffer("", var) + + net.attr_name = 10 + with self.assertRaisesRegexp(KeyError, "already exists"): + net.register_buffer("attr_name", var) + + del net.attr_name + net.attr_name = ParamBase([2, 2], 'float32') + with self.assertRaisesRegexp(KeyError, "already exists"): + net.register_buffer("attr_name", var) + + def test_register_buffer_same_name(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + var1 = to_variable(np.zeros([1])) + var2 = to_variable(np.zeros([2])) + var3 = to_variable(np.zeros([3])) + + net.register_buffer("buffer_name", var1) + self.assert_var_base_equal(net.buffer_name, var1) 
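+            # register_buffer with an existing name overwrites the stored buffer.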
+ net.register_buffer("buffer_name", var2) + self.assert_var_base_equal(net.buffer_name, var2) + net.register_buffer("buffer_name", var3) + self.assert_var_base_equal(net.buffer_name, var3) + + def test_buffer_not_persistable(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + var1 = to_variable(np.zeros([1])) + + net.register_buffer("buffer_name", var1, persistable=False) + self.assertEqual(len(net.buffers()), 1) + self.assertEqual(len(net.state_dict()), 0) + + def test_buffer_not_persistable_del(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + var1 = to_variable(np.zeros([1])) + net.register_buffer("buffer_name", var1, persistable=False) + del net.buffer_name + self.assertEqual(len(net.buffers()), 0) + + def test_buffer_not_persistable_overwrite(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + var1 = to_variable(np.zeros([1])) + var2 = to_variable(np.zeros([2])) + net.register_buffer("buffer_name", var1, persistable=False) + net.register_buffer("buffer_name", var2) + + # Allow to overwrite a non-persistable buffer with a persistable var. + self.assertEqual(len(net.buffers()), 1) + self.assertEqual(len(net.state_dict()), 1) + + net.register_buffer("buffer_name", var1, persistable=False) + self.assertEqual(len(net.buffers()), 1) + self.assertEqual(len(net.state_dict()), 0) + + def test_buffer_not_persistable_assign(self): + with fluid.dygraph.guard(): + net = fluid.Layer() + var1 = to_variable(np.zeros([1])) + net.register_buffer("buffer_name", var1, persistable=False) + + # Assigning Nones will remove the buffer, but allow to re-assign + # to remark it as buffer. + net.buffer_name = None + self.assertEqual(len(net.buffers()), 0) + self.assertEqual(len(net.state_dict()), 0) + + net.buffer_name = var1 + self.assertEqual(len(net.buffers()), 1) + self.assertEqual(len(net.state_dict()), 0) + + # Re-assign a ParamBase will remove the buffer. 
+            net.buffer_name = ParamBase([2, 2], 'float32')
+            self.assertEqual(len(net.buffers()), 0)
+            self.assertEqual(len(net.state_dict()), 1)
+
+    def test_buffer_not_persistable_load(self):
+        with fluid.dygraph.guard():
+            net = fluid.Layer()
+            var1 = to_variable(np.zeros([1]))
+            net.register_buffer("buffer_name", var1, persistable=False)
+            net.load_dict({})
+
+    def test_buffer_state_dict(self):
+        with fluid.dygraph.guard():
+            net = fluid.Layer()
+            var1 = to_variable(np.zeros([2, 3]))
+            var2 = to_variable(np.zeros([3, 2]))
+            net.register_buffer("buffer_var1", var1)
+            net.register_buffer("buffer_var2", var2, persistable=False)
+
+            self.assertEqual(len(net.state_dict()), 1)
+            self.assertEqual([name for name, _ in net.state_dict().items()],
+                             ["buffer_var1"])
+
+            # load state_dict
+            net_load = fluid.Layer()
+            var = to_variable(np.ones([2, 3]))
+            net_load.register_buffer("buffer_var1", var)
+            net_load.load_dict(net.state_dict())
+
+            self.assert_var_base_equal(net_load.buffer_var1, var1)
+
+    def assert_var_base_equal(self, var1, var2):
+        self.assertTrue(np.array_equal(var1.numpy(), var2.numpy()))
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
index ebd357106c..fcf8bc46f5 100644
--- a/python/paddle/fluid/tests/unittests/test_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -233,6 +233,52 @@ class TestVarBase(unittest.TestCase):
         assert bool(var1) == False, "bool(var1) is False"
         assert bool(var2) == True, "bool(var2) is True"

+    def test_to_static_var(self):
+        with fluid.dygraph.guard():
+            # Convert VarBase into Variable or Parameter
+            var_base = fluid.dygraph.to_variable(self.array, name="var_base_1")
+            static_var = var_base._to_static_var()
+            self._assert_to_static(var_base, static_var)
+
+            var_base = fluid.dygraph.to_variable(self.array, name="var_base_2")
+            static_param = var_base._to_static_var(to_parameter=True)
+            self._assert_to_static(var_base, static_param, True)
+
+            # Convert ParamBase into Parameter
+            fc = fluid.dygraph.Linear(
+                10,
+                20,
+                param_attr=fluid.ParamAttr(
+                    learning_rate=0.001,
+                    do_model_average=True,
+                    regularizer=fluid.regularizer.L1Decay()))
+            weight = fc.parameters()[0]
+            static_param = weight._to_static_var()
+            self._assert_to_static(weight, static_param, True)
+
+    def _assert_to_static(self, var_base, static_var, is_param=False):
+        if is_param:
+            self.assertTrue(isinstance(static_var, fluid.framework.Parameter))
+            # Converted Parameters are always persistable.
+            self.assertTrue(static_var.persistable)
+            if isinstance(var_base, fluid.framework.ParamBase):
+                for attr in ['trainable', 'is_distributed', 'do_model_average']:
+                    self.assertEqual(
+                        getattr(var_base, attr), getattr(static_var, attr))
+
+                self.assertEqual(static_var.optimize_attr['learning_rate'],
+                                 0.001)
+                self.assertTrue(
+                    isinstance(static_var.regularizer,
+                               fluid.regularizer.L1Decay))
+        else:
+            self.assertTrue(isinstance(static_var, fluid.framework.Variable))
+
+        attr_keys = ['block', 'dtype', 'type', 'name']
+        for attr in attr_keys:
+            self.assertEqual(getattr(var_base, attr), getattr(static_var, attr))
+
+        self.assertListEqual(list(var_base.shape), list(static_var.shape))
+

 if __name__ == '__main__':
     unittest.main()
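
A minimal usage sketch of the feature this patch adds, following the same pattern as the test_bmn.py change above. It is not part of the diff; the layer name, shapes, and data are made up for illustration:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph import declarative, to_variable


    class MaskLayer(fluid.dygraph.Layer):
        def __init__(self):
            super(MaskLayer, self).__init__()
            self.linear = fluid.dygraph.Linear(4, 4)
            # A constant mask created in __init__ and registered as a
            # persistable buffer: it is saved in state_dict and survives
            # the dygraph-to-static translation.
            mask = to_variable(np.eye(4, dtype='float32'))
            self.register_buffer("mask", mask, persistable=True)

        @declarative
        def forward(self, x):
            # The buffer is used directly; param_guard converts it into a
            # static Variable with persistable=True during translation.
            return fluid.layers.matmul(self.linear(x), self.mask)


    with fluid.dygraph.guard():
        net = MaskLayer()
        x = to_variable(np.ones([2, 4], dtype='float32'))
        out = net(x)
        # The buffer shows up in both buffers() and state_dict().
        assert len(net.buffers()) == 1
        assert "mask" in net.state_dict()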