State dict does not count data parallel layers (#22169)

* DataParallel state dict doesn't include _layers.; test=develop

* add unittest of data parallel; test=develop

* add load state test; test=develop
revert-22710-feature/integrated_ps_api
hong 5 years ago committed by GitHub
parent 5e601a92ad
commit 737334989b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
def _is_data_parallel_mode(self):
return self._strategy.nranks > 1
def state_dict(self,
               destination=None,
               include_sublayers=True,
               structured_name_prefix=""):
    '''
    Collect the parameters of the wrapped layer (self._layers) and its sub-layers into a dict.

    The call is forwarded unchanged to ``self._layers.state_dict``, so the
    returned keys are exactly the ones the wrapped layer produces.

    Parameters:
        destination(dict, optional) : If provided, all the parameters are stored in this dict. Default: None
        include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
        structured_name_prefix(str, optional): If not an empty str, every key in the state dict starts
                                               with structured_name_prefix. Default: ""

    Returns:
        dict: a dict containing all the parameters of self._layers

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            with fluid.dygraph.guard():
                strategy = fluid.dygraph.parallel.prepare_context()
                emb = fluid.dygraph.Embedding([10, 10])
                emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                state_dict = emb.state_dict()
                fluid.save_dygraph(state_dict, "paddle_dy")
    '''
    wrapped = self._layers
    collected = wrapped.state_dict(
        destination=destination,
        include_sublayers=include_sublayers,
        structured_name_prefix=structured_name_prefix)
    return collected
def set_dict(self,
             stat_dict,
             include_sublayers=True,
             use_structured_name=True):
    '''
    Set the parameters of the wrapped layer (self._layers) from stat_dict.

    The call is forwarded unchanged to ``self._layers.set_dict``; all the
    parameters of self._layers are reset by the tensors in stat_dict.

    Parameters:
        stat_dict(dict) : Dict containing all the parameters
        include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
        use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                              Default: True

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            with fluid.dygraph.guard():
                strategy = fluid.dygraph.parallel.prepare_context()
                emb = fluid.dygraph.Embedding([10, 10])
                emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                state_dict = emb.state_dict()
                fluid.save_dygraph(state_dict, "paddle_dy")

                para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                emb.set_dict(para_state_dict)
    '''
    wrapped = self._layers
    # The wrapped layer's return value is intentionally not propagated.
    wrapped.set_dict(
        stat_dict,
        include_sublayers=include_sublayers,
        use_structured_name=use_structured_name)
def load_dict(self,
              stat_dict,
              include_sublayers=True,
              use_structured_name=True):
    '''
    Set the parameters of the wrapped layer (self._layers) from stat_dict.

    The call is forwarded unchanged to ``self._layers.load_dict``; all the
    parameters of self._layers are reset by the tensors in stat_dict.

    This api will be Deprecated. Please use set_dict

    Parameters:
        stat_dict(dict) : Dict containing all the parameters
        include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
        use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                              Default: True

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            with fluid.dygraph.guard():
                strategy = fluid.dygraph.parallel.prepare_context()
                emb = fluid.dygraph.Embedding([10, 10])
                emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                state_dict = emb.state_dict()
                fluid.save_dygraph(state_dict, "paddle_dy")

                para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                emb.load_dict(para_state_dict)
    '''
    wrapped = self._layers
    # The wrapped layer's return value is intentionally not propagated.
    wrapped.load_dict(
        stat_dict,
        include_sublayers=include_sublayers,
        use_structured_name=use_structured_name)

@ -0,0 +1,82 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import unittest
import numpy as np
import six
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.nn import Linear
import paddle.fluid.core as core
class MLP(fluid.Layer):
    '''
    Small two-layer perceptron used as the layer wrapped by DataParallel.

    It chains Linear(784, 10) and Linear(10, 10).
    '''

    def __init__(self, param_attr=None, bias_attr=None):
        # param_attr and bias_attr are accepted but not used by this layer.
        super(MLP, self).__init__()

        self._linear1 = Linear(784, 10)
        self._linear2 = Linear(10, 10)

    def forward(self, inputs):
        '''Apply the first linear layer, then the second, to inputs.'''
        hidden = self._linear1(inputs)
        return self._linear2(hidden)
class TestDataParallelStateDict(unittest.TestCase):
    '''
    Check that DataParallel.state_dict / set_dict / load_dict behave like the
    same methods of the layer it wraps.
    '''

    def _zero_parameters(self, state, place):
        '''Overwrite every parameter in state with zeros and verify the write.'''
        for v in state.values():
            np_t = v.numpy()
            var = v.value().get_tensor()
            var.set(np.zeros_like(np_t), place)

            self.assertTrue(np.sum(np.abs(v.numpy())) == 0)

    def _assert_state_equals(self, state, expected):
        '''Assert that every parameter in state equals the array stored under the same key in expected.'''
        for k, v in state.items():
            self.assertIn(k, expected)
            self.assertTrue(
                np.array_equal(v.numpy(), expected[k]),
                msg="parameter %s was not restored" % k)

    def test_data_parallel_state_dict(self):
        with fluid.dygraph.guard():
            strategy = dygraph.parallel.prepare_context()
            mlp = MLP()
            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)

            single_state = mlp.state_dict()
            parallel_state = parallel_mlp.state_dict()

            # The wrapper must expose exactly the keys of the wrapped layer.
            self.assertEqual(
                set(single_state.keys()), set(parallel_state.keys()))

            base_para = {}
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)
            for k, v in single_state.items():
                self.assertIn(k, parallel_state)

                self.assertTrue(
                    np.array_equal(v.numpy(), parallel_state[k].numpy()))

                base_para[k] = v.numpy()

            # Zero the parameters so that set_dict has a visible effect.
            self._zero_parameters(parallel_state, place)

            parallel_mlp.set_dict(base_para)
            self._assert_state_equals(parallel_mlp.state_dict(), base_para)

            # Zero the parameters again so that load_dict is actually
            # verified instead of being called on already-correct values.
            self._zero_parameters(parallel_mlp.state_dict(), place)

            parallel_mlp.load_dict(base_para)
            self._assert_state_equals(parallel_mlp.state_dict(), base_para)
# Run the test cases in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
Loading…
Cancel
Save