State dict does not count data parallel layers (#22169)

* DataParallel state dict doesn't include _layers.; test=develop * add unittest of data parallel; test=develop * add load state test; test=develop
6 years ago · 737334989b
parent 5e601a92ad
commit 737334989b
2 changed files with 195 additions and 0 deletions
--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
    def _is_data_parallel_mode(self):
        '''
        Tell whether the attached strategy describes more than one rank.

        Returns:
            The result of comparing self._strategy.nranks with 1 (true when
            nranks is greater than 1).
        '''
        rank_count = self._strategy.nranks
        return rank_count > 1
    def state_dict(self,
                   destination=None,
                   include_sublayers=True,
                   structured_name_prefix=""):
        '''
        Return the state dict of the wrapped layers (self._layers).

        The call is forwarded as-is to self._layers.state_dict, so the result
        is whatever the wrapped layers produce for the given arguments.

        Parameters:
            destination(dict, optional) : If provided, all the parameters will be set into this dict. Default: None
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start
                                                 with structured_name_prefix. Default: ""

        Returns:
            dict: a dict containing all the parameters of self._layers

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    strategy = fluid.dygraph.parallel.prepare_context()
                    emb = fluid.dygraph.Embedding([10, 10])
                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")
        '''
        # Collect the options once and hand them to the wrapped layers untouched.
        forwarded = dict(
            destination=destination,
            include_sublayers=include_sublayers,
            structured_name_prefix=structured_name_prefix)
        return self._layers.state_dict(**forwarded)
    def set_dict(self,
                 stat_dict,
                 include_sublayers=True,
                 use_structured_name=True):
        '''
        Load parameters into the wrapped layers (self._layers) from stat_dict.

        The call is forwarded to self._layers.set_dict; its return value is
        not propagated.

        Parameters:
            stat_dict(dict) : Dict containing all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                                  Default: True

        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    strategy = fluid.dygraph.parallel.prepare_context()
                    emb = fluid.dygraph.Embedding([10, 10])
                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                    emb.set_dict(para_state_dict)
        '''
        options = dict(
            include_sublayers=include_sublayers,
            use_structured_name=use_structured_name)
        self._layers.set_dict(stat_dict, **options)
    def load_dict(self,
                  stat_dict,
                  include_sublayers=True,
                  use_structured_name=True):
        '''
        Load parameters into the wrapped layers (self._layers) from stat_dict.

        The call is forwarded to self._layers.load_dict; its return value is
        not propagated.

        This api will be Deprecated. Please use set_dict

        Parameters:
            stat_dict(dict) : Dict containing all the parameters
            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
                                                  Default: True

        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                with fluid.dygraph.guard():
                    strategy = fluid.dygraph.parallel.prepare_context()
                    emb = fluid.dygraph.Embedding([10, 10])
                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)

                    state_dict = emb.state_dict()
                    fluid.save_dygraph(state_dict, "paddle_dy")

                    para_state_dict, _ = fluid.load_dygraph("paddle_dy")
                    emb.load_dict(para_state_dict)
        '''
        options = dict(
            include_sublayers=include_sublayers,
            use_structured_name=use_structured_name)
        self._layers.load_dict(stat_dict, **options)
--- a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
@ -0,0 +1,82 @@
 # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import contextlib
 import unittest
 import numpy as np
 import six
 import unittest
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.dygraph as dygraph
 from paddle.fluid.dygraph.nn import Linear
 import paddle.fluid.core as core
 class MLP(fluid.Layer):
    '''
    Two stacked Linear layers (784 -> 10 -> 10) used as a small test model.
    '''

    def __init__(self, param_attr=None, bias_attr=None):
        '''
        Build the two Linear sub-layers.

        Parameters:
            param_attr: accepted but not used by this implementation.
            bias_attr: accepted but not used by this implementation.
        '''
        super(MLP, self).__init__()

        self._linear1 = Linear(784, 10)
        self._linear2 = Linear(10, 10)

    def forward(self, inputs):
        '''
        Apply self._linear1 followed by self._linear2 to inputs and return
        the result.
        '''
        hidden = self._linear1(inputs)
        return self._linear2(hidden)
 class TestDataParallelStateDict(unittest.TestCase):
    '''
    Check that DataParallel exposes the same state dict as the layer it wraps
    and that parameters can be restored through both set_dict and load_dict.
    '''

    def _zero_parameters(self, state, place):
        '''
        Overwrite every parameter in state with zeros and verify the write.

        Parameters:
            state(dict): mapping of name to parameter, as returned by state_dict()
            place: the place passed to the tensor's set() call
        '''
        for param in state.values():
            tensor = param.value().get_tensor()
            tensor.set(np.zeros_like(param.numpy()), place)
            self.assertEqual(np.sum(np.abs(param.numpy())), 0)

    def _assert_state_matches(self, state, expected):
        '''
        Assert that every parameter in state equals the numpy array stored
        under the same key in expected.
        '''
        for name, param in state.items():
            self.assertIn(name, expected)
            self.assertTrue(np.array_equal(param.numpy(), expected[name]))

    def test_data_parallel_state_dict(self):
        with fluid.dygraph.guard():
            strategy = dygraph.parallel.prepare_context()
            mlp = MLP()
            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)

            single_state = mlp.state_dict()
            parallel_state = parallel_mlp.state_dict()

            if core.is_compiled_with_cuda():
                place = fluid.CUDAPlace(0)
            else:
                place = fluid.CPUPlace()

            # The wrapper must expose every key of the plain layer with
            # identical values; keep numpy snapshots as the reference.
            base_para = {}
            for k, v in single_state.items():
                self.assertIn(k, parallel_state)
                self.assertTrue(
                    np.array_equal(v.numpy(), parallel_state[k].numpy()))
                base_para[k] = v.numpy()

            # set_dict must restore the zeroed parameters.
            self._zero_parameters(parallel_state, place)
            parallel_mlp.set_dict(base_para)
            self._assert_state_matches(parallel_mlp.state_dict(), base_para)

            # Zero again before load_dict: otherwise the parameters already
            # hold the expected values and a no-op load_dict would go
            # unnoticed.
            self._zero_parameters(parallel_mlp.state_dict(), place)
            parallel_mlp.load_dict(base_para)
            self._assert_state_matches(parallel_mlp.state_dict(), base_para)
 # Run the test cases in this module when the file is executed directly.
 if __name__ == '__main__':
    unittest.main()