State dict does not include data parallel layers (#22169)
* DataParallel state dict does not include the _layers. prefix; test=develop
* add unit test for DataParallel state dict; test=develop
* add load state test; test=develop
parent
5e601a92ad
commit
737334989b
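The change makes DataParallel.state_dict() report parameters under the same names as the wrapped Layer, so a checkpoint saved from the wrapper can be loaded into the bare model and vice versa. As a rough illustration of the invariant the new test checks (strip_wrapper_prefix is a hypothetical helper for this sketch, not PaddlePaddle's actual implementation), the fix amounts to stripping the wrapper's _layers. prefix from every key:

# Sketch only: DataParallel stores the wrapped model as an inner attribute,
# so a naive state_dict would emit keys like '_layers._linear1.weight'.
def strip_wrapper_prefix(state, prefix='_layers.'):
    # Drop the wrapper prefix from every key; leave other keys intact.
    return {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in state.items()
    }

wrapped = {'_layers._linear1.weight': 1, '_layers._linear1.bias': 2}
assert strip_wrapper_prefix(wrapped) == {
    '_linear1.weight': 1,
    '_linear1.bias': 2,
}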
@@ -0,0 +1,82 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest

import numpy as np

import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.nn import Linear


class MLP(fluid.Layer):
    def __init__(self, param_attr=None, bias_attr=None):
        super(MLP, self).__init__()

        self._linear1 = Linear(784, 10)
        self._linear2 = Linear(10, 10)

    def forward(self, inputs):
        y = self._linear1(inputs)
        y = self._linear2(y)
        return y


class TestDataParallelStateDict(unittest.TestCase):
    def test_data_parallel_state_dict(self):
        with fluid.dygraph.guard():
            strategy = dygraph.parallel.prepare_context()
            mlp = MLP()
            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)

            single_state = mlp.state_dict()
            parallel_state = parallel_mlp.state_dict()

            # The wrapper and the wrapped model must expose identical
            # parameter names and values: no '_layers.' prefix leaks in.
            base_para = {}
            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)
            for k, v in single_state.items():
                self.assertTrue(k in parallel_state)
                self.assertTrue(
                    np.array_equal(v.numpy(), parallel_state[k].numpy()))
                base_para[k] = v.numpy()

            # Zero out every parameter in place ...
            for k, v in parallel_state.items():
                np_t = v.numpy()
                var = v.value().get_tensor()
                var.set(np.zeros_like(np_t), place)
                self.assertTrue(np.sum(np.abs(v.numpy())) == 0)

            # ... then restore from the saved dict and verify the values.
            parallel_mlp.set_dict(base_para)

            parallel_state = parallel_mlp.state_dict()
            for k, v in parallel_state.items():
                self.assertTrue(np.array_equal(v.numpy(), base_para[k]))

            parallel_mlp.load_dict(base_para)


if __name__ == '__main__':
    unittest.main()