auto saving of integrated checkpoint files is not supported in manual parallel mode

pull/715/head
WeibiaoYu 5 years ago
parent 5519bce8ae
commit aacc85caec

@@ -150,8 +150,8 @@ class CheckpointConfig:
        keep_checkpoint_max (int): Maximum number of checkpoint files to keep. Default: 5.
        keep_checkpoint_per_n_minutes (int): Keep one checkpoint every n minutes. Default: 0.
            Can't be used with keep_checkpoint_max at the same time.
        integrated_save (bool): Whether to intergrated save in automatic model parall scene. Default: True.
            Integrated save function is only supported in automatic parall scene, not supported in manual parallel.
        integrated_save (bool): Whether to perform integrated save in the automatic model parallel scene. Default: True.
            Integrated save is only supported in the automatic parallel scene, not in manual parallel.

    Raises:
        ValueError: If the input_param is None or 0.
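For context, a minimal usage sketch of the options documented above, assuming the MindSpore callback API of this era; the save_checkpoint_steps value, prefix, and directory are illustrative assumptions, not part of this commit:

from mindspore.train.callback import ModelCheckpoint, CheckpointConfig

# keep_checkpoint_max and keep_checkpoint_per_n_minutes are mutually exclusive;
# integrated_save is only honored in the automatic parallel scene, per the docstring above.
config = CheckpointConfig(save_checkpoint_steps=100,   # illustrative value
                          keep_checkpoint_max=5,
                          integrated_save=True)
ckpt_cb = ModelCheckpoint(prefix="example", directory="./ckpt", config=config)  # hypothetical prefix/dir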

@@ -225,15 +225,6 @@ def load_param_into_net(net, parameter_dict):
        raise TypeError(msg)
    logger.info("Execute load parameter into net process.")
    for name in parameter_dict:
        for _, param in net.parameters_and_names():
            if name == param.name and param.layerwise_parallel:
                # layerwise parallel parameter data loaded from a checkpoint file
                # is complete (merged) data and needs to be split
                new_param = parameter_dict[param.name]
                _load_tensor_for_layerwise(new_param, param)
                break
    param_not_load = []
    for _, param in net.parameters_and_names():
        if param.name in parameter_dict:
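A sketch of how this function is typically driven, assuming the serialization API of this MindSpore version; the checkpoint file name is hypothetical and net is an already-constructed Cell:

from mindspore.train.serialization import load_checkpoint, load_param_into_net

param_dict = load_checkpoint("example-1_100.ckpt")  # hypothetical checkpoint file
# With the layerwise-parallel branch removed above, every parameter is loaded as-is.
load_param_into_net(net, param_dict)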
@@ -363,34 +354,6 @@ def _get_merged_param_data(net, param_name, param_data):
    return param_data


def _load_tensor_for_layerwise(new_param, old_param):
    """
    Replaces parameters with sliced tensors by layerwise parallel strategies.

    Args:
        new_param (Parameter): The new layerwise parallel parameter, will be loaded into the net.
        old_param (Parameter): The current parameter in the net.
    """
    if not isinstance(new_param.data, Tensor) or not isinstance(old_param.data, Tensor):
        logger.error("Failed to combine the net and the parameters.")
        msg = ("Layerwise parallel parameter should be a Tensor, but got {}.".format(type(new_param.data)))
        raise TypeError(msg)

    if old_param.data.shape() == new_param.data.shape():
        return

    from mindspore.parallel._tensor import _load_tensor
    from mindspore.communication.management import get_group_size

    dev_mat = [get_group_size()]
    shape = new_param.data.shape()
    tensor_map = []
    for x in range(len(shape)):  # dim 0 set 0, others set -1
        if x:
            tensor_map.append(-1)
        else:
            tensor_map.append(0)
    new_tensor = _load_tensor(new_param.data, dev_mat, tensor_map)
    new_param.set_parameter_data(new_tensor)
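To make the dev_mat/tensor_map convention above concrete, here is a standalone NumPy illustration (not MindSpore code) of the slicing this mapping expresses: dim 0 is sharded across the device group, every other dimension (mapped to -1) stays whole.

import numpy as np

def split_for_rank(merged, group_size, rank_id):
    """tensor_map [0, -1, ...]: shard dim 0 over dev_mat = [group_size]; keep other dims whole."""
    return np.split(merged, group_size, axis=0)[rank_id]

full = np.arange(12.0).reshape(4, 3)                   # merged (complete) parameter
local = split_for_rank(full, group_size=2, rank_id=1)  # this device's slice, shape (2, 3)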
def _fill_param_into_net(net, parameter_list):
    """
    Fills parameter_list into net.
